Changeset 195503 in webkit


Ignore:
Timestamp:
Jan 22, 2016, 7:24:42 PM (10 years ago)
Author:
fpizlo@apple.com
Message:

B3 should strength-reduce division by a constant
https://bugs.webkit.org/show_bug.cgi?id=153386

Reviewed by Benjamin Poulain.

You can turn a 32-bit division by a constant into a 64-bit multiplication by a constant
plus some shifts. A book called "Hacker's Delight" has a bunch of math about this. The
hard part is finding the constant by which to multiply, and the amount by which to shift.
The book tells you some theroems, but you still have to turn that into code by thinking
deep thoughts. Luckily I was able to avoid that because it turns out that LLVM already
has code for this. It's called APInt::magic(), where APInt is their class for reasoning
about integers.

The code has a compatible license to ours and we have already in the past taken code from
LLVM. So, that's what this patch does. The LLVM code is localized in
B3ComputeDivisionMagic.h. Then reduceStrength() uses that to construct the multiply+shift
sequence.

This is an enormous speed-up on AsmBench-0.9/bigfib.cpp.js. It makes us as fast on that
test as LLVM. It reduces our deficit on AsmBench to 1.5%. Previously it was 4.5%.

  • JavaScriptCore.xcodeproj/project.pbxproj:
  • b3/B3ComputeDivisionMagic.h: Added.

(JSC::B3::computeDivisionMagic):

  • b3/B3ReduceStrength.cpp:
Location:
trunk/Source/JavaScriptCore
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/ChangeLog

    r195502 r195503  
     12016-01-22  Filip Pizlo  <fpizlo@apple.com>
     2
     3        B3 should strength-reduce division by a constant
     4        https://bugs.webkit.org/show_bug.cgi?id=153386
     5
     6        Reviewed by Benjamin Poulain.
     7
     8        You can turn a 32-bit division by a constant into a 64-bit multiplication by a constant
     9        plus some shifts. A book called "Hacker's Delight" has a bunch of math about this. The
     10        hard part is finding the constant by which to multiply, and the amount by which to shift.
     11        The book tells you some theroems, but you still have to turn that into code by thinking
     12        deep thoughts. Luckily I was able to avoid that because it turns out that LLVM already
     13        has code for this. It's called APInt::magic(), where APInt is their class for reasoning
     14        about integers.
     15
     16        The code has a compatible license to ours and we have already in the past taken code from
     17        LLVM. So, that's what this patch does. The LLVM code is localized in
     18        B3ComputeDivisionMagic.h. Then reduceStrength() uses that to construct the multiply+shift
     19        sequence.
     20
     21        This is an enormous speed-up on AsmBench-0.9/bigfib.cpp.js. It makes us as fast on that
     22        test as LLVM. It reduces our deficit on AsmBench to 1.5%. Previously it was 4.5%.
     23
     24        * JavaScriptCore.xcodeproj/project.pbxproj:
     25        * b3/B3ComputeDivisionMagic.h: Added.
     26        (JSC::B3::computeDivisionMagic):
     27        * b3/B3ReduceStrength.cpp:
     28
    1292016-01-22  Saam barati  <sbarati@apple.com>
    230
  • trunk/Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj

    r195417 r195503  
    493493                0F8335B81639C1EA001443B5 /* ArrayAllocationProfile.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8335B51639C1E3001443B5 /* ArrayAllocationProfile.h */; settings = {ATTRIBUTES = (Private, ); }; };
    494494                0F8364B7164B0C110053329A /* DFGBranchDirection.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8364B5164B0C0E0053329A /* DFGBranchDirection.h */; };
     495                0F86AE201C5311C5006BE8EC /* B3ComputeDivisionMagic.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F86AE1F1C5311C5006BE8EC /* B3ComputeDivisionMagic.h */; };
    495496                0F885E111849A3BE00F1E3FA /* BytecodeUseDef.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F885E101849A3BE00F1E3FA /* BytecodeUseDef.h */; settings = {ATTRIBUTES = (Private, ); }; };
    496497                0F893BDB1936E23C001211F4 /* DFGStructureAbstractValue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F893BDA1936E23C001211F4 /* DFGStructureAbstractValue.cpp */; };
     
    26752676                0F8335B51639C1E3001443B5 /* ArrayAllocationProfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ArrayAllocationProfile.h; sourceTree = "<group>"; };
    26762677                0F8364B5164B0C0E0053329A /* DFGBranchDirection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DFGBranchDirection.h; path = dfg/DFGBranchDirection.h; sourceTree = "<group>"; };
     2678                0F86AE1F1C5311C5006BE8EC /* B3ComputeDivisionMagic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = B3ComputeDivisionMagic.h; path = b3/B3ComputeDivisionMagic.h; sourceTree = "<group>"; };
    26772679                0F885E101849A3BE00F1E3FA /* BytecodeUseDef.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BytecodeUseDef.h; sourceTree = "<group>"; };
    26782680                0F893BDA1936E23C001211F4 /* DFGStructureAbstractValue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DFGStructureAbstractValue.cpp; path = dfg/DFGStructureAbstractValue.cpp; sourceTree = "<group>"; };
     
    47724774                                0F338DFF1BF0276C0013C88F /* B3Compilation.cpp */,
    47734775                                0F338E001BF0276C0013C88F /* B3Compilation.h */,
     4776                                0F86AE1F1C5311C5006BE8EC /* B3ComputeDivisionMagic.h */,
    47744777                                0FEC84C31BDACDAC0080FF74 /* B3Const32Value.cpp */,
    47754778                                0FEC84C41BDACDAC0080FF74 /* B3Const32Value.h */,
     
    79907993                                1474C33B16AA2D950062F01D /* PrototypeMap.h in Headers */,
    79917994                                0F5780A218FE1E98001E72D9 /* PureNaN.h in Headers */,
     7995                                0F86AE201C5311C5006BE8EC /* B3ComputeDivisionMagic.h in Headers */,
    79927996                                0F15CD231BA5F9860031FFD3 /* PutByIdFlags.h in Headers */,
    79937997                                0F9332A414CA7DD90085F3C6 /* PutByIdStatus.h in Headers */,
  • trunk/Source/JavaScriptCore/b3/B3ReduceStrength.cpp

    r195466 r195503  
    3131#include "B3BasicBlockInlines.h"
    3232#include "B3BlockInsertionSet.h"
     33#include "B3ComputeDivisionMagic.h"
    3334#include "B3ControlValue.h"
    3435#include "B3Dominators.h"
     
    509510            // are strictly weaker: it has corner cases where it's allowed to do anything it
    510511            // likes.
    511             replaceWithNewValue(m_value->child(0)->divConstant(m_proc, m_value->child(1)));
     512            if (replaceWithNewValue(m_value->child(0)->divConstant(m_proc, m_value->child(1))))
     513                break;
     514
     515            if (m_value->child(1)->hasInt()) {
     516                switch (m_value->child(1)->asInt()) {
     517                case -1:
     518                    // Turn this: Div(value, -1)
     519                    // Into this: Neg(value)
     520                    replaceWithNewValue(
     521                        m_proc.add<Value>(Neg, m_value->origin(), m_value->child(0)));
     522                    break;
     523
     524                case 0:
     525                    // Turn this: Div(value, 0)
     526                    // Into this: 0
     527                    // We can do this because it's precisely correct for ChillDiv and for Div we
     528                    // are allowed to do whatever we want.
     529                    m_value->replaceWithIdentity(m_value->child(1));
     530                    m_changed = true;
     531                    break;
     532
     533                case 1:
     534                    // Turn this: Div(value, 1)
     535                    // Into this: value
     536                    m_value->replaceWithIdentity(m_value->child(0));
     537                    m_changed = true;
     538                    break;
     539
     540                default:
     541                    // Perform super comprehensive strength reduction of division. Currently we
     542                    // only do this for 32-bit divisions, since we need a high multiply
     543                    // operation. We emulate it using 64-bit multiply. We can't emulate 64-bit
     544                    // high multiply with a 128-bit multiply because we don't have a 128-bit
     545                    // multiply. We could do it with a patchpoint if we cared badly enough.
     546
     547                    if (m_value->type() != Int32)
     548                        break;
     549
     550                    int32_t divisor = m_value->child(1)->asInt32();
     551                    DivisionMagic<int32_t> magic = computeDivisionMagic(divisor);
     552
     553                    // Perform the "high" multiplication. We do it just to get the high bits.
     554                    // This is sort of like multiplying by the reciprocal, just more gnarly. It's
     555                    // from Hacker's Delight and I don't claim to understand it.
     556                    Value* magicQuotient = m_insertionSet.insert<Value>(
     557                        m_index, Trunc, m_value->origin(),
     558                        m_insertionSet.insert<Value>(
     559                            m_index, ZShr, m_value->origin(),
     560                            m_insertionSet.insert<Value>(
     561                                m_index, Mul, m_value->origin(),
     562                                m_insertionSet.insert<Value>(
     563                                    m_index, SExt32, m_value->origin(), m_value->child(0)),
     564                                m_insertionSet.insert<Const64Value>(
     565                                    m_index, m_value->origin(), magic.magicMultiplier)),
     566                            m_insertionSet.insert<Const32Value>(
     567                                m_index, m_value->origin(), 32)));
     568
     569                    if (divisor > 0 && magic.magicMultiplier < 0) {
     570                        magicQuotient = m_insertionSet.insert<Value>(
     571                            m_index, Add, m_value->origin(), magicQuotient, m_value->child(0));
     572                    }
     573                    if (divisor < 0 && magic.magicMultiplier > 0) {
     574                        magicQuotient = m_insertionSet.insert<Value>(
     575                            m_index, Sub, m_value->origin(), magicQuotient, m_value->child(0));
     576                    }
     577                    if (magic.shift > 0) {
     578                        magicQuotient = m_insertionSet.insert<Value>(
     579                            m_index, SShr, m_value->origin(), magicQuotient,
     580                            m_insertionSet.insert<Const32Value>(
     581                                m_index, m_value->origin(), magic.shift));
     582                    }
     583                    m_value->replaceWithIdentity(
     584                        m_insertionSet.insert<Value>(
     585                            m_index, Add, m_value->origin(), magicQuotient,
     586                            m_insertionSet.insert<Value>(
     587                                m_index, ZShr, m_value->origin(), magicQuotient,
     588                                m_insertionSet.insert<Const32Value>(
     589                                    m_index, m_value->origin(), 31))));
     590                    m_changed = true;
     591                    break;
     592                }
     593
     594                if (m_value->opcode() != ChillDiv && m_value->opcode() != Div)
     595                    break;
     596            }
    512597            break;
    513598
Note: See TracChangeset for help on using the changeset viewer.