Changeset 95758 in webkit


Ignore:
Timestamp:
Sep 22, 2011 3:42:54 PM (13 years ago)
Author:
fpizlo@apple.com
Message:

DFG JIT does not support to_primitive or strcat
https://bugs.webkit.org/show_bug.cgi?id=68582

Reviewed by Darin Adler.

This adds functional support for to_primitive and strcat. It focuses
on minimizing the amount of code emitted on to_primitive (if we know
that it is a primitive or can speculate cheaply, then we omit the
slow path) and on keeping the implementation of strcat simple while
leveraging whatever optimizations we have already. In particular,
unlike the Call and Construct nodes which require extending the size
of the DFG's callee registers, StrCat takes advantage of the fact
that no JS code can run while StrCat is in progress and uses a
scratch buffer, rather than the register file, to store the list of
values to concatenate. This was done mainly to keep the code simple,
but there are probably other benefits to keeping call frame sizes
down. Essentially, this patch ensures that the presence of an
op_strcat does not mess up any other optimizations we might do while
ensuring that if you do execute it, it'll work about as well as you'd
expect.

When combined with the previous patch for integer division, this is a
14% speed-up on Kraken. Without it, it would have been a 2% loss.

  • assembler/AbstractMacroAssembler.h:

(JSC::AbstractMacroAssembler::TrustedImmPtr::TrustedImmPtr):

  • dfg/DFGByteCodeParser.cpp:

(JSC::DFG::ByteCodeParser::parseBlock):

  • dfg/DFGCapabilities.h:

(JSC::DFG::canCompileOpcode):

  • dfg/DFGJITCodeGenerator.h:

(JSC::DFG::JITCodeGenerator::callOperation):

  • dfg/DFGJITCompiler.cpp:

(JSC::DFG::JITCompiler::exitSpeculativeWithOSR):

  • dfg/DFGNode.h:
  • dfg/DFGOperations.cpp:
  • dfg/DFGOperations.h:
  • dfg/DFGPropagator.cpp:

(JSC::DFG::Propagator::propagateNodePredictions):
(JSC::DFG::Propagator::performNodeCSE):

  • dfg/DFGSpeculativeJIT.cpp:

(JSC::DFG::SpeculativeJIT::compile):

  • runtime/JSGlobalData.cpp:

(JSC::JSGlobalData::JSGlobalData):
(JSC::JSGlobalData::~JSGlobalData):

  • runtime/JSGlobalData.h:

(JSC::JSGlobalData::scratchBufferForSize):

Location:
trunk/Source/JavaScriptCore
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/ChangeLog

    r95754 r95758  
     12011-09-21  Filip Pizlo  <fpizlo@apple.com>
     2
     3        DFG JIT does not support to_primitive or strcat
     4        https://bugs.webkit.org/show_bug.cgi?id=68582
     5
     6        Reviewed by Darin Adler.
     7       
     8        This adds functional support for to_primitive and strcat. It focuses
     9        on minimizing the amount of code emitted on to_primitive (if we know
     10        that it is a primitive or can speculate cheaply, then we omit the
     11        slow path) and on keeping the implementation of strcat simple while
     12        leveraging whatever optimizations we have already. In particular,
     13        unlike the Call and Construct nodes which require extending the size
     14        of the DFG's callee registers, StrCat takes advantage of the fact
     15        that no JS code can run while StrCat is in progress and uses a
     16        scratch buffer, rather than the register file, to store the list of
     17        values to concatenate. This was done mainly to keep the code simple,
     18        but there are probably other benefits to keeping call frame sizes
     19        down. Essentially, this patch ensures that the presence of an
     20        op_strcat does not mess up any other optimizations we might do while
     21        ensuring that if you do execute it, it'll work about as well as you'd
     22        expect.
     23       
     24        When combined with the previous patch for integer division, this is a
     25        14% speed-up on Kraken. Without it, it would have been a 2% loss.
     26
     27        * assembler/AbstractMacroAssembler.h:
     28        (JSC::AbstractMacroAssembler::TrustedImmPtr::TrustedImmPtr):
     29        * dfg/DFGByteCodeParser.cpp:
     30        (JSC::DFG::ByteCodeParser::parseBlock):
     31        * dfg/DFGCapabilities.h:
     32        (JSC::DFG::canCompileOpcode):
     33        * dfg/DFGJITCodeGenerator.h:
     34        (JSC::DFG::JITCodeGenerator::callOperation):
     35        * dfg/DFGJITCompiler.cpp:
     36        (JSC::DFG::JITCompiler::exitSpeculativeWithOSR):
     37        * dfg/DFGNode.h:
     38        * dfg/DFGOperations.cpp:
     39        * dfg/DFGOperations.h:
     40        * dfg/DFGPropagator.cpp:
     41        (JSC::DFG::Propagator::propagateNodePredictions):
     42        (JSC::DFG::Propagator::performNodeCSE):
     43        * dfg/DFGSpeculativeJIT.cpp:
     44        (JSC::DFG::SpeculativeJIT::compile):
     45        * runtime/JSGlobalData.cpp:
     46        (JSC::JSGlobalData::JSGlobalData):
     47        (JSC::JSGlobalData::~JSGlobalData):
     48        * runtime/JSGlobalData.h:
     49        (JSC::JSGlobalData::scratchBufferForSize):
     50
    1512011-09-22  Filip Pizlo  <fpizlo@apple.com>
    252
  • trunk/Source/JavaScriptCore/assembler/AbstractMacroAssembler.h

    r89630 r95758  
    162162        {
    163163        }
     164       
     165        // This is only here so that TrustedImmPtr(0) does not confuse the C++
     166        // overload handling rules.
     167        explicit TrustedImmPtr(int value)
     168            : m_value(0)
     169        {
     170            ASSERT_UNUSED(value, !value);
     171        }
     172
     173        explicit TrustedImmPtr(size_t value)
     174            : m_value(reinterpret_cast<void*>(value))
     175        {
     176        }
    164177
    165178        intptr_t asIntptr()
  • trunk/Source/JavaScriptCore/dfg/DFGByteCodeParser.cpp

    r95754 r95758  
    969969            NEXT_OPCODE(op_not);
    970970        }
     971           
     972        case op_to_primitive: {
     973            NodeIndex value = get(currentInstruction[2].u.operand);
     974            set(currentInstruction[1].u.operand, addToGraph(ToPrimitive, value));
     975            NEXT_OPCODE(op_to_primitive);
     976        }
     977           
     978        case op_strcat: {
     979            int startOperand = currentInstruction[2].u.operand;
     980            int numOperands = currentInstruction[3].u.operand;
     981            for (int operandIdx = startOperand; operandIdx < startOperand + numOperands; ++operandIdx)
     982                addVarArgChild(get(operandIdx));
     983            set(currentInstruction[1].u.operand, addToGraph(Node::VarArg, StrCat, OpInfo(0), OpInfo(0)));
     984            NEXT_OPCODE(op_strcat);
     985        }
    971986
    972987        case op_less: {
  • trunk/Source/JavaScriptCore/dfg/DFGCapabilities.h

    r95753 r95758  
    117117    case op_resolve:
    118118    case op_resolve_base:
     119    case op_strcat:
     120    case op_to_primitive:
    119121    case op_throw:
    120122    case op_throw_reference_error:
  • trunk/Source/JavaScriptCore/dfg/DFGJITCodeGenerator.h

    r95523 r95758  
    890890        callOperation((J_DFGOperation_EP)operation, result, identifier);
    891891    }
     892    void callOperation(J_DFGOperation_EPS operation, GPRReg result, void* pointer, size_t size)
     893    {
     894        ASSERT(isFlushed());
     895
     896        m_jit.move(JITCompiler::TrustedImmPtr(size), GPRInfo::argumentGPR2);
     897        m_jit.move(JITCompiler::TrustedImmPtr(pointer), GPRInfo::argumentGPR1);
     898        m_jit.move(GPRInfo::callFrameRegister, GPRInfo::argumentGPR0);
     899
     900        appendCallWithExceptionCheck(operation);
     901        m_jit.move(GPRInfo::returnValueGPR, result);
     902    }
    892903    void callOperation(J_DFGOperation_EJP operation, GPRReg result, GPRReg arg1, void* pointer)
    893904    {
  • trunk/Source/JavaScriptCore/dfg/DFGJITCompiler.cpp

    r95681 r95758  
    219219    }
    220220   
    221     EncodedJSValue* scratchBuffer = static_cast<EncodedJSValue*>(globalData()->osrScratchBufferForSize(sizeof(EncodedJSValue) * (numberOfPoisonedVirtualRegisters + (numberOfDisplacedVirtualRegisters <= GPRInfo::numberOfRegisters ? 0 : numberOfDisplacedVirtualRegisters))));
     221    EncodedJSValue* scratchBuffer = static_cast<EncodedJSValue*>(globalData()->scratchBufferForSize(sizeof(EncodedJSValue) * (numberOfPoisonedVirtualRegisters + (numberOfDisplacedVirtualRegisters <= GPRInfo::numberOfRegisters ? 0 : numberOfDisplacedVirtualRegisters))));
    222222
    223223    // From here on, the code assumes that it is profitable to maximize the distance
  • trunk/Source/JavaScriptCore/dfg/DFGNode.h

    r95754 r95758  
    297297    macro(InstanceOf, NodeResultBoolean) \
    298298    macro(LogicalNot, NodeResultBoolean | NodeMightClobber) \
     299    macro(ToPrimitive, NodeResultJS | NodeMustGenerate | NodeClobbersWorld) \
     300    macro(StrCat, NodeResultJS | NodeMustGenerate | NodeHasVarArgs | NodeClobbersWorld) \
    299301    \
    300302    /* Block terminals. */\
  • trunk/Source/JavaScriptCore/dfg/DFGOperations.cpp

    r95672 r95758  
    675675}
    676676
     677EncodedJSValue operationToPrimitive(ExecState* exec, EncodedJSValue value)
     678{
     679    return JSValue::encode(JSValue::decode(value).toPrimitive(exec));
     680}
     681
     682EncodedJSValue operationStrCat(ExecState* exec, void* start, size_t size)
     683{
     684    return JSValue::encode(jsString(exec, static_cast<Register*>(start), size));
     685}
     686
    677687void operationThrowHasInstanceError(ExecState* exec, EncodedJSValue encodedBase)
    678688{
  • trunk/Source/JavaScriptCore/dfg/DFGOperations.h

    r95672 r95758  
    4646typedef EncodedJSValue (*J_DFGOperation_EJI)(ExecState*, EncodedJSValue, Identifier*);
    4747typedef EncodedJSValue (*J_DFGOperation_EP)(ExecState*, void*);
     48typedef EncodedJSValue (*J_DFGOperation_EPS)(ExecState*, void*, size_t);
    4849typedef EncodedJSValue (*J_DFGOperation_EI)(ExecState*, Identifier*);
    4950typedef RegisterSizedBoolean (*Z_DFGOperation_EJ)(ExecState*, EncodedJSValue);
     
    7576EncodedJSValue operationResolveBase(ExecState*, Identifier*);
    7677EncodedJSValue operationResolveBaseStrictPut(ExecState*, Identifier*);
     78EncodedJSValue operationToPrimitive(ExecState*, EncodedJSValue);
     79EncodedJSValue operationStrCat(ExecState*, void* start, size_t);
    7780void operationThrowHasInstanceError(ExecState*, EncodedJSValue base);
    7881void operationPutByValStrict(ExecState*, EncodedJSValue encodedBase, EncodedJSValue encodedProperty, EncodedJSValue encodedValue);
  • trunk/Source/JavaScriptCore/dfg/DFGPropagator.cpp

    r95754 r95758  
    547547            break;
    548548        }
     549           
     550        case StrCat: {
     551            changed |= setPrediction(makePrediction(PredictString, StrongPrediction));
     552            break;
     553        }
     554           
     555        case ToPrimitive: {
     556            PredictedType child = m_predictions[node.child1()];
     557            if (isStrongPrediction(child)) {
     558                if (isObjectPrediction(child)) {
     559                    // I'd love to fold this case into the case below, but I can't, because
     560                    // removing PredictObjectMask from something that only has an object
     561                    // prediction and nothing else means we have an ill-formed PredictedType
     562                    // (strong predict-none). This should be killed once we remove all traces
     563                    // of static (aka weak) predictions.
     564                    changed |= mergePrediction(makePrediction(PredictString, StrongPrediction));
     565                } else if (child & PredictObjectMask) {
     566                    // Objects get turned into strings. So if the input has hints of objectness,
     567                    // the output will have hinsts of stringiness.
     568                    changed |= mergePrediction(mergePredictions(child & ~PredictObjectMask, makePrediction(PredictString, StrongPrediction)));
     569                } else
     570                    changed |= mergePrediction(child);
     571            }
     572            break;
     573        }
    549574
    550575#ifndef NDEBUG
     
    10111036        printf("   %s @%u: ", Graph::opName(m_graph[m_compileIndex].op), m_compileIndex);
    10121037#endif
    1013 
     1038       
     1039        // NOTE: there are some nodes that we deliberately don't CSE even though we
     1040        // probably could, like StrCat and ToPrimitive. That's because there is no
     1041        // evidence that doing CSE on these nodes would result in a performance
     1042        // progression. Hence considering these nodes in CSE would just mean that this
     1043        // code does more work with no win. Of course, we may want to reconsider this,
     1044        // since StrCat is trivially CSE-able. It's not trivially doable for
     1045        // ToPrimitive, but we could change that with some speculations if we really
     1046        // needed to.
     1047       
    10141048        switch (node.op) {
    10151049       
  • trunk/Source/JavaScriptCore/dfg/DFGSpeculativeJIT.cpp

    r95754 r95758  
    15701570        break;
    15711571    }
     1572       
     1573    case ToPrimitive: {
     1574        if (shouldSpeculateInteger(node.child1())) {
     1575            // It's really profitable to speculate integer, since it's really cheap,
     1576            // it means we don't have to do any real work, and we emit a lot less code.
     1577           
     1578            SpeculateIntegerOperand op1(this, node.child1());
     1579            GPRTemporary result(this, op1);
     1580           
     1581            m_jit.move(op1.gpr(), result.gpr());
     1582            if (op1.format() == DataFormatInteger)
     1583                m_jit.orPtr(GPRInfo::tagTypeNumberRegister, result.gpr());
     1584           
     1585            jsValueResult(result.gpr(), m_compileIndex);
     1586            break;
     1587        }
     1588       
     1589        // FIXME: Add string speculation here.
     1590       
     1591        bool wasPrimitive = isKnownNumeric(node.child1()) || isKnownBoolean(node.child1());
     1592       
     1593        JSValueOperand op1(this, node.child1());
     1594        GPRTemporary result(this, op1);
     1595       
     1596        GPRReg op1GPR = op1.gpr();
     1597        GPRReg resultGPR = result.gpr();
     1598       
     1599        op1.use();
     1600       
     1601        if (wasPrimitive)
     1602            m_jit.move(op1GPR, resultGPR);
     1603        else {
     1604            MacroAssembler::JumpList alreadyPrimitive;
     1605           
     1606            alreadyPrimitive.append(m_jit.branchTestPtr(MacroAssembler::NonZero, op1GPR, GPRInfo::tagMaskRegister));
     1607            alreadyPrimitive.append(m_jit.branchPtr(MacroAssembler::Equal, MacroAssembler::Address(op1GPR), MacroAssembler::TrustedImmPtr(m_jit.globalData()->jsStringVPtr)));
     1608           
     1609            silentSpillAllRegisters(resultGPR);
     1610            m_jit.move(op1GPR, GPRInfo::argumentGPR1);
     1611            m_jit.move(GPRInfo::callFrameRegister, GPRInfo::argumentGPR0);
     1612            appendCallWithExceptionCheck(operationToPrimitive);
     1613            m_jit.move(GPRInfo::returnValueGPR, resultGPR);
     1614            silentFillAllRegisters(resultGPR);
     1615           
     1616            MacroAssembler::Jump done = m_jit.jump();
     1617           
     1618            alreadyPrimitive.link(&m_jit);
     1619            m_jit.move(op1GPR, resultGPR);
     1620           
     1621            done.link(&m_jit);
     1622        }
     1623       
     1624        jsValueResult(resultGPR, m_compileIndex, UseChildrenCalledExplicitly);
     1625        break;
     1626    }
     1627       
     1628    case StrCat: {
     1629        // We really don't want to grow the register file just to do a StrCat. Say we
     1630        // have 50 functions on the stack that all have a StrCat in them that has
     1631        // upwards of 10 operands. In the DFG this would mean that each one gets
     1632        // some random virtual register, and then to do the StrCat we'd need a second
     1633        // span of 10 operands just to have somewhere to copy the 10 operands to, where
     1634        // they'd be contiguous and we could easily tell the C code how to find them.
     1635        // Ugly! So instead we use the scratchBuffer infrastructure in JSGlobalData. That
     1636        // way, those 50 functions will share the same scratchBuffer for offloading their
     1637        // StrCat operands. It's about as good as we can do, unless we start doing
     1638        // virtual register coalescing to ensure that operands to StrCat get spilled
     1639        // in exactly the place where StrCat wants them, or else have the StrCat
     1640        // refer to those operands' SetLocal instructions to force them to spill in
     1641        // the right place. Basically, any way you cut it, the current approach
     1642        // probably has the best balance of performance and sensibility in the sense
     1643        // that it does not increase the complexity of the DFG JIT just to make StrCat
     1644        // fast and pretty.
     1645       
     1646        EncodedJSValue* buffer = static_cast<EncodedJSValue*>(m_jit.globalData()->scratchBufferForSize(sizeof(EncodedJSValue) * node.numChildren()));
     1647       
     1648        for (unsigned operandIdx = 0; operandIdx < node.numChildren(); ++operandIdx) {
     1649            JSValueOperand operand(this, m_jit.graph().m_varArgChildren[node.firstChild() + operandIdx]);
     1650            GPRReg opGPR = operand.gpr();
     1651            operand.use();
     1652           
     1653            m_jit.storePtr(opGPR, buffer + operandIdx);
     1654        }
     1655       
     1656        flushRegisters();
     1657       
     1658        GPRResult result(this);
     1659       
     1660        callOperation(operationStrCat, result.gpr(), buffer, node.numChildren());
     1661       
     1662        jsValueResult(result.gpr(), m_compileIndex, UseChildrenCalledExplicitly);
     1663        break;
     1664    }
    15721665
    15731666    case ConvertThis: {
  • trunk/Source/JavaScriptCore/runtime/JSGlobalData.cpp

    r95751 r95758  
    187187    , heap(this, heapSize)
    188188#if ENABLE(DFG_JIT)
    189     , sizeOfLastOSRScratchBuffer(0)
     189    , sizeOfLastScratchBuffer(0)
    190190#endif
    191191    , dynamicGlobalObject(0)
     
    353353
    354354#if ENABLE(DFG_JIT)
    355     for (unsigned i = 0; i < osrScratchBuffers.size(); ++i)
    356         fastFree(osrScratchBuffers[i]);
     355    for (unsigned i = 0; i < scratchBuffers.size(); ++i)
     356        fastFree(scratchBuffers[i]);
    357357#endif
    358358}
  • trunk/Source/JavaScriptCore/runtime/JSGlobalData.h

    r95751 r95758  
    234234#endif
    235235#if ENABLE(DFG_JIT)
    236         Vector<void*> osrScratchBuffers;
    237         size_t sizeOfLastOSRScratchBuffer;
    238        
    239         void* osrScratchBufferForSize(size_t size)
     236        Vector<void*> scratchBuffers;
     237        size_t sizeOfLastScratchBuffer;
     238       
     239        void* scratchBufferForSize(size_t size)
    240240        {
    241241            if (!size)
    242242                return 0;
    243243           
    244             if (size > sizeOfLastOSRScratchBuffer) {
     244            if (size > sizeOfLastScratchBuffer) {
    245245                // Protect against a N^2 memory usage pathology by ensuring
    246246                // that at worst, we get a geometric series, meaning that the
    247247                // total memory usage is somewhere around
    248248                // max(scratch buffer size) * 4.
    249                 sizeOfLastOSRScratchBuffer = size * 2;
     249                sizeOfLastScratchBuffer = size * 2;
    250250               
    251                 osrScratchBuffers.append(fastMalloc(sizeOfLastOSRScratchBuffer));
     251                scratchBuffers.append(fastMalloc(sizeOfLastScratchBuffer));
    252252            }
    253253           
    254             return osrScratchBuffers.last();
     254            return scratchBuffers.last();
    255255        }
    256256#endif
Note: See TracChangeset for help on using the changeset viewer.