Changeset 100881 in webkit


Ignore:
Timestamp:
Nov 20, 2011 8:47:00 PM (12 years ago)
Author:
yuqiang.xian@intel.com
Message:

Improve modulo operation on 32bit platforms
https://bugs.webkit.org/show_bug.cgi?id=72501

Reviewed by Filip Pizlo.

Extend softModulo to support X86 and MIPS in baseline JIT.
Apply the same optimization to 32bit DFG JIT.
1% gain on Kraken, tested on Linux Core i7 Nehalem 32bit.

  • dfg/DFGSpeculativeJIT.h:
  • dfg/DFGSpeculativeJIT32_64.cpp:

(JSC::DFG::SpeculativeJIT::compileSoftModulo):
(JSC::DFG::SpeculativeJIT::compile):

  • jit/JITArithmetic32_64.cpp:

(JSC::JIT::emit_op_mod):
(JSC::JIT::emitSlow_op_mod):

  • jit/JITOpcodes32_64.cpp:

(JSC::JIT::softModulo):

  • wtf/Platform.h:
Location:
trunk/Source/JavaScriptCore
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/ChangeLog

    r100880 r100881  
     1 2011-11-20  Yuqiang Xian  <yuqiang.xian@intel.com>
     2
     3        Improve modulo operation on 32bit platforms
     4        https://bugs.webkit.org/show_bug.cgi?id=72501
     5
     6        Reviewed by Filip Pizlo.
     7
     8        Extend softModulo to support X86 and MIPS in baseline JIT.
     9        Apply the same optimization to 32bit DFG JIT.
     10        1% gain on Kraken, tested on Linux Core i7 Nehalem 32bit.
     11
     12        * dfg/DFGSpeculativeJIT.h:
     13        * dfg/DFGSpeculativeJIT32_64.cpp:
     14        (JSC::DFG::SpeculativeJIT::compileSoftModulo):
     15        (JSC::DFG::SpeculativeJIT::compile):
     16        * jit/JITArithmetic32_64.cpp:
     17        (JSC::JIT::emit_op_mod):
     18        (JSC::JIT::emitSlow_op_mod):
     19        * jit/JITOpcodes32_64.cpp:
     20        (JSC::JIT::softModulo):
     21        * wtf/Platform.h:
     22
    1 23 2011-11-18  Filip Pizlo  <fpizlo@apple.com>
    224
  • trunk/Source/JavaScriptCore/dfg/DFGSpeculativeJIT.h

    r100829 r100881  
    19181918    void compileGetByValOnByteArray(Node&);
    19191919    void compilePutByValForByteArray(GPRReg base, GPRReg property, Node&);
    1920    
     1920#if USE(JSVALUE32_64)
     1921    void compileSoftModulo(Node&);
     1922#endif
     1923
    19211924    // It is acceptable to have structure be equal to scratch, so long as you're fine
    19221925    // with the structure GPR being clobbered.
  • trunk/Source/JavaScriptCore/dfg/DFGSpeculativeJIT32_64.cpp

    r100878 r100881  
    20512051        noResult(m_compileIndex, UseChildrenCalledExplicitly);
    20522052    }
     2053}
     2054
     2055void SpeculativeJIT::compileSoftModulo(Node& node)
     2056{
     2057    SpeculateIntegerOperand op1(this, node.child1());
     2058    SpeculateIntegerOperand op2(this, node.child2());
     2059    GPRReg op1Gpr = op1.gpr();
     2060    GPRReg op2Gpr = op2.gpr();
     2061
     2062    speculationCheck(JSValueRegs(), NoNode, m_jit.branchTest32(JITCompiler::Zero, op2Gpr));
     2063
     2064#if CPU(X86)
     2065    GPRTemporary eax(this, X86Registers::eax);
     2066    GPRTemporary edx(this, X86Registers::edx);
     2067    GPRReg temp2 = InvalidGPRReg;
     2068    if (op2Gpr == X86Registers::eax || op2Gpr == X86Registers::edx) {
     2069        temp2 = allocate();
     2070        m_jit.move(op2Gpr, temp2);
     2071        op2Gpr = temp2;
     2072    }
     2073    GPRReg resultGPR = edx.gpr();
     2074    GPRReg scratchGPR = eax.gpr();
     2075#else
     2076    GPRTemporary result(this);
     2077    GPRTemporary scratch(this);
     2078    GPRReg resultGPR = result.gpr();
     2079    GPRReg scratchGPR = scratch.gpr();
     2080#endif
     2081
     2082    GPRTemporary scratch2(this);
     2083    GPRReg scratchGPR2 = scratch2.gpr();
     2084    JITCompiler::JumpList exitBranch;
     2085
     2086    // resultGPR is to hold the ABS value of the dividend before final result is produced
     2087    m_jit.move(op1Gpr, resultGPR);
     2088    // scratchGPR2 is to hold the ABS value of the divisor
     2089    m_jit.move(op2Gpr, scratchGPR2);
     2090
     2091    // Check for negative result remainder
     2092    // According to ECMA-262, the sign of the result equals the sign of the dividend
     2093    JITCompiler::Jump positiveDividend = m_jit.branch32(JITCompiler::GreaterThanOrEqual, op1Gpr, TrustedImm32(0));
     2094    m_jit.neg32(resultGPR);
     2095    m_jit.move(TrustedImm32(1), scratchGPR);
     2096    JITCompiler::Jump saveCondition = m_jit.jump();
     2097
     2098    positiveDividend.link(&m_jit);
     2099    m_jit.move(TrustedImm32(0), scratchGPR);
     2100
     2101    // Save the condition for negative remainder
     2102    saveCondition.link(&m_jit);
     2103    m_jit.push(scratchGPR);
     2104
     2105    JITCompiler::Jump positiveDivisor = m_jit.branch32(JITCompiler::GreaterThanOrEqual, op2Gpr, TrustedImm32(0));
     2106    m_jit.neg32(scratchGPR2);
     2107
     2108    positiveDivisor.link(&m_jit);
     2109    exitBranch.append(m_jit.branch32(JITCompiler::LessThan, resultGPR, scratchGPR2));
     2110
     2111    // Power of two fast case
     2112    m_jit.move(scratchGPR2, scratchGPR);
     2113    m_jit.sub32(TrustedImm32(1), scratchGPR);
     2114    JITCompiler::Jump notPowerOfTwo = m_jit.branchTest32(JITCompiler::NonZero, scratchGPR, scratchGPR2);
     2115    m_jit.and32(scratchGPR, resultGPR);
     2116    exitBranch.append(m_jit.jump());
     2117
     2118    notPowerOfTwo.link(&m_jit);
     2119
     2120#if CPU(X86)
     2121    m_jit.move(resultGPR, eax.gpr());
     2122    m_jit.assembler().cdq();
     2123    m_jit.assembler().idivl_r(scratchGPR2);
     2124#elif CPU(ARM_THUMB2)
     2125    GPRTemporary scratch3(this);
     2126    GPRReg scratchGPR3 = scratch3.gpr();
     2127    m_jit.countLeadingZeros32(scratchGPR2, scratchGPR);
     2128    m_jit.countLeadingZeros32(resultGPR, scratchGPR3);
     2129    m_jit.sub32(scratchGPR3, scratchGPR);
     2130
     2131    JITCompiler::Jump useFullTable = m_jit.branch32(JITCompiler::Equal, scratchGPR, TrustedImm32(31));
     2132
     2133    m_jit.neg32(scratchGPR);
     2134    m_jit.add32(TrustedImm32(31), scratchGPR);
     2135
     2136    int elementSizeByShift = -1;
     2137    elementSizeByShift = 3;
     2138    m_jit.relativeTableJump(scratchGPR, elementSizeByShift);
     2139
     2140    useFullTable.link(&m_jit);
     2141    // Modulo table
     2142    for (int i = 31; i > 0; --i) {
     2143        ShiftTypeAndAmount shift(SRType_LSL, i);
     2144        m_jit.assembler().sub_S(scratchGPR, resultGPR, scratchGPR2, shift);
     2145        m_jit.assembler().it(ARMv7Assembler::ConditionCS);
     2146        m_jit.assembler().mov(resultGPR, scratchGPR);
     2147    }
     2148
     2149    JITCompiler::Jump lower = m_jit.branch32(JITCompiler::Below, resultGPR, scratchGPR2);
     2150    m_jit.sub32(scratchGPR2, resultGPR);
     2151    lower.link(&m_jit);
     2152#endif // CPU(X86)
     2153
     2154    exitBranch.link(&m_jit);
     2155
     2156    // Check for negative remainder
     2157    m_jit.pop(scratchGPR);
     2158    JITCompiler::Jump positiveResult = m_jit.branch32(JITCompiler::Equal, scratchGPR, TrustedImm32(0));
     2159    m_jit.neg32(resultGPR);
     2160    positiveResult.link(&m_jit);
     2161
     2162    integerResult(resultGPR, m_compileIndex);
     2163
     2164#if CPU(X86)
     2165    if (temp2 != InvalidGPRReg)
     2166        unlock(temp2);
     2167#endif
    20532168}
    20542169
     
    25422657
    25432658    case ArithMod: {
    2544 #if CPU(X86)
    25452659        if (!at(node.child1()).shouldNotSpeculateInteger() && !at(node.child2()).shouldNotSpeculateInteger()
    25462660            && node.canSpeculateInteger()) {
    2547             SpeculateIntegerOperand op1(this, node.child1());
    2548             SpeculateIntegerOperand op2(this, node.child2());
    2549             GPRTemporary eax(this, X86Registers::eax);
    2550             GPRTemporary edx(this, X86Registers::edx);
    2551             GPRReg op1Gpr = op1.gpr();
    2552             GPRReg op2Gpr = op2.gpr();
    2553 
    2554             speculationCheck(JSValueRegs(), NoNode, m_jit.branchTest32(JITCompiler::Zero, op2Gpr));
    2555 
    2556             GPRReg temp2 = InvalidGPRReg;
    2557             if (op2Gpr == X86Registers::eax || op2Gpr == X86Registers::edx) {
    2558                 temp2 = allocate();
    2559                 m_jit.move(op2Gpr, temp2);
    2560                 op2Gpr = temp2;
    2561             }
    2562 
    2563             m_jit.move(op1Gpr, eax.gpr());
    2564             m_jit.assembler().cdq();
    2565             m_jit.assembler().idivl_r(op2Gpr);
    2566 
    2567             if (temp2 != InvalidGPRReg)
    2568                 unlock(temp2);
    2569 
    2570             integerResult(edx.gpr(), m_compileIndex);
     2661            compileSoftModulo(node);
    25712662            break;
    25722663        }
    2573 #endif
    25742664       
    25752665        SpeculateDoubleOperand op1(this, node.child1());
     
    25852675        callOperation(fmodAsDFGOperation, result.fpr(), op1FPR, op2FPR);
    25862676       
    2587 #if !CPU(X86)
    2588         if (!at(node.child1()).shouldNotSpeculateInteger() && !at(node.child2()).shouldNotSpeculateInteger()
    2589             && node.canSpeculateInteger()) {
    2590             FPRTemporary scratch(this, op2);
    2591             GPRTemporary intResult(this);
    2592 
    2593             JITCompiler::JumpList failureCases;
    2594             m_jit.branchConvertDoubleToInt32(result.fpr(), intResult.gpr(), failureCases, scratch.fpr());
    2595             speculationCheck(JSValueRegs(), NoNode, failureCases);
    2596 
    2597             integerResult(intResult.gpr(), m_compileIndex);
    2598             break;
    2599         }
    2600 #endif
    2601 
    26022677        doubleResult(result.fpr(), m_compileIndex);
    26032678        break;
  • trunk/Source/JavaScriptCore/jit/JITArithmetic32_64.cpp

    r99889 r100881  
    11921192/* ------------------------------ BEGIN: OP_MOD ------------------------------ */
    11931193
    1194 #if CPU(X86) || CPU(X86_64) || CPU(MIPS)
    1195 
    11961194void JIT::emit_op_mod(Instruction* currentInstruction)
    11971195{
     
    11991197    unsigned op1 = currentInstruction[2].u.operand;
    12001198    unsigned op2 = currentInstruction[3].u.operand;
     1199
     1200#if ENABLE(JIT_USE_SOFT_MODULO)
    12011201
    12021202#if CPU(X86) || CPU(X86_64)
     
    12081208#endif
    12091209
    1210     if (isOperandConstantImmediateInt(op2) && getConstantOperand(op2).asInt32() != 0) {
    1211         emitLoad(op1, regT1, regT0);
    1212         move(Imm32(getConstantOperand(op2).asInt32()), regT2);
    1213         addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag)));
    1214         if (getConstantOperand(op2).asInt32() == -1)
    1215             addSlowCase(branch32(Equal, regT0, TrustedImm32(0x80000000))); // -2147483648 / -1 => EXC_ARITHMETIC
    1216     } else {
    1217         emitLoad2(op1, regT1, regT0, op2, regT3, regT2);
    1218         addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag)));
    1219         addSlowCase(branch32(NotEqual, regT3, TrustedImm32(JSValue::Int32Tag)));
    1220 
    1221         addSlowCase(branch32(Equal, regT0, TrustedImm32(0x80000000))); // -2147483648 / -1 => EXC_ARITHMETIC
    1222         addSlowCase(branch32(Equal, regT2, TrustedImm32(0))); // divide by 0
    1223     }
    1224 
    1225     move(regT0, regT3); // Save dividend payload, in case of 0.
    1226 #if CPU(X86) || CPU(X86_64)
    1227     m_assembler.cdq();
    1228     m_assembler.idivl_r(regT2);
    1229 #elif CPU(MIPS)
    1230     m_assembler.div(regT0, regT2);
    1231     m_assembler.mfhi(regT1);
    1232 #endif
    1233 
    1234     // If the remainder is zero and the dividend is negative, the result is -0.
    1235     Jump storeResult1 = branchTest32(NonZero, regT1);
    1236     Jump storeResult2 = branchTest32(Zero, regT3, TrustedImm32(0x80000000)); // not negative
    1237     emitStore(dst, jsNumber(-0.0));
    1238     Jump end = jump();
    1239 
    1240     storeResult1.link(this);
    1241     storeResult2.link(this);
    1242     emitStoreInt32(dst, regT1, (op1 == dst || op2 == dst));
    1243     end.link(this);
    1244 }
    1245 
    1246 void JIT::emitSlow_op_mod(Instruction* currentInstruction, Vector<SlowCaseEntry>::iterator& iter)
    1247 {
    1248     unsigned dst = currentInstruction[1].u.operand;
    1249     unsigned op1 = currentInstruction[2].u.operand;
    1250     unsigned op2 = currentInstruction[3].u.operand;
    1251 
    1252     if (isOperandConstantImmediateInt(op2) && getConstantOperand(op2).asInt32() != 0) {
    1253         linkSlowCase(iter); // int32 check
    1254         if (getConstantOperand(op2).asInt32() == -1)
    1255             linkSlowCase(iter); // 0x80000000 check
    1256     } else {
    1257         linkSlowCase(iter); // int32 check
    1258         linkSlowCase(iter); // int32 check
    1259         linkSlowCase(iter); // 0 check
    1260         linkSlowCase(iter); // 0x80000000 check
    1261     }
    1262 
    1263     JITStubCall stubCall(this, cti_op_mod);
    1264     stubCall.addArgument(op1);
    1265     stubCall.addArgument(op2);
    1266     stubCall.call(dst);
    1267 }
    1268 
    1269 #else // CPU(X86) || CPU(X86_64) || CPU(MIPS)
    1270 
    1271 void JIT::emit_op_mod(Instruction* currentInstruction)
    1272 {
    1273     unsigned dst = currentInstruction[1].u.operand;
    1274     unsigned op1 = currentInstruction[2].u.operand;
    1275     unsigned op2 = currentInstruction[3].u.operand;
    1276 
    1277 #if ENABLE(JIT_USE_SOFT_MODULO)
    12781210    emitLoad2(op1, regT1, regT0, op2, regT3, regT2);
    12791211    addSlowCase(branch32(NotEqual, regT1, TrustedImm32(JSValue::Int32Tag)));
     
    13131245}
    13141246
    1315 #endif // CPU(X86) || CPU(X86_64)
    1316 
    13171247/* ------------------------------ END: OP_MOD ------------------------------ */
    13181248
  • trunk/Source/JavaScriptCore/jit/JITOpcodes32_64.cpp

    r100165 r100881  
    16611661void JIT::softModulo()
    16621662{
    1663     push(regT1);
    1664     push(regT3);
    16651663    move(regT2, regT3);
    16661664    move(regT0, regT2);
    16671665    move(TrustedImm32(0), regT1);
     1666    JumpList exitBranch;
    16681667
    16691668    // Check for negative result reminder
     
    16811680    push(regT1);
    16821681
    1683     Jump exitBranch = branch32(LessThan, regT2, regT3);
     1682    exitBranch.append(branch32(LessThan, regT2, regT3));
    16841683
    16851684    // Power of two fast case
    16861685    move(regT3, regT0);
    16871686    sub32(TrustedImm32(1), regT0);
    1688     Jump powerOfTwo = branchTest32(NonZero, regT0, regT3);
     1687    Jump notPowerOfTwo = branchTest32(NonZero, regT0, regT3);
    16891688    and32(regT0, regT2);
    1690     powerOfTwo.link(this);
    1691 
    1692     and32(regT3, regT0);
    1693 
    1694     Jump exitBranch2 = branchTest32(Zero, regT0);
    1695 
     1689    exitBranch.append(jump());
     1690
     1691    notPowerOfTwo.link(this);
     1692
     1693#if CPU(X86) || CPU(X86_64)
     1694    move(regT2, regT0);
     1695    m_assembler.cdq();
     1696    m_assembler.idivl_r(regT3);
     1697    move(regT1, regT2);
     1698#elif CPU(MIPS)
     1699    m_assembler.div(regT2, regT3);
     1700    m_assembler.mfhi(regT2);
     1701#else
    16961702    countLeadingZeros32(regT2, regT0);
    16971703    countLeadingZeros32(regT3, regT1);
     
    17301736    sub32(regT3, regT2);
    17311737    lower.link(this);
     1738#endif
    17321739
    17331740    exitBranch.link(this);
    1734     exitBranch2.link(this);
    17351741
    17361742    // Check for negative reminder
     
    17411747
    17421748    move(regT2, regT0);
    1743 
    1744     pop(regT3);
    1745     pop(regT1);
    17461749    ret();
    17471750}
  • trunk/Source/JavaScriptCore/wtf/Platform.h

    r100180 r100881  
    956956#endif
    957957
     958#if CPU(X86) || CPU(X86_64) || CPU(MIPS)
     959#if !defined(ENABLE_JIT_USE_SOFT_MODULO)
     960#define ENABLE_JIT_USE_SOFT_MODULO 1
     961#endif
     962#endif
     963
    958964#if CPU(X86) && COMPILER(MSVC)
    959965#define JSC_HOST_CALL __fastcall
Note: See TracChangeset for help on using the changeset viewer.