Changeset 246946 in webkit


Ignore:
Timestamp:
Jun 28, 2019 4:19:11 PM (5 years ago)
Author:
Justin Michaud
Message:

Add b3 macro lowering for CheckMul on arm64
https://bugs.webkit.org/show_bug.cgi?id=199251

Reviewed by Robin Morisset.

JSTests:

  • microbenchmarks/check-mul-constant.js: Added.

(doTest):

  • microbenchmarks/check-mul-no-constant.js: Added.

(doTest):

  • microbenchmarks/check-mul-power-of-two.js: Added.

(doTest):

Source/JavaScriptCore:

  • Lower CheckMul for 32-bit arguments on arm64 into a mul and then an overflow check.
  • Add a new opcode to air on arm64 for smull (multiplySignExtend32).
  • Fuse sign extend 32 + mul into smull (taking two 32-bit arguments and producing 64 bits).
  • 1.25x speedup on power of two microbenchmark, 1.15x speedup on normal constant microbenchmark, and no change on the no-constant benchmark.

Also, skip some of the b3 tests that were failing before this patch so that the new tests can run
to completion.

  • assembler/MacroAssemblerARM64.h:

(JSC::MacroAssemblerARM64::multiplySignExtend32):

  • assembler/testmasm.cpp:

(JSC::testMul32SignExtend):
(JSC::run):

  • b3/B3LowerMacros.cpp:
  • b3/B3LowerToAir.cpp:
  • b3/air/AirOpcode.opcodes:
  • b3/testb3.cpp:

(JSC::B3::testMulArgs32SignExtend):
(JSC::B3::testMulImm32SignExtend):
(JSC::B3::testMemoryFence):
(JSC::B3::testStoreFence):
(JSC::B3::testLoadFence):
(JSC::B3::testPinRegisters):
(JSC::B3::run):

Location:
trunk
Files:
3 added
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/JSTests/ChangeLog

    r246851 r246946  
     12019-06-28  Justin Michaud  <justin_michaud@apple.com>
     2
     3        Add b3 macro lowering for CheckMul on arm64
     4        https://bugs.webkit.org/show_bug.cgi?id=199251
     5
     6        Reviewed by Robin Morisset.
     7
     8        * microbenchmarks/check-mul-constant.js: Added.
     9        (doTest):
     10        * microbenchmarks/check-mul-no-constant.js: Added.
     11        (doTest):
     12        * microbenchmarks/check-mul-power-of-two.js: Added.
     13        (doTest):
     14
    1152019-06-26  Keith Miller  <keith_miller@apple.com>
    216
  • trunk/Source/JavaScriptCore/ChangeLog

    r246925 r246946  
     12019-06-28  Justin Michaud  <justin_michaud@apple.com>
     2
     3        Add b3 macro lowering for CheckMul on arm64
     4        https://bugs.webkit.org/show_bug.cgi?id=199251
     5
     6        Reviewed by Robin Morisset.
     7
     8        - Lower CheckMul for 32-bit arguments on arm64 into a mul and then an overflow check.
     9        - Add a new opcode to air on arm64 for smull (multiplySignExtend32).
     10        - Fuse sign extend 32 + mul into smull (taking two 32-bit arguments and producing 64 bits).
     11        - 1.25x speedup on power of two microbenchmark, 1.15x speedup on normal constant microbenchmark,
     12          and no change on the no-constant benchmark.
     13        Also, skip some of the b3 tests that were failing before this patch so that the new tests can run
     14        to completion.
     15
     16        * assembler/MacroAssemblerARM64.h:
     17        (JSC::MacroAssemblerARM64::multiplySignExtend32):
     18        * assembler/testmasm.cpp:
     19        (JSC::testMul32SignExtend):
     20        (JSC::run):
     21        * b3/B3LowerMacros.cpp:
     22        * b3/B3LowerToAir.cpp:
     23        * b3/air/AirOpcode.opcodes:
     24        * b3/testb3.cpp:
     25        (JSC::B3::testMulArgs32SignExtend):
     26        (JSC::B3::testMulImm32SignExtend):
     27        (JSC::B3::testMemoryFence):
     28        (JSC::B3::testStoreFence):
     29        (JSC::B3::testLoadFence):
     30        (JSC::B3::testPinRegisters):
     31        (JSC::B3::run):
     32
    1332019-06-28  Konstantin Tokarev  <annulen@yandex.ru>
    234
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h

    r246451 r246946  
    570570    }
    571571
     572    void multiplySignExtend32(RegisterID left, RegisterID right, RegisterID dest)
     573    {
     574        m_assembler.smull(dest, left, right);
     575    }
     576
    572577    void div32(RegisterID dividend, RegisterID divisor, RegisterID dest)
    573578    {
  • trunk/Source/JavaScriptCore/assembler/testmasm.cpp

    r246451 r246946  
    352352}
    353353
     354#if CPU(ARM64)
     355void testMul32SignExtend()
     356{
     357    for (auto value : int32Operands()) {
     358        auto mul = compile([=] (CCallHelpers& jit) {
     359            jit.emitFunctionPrologue();
     360
     361            jit.multiplySignExtend32(GPRInfo::argumentGPR0, GPRInfo::argumentGPR1, GPRInfo::returnValueGPR);
     362
     363            jit.emitFunctionEpilogue();
     364            jit.ret();
     365        });
     366
     367        for (auto value2 : int32Operands())
     368            CHECK_EQ(invoke<long int>(mul, value, value2), ((long int) value) * ((long int) value2));
     369    }
     370}
     371#endif
     372
    354373#if CPU(X86) || CPU(X86_64) || CPU(ARM64)
    355374void testCompareFloat(MacroAssembler::DoubleCondition condition)
     
    11111130    RUN(testMul32WithImmediates());
    11121131
     1132#if CPU(ARM64)
     1133    RUN(testMul32SignExtend());
     1134#endif
     1135
    11131136#if CPU(X86) || CPU(X86_64) || CPU(ARM64)
    11141137    RUN(testCompareFloat(MacroAssembler::DoubleEqual));
  • trunk/Source/JavaScriptCore/b3/B3LowerMacros.cpp

    r242100 r246946  
    178178            }
    179179
     180            case CheckMul: {
     181                if (isARM64() && m_value->child(0)->type() == Int32) {
     182                    CheckValue* checkMul = m_value->as<CheckValue>();
     183
     184                    Value* left = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(0));
     185                    Value* right = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(1));
     186                    Value* mulResult = m_insertionSet.insert<Value>(m_index, Mul, m_origin, left, right);
     187                    Value* mulResult32 = m_insertionSet.insert<Value>(m_index, Trunc, m_origin, mulResult);
     188                    Value* upperResult = m_insertionSet.insert<Value>(m_index, Trunc, m_origin,
     189                        m_insertionSet.insert<Value>(m_index, SShr, m_origin, mulResult, m_insertionSet.insert<Const32Value>(m_index, m_origin, 32)));
     190                    Value* signBit = m_insertionSet.insert<Value>(m_index, SShr, m_origin,
     191                        mulResult32,
     192                        m_insertionSet.insert<Const32Value>(m_index, m_origin, 31));
     193                    Value* hasOverflowed = m_insertionSet.insert<Value>(m_index, NotEqual, m_origin, upperResult, signBit);
     194
     195                    CheckValue* check = m_insertionSet.insert<CheckValue>(m_index, Check, m_origin, hasOverflowed);
     196                    check->setGenerator(checkMul->generator());
     197                    check->clobberEarly(checkMul->earlyClobbered());
     198                    check->clobberLate(checkMul->lateClobbered());
     199                    check->append(checkMul->constrainedChild(0));
     200                    check->append(checkMul->constrainedChild(1));
     201
     202                    m_value->replaceWithIdentity(mulResult32);
     203                    m_changed = true;
     204                }
     205                break;
     206            }
     207
    180208            case Switch: {
    181209                SwitchValue* switchValue = m_value->as<SwitchValue>();
  • trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp

    r246368 r246946  
    26032603
    26042604        case Mul: {
     2605            if (m_value->type() == Int64
     2606                && isValidForm(MultiplySignExtend32, Arg::Tmp, Arg::Tmp, Arg::Tmp)
     2607                && m_value->child(0)->opcode() == SExt32
     2608                && !m_locked.contains(m_value->child(0))) {
     2609                Value* opLeft = m_value->child(0);
     2610                Value* left = opLeft->child(0);
     2611                Value* opRight = m_value->child(1);
     2612                Value* right = nullptr;
     2613
     2614                if (opRight->opcode() == SExt32 && !m_locked.contains(opRight->child(0))) {
     2615                    right = opRight->child(0);
     2616                } else if (m_value->child(1)->isRepresentableAs<int32_t>() && !m_locked.contains(m_value->child(1))) {
     2617                    // We just use the 64-bit const int as a 32 bit const int directly
     2618                    right = opRight;
     2619                }
     2620
     2621                if (right) {
     2622                    append(MultiplySignExtend32, tmp(left), tmp(right), tmp(m_value));
     2623                    return;
     2624                }
     2625            }
    26052626            appendBinOp<Mul32, Mul64, MulDouble, MulFloat, Commutative>(
    26062627                m_value->child(0), m_value->child(1));
  • trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes

    r241577 r246946  
    262262    Tmp, Tmp, Tmp
    263263
     264arm64: MultiplySignExtend32 U:G:32, U:G:32, ZD:G:64
     265    Tmp, Tmp, Tmp
     266
    264267arm64: Div32 U:G:32, U:G:32, ZD:G:32
    265268    Tmp, Tmp, Tmp
  • trunk/Source/JavaScriptCore/b3/testb3.cpp

    r244712 r246946  
    11901190}
    11911191
     1192void testMulArgs32SignExtend(int a, int b)
     1193{
     1194    Procedure proc;
     1195    if (proc.optLevel() < 1)
     1196        return;
     1197    BasicBlock* root = proc.addBlock();
     1198    Value* arg1 = root->appendNew<Value>(
     1199        proc, Trunc, Origin(),
     1200        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
     1201    Value* arg2 = root->appendNew<Value>(
     1202        proc, Trunc, Origin(),
     1203        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
     1204    Value* arg164 = root->appendNew<Value>(proc, SExt32, Origin(), arg1);
     1205    Value* arg264 = root->appendNew<Value>(proc, SExt32, Origin(), arg2);
     1206    Value* mul = root->appendNew<Value>(proc, Mul, Origin(), arg164, arg264);
     1207    root->appendNewControlValue(proc, Return, Origin(), mul);
     1208
     1209    auto code = compileProc(proc);
     1210
     1211    CHECK(invoke<long int>(*code, a, b) == ((long int) a) * ((long int) b));
     1212}
     1213
     1214void testMulImm32SignExtend(const int a, int b)
     1215{
     1216    Procedure proc;
     1217    if (proc.optLevel() < 1)
     1218        return;
     1219    BasicBlock* root = proc.addBlock();
     1220    Value* arg1 = root->appendNew<Const64Value>(proc, Origin(), a);
     1221    Value* arg2 = root->appendNew<Value>(
     1222        proc, Trunc, Origin(),
     1223        root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
     1224    Value* arg264 = root->appendNew<Value>(proc, SExt32, Origin(), arg2);
     1225    Value* mul = root->appendNew<Value>(proc, Mul, Origin(), arg1, arg264);
     1226    root->appendNewControlValue(proc, Return, Origin(), mul);
     1227
     1228    auto code = compileProc(proc);
     1229
     1230    CHECK(invoke<long int>(*code, b) == ((long int) a) * ((long int) b));
     1231}
     1232
    11921233void testMulLoadTwice()
    11931234{
     
    1463614677{
    1463714678    Procedure proc;
    14638    
    14639     BasicBlock* root = proc.addBlock();
    14640    
     14679
     14680    BasicBlock* root = proc.addBlock();
     14681
    1464114682    root->appendNew<FenceValue>(proc, Origin());
    1464214683    root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42));
    14643    
     14684
    1464414685    auto code = compileProc(proc);
    1464514686    CHECK_EQ(invoke<int>(*code), 42);
     
    1464714688        checkUsesInstruction(*code, "lock or $0x0, (%rsp)");
    1464814689    if (isARM64())
    14649         checkUsesInstruction(*code, "dmb    ish");
     14690        checkUsesInstruction(*code, "dmb     ish");
    1465014691    checkDoesNotUseInstruction(*code, "mfence");
    14651     checkDoesNotUseInstruction(*code, "dmb    ishst");
     14692    checkDoesNotUseInstruction(*code, "dmb     ishst");
    1465214693}
    1465314694
     
    1465514696{
    1465614697    Procedure proc;
    14657    
    14658     BasicBlock* root = proc.addBlock();
    14659    
     14698
     14699    BasicBlock* root = proc.addBlock();
     14700
    1466014701    root->appendNew<FenceValue>(proc, Origin(), HeapRange::top(), HeapRange());
    1466114702    root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42));
    14662    
     14703
    1466314704    auto code = compileProc(proc);
    1466414705    CHECK_EQ(invoke<int>(*code), 42);
     
    1466614707    checkDoesNotUseInstruction(*code, "mfence");
    1466714708    if (isARM64())
    14668         checkUsesInstruction(*code, "dmb    ishst");
     14709        checkUsesInstruction(*code, "dmb     ishst");
    1466914710}
    1467014711
     
    1467214713{
    1467314714    Procedure proc;
    14674    
    14675     BasicBlock* root = proc.addBlock();
    14676    
     14715
     14716    BasicBlock* root = proc.addBlock();
     14717
    1467714718    root->appendNew<FenceValue>(proc, Origin(), HeapRange(), HeapRange::top());
    1467814719    root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42));
    14679    
     14720
    1468014721    auto code = compileProc(proc);
    1468114722    CHECK_EQ(invoke<int>(*code), 42);
     
    1468314724    checkDoesNotUseInstruction(*code, "mfence");
    1468414725    if (isARM64())
    14685         checkUsesInstruction(*code, "dmb    ish");
    14686     checkDoesNotUseInstruction(*code, "dmb    ishst");
     14726        checkUsesInstruction(*code, "dmb     ish");
     14727    checkDoesNotUseInstruction(*code, "dmb     ishst");
    1468714728}
    1468814729
     
    1496215003        CHECK_EQ(usesCSRs, !pin);
    1496315004    };
    14964    
     15005
    1496515006    go(true);
    1496615007    go(false);
     
    1715817199
    1715917200    auto shouldRun = [&] (const char* testName) -> bool {
     17201        // FIXME: These tests fail <https://bugs.webkit.org/show_bug.cgi?id=199330>.
     17202        if (!filter && isARM64()) {
     17203            for (auto& failingTest : {
     17204                "testReportUsedRegistersLateUseFollowedByEarlyDefDoesNotMarkUseAsDead",
     17205                "testNegFloatWithUselessDoubleConversion",
     17206                "testPinRegisters",
     17207            }) {
     17208                if (WTF::findIgnoringASCIICaseWithoutLength(testName, failingTest) != WTF::notFound) {
     17209                    dataLogLn("*** Warning: Skipping known-bad test: ", testName);
     17210                    return false;
     17211                }
     17212            }
     17213        }
     17214        if (!filter && isX86()) {
     17215            for (auto& failingTest : {
     17216                "testReportUsedRegistersLateUseFollowedByEarlyDefDoesNotMarkUseAsDead",
     17217            }) {
     17218                if (WTF::findIgnoringASCIICaseWithoutLength(testName, failingTest) != WTF::notFound) {
     17219                    dataLogLn("*** Warning: Skipping known-bad test: ", testName);
     17220                    return false;
     17221                }
     17222            }
     17223        }
    1716017224        return !filter || WTF::findIgnoringASCIICaseWithoutLength(testName, filter) != WTF::notFound;
    1716117225    };
     
    1727817342    RUN(testMulImmArg(1, 0));
    1727917343    RUN(testMulImmArg(3, 3));
     17344    RUN(testMulImm32SignExtend(1, 2));
     17345    RUN(testMulImm32SignExtend(0, 2));
     17346    RUN(testMulImm32SignExtend(1, 0));
     17347    RUN(testMulImm32SignExtend(3, 3));
     17348    RUN(testMulImm32SignExtend(0xFFFFFFFF, 0xFFFFFFFF));
     17349    RUN(testMulImm32SignExtend(0xFFFFFFFE, 0xFFFFFFFF));
     17350    RUN(testMulImm32SignExtend(0xFFFFFFFF, 0xFFFFFFFE));
    1728017351    RUN(testMulArgs32(1, 1));
    1728117352    RUN(testMulArgs32(1, 2));
     17353    RUN(testMulArgs32(0xFFFFFFFF, 0xFFFFFFFF));
     17354    RUN(testMulArgs32(0xFFFFFFFE, 0xFFFFFFFF));
     17355    RUN(testMulArgs32SignExtend(1, 1));
     17356    RUN(testMulArgs32SignExtend(1, 2));
     17357    RUN(testMulArgs32SignExtend(0xFFFFFFFF, 0xFFFFFFFF));
     17358    RUN(testMulArgs32SignExtend(0xFFFFFFFE, 0xFFFFFFFF));
    1728217359    RUN(testMulLoadTwice());
    1728317360    RUN(testMulAddArgsLeft());
Note: See TracChangeset for help on using the changeset viewer.