Changeset 246946 in webkit
- Timestamp:
- Jun 28, 2019 4:19:11 PM (5 years ago)
- Location:
- trunk
- Files:
-
- 3 added
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JSTests/ChangeLog
r246851 r246946 1 2019-06-28 Justin Michaud <justin_michaud@apple.com> 2 3 Add b3 macro lowering for CheckMul on arm64 4 https://bugs.webkit.org/show_bug.cgi?id=199251 5 6 Reviewed by Robin Morisset. 7 8 * microbenchmarks/check-mul-constant.js: Added. 9 (doTest): 10 * microbenchmarks/check-mul-no-constant.js: Added. 11 (doTest): 12 * microbenchmarks/check-mul-power-of-two.js: Added. 13 (doTest): 14 1 15 2019-06-26 Keith Miller <keith_miller@apple.com> 2 16 -
trunk/Source/JavaScriptCore/ChangeLog
r246925 r246946 1 2019-06-28 Justin Michaud <justin_michaud@apple.com> 2 3 Add b3 macro lowering for CheckMul on arm64 4 https://bugs.webkit.org/show_bug.cgi?id=199251 5 6 Reviewed by Robin Morisset. 7 8 - Lower CheckMul for 32-bit arguments on arm64 into a mul and then an overflow check. 9 - Add a new opcode to air on arm64 for smull (multiplySignExtend32). 10 - Fuse sign extend 32 + mul into smull (taking two 32-bit arguments and producing 64 bits). 11 - 1.25x speedup on power of two microbenchmark, 1.15x speedup on normal constant microbenchmark, 12 and no change on the no-constant benchmark. 13 Also, skip some of the b3 tests that were failing before this patch so that the new tests can run 14 to completion. 15 16 * assembler/MacroAssemblerARM64.h: 17 (JSC::MacroAssemblerARM64::multiplySignExtend32): 18 * assembler/testmasm.cpp: 19 (JSC::testMul32SignExtend): 20 (JSC::run): 21 * b3/B3LowerMacros.cpp: 22 * b3/B3LowerToAir.cpp: 23 * b3/air/AirOpcode.opcodes: 24 * b3/testb3.cpp: 25 (JSC::B3::testMulArgs32SignExtend): 26 (JSC::B3::testMulImm32SignExtend): 27 (JSC::B3::testMemoryFence): 28 (JSC::B3::testStoreFence): 29 (JSC::B3::testLoadFence): 30 (JSC::B3::testPinRegisters): 31 (JSC::B3::run): 32 1 33 2019-06-28 Konstantin Tokarev <annulen@yandex.ru> 2 34 -
trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
r246451 r246946 570 570 } 571 571 572 void multiplySignExtend32(RegisterID left, RegisterID right, RegisterID dest) 573 { 574 m_assembler.smull(dest, left, right); 575 } 576 572 577 void div32(RegisterID dividend, RegisterID divisor, RegisterID dest) 573 578 { -
trunk/Source/JavaScriptCore/assembler/testmasm.cpp
r246451 r246946 352 352 } 353 353 354 #if CPU(ARM64) 355 void testMul32SignExtend() 356 { 357 for (auto value : int32Operands()) { 358 auto mul = compile([=] (CCallHelpers& jit) { 359 jit.emitFunctionPrologue(); 360 361 jit.multiplySignExtend32(GPRInfo::argumentGPR0, GPRInfo::argumentGPR1, GPRInfo::returnValueGPR); 362 363 jit.emitFunctionEpilogue(); 364 jit.ret(); 365 }); 366 367 for (auto value2 : int32Operands()) 368 CHECK_EQ(invoke<long int>(mul, value, value2), ((long int) value) * ((long int) value2)); 369 } 370 } 371 #endif 372 354 373 #if CPU(X86) || CPU(X86_64) || CPU(ARM64) 355 374 void testCompareFloat(MacroAssembler::DoubleCondition condition) … … 1111 1130 RUN(testMul32WithImmediates()); 1112 1131 1132 #if CPU(ARM64) 1133 RUN(testMul32SignExtend()); 1134 #endif 1135 1113 1136 #if CPU(X86) || CPU(X86_64) || CPU(ARM64) 1114 1137 RUN(testCompareFloat(MacroAssembler::DoubleEqual)); -
trunk/Source/JavaScriptCore/b3/B3LowerMacros.cpp
r242100 r246946 178 178 } 179 179 180 case CheckMul: { 181 if (isARM64() && m_value->child(0)->type() == Int32) { 182 CheckValue* checkMul = m_value->as<CheckValue>(); 183 184 Value* left = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(0)); 185 Value* right = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(1)); 186 Value* mulResult = m_insertionSet.insert<Value>(m_index, Mul, m_origin, left, right); 187 Value* mulResult32 = m_insertionSet.insert<Value>(m_index, Trunc, m_origin, mulResult); 188 Value* upperResult = m_insertionSet.insert<Value>(m_index, Trunc, m_origin, 189 m_insertionSet.insert<Value>(m_index, SShr, m_origin, mulResult, m_insertionSet.insert<Const32Value>(m_index, m_origin, 32))); 190 Value* signBit = m_insertionSet.insert<Value>(m_index, SShr, m_origin, 191 mulResult32, 192 m_insertionSet.insert<Const32Value>(m_index, m_origin, 31)); 193 Value* hasOverflowed = m_insertionSet.insert<Value>(m_index, NotEqual, m_origin, upperResult, signBit); 194 195 CheckValue* check = m_insertionSet.insert<CheckValue>(m_index, Check, m_origin, hasOverflowed); 196 check->setGenerator(checkMul->generator()); 197 check->clobberEarly(checkMul->earlyClobbered()); 198 check->clobberLate(checkMul->lateClobbered()); 199 check->append(checkMul->constrainedChild(0)); 200 check->append(checkMul->constrainedChild(1)); 201 202 m_value->replaceWithIdentity(mulResult32); 203 m_changed = true; 204 } 205 break; 206 } 207 180 208 case Switch: { 181 209 SwitchValue* switchValue = m_value->as<SwitchValue>(); -
trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp
r246368 r246946 2603 2603 2604 2604 case Mul: { 2605 if (m_value->type() == Int64 2606 && isValidForm(MultiplySignExtend32, Arg::Tmp, Arg::Tmp, Arg::Tmp) 2607 && m_value->child(0)->opcode() == SExt32 2608 && !m_locked.contains(m_value->child(0))) { 2609 Value* opLeft = m_value->child(0); 2610 Value* left = opLeft->child(0); 2611 Value* opRight = m_value->child(1); 2612 Value* right = nullptr; 2613 2614 if (opRight->opcode() == SExt32 && !m_locked.contains(opRight->child(0))) { 2615 right = opRight->child(0); 2616 } else if (m_value->child(1)->isRepresentableAs<int32_t>() && !m_locked.contains(m_value->child(1))) { 2617 // We just use the 64-bit const int as a 32 bit const int directly 2618 right = opRight; 2619 } 2620 2621 if (right) { 2622 append(MultiplySignExtend32, tmp(left), tmp(right), tmp(m_value)); 2623 return; 2624 } 2625 } 2605 2626 appendBinOp<Mul32, Mul64, MulDouble, MulFloat, Commutative>( 2606 2627 m_value->child(0), m_value->child(1)); -
trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes
r241577 r246946 262 262 Tmp, Tmp, Tmp 263 263 264 arm64: MultiplySignExtend32 U:G:32, U:G:32, ZD:G:64 265 Tmp, Tmp, Tmp 266 264 267 arm64: Div32 U:G:32, U:G:32, ZD:G:32 265 268 Tmp, Tmp, Tmp -
trunk/Source/JavaScriptCore/b3/testb3.cpp
r244712 r246946 1190 1190 } 1191 1191 1192 void testMulArgs32SignExtend(int a, int b) 1193 { 1194 Procedure proc; 1195 if (proc.optLevel() < 1) 1196 return; 1197 BasicBlock* root = proc.addBlock(); 1198 Value* arg1 = root->appendNew<Value>( 1199 proc, Trunc, Origin(), 1200 root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0)); 1201 Value* arg2 = root->appendNew<Value>( 1202 proc, Trunc, Origin(), 1203 root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1)); 1204 Value* arg164 = root->appendNew<Value>(proc, SExt32, Origin(), arg1); 1205 Value* arg264 = root->appendNew<Value>(proc, SExt32, Origin(), arg2); 1206 Value* mul = root->appendNew<Value>(proc, Mul, Origin(), arg164, arg264); 1207 root->appendNewControlValue(proc, Return, Origin(), mul); 1208 1209 auto code = compileProc(proc); 1210 1211 CHECK(invoke<long int>(*code, a, b) == ((long int) a) * ((long int) b)); 1212 } 1213 1214 void testMulImm32SignExtend(const int a, int b) 1215 { 1216 Procedure proc; 1217 if (proc.optLevel() < 1) 1218 return; 1219 BasicBlock* root = proc.addBlock(); 1220 Value* arg1 = root->appendNew<Const64Value>(proc, Origin(), a); 1221 Value* arg2 = root->appendNew<Value>( 1222 proc, Trunc, Origin(), 1223 root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1)); 1224 Value* arg264 = root->appendNew<Value>(proc, SExt32, Origin(), arg2); 1225 Value* mul = root->appendNew<Value>(proc, Mul, Origin(), arg1, arg264); 1226 root->appendNewControlValue(proc, Return, Origin(), mul); 1227 1228 auto code = compileProc(proc); 1229 1230 CHECK(invoke<long int>(*code, b) == ((long int) a) * ((long int) b)); 1231 } 1232 1192 1233 void testMulLoadTwice() 1193 1234 { … … 14636 14677 { 14637 14678 Procedure proc; 14638 14639 BasicBlock* root = proc.addBlock(); 14640 14679 14680 BasicBlock* root = proc.addBlock(); 14681 14641 14682 root->appendNew<FenceValue>(proc, Origin()); 14642 14683 root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42)); 14643 14684 14644 14685 auto code = compileProc(proc); 14645 14686 CHECK_EQ(invoke<int>(*code), 42); … … 14647 14688 checkUsesInstruction(*code, "lock or $0x0, (%rsp)"); 14648 14689 if (isARM64()) 14649 checkUsesInstruction(*code, "dmb ish");14690 checkUsesInstruction(*code, "dmb ish"); 14650 14691 checkDoesNotUseInstruction(*code, "mfence"); 14651 checkDoesNotUseInstruction(*code, "dmb ishst");14692 checkDoesNotUseInstruction(*code, "dmb ishst"); 14652 14693 } 14653 14694 … … 14655 14696 { 14656 14697 Procedure proc; 14657 14658 BasicBlock* root = proc.addBlock(); 14659 14698 14699 BasicBlock* root = proc.addBlock(); 14700 14660 14701 root->appendNew<FenceValue>(proc, Origin(), HeapRange::top(), HeapRange()); 14661 14702 root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42)); 14662 14703 14663 14704 auto code = compileProc(proc); 14664 14705 CHECK_EQ(invoke<int>(*code), 42); … … 14666 14707 checkDoesNotUseInstruction(*code, "mfence"); 14667 14708 if (isARM64()) 14668 checkUsesInstruction(*code, "dmb ishst");14709 checkUsesInstruction(*code, "dmb ishst"); 14669 14710 } 14670 14711 … … 14672 14713 { 14673 14714 Procedure proc; 14674 14675 BasicBlock* root = proc.addBlock(); 14676 14715 14716 BasicBlock* root = proc.addBlock(); 14717 14677 14718 root->appendNew<FenceValue>(proc, Origin(), HeapRange(), HeapRange::top()); 14678 14719 root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42)); 14679 14720 14680 14721 auto code = compileProc(proc); 14681 14722 CHECK_EQ(invoke<int>(*code), 42); … … 14683 14724 checkDoesNotUseInstruction(*code, "mfence"); 14684 14725 if (isARM64()) 14685 checkUsesInstruction(*code, "dmb ish");14686 checkDoesNotUseInstruction(*code, "dmb ishst");14726 checkUsesInstruction(*code, "dmb ish"); 14727 checkDoesNotUseInstruction(*code, "dmb ishst"); 14687 14728 } 14688 14729 … … 14962 15003 CHECK_EQ(usesCSRs, !pin); 14963 15004 }; 14964 15005 14965 15006 go(true); 14966 15007 go(false); … … 17158 17199 17159 17200 auto shouldRun = [&] (const char* testName) -> bool { 17201 // FIXME: These tests fail <https://bugs.webkit.org/show_bug.cgi?id=199330>. 17202 if (!filter && isARM64()) { 17203 for (auto& failingTest : { 17204 "testReportUsedRegistersLateUseFollowedByEarlyDefDoesNotMarkUseAsDead", 17205 "testNegFloatWithUselessDoubleConversion", 17206 "testPinRegisters", 17207 }) { 17208 if (WTF::findIgnoringASCIICaseWithoutLength(testName, failingTest) != WTF::notFound) { 17209 dataLogLn("*** Warning: Skipping known-bad test: ", testName); 17210 return false; 17211 } 17212 } 17213 } 17214 if (!filter && isX86()) { 17215 for (auto& failingTest : { 17216 "testReportUsedRegistersLateUseFollowedByEarlyDefDoesNotMarkUseAsDead", 17217 }) { 17218 if (WTF::findIgnoringASCIICaseWithoutLength(testName, failingTest) != WTF::notFound) { 17219 dataLogLn("*** Warning: Skipping known-bad test: ", testName); 17220 return false; 17221 } 17222 } 17223 } 17160 17224 return !filter || WTF::findIgnoringASCIICaseWithoutLength(testName, filter) != WTF::notFound; 17161 17225 }; … … 17278 17342 RUN(testMulImmArg(1, 0)); 17279 17343 RUN(testMulImmArg(3, 3)); 17344 RUN(testMulImm32SignExtend(1, 2)); 17345 RUN(testMulImm32SignExtend(0, 2)); 17346 RUN(testMulImm32SignExtend(1, 0)); 17347 RUN(testMulImm32SignExtend(3, 3)); 17348 RUN(testMulImm32SignExtend(0xFFFFFFFF, 0xFFFFFFFF)); 17349 RUN(testMulImm32SignExtend(0xFFFFFFFE, 0xFFFFFFFF)); 17350 RUN(testMulImm32SignExtend(0xFFFFFFFF, 0xFFFFFFFE)); 17280 17351 RUN(testMulArgs32(1, 1)); 17281 17352 RUN(testMulArgs32(1, 2)); 17353 RUN(testMulArgs32(0xFFFFFFFF, 0xFFFFFFFF)); 17354 RUN(testMulArgs32(0xFFFFFFFE, 0xFFFFFFFF)); 17355 RUN(testMulArgs32SignExtend(1, 1)); 17356 RUN(testMulArgs32SignExtend(1, 2)); 17357 RUN(testMulArgs32SignExtend(0xFFFFFFFF, 0xFFFFFFFF)); 17358 RUN(testMulArgs32SignExtend(0xFFFFFFFE, 0xFFFFFFFF)); 17282 17359 RUN(testMulLoadTwice()); 17283 17360 RUN(testMulAddArgsLeft());
Note: See TracChangeset
for help on using the changeset viewer.