Changeset 219899 in webkit
- Timestamp: Jul 25, 2017 7:23:01 PM (7 years ago)
- Location: trunk/Source/JavaScriptCore
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r219898 r219899 1 2017-07-25 JF Bastien <jfbastien@apple.com> 2 3 WebAssembly: generate smaller binaries 4 https://bugs.webkit.org/show_bug.cgi?id=174818 5 6 Reviewed by Filip Pizlo. 7 8 This patch reduces generated code size for WebAssembly in 2 ways: 9 10 1. Use the ZR register when storing zero on ARM64. 11 2. Synthesize wasm context lazily. 12 13 This leads to a modest size reduction on both x86-64 and ARM64 for 14 large WebAssembly games, without any performance loss on WasmBench 15 and TitzerBench. 16 17 The reason this works is that these games, using Emscripten, 18 generate 100k+ tiny functions, and our JIT allocation granule 19 rounds all allocations up to 32 bytes. There are plenty of other 20 simple gains to be had, I've filed a follow-up bug at 21 webkit.org/b/174819 22 23 We should further avoid the per-function cost of tiering, which 24 represents the bulk of code generated for small functions. 25 26 * assembler/MacroAssemblerARM64.h: 27 (JSC::MacroAssemblerARM64::storeZero64): 28 * assembler/MacroAssemblerX86_64.h: 29 (JSC::MacroAssemblerX86_64::storeZero64): 30 * b3/B3LowerToAir.cpp: 31 (JSC::B3::Air::LowerToAir::createStore): this doesn't make sense 32 for x86 because it constrains register reuse and codegen in a way 33 that doesn't affect ARM64 because it has a dedicated zero 34 register. 35 * b3/air/AirOpcode.opcodes: add the storeZero64 opcode. 36 * wasm/WasmB3IRGenerator.cpp: 37 (JSC::Wasm::B3IRGenerator::instanceValue): 38 (JSC::Wasm::B3IRGenerator::restoreWasmContext): 39 (JSC::Wasm::B3IRGenerator::B3IRGenerator): 40 (JSC::Wasm::B3IRGenerator::materializeWasmContext): Deleted. 41 1 42 2017-07-23 Filip Pizlo <fpizlo@apple.com> 2 43 -
trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
r219434 r219899 1344 1344 } 1345 1345 1346 void storeZero64(ImplicitAddress address) 1347 { 1348 store64(ARM64Registers::zr, address); 1349 } 1350 1351 void storeZero64(BaseIndex address) 1352 { 1353 store64(ARM64Registers::zr, address); 1354 } 1355 1346 1356 DataLabel32 store64WithAddressOffsetPatch(RegisterID src, Address address) 1347 1357 { -
trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h
r217127 r219899 895 895 } 896 896 897 void storeZero64(ImplicitAddress address) 898 { 899 store64(TrustedImm32(0), address); 900 } 901 902 void storeZero64(BaseIndex address) 903 { 904 store64(TrustedImm32(0), address); 905 } 906 897 907 DataLabel32 store64WithAddressOffsetPatch(RegisterID src, Address address) 898 908 { -
trunk/Source/JavaScriptCore/b3/B3LowerToAir.cpp
r219702 r219899 1019 1019 Inst createStore(Air::Kind move, Value* value, const Arg& dest) 1020 1020 { 1021 if (imm(value) && isValidForm(move.opcode, Arg::Imm, dest.kind())) 1022 return Inst(move, m_value, imm(value), dest); 1021 if (auto imm_value = imm(value)) { 1022 if (isARM64() && imm_value.value() == 0) { 1023 switch (move.opcode) { 1024 default: 1025 break; 1026 case Air::Move32: 1027 if (isValidForm(StoreZero32, dest.kind()) && dest.isValidForm(Width32)) 1028 return Inst(StoreZero32, m_value, dest); 1029 break; 1030 case Air::Move: 1031 if (isValidForm(StoreZero64, dest.kind()) && dest.isValidForm(Width64)) 1032 return Inst(StoreZero64, m_value, dest); 1033 break; 1034 } 1035 } 1036 if (isValidForm(move.opcode, Arg::Imm, dest.kind())) 1037 return Inst(move, m_value, imm_value, dest); 1038 } 1023 1039 1024 1040 return Inst(move, m_value, tmp(value), dest); -
trunk/Source/JavaScriptCore/b3/air/AirOpcode.opcodes
r217127 r219899 650 650 Addr, Addr, Tmp 651 651 652 # FIXME: StoreZero32 and StoreZero64 are hacks on ARM64, we can do better: https://bugs.webkit.org/show_bug.cgi?id=174821 652 653 StoreZero32 D:G:32 654 Addr 655 Index 656 657 64: StoreZero64 D:G:64 653 658 Addr 654 659 Index -
trunk/Source/JavaScriptCore/wasm/WasmB3IRGenerator.cpp
r218883 r219899 246 246 int32_t WARN_UNUSED_RETURN fixupPointerPlusOffset(ExpressionType&, uint32_t); 247 247 248 Value* materializeWasmContext(BasicBlock*);249 248 void restoreWasmContext(Procedure&, BasicBlock*, Value*); 250 249 void restoreWebAssemblyGlobalState(const MemoryInformation&, Value* instance, Procedure&, BasicBlock*); … … 252 251 Origin origin(); 253 252 254 FunctionParser<B3IRGenerator>* m_parser ;253 FunctionParser<B3IRGenerator>* m_parser { nullptr }; 255 254 const ModuleInformation& m_info; 256 const MemoryMode m_mode ;257 const CompilationMode m_compilationMode ;258 const unsigned m_functionIndex ;259 const TierUpCount* m_tierUp ;255 const MemoryMode m_mode { MemoryMode::BoundsChecking }; 256 const CompilationMode m_compilationMode { CompilationMode::BBQMode }; 257 const unsigned m_functionIndex { UINT_MAX }; 258 const TierUpCount* m_tierUp { nullptr }; 260 259 261 260 Procedure& m_proc; 262 BasicBlock* m_currentBlock ;261 BasicBlock* m_currentBlock { nullptr }; 263 262 Vector<Variable*> m_locals; 264 263 Vector<UnlinkedWasmToWasmCall>& m_unlinkedWasmToWasmCalls; // List each call site and the function index whose address it should be patched with. 265 264 HashMap<ValueKey, Value*> m_constantPool; 266 265 InsertionSet m_constantInsertionValues; 267 GPRReg m_memoryBaseGPR ;266 GPRReg m_memoryBaseGPR { InvalidGPRReg }; 268 267 GPRReg m_memorySizeGPR { InvalidGPRReg }; 269 GPRReg m_wasmContextGPR; 270 Value* m_instanceValue; // FIXME: make this lazy https://bugs.webkit.org/show_bug.cgi?id=169792 268 GPRReg m_wasmContextGPR { InvalidGPRReg }; 271 269 bool m_makesCalls { false }; 270 271 Value* m_instanceValue { nullptr }; // Always use the accessor below to ensure the instance value is materialized when used. 
272 bool m_usesInstanceValue { false }; 273 Value* instanceValue() 274 { 275 m_usesInstanceValue = true; 276 return m_instanceValue; 277 } 278 272 279 uint32_t m_maxNumJSCallArguments { 0 }; 273 280 }; … … 281 288 } 282 289 return offset; 283 }284 285 Value* B3IRGenerator::materializeWasmContext(BasicBlock* block)286 {287 if (useFastTLSForContext()) {288 PatchpointValue* patchpoint = block->appendNew<PatchpointValue>(m_proc, pointerType(), Origin());289 if (CCallHelpers::loadWasmContextNeedsMacroScratchRegister())290 patchpoint->clobber(RegisterSet::macroScratchRegisters());291 patchpoint->setGenerator(292 [=] (CCallHelpers& jit, const StackmapGenerationParams& params) {293 AllowMacroScratchRegisterUsageIf allowScratch(jit, CCallHelpers::loadWasmContextNeedsMacroScratchRegister());294 jit.loadWasmContext(params[0].gpr());295 });296 return patchpoint;297 }298 299 // FIXME: Because WasmToWasm call clobbers wasmContext register and does not restore it, we need to restore it in the caller side.300 // This prevents us from using ArgumentReg to this (logically) immutable pinned register.301 PatchpointValue* patchpoint = block->appendNew<PatchpointValue>(m_proc, pointerType(), Origin());302 patchpoint->effects.writesPinned = false;303 patchpoint->effects.readsPinned = true;304 patchpoint->resultConstraint = ValueRep::reg(m_wasmContextGPR);305 patchpoint->setGenerator([] (CCallHelpers&, const StackmapGenerationParams&) { });306 return patchpoint;307 290 } 308 291 … … 330 313 patchpoint->effects = effects; 331 314 patchpoint->clobberLate(RegisterSet(m_wasmContextGPR)); 332 patchpoint->append( m_instanceValue, ValueRep::SomeRegister);315 patchpoint->append(instanceValue(), ValueRep::SomeRegister); 333 316 GPRReg wasmContextGPR = m_wasmContextGPR; 334 317 patchpoint->setGenerator([=] (CCallHelpers& jit, const StackmapGenerationParams& param) { … … 385 368 wasmCallingConvention().setupFrameInPrologue(&compilation->calleeMoveLocation, m_proc, Origin(), m_currentBlock); 386 369 
387 m_instanceValue = materializeWasmContext(m_currentBlock);388 389 370 { 390 371 B3::Value* framePointer = m_currentBlock->appendNew<B3::Value>(m_proc, B3::FramePointer, Origin()); 391 B3::PatchpointValue* stackOverflowCheck = m_currentBlock->appendNew<B3::PatchpointValue>(m_proc, B3::Void, Origin()); 372 B3::PatchpointValue* stackOverflowCheck = m_currentBlock->appendNew<B3::PatchpointValue>(m_proc, pointerType(), Origin()); 373 m_instanceValue = stackOverflowCheck; 392 374 stackOverflowCheck->appendSomeRegister(framePointer); 393 stackOverflowCheck->appendSomeRegister(m_instanceValue);394 375 stackOverflowCheck->clobber(RegisterSet::macroScratchRegisters()); 376 if (!useFastTLSForContext()) { 377 // FIXME: Because WasmToWasm call clobbers wasmContext register and does not restore it, we need to restore it in the caller side. 378 // This prevents us from using ArgumentReg to this (logically) immutable pinned register. 379 stackOverflowCheck->effects.writesPinned = false; 380 stackOverflowCheck->effects.readsPinned = true; 381 stackOverflowCheck->resultConstraint = ValueRep::reg(m_wasmContextGPR); 382 } 395 383 stackOverflowCheck->numGPScratchRegisters = 2; 396 384 stackOverflowCheck->setGenerator([=] (CCallHelpers& jit, const B3::StackmapGenerationParams& params) { 397 AllowMacroScratchRegisterUsage allowScratch(jit);398 GPRReg fp = params[0].gpr();399 GPRReg context = params[1].gpr();400 GPRReg scratch1 = params.gpScratch(0);401 GPRReg scratch2 = params.gpScratch(1);402 403 385 const Checked<int32_t> wasmFrameSize = params.proc().frameSize(); 404 386 const unsigned minimumParentCheckSize = WTF::roundUpToMultipleOf(stackAlignmentBytes(), 1024); … … 418 400 const int32_t checkSize = m_makesCalls ? 
(wasmFrameSize + extraFrameSize).unsafeGet() : wasmFrameSize.unsafeGet(); 419 401 bool needUnderflowCheck = static_cast<unsigned>(checkSize) > Options::reservedZoneSize(); 402 bool needsOverflowCheck = m_makesCalls || wasmFrameSize >= minimumParentCheckSize || needUnderflowCheck; 403 404 GPRReg context = useFastTLSForContext() ? params[0].gpr() : m_wasmContextGPR; 405 420 406 // This allows leaf functions to not do stack checks if their frame size is within 421 407 // certain limits since their caller would have already done the check. 422 if (m_makesCalls || wasmFrameSize >= minimumParentCheckSize || needUnderflowCheck) { 408 if (needsOverflowCheck) { 409 AllowMacroScratchRegisterUsage allowScratch(jit); 410 GPRReg fp = params[1].gpr(); 411 GPRReg scratch1 = params.gpScratch(0); 412 GPRReg scratch2 = params.gpScratch(1); 413 414 if (useFastTLSForContext()) 415 jit.loadWasmContext(context); 416 423 417 jit.loadPtr(CCallHelpers::Address(context, Context::offsetOfCachedStackLimit()), scratch2); 424 418 jit.addPtr(CCallHelpers::TrustedImm32(-checkSize), fp, scratch1); … … 430 424 linkBuffer.link(overflow, CodeLocationLabel(Thunks::singleton().stub(throwStackOverflowFromWasmThunkGenerator).code())); 431 425 }); 426 } else if (m_usesInstanceValue && useFastTLSForContext()) { 427 // No overflow check is needed, but the instance values still needs to be correct. 428 AllowMacroScratchRegisterUsageIf allowScratch(jit, CCallHelpers::loadWasmContextNeedsMacroScratchRegister()); 429 jit.loadWasmContext(context); 430 } else { 431 // We said we'd return a pointer. We don't actually need to because it isn't used, but the patchpoint conservatively said it had effects (potential stack check) which prevent it from getting removed. 
432 432 } 433 433 }); … … 564 564 result = m_currentBlock->appendNew<CCallValue>(m_proc, Int32, origin(), 565 565 m_currentBlock->appendNew<ConstPtrValue>(m_proc, origin(), bitwise_cast<void*>(growMemory)), 566 m_instanceValue, delta);567 568 restoreWebAssemblyGlobalState(m_info.memory, m_instanceValue, m_proc, m_currentBlock);566 instanceValue(), delta); 567 568 restoreWebAssemblyGlobalState(m_info.memory, instanceValue(), m_proc, m_currentBlock); 569 569 570 570 return { }; … … 573 573 auto B3IRGenerator::addCurrentMemory(ExpressionType& result) -> PartialResult 574 574 { 575 Value* memoryObject = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), m_instanceValue, safeCast<int32_t>(JSWebAssemblyInstance::offsetOfMemory()));575 Value* memoryObject = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), instanceValue(), safeCast<int32_t>(JSWebAssemblyInstance::offsetOfMemory())); 576 576 577 577 static_assert(sizeof(decltype(static_cast<JSWebAssemblyInstance*>(nullptr)->memory()->memory().size())) == sizeof(uint64_t), "codegen relies on this size"); … … 597 597 auto B3IRGenerator::getGlobal(uint32_t index, ExpressionType& result) -> PartialResult 598 598 { 599 Value* globalsArray = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), m_instanceValue, safeCast<int32_t>(JSWebAssemblyInstance::offsetOfGlobals()));599 Value* globalsArray = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), instanceValue(), safeCast<int32_t>(JSWebAssemblyInstance::offsetOfGlobals())); 600 600 result = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, toB3Type(m_info.globals[index].type), origin(), globalsArray, safeCast<int32_t>(index * sizeof(Register))); 601 601 return { }; … … 605 605 { 606 606 ASSERT(toB3Type(m_info.globals[index].type) == value->type()); 607 Value* globalsArray = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), m_instanceValue, 
safeCast<int32_t>(JSWebAssemblyInstance::offsetOfGlobals()));607 Value* globalsArray = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), instanceValue(), safeCast<int32_t>(JSWebAssemblyInstance::offsetOfGlobals())); 608 608 m_currentBlock->appendNew<MemoryValue>(m_proc, Store, origin(), value, globalsArray, safeCast<int32_t>(index * sizeof(Register))); 609 609 return { }; … … 1073 1073 1074 1074 // FIXME imports can be linked here, instead of generating a patchpoint, because all import stubs are generated before B3 compilation starts. https://bugs.webkit.org/show_bug.cgi?id=166462 1075 Value* functionImport = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), m_instanceValue, safeCast<int32_t>(JSWebAssemblyInstance::offsetOfImportFunction(functionIndex)));1075 Value* functionImport = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), instanceValue(), safeCast<int32_t>(JSWebAssemblyInstance::offsetOfImportFunction(functionIndex))); 1076 1076 Value* jsTypeOfImport = m_currentBlock->appendNew<MemoryValue>(m_proc, Load8Z, origin(), functionImport, safeCast<int32_t>(JSCell::typeInfoTypeOffset())); 1077 1077 Value* isWasmCall = m_currentBlock->appendNew<Value>(m_proc, Equal, origin(), jsTypeOfImport, m_currentBlock->appendNew<Const32Value>(m_proc, origin(), WebAssemblyFunctionType)); … … 1106 1106 // https://bugs.webkit.org/show_bug.cgi?id=170375 1107 1107 Value* codeBlock = isJSBlock->appendNew<MemoryValue>(m_proc, 1108 Load, pointerType(), origin(), m_instanceValue, safeCast<int32_t>(JSWebAssemblyInstance::offsetOfCodeBlock()));1108 Load, pointerType(), origin(), instanceValue(), safeCast<int32_t>(JSWebAssemblyInstance::offsetOfCodeBlock())); 1109 1109 Value* jumpDestination = isJSBlock->appendNew<MemoryValue>(m_proc, 1110 1110 Load, pointerType(), origin(), codeBlock, safeCast<int32_t>(JSWebAssemblyCodeBlock::offsetOfImportWasmToJSStub(functionIndex))); … … 1137 1137 1138 1138 // The 
call could have been to another WebAssembly instance, and / or could have modified our Memory. 1139 restoreWebAssemblyGlobalState(m_info.memory, m_instanceValue, m_proc, continuation);1139 restoreWebAssemblyGlobalState(m_info.memory, instanceValue(), m_proc, continuation); 1140 1140 } else { 1141 1141 result = wasmCallingConvention().setupCall(m_proc, m_currentBlock, origin(), args, toB3Type(returnType), … … 1173 1173 { 1174 1174 ExpressionType table = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), 1175 m_instanceValue, safeCast<int32_t>(JSWebAssemblyInstance::offsetOfTable()));1175 instanceValue(), safeCast<int32_t>(JSWebAssemblyInstance::offsetOfTable())); 1176 1176 callableFunctionBuffer = m_currentBlock->appendNew<MemoryValue>(m_proc, Load, pointerType(), origin(), 1177 1177 table, safeCast<int32_t>(JSWebAssemblyTable::offsetOfFunctions())); … … 1237 1237 jsObject, safeCast<int32_t>(WebAssemblyFunctionBase::offsetOfInstance())); 1238 1238 Value* isSameContext = m_currentBlock->appendNew<Value>(m_proc, Equal, origin(), 1239 newContext, m_instanceValue);1239 newContext, instanceValue()); 1240 1240 m_currentBlock->appendNewControlValue(m_proc, B3::Branch, origin(), 1241 1241 isSameContext, FrequentedBlock(continuation), FrequentedBlock(doContextSwitch)); … … 1248 1248 patchpoint->clobber(RegisterSet::macroScratchRegisters()); 1249 1249 patchpoint->append(newContext, ValueRep::SomeRegister); 1250 patchpoint->append( m_instanceValue, ValueRep::SomeRegister);1250 patchpoint->append(instanceValue(), ValueRep::SomeRegister); 1251 1251 patchpoint->setGenerator([=] (CCallHelpers& jit, const B3::StackmapGenerationParams& params) { 1252 1252 AllowMacroScratchRegisterUsage allowScratch(jit); … … 1297 1297 1298 1298 // The call could have been to another WebAssembly instance, and / or could have modified our Memory. 
1299 restoreWebAssemblyGlobalState(m_info.memory, m_instanceValue, m_proc, m_currentBlock);1299 restoreWebAssemblyGlobalState(m_info.memory, instanceValue(), m_proc, m_currentBlock); 1300 1300 1301 1301 return { };
Note: See TracChangeset for help on using the changeset viewer.