Changeset 223691 in webkit
- Timestamp: Oct 19, 2017 9:27:44 AM (7 years ago)
- Location: trunk
- Files: 1 added, 8 edited
Legend:
- Unmodified: no marker
- Added: lines marked +
- Removed: lines marked -
trunk/JSTests/ChangeLog
r223645 → r223691

+ 2017-10-19  Robin Morisset  <rmorisset@apple.com>
+
+         Turn recursive tail calls into loops
+         https://bugs.webkit.org/show_bug.cgi?id=176601
+
+         Reviewed by Saam Barati.
+
+         Add some simple tests that compute factorial in several ways, along with other trivial computations.
+         They all test the case where foo calls bar (in an inlineable way), and bar then makes a tail call.
+         Depending on the nature of both calls, it may or may not be possible to turn the tail call into a loop.
+         I have no clear way of checking that the call was indeed transformed, but I can check that the code
+         computes the right result (which it would not if the tail call were turned into a loop in the unsound cases).
+
+         * stress/inline-call-to-recursive-tail-call.js: Added.
+         (factorial.aux):
+         (factorial):
+         (factorial2.aux):
+         (factorial2.id):
+         (factorial2):
+         (factorial3.aux):
+         (factorial3):
+         (aux):
+         (factorial4):
+         (test):
+
  2017-10-18  Mark Lam  <mark.lam@apple.com>
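For illustration, the shape of these tests looks roughly like the following sketch (a hypothetical simplification written for this summary, not the committed stress/inline-call-to-recursive-tail-call.js):

    "use strict"; // proper tail calls only apply to strict-mode code

    // factorial calls aux in an inlineable way, and aux ends in a recursive
    // tail call back to factorial, so the recursion can be turned into a loop.
    function factorial(n, acc) {
        function aux(n, acc) {
            if (n < 2)
                return acc;
            return factorial(n - 1, n * acc); // recursive tail call
        }
        return aux(n, acc); // tail call
    }

    // The transformation itself is not observable, so the test runs the code
    // hot and checks the result, which would be wrong if a loop were formed
    // in an unsound case.
    for (let i = 0; i < 100000; ++i) {
        if (factorial(6, 1) !== 720)
            throw new Error("factorial(6, 1) should be 720");
    }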
trunk/Source/JavaScriptCore/ChangeLog
r223645 → r223691

+ 2017-10-19  Robin Morisset  <rmorisset@apple.com>
+
+         Turn recursive tail calls into loops
+         https://bugs.webkit.org/show_bug.cgi?id=176601
+
+         Reviewed by Saam Barati.
+
+         We want to turn recursive tail calls into loops early in the pipeline, so that the resulting loops can then be optimized.
+         One difficulty is that we need to split the entry block of the function we are jumping to, in order to have somewhere to jump to.
+         Worse: it is not necessarily the first block of the codeBlock, because of inlining! So we must do the splitting in the DFGByteCodeParser, at the same time as inlining.
+         We do this part by modifying the computation of the jump targets.
+         Importantly, we only do this splitting for functions that have tail calls.
+         It is the only case where the optimization is sound, and doing the splitting unconditionally destroys performance on Octane/raytrace.
+
+         We must then also do the actual transformation in the DFGByteCodeParser, to avoid code motion moving code out of the body of what will become a loop.
+         The transformation is entirely contained in handleRecursiveTailCall, which is hooked into the inlining machinery.
+
+         * bytecode/CodeBlock.h:
+         (JSC::CodeBlock::hasTailCalls const):
+         * bytecode/PreciseJumpTargets.cpp:
+         (JSC::getJumpTargetsForBytecodeOffset):
+         (JSC::computePreciseJumpTargetsInternal):
+         * bytecode/UnlinkedCodeBlock.cpp:
+         (JSC::UnlinkedCodeBlock::UnlinkedCodeBlock):
+         * bytecode/UnlinkedCodeBlock.h:
+         (JSC::UnlinkedCodeBlock::hasTailCalls const):
+         (JSC::UnlinkedCodeBlock::setHasTailCalls):
+         * bytecompiler/BytecodeGenerator.cpp:
+         (JSC::BytecodeGenerator::emitEnter):
+         (JSC::BytecodeGenerator::emitCallInTailPosition):
+         * dfg/DFGByteCodeParser.cpp:
+         (JSC::DFG::ByteCodeParser::allocateTargetableBlock):
+         (JSC::DFG::ByteCodeParser::makeBlockTargetable):
+         (JSC::DFG::ByteCodeParser::handleCall):
+         (JSC::DFG::ByteCodeParser::handleRecursiveTailCall):
+         (JSC::DFG::ByteCodeParser::parseBlock):
+         (JSC::DFG::ByteCodeParser::parse):
+
  2017-10-18  Mark Lam  <mark.lam@apple.com>
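To make the soundness condition concrete, here is a hypothetical counterexample (illustrative only, not from the patch). In the factorial sketch shown under JSTests/ChangeLog, every call on the path back to the recursive target is a tail call, so looping is safe; below, the inlined helper is reached through a non-tail call, and jumping back to the caller's entry would clobber its pending work:

    "use strict";

    function factorial(n) {
        function aux(n) {
            if (n < 2)
                return 1;
            return factorial(n - 1); // a tail call back to factorial...
        }
        // ...but this call to aux is NOT a tail call: the multiplication by n
        // is still pending. Turning aux's recursive call into a jump back to
        // factorial's entry would overwrite n before the multiply and compute
        // the wrong result. This is why handleRecursiveTailCall only walks up
        // through inline frames that are themselves tail calls.
        return n * aux(n);
    }

    if (factorial(6) !== 720)
        throw new Error("unsound loop formation would be caught here");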
trunk/Source/JavaScriptCore/bytecode/CodeBlock.h
r223155 → r223691

      // Call this to cause an optimization trigger to fire soon, but
      // not necessarily the next one. This makes sense if optimization
 -    // succeeds. Successful yoptimization means that all calls are
 +    // succeeds. Successful optimization means that all calls are
      // relinked to the optimized code, so this only affects call
      // frames that are still executing this CodeBlock. The value here
 …
      std::optional<CodeOrigin> findPC(void* pc);
  #endif

 +    bool hasTailCalls() const { return m_unlinkedCode->hasTailCalls(); }
 +
  protected:
trunk/Source/JavaScriptCore/bytecode/PreciseJumpTargets.cpp
r219702 → r223691

      if (opcodeID == op_loop_hint)
          out.append(bytecodeOffset);
 +    else if (opcodeID == op_enter && codeBlock->hasTailCalls()) {
 +        // We need to insert a jump after op_enter, so recursive tail calls have somewhere to jump to.
 +        // But we only want to pay that price for functions that have at least one tail call.
 +        out.append(bytecodeOffset + opcodeLengths[op_enter]);
 +    }
  }
 …
  {
      ASSERT(out.isEmpty());

 -    // We will derive a superset of the jump targets that the code block thinks it has.
 -    // So, if the code block claims there are none, then we are done.
 +    // The code block has a superset of the jump targets. So if it claims to have none, we are done.
      if (Mode == ComputePreciseJumpTargetsMode::FollowCodeBlockClaim && !codeBlock->numberOfJumpTargets())
          return;
trunk/Source/JavaScriptCore/bytecode/UnlinkedCodeBlock.cpp
r218412 → r223691

      , m_derivedContextType(static_cast<unsigned>(info.derivedContextType()))
      , m_evalContextType(static_cast<unsigned>(info.evalContextType()))
 +    , m_hasTailCalls(false)
      , m_lineCount(0)
      , m_endColumn(UINT_MAX)
trunk/Source/JavaScriptCore/bytecode/UnlinkedCodeBlock.h
r218861 → r223691

      bool isArrowFunctionContext() const { return m_isArrowFunctionContext; }
      bool isClassContext() const { return m_isClassContext; }
 +    bool hasTailCalls() const { return m_hasTailCalls; }
 +    void setHasTailCalls() { m_hasTailCalls = true; }

      void addExpressionInfo(unsigned instructionOffset, int divot,
 …
      unsigned m_derivedContextType : 2;
      unsigned m_evalContextType : 2;
 +    unsigned m_hasTailCalls : 1;
      unsigned m_lineCount;
      unsigned m_endColumn;
trunk/Source/JavaScriptCore/bytecompiler/BytecodeGenerator.cpp
r223318 → r223691

  {
      emitOpcode(op_enter);
 +
 +    // We must add the end of op_enter as a potential jump target, because the bytecode parser may decide to split its basic block
 +    // to have somewhere to jump to if there is a recursive tail-call that points to this function.
 +    m_codeBlock->addJumpTarget(instructions().size());
 +    // This disables peephole optimizations when an instruction is a jump target.
 +    m_lastOpcodeID = op_end;
  }
 …
  RegisterID* BytecodeGenerator::emitCallInTailPosition(RegisterID* dst, RegisterID* func, ExpectedFunction expectedFunction, CallArguments& callArguments, const JSTextPosition& divot, const JSTextPosition& divotStart, const JSTextPosition& divotEnd, DebuggableCall debuggableCall)
  {
 -    return emitCall(m_inTailPosition ? op_tail_call : op_call, dst, func, expectedFunction, callArguments, divot, divotStart, divotEnd, debuggableCall);
 +    if (m_inTailPosition) {
 +        m_codeBlock->setHasTailCalls();
 +        return emitCall(op_tail_call, dst, func, expectedFunction, callArguments, divot, divotStart, divotEnd, debuggableCall);
 +    }
 +    return emitCall(op_call, dst, func, expectedFunction, callArguments, divot, divotStart, divotEnd, debuggableCall);
  }
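As a reminder of what m_inTailPosition distinguishes, only a strict-mode call whose value is returned directly sits in tail position (an illustrative sketch; callee is a hypothetical helper, not part of the patch):

    "use strict"; // op_tail_call is only emitted for strict-mode code

    function callee(n) { return n; }

    function inTail(n) {
        return callee(n); // tail position: emitted as op_tail_call, and the
                          // UnlinkedCodeBlock is flagged via setHasTailCalls()
    }

    function notInTail(n) {
        return 1 + callee(n); // not a tail position: the addition runs after
                              // the call returns, so this stays an op_call
    }

    inTail(1); notInTail(1); // bytecode is generated lazily, on first call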
trunk/Source/JavaScriptCore/dfg/DFGByteCodeParser.cpp
r223614 → r223691

      void emitArgumentPhantoms(int registerOffset, int argumentCountIncludingThis);
      Node* getArgumentCount();
 +    bool handleRecursiveTailCall(Node* callTargetNode, const CallLinkStatus&, int registerOffset, VirtualRegister thisArgument, int argumentCountIncludingThis);
      unsigned inliningCost(CallVariant, int argumentCountIncludingThis, InlineCallFrame::Kind); // Return UINT_MAX if it's not an inlining candidate. By convention, intrinsics have a cost of 1.
      // Handle inlining. Return true if it succeeded, false if we need to plant a call.
 …
      template<typename ChecksFunctor>
      void inlineCall(Node* callTargetNode, int resultOperand, CallVariant, int registerOffset, int argumentCountIncludingThis, unsigned nextOffset, InlineCallFrame::Kind, BasicBlock* continuationBlock, const ChecksFunctor& insertChecks);
 -    void cancelLinkingForBlock(InlineStackEntry*, BasicBlock*); // Only works when the given block is the last one to have been added for that inline stack entry.
      // Handle intrinsic functions. Return true if it succeeded, false if we need to plant a call.
      template<typename ChecksFunctor>
 …
      Vector<BasicBlock*> m_blockLinkingTargets;

 -    // This is set by op_enter in parseBlock(), and indicates the first block of the function.
 -    BasicBlock* m_entryBlock;
 -
      // Optional: a continuation block for returns to jump to. It is set by early returns if it does not exist.
      BasicBlock* m_continuationBlock;
 …
      Ref<BasicBlock> block = adoptRef(*new BasicBlock(bytecodeIndex, m_numArguments, m_numLocals, 1));
      BasicBlock* blockPtr = block.ptr();
 +    // m_blockLinkingTargets must always be sorted in increasing order of bytecodeBegin.
 +    if (m_inlineStackTop->m_blockLinkingTargets.size())
 +        ASSERT(m_inlineStackTop->m_blockLinkingTargets.last()->bytecodeBegin < bytecodeIndex);
      m_inlineStackTop->m_blockLinkingTargets.append(blockPtr);
      m_graph.appendBlock(WTFMove(block));
 …
      ASSERT(block->bytecodeBegin == UINT_MAX);
      block->bytecodeBegin = bytecodeIndex;
 +    // m_blockLinkingTargets must always be sorted in increasing order of bytecodeBegin.
 +    if (m_inlineStackTop->m_blockLinkingTargets.size())
 +        ASSERT(m_inlineStackTop->m_blockLinkingTargets.last()->bytecodeBegin < bytecodeIndex);
      m_inlineStackTop->m_blockLinkingTargets.append(block);
  }
 …
      if (callLinkStatus.canOptimize()) {
          VirtualRegister thisArgument = virtualRegisterForArgument(0, registerOffset);
 +
 +        if (op == TailCall && handleRecursiveTailCall(callTarget, callLinkStatus, registerOffset, thisArgument, argumentCountIncludingThis))
 +            return Terminal;

          // Inlining is quite complex, and managed by a pipeline of functions:
 …
      for (int i = 0; i < argumentCountIncludingThis; ++i)
          addToGraph(Phantom, get(virtualRegisterForArgument(i, registerOffset)));
 +}
 +
 +bool ByteCodeParser::handleRecursiveTailCall(Node* callTargetNode, const CallLinkStatus& callLinkStatus, int registerOffset, VirtualRegister thisArgument, int argumentCountIncludingThis)
 +{
 +    // FIXME: We currently only do this optimization in the simple, non-polymorphic case.
 +    // https://bugs.webkit.org/show_bug.cgi?id=178390
 +    if (callLinkStatus.couldTakeSlowPath() || callLinkStatus.size() != 1)
 +        return false;
 +
 +    auto targetExecutable = callLinkStatus[0].executable();
 +    InlineStackEntry* stackEntry = m_inlineStackTop;
 +    do {
 +        if (targetExecutable != stackEntry->executable())
 +            continue;
 +        VERBOSE_LOG("   We found a recursive tail call, trying to optimize it into a jump.\n");
 +
 +        if (auto* callFrame = stackEntry->m_inlineCallFrame) {
 +            // Some code may statically use the argument count from the InlineCallFrame, so it would be invalid to loop back if it does not match.
 +            // We "continue" instead of returning false in case another stack entry further up the stack has the right number of arguments.
 +            if (argumentCountIncludingThis != static_cast<int>(callFrame->argumentCountIncludingThis))
 +                continue;
 +        } else {
 +            // We are in the machine code entry (i.e. the original caller).
 +            // If we have more arguments than the number of parameters to the function, it is not clear where we could put them on the stack.
 +            if (argumentCountIncludingThis > m_codeBlock->numParameters())
 +                return false;
 +        }
 +
 +        // We must add some check that the profiling information was correct and the target of this call is what we thought.
 +        emitFunctionChecks(callLinkStatus[0], callTargetNode, thisArgument);
 +        // We flush everything, as if we were in the backedge of a loop (see treatment of op_jmp in parseBlock).
 +        flushForTerminal();
 +
 +        // We must set the arguments to the right values.
 +        int argIndex = 0;
 +        for (; argIndex < argumentCountIncludingThis; ++argIndex) {
 +            Node* value = get(virtualRegisterForArgument(argIndex, registerOffset));
 +            setDirect(stackEntry->remapOperand(virtualRegisterForArgument(argIndex)), value, NormalSet);
 +        }
 +        Node* undefined = addToGraph(JSConstant, OpInfo(m_constantUndefined));
 +        for (; argIndex < stackEntry->m_codeBlock->numParameters(); ++argIndex)
 +            setDirect(stackEntry->remapOperand(virtualRegisterForArgument(argIndex)), undefined, NormalSet);
 +
 +        // We must repeat the work of op_enter here as we will jump right after it.
 +        // We jump right after it and not before it, because of an invariant saying that a CFG root cannot have predecessors in the IR.
 +        for (int i = 0; i < stackEntry->m_codeBlock->m_numVars; ++i)
 +            setDirect(stackEntry->remapOperand(virtualRegisterForLocal(i)), undefined, NormalSet);
 +
 +        // We want to emit the SetLocals with an exit origin that points to the place we are jumping to.
 +        unsigned oldIndex = m_currentIndex;
 +        auto oldStackTop = m_inlineStackTop;
 +        m_inlineStackTop = stackEntry;
 +        m_currentIndex = OPCODE_LENGTH(op_enter);
 +        m_exitOK = true;
 +        processSetLocalQueue();
 +        m_currentIndex = oldIndex;
 +        m_inlineStackTop = oldStackTop;
 +        m_exitOK = false;
 +
 +        BasicBlock** entryBlockPtr = tryBinarySearch<BasicBlock*, unsigned>(stackEntry->m_blockLinkingTargets, stackEntry->m_blockLinkingTargets.size(), OPCODE_LENGTH(op_enter), getBytecodeBeginForBlock);
 +        RELEASE_ASSERT(entryBlockPtr);
 +        addJumpTo(*entryBlockPtr);
 +        return true;
 +        // It would be unsound to jump over a non-tail call: the "tail" call is not really a tail call in that case.
 +    } while (stackEntry->m_inlineCallFrame && stackEntry->m_inlineCallFrame->kind == InlineCallFrame::TailCall && (stackEntry = stackEntry->m_caller));
 +
 +    // The tail call was not recursive.
 +    return false;
  }
 …
          set(virtualRegisterForLocal(i), undefined, ImmediateNakedSet);

 -    m_inlineStackTop->m_entryBlock = m_currentBlock;
 -
      NEXT_OPCODE(op_enter);
  }
 …
          NEXT_OPCODE(op_tail_call);
      else
 -        LAST_OPCODE(op_tail_call);
 +        LAST_OPCODE_LINKED(op_tail_call);
 +        // We use LAST_OPCODE_LINKED instead of LAST_OPCODE because if the tail call was optimized, it may now be a jump to a bytecode index in a different InlineStackEntry.
  }
 …
      m_graph.determineReachability();
 -    if (Options::verboseDFGBytecodeParsing())
 -        m_graph.dump();
      m_graph.killUnreachableBlocks();
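The argument-count handling in handleRecursiveTailCall is visible from JavaScript (a hypothetical example, not one of the committed tests): a recursive tail call may pass fewer arguments than the target declares, and the looped version must then reset the missing parameters to undefined, which is what the setDirect(..., undefined) padding loop above does:

    "use strict";

    function f(a, b, c) {
        if (a === 0)
            return b === undefined && c === undefined;
        return f(a - 1); // passes 1 argument for 3 parameters; once this call
                         // becomes a jump, b and c must be re-set to undefined
                         // on every trip around the loop
    }

    if (!f(10))
        throw new Error("missing parameters must read back as undefined");

The opposite mismatch, passing more arguments than the target has parameters, bails out at the machine-code entry (the argumentCountIncludingThis > m_codeBlock->numParameters() check), because there would be nowhere on the stack to put the extra arguments once the call becomes a jump.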