Changeset 86536 in webkit
- Timestamp:
- May 16, 2011 12:21:01 AM (13 years ago)
- Location:
- trunk
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r86535 r86536 1 2011-05-15 Gavin Barraclough <barraclough@apple.com> 2 3 Reviewed by Geoff Garen & Michael Saboff. 4 5 https://bugs.webkit.org/show_bug.cgi?id=60860 6 Add layout tests for some regular expressions that used to crash the compiler. 7 8 * fast/regex/parentheses-expected.txt: 9 * fast/regex/script-tests/parentheses.js: 10 1 11 2011-05-06 Philippe Normand <pnormand@igalia.com> 2 12 -
trunk/LayoutTests/fast/regex/parentheses-expected.txt
r85541 r86536 88 88 PASS regexp51.exec('abc') is null 89 89 PASS regexp51.exec(s) is ['){-,}P{Any}',')',undefined] 90 PASS 'Hi Bob'.match(/(Rob)|(Bob)|(Robert)|(Bobby)/) is ['Bob',undefined,'Bob',undefined,undefined] 90 PASS 'Hi Bob'.match(regexp52) is ['Bob',undefined,'Bob',undefined,undefined] 91 PASS regexp53.exec('#') is ['',undefined,''] 92 PASS regexp54.exec('#') is ['','',undefined,undefined,''] 93 PASS regexp55.exec('#') is ['',''] 91 94 PASS successfullyParsed is true 92 95 -
trunk/LayoutTests/fast/regex/script-tests/parentheses.js
r85541 r86536 213 213 214 214 var regexp48 = /^(?:(\w+):\/*([\w\.\-\d]+)(?::(\d+)|)(?=(?:\/|$))|)(?:$|\/?(.*?)(?:\?(.*?)?|)(?:#(.*)|)$)/; 215 /* The regexp on the prior line confuses Xcode syntax highlighting, this coment fixes it! */ 215 216 shouldBe("regexp48.exec('http://www.acme.com/this/is/a/path/file.txt')", "['http://www.acme.com/this/is/a/path/file.txt','http','www.acme.com',undefined,'this/is/a/path/file.txt',undefined,undefined]"); 216 217 217 218 var regexp49 = /(?:([^:]*?)(?:(?:\?(.*?)?)?)(?:(?:#)?)$)|(?:^(?:(\w+):\/*([\w\.\-\d]+)(?::(\d+)|)(?=(?:\/|$))|)(?:$|\/?(.*?)(?:\?(.*?)?|)(?:#(.*)|)$))/; 219 /* The regexp on the prior line confuses Xcode syntax highlighting, this coment fixes it! */ 218 220 shouldBe("regexp49.exec('http://www.acme.com/this/is/a/path/file.txt')", "['http://www.acme.com/this/is/a/path/file.txt',undefined,undefined,'http','www.acme.com',undefined,'this/is/a/path/file.txt',undefined,undefined]"); 219 221 … … 228 230 shouldBe("regexp51.exec(s)", "[')\x9e{-,}P{Any}',')',undefined]"); 229 231 230 shouldBe("'Hi Bob'.match(/(Rob)|(Bob)|(Robert)|(Bobby)/)", "['Bob',undefined,'Bob',undefined,undefined]"); 232 var regexp52 = /(Rob)|(Bob)|(Robert)|(Bobby)/; 233 shouldBe("'Hi Bob'.match(regexp52)", "['Bob',undefined,'Bob',undefined,undefined]"); 234 235 // Test cases discovered by fuzzing that crashed the compiler. 236 var regexp53 = /(?=(?:(?:(gB)|(?!cs|<))((?=(?!v6){0,})))|(?=#)+?)/m; 237 shouldBe("regexp53.exec('#')", "['',undefined,'']"); 238 var regexp54 = /((?:(?:()|(?!))((?=(?!))))|())/m; 239 shouldBe("regexp54.exec('#')", "['','',undefined,undefined,'']"); 240 var regexp55 = /(?:(?:(?:a?|(?:))((?:)))|a?)/m; 241 shouldBe("regexp55.exec('#')", "['','']"); 231 242 232 243 var successfullyParsed = true; -
trunk/Source/JavaScriptCore/ChangeLog
r86528 r86536 1 2011-05-15 Gavin Barraclough <barraclough@apple.com> 2 3 Reviewed by Geoff Garen & Michael Saboff. 4 5 https://bugs.webkit.org/show_bug.cgi?id=60860 6 Simplify backtracking in YARR JIT 7 8 YARR JIT currently performs a single pass of code generation over the pattern, 9 with special handling to allow the code generation for some backtracking code 10 out of line. We can simplify things by moving to a common mechanism whereby all 11 forwards matching code is generated in one pass, and all backtracking code is 12 generated in another. Backtracking code can be generated in reverse order, to 13 optimized the common fall-through case. 14 15 To make it easier to walk over the pattern, we can first convert to a more 16 byte-code like format before JIT generating. In time we should unify this with 17 the YARR interpreter to more closely unify the two. 18 19 * yarr/YarrJIT.cpp: 20 (JSC::Yarr::YarrGenerator::jumpIfNoAvailableInput): 21 (JSC::Yarr::YarrGenerator::YarrOp::YarrOp): 22 (JSC::Yarr::YarrGenerator::BacktrackingState::BacktrackingState): 23 (JSC::Yarr::YarrGenerator::BacktrackingState::append): 24 (JSC::Yarr::YarrGenerator::BacktrackingState::fallthrough): 25 (JSC::Yarr::YarrGenerator::BacktrackingState::link): 26 (JSC::Yarr::YarrGenerator::BacktrackingState::linkTo): 27 (JSC::Yarr::YarrGenerator::BacktrackingState::takeBacktracksToJumpList): 28 (JSC::Yarr::YarrGenerator::BacktrackingState::isEmpty): 29 (JSC::Yarr::YarrGenerator::BacktrackingState::linkDataLabels): 30 (JSC::Yarr::YarrGenerator::BacktrackingState::ReturnAddressRecord::ReturnAddressRecord): 31 (JSC::Yarr::YarrGenerator::generateAssertionBOL): 32 (JSC::Yarr::YarrGenerator::backtrackAssertionBOL): 33 (JSC::Yarr::YarrGenerator::generateAssertionEOL): 34 (JSC::Yarr::YarrGenerator::backtrackAssertionEOL): 35 (JSC::Yarr::YarrGenerator::matchAssertionWordchar): 36 (JSC::Yarr::YarrGenerator::generateAssertionWordBoundary): 37 (JSC::Yarr::YarrGenerator::backtrackAssertionWordBoundary): 38 (JSC::Yarr::YarrGenerator::generatePatternCharacterOnce): 39 (JSC::Yarr::YarrGenerator::backtrackPatternCharacterOnce): 40 (JSC::Yarr::YarrGenerator::generatePatternCharacterFixed): 41 (JSC::Yarr::YarrGenerator::backtrackPatternCharacterFixed): 42 (JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy): 43 (JSC::Yarr::YarrGenerator::backtrackPatternCharacterGreedy): 44 (JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy): 45 (JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy): 46 (JSC::Yarr::YarrGenerator::generateCharacterClassOnce): 47 (JSC::Yarr::YarrGenerator::backtrackCharacterClassOnce): 48 (JSC::Yarr::YarrGenerator::generateCharacterClassFixed): 49 (JSC::Yarr::YarrGenerator::backtrackCharacterClassFixed): 50 (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy): 51 (JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy): 52 (JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy): 53 (JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy): 54 (JSC::Yarr::YarrGenerator::generateTerm): 55 (JSC::Yarr::YarrGenerator::backtrackTerm): 56 (JSC::Yarr::YarrGenerator::generate): 57 (JSC::Yarr::YarrGenerator::backtrack): 58 (JSC::Yarr::YarrGenerator::opCompileParenthesesSubpattern): 59 (JSC::Yarr::YarrGenerator::opCompileParentheticalAssertion): 60 (JSC::Yarr::YarrGenerator::opCompileAlternative): 61 (JSC::Yarr::YarrGenerator::opCompileBody): 62 (JSC::Yarr::YarrGenerator::YarrGenerator): 63 (JSC::Yarr::YarrGenerator::compile): 64 1 65 2011-05-15 Adam Barth <abarth@webkit.org> 2 66 -
trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp
r85541 r86536 229 229 230 230 // Jumps if input not available; will have (incorrectly) incremented already! 231 Jump jumpIfNoAvailableInput(unsigned countToCheck) 232 { 233 add32(Imm32(countToCheck), index); 231 Jump jumpIfNoAvailableInput(unsigned countToCheck = 0) 232 { 233 if (countToCheck) 234 add32(Imm32(countToCheck), index); 234 235 return branch32(Above, index, length); 235 236 } … … 296 297 } 297 298 298 struct IndirectJumpEntry { 299 IndirectJumpEntry(int32_t stackOffset) 300 : m_stackOffset(stackOffset) 299 enum YarrOpCode { 300 // These nodes wrap body alternatives - those in the main disjunction, 301 // rather than subpatterns or assertions. These are chained together in 302 // a doubly linked list, with a 'begin' node for the first alternative, 303 // a 'next' node for each subsequent alternative, and an 'end' node at 304 // the end. In the case of repeating alternatives, the 'end' node also 305 // has a reference back to 'begin'. 306 OpBodyAlternativeBegin, 307 OpBodyAlternativeNext, 308 OpBodyAlternativeEnd, 309 // Similar to the body alternatives, but used for subpatterns with two 310 // or more alternatives. 311 OpNestedAlternativeBegin, 312 OpNestedAlternativeNext, 313 OpNestedAlternativeEnd, 314 // Used for alternatives in subpatterns where there is only a single 315 // alternative (backtrackingis easier in these cases), or for alternatives 316 // which never need to be backtracked (those in parenthetical assertions, 317 // terminal subpatterns). 318 OpSimpleNestedAlternativeBegin, 319 OpSimpleNestedAlternativeNext, 320 OpSimpleNestedAlternativeEnd, 321 // Used to wrap 'Once' subpattern matches (quantityCount == 1). 322 OpParenthesesSubpatternOnceBegin, 323 OpParenthesesSubpatternOnceEnd, 324 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp). 325 OpParenthesesSubpatternTerminalBegin, 326 OpParenthesesSubpatternTerminalEnd, 327 // Used to wrap parenthetical assertions. 328 OpParentheticalAssertionBegin, 329 OpParentheticalAssertionEnd, 330 // Wraps all simple terms (pattern characters, character classes). 331 OpTerm, 332 // Where an expression contains only 'once through' body alternatives 333 // and no repeating ones, this op is used to return match failure. 334 OpMatchFailed 335 }; 336 337 // This structure is used to hold the compiled opcode information, 338 // including reference back to the original PatternTerm/PatternAlternatives, 339 // and JIT compilation data structures. 340 struct YarrOp { 341 explicit YarrOp(PatternTerm* term) 342 : m_op(OpTerm) 343 , m_term(term) 344 , m_isDeadCode(false) 301 345 { 302 346 } 303 347 304 IndirectJumpEntry(int32_t stackOffset, Jump jump) 305 : m_stackOffset(stackOffset) 348 explicit YarrOp(YarrOpCode op) 349 : m_op(op) 350 , m_isDeadCode(false) 306 351 { 307 addJump(jump); 308 } 309 310 IndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) 311 : m_stackOffset(stackOffset) 352 } 353 354 // The operation, as a YarrOpCode, and also a reference to the PatternTerm. 355 YarrOpCode m_op; 356 PatternTerm* m_term; 357 358 // For alternatives, this holds the PatternAlternative and doubly linked 359 // references to this alternative's siblings. In the case of the 360 // OpBodyAlternativeEnd node at the end of a section of repeating nodes, 361 // m_nextOp will reference the OpBodyAlternativeBegin node of the first 362 // repeating alternative. 363 PatternAlternative* m_alternative; 364 size_t m_previousOp; 365 size_t m_nextOp; 366 367 // Used to record a set of Jumps out of the generated code, typically 368 // used for jumps out to backtracking code, and a single reentry back 369 // into the code for a node (likely where a backtrack will trigger 370 // rematching). 371 Label m_reentry; 372 JumpList m_jumps; 373 374 // This flag is used to null out the second pattern character, when 375 // two are fused to match a pair together. 376 bool m_isDeadCode; 377 378 // Currently used in the case of some of the more complex management of 379 // 'm_checked', to cache the offset used in this alternative, to avoid 380 // recalculating it. 381 int m_checkAdjust; 382 383 // Used by OpNestedAlternativeNext/End to hold the pointer to the 384 // value that will be pushed into the pattern's frame to return to, 385 // upon backtracking back into the disjunction. 386 DataLabelPtr m_returnAddress; 387 }; 388 389 // BacktrackingState 390 // This class encapsulates information about the state of code generation 391 // whilst generating the code for backtracking, when a term fails to match. 392 // Upon entry to code generation of the backtracking code for a given node, 393 // the Backtracking state will hold references to all control flow sources 394 // that are outputs in need of further backtracking from the prior node 395 // generated (which is the subsequent operation in the regular expression, 396 // and in the m_ops Vector, since we generated backtracking backwards). 397 // These references to control flow take the form of: 398 // - A jump list of jumps, to be linked to code that will backtrack them 399 // further. 400 // - A set of DataLabelPtr values, to be populated with values to be 401 // treated effectively as return addresses backtracking into complex 402 // subpatterns. 403 // - A flag indicating that the current sequence of generated code up to 404 // this point requires backtracking. 405 class BacktrackingState { 406 public: 407 BacktrackingState() 408 : m_pendingFallthrough(false) 312 409 { 313 addDataLabel(dataLabel); 314 } 315 316 void addJump(Jump jump) 410 } 411 412 // Add a jump or jumps, a return address, or set the flag indicating 413 // that the current 'fallthrough' control flow requires backtracking. 414 void append(const Jump& jump) 317 415 { 318 m_relJumps.append(jump); 319 } 320 321 void addDataLabel(DataLabelPtr dataLabel) 416 m_laterFailures.append(jump); 417 } 418 void append(JumpList& jumpList) 322 419 { 323 m_dataLabelPtrVector.append(dataLabel); 324 } 325 326 int32_t m_stackOffset; 327 JumpList m_relJumps; 328 Vector<DataLabelPtr, 16> m_dataLabelPtrVector; 420 m_laterFailures.append(jumpList); 421 } 422 void append(const DataLabelPtr& returnAddress) 423 { 424 m_pendingReturns.append(returnAddress); 425 } 426 void fallthrough() 427 { 428 ASSERT(!m_pendingFallthrough); 429 m_pendingFallthrough = true; 430 } 431 432 // These methods clear the backtracking state, either linking to the 433 // current location, a provided label, or copying the backtracking out 434 // to a JumpList. All actions may require code generation to take place, 435 // and as such are passed a pointer to the assembler. 436 void link(MacroAssembler* assembler) 437 { 438 if (m_pendingReturns.size()) { 439 Label here(assembler); 440 for (unsigned i = 0; i < m_pendingReturns.size(); ++i) 441 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); 442 m_pendingReturns.clear(); 443 } 444 m_laterFailures.link(assembler); 445 m_laterFailures.clear(); 446 m_pendingFallthrough = false; 447 } 448 void linkTo(Label label, MacroAssembler* assembler) 449 { 450 if (m_pendingReturns.size()) { 451 for (unsigned i = 0; i < m_pendingReturns.size(); ++i) 452 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label)); 453 m_pendingReturns.clear(); 454 } 455 if (m_pendingFallthrough) 456 assembler->jump(label); 457 m_laterFailures.linkTo(label, assembler); 458 m_laterFailures.clear(); 459 m_pendingFallthrough = false; 460 } 461 void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler) 462 { 463 if (m_pendingReturns.size()) { 464 Label here(assembler); 465 for (unsigned i = 0; i < m_pendingReturns.size(); ++i) 466 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); 467 m_pendingReturns.clear(); 468 m_pendingFallthrough = true; 469 } 470 if (m_pendingFallthrough) 471 jumpList.append(assembler->jump()); 472 jumpList.append(m_laterFailures); 473 m_laterFailures.clear(); 474 m_pendingFallthrough = false; 475 } 476 477 bool isEmpty() 478 { 479 return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough; 480 } 481 482 // Called at the end of code generation to link all return addresses. 483 void linkDataLabels(LinkBuffer& linkBuffer) 484 { 485 ASSERT(isEmpty()); 486 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) 487 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation)); 488 } 489 490 private: 491 struct ReturnAddressRecord { 492 ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation) 493 : m_dataLabel(dataLabel) 494 , m_backtrackLocation(backtrackLocation) 495 { 496 } 497 498 DataLabelPtr m_dataLabel; 499 Label m_backtrackLocation; 500 }; 501 502 JumpList m_laterFailures; 503 bool m_pendingFallthrough; 504 Vector<DataLabelPtr, 4> m_pendingReturns; 505 Vector<ReturnAddressRecord, 4> m_backtrackRecords; 329 506 }; 330 507 331 struct AlternativeBacktrackRecord { 332 DataLabelPtr dataLabel; 333 Label backtrackLocation; 334 335 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation) 336 : dataLabel(dataLabel) 337 , backtrackLocation(backtrackLocation) 338 { 339 } 340 }; 341 342 struct ParenthesesTail; 343 struct TermGenerationState; 344 345 struct GenerationState { 346 typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap; 347 348 GenerationState() 349 : m_parenNestingLevel(0) 350 { 351 } 352 353 void addIndirectJumpEntry(int32_t stackOffset, Jump jump) 354 { 355 IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); 356 357 ASSERT(stackOffset >= 0); 358 359 uint32_t offset = static_cast<uint32_t>(stackOffset); 360 361 if (result == m_indirectJumpMap.end()) 362 m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump)); 363 else 364 result->second->addJump(jump); 365 } 366 367 void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps) 368 { 369 JumpList::JumpVector jumpVector = jumps.jumps(); 370 size_t size = jumpVector.size(); 371 for (size_t i = 0; i < size; ++i) 372 addIndirectJumpEntry(stackOffset, jumpVector[i]); 373 374 jumps.empty(); 375 } 376 377 void addIndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) 378 { 379 IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); 380 381 ASSERT(stackOffset >= 0); 382 383 uint32_t offset = static_cast<uint32_t>(stackOffset); 384 385 if (result == m_indirectJumpMap.end()) 386 m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, dataLabel)); 387 else 388 result->second->addDataLabel(dataLabel); 389 } 390 391 void emitIndirectJumpTable(MacroAssembler* masm) 392 { 393 for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) { 394 IndirectJumpEntry* indJumpEntry = iter->second; 395 size_t size = indJumpEntry->m_dataLabelPtrVector.size(); 396 if (size) { 397 // Link any associated DataLabelPtr's with indirect jump via label 398 Label hereLabel = masm->label(); 399 for (size_t i = 0; i < size; ++i) 400 m_backtrackRecords.append(AlternativeBacktrackRecord(indJumpEntry->m_dataLabelPtrVector[i], hereLabel)); 401 } 402 indJumpEntry->m_relJumps.link(masm); 403 masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset)); 404 delete indJumpEntry; 405 } 406 } 407 408 void incrementParenNestingLevel() 409 { 410 ++m_parenNestingLevel; 411 } 412 413 void decrementParenNestingLevel() 414 { 415 --m_parenNestingLevel; 416 } 417 418 ParenthesesTail* addParenthesesTail(PatternTerm& term, JumpList* jumpListToPriorParen) 419 { 420 OwnPtr<ParenthesesTail> tail = adoptPtr(new ParenthesesTail(term, m_parenNestingLevel, jumpListToPriorParen)); 421 ParenthesesTail* rawTail = tail.get(); 422 423 m_parenTails.append(tail.release()); 424 m_parenTailsForIteration.append(rawTail); 425 426 return rawTail; 427 } 428 429 void emitParenthesesTail(YarrGenerator* generator) 430 { 431 unsigned vectorSize = m_parenTails.size(); 432 bool priorBacktrackFallThrough = false; 433 434 // Emit in reverse order so parentTail N can fall through to N-1 435 for (unsigned index = vectorSize; index > 0; --index) { 436 JumpList jumpsToNext; 437 priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1); 438 if (index > 1) 439 jumpsToNext.linkTo(generator->label(), generator); 440 else 441 addJumpsToNextInteration(jumpsToNext); 442 } 443 m_parenTails.clear(); 444 } 445 446 void addJumpToNextInteration(Jump jump) 447 { 448 m_jumpsToNextInteration.append(jump); 449 } 450 451 void addJumpsToNextInteration(JumpList jumps) 452 { 453 m_jumpsToNextInteration.append(jumps); 454 } 455 456 void addDataLabelToNextIteration(DataLabelPtr dataLabel) 457 { 458 m_dataPtrsToNextIteration.append(dataLabel); 459 } 460 461 void linkToNextIteration(Label label) 462 { 463 m_nextIteration = label; 464 465 for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i) 466 m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration)); 467 468 m_dataPtrsToNextIteration.clear(); 469 470 for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i) 471 m_parenTailsForIteration[i]->setNextIteration(m_nextIteration); 472 473 m_parenTailsForIteration.clear(); 474 } 475 476 void linkToNextIteration(YarrGenerator* generator) 477 { 478 m_jumpsToNextInteration.linkTo(m_nextIteration, generator); 479 } 480 481 int m_parenNestingLevel; 482 Vector<AlternativeBacktrackRecord> m_backtrackRecords; 483 IndirectJumpHashMap m_indirectJumpMap; 484 Label m_nextIteration; 485 Vector<OwnPtr<ParenthesesTail> > m_parenTails; 486 JumpList m_jumpsToNextInteration; 487 Vector<DataLabelPtr> m_dataPtrsToNextIteration; 488 Vector<ParenthesesTail*> m_parenTailsForIteration; 489 }; 490 491 struct BacktrackDestination { 492 typedef enum { 493 NoBacktrack, 494 BacktrackLabel, 495 BacktrackStackOffset, 496 BacktrackJumpList, 497 BacktrackLinked 498 } BacktrackType; 499 500 BacktrackDestination() 501 : m_backtrackType(NoBacktrack) 502 , m_backtrackToLabel(0) 503 , m_subDataLabelPtr(0) 504 , m_nextBacktrack(0) 505 , m_backtrackSourceLabel(0) 506 , m_backtrackSourceJumps(0) 507 { 508 } 509 510 BacktrackDestination(int32_t stackOffset) 511 : m_backtrackType(BacktrackStackOffset) 512 , m_backtrackStackOffset(stackOffset) 513 , m_backtrackToLabel(0) 514 , m_subDataLabelPtr(0) 515 , m_nextBacktrack(0) 516 , m_backtrackSourceLabel(0) 517 , m_backtrackSourceJumps(0) 518 { 519 } 520 521 BacktrackDestination(Label label) 522 : m_backtrackType(BacktrackLabel) 523 , m_backtrackLabel(label) 524 , m_backtrackToLabel(0) 525 , m_subDataLabelPtr(0) 526 , m_nextBacktrack(0) 527 , m_backtrackSourceLabel(0) 528 , m_backtrackSourceJumps(0) 529 { 530 } 531 532 void clear(bool doDataLabelClear = true) 533 { 534 m_backtrackType = NoBacktrack; 535 if (doDataLabelClear) 536 clearDataLabel(); 537 m_nextBacktrack = 0; 538 } 539 540 void clearDataLabel() 541 { 542 m_dataLabelPtr = DataLabelPtr(); 543 } 544 545 bool hasDestination() 546 { 547 return (m_backtrackType != NoBacktrack); 548 } 549 550 bool isStackOffset() 551 { 552 return (m_backtrackType == BacktrackStackOffset); 553 } 554 555 bool isLabel() 556 { 557 return (m_backtrackType == BacktrackLabel); 558 } 559 560 bool isJumpList() 561 { 562 return (m_backtrackType == BacktrackJumpList); 563 } 564 565 bool hasDataLabel() 566 { 567 return m_dataLabelPtr.isSet(); 568 } 569 570 void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true) 571 { 572 m_backtrackType = rhs.m_backtrackType; 573 if (m_backtrackType == BacktrackStackOffset) 574 m_backtrackStackOffset = rhs.m_backtrackStackOffset; 575 else if (m_backtrackType == BacktrackLabel) 576 m_backtrackLabel = rhs.m_backtrackLabel; 577 if (copyDataLabel) 578 m_dataLabelPtr = rhs.m_dataLabelPtr; 579 m_backtrackSourceJumps = rhs.m_backtrackSourceJumps; 580 m_backtrackSourceLabel = rhs.m_backtrackSourceLabel; 581 } 582 583 void copyTo(BacktrackDestination& lhs) 584 { 585 lhs.m_backtrackType = m_backtrackType; 586 if (m_backtrackType == BacktrackStackOffset) 587 lhs.m_backtrackStackOffset = m_backtrackStackOffset; 588 else if (m_backtrackType == BacktrackLabel) 589 lhs.m_backtrackLabel = m_backtrackLabel; 590 lhs.m_backtrackSourceJumps = m_backtrackSourceJumps; 591 lhs.m_backtrackSourceLabel = m_backtrackSourceLabel; 592 lhs.m_dataLabelPtr = m_dataLabelPtr; 593 lhs.m_backTrackJumps = m_backTrackJumps; 594 } 595 596 void addBacktrackJump(Jump jump) 597 { 598 m_backTrackJumps.append(jump); 599 } 600 601 void setStackOffset(int32_t stackOffset) 602 { 603 m_backtrackType = BacktrackStackOffset; 604 m_backtrackStackOffset = stackOffset; 605 } 606 607 void setLabel(Label label) 608 { 609 m_backtrackType = BacktrackLabel; 610 m_backtrackLabel = label; 611 } 612 613 void setNextBacktrackLabel(Label label) 614 { 615 if (m_nextBacktrack) 616 m_nextBacktrack->setLabel(label); 617 } 618 619 void propagateBacktrackToLabel(const BacktrackDestination& rhs) 620 { 621 if (!m_backtrackToLabel && rhs.m_backtrackToLabel) 622 m_backtrackToLabel = rhs.m_backtrackToLabel; 623 } 624 625 void setBacktrackToLabel(Label* backtrackToLabel) 626 { 627 if (!m_backtrackToLabel) 628 m_backtrackToLabel = backtrackToLabel; 629 } 630 631 bool hasBacktrackToLabel() 632 { 633 return m_backtrackToLabel; 634 } 635 636 void setBacktrackJumpList(JumpList* jumpList) 637 { 638 m_backtrackType = BacktrackJumpList; 639 m_backtrackSourceJumps = jumpList; 640 } 641 642 void setBacktrackSourceLabel(Label* backtrackSourceLabel) 643 { 644 m_backtrackSourceLabel = backtrackSourceLabel; 645 } 646 647 void setDataLabel(DataLabelPtr dp) 648 { 649 if (m_subDataLabelPtr) { 650 *m_subDataLabelPtr = dp; 651 m_subDataLabelPtr = 0; 652 } else { 653 ASSERT(!hasDataLabel()); 654 m_dataLabelPtr = dp; 655 } 656 } 657 658 void clearSubDataLabelPtr() 659 { 660 m_subDataLabelPtr = 0; 661 } 662 663 void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr) 664 { 665 m_subDataLabelPtr = subDataLabelPtr; 666 } 667 668 void linkToNextBacktrack(BacktrackDestination* nextBacktrack) 669 { 670 m_nextBacktrack = nextBacktrack; 671 } 672 673 int32_t getStackOffset() 674 { 675 ASSERT(m_backtrackType == BacktrackStackOffset); 676 return m_backtrackStackOffset; 677 } 678 679 Label getLabel() 680 { 681 ASSERT(m_backtrackType == BacktrackLabel); 682 return m_backtrackLabel; 683 } 684 685 JumpList& getBacktrackJumps() 686 { 687 return m_backTrackJumps; 688 } 689 690 DataLabelPtr& getDataLabel() 691 { 692 return m_dataLabelPtr; 693 } 694 695 void jumpToBacktrack(MacroAssembler* masm) 696 { 697 if (isJumpList()) { 698 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 699 masm->jump().linkTo(*m_backtrackSourceLabel, masm); 700 else 701 m_backtrackSourceJumps->append(masm->jump()); 702 } else if (isStackOffset()) 703 masm->jump(Address(stackPointerRegister, m_backtrackStackOffset)); 704 else if (isLabel()) 705 masm->jump().linkTo(m_backtrackLabel, masm); 706 else 707 m_backTrackJumps.append(masm->jump()); 708 } 709 710 void jumpToBacktrack(YarrGenerator* generator, Jump jump) 711 { 712 if (isJumpList()) { 713 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 714 jump.linkTo(*m_backtrackSourceLabel, generator); 715 else 716 m_backtrackSourceJumps->append(jump); 717 } else if (isStackOffset()) 718 generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump); 719 else if (isLabel()) 720 jump.linkTo(getLabel(), generator); 721 else 722 m_backTrackJumps.append(jump); 723 } 724 725 void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) 726 { 727 if (isJumpList()) { 728 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 729 jumps.linkTo(*m_backtrackSourceLabel, generator); 730 else 731 m_backtrackSourceJumps->append(jumps); 732 } else if (isStackOffset()) 733 generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps); 734 else if (isLabel()) 735 jumps.linkTo(getLabel(), generator); 736 else 737 m_backTrackJumps.append(jumps); 738 } 739 740 bool plantJumpToBacktrackIfExists(YarrGenerator* generator) 741 { 742 if (isJumpList()) { 743 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 744 generator->jump(*m_backtrackSourceLabel); 745 else 746 m_backtrackSourceJumps->append(generator->jump()); 747 748 return true; 749 } 750 751 if (isStackOffset()) { 752 generator->jump(Address(stackPointerRegister, getStackOffset())); 753 return true; 754 } 755 756 if (isLabel()) { 757 generator->jump(getLabel()); 758 if (hasDataLabel()) { 759 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel())); 760 clearDataLabel(); 761 } 762 return true; 763 } 764 765 return false; 766 } 767 768 void linkBacktrackToLabel(Label backtrackLabel) 769 { 770 if (m_backtrackToLabel) 771 *m_backtrackToLabel = backtrackLabel; 772 } 773 774 void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) 775 { 776 Label hereLabel = generator->label(); 777 778 if (m_backtrackToLabel) { 779 *m_backtrackToLabel = hereLabel; 780 m_backtrackToLabel = 0; 781 } 782 783 m_backTrackJumps.link(generator); 784 785 if (nextIteration) 786 generator->m_expressionState.linkToNextIteration(hereLabel); 787 788 if (hasDataLabel()) { 789 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel)); 790 // data label cleared as a result of the clear() below 791 } 792 793 clear(); 794 } 795 796 void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) 797 { 798 m_backTrackJumps.linkTo(label, generator); 799 800 if (nextIteration) 801 generator->m_expressionState.linkToNextIteration(label); 802 803 if (hasDataLabel()) { 804 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label)); 805 clearDataLabel(); 806 } 807 } 808 809 private: 810 BacktrackType m_backtrackType; 811 int32_t m_backtrackStackOffset; 812 Label m_backtrackLabel; 813 DataLabelPtr m_dataLabelPtr; 814 Label* m_backtrackToLabel; 815 DataLabelPtr* m_subDataLabelPtr; 816 BacktrackDestination* m_nextBacktrack; 817 Label* m_backtrackSourceLabel; 818 JumpList* m_backtrackSourceJumps; 819 JumpList m_backTrackJumps; 820 }; 821 822 struct TermGenerationState { 823 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) 824 : disjunction(disjunction) 825 , checkedTotal(checkedTotal) 826 , m_subParenNum(0) 827 , m_linkedBacktrack(0) 828 , m_jumpList(0) 829 { 830 } 831 832 void resetAlternative() 833 { 834 m_backtrack.clear(); 835 alt = 0; 836 } 837 bool alternativeValid() 838 { 839 return alt < disjunction->m_alternatives.size(); 840 } 841 void nextAlternative() 842 { 843 ++alt; 844 } 845 PatternAlternative* alternative() 846 { 847 return disjunction->m_alternatives[alt]; 848 } 849 bool isLastAlternative() 850 { 851 return (alt + 1) == disjunction->m_alternatives.size(); 852 } 853 854 void resetTerm() 855 { 856 ASSERT(alternativeValid()); 857 t = 0; 858 m_subParenNum = 0; 859 } 860 bool termValid() 861 { 862 ASSERT(alternativeValid()); 863 return t < alternative()->m_terms.size(); 864 } 865 void nextTerm() 866 { 867 ASSERT(alternativeValid()); 868 ++t; 869 } 870 PatternTerm& term() 871 { 872 ASSERT(alternativeValid()); 873 return alternative()->m_terms[t]; 874 } 875 bool isLastTerm() 876 { 877 ASSERT(alternativeValid()); 878 return (t + 1) == alternative()->m_terms.size(); 879 } 880 unsigned getSubParenNum() 881 { 882 return m_subParenNum++; 883 } 884 bool isMainDisjunction() 885 { 886 return !disjunction->m_parent; 887 } 888 889 void setJumpListToPriorParen(JumpList* jumpList) 890 { 891 m_jumpList = jumpList; 892 } 893 894 JumpList* getJumpListToPriorParen() 895 { 896 return m_jumpList; 897 } 898 899 PatternTerm& lookaheadTerm() 900 { 901 ASSERT(alternativeValid()); 902 ASSERT((t + 1) < alternative()->m_terms.size()); 903 return alternative()->m_terms[t + 1]; 904 } 905 bool isSinglePatternCharacterLookaheadTerm() 906 { 907 ASSERT(alternativeValid()); 908 return ((t + 1) < alternative()->m_terms.size()) 909 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter) 910 && (lookaheadTerm().quantityType == QuantifierFixedCount) 911 && (lookaheadTerm().quantityCount == 1); 912 } 913 914 int inputOffset() 915 { 916 return term().inputPosition - checkedTotal; 917 } 918 919 void clearBacktrack() 920 { 921 m_backtrack.clear(false); 922 m_linkedBacktrack = 0; 923 } 924 925 void jumpToBacktrack(MacroAssembler* masm) 926 { 927 m_backtrack.jumpToBacktrack(masm); 928 } 929 930 void jumpToBacktrack(YarrGenerator* generator, Jump jump) 931 { 932 m_backtrack.jumpToBacktrack(generator, jump); 933 } 934 935 void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) 936 { 937 m_backtrack.jumpToBacktrack(generator, jumps); 938 } 939 940 bool plantJumpToBacktrackIfExists(YarrGenerator* generator) 941 { 942 return m_backtrack.plantJumpToBacktrackIfExists(generator); 943 } 944 945 void linkDataLabelToBacktrackIfExists(YarrGenerator* generator, DataLabelPtr dataLabel) 946 { 947 // If we have a stack offset backtrack destination, use it directly 948 if (m_backtrack.isStackOffset()) { 949 generator->m_expressionState.addIndirectJumpEntry(m_backtrack.getStackOffset(), dataLabel); 950 m_backtrack.clearSubDataLabelPtr(); 951 } else { 952 // If we have a backtrack label, connect the datalabel to it directly. 953 if (m_backtrack.isLabel()) { 954 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, m_backtrack.getLabel())); 955 m_backtrack.clearSubDataLabelPtr(); 956 } else 957 setBacktrackDataLabel(dataLabel); 958 } 959 } 960 961 void addBacktrackJump(Jump jump) 962 { 963 m_backtrack.addBacktrackJump(jump); 964 } 965 966 void setBacktrackDataLabel(DataLabelPtr dp) 967 { 968 m_backtrack.setDataLabel(dp); 969 } 970 971 void setBackTrackStackOffset(int32_t stackOffset) 972 { 973 m_backtrack.setStackOffset(stackOffset); 974 } 975 976 void setBacktrackLabel(Label label) 977 { 978 m_backtrack.setLabel(label); 979 } 980 981 void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) 982 { 983 m_backtrack.linkAlternativeBacktracks(generator, nextIteration); 984 m_linkedBacktrack = 0; 985 } 986 987 void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) 988 { 989 m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration); 990 } 991 992 void setBacktrackLink(BacktrackDestination* linkedBacktrack) 993 { 994 m_linkedBacktrack = linkedBacktrack; 995 } 996 997 void chainBacktracks(BacktrackDestination* followonBacktrack) 998 { 999 if (m_linkedBacktrack) 1000 m_linkedBacktrack->linkToNextBacktrack(followonBacktrack); 1001 } 1002 1003 BacktrackDestination& getBacktrackDestination() 1004 { 1005 return m_backtrack; 1006 } 1007 1008 void propagateBacktrackingFrom(YarrGenerator* generator, BacktrackDestination& backtrack, bool doJump = true) 1009 { 1010 if (doJump) 1011 m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps()); 1012 1013 if (m_backtrack.isLabel() && backtrack.hasBacktrackToLabel()) 1014 backtrack.linkBacktrackToLabel(m_backtrack.getLabel()); 1015 1016 if (backtrack.hasDestination()) { 1017 if (m_backtrack.hasDataLabel()) 1018 generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel()); 1019 1020 m_backtrack.copyTarget(backtrack, doJump); 1021 } 1022 } 1023 1024 PatternDisjunction* disjunction; 1025 int checkedTotal; 1026 private: 1027 unsigned alt; 1028 unsigned t; 1029 unsigned m_subParenNum; 1030 BacktrackDestination m_backtrack; 1031 BacktrackDestination* m_linkedBacktrack; 1032 JumpList* m_jumpList; 1033 }; 1034 1035 struct ParenthesesTail { 1036 ParenthesesTail(PatternTerm& term, int nestingLevel, JumpList* jumpListToPriorParen) 1037 : m_term(term) 1038 , m_nestingLevel(nestingLevel) 1039 , m_subParenIndex(0) 1040 , m_jumpListToPriorParen(jumpListToPriorParen) 1041 { 1042 } 1043 1044 void processBacktracks(YarrGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough) 1045 { 1046 m_nonGreedyTryParentheses = nonGreedyTryParentheses; 1047 m_fallThrough = fallThrough; 1048 1049 m_subParenIndex = state.getSubParenNum(); 1050 parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack); 1051 state.chainBacktracks(&m_backtrack); 1052 BacktrackDestination& stateBacktrack = state.getBacktrackDestination(); 1053 stateBacktrack.copyTo(m_backtrack); 1054 stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel); 1055 state.setBacktrackLink(&m_backtrack); 1056 stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr); 1057 1058 m_doDirectBacktrack = m_parenBacktrack.hasDestination(); 1059 1060 if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy)) 1061 m_doDirectBacktrack = false; 1062 1063 if (m_doDirectBacktrack) 1064 state.propagateBacktrackingFrom(generator, m_parenBacktrack, false); 1065 else { 1066 stateBacktrack.setBacktrackJumpList(&m_afterBacktrackJumps); 1067 stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens); 1068 } 1069 } 1070 1071 void setNextIteration(Label nextIteration) 1072 { 1073 if (!m_nestingLevel && !m_backtrackToLabel.isSet()) 1074 m_backtrackToLabel = nextIteration; 1075 } 1076 1077 void addAfterParenJump(Jump jump) 1078 { 1079 m_afterBacktrackJumps.append(jump); 1080 } 1081 1082 bool generateCode(YarrGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough) 1083 { 1084 const RegisterID indexTemporary = regT0; 1085 unsigned parenthesesFrameLocation = m_term.frameLocation; 1086 Jump fromPriorBacktrack; 1087 bool needJumpForPriorParenTail = false; 1088 1089 if (priorBackTrackFallThrough 1090 && ((m_term.quantityType == QuantifierGreedy) 1091 || (m_term.quantityType == QuantifierNonGreedy) 1092 || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) { 1093 // If the prior paren tail code assumed that it could fall through, 1094 // but we need to generate after paren backtrack code, then provide 1095 // a jump around that code for the prior paren tail code. 1096 // A regular expressing like ((xxx)...)? needs this. 1097 fromPriorBacktrack = generator->jump(); 1098 needJumpForPriorParenTail = true; 1099 } 1100 1101 if (!m_backtrack.hasDestination()) { 1102 if (m_backtrackToLabel.isSet()) { 1103 m_backtrack.setLabel(m_backtrackToLabel); 1104 nextBacktrackFallThrough = false; 1105 } else if (m_jumpListToPriorParen) { 1106 // If we don't have a destination, go back to either the prior paren or the next outer paren. 1107 m_backtrack.setBacktrackJumpList(m_jumpListToPriorParen); 1108 nextBacktrackFallThrough = false; 1109 } else 1110 m_backtrack.setBacktrackJumpList(&jumpsToNext); 1111 } else 1112 nextBacktrackFallThrough = false; 1113 1114 // A failure AFTER the parens jumps here - Backtrack to this paren 1115 m_backtrackFromAfterParens = generator->label(); 1116 1117 if (m_dataAfterLabelPtr.isSet()) 1118 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens)); 1119 1120 m_afterBacktrackJumps.link(generator); 1121 1122 if (m_term.quantityType == QuantifierGreedy) { 1123 // If this is -1 we have now tested with both with and without the parens. 1124 generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); 1125 m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, TrustedImm32(-1))); 1126 } else if (m_term.quantityType == QuantifierNonGreedy) { 1127 // If this is -1 we have now tested with both with and without the parens. 1128 generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); 1129 generator->branch32(Equal, indexTemporary, TrustedImm32(-1)).linkTo(m_nonGreedyTryParentheses, generator); 1130 } 1131 1132 if (!m_doDirectBacktrack) 1133 m_parenBacktrack.plantJumpToBacktrackIfExists(generator); 1134 1135 // A failure WITHIN the parens jumps here 1136 if (needJumpForPriorParenTail) 1137 fromPriorBacktrack.link(generator); 1138 m_parenBacktrack.linkAlternativeBacktracks(generator); 1139 m_withinBacktrackJumps.link(generator); 1140 1141 if (m_term.capture()) 1142 generator->store32(TrustedImm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int))); 1143 1144 if (m_term.quantityType == QuantifierGreedy) { 1145 generator->storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); 1146 generator->jump().linkTo(m_fallThrough, generator); 1147 nextBacktrackFallThrough = false; 1148 } else if (!nextBacktrackFallThrough) 1149 m_backtrack.jumpToBacktrack(generator); 1150 1151 if (!m_doDirectBacktrack) 1152 m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens); 1153 1154 return nextBacktrackFallThrough; 1155 } 1156 1157 PatternTerm& m_term; 1158 int m_nestingLevel; 1159 unsigned m_subParenIndex; 1160 JumpList* m_jumpListToPriorParen; 1161 Label m_nonGreedyTryParentheses; 1162 Label m_fallThrough; 1163 Label m_backtrackToLabel; 1164 Label m_backtrackFromAfterParens; 1165 DataLabelPtr m_dataAfterLabelPtr; 1166 JumpList m_withinBacktrackJumps; 1167 JumpList m_afterBacktrackJumps; 1168 BacktrackDestination m_parenBacktrack; 1169 BacktrackDestination m_backtrack; 1170 bool m_doDirectBacktrack; 1171 }; 1172 1173 void generateAssertionBOL(TermGenerationState& state) 1174 { 1175 PatternTerm& term = state.term(); 508 // Generation methods: 509 // =================== 510 511 // This method provides a default implementation of backtracking common 512 // to many terms; terms commonly jump out of the forwards matching path 513 // on any failed conditions, and add these jumps to the m_jumps list. If 514 // no special handling is required we can often just backtrack to m_jumps. 515 void backtrackTermDefault(size_t opIndex) 516 { 517 YarrOp& op = m_ops[opIndex]; 518 m_backtrackingState.append(op.m_jumps); 519 } 520 521 void generateAssertionBOL(size_t opIndex) 522 { 523 YarrOp& op = m_ops[opIndex]; 524 PatternTerm* term = op.m_term; 1176 525 1177 526 if (m_pattern.m_multiline) { … … 1179 528 1180 529 JumpList matchDest; 1181 if (!term .inputPosition)1182 matchDest.append(branch32(Equal, index, Imm32( state.checkedTotal)));1183 1184 readCharacter( state.inputOffset() - 1, character);530 if (!term->inputPosition) 531 matchDest.append(branch32(Equal, index, Imm32(m_checked))); 532 533 readCharacter((term->inputPosition - m_checked) - 1, character); 1185 534 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); 1186 state.jumpToBacktrack(this);535 op.m_jumps.append(jump()); 1187 536 1188 537 matchDest.link(this); 1189 538 } else { 1190 539 // Erk, really should poison out these alternatives early. :-/ 1191 if (term .inputPosition)1192 state.jumpToBacktrack(this);540 if (term->inputPosition) 541 op.m_jumps.append(jump()); 1193 542 else 1194 state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal))); 1195 } 1196 } 1197 1198 void generateAssertionEOL(TermGenerationState& state) 1199 { 1200 PatternTerm& term = state.term(); 543 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked))); 544 } 545 } 546 void backtrackAssertionBOL(size_t opIndex) 547 { 548 backtrackTermDefault(opIndex); 549 } 550 551 void generateAssertionEOL(size_t opIndex) 552 { 553 YarrOp& op = m_ops[opIndex]; 554 PatternTerm* term = op.m_term; 1201 555 1202 556 if (m_pattern.m_multiline) { … … 1204 558 1205 559 JumpList matchDest; 1206 if (term .inputPosition == state.checkedTotal)560 if (term->inputPosition == m_checked) 1207 561 matchDest.append(atEndOfInput()); 1208 562 1209 readCharacter( state.inputOffset(), character);563 readCharacter((term->inputPosition - m_checked), character); 1210 564 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); 1211 state.jumpToBacktrack(this);565 op.m_jumps.append(jump()); 1212 566 1213 567 matchDest.link(this); 1214 568 } else { 1215 if (term .inputPosition == state.checkedTotal)1216 state.jumpToBacktrack(this,notAtEndOfInput());569 if (term->inputPosition == m_checked) 570 op.m_jumps.append(notAtEndOfInput()); 1217 571 // Erk, really should poison out these alternatives early. :-/ 1218 572 else 1219 state.jumpToBacktrack(this); 1220 } 573 op.m_jumps.append(jump()); 574 } 575 } 576 void backtrackAssertionEOL(size_t opIndex) 577 { 578 backtrackTermDefault(opIndex); 1221 579 } 1222 580 1223 581 // Also falls though on nextIsNotWordChar. 1224 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) 1225 { 582 void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) 583 { 584 YarrOp& op = m_ops[opIndex]; 585 PatternTerm* term = op.m_term; 586 1226 587 const RegisterID character = regT0; 1227 PatternTerm& term = state.term(); 1228 1229 if (term.inputPosition == state.checkedTotal) 588 589 if (term->inputPosition == m_checked) 1230 590 nextIsNotWordChar.append(atEndOfInput()); 1231 591 1232 readCharacter( state.inputOffset(), character);592 readCharacter((term->inputPosition - m_checked), character); 1233 593 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); 1234 594 } 1235 595 1236 void generateAssertionWordBoundary(TermGenerationState& state) 1237 { 596 void generateAssertionWordBoundary(size_t opIndex) 597 { 598 YarrOp& op = m_ops[opIndex]; 599 PatternTerm* term = op.m_term; 600 1238 601 const RegisterID character = regT0; 1239 PatternTerm& term = state.term();1240 602 1241 603 Jump atBegin; 1242 604 JumpList matchDest; 1243 if (!term .inputPosition)1244 atBegin = branch32(Equal, index, Imm32( state.checkedTotal));1245 readCharacter( state.inputOffset() - 1, character);605 if (!term->inputPosition) 606 atBegin = branch32(Equal, index, Imm32(m_checked)); 607 readCharacter((term->inputPosition - m_checked) - 1, character); 1246 608 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); 1247 if (!term .inputPosition)609 if (!term->inputPosition) 1248 610 atBegin.link(this); 1249 611 … … 1251 613 JumpList nonWordCharThenWordChar; 1252 614 JumpList nonWordCharThenNonWordChar; 1253 if (term .invert()) {1254 matchAssertionWordchar( state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);615 if (term->invert()) { 616 matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar); 1255 617 nonWordCharThenWordChar.append(jump()); 1256 618 } else { 1257 matchAssertionWordchar( state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);619 matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar); 1258 620 nonWordCharThenNonWordChar.append(jump()); 1259 621 } 1260 state.jumpToBacktrack(this,nonWordCharThenNonWordChar);622 op.m_jumps.append(nonWordCharThenNonWordChar); 1261 623 1262 624 // We jump here if the last character was a wordchar. … … 1264 626 JumpList wordCharThenWordChar; 1265 627 JumpList wordCharThenNonWordChar; 1266 if (term .invert()) {1267 matchAssertionWordchar( state, wordCharThenNonWordChar, wordCharThenWordChar);628 if (term->invert()) { 629 matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar); 1268 630 wordCharThenWordChar.append(jump()); 1269 631 } else { 1270 matchAssertionWordchar( state, wordCharThenWordChar, wordCharThenNonWordChar);632 matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar); 1271 633 // This can fall-though! 1272 634 } 1273 635 1274 state.jumpToBacktrack(this,wordCharThenWordChar);636 op.m_jumps.append(wordCharThenWordChar); 1275 637 1276 638 nonWordCharThenWordChar.link(this); 1277 639 wordCharThenNonWordChar.link(this); 1278 640 } 1279 1280 void generatePatternCharacterSingle(TermGenerationState& state) 1281 { 641 void backtrackAssertionWordBoundary(size_t opIndex) 642 { 643 backtrackTermDefault(opIndex); 644 } 645 646 void generatePatternCharacterOnce(size_t opIndex) 647 { 648 YarrOp& op = m_ops[opIndex]; 649 650 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed 651 // node, so there must always be at least one more node. 652 ASSERT(opIndex + 1 < m_ops.size()); 653 YarrOp& nextOp = m_ops[opIndex + 1]; 654 655 if (op.m_isDeadCode) 656 return; 657 658 PatternTerm* term = op.m_term; 659 UChar ch = term->patternCharacter; 660 1282 661 const RegisterID character = regT0; 1283 UChar ch = state.term().patternCharacter; 662 663 if (nextOp.m_op == OpTerm) { 664 PatternTerm* nextTerm = nextOp.m_term; 665 if (nextTerm->type == PatternTerm::TypePatternCharacter 666 && nextTerm->quantityType == QuantifierFixedCount 667 && nextTerm->quantityCount == 1 668 && nextTerm->inputPosition == (term->inputPosition + 1)) { 669 670 UChar ch2 = nextTerm->patternCharacter; 671 672 int mask = 0; 673 int chPair = ch | (ch2 << 16); 674 675 if (m_pattern.m_ignoreCase) { 676 if (isASCIIAlpha(ch)) 677 mask |= 32; 678 if (isASCIIAlpha(ch2)) 679 mask |= 32 << 16; 680 } 681 682 BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar)); 683 if (mask) { 684 load32WithUnalignedHalfWords(address, character); 685 or32(Imm32(mask), character); 686 op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask))); 687 } else 688 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair))); 689 690 nextOp.m_isDeadCode = true; 691 return; 692 } 693 } 1284 694 1285 695 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1286 readCharacter( state.inputOffset(), character);696 readCharacter(term->inputPosition - m_checked, character); 1287 697 or32(TrustedImm32(32), character); 1288 state.jumpToBacktrack(this,branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));698 op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 1289 699 } else { 1290 700 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1291 state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset())); 1292 } 1293 } 1294 1295 void generatePatternCharacterPair(TermGenerationState& state) 1296 { 1297 const RegisterID character = regT0; 1298 UChar ch1 = state.term().patternCharacter; 1299 UChar ch2 = state.lookaheadTerm().patternCharacter; 1300 1301 int mask = 0; 1302 int chPair = ch1 | (ch2 << 16); 1303 1304 if (m_pattern.m_ignoreCase) { 1305 if (isASCIIAlpha(ch1)) 1306 mask |= 32; 1307 if (isASCIIAlpha(ch2)) 1308 mask |= 32 << 16; 1309 } 1310 1311 if (mask) { 1312 load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); 1313 or32(Imm32(mask), character); 1314 state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask))); 1315 } else 1316 state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair))); 1317 } 1318 1319 void generatePatternCharacterFixed(TermGenerationState& state) 1320 { 701 op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked)); 702 } 703 } 704 void backtrackPatternCharacterOnce(size_t opIndex) 705 { 706 backtrackTermDefault(opIndex); 707 } 708 709 void generatePatternCharacterFixed(size_t opIndex) 710 { 711 YarrOp& op = m_ops[opIndex]; 712 PatternTerm* term = op.m_term; 713 UChar ch = term->patternCharacter; 714 1321 715 const RegisterID character = regT0; 1322 716 const RegisterID countRegister = regT1; 1323 PatternTerm& term = state.term();1324 UChar ch = term.patternCharacter;1325 717 1326 718 move(index, countRegister); 1327 sub32(Imm32(term .quantityCount), countRegister);719 sub32(Imm32(term->quantityCount), countRegister); 1328 720 1329 721 Label loop(this); 722 BaseIndex address(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar)); 723 1330 724 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1331 load16( BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);725 load16(address, character); 1332 726 or32(TrustedImm32(32), character); 1333 state.jumpToBacktrack(this,branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));727 op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 1334 728 } else { 1335 729 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1336 state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)));730 op.m_jumps.append(branch16(NotEqual, address, Imm32(ch))); 1337 731 } 1338 732 add32(TrustedImm32(1), countRegister); 1339 733 branch32(NotEqual, countRegister, index).linkTo(loop, this); 1340 734 } 1341 1342 void generatePatternCharacterGreedy(TermGenerationState& state) 1343 { 735 void backtrackPatternCharacterFixed(size_t opIndex) 736 { 737 backtrackTermDefault(opIndex); 738 } 739 740 void generatePatternCharacterGreedy(size_t opIndex) 741 { 742 YarrOp& op = m_ops[opIndex]; 743 PatternTerm* term = op.m_term; 744 UChar ch = term->patternCharacter; 745 1344 746 const RegisterID character = regT0; 1345 747 const RegisterID countRegister = regT1; 1346 PatternTerm& term = state.term();1347 UChar ch = term.patternCharacter;1348 748 1349 749 move(TrustedImm32(0), countRegister); … … 1353 753 failures.append(atEndOfInput()); 1354 754 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1355 readCharacter( state.inputOffset(), character);755 readCharacter(term->inputPosition - m_checked, character); 1356 756 or32(TrustedImm32(32), character); 1357 757 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 1358 758 } else { 1359 759 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1360 failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));760 failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked)); 1361 761 } 1362 762 1363 763 add32(TrustedImm32(1), countRegister); 1364 764 add32(TrustedImm32(1), index); 1365 if (term.quantityCount != quantifyInfinite) { 1366 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); 765 if (term->quantityCount == quantifyInfinite) 766 jump(loop); 767 else 768 branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this); 769 770 failures.link(this); 771 op.m_reentry = label(); 772 773 storeToFrame(countRegister, term->frameLocation); 774 775 } 776 void backtrackPatternCharacterGreedy(size_t opIndex) 777 { 778 YarrOp& op = m_ops[opIndex]; 779 PatternTerm* term = op.m_term; 780 781 const RegisterID countRegister = regT1; 782 783 m_backtrackingState.link(this); 784 785 loadFromFrame(term->frameLocation, countRegister); 786 m_backtrackingState.append(branchTest32(Zero, countRegister)); 787 sub32(TrustedImm32(1), countRegister); 788 sub32(TrustedImm32(1), index); 789 jump(op.m_reentry); 790 } 791 792 void generatePatternCharacterNonGreedy(size_t opIndex) 793 { 794 YarrOp& op = m_ops[opIndex]; 795 PatternTerm* term = op.m_term; 796 797 const RegisterID countRegister = regT1; 798 799 move(TrustedImm32(0), countRegister); 800 op.m_reentry = label(); 801 storeToFrame(countRegister, term->frameLocation); 802 } 803 void backtrackPatternCharacterNonGreedy(size_t opIndex) 804 { 805 YarrOp& op = m_ops[opIndex]; 806 PatternTerm* term = op.m_term; 807 UChar ch = term->patternCharacter; 808 809 const RegisterID character = regT0; 810 const RegisterID countRegister = regT1; 811 812 JumpList nonGreedyFailures; 813 814 m_backtrackingState.link(this); 815 816 loadFromFrame(term->frameLocation, countRegister); 817 818 nonGreedyFailures.append(atEndOfInput()); 819 if (term->quantityCount != quantifyInfinite) 820 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount))); 821 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 822 readCharacter(term->inputPosition - m_checked, character); 823 or32(TrustedImm32(32), character); 824 nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 825 } else { 826 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 827 nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked)); 828 } 829 830 add32(TrustedImm32(1), countRegister); 831 add32(TrustedImm32(1), index); 832 833 jump(op.m_reentry); 834 835 nonGreedyFailures.link(this); 836 sub32(countRegister, index); 837 m_backtrackingState.fallthrough(); 838 } 839 840 void generateCharacterClassOnce(size_t opIndex) 841 { 842 YarrOp& op = m_ops[opIndex]; 843 PatternTerm* term = op.m_term; 844 845 const RegisterID character = regT0; 846 847 JumpList matchDest; 848 readCharacter((term->inputPosition - m_checked), character); 849 matchCharacterClass(character, matchDest, term->characterClass); 850 851 if (term->invert()) 852 op.m_jumps.append(matchDest); 853 else { 854 op.m_jumps.append(jump()); 855 matchDest.link(this); 856 } 857 } 858 void backtrackCharacterClassOnce(size_t opIndex) 859 { 860 backtrackTermDefault(opIndex); 861 } 862 863 void generateCharacterClassFixed(size_t opIndex) 864 { 865 YarrOp& op = m_ops[opIndex]; 866 PatternTerm* term = op.m_term; 867 868 const RegisterID character = regT0; 869 const RegisterID countRegister = regT1; 870 871 move(index, countRegister); 872 sub32(Imm32(term->quantityCount), countRegister); 873 874 Label loop(this); 875 JumpList matchDest; 876 load16(BaseIndex(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar)), character); 877 matchCharacterClass(character, matchDest, term->characterClass); 878 879 if (term->invert()) 880 op.m_jumps.append(matchDest); 881 else { 882 op.m_jumps.append(jump()); 883 matchDest.link(this); 884 } 885 886 add32(TrustedImm32(1), countRegister); 887 branch32(NotEqual, countRegister, index).linkTo(loop, this); 888 } 889 void backtrackCharacterClassFixed(size_t opIndex) 890 { 891 backtrackTermDefault(opIndex); 892 } 893 894 void generateCharacterClassGreedy(size_t opIndex) 895 { 896 YarrOp& op = m_ops[opIndex]; 897 PatternTerm* term = op.m_term; 898 899 const RegisterID character = regT0; 900 const RegisterID countRegister = regT1; 901 902 move(TrustedImm32(0), countRegister); 903 904 JumpList failures; 905 Label loop(this); 906 failures.append(atEndOfInput()); 907 908 if (term->invert()) { 909 readCharacter(term->inputPosition - m_checked, character); 910 matchCharacterClass(character, failures, term->characterClass); 911 } else { 912 JumpList matchDest; 913 readCharacter(term->inputPosition - m_checked, character); 914 matchCharacterClass(character, matchDest, term->characterClass); 915 failures.append(jump()); 916 matchDest.link(this); 917 } 918 919 add32(TrustedImm32(1), countRegister); 920 add32(TrustedImm32(1), index); 921 if (term->quantityCount != quantifyInfinite) { 922 branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this); 1367 923 failures.append(jump()); 1368 924 } else 1369 925 jump(loop); 1370 926 1371 Label backtrackBegin(this); 1372 loadFromFrame(term.frameLocation, countRegister); 1373 state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); 927 failures.link(this); 928 op.m_reentry = label(); 929 930 storeToFrame(countRegister, term->frameLocation); 931 } 932 void backtrackCharacterClassGreedy(size_t opIndex) 933 { 934 YarrOp& op = m_ops[opIndex]; 935 PatternTerm* term = op.m_term; 936 937 const RegisterID countRegister = regT1; 938 939 m_backtrackingState.link(this); 940 941 loadFromFrame(term->frameLocation, countRegister); 942 m_backtrackingState.append(branchTest32(Zero, countRegister)); 1374 943 sub32(TrustedImm32(1), countRegister); 1375 944 sub32(TrustedImm32(1), index); 1376 1377 failures.link(this); 1378 1379 storeToFrame(countRegister, term.frameLocation); 1380 1381 state.setBacktrackLabel(backtrackBegin); 1382 } 1383 1384 void generatePatternCharacterNonGreedy(TermGenerationState& state) 1385 { 945 jump(op.m_reentry); 946 } 947 948 void generateCharacterClassNonGreedy(size_t opIndex) 949 { 950 YarrOp& op = m_ops[opIndex]; 951 PatternTerm* term = op.m_term; 952 953 const RegisterID countRegister = regT1; 954 955 move(TrustedImm32(0), countRegister); 956 op.m_reentry = label(); 957 storeToFrame(countRegister, term->frameLocation); 958 } 959 void backtrackCharacterClassNonGreedy(size_t opIndex) 960 { 961 YarrOp& op = m_ops[opIndex]; 962 PatternTerm* term = op.m_term; 963 1386 964 const RegisterID character = regT0; 1387 965 const RegisterID countRegister = regT1; 1388 PatternTerm& term = state.term(); 1389 UChar ch = term.patternCharacter; 1390 1391 move(TrustedImm32(0), countRegister); 1392 1393 Jump firstTimeDoNothing = jump(); 1394 1395 Label hardFail(this); 1396 sub32(countRegister, index); 1397 state.jumpToBacktrack(this); 966 967 JumpList nonGreedyFailures; 968 969 m_backtrackingState.link(this); 1398 970 1399 971 Label backtrackBegin(this); 1400 loadFromFrame(term.frameLocation, countRegister); 1401 1402 atEndOfInput().linkTo(hardFail, this); 1403 if (term.quantityCount != quantifyInfinite) 1404 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); 1405 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1406 readCharacter(state.inputOffset(), character); 1407 or32(TrustedImm32(32), character); 1408 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this); 1409 } else { 1410 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1411 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this); 972 loadFromFrame(term->frameLocation, countRegister); 973 974 nonGreedyFailures.append(atEndOfInput()); 975 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount))); 976 977 JumpList matchDest; 978 readCharacter(term->inputPosition - m_checked, character); 979 matchCharacterClass(character, matchDest, term->characterClass); 980 981 if (term->invert()) 982 nonGreedyFailures.append(matchDest); 983 else { 984 nonGreedyFailures.append(jump()); 985 matchDest.link(this); 1412 986 } 1413 987 … … 1415 989 add32(TrustedImm32(1), index); 1416 990 1417 firstTimeDoNothing.link(this); 1418 storeToFrame(countRegister, term.frameLocation); 1419 1420 state.setBacktrackLabel(backtrackBegin); 1421 } 1422 1423 void generateCharacterClassSingle(TermGenerationState& state) 1424 { 1425 const RegisterID character = regT0; 1426 PatternTerm& term = state.term(); 1427 1428 JumpList matchDest; 1429 readCharacter(state.inputOffset(), character); 1430 matchCharacterClass(character, matchDest, term.characterClass); 1431 1432 if (term.invert()) 1433 state.jumpToBacktrack(this, matchDest); 1434 else { 1435 state.jumpToBacktrack(this); 1436 matchDest.link(this); 1437 } 1438 } 1439 1440 void generateCharacterClassFixed(TermGenerationState& state) 1441 { 1442 const RegisterID character = regT0; 1443 const RegisterID countRegister = regT1; 1444 PatternTerm& term = state.term(); 1445 1446 move(index, countRegister); 1447 sub32(Imm32(term.quantityCount), countRegister); 1448 1449 Label loop(this); 1450 JumpList matchDest; 1451 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); 1452 matchCharacterClass(character, matchDest, term.characterClass); 1453 1454 if (term.invert()) 1455 state.jumpToBacktrack(this, matchDest); 1456 else { 1457 state.jumpToBacktrack(this); 1458 matchDest.link(this); 1459 } 1460 1461 add32(TrustedImm32(1), countRegister); 1462 branch32(NotEqual, countRegister, index).linkTo(loop, this); 1463 } 1464 1465 void generateCharacterClassGreedy(TermGenerationState& state) 1466 { 1467 const RegisterID character = regT0; 1468 const RegisterID countRegister = regT1; 1469 PatternTerm& term = state.term(); 1470 1471 move(TrustedImm32(0), countRegister); 1472 1473 JumpList failures; 1474 Label loop(this); 1475 failures.append(atEndOfInput()); 1476 1477 if (term.invert()) { 1478 readCharacter(state.inputOffset(), character); 1479 matchCharacterClass(character, failures, term.characterClass); 1480 } else { 1481 JumpList matchDest; 1482 readCharacter(state.inputOffset(), character); 1483 matchCharacterClass(character, matchDest, term.characterClass); 1484 failures.append(jump()); 1485 matchDest.link(this); 1486 } 1487 1488 add32(TrustedImm32(1), countRegister); 1489 add32(TrustedImm32(1), index); 1490 if (term.quantityCount != quantifyInfinite) { 1491 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); 1492 failures.append(jump()); 1493 } else 1494 jump(loop); 1495 1496 Label backtrackBegin(this); 1497 loadFromFrame(term.frameLocation, countRegister); 1498 state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); 1499 sub32(TrustedImm32(1), countRegister); 1500 sub32(TrustedImm32(1), index); 1501 1502 failures.link(this); 1503 1504 storeToFrame(countRegister, term.frameLocation); 1505 1506 state.setBacktrackLabel(backtrackBegin); 1507 } 1508 1509 void generateCharacterClassNonGreedy(TermGenerationState& state) 1510 { 1511 const RegisterID character = regT0; 1512 const RegisterID countRegister = regT1; 1513 PatternTerm& term = state.term(); 1514 1515 move(TrustedImm32(0), countRegister); 1516 1517 Jump firstTimeDoNothing = jump(); 1518 1519 Label hardFail(this); 991 jump(op.m_reentry); 992 993 nonGreedyFailures.link(this); 1520 994 sub32(countRegister, index); 1521 state.jumpToBacktrack(this); 1522 1523 Label backtrackBegin(this); 1524 loadFromFrame(term.frameLocation, countRegister); 1525 1526 atEndOfInput().linkTo(hardFail, this); 1527 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); 1528 1529 JumpList matchDest; 1530 readCharacter(state.inputOffset(), character); 1531 matchCharacterClass(character, matchDest, term.characterClass); 1532 1533 if (term.invert()) 1534 matchDest.linkTo(hardFail, this); 1535 else { 1536 jump(hardFail); 1537 matchDest.link(this); 1538 } 1539 1540 add32(TrustedImm32(1), countRegister); 1541 add32(TrustedImm32(1), index); 1542 1543 firstTimeDoNothing.link(this); 1544 storeToFrame(countRegister, term.frameLocation); 1545 1546 state.setBacktrackLabel(backtrackBegin); 1547 } 1548 1549 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) 1550 { 1551 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); 1552 ASSERT(parenthesesTerm.quantityCount == 1); 1553 1554 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; 1555 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; 1556 1557 if (disjunction->m_alternatives.size() == 1) { 1558 state.resetAlternative(); 1559 ASSERT(state.alternativeValid()); 1560 PatternAlternative* alternative = state.alternative(); 1561 optimizeAlternative(alternative); 1562 1563 int countToCheck = alternative->m_minimumSize - preCheckedCount; 1564 if (countToCheck) { 1565 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount)); 1566 1567 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists' 1568 // will be forced to always trampoline into here, just to decrement the index. 1569 // Ick. 1570 Jump skip = jump(); 1571 1572 Label backtrackBegin(this); 1573 sub32(Imm32(countToCheck), index); 1574 state.addBacktrackJump(jump()); 1575 1576 skip.link(this); 1577 1578 state.setBacktrackLabel(backtrackBegin); 1579 1580 state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck)); 1581 state.checkedTotal += countToCheck; 1582 } 1583 1584 for (state.resetTerm(); state.termValid(); state.nextTerm()) 1585 generateTerm(state); 1586 1587 state.checkedTotal -= countToCheck; 1588 } else { 1589 JumpList successes; 1590 bool propogateBacktrack = false; 1591 1592 // Save current state's paren jump list for use with each alternative 1593 JumpList* outerJumpList = state.getJumpListToPriorParen(); 1594 1595 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative(), state.setJumpListToPriorParen(outerJumpList)) { 1596 PatternAlternative* alternative = state.alternative(); 1597 optimizeAlternative(alternative); 1598 1599 ASSERT(alternative->m_minimumSize >= preCheckedCount); 1600 int countToCheck = alternative->m_minimumSize - preCheckedCount; 1601 if (countToCheck) { 1602 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); 1603 state.checkedTotal += countToCheck; 1604 } 1605 1606 for (state.resetTerm(); state.termValid(); state.nextTerm()) 1607 generateTerm(state); 1608 1609 // Matched an alternative. 1610 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); 1611 1612 if (!state.isLastAlternative() || countToCheck) 1613 successes.append(jump()); 1614 1615 // Alternative did not match. 1616 1617 // Do we have a backtrack destination? 1618 // if so, link the data label to it. 1619 state.linkDataLabelToBacktrackIfExists(this, dataLabel); 1620 1621 if (!state.isLastAlternative() || countToCheck) 1622 state.linkAlternativeBacktracks(this); 1623 1624 if (countToCheck) { 1625 sub32(Imm32(countToCheck), index); 1626 state.checkedTotal -= countToCheck; 1627 } else if (state.isLastAlternative()) 1628 propogateBacktrack = true; 1629 } 1630 // We fall through to here when the last alternative fails. 1631 // Add a backtrack out of here for the parenthese handling code to link up. 1632 if (!propogateBacktrack) 1633 state.addBacktrackJump(jump()); 1634 1635 // Save address on stack for the parens code to backtrack to, to retry the 1636 // next alternative. 1637 state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*)); 1638 1639 successes.link(this); 1640 } 1641 } 1642 1643 void generateParenthesesSingle(TermGenerationState& state) 1644 { 1645 const RegisterID indexTemporary = regT0; 1646 PatternTerm& term = state.term(); 1647 PatternDisjunction* disjunction = term.parentheses.disjunction; 1648 ASSERT(term.quantityCount == 1); 1649 1650 unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0; 1651 1652 unsigned parenthesesFrameLocation = term.frameLocation; 1653 unsigned alternativeFrameLocation = parenthesesFrameLocation; 1654 if (term.quantityType != QuantifierFixedCount) 1655 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 1656 1657 // optimized case - no capture & no quantifier can be handled in a light-weight manner. 1658 if (!term.capture() && (term.quantityType == QuantifierFixedCount)) { 1659 m_expressionState.incrementParenNestingLevel(); 1660 1661 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1662 1663 // Use the current state's jump list for the nested parentheses. 1664 parenthesesState.setJumpListToPriorParen(state.getJumpListToPriorParen()); 1665 1666 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1667 // this expects that any backtracks back out of the parentheses will be in the 1668 // parenthesesState's m_backTrackJumps vector, and that if they need backtracking 1669 // they will have set an entry point on the parenthesesState's m_backtrackLabel. 1670 BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination(); 1671 BacktrackDestination& stateBacktrack = state.getBacktrackDestination(); 1672 1673 state.propagateBacktrackingFrom(this, parenthesesBacktrack); 1674 stateBacktrack.propagateBacktrackToLabel(parenthesesBacktrack); 1675 1676 state.setJumpListToPriorParen(parenthesesState.getJumpListToPriorParen()); 1677 1678 m_expressionState.decrementParenNestingLevel(); 1679 } else { 1680 Jump nonGreedySkipParentheses; 1681 Label nonGreedyTryParentheses; 1682 if (term.quantityType == QuantifierGreedy) 1683 storeToFrame(index, parenthesesFrameLocation); 1684 else if (term.quantityType == QuantifierNonGreedy) { 1685 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); 1686 nonGreedySkipParentheses = jump(); 1687 nonGreedyTryParentheses = label(); 1688 storeToFrame(index, parenthesesFrameLocation); 1689 } 1690 1691 // store the match start index 1692 if (term.capture()) { 1693 int inputOffset = state.inputOffset() - preCheckedCount; 1694 if (inputOffset) { 1695 move(index, indexTemporary); 1696 add32(Imm32(inputOffset), indexTemporary); 1697 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); 1698 } else 1699 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); 1700 } 1701 1702 ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getJumpListToPriorParen()); 1703 1704 m_expressionState.incrementParenNestingLevel(); 1705 1706 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1707 1708 // Save the parenthesesTail for backtracking from nested parens to this one. 1709 parenthesesState.setJumpListToPriorParen(&parenthesesTail->m_withinBacktrackJumps); 1710 1711 // generate the body of the parentheses 1712 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1713 1714 // For non-fixed counts, backtrack if we didn't match anything. 1715 if (term.quantityType != QuantifierFixedCount) 1716 parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*))))); 1717 1718 // store the match end index 1719 if (term.capture()) { 1720 int inputOffset = state.inputOffset(); 1721 if (inputOffset) { 1722 move(index, indexTemporary); 1723 add32(Imm32(state.inputOffset()), indexTemporary); 1724 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); 1725 } else 1726 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); 1727 } 1728 1729 m_expressionState.decrementParenNestingLevel(); 1730 1731 parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label()); 1732 1733 state.setJumpListToPriorParen(&parenthesesTail->m_afterBacktrackJumps); 1734 1735 parenthesesState.getBacktrackDestination().clear(); 1736 1737 if (term.quantityType == QuantifierNonGreedy) 1738 nonGreedySkipParentheses.link(this); 1739 } 1740 } 1741 1742 void generateParenthesesGreedyNoBacktrack(TermGenerationState& state) 1743 { 1744 PatternTerm& parenthesesTerm = state.term(); 1745 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; 1746 ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern); 1747 ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle. 1748 1749 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1750 1751 Label matchAgain(this); 1752 1753 storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later. 1754 1755 for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) { 1756 1757 PatternAlternative* alternative = parenthesesState.alternative(); 1758 optimizeAlternative(alternative); 1759 1760 int countToCheck = alternative->m_minimumSize; 1761 if (countToCheck) { 1762 parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); 1763 parenthesesState.checkedTotal += countToCheck; 1764 } 1765 1766 for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm()) 1767 generateTerm(parenthesesState); 1768 1769 // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet. 1770 branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain); 1771 1772 // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack, 1773 // or fall through to try the next alternative if no backtrack is available. 1774 parenthesesState.plantJumpToBacktrackIfExists(this); 1775 1776 parenthesesState.linkAlternativeBacktracks(this); 1777 1778 // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop. 1779 1780 if (countToCheck) { 1781 sub32(Imm32(countToCheck), index); 1782 parenthesesState.checkedTotal -= countToCheck; 1783 } 1784 } 1785 1786 // If the last alternative falls through to here, we have a failed match... 1787 // Which means that we match whatever we have matched up to this point (even if nothing). 1788 } 1789 1790 void generateParentheticalAssertion(TermGenerationState& state) 1791 { 1792 PatternTerm& term = state.term(); 1793 PatternDisjunction* disjunction = term.parentheses.disjunction; 1794 ASSERT(term.quantityCount == 1); 1795 ASSERT(term.quantityType == QuantifierFixedCount); 1796 1797 unsigned parenthesesFrameLocation = term.frameLocation; 1798 unsigned alternativeFrameLocation = parenthesesFrameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; 1799 1800 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; 1801 1802 if (term.invert()) { 1803 // Inverted case 1804 storeToFrame(index, parenthesesFrameLocation); 1805 1806 state.checkedTotal -= countCheckedAfterAssertion; 1807 if (countCheckedAfterAssertion) 1808 sub32(Imm32(countCheckedAfterAssertion), index); 1809 1810 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1811 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1812 // Success! - which means - Fail! 1813 loadFromFrame(parenthesesFrameLocation, index); 1814 state.jumpToBacktrack(this); 1815 1816 // And fail means success. 1817 parenthesesState.linkAlternativeBacktracks(this); 1818 1819 loadFromFrame(parenthesesFrameLocation, index); 1820 1821 state.checkedTotal += countCheckedAfterAssertion; 1822 } else { 1823 // Normal case 1824 storeToFrame(index, parenthesesFrameLocation); 1825 1826 state.checkedTotal -= countCheckedAfterAssertion; 1827 if (countCheckedAfterAssertion) 1828 sub32(Imm32(countCheckedAfterAssertion), index); 1829 1830 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1831 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1832 // Success! - which means - Success! 1833 loadFromFrame(parenthesesFrameLocation, index); 1834 Jump success = jump(); 1835 1836 parenthesesState.linkAlternativeBacktracks(this); 1837 1838 loadFromFrame(parenthesesFrameLocation, index); 1839 state.jumpToBacktrack(this); 1840 1841 success.link(this); 1842 1843 state.checkedTotal += countCheckedAfterAssertion; 1844 } 1845 } 1846 1847 void generateTerm(TermGenerationState& state) 1848 { 1849 PatternTerm& term = state.term(); 1850 1851 switch (term.type) { 995 m_backtrackingState.fallthrough(); 996 } 997 998 // Code generation/backtracking for simple terms 999 // (pattern characters, character classes, and assertions). 1000 // These methods farm out work to the set of functions above. 1001 void generateTerm(size_t opIndex) 1002 { 1003 YarrOp& op = m_ops[opIndex]; 1004 PatternTerm* term = op.m_term; 1005 1006 switch (term->type) { 1007 case PatternTerm::TypePatternCharacter: 1008 switch (term->quantityType) { 1009 case QuantifierFixedCount: 1010 if (term->quantityCount == 1) 1011 generatePatternCharacterOnce(opIndex); 1012 else 1013 generatePatternCharacterFixed(opIndex); 1014 break; 1015 case QuantifierGreedy: 1016 generatePatternCharacterGreedy(opIndex); 1017 break; 1018 case QuantifierNonGreedy: 1019 generatePatternCharacterNonGreedy(opIndex); 1020 break; 1021 } 1022 break; 1023 1024 case PatternTerm::TypeCharacterClass: 1025 switch (term->quantityType) { 1026 case QuantifierFixedCount: 1027 if (term->quantityCount == 1) 1028 generateCharacterClassOnce(opIndex); 1029 else 1030 generateCharacterClassFixed(opIndex); 1031 break; 1032 case QuantifierGreedy: 1033 generateCharacterClassGreedy(opIndex); 1034 break; 1035 case QuantifierNonGreedy: 1036 generateCharacterClassNonGreedy(opIndex); 1037 break; 1038 } 1039 break; 1040 1852 1041 case PatternTerm::TypeAssertionBOL: 1853 generateAssertionBOL( state);1042 generateAssertionBOL(opIndex); 1854 1043 break; 1855 1044 1856 1045 case PatternTerm::TypeAssertionEOL: 1857 generateAssertionEOL( state);1046 generateAssertionEOL(opIndex); 1858 1047 break; 1859 1048 1860 1049 case PatternTerm::TypeAssertionWordBoundary: 1861 generateAssertionWordBoundary( state);1050 generateAssertionWordBoundary(opIndex); 1862 1051 break; 1863 1052 1864 case PatternTerm::TypePatternCharacter: 1865 switch (term.quantityType) { 1866 case QuantifierFixedCount: 1867 if (term.quantityCount == 1) { 1868 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) { 1869 generatePatternCharacterPair(state); 1870 state.nextTerm(); 1871 } else 1872 generatePatternCharacterSingle(state); 1873 } else 1874 generatePatternCharacterFixed(state); 1875 break; 1876 case QuantifierGreedy: 1877 generatePatternCharacterGreedy(state); 1878 break; 1879 case QuantifierNonGreedy: 1880 generatePatternCharacterNonGreedy(state); 1881 break; 1882 } 1053 case PatternTerm::TypeForwardReference: 1883 1054 break; 1884 1055 1885 case PatternTerm::TypeCharacterClass: 1886 switch (term.quantityType) { 1887 case QuantifierFixedCount: 1888 if (term.quantityCount == 1) 1889 generateCharacterClassSingle(state); 1890 else 1891 generateCharacterClassFixed(state); 1892 break; 1893 case QuantifierGreedy: 1894 generateCharacterClassGreedy(state); 1895 break; 1896 case QuantifierNonGreedy: 1897 generateCharacterClassNonGreedy(state); 1898 break; 1899 } 1900 break; 1901 1056 case PatternTerm::TypeParenthesesSubpattern: 1057 case PatternTerm::TypeParentheticalAssertion: 1058 ASSERT_NOT_REACHED(); 1902 1059 case PatternTerm::TypeBackReference: 1903 1060 m_shouldFallBack = true; 1904 1061 break; 1062 } 1063 } 1064 void backtrackTerm(size_t opIndex) 1065 { 1066 YarrOp& op = m_ops[opIndex]; 1067 PatternTerm* term = op.m_term; 1068 1069 switch (term->type) { 1070 case PatternTerm::TypePatternCharacter: 1071 switch (term->quantityType) { 1072 case QuantifierFixedCount: 1073 if (term->quantityCount == 1) 1074 backtrackPatternCharacterOnce(opIndex); 1075 else 1076 backtrackPatternCharacterFixed(opIndex); 1077 break; 1078 case QuantifierGreedy: 1079 backtrackPatternCharacterGreedy(opIndex); 1080 break; 1081 case QuantifierNonGreedy: 1082 backtrackPatternCharacterNonGreedy(opIndex); 1083 break; 1084 } 1085 break; 1086 1087 case PatternTerm::TypeCharacterClass: 1088 switch (term->quantityType) { 1089 case QuantifierFixedCount: 1090 if (term->quantityCount == 1) 1091 backtrackCharacterClassOnce(opIndex); 1092 else 1093 backtrackCharacterClassFixed(opIndex); 1094 break; 1095 case QuantifierGreedy: 1096 backtrackCharacterClassGreedy(opIndex); 1097 break; 1098 case QuantifierNonGreedy: 1099 backtrackCharacterClassNonGreedy(opIndex); 1100 break; 1101 } 1102 break; 1103 1104 case PatternTerm::TypeAssertionBOL: 1105 backtrackAssertionBOL(opIndex); 1106 break; 1107 1108 case PatternTerm::TypeAssertionEOL: 1109 backtrackAssertionEOL(opIndex); 1110 break; 1111 1112 case PatternTerm::TypeAssertionWordBoundary: 1113 backtrackAssertionWordBoundary(opIndex); 1114 break; 1905 1115 1906 1116 case PatternTerm::TypeForwardReference: … … 1908 1118 1909 1119 case PatternTerm::TypeParenthesesSubpattern: 1910 if (term.quantityCount == 1 && !term.parentheses.isCopy) 1911 generateParenthesesSingle(state); 1912 else if (term.parentheses.isTerminal) 1913 generateParenthesesGreedyNoBacktrack(state); 1914 else 1915 m_shouldFallBack = true; 1120 case PatternTerm::TypeParentheticalAssertion: 1121 ASSERT_NOT_REACHED(); 1122 case PatternTerm::TypeBackReference: 1123 m_shouldFallBack = true; 1916 1124 break; 1917 1918 case PatternTerm::TypeParentheticalAssertion: 1919 generateParentheticalAssertion(state); 1920 break; 1921 } 1922 } 1923 1924 void generateDisjunction(PatternDisjunction* disjunction) 1925 { 1926 TermGenerationState state(disjunction, 0); 1927 state.resetAlternative(); 1928 1929 // check availability for the next alternative 1930 int countCheckedForCurrentAlternative = 0; 1931 int countToCheckForFirstAlternative = 0; 1932 bool hasShorterAlternatives = false; 1933 bool setRepeatAlternativeLabels = false; 1934 JumpList notEnoughInputForPreviousAlternative; 1935 Label firstAlternative; 1936 Label firstAlternativeInputChecked; 1937 1938 // The label 'firstAlternative' is used to plant a check to see if there is 1939 // sufficient input available to run the first repeating alternative. 1940 // The label 'firstAlternativeInputChecked' will jump directly to matching 1941 // the first repeating alternative having skipped this check. 1942 1943 if (state.alternativeValid()) { 1944 PatternAlternative* alternative = state.alternative(); 1945 if (!alternative->onceThrough()) { 1946 firstAlternative = Label(this); 1947 setRepeatAlternativeLabels = true; 1948 } 1949 countToCheckForFirstAlternative = alternative->m_minimumSize; 1950 state.checkedTotal += countToCheckForFirstAlternative; 1951 if (countToCheckForFirstAlternative) 1952 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); 1953 countCheckedForCurrentAlternative = countToCheckForFirstAlternative; 1954 } 1955 1956 if (setRepeatAlternativeLabels) 1957 firstAlternativeInputChecked = Label(this); 1958 1959 while (state.alternativeValid()) { 1960 PatternAlternative* alternative = state.alternative(); 1961 optimizeAlternative(alternative); 1962 1963 // Track whether any alternatives are shorter than the first one. 1964 if (!alternative->onceThrough()) 1965 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); 1966 1967 for (state.resetTerm(); state.termValid(); state.nextTerm()) 1968 generateTerm(state); 1969 1970 // If we get here, the alternative matched. 1971 if (m_pattern.m_body->m_callFrameSize) 1972 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 1973 1974 ASSERT(index != returnRegister); 1975 if (m_pattern.m_body->m_hasFixedSize) { 1976 move(index, returnRegister); 1977 if (alternative->m_minimumSize) 1978 sub32(Imm32(alternative->m_minimumSize), returnRegister); 1979 1980 store32(returnRegister, output); 1981 } else 1982 load32(Address(output), returnRegister); 1983 1984 store32(index, Address(output, 4)); 1985 1986 generateReturn(); 1987 1988 state.nextAlternative(); 1989 if (alternative->onceThrough() && state.alternativeValid()) 1990 state.clearBacktrack(); 1991 1992 // if there are any more alternatives, plant the check for input before looping. 1993 if (state.alternativeValid()) { 1994 state.setJumpListToPriorParen(0); 1995 PatternAlternative* nextAlternative = state.alternative(); 1996 if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) { 1997 // We have handled non-repeating alternatives, jump to next iteration 1998 // and loop over repeating alternatives. 1999 state.jumpToBacktrack(this); 2000 2001 countToCheckForFirstAlternative = nextAlternative->m_minimumSize; 2002 2003 // If we get here, there the last input checked failed. 2004 notEnoughInputForPreviousAlternative.link(this); 2005 2006 state.linkAlternativeBacktracks(this); 2007 2008 // Back up to start the looping alternatives. 2009 if (countCheckedForCurrentAlternative) 2010 sub32(Imm32(countCheckedForCurrentAlternative), index); 2011 2012 firstAlternative = Label(this); 2013 2014 state.checkedTotal = countToCheckForFirstAlternative; 2015 if (countToCheckForFirstAlternative) 2016 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); 2017 2018 countCheckedForCurrentAlternative = countToCheckForFirstAlternative; 2019 2020 firstAlternativeInputChecked = Label(this); 2021 2022 setRepeatAlternativeLabels = true; 1125 } 1126 } 1127 1128 void generate() 1129 { 1130 // Forwards generate the matching code. 1131 ASSERT(m_ops.size()); 1132 size_t opIndex = 0; 1133 1134 do { 1135 YarrOp& op = m_ops[opIndex]; 1136 switch (op.m_op) { 1137 1138 case OpTerm: 1139 generateTerm(opIndex); 1140 break; 1141 1142 // OpBodyAlternativeBegin/Next/End 1143 // 1144 // These nodes wrap the set of alternatives in the body of the regular expression. 1145 // There may be either one or two chains of OpBodyAlternative nodes, one representing 1146 // the 'once through' sequence of alternatives (if any exist), and one representing 1147 // the repeating alternatives (again, if any exist). 1148 // 1149 // Upon normal entry to the Begin alternative, we will check that input is available. 1150 // Reentry to the Begin alternative will take place after the check has taken place, 1151 // and will assume that the input position has already been progressed as appropriate. 1152 // 1153 // Entry to subsequent Next/End alternatives occurs when the prior alternative has 1154 // successfully completed a match - return a success state from JIT code. 1155 // 1156 // Next alternatives allow for reentry optimized to suit backtracking from its 1157 // preceding alternative. It expects the input position to still be set to a position 1158 // appropriate to its predecessor, and it will only perform an input check if the 1159 // predecessor had a minimum size less than its own. 1160 // 1161 // In the case 'once through' expressions, the End node will also have a reentry 1162 // point to jump to when the last alternative fails. Again, this expects the input 1163 // position to still reflect that expected by the prior alternative. 1164 case OpBodyAlternativeBegin: { 1165 PatternAlternative* alternative = op.m_alternative; 1166 1167 // Upon entry at the head of the set of alternatives, check if input is available 1168 // to run the first alternative. (This progresses the input position). 1169 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize)); 1170 // We will reenter after the check, and assume the input position to have been 1171 // set as appropriate to this alternative. 1172 op.m_reentry = label(); 1173 1174 m_checked += alternative->m_minimumSize; 1175 break; 1176 } 1177 case OpBodyAlternativeNext: 1178 case OpBodyAlternativeEnd: { 1179 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; 1180 PatternAlternative* alternative = op.m_alternative; 1181 1182 // If we get here, the prior alternative matched - return success. 1183 1184 // Adjust the stack pointer to remove the pattern's frame. 1185 if (m_pattern.m_body->m_callFrameSize) 1186 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 1187 1188 // Load appropriate values into the return register and the first output 1189 // slot, and return. In the case of pattern with a fixed size, we will 1190 // not have yet set the value in the first 1191 ASSERT(index != returnRegister); 1192 if (m_pattern.m_body->m_hasFixedSize) { 1193 move(index, returnRegister); 1194 if (priorAlternative->m_minimumSize) 1195 sub32(Imm32(priorAlternative->m_minimumSize), returnRegister); 1196 store32(returnRegister, output); 1197 } else 1198 load32(Address(output), returnRegister); 1199 store32(index, Address(output, 4)); 1200 generateReturn(); 1201 1202 // This is the divide between the tail of the prior alternative, above, and 1203 // the head of the subsequent alternative, below. 1204 1205 if (op.m_op == OpBodyAlternativeNext) { 1206 // This is the reentry point for the Next alternative. We expect any code 1207 // that jumps here to do so with the input position matching that of the 1208 // PRIOR alteranative, and we will only check input availability if we 1209 // need to progress it forwards. 1210 op.m_reentry = label(); 1211 if (int delta = alternative->m_minimumSize - priorAlternative->m_minimumSize) { 1212 add32(Imm32(delta), index); 1213 if (delta > 0) 1214 op.m_jumps.append(jumpIfNoAvailableInput()); 1215 } 1216 } else if (op.m_nextOp == notFound) { 1217 // This is the reentry point for the End of 'once through' alternatives, 1218 // jumped to when the las alternative fails to match. 1219 op.m_reentry = label(); 1220 sub32(Imm32(priorAlternative->m_minimumSize), index); 1221 } 1222 1223 if (op.m_op == OpBodyAlternativeNext) 1224 m_checked += alternative->m_minimumSize; 1225 m_checked -= priorAlternative->m_minimumSize; 1226 break; 1227 } 1228 1229 // OpSimpleNestedAlternativeBegin/Next/End 1230 // OpNestedAlternativeBegin/Next/End 1231 // 1232 // These nodes are used to handle sets of alternatives that are nested within 1233 // subpatterns and parenthetical assertions. The 'simple' forms are used where 1234 // we do not need to be able to backtrack back into any alternative other than 1235 // the last, the normal forms allow backtracking into any alternative. 1236 // 1237 // Each Begin/Next node is responsible for planting an input check to ensure 1238 // sufficient input is available on entry. Next nodes additionally need to 1239 // jump to the end - Next nodes use the End node's m_jumps list to hold this 1240 // set of jumps. 1241 // 1242 // In the non-simple forms, successful alternative matches must store a 1243 // 'return address' using a DataLabelPtr, used to store the address to jump 1244 // to when backtracking, to get to the code for the appropriate alternative. 1245 case OpSimpleNestedAlternativeBegin: 1246 case OpNestedAlternativeBegin: { 1247 PatternTerm* term = op.m_term; 1248 PatternAlternative* alternative = op.m_alternative; 1249 PatternDisjunction* disjunction = term->parentheses.disjunction; 1250 1251 // Calculate how much input we need to check for, and if non-zero check. 1252 op.m_checkAdjust = alternative->m_minimumSize; 1253 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) 1254 op.m_checkAdjust -= disjunction->m_minimumSize; 1255 if (op.m_checkAdjust) 1256 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); 1257 1258 m_checked += op.m_checkAdjust; 1259 break; 1260 } 1261 case OpSimpleNestedAlternativeNext: 1262 case OpNestedAlternativeNext: { 1263 PatternTerm* term = op.m_term; 1264 PatternAlternative* alternative = op.m_alternative; 1265 PatternDisjunction* disjunction = term->parentheses.disjunction; 1266 1267 // In the non-simple case, store a 'return address' so we can backtrack correctly. 1268 if (op.m_op == OpNestedAlternativeNext) { 1269 unsigned parenthesesFrameLocation = term->frameLocation; 1270 unsigned alternativeFrameLocation = parenthesesFrameLocation; 1271 if (term->quantityType != QuantifierFixedCount) 1272 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 1273 op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); 1274 } 1275 1276 // If we reach here then the last alternative has matched - jump to the 1277 // End node, to skip over any further alternatives. 1278 // 1279 // FIXME: this is logically O(N^2) (though N can be expected to be very 1280 // small). We could avoid this either by adding an extra jump to the JIT 1281 // data structures, or by making backtracking code that jumps to Next 1282 // alternatives are responsible for checking that input is available (if 1283 // we didn't need to plant the input checks, then m_jumps would be free). 1284 YarrOp* endOp = &m_ops[op.m_nextOp]; 1285 while (endOp->m_nextOp != notFound) { 1286 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); 1287 endOp = &m_ops[endOp->m_nextOp]; 1288 } 1289 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); 1290 endOp->m_jumps.append(jump()); 1291 1292 // This is the entry point for the next alternative. 1293 op.m_reentry = label(); 1294 1295 // Calculate how much input we need to check for, and if non-zero check. 1296 op.m_checkAdjust = alternative->m_minimumSize; 1297 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) 1298 op.m_checkAdjust -= disjunction->m_minimumSize; 1299 if (op.m_checkAdjust) 1300 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); 1301 1302 YarrOp& lastOp = m_ops[op.m_previousOp]; 1303 m_checked -= lastOp.m_checkAdjust; 1304 m_checked += op.m_checkAdjust; 1305 break; 1306 } 1307 case OpSimpleNestedAlternativeEnd: 1308 case OpNestedAlternativeEnd: { 1309 PatternTerm* term = op.m_term; 1310 1311 // In the non-simple case, store a 'return address' so we can backtrack correctly. 1312 if (op.m_op == OpNestedAlternativeEnd) { 1313 unsigned parenthesesFrameLocation = term->frameLocation; 1314 unsigned alternativeFrameLocation = parenthesesFrameLocation; 1315 if (term->quantityType != QuantifierFixedCount) 1316 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 1317 op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); 1318 } 1319 1320 // If this set of alternatives contains more than one alternative, 1321 // then the Next nodes will have planted jumps to the End, and added 1322 // them to this node's m_jumps list. 1323 op.m_jumps.link(this); 1324 op.m_jumps.clear(); 1325 1326 YarrOp& lastOp = m_ops[op.m_previousOp]; 1327 m_checked -= lastOp.m_checkAdjust; 1328 break; 1329 } 1330 1331 // OpParenthesesSubpatternOnceBegin/End 1332 // 1333 // These nodes support (optionally) capturing subpatterns, that have a 1334 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 1335 case OpParenthesesSubpatternOnceBegin: { 1336 PatternTerm* term = op.m_term; 1337 unsigned parenthesesFrameLocation = term->frameLocation; 1338 const RegisterID indexTemporary = regT0; 1339 ASSERT(term->quantityCount == 1); 1340 1341 // Upon entry to a Greedy quantified set of parenthese store the index. 1342 // We'll use this for two purposes: 1343 // - To indicate which iteration we are on of mathing the remainder of 1344 // the expression after the parentheses - the first, including the 1345 // match within the parentheses, or the second having skipped over them. 1346 // - To check for empty matches, which must be rejected. 1347 // 1348 // At the head of a NonGreedy set of parentheses we'll immediately set the 1349 // value on the stack to -1 (indicating a match skipping the subpattern), 1350 // and plant a jump to the end. We'll also plant a label to backtrack to 1351 // to reenter the subpattern later, with a store to set up index on the 1352 // second iteration. 1353 // 1354 // FIXME: for capturing parens, could use the index in the capture array? 1355 if (term->quantityType == QuantifierGreedy) 1356 storeToFrame(index, parenthesesFrameLocation); 1357 else if (term->quantityType == QuantifierNonGreedy) { 1358 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); 1359 op.m_jumps.append(jump()); 1360 op.m_reentry = label(); 1361 storeToFrame(index, parenthesesFrameLocation); 1362 } 1363 1364 // If the parenthese are capturing, store the starting index value to the 1365 // captures array, offsetting as necessary. 1366 // 1367 // FIXME: could avoid offsetting this value in JIT code, apply 1368 // offsets only afterwards, at the point the results array is 1369 // being accessed. 1370 if (term->capture()) { 1371 int offsetId = term->parentheses.subpatternId << 1; 1372 int inputOffset = term->inputPosition - m_checked; 1373 if (term->quantityType == QuantifierFixedCount) 1374 inputOffset -= term->parentheses.disjunction->m_minimumSize; 1375 if (inputOffset) { 1376 move(index, indexTemporary); 1377 add32(Imm32(inputOffset), indexTemporary); 1378 store32(indexTemporary, Address(output, offsetId * sizeof(int))); 1379 } else 1380 store32(index, Address(output, offsetId * sizeof(int))); 1381 } 1382 break; 1383 } 1384 case OpParenthesesSubpatternOnceEnd: { 1385 PatternTerm* term = op.m_term; 1386 unsigned parenthesesFrameLocation = term->frameLocation; 1387 const RegisterID indexTemporary = regT0; 1388 ASSERT(term->quantityCount == 1); 1389 1390 // For Greedy/NonGreedy quantified parentheses, we must reject zero length 1391 // matches. If the minimum size is know to be non-zero we need not check. 1392 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) 1393 op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)))); 1394 1395 // If the parenthese are capturing, store the ending index value to the 1396 // captures array, offsetting as necessary. 1397 // 1398 // FIXME: could avoid offsetting this value in JIT code, apply 1399 // offsets only afterwards, at the point the results array is 1400 // being accessed. 1401 if (term->capture()) { 1402 int offsetId = (term->parentheses.subpatternId << 1) + 1; 1403 int inputOffset = term->inputPosition - m_checked; 1404 if (inputOffset) { 1405 move(index, indexTemporary); 1406 add32(Imm32(inputOffset), indexTemporary); 1407 store32(indexTemporary, Address(output, offsetId * sizeof(int))); 1408 } else 1409 store32(index, Address(output, offsetId * sizeof(int))); 1410 } 1411 1412 // If the parentheses are quantified Greedy then add a label to jump back 1413 // to if get a failed match from after the parentheses. For NonGreedy 1414 // parentheses, link the jump from before the subpattern to here. 1415 if (term->quantityType == QuantifierGreedy) 1416 op.m_reentry = label(); 1417 else if (term->quantityType == QuantifierNonGreedy) { 1418 YarrOp& beginOp = m_ops[op.m_previousOp]; 1419 beginOp.m_jumps.link(this); 1420 } 1421 break; 1422 } 1423 1424 // OpParenthesesSubpatternTerminalBegin/End 1425 case OpParenthesesSubpatternTerminalBegin: { 1426 PatternTerm* term = op.m_term; 1427 ASSERT(term->quantityType == QuantifierGreedy); 1428 ASSERT(term->quantityCount == quantifyInfinite); 1429 ASSERT(!term->capture()); 1430 1431 // Upon entry set a label to loop back to. 1432 op.m_reentry = label(); 1433 1434 // Store the start index of the current match; we need to reject zero 1435 // length matches. 1436 storeToFrame(index, term->frameLocation); 1437 break; 1438 } 1439 case OpParenthesesSubpatternTerminalEnd: { 1440 PatternTerm* term = op.m_term; 1441 1442 // Check for zero length matches - if the match is non-zero, then we 1443 // can accept it & loop back up to the head of the subpattern. 1444 YarrOp& beginOp = m_ops[op.m_previousOp]; 1445 branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry); 1446 1447 // Reject the match - backtrack back into the subpattern. 1448 op.m_jumps.append(jump()); 1449 1450 // This is the entry point to jump to when we stop matching - we will 1451 // do so once the subpattern cannot match any more. 1452 op.m_reentry = label(); 1453 break; 1454 } 1455 1456 // OpParentheticalAssertionBegin/End 1457 case OpParentheticalAssertionBegin: { 1458 PatternTerm* term = op.m_term; 1459 1460 // Store the current index - assertions should not update index, so 1461 // we will need to restore it upon a successful match. 1462 unsigned parenthesesFrameLocation = term->frameLocation; 1463 storeToFrame(index, parenthesesFrameLocation); 1464 1465 // Check 1466 op.m_checkAdjust = m_checked - term->inputPosition; 1467 if (op.m_checkAdjust) 1468 sub32(Imm32(op.m_checkAdjust), index); 1469 1470 m_checked -= op.m_checkAdjust; 1471 break; 1472 } 1473 case OpParentheticalAssertionEnd: { 1474 PatternTerm* term = op.m_term; 1475 1476 // Restore the input index value. 1477 unsigned parenthesesFrameLocation = term->frameLocation; 1478 loadFromFrame(parenthesesFrameLocation, index); 1479 1480 // If inverted, a successful match of the assertion must be treated 1481 // as a failure, so jump to backtracking. 1482 if (term->invert()) { 1483 op.m_jumps.append(jump()); 1484 op.m_reentry = label(); 1485 } 1486 1487 YarrOp& lastOp = m_ops[op.m_previousOp]; 1488 m_checked += lastOp.m_checkAdjust; 1489 break; 1490 } 1491 1492 case OpMatchFailed: 1493 if (m_pattern.m_body->m_callFrameSize) 1494 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 1495 move(TrustedImm32(-1), returnRegister); 1496 generateReturn(); 1497 break; 1498 } 1499 1500 ++opIndex; 1501 } while (opIndex < m_ops.size()); 1502 } 1503 1504 void backtrack() 1505 { 1506 // Backwards generate the backtracking code. 1507 size_t opIndex = m_ops.size(); 1508 ASSERT(opIndex); 1509 1510 do { 1511 --opIndex; 1512 YarrOp& op = m_ops[opIndex]; 1513 switch (op.m_op) { 1514 1515 case OpTerm: 1516 backtrackTerm(opIndex); 1517 break; 1518 1519 // OpBodyAlternativeBegin/Next/End 1520 // 1521 // For each Begin/Next node representing an alternative, we need to decide what to do 1522 // in two circumstances: 1523 // - If we backtrack back into this node, from within the alternative. 1524 // - If the input check at the head of the alternative fails (if this exists). 1525 // 1526 // We treat these two cases differently since in the former case we have slightly 1527 // more information - since we are backtracking out of a prior alternative we know 1528 // that at least enough input was available to run it. For example, given the regular 1529 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern 1530 // character match of 'a'), then we need not perform an additional input availability 1531 // check before running the second alternative. 1532 // 1533 // Backtracking required differs for the last alternative, which in the case of the 1534 // repeating set of alternatives must loop. The code generated for the last alternative 1535 // will also be used to handle all input check failures from any prior alternatives - 1536 // these require similar functionality, in seeking the next available alternative for 1537 // which there is sufficient input. 1538 // 1539 // Since backtracking of all other alternatives simply requires us to link backtracks 1540 // to the reentry point for the subsequent alternative, we will only be generating any 1541 // code when backtracking the last alternative. 1542 case OpBodyAlternativeBegin: 1543 case OpBodyAlternativeNext: { 1544 PatternAlternative* alternative = op.m_alternative; 1545 1546 if (op.m_op == OpBodyAlternativeNext) { 1547 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; 1548 m_checked += priorAlternative->m_minimumSize; 1549 } 1550 m_checked -= alternative->m_minimumSize; 1551 1552 // Is this the last alternative? If not, then if we backtrack to this point we just 1553 // need to jump to try to match the next alternative. 1554 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) { 1555 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this); 1556 break; 1557 } 1558 YarrOp& endOp = m_ops[op.m_nextOp]; 1559 1560 YarrOp* beginOp = &op; 1561 while (beginOp->m_op != OpBodyAlternativeBegin) { 1562 ASSERT(beginOp->m_op == OpBodyAlternativeNext); 1563 beginOp = &m_ops[beginOp->m_previousOp]; 1564 } 1565 1566 bool onceThrough = endOp.m_nextOp == notFound; 1567 1568 // First, generate code to handle cases where we backtrack out of an attempted match 1569 // of the last alternative. If this is a 'once through' set of alternatives then we 1570 // have nothing to do - link this straight through to the End. 1571 if (onceThrough) 1572 m_backtrackingState.linkTo(endOp.m_reentry, this); 1573 else { 1574 // Okay, we're going to need to loop. Calculate the delta between where the input 1575 // position was, and where we want it to be allowing for the fact that we need to 1576 // increment by 1. E.g. for the regexp /a|x/ we need to increment the position by 1577 // 1 between loop iterations, but for /abcd|xyz/ we need to increment by two when 1578 // looping from the last alternative to the first, for /a|xyz/ we need to decrement 1579 // by 1, and for /a|xy/ we don't need to move the input position at all. 1580 int deltaLastAlternativeToFirstAlternativePlusOne = (beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize) + 1; 1581 1582 // If we don't need to move the input poistion, and the pattern has a fixed size 1583 // (in which case we omit the store of the start index until the pattern has matched) 1584 // then we can just link the backtrack out of the last alternative straight to the 1585 // head of the first alternative. 1586 if (!deltaLastAlternativeToFirstAlternativePlusOne && m_pattern.m_body->m_hasFixedSize) 1587 m_backtrackingState.linkTo(beginOp->m_reentry, this); 1588 else { 1589 // We need to generate a trampoline of code to execute before looping back 1590 // around to the first alternative. 1591 m_backtrackingState.link(this); 1592 1593 // If the pattern size is not fixed, then store the start index, for use if we match. 1594 if (!m_pattern.m_body->m_hasFixedSize) { 1595 if (alternative->m_minimumSize == 1) 1596 store32(index, Address(output)); 1597 else { 1598 move(index, regT0); 1599 if (alternative->m_minimumSize) 1600 sub32(Imm32(alternative->m_minimumSize - 1), regT0); 1601 else 1602 add32(Imm32(1), regT0); 1603 store32(regT0, Address(output)); 1604 } 1605 } 1606 1607 if (deltaLastAlternativeToFirstAlternativePlusOne) 1608 add32(Imm32(deltaLastAlternativeToFirstAlternativePlusOne), index); 1609 1610 // Loop. Since this code is only reached when we backtrack out of the last 1611 // alternative (and NOT linked to from the input check upon entry to the 1612 // last alternative) we know that there must be at least enough input as 1613 // required by the last alternative. As such, we only need to check if the 1614 // first will require more to run - if the same or less is required we can 1615 // unconditionally jump. 1616 if (deltaLastAlternativeToFirstAlternativePlusOne > 0) 1617 checkInput().linkTo(beginOp->m_reentry, this); 1618 else 1619 jump(beginOp->m_reentry); 1620 } 1621 } 1622 1623 // We can reach this point in the code in two ways: 1624 // - Fallthrough from the code above (a repeating alternative backtracked out of its 1625 // last alternative, and did not have sufficent input to run the first). 1626 // - We will loop back up to the following label when a releating alternative loops, 1627 // following a failed input check. 1628 // 1629 // Either way, we have just failed the input check for the first alternative. 1630 Label firstInputCheckFailed(this); 1631 1632 // Generate code to handle input check failures from alternatives except the last. 1633 // prevOp is the alternative we're handling a bail out from (initially Begin), and 1634 // nextOp is the alternative we will be attempting to reenter into. 1635 // 1636 // We will link input check failures from the forwards matching path back to the code 1637 // that can handle them. 1638 YarrOp* prevOp = beginOp; 1639 YarrOp* nextOp = &m_ops[beginOp->m_nextOp]; 1640 while (nextOp->m_op != OpBodyAlternativeEnd) { 1641 prevOp->m_jumps.link(this); 1642 1643 int delta = nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize; 1644 if (delta) 1645 add32(Imm32(delta), index); 1646 1647 // We only get here if an input check fails, it is only worth checking again 1648 // if the next alternative has a minimum size less than the last. 1649 if (delta < 0) { 1650 // FIXME: if we added an extra label to YarrOp, we could avoid needing to 1651 // subtract delta back out, and reduce this code. Should performance test 1652 // the benefit of this. 1653 Jump fail = jumpIfNoAvailableInput(); 1654 sub32(Imm32(delta), index); 1655 jump(nextOp->m_reentry); 1656 fail.link(this); 1657 } 1658 prevOp = nextOp; 1659 nextOp = &m_ops[nextOp->m_nextOp]; 1660 } 1661 1662 // We fall through to here if there is insufficient input to run the last alternative. 1663 1664 // If there is insufficient input to run the last alternative, then for 'once through' 1665 // alternatives we are done - just jump back up into the forwards matching path at the End. 1666 if (onceThrough) { 1667 op.m_jumps.linkTo(endOp.m_reentry, this); 1668 jump(endOp.m_reentry); 1669 break; 1670 } 1671 1672 // For repeating alternatives, link any input check failure from the last alternative to 1673 // this point. 1674 op.m_jumps.link(this); 1675 1676 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize; 1677 1678 // Check for cases where input position is already incremented by 1 for the last 1679 // alternative (this is particularly useful where the minimum size of the body 1680 // disjunction is 0, e.g. /a*|b/). 1681 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) { 1682 // index is already incremented by 1, so just store it now! 1683 store32(index, Address(output)); 1684 needsToUpdateMatchStart = false; 1685 } 1686 1687 // Check whether there is sufficient input to loop. Increment the input position by 1688 // one, and check. Also add in the minimum disjunction size before checking - there 1689 // is no point in looping if we're just going to fail all the input checks around 1690 // the next iteration. 1691 int deltaLastAlternativeToBodyMinimumPlusOne = (m_pattern.m_body->m_minimumSize + 1) - alternative->m_minimumSize; 1692 if (deltaLastAlternativeToBodyMinimumPlusOne) 1693 add32(Imm32(deltaLastAlternativeToBodyMinimumPlusOne), index); 1694 Jump matchFailed = jumpIfNoAvailableInput(); 1695 1696 if (needsToUpdateMatchStart) { 1697 if (!m_pattern.m_body->m_minimumSize) 1698 store32(index, Address(output)); 1699 else { 1700 move(index, regT0); 1701 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); 1702 store32(regT0, Address(output)); 1703 } 1704 } 1705 1706 // Calculate how much more input the first alternative requires than the minimum 1707 // for the body as a whole. If no more is needed then we dont need an additional 1708 // input check here - jump straight back up to the start of the first alternative. 1709 int deltaBodyMinimumToFirstAlternative = beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize; 1710 if (!deltaBodyMinimumToFirstAlternative) 1711 jump(beginOp->m_reentry); 1712 else { 1713 add32(Imm32(deltaBodyMinimumToFirstAlternative), index); 1714 checkInput().linkTo(beginOp->m_reentry, this); 1715 jump(firstInputCheckFailed); 1716 } 1717 1718 // We jump to here if we iterate to the point that there is insufficient input to 1719 // run any matches, and need to return a failure state from JIT code. 1720 matchFailed.link(this); 1721 1722 if (m_pattern.m_body->m_callFrameSize) 1723 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 1724 move(TrustedImm32(-1), returnRegister); 1725 generateReturn(); 1726 break; 1727 } 1728 case OpBodyAlternativeEnd: { 1729 // We should never backtrack back into a body disjunction. 1730 ASSERT(m_backtrackingState.isEmpty()); 1731 1732 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; 1733 m_checked += priorAlternative->m_minimumSize; 1734 break; 1735 } 1736 1737 // OpSimpleNestedAlternativeBegin/Next/End 1738 // OpNestedAlternativeBegin/Next/End 1739 // 1740 // Generate code for when we backtrack back out of an alternative into 1741 // a Begin or Next node, or when the entry input count check fails. If 1742 // there are more alternatives we need to jump to the next alternative, 1743 // if not we backtrack back out of the current set of parentheses. 1744 // 1745 // In the case of non-simple nested assertions we need to also link the 1746 // 'return address' appropriately to backtrack back out into the correct 1747 // alternative. 1748 case OpSimpleNestedAlternativeBegin: 1749 case OpSimpleNestedAlternativeNext: 1750 case OpNestedAlternativeBegin: 1751 case OpNestedAlternativeNext: { 1752 YarrOp& nextOp = m_ops[op.m_nextOp]; 1753 bool isBegin = op.m_previousOp == notFound; 1754 bool isLastAlternative = nextOp.m_nextOp == notFound; 1755 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin)); 1756 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd)); 1757 1758 // Treat an input check failure the same as a failed match. 1759 m_backtrackingState.append(op.m_jumps); 1760 1761 // Set the backtracks to jump to the appropriate place. We may need 1762 // to link the backtracks in one of three different way depending on 1763 // the type of alternative we are dealing with: 1764 // - A single alternative, with no simplings. 1765 // - The last alternative of a set of two or more. 1766 // - An alternative other than the last of a set of two or more. 1767 // 1768 // In the case of a single alternative on its own, we don't need to 1769 // jump anywhere - if the alternative fails to match we can just 1770 // continue to backtrack out of the parentheses without jumping. 1771 // 1772 // In the case of the last alternative in a set of more than one, we 1773 // need to jump to return back out to the beginning. We'll do so by 1774 // adding a jump to the End node's m_jumps list, and linking this 1775 // when we come to generate the Begin node. For alternatives other 1776 // than the last, we need to jump to the next alternative. 1777 // 1778 // If the alternative had adjusted the input position we must link 1779 // backtracking to here, correct, and then jump on. If not we can 1780 // link the backtracks directly to their destination. 1781 if (op.m_checkAdjust) { 1782 // Handle the cases where we need to link the backtracks here. 1783 m_backtrackingState.link(this); 1784 sub32(Imm32(op.m_checkAdjust), index); 1785 if (!isLastAlternative) { 1786 // An alternative that is not the last should jump to its successor. 1787 jump(nextOp.m_reentry); 1788 } else if (!isBegin) { 1789 // The last of more than one alternatives must jump back to the begnning. 1790 nextOp.m_jumps.append(jump()); 1791 } else { 1792 // A single alternative on its own can fall through. 1793 m_backtrackingState.fallthrough(); 1794 } 2023 1795 } else { 2024 int countToCheckForNextAlternative = nextAlternative->m_minimumSize; 2025 2026 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. 2027 // If we get here, then the last input checked failed. 2028 notEnoughInputForPreviousAlternative.link(this); 2029 2030 // Check if sufficent input available to run the next alternative 2031 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); 2032 // We are now in the correct state to enter the next alternative; this add is only required 2033 // to mirror and revert operation of the sub32, just below. 2034 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); 2035 2036 // If we get here, then the last input checked passed. 2037 state.linkAlternativeBacktracks(this); 2038 2039 // No need to check if we can run the next alternative, since it is shorter - 2040 // just update index. 2041 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); 2042 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one. 2043 // If we get here, then the last input checked failed. 2044 // If there is insufficient input to run the current alternative, and the next alternative is longer, 2045 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if 2046 // we had checked. 2047 notEnoughInputForPreviousAlternative.link(this); 2048 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); 2049 notEnoughInputForPreviousAlternative.append(jump()); 2050 2051 // The next alternative is longer than the current one; check the difference. 2052 state.linkAlternativeBacktracks(this); 2053 2054 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); 2055 } else { // CASE 3: Both alternatives are the same length. 2056 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); 2057 2058 // If the next alterative is the same length as this one, then no need to check the input - 2059 // if there was sufficent input to run the current alternative then there is sufficient 2060 // input to run the next one; if not, there isn't. 2061 state.linkAlternativeBacktracks(this); 1796 // Handle the cases where we can link the backtracks directly to their destinations. 1797 if (!isLastAlternative) { 1798 // An alternative that is not the last should jump to its successor. 1799 m_backtrackingState.linkTo(nextOp.m_reentry, this); 1800 } else if (!isBegin) { 1801 // The last of more than one alternatives must jump back to the begnning. 1802 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this); 2062 1803 } 2063 state.checkedTotal -= countCheckedForCurrentAlternative; 2064 countCheckedForCurrentAlternative = countToCheckForNextAlternative; 2065 state.checkedTotal += countCheckedForCurrentAlternative; 2066 } 2067 } 2068 } 2069 2070 // If we get here, all Alternatives failed... 2071 2072 state.checkedTotal -= countCheckedForCurrentAlternative; 2073 2074 if (!setRepeatAlternativeLabels) { 2075 // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link 2076 // the match failures to this point, and fall through to the return below. 2077 state.linkAlternativeBacktracks(this, true); 2078 2079 notEnoughInputForPreviousAlternative.link(this); 1804 // In the case of a single alternative on its own do nothing - it can fall through. 1805 } 1806 1807 // At this point we've handled the backtracking back into this node. 1808 // Now link any backtracks that need to jump to here. 1809 1810 // For non-simple alternatives, link the alternative's 'return address' 1811 // so that we backtrack back out into the previous alternative. 1812 if (op.m_op == OpNestedAlternativeNext) 1813 m_backtrackingState.append(op.m_returnAddress); 1814 1815 // If there is more than one alternative, then the last alternative will 1816 // have planted a jump to be linked to the end. This jump was added to the 1817 // End node's m_jumps list. If we are back at the beginning, link it here. 1818 if (isBegin) { 1819 YarrOp* endOp = &m_ops[op.m_nextOp]; 1820 while (endOp->m_nextOp != notFound) { 1821 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); 1822 endOp = &m_ops[endOp->m_nextOp]; 1823 } 1824 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); 1825 m_backtrackingState.append(endOp->m_jumps); 1826 } 1827 1828 if (!isBegin) { 1829 YarrOp& lastOp = m_ops[op.m_previousOp]; 1830 m_checked += lastOp.m_checkAdjust; 1831 } 1832 m_checked -= op.m_checkAdjust; 1833 break; 1834 } 1835 case OpSimpleNestedAlternativeEnd: 1836 case OpNestedAlternativeEnd: { 1837 PatternTerm* term = op.m_term; 1838 1839 // If we backtrack into the end of a simple subpattern do nothing; 1840 // just continue through into the last alternative. If we backtrack 1841 // into the end of a non-simple set of alterntives we need to jump 1842 // to the backtracking return address set up during generation. 1843 if (op.m_op == OpNestedAlternativeEnd) { 1844 m_backtrackingState.link(this); 1845 1846 // Plant a jump to the return address. 1847 unsigned parenthesesFrameLocation = term->frameLocation; 1848 unsigned alternativeFrameLocation = parenthesesFrameLocation; 1849 if (term->quantityType != QuantifierFixedCount) 1850 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 1851 loadFromFrameAndJump(alternativeFrameLocation); 1852 1853 // Link the DataLabelPtr associated with the end of the last 1854 // alternative to this point. 1855 m_backtrackingState.append(op.m_returnAddress); 1856 } 1857 1858 YarrOp& lastOp = m_ops[op.m_previousOp]; 1859 m_checked += lastOp.m_checkAdjust; 1860 break; 1861 } 1862 1863 // OpParenthesesSubpatternOnceBegin/End 1864 // 1865 // When we are backtracking back out of a capturing subpattern we need 1866 // to clear the start index in the matches output array, to record that 1867 // this subpattern has not been captured. 1868 // 1869 // When backtracking back out of a Greedy quantified subpattern we need 1870 // to catch this, and try running the remainder of the alternative after 1871 // the subpattern again, skipping the parentheses. 1872 // 1873 // Upon backtracking back into a quantified set of parentheses we need to 1874 // check whether we were currently skipping the subpattern. If not, we 1875 // can backtrack into them, if we were we need to either backtrack back 1876 // out of the start of the parentheses, or jump back to the forwards 1877 // matching start, depending of whether the match is Greedy or NonGreedy. 1878 case OpParenthesesSubpatternOnceBegin: { 1879 PatternTerm* term = op.m_term; 1880 ASSERT(term->quantityCount == 1); 1881 1882 // We only need to backtrack to thispoint if capturing or greedy. 1883 if (term->capture() || term->quantityType == QuantifierGreedy) { 1884 m_backtrackingState.link(this); 1885 1886 // If capturing, clear the capture (we only need to reset start). 1887 if (term->capture()) 1888 store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int))); 1889 1890 // If Greedy, jump to the end. 1891 if (term->quantityType == QuantifierGreedy) { 1892 // Clear the flag in the stackframe indicating we ran through the subpattern. 1893 unsigned parenthesesFrameLocation = term->frameLocation; 1894 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); 1895 // Jump to after the parentheses, skipping the subpattern. 1896 jump(m_ops[op.m_nextOp].m_reentry); 1897 // A backtrack from after the parentheses, when skipping the subpattern, 1898 // will jump back to here. 1899 op.m_jumps.link(this); 1900 } 1901 1902 m_backtrackingState.fallthrough(); 1903 } 1904 break; 1905 } 1906 case OpParenthesesSubpatternOnceEnd: { 1907 PatternTerm* term = op.m_term; 1908 1909 if (term->quantityType != QuantifierFixedCount) { 1910 m_backtrackingState.link(this); 1911 1912 // Check whether we should backtrack back into the parentheses, or if we 1913 // are currently in a state where we had skipped over the subpattern 1914 // (in which case the flag value on the stack will be -1). 1915 unsigned parenthesesFrameLocation = term->frameLocation; 1916 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); 1917 1918 if (term->quantityType == QuantifierGreedy) { 1919 // For Greedy parentheses, we skip after having already tried going 1920 // through the subpattern, so if we get here we're done. 1921 YarrOp& beginOp = m_ops[op.m_previousOp]; 1922 beginOp.m_jumps.append(hadSkipped); 1923 } else { 1924 // For NonGreedy parentheses, we try skipping the subpattern first, 1925 // so if we get here we need to try running through the subpattern 1926 // next. Jump back to the start of the parentheses in the forwards 1927 // matching path. 1928 ASSERT(term->quantityType == QuantifierNonGreedy); 1929 YarrOp& beginOp = m_ops[op.m_previousOp]; 1930 hadSkipped.linkTo(beginOp.m_reentry, this); 1931 } 1932 1933 m_backtrackingState.fallthrough(); 1934 } 1935 1936 m_backtrackingState.append(op.m_jumps); 1937 break; 1938 } 1939 1940 // OpParenthesesSubpatternTerminalBegin/End 1941 // 1942 // Terminal subpatterns will always match - there is nothing after them to 1943 // force a backtrack, and they have a minimum count of 0, and as such will 1944 // always produce an acceptable result. 1945 case OpParenthesesSubpatternTerminalBegin: { 1946 // We will backtrack to this point once the subpattern cannot match any 1947 // more. Since no match is accepted as a successful match (we are Greedy 1948 // quantified with a minimum of zero) jump back to the forwards matching 1949 // path at the end. 1950 YarrOp& endOp = m_ops[op.m_nextOp]; 1951 m_backtrackingState.linkTo(endOp.m_reentry, this); 1952 break; 1953 } 1954 case OpParenthesesSubpatternTerminalEnd: 1955 // We should never be backtracking to here (hence the 'terminal' in the name). 1956 ASSERT(m_backtrackingState.isEmpty()); 1957 m_backtrackingState.append(op.m_jumps); 1958 break; 1959 1960 // OpParentheticalAssertionBegin/End 1961 case OpParentheticalAssertionBegin: { 1962 PatternTerm* term = op.m_term; 1963 YarrOp& endOp = m_ops[op.m_nextOp]; 1964 1965 // We need to handle the backtracks upon backtracking back out 1966 // of a parenthetical assertion if either we need to correct 1967 // the input index, or the assertion was inverted. 1968 if (op.m_checkAdjust || term->invert()) { 1969 m_backtrackingState.link(this); 1970 1971 if (op.m_checkAdjust) 1972 add32(Imm32(op.m_checkAdjust), index); 1973 1974 // In an inverted assertion failure to match the subpattern 1975 // is treated as a successful match - jump to the end of the 1976 // subpattern. We already have adjusted the input position 1977 // back to that before the assertion, which is correct. 1978 if (term->invert()) 1979 jump(endOp.m_reentry); 1980 1981 m_backtrackingState.fallthrough(); 1982 } 1983 1984 // The End node's jump list will contain any backtracks into 1985 // the end of the assertion. Also, if inverted, we will have 1986 // added the failure caused by a successful match to this. 1987 m_backtrackingState.append(endOp.m_jumps); 1988 1989 m_checked += op.m_checkAdjust; 1990 break; 1991 } 1992 case OpParentheticalAssertionEnd: { 1993 // FIXME: We should really be clearing any nested subpattern 1994 // matches on bailing out from after the pattern. Firefox has 1995 // this bug too (presumably because they use YARR!) 1996 1997 // Never backtrack into an assertion; later failures bail to before the begin. 1998 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); 1999 2000 YarrOp& lastOp = m_ops[op.m_previousOp]; 2001 m_checked -= lastOp.m_checkAdjust; 2002 break; 2003 } 2004 2005 case OpMatchFailed: 2006 break; 2007 } 2008 2009 } while (opIndex); 2010 } 2011 2012 // Compilation methods: 2013 // ==================== 2014 2015 // opCompileParenthesesSubpattern 2016 // Emits ops for a subpattern (set of parentheses). These consist 2017 // of a set of alternatives wrapped in an outer set of nodes for 2018 // the parentheses. 2019 // Supported types of parentheses are 'Once' (quantityCount == 1) 2020 // and 'Terminal' (non-capturing parentheses quantified as greedy 2021 // and infinite). 2022 // Alternatives will use the 'Simple' set of ops if either the 2023 // subpattern is terminal (in which case we will never need to 2024 // backtrack), or if the subpattern only contains one alternative. 2025 void opCompileParenthesesSubpattern(PatternTerm* term) 2026 { 2027 YarrOpCode parenthesesBeginOpCode; 2028 YarrOpCode parenthesesEndOpCode; 2029 YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin; 2030 YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext; 2031 YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd; 2032 2033 // We can currently only compile quantity 1 subpatterns that are 2034 // not copies. We generate a copy in the case of a range quantifier, 2035 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to 2036 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem 2037 // comes where the subpattern is capturing, in which case we would 2038 // need to restore the capture from the first subpattern upon a 2039 // failure in the second. 2040 if (term->quantityCount == 1 && !term->parentheses.isCopy) { 2041 // Select the 'Once' nodes. 2042 parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin; 2043 parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd; 2044 2045 // If there is more than one alternative we cannot use the 'simple' nodes. 2046 if (term->parentheses.disjunction->m_alternatives.size() != 1) { 2047 alternativeBeginOpCode = OpNestedAlternativeBegin; 2048 alternativeNextOpCode = OpNestedAlternativeNext; 2049 alternativeEndOpCode = OpNestedAlternativeEnd; 2050 } 2051 } else if (term->parentheses.isTerminal) { 2052 // Select the 'Terminal' nodes. 2053 parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin; 2054 parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd; 2080 2055 } else { 2081 // How much more input need there be to be able to retry from the first alternative? 2082 // examples: 2083 // /yarr_jit/ or /wrec|pcre/ 2084 // In these examples we need check for one more input before looping. 2085 // /yarr_jit|pcre/ 2086 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative 2087 // being four longer than the last alternative checked, and another +1 to effectively move 2088 // the start position along by one). 2089 // /yarr|rules/ or /wrec|notsomuch/ 2090 // In these examples, provided that there was sufficient input to have just been matching for 2091 // the second alternative we can loop without checking for available input (since the second 2092 // alternative is longer than the first). In the latter example we need to decrement index 2093 // (by 4) so the start position is only progressed by 1 from the last iteration. 2094 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1; 2095 2096 // First, deal with the cases where there was sufficient input to try the last alternative. 2097 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. 2098 state.linkAlternativeBacktracks(this, true); 2099 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! 2100 state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true); 2101 else { // no need to check the input, but we do have some bookkeeping to do first. 2102 state.linkAlternativeBacktracks(this, true); 2103 2104 // Where necessary update our preserved start position. 2105 if (!m_pattern.m_body->m_hasFixedSize) { 2106 move(index, regT0); 2107 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); 2108 store32(regT0, Address(output)); 2109 } 2110 2111 // Update index if necessary, and loop (without checking). 2112 if (incrementForNextIter) 2113 add32(Imm32(incrementForNextIter), index); 2114 jump().linkTo(firstAlternativeInputChecked, this); 2115 } 2116 2117 notEnoughInputForPreviousAlternative.link(this); 2118 // Update our idea of the start position, if we're tracking this. 2119 if (!m_pattern.m_body->m_hasFixedSize) { 2120 if (countCheckedForCurrentAlternative - 1) { 2121 move(index, regT0); 2122 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); 2123 store32(regT0, Address(output)); 2124 } else 2125 store32(index, Address(output)); 2126 } 2127 2128 // Check if there is sufficent input to run the first alternative again. 2129 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); 2130 // No - insufficent input to run the first alteranative, are there any other alternatives we 2131 // might need to check? If so, the last check will have left the index incremented by 2132 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative 2133 // LESS input is available, to have the effect of just progressing the start position by 1 2134 // from the last iteration. If this check passes we can just jump up to the check associated 2135 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the 2136 // first alternative again, and this check will fail (otherwise the check planted just above 2137 // here would have passed). This is a bit sad, however it saves trying to do something more 2138 // complex here in compilation, and in the common case we should end up coallescing the checks. 2139 // 2140 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least 2141 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150, 2142 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there 2143 // is sufficient input to run either alternative (constantly failing). If there had been only 2144 // one alternative, or if the shorter alternative had come first, we would have terminated 2145 // immediately. :-/ 2146 if (hasShorterAlternatives) 2147 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this); 2148 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true, 2149 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 2150 // but since we're about to return a failure this doesn't really matter!) 2151 } 2152 2153 if (m_pattern.m_body->m_callFrameSize) 2154 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 2155 2156 move(TrustedImm32(-1), returnRegister); 2157 2158 generateReturn(); 2159 2160 m_expressionState.emitParenthesesTail(this); 2161 m_expressionState.emitIndirectJumpTable(this); 2162 m_expressionState.linkToNextIteration(this); 2056 // This subpattern is not supported by the JIT. 2057 m_shouldFallBack = true; 2058 return; 2059 } 2060 2061 size_t parenBegin = m_ops.size(); 2062 m_ops.append(parenthesesBeginOpCode); 2063 2064 m_ops.append(alternativeBeginOpCode); 2065 m_ops.last().m_previousOp = notFound; 2066 m_ops.last().m_term = term; 2067 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives; 2068 for (unsigned i = 0; i < alternatives.size(); ++i) { 2069 size_t lastOpIndex = m_ops.size() - 1; 2070 2071 PatternAlternative* nestedAlternative = alternatives[i]; 2072 opCompileAlternative(nestedAlternative); 2073 2074 size_t thisOpIndex = m_ops.size(); 2075 m_ops.append(YarrOp(alternativeNextOpCode)); 2076 2077 YarrOp& lastOp = m_ops[lastOpIndex]; 2078 YarrOp& thisOp = m_ops[thisOpIndex]; 2079 2080 lastOp.m_alternative = nestedAlternative; 2081 lastOp.m_nextOp = thisOpIndex; 2082 thisOp.m_previousOp = lastOpIndex; 2083 thisOp.m_term = term; 2084 } 2085 YarrOp& lastOp = m_ops.last(); 2086 ASSERT(lastOp.m_op == alternativeNextOpCode); 2087 lastOp.m_op = alternativeEndOpCode; 2088 lastOp.m_alternative = 0; 2089 lastOp.m_nextOp = notFound; 2090 2091 size_t parenEnd = m_ops.size(); 2092 m_ops.append(parenthesesEndOpCode); 2093 2094 m_ops[parenBegin].m_term = term; 2095 m_ops[parenBegin].m_previousOp = notFound; 2096 m_ops[parenBegin].m_nextOp = parenEnd; 2097 m_ops[parenEnd].m_term = term; 2098 m_ops[parenEnd].m_previousOp = parenBegin; 2099 m_ops[parenEnd].m_nextOp = notFound; 2100 } 2101 2102 // opCompileParentheticalAssertion 2103 // Emits ops for a parenthetical assertion. These consist of an 2104 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping 2105 // the alternatives, with these wrapped by an outer pair of 2106 // OpParentheticalAssertionBegin/End nodes. 2107 // We can always use the OpSimpleNestedAlternative nodes in the 2108 // case of parenthetical assertions since these only ever match 2109 // once, and will never backtrack back into the assertion. 2110 void opCompileParentheticalAssertion(PatternTerm* term) 2111 { 2112 size_t parenBegin = m_ops.size(); 2113 m_ops.append(OpParentheticalAssertionBegin); 2114 2115 m_ops.append(OpSimpleNestedAlternativeBegin); 2116 m_ops.last().m_previousOp = notFound; 2117 m_ops.last().m_term = term; 2118 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives; 2119 for (unsigned i = 0; i < alternatives.size(); ++i) { 2120 size_t lastOpIndex = m_ops.size() - 1; 2121 2122 PatternAlternative* nestedAlternative = alternatives[i]; 2123 opCompileAlternative(nestedAlternative); 2124 2125 size_t thisOpIndex = m_ops.size(); 2126 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext)); 2127 2128 YarrOp& lastOp = m_ops[lastOpIndex]; 2129 YarrOp& thisOp = m_ops[thisOpIndex]; 2130 2131 lastOp.m_alternative = nestedAlternative; 2132 lastOp.m_nextOp = thisOpIndex; 2133 thisOp.m_previousOp = lastOpIndex; 2134 thisOp.m_term = term; 2135 } 2136 YarrOp& lastOp = m_ops.last(); 2137 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext); 2138 lastOp.m_op = OpSimpleNestedAlternativeEnd; 2139 lastOp.m_alternative = 0; 2140 lastOp.m_nextOp = notFound; 2141 2142 size_t parenEnd = m_ops.size(); 2143 m_ops.append(OpParentheticalAssertionEnd); 2144 2145 m_ops[parenBegin].m_term = term; 2146 m_ops[parenBegin].m_previousOp = notFound; 2147 m_ops[parenBegin].m_nextOp = parenEnd; 2148 m_ops[parenEnd].m_term = term; 2149 m_ops[parenEnd].m_previousOp = parenBegin; 2150 m_ops[parenEnd].m_nextOp = notFound; 2151 } 2152 2153 // opCompileAlternative 2154 // Called to emit nodes for all terms in an alternative. 2155 void opCompileAlternative(PatternAlternative* alternative) 2156 { 2157 optimizeAlternative(alternative); 2158 2159 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { 2160 PatternTerm* term = &alternative->m_terms[i]; 2161 2162 switch (term->type) { 2163 case PatternTerm::TypeParenthesesSubpattern: 2164 opCompileParenthesesSubpattern(term); 2165 break; 2166 2167 case PatternTerm::TypeParentheticalAssertion: 2168 opCompileParentheticalAssertion(term); 2169 break; 2170 2171 default: 2172 m_ops.append(term); 2173 } 2174 } 2175 } 2176 2177 // opCompileBody 2178 // This method compiles the body disjunction of the regular expression. 2179 // The body consists of two sets of alternatives - zero or more 'once 2180 // through' (BOL anchored) alternatives, followed by zero or more 2181 // repeated alternatives. 2182 // For each of these two sets of alteratives, if not empty they will be 2183 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the 2184 // 'begin' node referencing the first alternative, and 'next' nodes 2185 // referencing any further alternatives. The begin/next/end nodes are 2186 // linked together in a doubly linked list. In the case of repeating 2187 // alternatives, the end node is also linked back to the beginning. 2188 // If no repeating alternatives exist, then a OpMatchFailed node exists 2189 // to return the failing result. 2190 void opCompileBody(PatternDisjunction* disjunction) 2191 { 2192 Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives; 2193 size_t currentAlternativeIndex = 0; 2194 2195 // Emit the 'once through' alternatives. 2196 if (alternatives.size() && alternatives[0]->onceThrough()) { 2197 m_ops.append(YarrOp(OpBodyAlternativeBegin)); 2198 m_ops.last().m_previousOp = notFound; 2199 2200 do { 2201 size_t lastOpIndex = m_ops.size() - 1; 2202 PatternAlternative* alternative = alternatives[currentAlternativeIndex]; 2203 opCompileAlternative(alternative); 2204 2205 size_t thisOpIndex = m_ops.size(); 2206 m_ops.append(YarrOp(OpBodyAlternativeNext)); 2207 2208 YarrOp& lastOp = m_ops[lastOpIndex]; 2209 YarrOp& thisOp = m_ops[thisOpIndex]; 2210 2211 lastOp.m_alternative = alternative; 2212 lastOp.m_nextOp = thisOpIndex; 2213 thisOp.m_previousOp = lastOpIndex; 2214 2215 ++currentAlternativeIndex; 2216 } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough()); 2217 2218 YarrOp& lastOp = m_ops.last(); 2219 2220 ASSERT(lastOp.m_op == OpBodyAlternativeNext); 2221 lastOp.m_op = OpBodyAlternativeEnd; 2222 lastOp.m_alternative = 0; 2223 lastOp.m_nextOp = notFound; 2224 } 2225 2226 if (currentAlternativeIndex == alternatives.size()) { 2227 m_ops.append(YarrOp(OpMatchFailed)); 2228 return; 2229 } 2230 2231 // Emit the repeated alternatives. 2232 size_t repeatLoop = m_ops.size(); 2233 m_ops.append(YarrOp(OpBodyAlternativeBegin)); 2234 m_ops.last().m_previousOp = notFound; 2235 do { 2236 size_t lastOpIndex = m_ops.size() - 1; 2237 PatternAlternative* alternative = alternatives[currentAlternativeIndex]; 2238 ASSERT(!alternative->onceThrough()); 2239 opCompileAlternative(alternative); 2240 2241 size_t thisOpIndex = m_ops.size(); 2242 m_ops.append(YarrOp(OpBodyAlternativeNext)); 2243 2244 YarrOp& lastOp = m_ops[lastOpIndex]; 2245 YarrOp& thisOp = m_ops[thisOpIndex]; 2246 2247 lastOp.m_alternative = alternative; 2248 lastOp.m_nextOp = thisOpIndex; 2249 thisOp.m_previousOp = lastOpIndex; 2250 2251 ++currentAlternativeIndex; 2252 } while (currentAlternativeIndex < alternatives.size()); 2253 YarrOp& lastOp = m_ops.last(); 2254 ASSERT(lastOp.m_op == OpBodyAlternativeNext); 2255 lastOp.m_op = OpBodyAlternativeEnd; 2256 lastOp.m_alternative = 0; 2257 lastOp.m_nextOp = repeatLoop; 2163 2258 } 2164 2259 … … 2231 2326 : m_pattern(pattern) 2232 2327 , m_shouldFallBack(false) 2233 { 2234 } 2235 2236 void generate() 2328 , m_checked(0) 2329 { 2330 } 2331 2332 void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) 2237 2333 { 2238 2334 generateEnter(); … … 2244 2340 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 2245 2341 2246 generateDisjunction(m_pattern.m_body); 2247 } 2248 2249 void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) 2250 { 2342 // Compile the pattern to the internal 'YarrOp' representation. 2343 opCompileBody(m_pattern.m_body); 2344 2345 // If we encountered anything we can't handle in the JIT code 2346 // (e.g. backreferences) then return early. 2347 if (m_shouldFallBack) { 2348 jitObject.setFallBack(true); 2349 return; 2350 } 2351 2251 2352 generate(); 2252 2253 LinkBuffer patchBuffer(this, globalData->regexAllocator); 2254 2255 for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i) 2256 patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation)); 2257 2258 jitObject.set(patchBuffer.finalizeCode()); 2353 backtrack(); 2354 2355 // Link & finalize the code. 2356 LinkBuffer linkBuffer(this, globalData->regexAllocator); 2357 m_backtrackingState.linkDataLabels(linkBuffer); 2358 jitObject.set(linkBuffer.finalizeCode()); 2259 2359 jitObject.setFallBack(m_shouldFallBack); 2260 2360 } … … 2262 2362 private: 2263 2363 YarrPattern& m_pattern; 2364 2365 // Used to detect regular expression constructs that are not currently 2366 // supported in the JIT; fall back to the interpreter when this is detected. 2264 2367 bool m_shouldFallBack; 2265 GenerationState m_expressionState; 2368 2369 // The regular expression expressed as a linear sequence of operations. 2370 Vector<YarrOp, 128> m_ops; 2371 2372 // This records the current input offset being applied due to the current 2373 // set of alternatives we are nested within. E.g. when matching the 2374 // character 'b' within the regular expression /abc/, we will know that 2375 // the minimum size for the alternative is 3, checked upon entry to the 2376 // alternative, and that 'b' is at offset 1 from the start, and as such 2377 // when matching 'b' we need to apply an offset of -2 to the load. 2378 // 2379 // FIXME: This should go away. Rather than tracking this value throughout 2380 // code generation, we should gather this information up front & store it 2381 // on the YarrOp structure. 2382 int m_checked; 2383 2384 // This class records state whilst generating the backtracking path of code. 2385 BacktrackingState m_backtrackingState; 2266 2386 }; 2267 2387
Note: See TracChangeset
for help on using the changeset viewer.