Changeset 60739 in webkit
- Timestamp:
- Jun 4, 2010 11:57:54 PM (14 years ago)
- Location:
- trunk/WebCore
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebCore/ChangeLog
r60738 r60739
2010-06-04 Adam Barth <abarth@webkit.org>

    Reviewed by Eric Seidel.

    Make HTML5Lexer go fast
    https://bugs.webkit.org/show_bug.cgi?id=40048

    This patch changes us from using a jump table for each character to
    using absolute jumps between parser states. This appears to be about a
    1% improvement on the parser benchmark (which is 1/10th of what we
    need to catch the old parser).

    I've kept the underlying logic as close to the old logic as possible.
    This new form will make it easier to handle the input stream part of
    the spec and to make further performance improvements.

    * html/HTML5Lexer.cpp:
    (WebCore::HTML5Lexer::reset):
    (WebCore::HTML5Lexer::nextToken):
    (WebCore::HTML5Lexer::emitCurrentToken):
    * html/HTML5Lexer.h:

2010-06-04 Adam Barth <abarth@webkit.org>

-
trunk/WebCore/html/HTML5Lexer.cpp
r60738 r60739 157 157 m_lineNumber = 0; 158 158 m_skipLeadingNewLineForListing = false; 159 m_emitPending = false;160 159 m_additionalAllowedCharacter = '\0'; 161 160 } … … 325 324 #endif 326 325 327 #define BEGIN_STATE(stateName) case stateName: 326 #define BEGIN_STATE(stateName) case stateName: stateName: 328 327 #define END_STATE() ASSERT_NOT_REACHED(); break; 329 328 330 #define EMIT_AND_RESUME_IN(stateName)\331 do { 332 emitCurrentToken();\333 m_state = DataState;\334 goto breakLabel; \329 #define RECONSUME_IN(stateName) \ 330 do { \ 331 m_state = stateName; \ 332 cc = *source; \ 333 goto stateName; \ 335 334 } while (false) 336 335 337 #define ADVANCE_TO(stateName) \ 338 do { \ 339 m_state = stateName; \ 340 goto breakLabel; \ 336 #define ADVANCE_TO(stateName) \ 337 do { \ 338 m_state = stateName; \ 339 source.advance(m_lineNumber); \ 340 if (source.isEmpty()) \ 341 return shouldEmitBufferedCharacterToken(source); \ 342 cc = *source; \ 343 goto stateName; \ 341 344 } while (false) 342 345 343 #define RECONSUME_IN(stateName) \ 344 do { \ 345 m_state = stateName; \ 346 goto continueLabel; \ 346 #define EMIT_AND_RESUME_IN(stateName) \ 347 do { \ 348 m_state = stateName; \ 349 source.advance(m_lineNumber); \ 350 emitCurrentToken(); \ 351 return true; \ 347 352 } while (false) 348 353 349 #define FLUSH_EMIT_AND_RESUME_IN(stateName) \ 350 do { \ 351 m_state = stateName; \ 352 maybeFlushBufferedEndTag(); \ 353 goto breakLabel; \ 354 #define _FLUSH_BUFFERED_END_TAG() \ 355 do { \ 356 ASSERT(m_token->type() == HTML5Token::Character || \ 357 m_token->type() == HTML5Token::Uninitialized); \ 358 source.advance(m_lineNumber); \ 359 if (m_token->type() == HTML5Token::Character) \ 360 return true; \ 361 m_token->beginEndTag(m_bufferedEndTagName); \ 362 m_bufferedEndTagName.clear(); \ 354 363 } while (false) 355 364 356 // When we move away from using a jump table, these macros will be different. 
357 #define FLUSH_AND_ADVANCE_TO(stateName) FLUSH_EMIT_AND_RESUME_IN(stateName) 365 #define FLUSH_AND_ADVANCE_TO(stateName) \ 366 do { \ 367 m_state = stateName; \ 368 _FLUSH_BUFFERED_END_TAG(); \ 369 if (source.isEmpty()) \ 370 return shouldEmitBufferedCharacterToken(source); \ 371 cc = *source; \ 372 goto stateName; \ 373 } while (false) 374 375 #define FLUSH_EMIT_AND_RESUME_IN(stateName) \ 376 do { \ 377 m_state = stateName; \ 378 _FLUSH_BUFFERED_END_TAG(); \ 379 return true; \ 380 } while (false) 358 381 359 382 bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) … … 380 403 m_skipLeadingNewLineForListing = false; 381 404 405 if (source.isEmpty()) 406 return shouldEmitBufferedCharacterToken(source); 407 382 408 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 383 // FIXME: This while should stop as soon as we have a token to return.384 while (!source.isEmpty()) {385 // FIXME: This is a purposeful style violation because this while loop is386 // going to be removed soon.387 388 409 UChar cc = *source; 389 410 switch (m_state) { … … 1118 1139 1119 1140 BEGIN_STATE(BogusCommentState) { 1141 // FIXME: This state isn't correct because we'll terminate the 1142 // comment early if we don't have the whole input stream available. 1120 1143 m_token->beginComment(); 1121 1144 while (!source.isEmpty()) { 1122 1145 cc = *source; 1123 1146 if (cc == '>') 1124 break;1147 EMIT_AND_RESUME_IN(DataState); 1125 1148 m_token->appendToComment(cc); 1126 1149 source.advance(m_lineNumber); 1127 1150 } 1128 EMIT_AND_RESUME_IN(DataState); 1129 if (source.isEmpty()) 1130 return true; 1131 // FIXME: Handle EOF properly. 1132 break; 1151 m_state = DataState; 1152 return true; 1153 // FIXME: Handle EOF properly. 
1133 1154 } 1134 1155 END_STATE() … … 1577 1598 } 1578 1599 1579 breakLabel: 1580 source.advance(m_lineNumber); 1581 if (m_emitPending) { 1582 m_emitPending = false; 1583 return true; 1584 } 1585 1586 continueLabel: 1587 ; // We need an empty statement here to make continueLabel happy. 1588 } // Matches the "while" above. 1589 1590 // We've reached the end of the input stream. If we have a character 1591 // token buffered, we should emit it. 1592 return shouldEmitBufferedCharacterToken(source); 1600 ASSERT_NOT_REACHED(); 1601 return false; 1593 1602 } 1594 1603 … … 1633 1642 } 1634 1643 1635 inline void HTML5Lexer::maybeFlushBufferedEndTag()1636 {1637 ASSERT(m_token->type() == HTML5Token::Character || m_token->type() == HTML5Token::Uninitialized);1638 if (m_token->type() == HTML5Token::Character) {1639 // We have a character token queued up. We need to emit it before we1640 // can start begin the buffered end tag token.1641 emitCurrentToken();1642 return;1643 }1644 flushBufferedEndTag();1645 }1646 1647 inline void HTML5Lexer::flushBufferedEndTag()1648 {1649 m_token->beginEndTag(m_bufferedEndTagName);1650 m_bufferedEndTagName.clear();1651 if (m_state == DataState)1652 emitCurrentToken();1653 }1654 1655 1644 inline void HTML5Lexer::emitCurrentToken() 1656 1645 { 1657 1646 ASSERT(m_token->type() != HTML5Token::Uninitialized); 1658 m_emitPending = true;1659 1647 if (m_token->type() == HTML5Token::StartTag) 1660 1648 m_appropriateEndTagName = m_token->name(); -
trunk/WebCore/html/HTML5Lexer.h
r60738 r60739 146 146 inline void addToPossibleEndTag(UChar cc); 147 147 inline bool isAppropriateEndTag(); 148 inline void maybeFlushBufferedEndTag();149 inline void flushBufferedEndTag();150 148 151 149 inline bool shouldEmitBufferedCharacterToken(const SegmentedString&); … … 161 159 162 160 bool m_skipLeadingNewLineForListing; 163 bool m_emitPending;164 161 165 162 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
Note: See TracChangeset for help on using the changeset viewer.