Context Navigation

← Previous Changeset
Next Changeset →

Changeset 60739 in webkit

Timestamp:

Jun 4, 2010 11:57:54 PM (14 years ago)

Author:

abarth@webkit.org

Message:

2010-06-04 Adam Barth <abarth@webkit.org>

Reviewed by Eric Seidel.

Make HTML5Lexer go fast
https://bugs.webkit.org/show_bug.cgi?id=40048

This patch changes us from using a jump table for each character to
using absolute jumps between parser states. This appears to be about a
1% improvement on the parser bench mark (which is 1/10th of what we
need to catch the old parser).

I've kept the underlying logic as close to the old logic as possible.
This new form will make it easier to handle the input stream part of
the spec and to make further performance improvements.

html/HTML5Lexer.cpp: (WebCore::HTML5Lexer::reset): (WebCore::HTML5Lexer::nextToken): (WebCore::HTML5Lexer::emitCurrentToken):
html/HTML5Lexer.h:

Location:

trunk/WebCore

Files:

: 3 edited

ChangeLog (modified) (1 diff)
html/HTML5Lexer.cpp (modified) (6 diffs)
html/HTML5Lexer.h (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/WebCore/ChangeLog

-                      r60738
+                      r60739
+2010-06-04  Adam Barth  <abarth@webkit.org>
+        Reviewed by Eric Seidel.
+        Make HTML5Lexer go fast
+        https://bugs.webkit.org/show_bug.cgi?id=40048
+        This patch changes us from using a jump table for each character to
+        using absolute jumps between parser states.  This appears to be about a
+        1% improvement on the parser bench mark (which is 1/10th of what we
+        need to catch the old parser).
+        I've kept the underlying logic as close to the old logic as possible.
+        This new form will make it easier to handle the input stream part of
+        the spec and to make further performance improvements.
+        * html/HTML5Lexer.cpp:
+        (WebCore::HTML5Lexer::reset):
+        (WebCore::HTML5Lexer::nextToken):
+        (WebCore::HTML5Lexer::emitCurrentToken):
+        * html/HTML5Lexer.h:
 2010-06-04  Adam Barth  <abarth@webkit.org>

trunk/WebCore/html/HTML5Lexer.cpp

-                      r60738
+                      r60739
     m_lineNumber = 0;
     m_skipLeadingNewLineForListing = false;
-    m_emitPending = false;
     m_additionalAllowedCharacter = '\0';
+}
 …
 #endif
 #define BEGIN_STATE(stateName) case stateName:
+#define BEGIN_STATE(stateName) case stateName: stateName:
 #define END_STATE() ASSERT_NOT_REACHED(); break;
 #define EMIT_AND_RESUME_IN(stateName)                                       \
     do {                                                                    \
         emitCurrentToken();                                                 \
         m_state = DataState;                                                \
         goto breakLabel;                                                    \
+#define RECONSUME_IN(stateName)                                            \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        cc = *source;                                                      \
+        goto stateName;                                                    \
     } while (false)
+#define ADVANCE_TO(stateName)                                               \
+    do {                                                                    \
+        m_state = stateName;                                                \
+        goto breakLabel;                                                    \
+#define ADVANCE_TO(stateName)                                              \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        source.advance(m_lineNumber);                                      \
+        if (source.isEmpty())                                              \
+            return shouldEmitBufferedCharacterToken(source);               \
+        cc = *source;                                                      \
+        goto stateName;                                                    \
     } while (false)
+#define RECONSUME_IN(stateName)                                             \
+    do {                                                                    \
+        m_state = stateName;                                                \
+        goto continueLabel;                                                 \
+#define EMIT_AND_RESUME_IN(stateName)                                      \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        source.advance(m_lineNumber);                                      \
+        emitCurrentToken();                                                \
+        return true;                                                       \
     } while (false)
+#define FLUSH_EMIT_AND_RESUME_IN(stateName)                                 \
+    do {                                                                    \
+        m_state = stateName;                                                \
+        maybeFlushBufferedEndTag();                                         \
+        goto breakLabel;                                                    \
+#define _FLUSH_BUFFERED_END_TAG()                                          \
+    do {                                                                   \
+        ASSERT(m_token->type() == HTML5Token::Character ||                 \
+               m_token->type() == HTML5Token::Uninitialized);              \
+        source.advance(m_lineNumber);                                      \
+        if (m_token->type() == HTML5Token::Character)                      \
+            return true;                                                   \
+        m_token->beginEndTag(m_bufferedEndTagName);                        \
+        m_bufferedEndTagName.clear();                                      \
     } while (false)
+// When we move away from using a jump table, these macros will be different.
+#define FLUSH_AND_ADVANCE_TO(stateName) FLUSH_EMIT_AND_RESUME_IN(stateName)
+#define FLUSH_AND_ADVANCE_TO(stateName)                                    \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        _FLUSH_BUFFERED_END_TAG();                                         \
+        if (source.isEmpty())                                              \
+            return shouldEmitBufferedCharacterToken(source);               \
+        cc = *source;                                                      \
+        goto stateName;                                                    \
+    } while (false)
+#define FLUSH_EMIT_AND_RESUME_IN(stateName)                                \
+    do {                                                                   \
+        m_state = stateName;                                               \
+        _FLUSH_BUFFERED_END_TAG();                                         \
+        return true;                                                       \
+    } while (false)
 bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
 …
     m_skipLeadingNewLineForListing = false;
+    if (source.isEmpty())
+        return shouldEmitBufferedCharacterToken(source);
     // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
-    // FIXME: This while should stop as soon as we have a token to return.
-    while (!source.isEmpty()) {
-    // FIXME: This is a purposeful style violation because this while loop is
-    // going to be removed soon.
     UChar cc = *source;
     switch (m_state) {
 …
     BEGIN_STATE(BogusCommentState) {
+        // FIXME: This state isn't correct because we'll terminate the
+        // comment early if we don't have the whole input stream available.
         m_token->beginComment();
         while (!source.isEmpty()) {
             cc = *source;
             if (cc == '>')
                 break;
+                EMIT_AND_RESUME_IN(DataState);
             m_token->appendToComment(cc);
             source.advance(m_lineNumber);
+        }
+        EMIT_AND_RESUME_IN(DataState);
+        if (source.isEmpty())
+            return true;
+        // FIXME: Handle EOF properly.
+        break;
+        m_state = DataState;
+        return true;
+        // FIXME: Handle EOF properly.
+    }
     END_STATE()
 …
+    }
+breakLabel:
+    source.advance(m_lineNumber);
+    if (m_emitPending) {
+        m_emitPending = false;
+        return true;
+    }
+continueLabel:
+    ; // We need an empty statement here to make continueLabel happy.
+    } // Matches the "while" above.
+    // We've reached the end of the input stream.  If we have a character
+    // token buffered, we should emit it.
+    return shouldEmitBufferedCharacterToken(source);
+    ASSERT_NOT_REACHED();
+    return false;
+}
 …
+}
-inline void HTML5Lexer::maybeFlushBufferedEndTag()
+{
-    ASSERT(m_token->type() == HTML5Token::Character || m_token->type() == HTML5Token::Uninitialized);
-    if (m_token->type() == HTML5Token::Character) {
-        // We have a character token queued up.  We need to emit it before we
-        // can start begin the buffered end tag token.
-        emitCurrentToken();
-        return;
+    }
-    flushBufferedEndTag();
+}
-inline void HTML5Lexer::flushBufferedEndTag()
+{
-    m_token->beginEndTag(m_bufferedEndTagName);
-    m_bufferedEndTagName.clear();
-    if (m_state == DataState)
-        emitCurrentToken();
+}
 inline void HTML5Lexer::emitCurrentToken()
+{
     ASSERT(m_token->type() != HTML5Token::Uninitialized);
-    m_emitPending = true;
     if (m_token->type() == HTML5Token::StartTag)
         m_appropriateEndTagName = m_token->name();

trunk/WebCore/html/HTML5Lexer.h

-                      r60738
+                      r60739
         inline void addToPossibleEndTag(UChar cc);
         inline bool isAppropriateEndTag();
-        inline void maybeFlushBufferedEndTag();
-        inline void flushBufferedEndTag();
         inline bool shouldEmitBufferedCharacterToken(const SegmentedString&);
 …
         bool m_skipLeadingNewLineForListing;
-        bool m_emitPending;
         // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer

Note: See TracChangeset for help on using the changeset viewer.