Changeset 142673 in webkit


Ignore:
Timestamp:
Feb 12, 2013 2:44:25 PM (11 years ago)
Author:
abarth@webkit.org
Message:

Threaded HTML parser should pass the remaining fast/tokenizer tests
https://bugs.webkit.org/show_bug.cgi?id=109607

Reviewed by Eric Seidel.

This patch fixes some edge cases involving document.write. Previously,
we would drop input characters on the floor if the tokenizer wasn't
able to consume them synchronously. In this patch, we send the unparsed
characters to the background thread for consumption after rewinding the
input stream.

  • html/parser/BackgroundHTMLInputStream.cpp:

(WebCore::BackgroundHTMLInputStream::rewindTo):

  • html/parser/BackgroundHTMLInputStream.h:

(BackgroundHTMLInputStream):

  • html/parser/BackgroundHTMLParser.cpp:

(WebCore::BackgroundHTMLParser::resumeFrom):

  • html/parser/BackgroundHTMLParser.h:

(Checkpoint):

  • html/parser/HTMLDocumentParser.cpp:

(WebCore::HTMLDocumentParser::canTakeNextToken):
(WebCore::HTMLDocumentParser::didFailSpeculation):
(WebCore::HTMLDocumentParser::pumpTokenizer):
(WebCore::HTMLDocumentParser::finish):

  • html/parser/HTMLInputStream.h:

(WebCore::HTMLInputStream::closeWithoutMarkingEndOfFile):
(HTMLInputStream):

Location:
trunk/Source/WebCore
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/ChangeLog

    r142669 r142673  
     1 2013-02-12  Adam Barth  <abarth@webkit.org>
     2
     3        Threaded HTML parser should pass the remaining fast/tokenizer tests
     4        https://bugs.webkit.org/show_bug.cgi?id=109607
     5
     6        Reviewed by Eric Seidel.
     7
     8        This patch fixes some edge cases involving document.write. Previously,
     9        we would drop input characters on the floor if the tokenizer wasn't
     10        able to consume them synchronously. In this patch, we send the unparsed
     11        characters to the background thread for consumption after rewinding the
     12        input stream.
     13
     14        * html/parser/BackgroundHTMLInputStream.cpp:
     15        (WebCore::BackgroundHTMLInputStream::rewindTo):
     16        * html/parser/BackgroundHTMLInputStream.h:
     17        (BackgroundHTMLInputStream):
     18        * html/parser/BackgroundHTMLParser.cpp:
     19        (WebCore::BackgroundHTMLParser::resumeFrom):
     20        * html/parser/BackgroundHTMLParser.h:
     21        (Checkpoint):
     22        * html/parser/HTMLDocumentParser.cpp:
     23        (WebCore::HTMLDocumentParser::canTakeNextToken):
     24        (WebCore::HTMLDocumentParser::didFailSpeculation):
     25        (WebCore::HTMLDocumentParser::pumpTokenizer):
     26        (WebCore::HTMLDocumentParser::finish):
     27        * html/parser/HTMLInputStream.h:
     28        (WebCore::HTMLInputStream::closeWithoutMarkingEndOfFile):
     29        (HTMLInputStream):
     30
    1 31 2013-02-12  Csaba Osztrogonác  <ossy@webkit.org>
    2 32
  • trunk/Source/WebCore/html/parser/BackgroundHTMLInputStream.cpp

    r141363 r142673  
    5454}
    5555
    56 void BackgroundHTMLInputStream::rewindTo(HTMLInputCheckpoint checkpointIndex)
     56void BackgroundHTMLInputStream::rewindTo(HTMLInputCheckpoint checkpointIndex, const String& unparsedInput)
    5757{
     58    ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
    5859    const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
    5960
     
    6162
    6263    m_current = checkpoint.input;
     64
    6365    for (size_t i = checkpoint.numberOfSegmentsAlreadyAppended; i < m_segments.size(); ++i)
    6466        m_current.append(SegmentedString(m_segments[i]));
    6567
     68    if (!unparsedInput.isEmpty())
     69        m_current.prepend(SegmentedString(unparsedInput));
     70
    6671    if (isClosed && !m_current.isClosed())
    6772        m_current.close();
     73
     74    // FIXME: We should be able to actively invalidate all the outstanding checkpoints
     75    // by clearing m_segments and m_checkpoints, but that causes
     76    // fast/tokenizer/write-before-load.html to hit the ASSERT at the beginning of
     77    // this function.
    6878}
    6979
  • trunk/Source/WebCore/html/parser/BackgroundHTMLInputStream.h

    r141363 r142673  
    4747    SegmentedString& current() { return m_current; }
    4848
     49    // An HTMLInputCheckpoint is valid until the next call to rewindTo, at which
     50    // point all outstanding checkpoints are invalidated.
    4951    HTMLInputCheckpoint createCheckpoint();
    50     void rewindTo(HTMLInputCheckpoint);
     52    void rewindTo(HTMLInputCheckpoint, const String& unparsedInput);
    5153
    5254private:
  • trunk/Source/WebCore/html/parser/BackgroundHTMLParser.cpp

    r142648 r142673  
    8282    m_token = checkpoint->token.release();
    8383    m_tokenizer = checkpoint->tokenizer.release();
    84     m_input.rewindTo(checkpoint->inputCheckpoint);
     84    m_input.rewindTo(checkpoint->inputCheckpoint, checkpoint->unparsedInput);
    8585    pumpTokenizer();
    8686}
  • trunk/Source/WebCore/html/parser/BackgroundHTMLParser.h

    r142648 r142673  
    5959        OwnPtr<HTMLTokenizer> tokenizer;
    6060        HTMLInputCheckpoint inputCheckpoint;
     61        String unparsedInput;
    6162    };
    6263
  • trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp

    r142648 r142673  
    250250        return false;
    251251
    252     ASSERT(!shouldUseThreading() || mode == ForceSynchronous);
     252    ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous);
    253253
    254254    if (isWaitingForScripts()) {
     
    313313    checkpoint->tokenizer = tokenizer;
    314314    checkpoint->inputCheckpoint = m_currentChunk->checkpoint;
    315 
     315    checkpoint->unparsedInput = m_input.current().toString().isolatedCopy();
     316    m_input.current().clear();
     317
     318    ASSERT(checkpoint->unparsedInput.isSafeToSendToAnotherThread());
    316319    HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::resumeFrom, m_backgroundParser, checkpoint.release()));
    317320}
     
    401404    ASSERT(m_tokenizer);
    402405    ASSERT(m_token);
    403     ASSERT(!shouldUseThreading() || mode == ForceSynchronous);
     406    ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous);
    404407
    405408    PumpSession session(m_pumpSessionNestingLevel, contextForParsingSession());
     
    672675    // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
    673676
    674     // We're not going to get any more data off the network, so we tell the
    675     // input stream we've reached the end of file. finish() can be called more
    676     // than once, if the first time does not call end().
    677     if (!m_input.haveSeenEndOfFile())
    678         m_input.markEndOfFile();
    679 
    680677#if ENABLE(THREADED_HTML_PARSER)
    681678    // Empty documents never got an append() call, and thus have never started
     
    683680    // and fall through to the non-threading case.
    684681    if (m_haveBackgroundParser) {
     682        if (!m_input.haveSeenEndOfFile())
     683            m_input.closeWithoutMarkingEndOfFile();
    685684        HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finish, m_backgroundParser));
    686685        return;
    687686    }
     687
    688688    if (shouldUseThreading() && !wasCreatedByScript()) {
    689689        ASSERT(!m_tokenizer && !m_token);
     
    695695    }
    696696#endif
     697
     698    // We're not going to get any more data off the network, so we tell the
     699    // input stream we've reached the end of file. finish() can be called more
     700    // than once, if the first time does not call end().
     701    if (!m_input.haveSeenEndOfFile())
     702        m_input.markEndOfFile();
    697703
    698704    attemptToEnd();
  • trunk/Source/WebCore/html/parser/HTMLInputStream.h

    r142497 r142673  
    7878    }
    7979
     80    void closeWithoutMarkingEndOfFile()
     81    {
     82        m_last->close();
     83    }
     84
    8085    bool haveSeenEndOfFile() const
    8186    {
Note: See TracChangeset for help on using the changeset viewer.