Changeset 144714 in webkit


Ignore:
Timestamp:
Mar 4, 2013 8:57:15 PM (11 years ago)
Author:
abarth@webkit.org
Message:

Background HTML parser can rewind the tokenizer after end-of-file
https://bugs.webkit.org/show_bug.cgi?id=111365

Reviewed by Eric Seidel.

Source/WebCore:

Prior to this patch, it was possible to call didFailSpeculation after
processing the end-of-file token because checkForSpeculationFailure
didn't zero out m_tokenizer in some control paths.

This patch renames checkForSpeculationFailure to validateSpeculations
and ensures that it always takes ownership of the main thread's
HTMLTokenizer.

This patch also adds a number of ASSERTs to make sure the parser state
machine stays in the correct configuration (e.g., that we don't have a
main thread tokenizer while we're supposed to be tokenizing on the
background thread).

Test: fast/parser/document-write-fighting-eof.html

  • html/parser/BackgroundHTMLInputStream.cpp:

(WebCore::BackgroundHTMLInputStream::rewindTo):

  • html/parser/BackgroundHTMLParser.cpp:

(WebCore::BackgroundHTMLParser::append):

  • html/parser/HTMLDocumentParser.cpp:

(WebCore::HTMLDocumentParser::validateSpeculations):
(WebCore::HTMLDocumentParser::processParsedChunkFromBackgroundParser):
(WebCore::HTMLDocumentParser::pumpPendingSpeculations):
(WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):

  • html/parser/HTMLDocumentParser.h:

(HTMLDocumentParser):

LayoutTests:

  • fast/parser/document-write-fighting-eof.html: Added.
Location:
trunk
Files:
2 added
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/LayoutTests/ChangeLog

    r144713 r144714  
     12013-03-04  Adam Barth  <abarth@webkit.org>
     2
     3        Background HTML parser can rewind the tokenizer after end-of-file
     4        https://bugs.webkit.org/show_bug.cgi?id=111365
     5
     6        Reviewed by Eric Seidel.
     7
     8        * fast/parser/document-write-fighting-eof.html: Added.
     9
    1102013-03-04  Tim 'mithro' Ansell  <mithro@mithis.com>
    211
  • trunk/Source/WebCore/ChangeLog

    r144713 r144714  
     12013-03-04  Adam Barth  <abarth@webkit.org>
     2
     3        Background HTML parser can rewind the tokenizer after end-of-file
     4        https://bugs.webkit.org/show_bug.cgi?id=111365
     5
     6        Reviewed by Eric Seidel.
     7
     8        Prior to this patch, it was possible to call didFailSpeculation after
     9        processing the end-of-file token because checkForSpeculationFailure
     10        didn't zero out m_tokenizer in some control paths.
     11
     12        This patch renames checkForSpeculationFailure to validateSpeculations
     13        and ensures that it always takes ownership of the main thread's
     14        HTMLTokenizer.
     15
     16        This patch also adds a number of ASSERTs to make sure the parser state
     17        machine stays in the correct configuration (e.g., that we don't have a
     18        main thread tokenizer while we're supposed to be tokenizing on the
     19        background thread).
     20
     21        Test: fast/parser/document-write-fighting-eof.html
     22
     23        * html/parser/BackgroundHTMLInputStream.cpp:
     24        (WebCore::BackgroundHTMLInputStream::rewindTo):
     25        * html/parser/BackgroundHTMLParser.cpp:
     26        (WebCore::BackgroundHTMLParser::append):
     27        * html/parser/HTMLDocumentParser.cpp:
     28        (WebCore::HTMLDocumentParser::validateSpeculations):
     29        (WebCore::HTMLDocumentParser::processParsedChunkFromBackgroundParser):
     30        (WebCore::HTMLDocumentParser::pumpPendingSpeculations):
     31        (WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
     32        * html/parser/HTMLDocumentParser.h:
     33        (HTMLDocumentParser):
     34
    1352013-03-04  Tim 'mithro' Ansell  <mithro@mithis.com>
    236
  • trunk/Source/WebCore/html/parser/BackgroundHTMLInputStream.cpp

    r143685 r144714  
    7272        m_current.close();
    7373
     74    ASSERT(m_current.isClosed() == isClosed);
     75
    7476    m_segments.clear();
    7577    m_checkpoints.clear();
  • trunk/Source/WebCore/html/parser/BackgroundHTMLParser.cpp

    r144407 r144714  
    147147void BackgroundHTMLParser::append(const String& input)
    148148{
     149    ASSERT(!m_input.current().isClosed());
    149150    m_input.append(input);
    150151    pumpTokenizer();
  • trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp

    r144543 r144714  
    329329}
    330330
    331 void HTMLDocumentParser::checkForSpeculationFailure()
    332 {
    333     if (!m_tokenizer)
    334         return;
    335     if (!m_currentChunk)
    336         return;
     331void HTMLDocumentParser::validateSpeculations()
     332{
     333    OwnPtr<HTMLTokenizer> tokenizer = m_tokenizer.release();
     334    OwnPtr<HTMLToken> token = m_token.release();
     335
     336    if (!tokenizer) {
     337        // There must not have been any changes to the HTMLTokenizer state on
     338        // the main thread, which means the speculation buffer is correct.
     339        return;
     340    }
     341
     342    if (!m_currentChunk) {
     343        // If there is no m_currentChunk, we must have already called didFailSpeculation
     344        // for this chunk.
     345        // FIXME: In this case, we're losing whatever state has been changed since
     346        // we called didFailSpeculation. See https://bugs.webkit.org/show_bug.cgi?id=110546
     347        return;
     348    }
     349
    337350    // Currently we're only smart enough to reuse the speculation buffer if the tokenizer
    338351    // both starts and ends in the DataState. That state is simplest because the HTMLToken
     
    340353    // speculation buffer in other states, but we'd likely need to do something more
    341354    // sophisticated with the HTMLToken.
    342     if (m_currentChunk->tokenizerState == HTMLTokenizer::DataState && m_tokenizer->state() == HTMLTokenizer::DataState && m_input.current().isEmpty()) {
    343         ASSERT(m_token->isUninitialized());
    344         m_tokenizer.clear();
    345         m_token.clear();
    346         return;
    347     }
    348     didFailSpeculation(m_token.release(), m_tokenizer.release());
     355    if (m_currentChunk->tokenizerState == HTMLTokenizer::DataState
     356        && tokenizer->state() == HTMLTokenizer::DataState
     357        && m_input.current().isEmpty()) {
     358        ASSERT(token->isUninitialized());
     359        return;
     360    }
     361
     362    didFailSpeculation(token.release(), tokenizer.release());
    349363}
    350364
     
    373387    ASSERT(refCount() >= 2);
    374388    ASSERT(shouldUseThreading());
     389    ASSERT(!m_tokenizer);
     390    ASSERT(!m_token);
    375391
    376392    ActiveParserSession session(contextForParsingSession());
     
    386402        if (XSSInfo* xssInfo = it->xssInfo())
    387403            m_xssAuditorDelegate.didBlockScript(*xssInfo);
     404
    388405        constructTreeFromCompactHTMLToken(*it);
    389406
     
    396413            // To match main-thread parser behavior (which never checks locationChangePending on the EOF path)
    397414            // we peek to see if this chunk has an EOF and process it anyway.
    398             if (tokens->last().type() == HTMLToken::EndOfFile)
     415            if (tokens->last().type() == HTMLToken::EndOfFile) {
     416                ASSERT(m_speculations.isEmpty());
    399417                prepareToStopParsing();
     418            }
    400419            break;
    401420        }
     
    404423            ASSERT(it + 1 == tokens->end()); // The </script> is assumed to be the last token of this bunch.
    405424            runScriptsForPausedTreeBuilder();
     425            validateSpeculations();
    406426            break;
    407427        }
     
    409429        if (it->type() == HTMLToken::EndOfFile) {
    410430            ASSERT(it + 1 == tokens->end()); // The EOF is assumed to be the last token of this bunch.
     431            ASSERT(m_speculations.isEmpty());
    411432            prepareToStopParsing();
    412433            break;
    413434        }
    414     }
    415 
    416     checkForSpeculationFailure();
     435
     436        ASSERT(!m_tokenizer);
     437        ASSERT(!m_token);
     438    }
    417439}
    418440
     
    424446    // ASSERT that this object is both attached to the Document and protected.
    425447    ASSERT(refCount() >= 2);
     448    // If this assert fails, you need to call validateSpeculations to make sure
     449    // m_tokenizer and m_token don't have state that invalidates m_speculations.
     450    ASSERT(!m_tokenizer);
     451    ASSERT(!m_token);
    426452
    427453    // FIXME: Pass in current input length.
     
    847873#if ENABLE(THREADED_HTML_PARSER)
    848874    if (m_haveBackgroundParser) {
    849         checkForSpeculationFailure();
    850 
     875        validateSpeculations();
    851876        // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document,
    852877        // but we need to ensure it isn't deleted yet.
  • trunk/Source/WebCore/html/parser/HTMLDocumentParser.h

    r144498 r144714  
    141141    void startBackgroundParser();
    142142    void stopBackgroundParser();
    143     void checkForSpeculationFailure();
     143    void validateSpeculations();
    144144    void didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
    145145    void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
Note: See TracChangeset for help on using the changeset viewer.