Changeset 141328 in webkit


Ignore:
Timestamp:
Jan 30, 2013 3:16:03 PM (11 years ago)
Author:
abarth@webkit.org
Message:

HTMLDocumentParser::insert should be aware of threaded parsing
https://bugs.webkit.org/show_bug.cgi?id=107764

Reviewed by Eric Seidel.

This patch is an incremental step towards recovering from
document.write invalidating our speculative parsing buffer. The
approach I've taken is to make it possible to transfer the
HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
thread. To make that possible, I've taught the HTMLDocumentParser how
to operate without a tokenizer or a token.

Not having a tokenizer or a token while parsing in the background also
helps us avoid accidentally feeding input to the main thread's
tokenizer when we're supposed to feed it to the background thread.

This patch shouldn't have any behavior change (other than possibly
fixing a crash in fast/parser when threaded parsing is enabled).

  • html/parser/HTMLDocumentParser.cpp:

(WebCore::HTMLDocumentParser::HTMLDocumentParser):
(WebCore::HTMLDocumentParser::didFailSpeculation):
(WebCore):
(WebCore::HTMLDocumentParser::insert):
(WebCore::HTMLDocumentParser::finish):
(WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):

  • html/parser/HTMLDocumentParser.h:

(HTMLDocumentParser):

  • html/parser/HTMLTreeBuilder.cpp:

(WebCore::HTMLTreeBuilder::constructTree):
(WebCore::HTMLTreeBuilder::processStartTagForInBody):
(WebCore::HTMLTreeBuilder::processEndTag):
(WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
(WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
(WebCore::HTMLTreeBuilder::processScriptStartTag):

  • html/parser/TextDocumentParser.cpp:

(WebCore::TextDocumentParser::TextDocumentParser):

Location:
trunk/Source/WebCore
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/ChangeLog

    r141327 r141328  
     12013-01-30  Adam Barth  <abarth@webkit.org>
     2
     3        HTMLDocumentParser::insert should be aware of threaded parsing
     4        https://bugs.webkit.org/show_bug.cgi?id=107764
     5
     6        Reviewed by Eric Seidel.
     7
     8        This patch is an incremental step towards recovering from
     9        document.write invalidating our speculative parsing buffer. The
     10        approach I've taken is to make it possible to transfer the
     11        HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
     12        thread. To make that possible, I've taught the HTMLDocumentParser how
     13        to operate without a tokenizer or a token.
     14
     15        Not having a tokenizer or a token while parsing in the background also
     16        helps us avoid accidentially feeding input to the main thread's
     17        tokenizer when we're supposed to feed it to the background thread.
     18
     19        This patch shouldn't have any behavior change (other than possibly
     20        fixing a crash in fast/parser when threading parsing is enabled).
     21
     22        * html/parser/HTMLDocumentParser.cpp:
     23        (WebCore::HTMLDocumentParser::HTMLDocumentParser):
     24        (WebCore::HTMLDocumentParser::didFailSpeculation):
     25        (WebCore):
     26        (WebCore::HTMLDocumentParser::insert):
     27        (WebCore::HTMLDocumentParser::finish):
     28        (WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
     29        * html/parser/HTMLDocumentParser.h:
     30        (HTMLDocumentParser):
     31        * html/parser/HTMLTreeBuilder.cpp:
     32        (WebCore::HTMLTreeBuilder::constructTree):
     33        (WebCore::HTMLTreeBuilder::processStartTagForInBody):
     34        (WebCore::HTMLTreeBuilder::processEndTag):
     35        (WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
     36        (WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
     37        (WebCore::HTMLTreeBuilder::processScriptStartTag):
     38        * html/parser/TextDocumentParser.cpp:
     39        (WebCore::TextDocumentParser::TextDocumentParser):
     40
    1412013-01-30  Rafael Weinstein  <rafaelw@chromium.org>
    242
  • trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp

    r141071 r141328  
    7878    : ScriptableDocumentParser(document)
    7979    , m_options(document)
    80     , m_token(adoptPtr(new HTMLToken))
    81     , m_tokenizer(HTMLTokenizer::create(m_options))
     80    , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken))
     81    , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options))
    8282    , m_scriptRunner(HTMLScriptRunner::create(document, this))
    8383    , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, m_options))
     
    9191    , m_pumpSessionNestingLevel(0)
    9292{
     93    ASSERT(shouldUseThreading() || (m_token && m_tokenizer));
    9394}
    9495
     
    109110    , m_pumpSessionNestingLevel(0)
    110111{
     112    ASSERT(!shouldUseThreading());
    111113    bool reportErrors = false; // For now document fragment parsing never reports errors.
    112114    m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
     
    283285    ASSERT(m_pendingTokens.isEmpty());
    284286    processTokensFromBackgroundParser(tokens);
     287}
     288
     289void HTMLDocumentParser::didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>)
     290{
     291    // FIXME: Tell the background parser to resume parsing with this token and tokenizer.
    285292}
    286293
     
    447454    RefPtr<HTMLDocumentParser> protect(this);
    448455
     456#if ENABLE(THREADED_HTML_PARSER)
     457    if (!m_tokenizer) {
     458        ASSERT(!inPumpSession());
     459        ASSERT(m_haveBackgroundParser || wasCreatedByScript());
     460        m_token = adoptPtr(new HTMLToken);
     461        m_tokenizer = HTMLTokenizer::create(m_options);
     462    }
     463#endif
     464
    449465    SegmentedString excludedLineNumberSource(source);
    450466    excludedLineNumberSource.setExcludeLineNumbers();
    451467    m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
    452468    pumpTokenizerIfPossible(ForceSynchronous);
    453    
     469
     470#if ENABLE(THREADED_HTML_PARSER)
     471    if (!inPumpSession() && m_haveBackgroundParser) {
     472        // FIXME: If the tokenizer is in the same state as when we started this function,
     473        // then we haven't necessarily failed our speculation.
     474        didFailSpeculation(m_token.release(), m_tokenizer.release());
     475        return;
     476    }
     477#endif
     478
    454479    if (isWaitingForScripts()) {
    455480        // Check the document.write() output with a separate preload scanner as
     
    604629        HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finishPartial, ParserMap::identifierForParser(this)));
    605630        return;
     631    }
     632    if (shouldUseThreading() && !wasCreatedByScript()) {
     633        ASSERT(!m_tokenizer && !m_token);
     634        // We're finishing before receiving any data. Rather than booting up
     635        // the background parser just to spin it down, we finish parsing
     636        // synchronously.
     637        m_token = adoptPtr(new HTMLToken);
     638        m_tokenizer = HTMLTokenizer::create(m_options);
    606639    }
    607640#endif
     
    671704        while (!m_pendingTokens.isEmpty()) {
    672705            processTokensFromBackgroundParser(m_pendingTokens.takeFirst());
    673             if (isWaitingForScripts())
     706            if (isWaitingForScripts() || isStopped())
    674707                return;
    675708        }
  • trunk/Source/WebCore/html/parser/HTMLDocumentParser.h

    r141070 r141328  
    125125    void startBackgroundParser();
    126126    void stopBackgroundParser();
     127    void didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
    127128    void processTokensFromBackgroundParser(PassOwnPtr<CompactHTMLTokenStream>);
    128129#endif
  • trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp

    r141327 r141328  
    368368        processToken(token);
    369369
    370     bool inForeignContent = !m_tree.isEmpty()
    371         && !m_tree.currentStackItem()->isInHTMLNamespace()
    372         && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
    373         && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
    374 
    375     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
    376     m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
     370    if (m_parser->tokenizer()) {
     371        bool inForeignContent = !m_tree.isEmpty()
     372            && !m_tree.currentStackItem()->isInHTMLNamespace()
     373            && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
     374            && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
     375
     376        m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
     377        m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
     378    }
    377379
    378380    m_tree.executeQueuedTasks();
     
    741743        processFakePEndTagIfPInButtonScope();
    742744        m_tree.insertHTMLElement(token);
    743         m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
     745        if (m_parser->tokenizer())
     746            m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
    744747        return;
    745748    }
     
    851854        m_tree.insertHTMLElement(token);
    852855        m_shouldSkipLeadingNewline = true;
    853         m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
     856        if (m_parser->tokenizer())
     857            m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
    854858        m_originalInsertionMode = m_insertionMode;
    855859        m_framesetOk = false;
     
    21642168            setInsertionMode(m_originalInsertionMode);
    21652169
    2166             // This token will not have been created by the tokenizer if a
    2167             // self-closing script tag was encountered and pre-HTML5 parser
    2168             // quirks are enabled. We must set the tokenizer's state to
    2169             // DataState explicitly if the tokenizer didn't have a chance to.
    2170             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
    2171             m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
     2170            if (m_parser->tokenizer()) {
     2171                // This token will not have been created by the tokenizer if a
     2172                // self-closing script tag was encountered and pre-HTML5 parser
     2173                // quirks are enabled. We must set the tokenizer's state to
     2174                // DataState explicitly if the tokenizer didn't have a chance to.
     2175                ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
     2176                m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
     2177            }
    21722178            return;
    21732179        }
     
    27072713    ASSERT(token->type() == HTMLTokenTypes::StartTag);
    27082714    m_tree.insertHTMLElement(token);
    2709     m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
     2715    if (m_parser->tokenizer())
     2716        m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
    27102717    m_originalInsertionMode = m_insertionMode;
    27112718    setInsertionMode(TextMode);
     
    27162723    ASSERT(token->type() == HTMLTokenTypes::StartTag);
    27172724    m_tree.insertHTMLElement(token);
    2718     m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
     2725    if (m_parser->tokenizer())
     2726        m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
    27192727    m_originalInsertionMode = m_insertionMode;
    27202728    setInsertionMode(TextMode);
     
    27252733    ASSERT(token->type() == HTMLTokenTypes::StartTag);
    27262734    m_tree.insertScriptElement(token);
    2727     m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
     2735    if (m_parser->tokenizer())
     2736        m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
    27282737    m_originalInsertionMode = m_insertionMode;
    27292738
  • trunk/Source/WebCore/html/parser/TextDocumentParser.cpp

    r139523 r141328  
    3939    , m_haveInsertedFakePreElement(false)
    4040{
    41     tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
     41    // FIXME: If we're using threading, we need to tell the BackgroundHTMLParser to use PLAINTEXTState.
     42    if (tokenizer())
     43        tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
    4244}
    4345
Note: See TracChangeset for help on using the changeset viewer.