Context Navigation

← Previous Changeset
Next Changeset →

Changeset 142363 in webkit

Timestamp:

Feb 9, 2013 9:52:19 AM (11 years ago)

Author:

eric@webkit.org

Message:

Fix TextDocumentParser to play nice with threading
https://bugs.webkit.org/show_bug.cgi?id=109240

Reviewed by Adam Barth.

Before the HTML5 parser re-write the text document parser
was completely custom. With the HTML5 parser, we just made
the TextDocumentParser use the HTMLDocumentParser with an
artificial script tag.

However, our solution was slightly over-engineered to avoid
lying about the column numbers of the first line of the text document
during parsing. :)

This change makes us use a simpler (and threading-compatible)
solution by just inserting a real "<pre>" tag into the
input stream instead of hacking one together with the treebuilder
and manually setting the Tokenizer state.

fast/parser/empty-text-resource.html covers this case.

html/parser/TextDocumentParser.cpp:

(WebCore::TextDocumentParser::TextDocumentParser):
(WebCore::TextDocumentParser::insertFakePreElement):

Location:

trunk/Source/WebCore

Files:

: 6 edited

ChangeLog (modified) (1 diff)
html/parser/BackgroundHTMLParser.cpp (modified) (1 diff)
html/parser/BackgroundHTMLParser.h (modified) (1 diff)
html/parser/HTMLDocumentParser.cpp (modified) (1 diff)
html/parser/HTMLDocumentParser.h (modified) (1 diff)
html/parser/TextDocumentParser.cpp (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/Source/WebCore/ChangeLog

-                      r142359
+                      r142363
+2013-02-09  Eric Seidel  <eric@webkit.org>
+        Fix TextDocumentParser to play nice with threading
+        https://bugs.webkit.org/show_bug.cgi?id=109240
+        Reviewed by Adam Barth.
+        Before the HTML5 parser re-write the text document parser
+        was completely custom.  With the HTML5 parser, we just made
+        the TextDocumentParser use the HTMLDocumentParser with an
+        artificial script tag.
+        However, our solution was slightly over-engineered to avoid
+        lying about the column numbers of the first line of the text document
+        during parsing. :)
+        This change makes us use a simpler (and threading-compatible)
+        solution by just inserting a real "<pre>" tag into the
+        input stream instead of hacking one together with the treebuilder
+        and manually setting the Tokenizer state.
+        fast/parser/empty-text-resource.html covers this case.
+        * html/parser/TextDocumentParser.cpp:
+        (WebCore::TextDocumentParser::TextDocumentParser):
+        (WebCore::TextDocumentParser::insertFakePreElement):
 2013-02-09  Kent Tamura  <tkent@chromium.org>

trunk/Source/WebCore/html/parser/BackgroundHTMLParser.cpp

-                      r142305
+                      r142363
+}
+void BackgroundHTMLParser::forcePlaintextForTextDocument()
+{
+    // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser)
+    // to force us into the PLAINTEXT state w/o using a <plaintext> tag.
+    // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons.
+    m_tokenizer->setState(HTMLTokenizerState::PLAINTEXTState);
+}
 void BackgroundHTMLParser::markEndOfFile()
+{

trunk/Source/WebCore/html/parser/BackgroundHTMLParser.h

r142305	r142363
59	59	void stop();
60	60
	61	void forcePlaintextForTextDocument();
	62
61	63	private:
62	64	BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, const HTMLParserOptions&, const WeakPtr<HTMLDocumentParser>&, PassOwnPtr<XSSAuditor>);

trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp

-                      r142305
+                      r142363
 #endif // ENABLE(THREADED_HTML_PARSER)
+void HTMLDocumentParser::forcePlaintextForTextDocument()
+{
+#if ENABLE(THREADED_HTML_PARSER)
+    if (shouldUseThreading()) {
+        // This method is called before any data is appended, so we have to start
+        // the background parser ourselves.
+        if (!m_haveBackgroundParser)
+            startBackgroundParser();
+        HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::forcePlaintextForTextDocument, m_backgroundParser));
+    } else
+#endif
+        m_tokenizer->setState(HTMLTokenizerState::PLAINTEXTState);
+}
 void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
+{

trunk/Source/WebCore/html/parser/HTMLDocumentParser.h

r142305	r142363
102	102	HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
103	103
	104	void forcePlaintextForTextDocument();
	105
104	106	private:
105	107	static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission permission)

trunk/Source/WebCore/html/parser/TextDocumentParser.cpp

-                      r141328
+                      r142363
     , m_haveInsertedFakePreElement(false)
+{
-    // FIXME: If we're using threading, we need to tell the BackgroundHTMLParser to use PLAINTEXTState.
-    if (tokenizer())
-        tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+}
 …
     // sending fake bytes through the front-end of the parser to avoid
     // disturbing the line/column number calculations.
     Vector<Attribute> attributes;
     attributes.append(Attribute(styleAttr, "word-wrap: break-word; white-space: pre-wrap;"));
     RefPtr<AtomicHTMLToken> fakePre = AtomicHTMLToken::create(HTMLTokenTypes::StartTag, preTag.localName(), attributes);
+    treeBuilder()->constructTree(fakePre.get());
-    treeBuilder()->constructTree(fakePre.get());
     // Normally we would skip the first \n after a <pre> element, but we don't
     // want to skip the first \n for text documents!
     treeBuilder()->setShouldSkipLeadingNewline(false);
+    // Although Text Documents expose a "pre" element in their DOM, they
+    // act like a <plaintext> tag, so we have to force plaintext mode.
+    forcePlaintextForTextDocument();
     m_haveInsertedFakePreElement = true;

Note: See TracChangeset for help on using the changeset viewer.