Changeset 60926 in webkit
- Timestamp:
- Jun 9, 2010 5:49:25 PM (14 years ago)
- Location:
- trunk/WebCore
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebCore/ChangeLog
r60916 r60926 1 2010-06-09 Adam Barth <abarth@webkit.org> 2 3 Reviewed by Eric Seidel. 4 5 Fix handling of bytes received from the network while in document.write 6 https://bugs.webkit.org/show_bug.cgi?id=40356 7 8 The old tokenizer has special logic for handling the case of 9 receiving bytes from the network while in a nested call to 10 document.write. This patch implements similar logic for the HTML5 11 tokenizer. Also, this patch abstracts the tricky shuffling of 12 SegmentedStrings behind a simple API. 13 14 I'm not sure how to trigger this case. My guess is we can trigger it 15 using a nested event loop, e.g., via alert(), but I'm not sure how to 16 test that in a LayoutTest. There don't appear to be any LayoutTests 17 that currently test this behavior despite it being present in the old 18 tokenizer. 19 20 * html/HTML5Tokenizer.cpp: 21 (WebCore::HTML5Tokenizer::pumpLexer): 22 (WebCore::HTML5Tokenizer::write): 23 - Added a branch for the |append| argument. 24 (WebCore::HTML5Tokenizer::end): 25 (WebCore::HTML5Tokenizer::finish): 26 (WebCore::HTML5Tokenizer::executeScript): 27 - Switch over to using a RAII pattern for recording and restoring 28 insertion points. 29 * html/HTML5Tokenizer.h: 30 (WebCore::HTML5Tokenizer::InputStream::InputStream): 31 (WebCore::HTML5Tokenizer::InputStream::appendToEnd): 32 (WebCore::HTML5Tokenizer::InputStream::insertAtCurrentInsertionPoint): 33 (WebCore::HTML5Tokenizer::InputStream::close): 34 - Putting the close() method on InputStream makes it much easier to 35 handle EOF. We now just close the last buffer in the stream when 36 the network says it's done. 37 (WebCore::HTML5Tokenizer::InputStream::current): 38 - This class could be moved to its own file, but it shouldn't be 39 used outside of the tokenizer. 40 (WebCore::HTML5Tokenizer::InsertionPointRecord::InsertionPointRecord): 41 (WebCore::HTML5Tokenizer::InsertionPointRecord::~InsertionPointRecord): 42 - A simple RAII class for managing saved insertion points. 
43 * platform/text/SegmentedString.cpp: 44 (WebCore::SegmentedString::operator=): 45 - Fix a related bug where m_closed was not being copied properly in 46 the assignment operator. 47 1 48 2010-06-09 Tony Gentilcore <tonyg@chromium.org> 2 49 -
trunk/WebCore/html/HTML5Tokenizer.cpp
r60898 r60926 96 96 ASSERT(!m_parserStopped); 97 97 ASSERT(!m_treeBuilder->isPaused()); 98 while (!m_parserStopped && m_lexer->nextToken(m_ source, m_token)) {98 while (!m_parserStopped && m_lexer->nextToken(m_input.current(), m_token)) { 99 99 m_treeBuilder->constructTreeFromToken(m_token); 100 100 m_token.clear(); … … 114 114 } 115 115 116 void HTML5Tokenizer::write(const SegmentedString& source, bool )116 void HTML5Tokenizer::write(const SegmentedString& source, bool appendData) 117 117 { 118 118 if (m_parserStopped) … … 121 121 NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel); 122 122 123 // HTML5Tokenizer::executeScript is responsible for handling saving m_source before re-entry. 124 m_source.append(source); 123 if (appendData) { 124 m_input.appendToEnd(source); 125 if (m_writeNestingLevel > 1) { 126 // We've gotten data off the network in a nested call to write(). 127 // We don't want to consume any more of the input stream now. Do 128 // not worry. We'll consume this data in a less-nested write(). 129 return; 130 } 131 } else 132 m_input.insertAtCurrentInsertionPoint(source); 133 125 134 pumpLexerIfPossible(); 126 135 endIfDelayed(); … … 129 138 void HTML5Tokenizer::end() 130 139 { 131 m_source.close();132 140 pumpLexerIfPossible(); 133 141 // Informs the the rest of WebCore that parsing is really finished. … … 158 166 void HTML5Tokenizer::finish() 159 167 { 160 // We can't call m_source.close() yet as we may have a <script> execution161 // pending which will call document.write(). No more data off the network though.162 // end() calls Document::finishedParsing() once we're actually done parsing.168 // We're not going to get any more data off the network, so we close the 169 // input stream to indicate EOF. 
170 m_input.close(); 163 171 attemptToEnd(); 164 172 } … … 222 230 if (!m_document->frame()) 223 231 return; 224 225 SegmentedString oldInsertionPoint = m_source; 226 m_source = SegmentedString(); 232 InsertionPointRecord savedInsertionPoint(m_input); 227 233 m_document->frame()->script()->executeScript(sourceCode); 228 // Append oldInsertionPoint onto the new (likely empty) m_source instead of229 // oldInsertionPoint.prepent(m_source) as that would ASSERT if230 // m_source.escaped() (it had characters pushed back onto it).231 // If m_source was closed, then the tokenizer was stopped, and we discard232 // any pending data as though an EOF character was inserted into the stream.233 if (!m_source.isClosed())234 m_source.append(oldInsertionPoint);235 234 } 236 235 -
trunk/WebCore/html/HTML5Tokenizer.h
r60898 r60926 71 71 72 72 private: 73 // The InputStream is made up of a sequence of SegmentedStrings: 74 // 75 // [--current--][--next--][--next--] ... [--next--] 76 // /\ (also called m_last) 77 // L_ current insertion point 78 // 79 // The current segmented string is stored in InputStream. Each of the 80 // afterInsertionPoint buffers are stored in InsertionPointRecords on the 81 // stack. 82 // 83 // We remove characters from the "current" string in the InputStream. 84 // document.write() will add characters at the current insertion point, 85 // which appends them to the "current" string. 86 // 87 // m_last is a pointer to the last of the afterInsertionPoint strings. 88 // The network adds data at the end of the InputStream, which appends 89 // them to the "last" string. 90 class InputStream { 91 public: 92 InputStream() 93 : m_last(&m_first) 94 { 95 } 96 97 void appendToEnd(const SegmentedString& string) 98 { 99 m_last->append(string); 100 } 101 102 void insertAtCurrentInsertionPoint(const SegmentedString& string) 103 { 104 m_first.append(string); 105 } 106 107 void close() { m_last->close(); } 108 109 SegmentedString& current() { return m_first; } 110 111 void splitInto(SegmentedString& next) 112 { 113 next = m_first; 114 m_first = SegmentedString(); 115 if (m_last == &m_first) { 116 // We used to only have one SegmentedString in the InputStream 117 // but now we have two. That means m_first is no longer also 118 // the m_last string, |next| is now the last one. 119 m_last = &next; 120 } 121 } 122 123 void mergeFrom(SegmentedString& next) 124 { 125 m_first.append(next); 126 if (m_last == &next) { 127 // The string |next| used to be the last SegmentedString in 128 // the InputStream. Now that it's been merged into m_first, 129 // that makes m_first the last one. 130 m_last = &m_first; 131 } 132 if (next.isClosed()) { 133 // We also need to merge the "closed" state from next to 134 // m_first. Arguably, this work could be done in append(). 
135 m_first.close(); 136 } 137 } 138 139 private: 140 SegmentedString m_first; 141 SegmentedString* m_last; 142 }; 143 144 class InsertionPointRecord { 145 public: 146 InsertionPointRecord(InputStream& inputStream) 147 : m_inputStream(&inputStream) 148 { 149 m_inputStream->splitInto(m_next); 150 } 151 152 ~InsertionPointRecord() 153 { 154 m_inputStream->mergeFrom(m_next); 155 } 156 157 private: 158 InputStream* m_inputStream; 159 SegmentedString m_next; 160 }; 161 73 162 void pumpLexer(); 74 163 void pumpLexerIfPossible(); … … 79 168 bool inWrite() const { return m_writeNestingLevel > 0; } 80 169 81 SegmentedString m_source;170 InputStream m_input; 82 171 83 172 // We hold m_token here because it might be partially complete. -
trunk/WebCore/platform/text/SegmentedString.cpp
r60683 r60926 52 52 else 53 53 m_currentChar = other.m_currentChar; 54 m_closed = other.m_closed; 54 55 return *this; 55 56 }
Note: See TracChangeset
for help on using the changeset viewer.