Changeset 65132 in webkit
- Timestamp:
- Aug 11, 2010 12:30:57 AM (14 years ago)
- Location:
- trunk/WebCore
- Files:
-
- 2 added
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebCore/Android.mk
r65059 r65132 286 286 html/LegacyHTMLDocumentParser.cpp \ 287 287 html/HTMLViewSourceDocument.cpp \ 288 html/HTMLViewSourceParser.cpp \ 288 289 html/ImageData.cpp \ 289 290 html/ImageResizerThread.cpp \ -
trunk/WebCore/CMakeLists.txt
r65117 r65132 1037 1037 html/HTMLUListElement.cpp 1038 1038 html/HTMLViewSourceDocument.cpp 1039 html/HTMLViewSourceParser.cpp 1039 1040 html/ImageData.cpp 1040 1041 html/ImageResizerThread.cpp -
trunk/WebCore/ChangeLog
r65131 r65132 1 2010-08-11 Adam Barth <abarth@webkit.org> 2 3 Reviewed by Eric Seidel. 4 5 Port view-source to new parser 6 https://bugs.webkit.org/show_bug.cgi?id=43746 7 8 This patch switches the view-source mode for frames over to using the 9 new HTML parsing infrastructure. This patch is an architectural change 10 to how we parse view source documents. 11 12 Previously, the LegacyHTMLDocumentParser would output a "guide string" 13 that consisted of the inter-attribute whitespace and various "control" 14 characters. The HTMLViewSourceDocument would then interpret this guide 15 string to approximately reconstruct the source of the original document 16 and colorize various syntactic constructs. 17 18 Unfortunately, that approach is inherently low-fidelity. It's not 19 really feasible to reconstruct the input document from the token 20 stream. The old view source mode also had a number of hacks in the old 21 parser (e.g., to turn off decoding of HTML entities). 22 23 Instead of trying to reconstruct the original document from the token 24 stream, we use the segmentation information given by the tokens to 25 colorize the input document itself. Each token now carries information 26 about where in the input stream it came from and where various 27 subcomponents (e.g., attribute names and values) are located. This 28 approach is higher fidelity because we use this segmentation 29 information to colorize the original input instead of attempting to 30 reconstruct the original input. 
31 32 * Android.mk: 33 * CMakeLists.txt: 34 * GNUmakefile.am: 35 * WebCore.gypi: 36 * WebCore.pro: 37 * WebCore.vcproj/WebCore.vcproj: 38 * WebCore.xcodeproj/project.pbxproj: 39 * html/HTMLDocumentParser.cpp: 40 * html/HTMLToken.h: 41 (WebCore::HTMLToken::clear): 42 (WebCore::HTMLToken::startIndex): 43 (WebCore::HTMLToken::length): 44 (WebCore::HTMLToken::end): 45 * html/HTMLViewSourceDocument.cpp: 46 (WebCore::HTMLViewSourceDocument::createParser): 47 (WebCore::HTMLViewSourceDocument::addSource): 48 (WebCore::HTMLViewSourceDocument::processDoctypeToken): 49 (WebCore::HTMLViewSourceDocument::processTagToken): 50 (WebCore::HTMLViewSourceDocument::processCommentToken): 51 (WebCore::HTMLViewSourceDocument::processCharacterToken): 52 (WebCore::HTMLViewSourceDocument::addRange): 53 * html/HTMLViewSourceDocument.h: 54 * html/HTMLViewSourceParser.cpp: Added. 55 (WebCore::HTMLViewSourceParser::~HTMLViewSourceParser): 56 (WebCore::HTMLViewSourceParser::insert): 57 (WebCore::HTMLViewSourceParser::pumpTokenizer): 58 (WebCore::HTMLViewSourceParser::append): 59 (WebCore::HTMLViewSourceParser::sourceForToken): 60 (WebCore::HTMLViewSourceParser::updateTokenizerState): 61 (WebCore::HTMLViewSourceParser::finish): 62 (WebCore::HTMLViewSourceParser::finishWasCalled): 63 * html/HTMLViewSourceParser.h: Added. 64 (WebCore::HTMLViewSourceParser::HTMLViewSourceParser): 65 (WebCore::HTMLViewSourceParser::document): 66 * html/LegacyHTMLDocumentParser.cpp: 67 (WebCore::LegacyHTMLDocumentParser::processToken): 68 (WebCore::LegacyHTMLDocumentParser::processDoctypeToken): 69 1 70 2010-08-11 Yoshiki Hayashi <yhayashi@google.com> 2 71 -
trunk/WebCore/GNUmakefile.am
r65102 r65132 1556 1556 WebCore/html/HTMLViewSourceDocument.cpp \ 1557 1557 WebCore/html/HTMLViewSourceDocument.h \ 1558 WebCore/html/HTMLViewSourceParser.cpp \ 1559 WebCore/html/HTMLViewSourceParser.h \ 1558 1560 WebCore/html/ImageData.cpp \ 1559 1561 WebCore/html/ImageData.h \ -
trunk/WebCore/WebCore.gypi
r65102 r65132 1722 1722 'html/HTMLViewSourceDocument.cpp', 1723 1723 'html/HTMLViewSourceDocument.h', 1724 'html/HTMLViewSourceParser.cpp', 1725 'html/HTMLViewSourceParser.h', 1724 1726 'html/ImageData.cpp', 1725 1727 'html/ImageData.h', -
trunk/WebCore/WebCore.pro
r65128 r65132 741 741 html/HTMLUListElement.cpp \ 742 742 html/HTMLViewSourceDocument.cpp \ 743 html/HTMLViewSourceParser.cpp \ 743 744 html/ImageData.cpp \ 744 745 html/ImageResizerThread.cpp \ … … 1505 1506 html/HTMLVideoElement.h \ 1506 1507 html/HTMLViewSourceDocument.h \ 1508 html/HTMLViewSourceParser.h \ 1507 1509 html/ImageData.h \ 1508 1510 html/ImageResizerThread.h \ -
trunk/WebCore/WebCore.vcproj/WebCore.vcproj
r65102 r65132 40898 40898 </File> 40899 40899 <File 40900 RelativePath="..\html\HTMLViewSourceParser.cpp" 40901 > 40902 </File> 40903 <File 40904 RelativePath="..\html\HTMLViewSourceParser.h" 40905 > 40906 </File> 40907 <File 40900 40908 RelativePath="..\html\ImageData.cpp" 40901 40909 > -
trunk/WebCore/WebCore.xcodeproj/project.pbxproj
r65102 r65132 2675 2675 976E896011C0CA3A00EA9CA9 /* HTMLEntityParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 976E895E11C0CA3A00EA9CA9 /* HTMLEntityParser.cpp */; }; 2676 2676 976E896111C0CA3A00EA9CA9 /* HTMLEntityParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 976E895F11C0CA3A00EA9CA9 /* HTMLEntityParser.h */; }; 2677 978B6FC912128821001595EF /* HTMLViewSourceParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 978B6FC712128821001595EF /* HTMLViewSourceParser.cpp */; }; 2678 978B6FCA12128821001595EF /* HTMLViewSourceParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 978B6FC812128821001595EF /* HTMLViewSourceParser.h */; }; 2677 2679 979F43D31075E44A0000F83B /* RedirectScheduler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 979F43D11075E44A0000F83B /* RedirectScheduler.cpp */; }; 2678 2680 979F43D41075E44A0000F83B /* RedirectScheduler.h in Headers */ = {isa = PBXBuildFile; fileRef = 979F43D21075E44A0000F83B /* RedirectScheduler.h */; settings = {ATTRIBUTES = (Private, ); }; }; … … 8480 8482 976E895E11C0CA3A00EA9CA9 /* HTMLEntityParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HTMLEntityParser.cpp; sourceTree = "<group>"; }; 8481 8483 976E895F11C0CA3A00EA9CA9 /* HTMLEntityParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HTMLEntityParser.h; sourceTree = "<group>"; }; 8484 978B6FC712128821001595EF /* HTMLViewSourceParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HTMLViewSourceParser.cpp; sourceTree = "<group>"; }; 8485 978B6FC812128821001595EF /* HTMLViewSourceParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HTMLViewSourceParser.h; sourceTree = "<group>"; }; 8482 8486 979F43D11075E44A0000F83B /* RedirectScheduler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = 
RedirectScheduler.cpp; sourceTree = "<group>"; }; 8483 8487 979F43D21075E44A0000F83B /* RedirectScheduler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RedirectScheduler.h; sourceTree = "<group>"; }; … … 14172 14176 BCCD74E40A4C8DDF005FDA6D /* HTMLViewSourceDocument.cpp */, 14173 14177 BCCD74DB0A4C8D35005FDA6D /* HTMLViewSourceDocument.h */, 14178 978B6FC712128821001595EF /* HTMLViewSourceParser.cpp */, 14179 978B6FC812128821001595EF /* HTMLViewSourceParser.h */, 14174 14180 A77979130D6B9D0C003851B9 /* ImageData.cpp */, 14175 14181 A77979140D6B9D0C003851B9 /* ImageData.h */, … … 20150 20156 2EED575812109EE4007656BB /* BlobRegistry.h in Headers */, 20151 20157 2EED575C12109EF3007656BB /* BlobData.h in Headers */, 20158 978B6FCA12128821001595EF /* HTMLViewSourceParser.h in Headers */, 20152 20159 ); 20153 20160 runOnlyForDeploymentPostprocessing = 0; … … 22576 22583 2EED575512109ED0007656BB /* BlobURL.cpp in Sources */, 22577 22584 2EED575B12109EF3007656BB /* BlobData.cpp in Sources */, 22585 978B6FC912128821001595EF /* HTMLViewSourceParser.cpp in Sources */, 22578 22586 ); 22579 22587 runOnlyForDeploymentPostprocessing = 0; -
trunk/WebCore/html/HTMLToken.h
r65110 r65132 65 65 HTMLToken() { clear(); } 66 66 67 void clear( )67 void clear(int startIndex = 0) 68 68 { 69 69 m_type = Uninitialized; 70 m_range.m_start = startIndex; 71 m_range.m_end = startIndex; 70 72 m_data.clear(); 73 } 74 75 int startIndex() const { return m_range.m_start; } 76 int endIndex() const { return m_range.m_end; } 77 78 void end(int endIndex) 79 { 80 m_range.m_end = endIndex; 71 81 } 72 82 … … 321 331 322 332 Type m_type; 333 334 // Which characters from the input stream are represented by this token. 335 Range m_range; 323 336 324 337 // "name" for DOCTYPE, StartTag, and EndTag -
trunk/WebCore/html/HTMLViewSourceDocument.cpp
r65012 r65132 37 37 #include "HTMLTableRowElement.h" 38 38 #include "HTMLTableSectionElement.h" 39 #include "LegacyHTMLDocumentParser.h" 39 #include "HTMLToken.h" 40 #include "HTMLViewSourceParser.h" 41 #include "SegmentedString.h" 40 42 #include "Text.h" 41 43 #include "TextDocument.h" … … 59 61 || m_type == "application/vnd.wap.xhtml+xml" 60 62 #endif 61 ) { 62 // FIXME: Should respect Settings::html5ParserEnabled() 63 return new LegacyHTMLDocumentParser(this); 64 } 63 ) 64 return new HTMLViewSourceParser(this); 65 65 66 66 return createTextDocumentParser(this); … … 101 101 } 102 102 103 void HTMLViewSourceDocument::add ViewSourceToken(Token*token)103 void HTMLViewSourceDocument::addSource(const String& source, HTMLToken& token) 104 104 { 105 105 if (!m_current) 106 106 createContainingTable(); 107 107 108 if (token->tagName == textAtom) 109 addText(token->text.get(), ""); 110 else if (token->tagName == commentAtom) { 111 if (token->beginTag) { 112 m_current = addSpanWithClassName("webkit-html-comment"); 113 addText(String("<!--") + token->text.get() + "-->", "webkit-html-comment"); 114 } 115 } else { 116 // Handle the tag. 117 String classNameStr = "webkit-html-tag"; 118 m_current = addSpanWithClassName(classNameStr); 119 120 String text = "<"; 121 if (!token->beginTag) 122 text += "/"; 123 text += token->tagName; 124 Vector<UChar>* guide = token->m_sourceInfo.get(); 125 if (!guide || !guide->size()) 126 text += ">"; 127 128 addText(text, classNameStr); 129 130 // Walk our guide string that tells us where attribute names/values should go. 131 if (guide && guide->size()) { 132 unsigned size = guide->size(); 133 unsigned begin = 0; 134 unsigned currAttr = 0; 135 RefPtr<Attribute> attr = 0; 136 for (unsigned i = 0; i < size; i++) { 137 if (guide->at(i) == 'a' || guide->at(i) == 'x' || guide->at(i) == 'v') { 138 // Add in the string. 
139 addText(String(static_cast<UChar*>(guide->data()) + begin, i - begin), classNameStr); 140 141 begin = i + 1; 142 143 if (guide->at(i) == 'a') { 144 if (token->attrs && currAttr < token->attrs->length()) 145 attr = token->attrs->attributeItem(currAttr++); 146 else 147 attr = 0; 148 } 149 if (attr) { 150 if (guide->at(i) == 'a') { 151 String name = attr->name().toString(); 152 153 m_current = addSpanWithClassName("webkit-html-attribute-name"); 154 addText(name, "webkit-html-attribute-name"); 155 if (m_current != m_tbody) 156 m_current = static_cast<Element*>(m_current->parent()); 157 } else { 158 const String& value = attr->value().string(); 159 160 // Compare ignoring case since LegacyHTMLDocumentParser doesn't 161 // lower names when passing in tokens to 162 // HTMLViewSourceDocument. 163 if (equalIgnoringCase(token->tagName, "base") && equalIgnoringCase(attr->name().localName(), "href")) { 164 // Catch the href attribute in the base element. 165 // It will be used for rendering anchors created 166 // by addLink() below. 167 setBaseElementURL(KURL(url(), value)); 168 } 169 170 // FIXME: XML could use namespace prefixes and confuse us. 171 if (equalIgnoringCase(attr->name().localName(), "src") || equalIgnoringCase(attr->name().localName(), "href")) 172 m_current = addLink(value, equalIgnoringCase(token->tagName, "a")); 173 else 174 m_current = addSpanWithClassName("webkit-html-attribute-value"); 175 addText(value, "webkit-html-attribute-value"); 176 if (m_current != m_tbody) 177 m_current = static_cast<Element*>(m_current->parent()); 178 } 179 } 180 } 181 } 182 183 // Add in any string that might be left. 184 if (begin < size) 185 addText(String(static_cast<UChar*>(guide->data()) + begin, size - begin), classNameStr); 186 187 // Add in the end tag. 
188 addText(">", classNameStr); 189 } 190 191 m_current = m_td; 192 } 193 } 194 195 void HTMLViewSourceDocument::addViewSourceDoctypeToken(DoctypeToken* doctypeToken) 108 switch (token.type()) { 109 case HTMLToken::Uninitialized: 110 ASSERT_NOT_REACHED(); 111 break; 112 case HTMLToken::DOCTYPE: 113 processDoctypeToken(source, token); 114 break; 115 case HTMLToken::EndOfFile: 116 break; 117 case HTMLToken::StartTag: 118 case HTMLToken::EndTag: 119 processTagToken(source, token); 120 break; 121 case HTMLToken::Comment: 122 processCommentToken(source, token); 123 break; 124 case HTMLToken::Character: 125 processCharacterToken(source, token); 126 break; 127 } 128 } 129 130 void HTMLViewSourceDocument::processDoctypeToken(const String& source, HTMLToken&) 196 131 { 197 132 if (!m_current) 198 133 createContainingTable(); 199 134 m_current = addSpanWithClassName("webkit-html-doctype"); 200 String text = "<"; 201 text += String::adopt(doctypeToken->m_source); 202 text += ">"; 203 addText(text, "webkit-html-doctype"); 135 addText(source, "webkit-html-doctype"); 136 m_current = m_td; 137 } 138 139 void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token) 140 { 141 String classNameStr = "webkit-html-tag"; 142 m_current = addSpanWithClassName(classNameStr); 143 144 AtomicString tagName(token.name().data(), token.name().size()); 145 146 unsigned index = 0; 147 HTMLToken::AttributeList::const_iterator iter = token.attributes().begin(); 148 while (index < source.length()) { 149 if (iter == token.attributes().end()) { 150 // We want to show the remaining characters in the token. 
151 index = addRange(source, index, source.length(), ""); 152 ASSERT(index == source.length()); 153 break; 154 } 155 156 AtomicString name(iter->m_name.data(), iter->m_name.size()); 157 String value(iter->m_value.data(), iter->m_value.size()); 158 159 index = addRange(source, index, iter->m_nameRange.m_start - token.startIndex(), ""); 160 index = addRange(source, index, iter->m_nameRange.m_end - token.startIndex(), "webkit-html-attribute-name"); 161 162 if (tagName == baseTag && name == hrefAttr) { 163 // Catch the href attribute in the base element. It will be used 164 // for rendering anchors created by addLink() below. 165 setBaseElementURL(KURL(url(), value)); 166 } 167 168 index = addRange(source, index, iter->m_valueRange.m_start - token.startIndex(), ""); 169 170 bool isLink = name == srcAttr || name == hrefAttr; 171 index = addRange(source, index, iter->m_valueRange.m_end - token.startIndex(), "webkit-html-attribute-value", isLink, tagName == aTag); 172 173 ++iter; 174 } 175 m_current = m_td; 176 } 177 178 void HTMLViewSourceDocument::processCommentToken(const String& source, HTMLToken&) 179 { 180 m_current = addSpanWithClassName("webkit-html-comment"); 181 addText(source, "webkit-html-comment"); 182 m_current = m_td; 183 } 184 185 void HTMLViewSourceDocument::processCharacterToken(const String& source, HTMLToken&) 186 { 187 addText(source, ""); 204 188 } 205 189 … … 288 272 } 289 273 274 int HTMLViewSourceDocument::addRange(const String& source, int start, int end, const String& className, bool isLink, bool isAnchor) 275 { 276 ASSERT(start <= end); 277 if (start == end) 278 return start; 279 280 String text = source.substring(start, end - start); 281 if (!className.isEmpty()) { 282 if (isLink) 283 m_current = addLink(text, isAnchor); 284 else 285 m_current = addSpanWithClassName(className); 286 } 287 addText(text, className); 288 if (!className.isEmpty() && m_current != m_tbody) 289 m_current = static_cast<Element*>(m_current->parent()); 290 return end; 
291 } 292 290 293 PassRefPtr<Element> HTMLViewSourceDocument::addLink(const String& url, bool isAnchor) 291 294 { -
trunk/WebCore/html/HTMLViewSourceDocument.h
r61868 r65132 30 30 namespace WebCore { 31 31 32 class DoctypeToken;33 32 class HTMLTableCellElement; 34 33 class HTMLTableSectionElement; 35 36 struct Token; 34 class HTMLToken; 37 35 38 36 class HTMLViewSourceDocument : public HTMLDocument { … … 43 41 } 44 42 45 void addViewSourceToken(Token*); // Used by the LegacyHTMLDocumentParser. 43 void addSource(const String&, HTMLToken&); 44 45 void addViewSourceToken(HTMLToken&); // Used by the HTMLDocumentParser. 46 46 void addViewSourceText(const String&); // Used by the TextDocumentParser. 47 void addViewSourceDoctypeToken(DoctypeToken*);48 47 49 48 private: 50 49 HTMLViewSourceDocument(Frame*, const KURL&, const String& mimeType); 51 50 52 // Returns LegacyHTMLDocumentParser or TextDocumentParser based on m_type.51 // Returns HTMLViewSourceParser or TextDocumentParser based on m_type. 53 52 virtual DocumentParser* createParser(); 53 54 void processDoctypeToken(const String& source, HTMLToken&); 55 void processTagToken(const String& source, HTMLToken&); 56 void processCommentToken(const String& source, HTMLToken&); 57 void processCharacterToken(const String& source, HTMLToken&); 54 58 55 59 void createContainingTable(); … … 57 61 void addLine(const String& className); 58 62 void addText(const String& text, const String& className); 63 int addRange(const String& source, int start, int end, const String& className, bool isLink = false, bool isAnchor = false); 59 64 PassRefPtr<Element> addLink(const String& url, bool isAnchor); 60 65 -
trunk/WebCore/html/LegacyHTMLDocumentParser.cpp
r65031 r65132 1927 1927 if (NamedNodeMap* map = m_currentToken.attrs.get()) 1928 1928 map->shrinkToLength(); 1929 if (inViewSourceMode()) 1930 static_cast<HTMLViewSourceDocument*>(document())->addViewSourceToken(&m_currentToken); 1931 else 1932 // pass the token over to the parser, the parser DOES NOT delete the token 1933 n = m_treeBuilder->parseToken(&m_currentToken); 1929 // pass the token over to the parser, the parser DOES NOT delete the token 1930 n = m_treeBuilder->parseToken(&m_currentToken); 1934 1931 } 1935 1932 m_currentToken.reset(); … … 1940 1937 void LegacyHTMLDocumentParser::processDoctypeToken() 1941 1938 { 1942 if (inViewSourceMode()) 1943 static_cast<HTMLViewSourceDocument*>(document())->addViewSourceDoctypeToken(&m_doctypeToken); 1944 else 1945 m_treeBuilder->parseDoctypeToken(&m_doctypeToken); 1939 m_treeBuilder->parseDoctypeToken(&m_doctypeToken); 1946 1940 } 1947 1941
Note: See TracChangeset
for help on using the changeset viewer.