Changeset 73756 in webkit
- Timestamp:
- Dec 10, 2010 11:50:38 AM (13 years ago)
- Location:
- trunk
- Files:
-
- 152 added
- 13 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r73750 r73756 1 2010-12-09 Jenn Braithwaite <jennb@chromium.org> 2 3 Reviewed by Adam Barth. 4 5 TextResourceDecoder::checkForHeadCharset can look way past the limit. 6 https://bugs.webkit.org/show_bug.cgi?id=47397 7 8 Replaced charset detection algorithm with real parser. 9 Added tests for parser bugs mentioned in the thread for this bug report. 10 Converted hixie's encoding parsing tests to a layout test. 11 Added http-equiv attribute to meta tag in 2 existing tests. 12 13 * fast/encoding/bracket-in-script-expected.txt: Added. 14 * fast/encoding/bracket-in-script.html: Added. 15 * fast/encoding/bracket-in-tag-expected.txt: Added. 16 * fast/encoding/bracket-in-tag.html: Added. 17 * fast/encoding/escaped-bracket-expected.txt: Added. 18 * fast/encoding/escaped-bracket.html: Added. 19 * fast/encoding/meta-in-body-expected.txt: Added. 20 * fast/encoding/meta-in-body.html: Added. 21 * fast/encoding/meta-in-script-expected.txt: Added. 22 * fast/encoding/meta-in-script.html: Added. 23 * fast/encoding/meta-in-title-expected.txt: Added. 24 * fast/encoding/meta-in-title.html: Added. 25 * fast/encoding/mismatched-end-tag-expected.txt: Added. 26 * fast/encoding/mismatched-end-tag.html: Added. 27 * fast/encoding/namespace-meta-expected.txt: Added. 28 * fast/encoding/namespace-meta.html: Added. 29 * fast/encoding/namespace-tolerance.html: 30 * fast/encoding/not-http-equiv-content-expected.txt: Added. 31 * fast/encoding/not-http-equiv-content.html: Added. 32 * fast/encoding/parser-tests-expected.txt: Added. 33 * fast/encoding/parser-tests.html: Added. 34 * fast/encoding/quotes-in-title-expected.txt: Added. 35 * fast/encoding/quotes-in-title.html: Added. 36 * fast/encoding/resources/001.html: Added. 37 * fast/encoding/resources/002.html: Added. 38 * fast/encoding/resources/003.html: Added. 39 * fast/encoding/resources/004.html: Added. 40 * fast/encoding/resources/005.html: Added. 41 * fast/encoding/resources/006.html: Added. 42 * fast/encoding/resources/007.html: Added. 
43 * fast/encoding/resources/008.html: Added. 44 * fast/encoding/resources/009.html: Added. 45 * fast/encoding/resources/010.html: Added. 46 * fast/encoding/resources/011.html: Added. 47 * fast/encoding/resources/012.html: Added. 48 * fast/encoding/resources/013.html: Added. 49 * fast/encoding/resources/014.html: Added. 50 * fast/encoding/resources/015.html: Added. 51 * fast/encoding/resources/016.html: Added. 52 * fast/encoding/resources/017.html: Added. 53 * fast/encoding/resources/018.html: Added. 54 * fast/encoding/resources/019.html: Added. 55 * fast/encoding/resources/020.html: Added. 56 * fast/encoding/resources/021.html: Added. 57 * fast/encoding/resources/022.html: Added. 58 * fast/encoding/resources/023.html: Added. 59 * fast/encoding/resources/024.html: Added. 60 * fast/encoding/resources/025.html: Added. 61 * fast/encoding/resources/026.html: Added. 62 * fast/encoding/resources/027.html: Added. 63 * fast/encoding/resources/028.html: Added. 64 * fast/encoding/resources/029.html: Added. 65 * fast/encoding/resources/030.html: Added. 66 * fast/encoding/resources/031.html: Added. 67 * fast/encoding/resources/032.html: Added. 68 * fast/encoding/resources/033.html: Added. 69 * fast/encoding/resources/034.html: Added. 70 * fast/encoding/resources/035.html: Added. 71 * fast/encoding/resources/036.html: Added. 72 * fast/encoding/resources/037.html: Added. 73 * fast/encoding/resources/038.html: Added. 74 * fast/encoding/resources/039.html: Added. 75 * fast/encoding/resources/040.html: Added. 76 * fast/encoding/resources/041.html: Added. 77 * fast/encoding/resources/042.html: Added. 78 * fast/encoding/resources/043.html: Added. 79 * fast/encoding/resources/044.html: Added. 80 * fast/encoding/resources/045.html: Added. 81 * fast/encoding/resources/046.html: Added. 82 * fast/encoding/resources/047.html: Added. 83 * fast/encoding/resources/048.html: Added. 84 * fast/encoding/resources/049.html: Added. 85 * fast/encoding/resources/050.html: Added. 
86 * fast/encoding/resources/051.html: Added. 87 * fast/encoding/resources/052.html: Added. 88 * fast/encoding/resources/053.html: Added. 89 * fast/encoding/resources/054.html: Added. 90 * fast/encoding/resources/055.html: Added. 91 * fast/encoding/resources/056.html: Added. 92 * fast/encoding/resources/057.html: Added. 93 * fast/encoding/resources/058.html: Added. 94 * fast/encoding/resources/059.html: Added. 95 * fast/encoding/resources/060.html: Added. 96 * fast/encoding/resources/061.html: Added. 97 * fast/encoding/resources/062.html: Added. 98 * fast/encoding/resources/063.html: Added. 99 * fast/encoding/resources/064.html: Added. 100 * fast/encoding/resources/065.html: Added. 101 * fast/encoding/resources/066.html: Added. 102 * fast/encoding/resources/067.html: Added. 103 * fast/encoding/resources/068.html: Added. 104 * fast/encoding/resources/069.html: Added. 105 * fast/encoding/resources/070.html: Added. 106 * fast/encoding/resources/071.html: Added. 107 * fast/encoding/resources/072.html: Added. 108 * fast/encoding/resources/073.html: Added. 109 * fast/encoding/resources/074.html: Added. 110 * fast/encoding/resources/075.html: Added. 111 * fast/encoding/resources/076.html: Added. 112 * fast/encoding/resources/077.html: Added. 113 * fast/encoding/resources/078.html: Added. 114 * fast/encoding/resources/079.html: Added. 115 * fast/encoding/resources/080.html: Added. 116 * fast/encoding/resources/081.html: Added. 117 * fast/encoding/resources/082.html: Added. 118 * fast/encoding/resources/083.html: Added. 119 * fast/encoding/resources/084.html: Added. 120 * fast/encoding/resources/085.html: Added. 121 * fast/encoding/resources/086.html: Added. 122 * fast/encoding/resources/087.html: Added. 123 * fast/encoding/resources/088.html: Added. 124 * fast/encoding/resources/089.html: Added. 125 * fast/encoding/resources/090.html: Added. 126 * fast/encoding/resources/091.html: Added. 127 * fast/encoding/resources/092.html: Added. 
128 * fast/encoding/resources/093.html: Added. 129 * fast/encoding/resources/094.html: Added. 130 * fast/encoding/resources/095.html: Added. 131 * fast/encoding/resources/096.html: Added. 132 * fast/encoding/resources/097.html: Added. 133 * fast/encoding/resources/098.html: Added. 134 * fast/encoding/resources/099.html: Added. 135 * fast/encoding/resources/100.html: Added. 136 * fast/encoding/resources/101.html: Added. 137 * fast/encoding/resources/102.html: Added. 138 * fast/encoding/resources/103.html: Added. 139 * fast/encoding/resources/104.html: Added. 140 * fast/encoding/resources/105.html: Added. 141 * fast/encoding/resources/106.html: Added. 142 * fast/encoding/resources/107.html: Added. 143 * fast/encoding/resources/108.html: Added. 144 * fast/encoding/resources/109.html: Added. 145 * fast/encoding/resources/110.html: Added. 146 * fast/encoding/resources/111.html: Added. 147 * fast/encoding/resources/112.html: Added. 148 * fast/encoding/resources/113.html: Added. 149 * fast/encoding/resources/114.html: Added. 150 * fast/encoding/resources/115.html: Added. 151 * fast/encoding/resources/116.html: Added. 152 * fast/encoding/resources/117.html: Added. 153 * fast/encoding/resources/118.html: Added. 154 * fast/encoding/resources/119.html: Added. 155 * fast/encoding/resources/120.html: Added. 156 * fast/encoding/resources/121.html: Added. 157 * fast/encoding/resources/122.html: Added. 158 * fast/encoding/resources/123.html: Added. 159 * fast/encoding/tag-name-digit-expected.txt: Added. 160 * fast/encoding/tag-name-digit.html: Added. 161 * fast/text/international/bidi-innertext.html: 162 * http/tests/misc/charset-sniffer-end-sniffing-expected.txt: Added. 163 * http/tests/misc/charset-sniffer-end-sniffing.html: Added. 164 * http/tests/misc/resources/charset-sniffer-end-sniffing.php: Added. 165 1 166 2010-12-10 Mihai Parparita <mihaip@chromium.org> 2 167 -
trunk/LayoutTests/fast/encoding/namespace-tolerance.html
r25130 r73756 1 1 <xhtml:html xmlns:xhtml=""> 2 <meta content="charset=UTF-8" > 2 <meta content="charset=UTF-8" http-equiv="Content-Type"> 3 3 4 4 This test ensures a UTF-8 encoding is properly set on documents that: -
trunk/LayoutTests/fast/text/international/bidi-innertext.html
r29585 r73756 1 1 <html> 2 <meta name="content-type" content="text/html; charset=utf-8"> 2 <meta http-equiv="content-type" content="text/html; charset=utf-8"> 3 3 <script> 4 4 function print(message) -
trunk/WebCore/Android.mk
r73749 r73756 335 335 html/parser/HTMLEntityParser.cpp \ 336 336 html/parser/HTMLFormattingElementList.cpp \ 337 html/parser/HTMLMetaCharsetParser.cpp \ 337 338 html/parser/HTMLParserIdioms.cpp \ 338 339 html/parser/HTMLParserScheduler.cpp \ -
trunk/WebCore/CMakeLists.txt
r73749 r73756 1132 1132 html/parser/HTMLParserScheduler.cpp 1133 1133 html/parser/HTMLFormattingElementList.cpp 1134 html/parser/HTMLMetaCharsetParser.cpp 1134 1135 html/parser/HTMLPreloadScanner.cpp 1135 1136 html/parser/HTMLScriptRunner.cpp -
trunk/WebCore/ChangeLog
r73749 r73756 1 2010-12-09 Jenn Braithwaite <jennb@chromium.org> 2 3 Reviewed by Adam Barth. 4 5 TextResourceDecoder::checkForHeadCharset can look way past the limit. 6 https://bugs.webkit.org/show_bug.cgi?id=47397 7 8 Replaced charset detection algorithm with real parser. 9 Added tests for parser bugs mentioned in the thread for this bug report. 10 Converted hixie's encoding parsing tests to a layout test. 11 12 Tests: fast/encoding/bracket-in-script.html 13 fast/encoding/bracket-in-tag.html 14 fast/encoding/escaped-bracket.html 15 fast/encoding/meta-in-body.html 16 fast/encoding/meta-in-script.html 17 fast/encoding/meta-in-title.html 18 fast/encoding/mismatched-end-tag.html 19 fast/encoding/namespace-meta.html 20 fast/encoding/not-http-equiv-content.html 21 fast/encoding/parser-tests.html 22 fast/encoding/quotes-in-title.html 23 fast/encoding/tag-name-digit.html 24 http/tests/misc/charset-sniffer-end-sniffing.html 25 26 * Android.mk: 27 * CMakeLists.txt: 28 * GNUmakefile.am: 29 * WebCore.gypi: 30 * WebCore.pro: 31 * WebCore.vcproj/WebCore.vcproj: 32 * WebCore.xcodeproj/project.pbxproj: 33 * html/parser/HTMLMetaCharsetParser.cpp: Added. 34 (WebCore::HTMLMetaCharsetParser::HTMLMetaCharsetParser): 35 (WebCore::HTMLMetaCharsetParser::~HTMLMetaCharsetParser): 36 (WebCore::HTMLMetaCharsetParser::extractCharset): 37 (WebCore::HTMLMetaCharsetParser::processMeta): 38 (WebCore::HTMLMetaCharsetParser::checkForMetaCharset): 39 * html/parser/HTMLMetaCharsetParser.h: Added. 40 (WebCore::HTMLMetaCharsetParser::create): 41 (WebCore::HTMLMetaCharsetParser::encoding): 42 * loader/TextResourceDecoder.cpp: 43 (WebCore::TextResourceDecoder::checkForHeadCharset): 44 (WebCore::TextResourceDecoder::checkForMetaCharset): 45 * loader/TextResourceDecoder.h: 46 1 47 2010-12-10 Nate Chapin <japhet@chromium.org> 2 48 -
trunk/WebCore/GNUmakefile.am
r73749 r73756 1828 1828 WebCore/html/parser/HTMLFormattingElementList.h \ 1829 1829 WebCore/html/parser/HTMLInputStream.h \ 1830 WebCore/html/parser/HTMLMetaCharsetParser.cpp \ 1831 WebCore/html/parser/HTMLMetaCharsetParser.h \ 1830 1832 WebCore/html/parser/HTMLParserIdioms.cpp \ 1831 1833 WebCore/html/parser/HTMLParserIdioms.h \ -
trunk/WebCore/WebCore.gypi
r73749 r73756 1960 1960 'html/parser/HTMLFormattingElementList.h', 1961 1961 'html/parser/HTMLInputStream.h', 1962 'html/parser/HTMLMetaCharsetParser.cpp', 1963 'html/parser/HTMLMetaCharsetParser.h', 1962 1964 'html/parser/HTMLParserIdioms.cpp', 1963 1965 'html/parser/HTMLParserIdioms.h', -
trunk/WebCore/WebCore.pro
r73749 r73756 1010 1010 html/parser/HTMLEntitySearch.cpp \ 1011 1011 html/parser/HTMLFormattingElementList.cpp \ 1012 html/parser/HTMLMetaCharsetParser.cpp \ 1012 1013 html/parser/HTMLParserIdioms.cpp \ 1013 1014 html/parser/HTMLParserScheduler.cpp \ -
trunk/WebCore/WebCore.vcproj/WebCore.vcproj
r73749 r73756 55163 55163 </File> 55164 55164 <File 55165 RelativePath="..\html\parser\HTMLMetaCharsetParser.cpp" 55166 > 55167 </File> 55168 <File 55169 RelativePath="..\html\parser\HTMLMetaCharsetParser.h" 55170 > 55171 </File> 55172 <File 55165 55173 RelativePath="..\html\parser\HTMLParserIdioms.cpp" 55166 55174 > -
trunk/WebCore/WebCore.xcodeproj/project.pbxproj
r73749 r73756 727 727 29A8124A0FBB9CA900510293 /* AccessibilityObjectWrapper.mm in Sources */ = {isa = PBXBuildFile; fileRef = 29A812460FBB9CA900510293 /* AccessibilityObjectWrapper.mm */; }; 728 728 29A8124B0FBB9CA900510293 /* AXObjectCacheMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 29A812470FBB9CA900510293 /* AXObjectCacheMac.mm */; }; 729 2BE8E2C712A589EC00FAD550 /* HTMLMetaCharsetParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 2BE8E2C612A589EC00FAD550 /* HTMLMetaCharsetParser.h */; }; 730 2BE8E2C912A58A0100FAD550 /* HTMLMetaCharsetParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BE8E2C812A58A0100FAD550 /* HTMLMetaCharsetParser.cpp */; }; 729 731 2D9066060BE141D400956998 /* LayoutState.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2D9066040BE141D400956998 /* LayoutState.cpp */; }; 730 732 2D9066070BE141D400956998 /* LayoutState.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D9066050BE141D400956998 /* LayoutState.h */; settings = {ATTRIBUTES = (Private, ); }; }; … … 7002 7004 29A812460FBB9CA900510293 /* AccessibilityObjectWrapper.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AccessibilityObjectWrapper.mm; sourceTree = "<group>"; }; 7003 7005 29A812470FBB9CA900510293 /* AXObjectCacheMac.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AXObjectCacheMac.mm; sourceTree = "<group>"; }; 7006 2BE8E2C612A589EC00FAD550 /* HTMLMetaCharsetParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HTMLMetaCharsetParser.h; path = parser/HTMLMetaCharsetParser.h; sourceTree = "<group>"; }; 7007 2BE8E2C812A58A0100FAD550 /* HTMLMetaCharsetParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = HTMLMetaCharsetParser.cpp; path = parser/HTMLMetaCharsetParser.cpp; sourceTree = "<group>"; }; 7004 7008 2D9066040BE141D400956998 /* LayoutState.cpp */ = {isa 
= PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = LayoutState.cpp; sourceTree = "<group>"; }; 7005 7009 2D9066050BE141D400956998 /* LayoutState.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LayoutState.h; sourceTree = "<group>"; }; … … 15643 15647 977B3856122883E900B81FF8 /* HTMLFormattingElementList.h */, 15644 15648 97BC849A12370A4B000C6161 /* HTMLInputStream.h */, 15649 2BE8E2C812A58A0100FAD550 /* HTMLMetaCharsetParser.cpp */, 15650 2BE8E2C612A589EC00FAD550 /* HTMLMetaCharsetParser.h */, 15645 15651 93E2A304123E9DC0009FE12A /* HTMLParserIdioms.cpp */, 15646 15652 93E2A305123E9DC0009FE12A /* HTMLParserIdioms.h */, … … 20490 20496 E44613A50CD6331000FADA75 /* HTMLMediaElement.h in Headers */, 20491 20497 A8EA79F40A1916DF00A8EF5F /* HTMLMenuElement.h in Headers */, 20498 2BE8E2C712A589EC00FAD550 /* HTMLMetaCharsetParser.h in Headers */, 20492 20499 A871DC240A15205700B12A68 /* HTMLMetaElement.h in Headers */, 20493 20500 A454424B119B3661009BE912 /* HTMLMeterElement.h in Headers */, … … 23218 23225 E44613A40CD6331000FADA75 /* HTMLMediaElement.cpp in Sources */, 23219 23226 A8EA79F80A1916DF00A8EF5F /* HTMLMenuElement.cpp in Sources */, 23227 2BE8E2C912A58A0100FAD550 /* HTMLMetaCharsetParser.cpp in Sources */, 23220 23228 A871DC270A15205700B12A68 /* HTMLMetaElement.cpp in Sources */, 23221 23229 A454424A119B3661009BE912 /* HTMLMeterElement.cpp in Sources */, -
trunk/WebCore/loader/TextResourceDecoder.cpp
r64817 r73756 25 25 26 26 #include "DOMImplementation.h" 27 #include "HTMLMetaCharsetParser.h" 27 28 #include "HTMLNames.h" 28 29 #include "TextCodec.h" … … 52 53 for (size_t j = 0; j < targetLength; ++j) { 53 54 if (subject[i + j] != target[j]) { 54 match = false;55 break;56 }57 }58 if (match)59 return i;60 }61 return -1;62 }63 64 static int findIgnoringCase(const char* subject, size_t subjectLength, const char* target)65 {66 size_t targetLength = strlen(target);67 if (targetLength > subjectLength)68 return -1;69 #ifndef NDEBUG70 for (size_t i = 0; i < targetLength; ++i)71 ASSERT(isASCIILower(target[i]));72 #endif73 for (size_t i = 0; i <= subjectLength - targetLength; ++i) {74 bool match = true;75 for (size_t j = 0; j < targetLength; ++j) {76 if (toASCIILower(subject[i + j]) != target[j]) {77 55 match = false; 78 56 break; … … 535 513 } 536 514 537 const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.538 539 515 bool TextResourceDecoder::checkForHeadCharset(const char* data, size_t len, bool& movedDataToBuffer) 540 516 { … … 552 528 553 529 movedDataToBuffer = true; 530 531 // Continue with checking for an HTML meta tag if we were already doing so. 532 if (m_charsetParser) 533 return checkForMetaCharset(data, len); 554 534 555 535 const char* ptr = m_buffer.data(); … … 588 568 } 589 569 590 // we still don't have an encoding, and are in the head 591 // the following tags are allowed in <head>: 592 // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE 593 594 // We stop scanning when a tag that is not permitted in <head> 595 // is seen, rather when </head> is seen, because that more closely 596 // matches behavior in other browsers; more details in 597 // <http://bugs.webkit.org/show_bug.cgi?id=3590>. 
598 599 // Additionally, we ignore things that looks like tags in <title>, <script> and <noscript>; see 600 // <http://bugs.webkit.org/show_bug.cgi?id=4560>, <http://bugs.webkit.org/show_bug.cgi?id=12165> 601 // and <http://bugs.webkit.org/show_bug.cgi?id=12389>. 602 603 // Since many sites have charset declarations after <body> or other tags that are disallowed in <head>, 604 // we don't bail out until we've checked at least bytesToCheckUnconditionally bytes of input. 605 606 AtomicStringImpl* enclosingTagName = 0; 607 bool inHeadSection = true; // Becomes false when </head> or any tag not allowed in head is encountered. 608 609 // the HTTP-EQUIV meta has no effect on XHTML 570 // The HTTP-EQUIV meta has no effect on XHTML. 610 571 if (m_contentType == XML) 611 572 return true; 612 573 613 while (ptr + 3 < pEnd) { // +3 guarantees that "<!--" fits in the buffer - and certainly we aren't going to lose any "charset" that way. 614 if (*ptr == '<') { 615 bool end = false; 616 ptr++; 617 618 // Handle comments. 619 if (ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') { 620 ptr += 3; 621 skipComment(ptr, pEnd); 622 if (ptr - m_buffer.data() >= bytesToCheckUnconditionally && !inHeadSection) { 623 // Some pages that test bandwidth from within the browser do it by having 624 // huge comments and measuring the time they take to load. Repeatedly scanning 625 // these comments can take a lot of CPU time. 626 m_checkedForHeadCharset = true; 627 return true; 628 } 629 continue; 630 } 631 632 if (*ptr == '/') { 633 ++ptr; 634 end = true; 635 } 636 637 // Grab the tag name, but mostly ignore namespaces. 
638 bool sawNamespace = false; 639 char tagBuffer[20]; 640 int len = 0; 641 while (len < 19) { 642 if (ptr == pEnd) 643 return false; 644 char c = *ptr; 645 if (c == ':') { 646 len = 0; 647 sawNamespace = true; 648 ptr++; 649 continue; 650 } 651 if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) 652 ; 653 else if (c >= 'A' && c <= 'Z') 654 c += 'a' - 'A'; 655 else 656 break; 657 tagBuffer[len++] = c; 658 ptr++; 659 } 660 tagBuffer[len] = 0; 661 AtomicString tag(tagBuffer); 662 663 if (enclosingTagName) { 664 if (end && tag.impl() == enclosingTagName) 665 enclosingTagName = 0; 666 } else { 667 if (tag == titleTag) 668 enclosingTagName = titleTag.localName().impl(); 669 else if (tag == scriptTag) 670 enclosingTagName = scriptTag.localName().impl(); 671 else if (tag == noscriptTag) 672 enclosingTagName = noscriptTag.localName().impl(); 673 } 674 675 // Find where the opening tag ends. 676 const char* tagContentStart = ptr; 677 if (!end) { 678 while (ptr != pEnd && *ptr != '>') { 679 if (*ptr == '\'' || *ptr == '"') { 680 char quoteMark = *ptr; 681 ++ptr; 682 while (ptr != pEnd && *ptr != quoteMark) 683 ++ptr; 684 if (ptr == pEnd) 685 return false; 686 } 687 ++ptr; 688 } 689 if (ptr == pEnd) 690 return false; 691 ++ptr; 692 } 693 694 if (!end && tag == metaTag && !sawNamespace) { 695 const char* str = tagContentStart; 696 int length = ptr - tagContentStart; 697 int pos = 0; 698 while (pos < length) { 699 int charsetPos = findIgnoringCase(str + pos, length - pos, "charset"); 700 if (charsetPos == -1) 701 break; 702 pos += charsetPos + 7; 703 // skip whitespace 704 while (pos < length && str[pos] <= ' ') 705 pos++; 706 if (pos == length) 707 break; 708 if (str[pos++] != '=') 709 continue; 710 while ((pos < length) && 711 (str[pos] <= ' ' || str[pos] == '=' || str[pos] == '"' || str[pos] == '\'')) 712 pos++; 713 714 // end ? 
715 if (pos == length) 716 break; 717 int end = pos; 718 while (end < length && 719 str[end] != ' ' && str[end] != '"' && str[end] != '\'' && 720 str[end] != ';' && str[end] != '>') 721 end++; 722 setEncoding(findTextEncoding(str + pos, end - pos), EncodingFromMetaTag); 723 if (m_source == EncodingFromMetaTag) 724 return true; 725 726 if (end >= length || str[end] == '/' || str[end] == '>') 727 break; 728 729 pos = end + 1; 730 } 731 } else { 732 if (!enclosingTagName && tag != scriptTag && tag != noscriptTag && tag != styleTag 733 && tag != linkTag && tag != metaTag && tag != objectTag && tag != titleTag && tag != baseTag 734 && (end || tag != htmlTag) && (end || tag != headTag) && isASCIIAlpha(tagBuffer[0])) { 735 inHeadSection = false; 736 } 737 738 if (ptr - m_buffer.data() >= bytesToCheckUnconditionally && !inHeadSection) { 739 m_checkedForHeadCharset = true; 740 return true; 741 } 742 } 743 } else 744 ++ptr; 745 } 746 return false; 574 m_charsetParser = HTMLMetaCharsetParser::create(); 575 return checkForMetaCharset(data, len); 576 } 577 578 bool TextResourceDecoder::checkForMetaCharset(const char* data, size_t length) 579 { 580 if (!m_charsetParser->checkForMetaCharset(data, length)) 581 return false; 582 583 setEncoding(m_charsetParser->encoding(), EncodingFromMetaTag); 584 m_charsetParser.clear(); 585 m_checkedForHeadCharset = true; 586 return true; 747 587 } 748 588 -
trunk/WebCore/loader/TextResourceDecoder.h
r42026 r73756 27 27 28 28 namespace WebCore { 29 30 class HTMLMetaCharsetParser; 29 31 30 32 class TextResourceDecoder : public RefCounted<TextResourceDecoder> { … … 75 77 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 76 78 bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer); 79 bool checkForMetaCharset(const char*, size_t); 77 80 void detectJapaneseEncoding(const char*, size_t); 78 81 bool shouldAutoDetect() const; … … 90 93 bool m_sawError; 91 94 bool m_usesEncodingDetector; 95 96 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 92 97 }; 93 98
Note: See TracChangeset
for help on using the changeset viewer.