Changeset 73756 in webkit


Ignore:
Timestamp:
Dec 10, 2010 11:50:38 AM (13 years ago)
Author:
commit-queue@webkit.org
Message:

2010-12-09 Jenn Braithwaite <jennb@chromium.org>

Reviewed by Adam Barth.

TextResourceDecoder::checkForHeadCharset can look way past the limit.
https://bugs.webkit.org/show_bug.cgi?id=47397

Replaced charset detection algorithm with real parser.
Added tests for parser bugs mentioned in the thread for this bug report.
Converted hixie's encoding parsing tests to a layout test.

Tests: fast/encoding/bracket-in-script.html
fast/encoding/bracket-in-tag.html
fast/encoding/escaped-bracket.html
fast/encoding/meta-in-body.html
fast/encoding/meta-in-script.html
fast/encoding/meta-in-title.html
fast/encoding/mismatched-end-tag.html
fast/encoding/namespace-meta.html
fast/encoding/not-http-equiv-content.html
fast/encoding/parser-tests.html
fast/encoding/quotes-in-title.html
fast/encoding/tag-name-digit.html
http/tests/misc/charset-sniffer-end-sniffing.html

  • Android.mk:
  • CMakeLists.txt:
  • GNUmakefile.am:
  • WebCore.gypi:
  • WebCore.pro:
  • WebCore.vcproj/WebCore.vcproj:
  • WebCore.xcodeproj/project.pbxproj:
  • html/parser/HTMLMetaCharsetParser.cpp: Added. (WebCore::HTMLMetaCharsetParser::HTMLMetaCharsetParser): (WebCore::HTMLMetaCharsetParser::~HTMLMetaCharsetParser): (WebCore::HTMLMetaCharsetParser::extractCharset): (WebCore::HTMLMetaCharsetParser::processMeta): (WebCore::HTMLMetaCharsetParser::checkForMetaCharset):
  • html/parser/HTMLMetaCharsetParser.h: Added. (WebCore::HTMLMetaCharsetParser::create): (WebCore::HTMLMetaCharsetParser::encoding):
  • loader/TextResourceDecoder.cpp: (WebCore::TextResourceDecoder::checkForHeadCharset): (WebCore::TextResourceDecoder::checkForMetaCharset):
  • loader/TextResourceDecoder.h:

2010-12-09 Jenn Braithwaite <jennb@chromium.org>

Reviewed by Adam Barth.

TextResourceDecoder::checkForHeadCharset can look way past the limit.
https://bugs.webkit.org/show_bug.cgi?id=47397

Replaced charset detection algorithm with real parser.
Added tests for parser bugs mentioned in the thread for this bug report.
Converted hixie's encoding parsing tests to a layout test.
Added http-equiv attribute to meta tag in 2 existing tests.

  • fast/encoding/bracket-in-script-expected.txt: Added.
  • fast/encoding/bracket-in-script.html: Added.
  • fast/encoding/bracket-in-tag-expected.txt: Added.
  • fast/encoding/bracket-in-tag.html: Added.
  • fast/encoding/escaped-bracket-expected.txt: Added.
  • fast/encoding/escaped-bracket.html: Added.
  • fast/encoding/meta-in-body-expected.txt: Added.
  • fast/encoding/meta-in-body.html: Added.
  • fast/encoding/meta-in-script-expected.txt: Added.
  • fast/encoding/meta-in-script.html: Added.
  • fast/encoding/meta-in-title-expected.txt: Added.
  • fast/encoding/meta-in-title.html: Added.
  • fast/encoding/mismatched-end-tag-expected.txt: Added.
  • fast/encoding/mismatched-end-tag.html: Added.
  • fast/encoding/namespace-meta-expected.txt: Added.
  • fast/encoding/namespace-meta.html: Added.
  • fast/encoding/namespace-tolerance.html:
  • fast/encoding/not-http-equiv-content-expected.txt: Added.
  • fast/encoding/not-http-equiv-content.html: Added.
  • fast/encoding/parser-tests-expected.txt: Added.
  • fast/encoding/parser-tests.html: Added.
  • fast/encoding/quotes-in-title-expected.txt: Added.
  • fast/encoding/quotes-in-title.html: Added.
  • fast/encoding/resources/001.html: Added.
  • fast/encoding/resources/002.html: Added.
  • fast/encoding/resources/003.html: Added.
  • fast/encoding/resources/004.html: Added.
  • fast/encoding/resources/005.html: Added.
  • fast/encoding/resources/006.html: Added.
  • fast/encoding/resources/007.html: Added.
  • fast/encoding/resources/008.html: Added.
  • fast/encoding/resources/009.html: Added.
  • fast/encoding/resources/010.html: Added.
  • fast/encoding/resources/011.html: Added.
  • fast/encoding/resources/012.html: Added.
  • fast/encoding/resources/013.html: Added.
  • fast/encoding/resources/014.html: Added.
  • fast/encoding/resources/015.html: Added.
  • fast/encoding/resources/016.html: Added.
  • fast/encoding/resources/017.html: Added.
  • fast/encoding/resources/018.html: Added.
  • fast/encoding/resources/019.html: Added.
  • fast/encoding/resources/020.html: Added.
  • fast/encoding/resources/021.html: Added.
  • fast/encoding/resources/022.html: Added.
  • fast/encoding/resources/023.html: Added.
  • fast/encoding/resources/024.html: Added.
  • fast/encoding/resources/025.html: Added.
  • fast/encoding/resources/026.html: Added.
  • fast/encoding/resources/027.html: Added.
  • fast/encoding/resources/028.html: Added.
  • fast/encoding/resources/029.html: Added.
  • fast/encoding/resources/030.html: Added.
  • fast/encoding/resources/031.html: Added.
  • fast/encoding/resources/032.html: Added.
  • fast/encoding/resources/033.html: Added.
  • fast/encoding/resources/034.html: Added.
  • fast/encoding/resources/035.html: Added.
  • fast/encoding/resources/036.html: Added.
  • fast/encoding/resources/037.html: Added.
  • fast/encoding/resources/038.html: Added.
  • fast/encoding/resources/039.html: Added.
  • fast/encoding/resources/040.html: Added.
  • fast/encoding/resources/041.html: Added.
  • fast/encoding/resources/042.html: Added.
  • fast/encoding/resources/043.html: Added.
  • fast/encoding/resources/044.html: Added.
  • fast/encoding/resources/045.html: Added.
  • fast/encoding/resources/046.html: Added.
  • fast/encoding/resources/047.html: Added.
  • fast/encoding/resources/048.html: Added.
  • fast/encoding/resources/049.html: Added.
  • fast/encoding/resources/050.html: Added.
  • fast/encoding/resources/051.html: Added.
  • fast/encoding/resources/052.html: Added.
  • fast/encoding/resources/053.html: Added.
  • fast/encoding/resources/054.html: Added.
  • fast/encoding/resources/055.html: Added.
  • fast/encoding/resources/056.html: Added.
  • fast/encoding/resources/057.html: Added.
  • fast/encoding/resources/058.html: Added.
  • fast/encoding/resources/059.html: Added.
  • fast/encoding/resources/060.html: Added.
  • fast/encoding/resources/061.html: Added.
  • fast/encoding/resources/062.html: Added.
  • fast/encoding/resources/063.html: Added.
  • fast/encoding/resources/064.html: Added.
  • fast/encoding/resources/065.html: Added.
  • fast/encoding/resources/066.html: Added.
  • fast/encoding/resources/067.html: Added.
  • fast/encoding/resources/068.html: Added.
  • fast/encoding/resources/069.html: Added.
  • fast/encoding/resources/070.html: Added.
  • fast/encoding/resources/071.html: Added.
  • fast/encoding/resources/072.html: Added.
  • fast/encoding/resources/073.html: Added.
  • fast/encoding/resources/074.html: Added.
  • fast/encoding/resources/075.html: Added.
  • fast/encoding/resources/076.html: Added.
  • fast/encoding/resources/077.html: Added.
  • fast/encoding/resources/078.html: Added.
  • fast/encoding/resources/079.html: Added.
  • fast/encoding/resources/080.html: Added.
  • fast/encoding/resources/081.html: Added.
  • fast/encoding/resources/082.html: Added.
  • fast/encoding/resources/083.html: Added.
  • fast/encoding/resources/084.html: Added.
  • fast/encoding/resources/085.html: Added.
  • fast/encoding/resources/086.html: Added.
  • fast/encoding/resources/087.html: Added.
  • fast/encoding/resources/088.html: Added.
  • fast/encoding/resources/089.html: Added.
  • fast/encoding/resources/090.html: Added.
  • fast/encoding/resources/091.html: Added.
  • fast/encoding/resources/092.html: Added.
  • fast/encoding/resources/093.html: Added.
  • fast/encoding/resources/094.html: Added.
  • fast/encoding/resources/095.html: Added.
  • fast/encoding/resources/096.html: Added.
  • fast/encoding/resources/097.html: Added.
  • fast/encoding/resources/098.html: Added.
  • fast/encoding/resources/099.html: Added.
  • fast/encoding/resources/100.html: Added.
  • fast/encoding/resources/101.html: Added.
  • fast/encoding/resources/102.html: Added.
  • fast/encoding/resources/103.html: Added.
  • fast/encoding/resources/104.html: Added.
  • fast/encoding/resources/105.html: Added.
  • fast/encoding/resources/106.html: Added.
  • fast/encoding/resources/107.html: Added.
  • fast/encoding/resources/108.html: Added.
  • fast/encoding/resources/109.html: Added.
  • fast/encoding/resources/110.html: Added.
  • fast/encoding/resources/111.html: Added.
  • fast/encoding/resources/112.html: Added.
  • fast/encoding/resources/113.html: Added.
  • fast/encoding/resources/114.html: Added.
  • fast/encoding/resources/115.html: Added.
  • fast/encoding/resources/116.html: Added.
  • fast/encoding/resources/117.html: Added.
  • fast/encoding/resources/118.html: Added.
  • fast/encoding/resources/119.html: Added.
  • fast/encoding/resources/120.html: Added.
  • fast/encoding/resources/121.html: Added.
  • fast/encoding/resources/122.html: Added.
  • fast/encoding/resources/123.html: Added.
  • fast/encoding/tag-name-digit-expected.txt: Added.
  • fast/encoding/tag-name-digit.html: Added.
  • fast/text/international/bidi-innertext.html:
  • http/tests/misc/charset-sniffer-end-sniffing-expected.txt: Added.
  • http/tests/misc/charset-sniffer-end-sniffing.html: Added.
  • http/tests/misc/resources/charset-sniffer-end-sniffing.php: Added.
Location:
trunk
Files:
152 added
13 edited

Legend:

Unmodified
Added
Removed
  • trunk/LayoutTests/ChangeLog

    r73750 r73756  
     1 2010-12-09  Jenn Braithwaite  <jennb@chromium.org>
     2
     3        Reviewed by Adam Barth.
     4
     5        TextResourceDecoder::checkForHeadCharset can look way past the limit.
     6        https://bugs.webkit.org/show_bug.cgi?id=47397
     7
     8        Replaced charset detection algorithm with real parser.
     9        Added tests for parser bugs mentioned in the thread for this bug report.
     10        Converted hixie's encoding parsing tests to a layout test.
     11        Added http-equiv attribute to meta tag in 2 existing tests.
     12
     13        * fast/encoding/bracket-in-script-expected.txt: Added.
     14        * fast/encoding/bracket-in-script.html: Added.
     15        * fast/encoding/bracket-in-tag-expected.txt: Added.
     16        * fast/encoding/bracket-in-tag.html: Added.
     17        * fast/encoding/escaped-bracket-expected.txt: Added.
     18        * fast/encoding/escaped-bracket.html: Added.
     19        * fast/encoding/meta-in-body-expected.txt: Added.
     20        * fast/encoding/meta-in-body.html: Added.
     21        * fast/encoding/meta-in-script-expected.txt: Added.
     22        * fast/encoding/meta-in-script.html: Added.
     23        * fast/encoding/meta-in-title-expected.txt: Added.
     24        * fast/encoding/meta-in-title.html: Added.
     25        * fast/encoding/mismatched-end-tag-expected.txt: Added.
     26        * fast/encoding/mismatched-end-tag.html: Added.
     27        * fast/encoding/namespace-meta-expected.txt: Added.
     28        * fast/encoding/namespace-meta.html: Added.
     29        * fast/encoding/namespace-tolerance.html:
     30        * fast/encoding/not-http-equiv-content-expected.txt: Added.
     31        * fast/encoding/not-http-equiv-content.html: Added.
     32        * fast/encoding/parser-tests-expected.txt: Added.
     33        * fast/encoding/parser-tests.html: Added.
     34        * fast/encoding/quotes-in-title-expected.txt: Added.
     35        * fast/encoding/quotes-in-title.html: Added.
     36        * fast/encoding/resources/001.html: Added.
     37        * fast/encoding/resources/002.html: Added.
     38        * fast/encoding/resources/003.html: Added.
     39        * fast/encoding/resources/004.html: Added.
     40        * fast/encoding/resources/005.html: Added.
     41        * fast/encoding/resources/006.html: Added.
     42        * fast/encoding/resources/007.html: Added.
     43        * fast/encoding/resources/008.html: Added.
     44        * fast/encoding/resources/009.html: Added.
     45        * fast/encoding/resources/010.html: Added.
     46        * fast/encoding/resources/011.html: Added.
     47        * fast/encoding/resources/012.html: Added.
     48        * fast/encoding/resources/013.html: Added.
     49        * fast/encoding/resources/014.html: Added.
     50        * fast/encoding/resources/015.html: Added.
     51        * fast/encoding/resources/016.html: Added.
     52        * fast/encoding/resources/017.html: Added.
     53        * fast/encoding/resources/018.html: Added.
     54        * fast/encoding/resources/019.html: Added.
     55        * fast/encoding/resources/020.html: Added.
     56        * fast/encoding/resources/021.html: Added.
     57        * fast/encoding/resources/022.html: Added.
     58        * fast/encoding/resources/023.html: Added.
     59        * fast/encoding/resources/024.html: Added.
     60        * fast/encoding/resources/025.html: Added.
     61        * fast/encoding/resources/026.html: Added.
     62        * fast/encoding/resources/027.html: Added.
     63        * fast/encoding/resources/028.html: Added.
     64        * fast/encoding/resources/029.html: Added.
     65        * fast/encoding/resources/030.html: Added.
     66        * fast/encoding/resources/031.html: Added.
     67        * fast/encoding/resources/032.html: Added.
     68        * fast/encoding/resources/033.html: Added.
     69        * fast/encoding/resources/034.html: Added.
     70        * fast/encoding/resources/035.html: Added.
     71        * fast/encoding/resources/036.html: Added.
     72        * fast/encoding/resources/037.html: Added.
     73        * fast/encoding/resources/038.html: Added.
     74        * fast/encoding/resources/039.html: Added.
     75        * fast/encoding/resources/040.html: Added.
     76        * fast/encoding/resources/041.html: Added.
     77        * fast/encoding/resources/042.html: Added.
     78        * fast/encoding/resources/043.html: Added.
     79        * fast/encoding/resources/044.html: Added.
     80        * fast/encoding/resources/045.html: Added.
     81        * fast/encoding/resources/046.html: Added.
     82        * fast/encoding/resources/047.html: Added.
     83        * fast/encoding/resources/048.html: Added.
     84        * fast/encoding/resources/049.html: Added.
     85        * fast/encoding/resources/050.html: Added.
     86        * fast/encoding/resources/051.html: Added.
     87        * fast/encoding/resources/052.html: Added.
     88        * fast/encoding/resources/053.html: Added.
     89        * fast/encoding/resources/054.html: Added.
     90        * fast/encoding/resources/055.html: Added.
     91        * fast/encoding/resources/056.html: Added.
     92        * fast/encoding/resources/057.html: Added.
     93        * fast/encoding/resources/058.html: Added.
     94        * fast/encoding/resources/059.html: Added.
     95        * fast/encoding/resources/060.html: Added.
     96        * fast/encoding/resources/061.html: Added.
     97        * fast/encoding/resources/062.html: Added.
     98        * fast/encoding/resources/063.html: Added.
     99        * fast/encoding/resources/064.html: Added.
     100        * fast/encoding/resources/065.html: Added.
     101        * fast/encoding/resources/066.html: Added.
     102        * fast/encoding/resources/067.html: Added.
     103        * fast/encoding/resources/068.html: Added.
     104        * fast/encoding/resources/069.html: Added.
     105        * fast/encoding/resources/070.html: Added.
     106        * fast/encoding/resources/071.html: Added.
     107        * fast/encoding/resources/072.html: Added.
     108        * fast/encoding/resources/073.html: Added.
     109        * fast/encoding/resources/074.html: Added.
     110        * fast/encoding/resources/075.html: Added.
     111        * fast/encoding/resources/076.html: Added.
     112        * fast/encoding/resources/077.html: Added.
     113        * fast/encoding/resources/078.html: Added.
     114        * fast/encoding/resources/079.html: Added.
     115        * fast/encoding/resources/080.html: Added.
     116        * fast/encoding/resources/081.html: Added.
     117        * fast/encoding/resources/082.html: Added.
     118        * fast/encoding/resources/083.html: Added.
     119        * fast/encoding/resources/084.html: Added.
     120        * fast/encoding/resources/085.html: Added.
     121        * fast/encoding/resources/086.html: Added.
     122        * fast/encoding/resources/087.html: Added.
     123        * fast/encoding/resources/088.html: Added.
     124        * fast/encoding/resources/089.html: Added.
     125        * fast/encoding/resources/090.html: Added.
     126        * fast/encoding/resources/091.html: Added.
     127        * fast/encoding/resources/092.html: Added.
     128        * fast/encoding/resources/093.html: Added.
     129        * fast/encoding/resources/094.html: Added.
     130        * fast/encoding/resources/095.html: Added.
     131        * fast/encoding/resources/096.html: Added.
     132        * fast/encoding/resources/097.html: Added.
     133        * fast/encoding/resources/098.html: Added.
     134        * fast/encoding/resources/099.html: Added.
     135        * fast/encoding/resources/100.html: Added.
     136        * fast/encoding/resources/101.html: Added.
     137        * fast/encoding/resources/102.html: Added.
     138        * fast/encoding/resources/103.html: Added.
     139        * fast/encoding/resources/104.html: Added.
     140        * fast/encoding/resources/105.html: Added.
     141        * fast/encoding/resources/106.html: Added.
     142        * fast/encoding/resources/107.html: Added.
     143        * fast/encoding/resources/108.html: Added.
     144        * fast/encoding/resources/109.html: Added.
     145        * fast/encoding/resources/110.html: Added.
     146        * fast/encoding/resources/111.html: Added.
     147        * fast/encoding/resources/112.html: Added.
     148        * fast/encoding/resources/113.html: Added.
     149        * fast/encoding/resources/114.html: Added.
     150        * fast/encoding/resources/115.html: Added.
     151        * fast/encoding/resources/116.html: Added.
     152        * fast/encoding/resources/117.html: Added.
     153        * fast/encoding/resources/118.html: Added.
     154        * fast/encoding/resources/119.html: Added.
     155        * fast/encoding/resources/120.html: Added.
     156        * fast/encoding/resources/121.html: Added.
     157        * fast/encoding/resources/122.html: Added.
     158        * fast/encoding/resources/123.html: Added.
     159        * fast/encoding/tag-name-digit-expected.txt: Added.
     160        * fast/encoding/tag-name-digit.html: Added.
     161        * fast/text/international/bidi-innertext.html:
     162        * http/tests/misc/charset-sniffer-end-sniffing-expected.txt: Added.
     163        * http/tests/misc/charset-sniffer-end-sniffing.html: Added.
     164        * http/tests/misc/resources/charset-sniffer-end-sniffing.php: Added.
     165
    1166 2010-12-10  Mihai Parparita  <mihaip@chromium.org>
    2167
  • trunk/LayoutTests/fast/encoding/namespace-tolerance.html

    r25130 r73756  
    11<xhtml:html xmlns:xhtml="">
    2 <meta content="charset=UTF-8">
     2<meta content="charset=UTF-8" http-equiv="Content-Type">
    33
    44This test ensures a UTF-8 encoding is properly set on documents that:
  • trunk/LayoutTests/fast/text/international/bidi-innertext.html

    r29585 r73756  
    11<html>
    2     <meta name="content-type" content="text/html; charset=utf-8">
     2    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    33    <script>
    44    function print(message)
  • trunk/WebCore/Android.mk

    r73749 r73756  
    335335        html/parser/HTMLEntityParser.cpp \
    336336        html/parser/HTMLFormattingElementList.cpp \
     337        html/parser/HTMLMetaCharsetParser.cpp \
    337338        html/parser/HTMLParserIdioms.cpp \
    338339        html/parser/HTMLParserScheduler.cpp \
  • trunk/WebCore/CMakeLists.txt

    r73749 r73756  
    11321132    html/parser/HTMLParserScheduler.cpp
    11331133    html/parser/HTMLFormattingElementList.cpp
     1134    html/parser/HTMLMetaCharsetParser.cpp
    11341135    html/parser/HTMLPreloadScanner.cpp
    11351136    html/parser/HTMLScriptRunner.cpp
  • trunk/WebCore/ChangeLog

    r73749 r73756  
     1 2010-12-09  Jenn Braithwaite  <jennb@chromium.org>
     2
     3        Reviewed by Adam Barth.
     4
     5        TextResourceDecoder::checkForHeadCharset can look way past the limit.
     6        https://bugs.webkit.org/show_bug.cgi?id=47397
     7
     8        Replaced charset detection algorithm with real parser.
     9        Added tests for parser bugs mentioned in the thread for this bug report.
     10        Converted hixie's encoding parsing tests to a layout test.
     11
     12        Tests: fast/encoding/bracket-in-script.html
     13               fast/encoding/bracket-in-tag.html
     14               fast/encoding/escaped-bracket.html
     15               fast/encoding/meta-in-body.html
     16               fast/encoding/meta-in-script.html
     17               fast/encoding/meta-in-title.html
     18               fast/encoding/mismatched-end-tag.html
     19               fast/encoding/namespace-meta.html
     20               fast/encoding/not-http-equiv-content.html
     21               fast/encoding/parser-tests.html
     22               fast/encoding/quotes-in-title.html
     23               fast/encoding/tag-name-digit.html
     24               http/tests/misc/charset-sniffer-end-sniffing.html
     25
     26        * Android.mk:
     27        * CMakeLists.txt:
     28        * GNUmakefile.am:
     29        * WebCore.gypi:
     30        * WebCore.pro:
     31        * WebCore.vcproj/WebCore.vcproj:
     32        * WebCore.xcodeproj/project.pbxproj:
     33        * html/parser/HTMLMetaCharsetParser.cpp: Added.
     34        (WebCore::HTMLMetaCharsetParser::HTMLMetaCharsetParser):
     35        (WebCore::HTMLMetaCharsetParser::~HTMLMetaCharsetParser):
     36        (WebCore::HTMLMetaCharsetParser::extractCharset):
     37        (WebCore::HTMLMetaCharsetParser::processMeta):
     38        (WebCore::HTMLMetaCharsetParser::checkForMetaCharset):
     39        * html/parser/HTMLMetaCharsetParser.h: Added.
     40        (WebCore::HTMLMetaCharsetParser::create):
     41        (WebCore::HTMLMetaCharsetParser::encoding):
     42        * loader/TextResourceDecoder.cpp:
     43        (WebCore::TextResourceDecoder::checkForHeadCharset):
     44        (WebCore::TextResourceDecoder::checkForMetaCharset):
     45        * loader/TextResourceDecoder.h:
     46
    147 2010-12-10 Nate Chapin  <japhet@chromium.org>
    248
  • trunk/WebCore/GNUmakefile.am

    r73749 r73756  
    18281828        WebCore/html/parser/HTMLFormattingElementList.h \
    18291829        WebCore/html/parser/HTMLInputStream.h \
     1830        WebCore/html/parser/HTMLMetaCharsetParser.cpp \
     1831        WebCore/html/parser/HTMLMetaCharsetParser.h \
    18301832        WebCore/html/parser/HTMLParserIdioms.cpp \
    18311833        WebCore/html/parser/HTMLParserIdioms.h \
  • trunk/WebCore/WebCore.gypi

    r73749 r73756  
    19601960            'html/parser/HTMLFormattingElementList.h',
    19611961            'html/parser/HTMLInputStream.h',
     1962            'html/parser/HTMLMetaCharsetParser.cpp',
     1963            'html/parser/HTMLMetaCharsetParser.h',
    19621964            'html/parser/HTMLParserIdioms.cpp',
    19631965            'html/parser/HTMLParserIdioms.h',
  • trunk/WebCore/WebCore.pro

    r73749 r73756  
    10101010    html/parser/HTMLEntitySearch.cpp \
    10111011    html/parser/HTMLFormattingElementList.cpp \
     1012    html/parser/HTMLMetaCharsetParser.cpp \
    10121013    html/parser/HTMLParserIdioms.cpp \
    10131014    html/parser/HTMLParserScheduler.cpp \
  • trunk/WebCore/WebCore.vcproj/WebCore.vcproj

    r73749 r73756  
    5516355163                                </File>
    5516455164                                <File
     55165                                        RelativePath="..\html\parser\HTMLMetaCharsetParser.cpp"
     55166                                        >
     55167                                </File>
     55168                                <File
     55169                                        RelativePath="..\html\parser\HTMLMetaCharsetParser.h"
     55170                                        >
     55171                                </File>
     55172                                <File
    5516555173                                        RelativePath="..\html\parser\HTMLParserIdioms.cpp"
    5516655174                                        >
  • trunk/WebCore/WebCore.xcodeproj/project.pbxproj

    r73749 r73756  
    727727                29A8124A0FBB9CA900510293 /* AccessibilityObjectWrapper.mm in Sources */ = {isa = PBXBuildFile; fileRef = 29A812460FBB9CA900510293 /* AccessibilityObjectWrapper.mm */; };
    728728                29A8124B0FBB9CA900510293 /* AXObjectCacheMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 29A812470FBB9CA900510293 /* AXObjectCacheMac.mm */; };
     729                2BE8E2C712A589EC00FAD550 /* HTMLMetaCharsetParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 2BE8E2C612A589EC00FAD550 /* HTMLMetaCharsetParser.h */; };
     730                2BE8E2C912A58A0100FAD550 /* HTMLMetaCharsetParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BE8E2C812A58A0100FAD550 /* HTMLMetaCharsetParser.cpp */; };
    729731                2D9066060BE141D400956998 /* LayoutState.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2D9066040BE141D400956998 /* LayoutState.cpp */; };
    730732                2D9066070BE141D400956998 /* LayoutState.h in Headers */ = {isa = PBXBuildFile; fileRef = 2D9066050BE141D400956998 /* LayoutState.h */; settings = {ATTRIBUTES = (Private, ); }; };
     
    70027004                29A812460FBB9CA900510293 /* AccessibilityObjectWrapper.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AccessibilityObjectWrapper.mm; sourceTree = "<group>"; };
    70037005                29A812470FBB9CA900510293 /* AXObjectCacheMac.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AXObjectCacheMac.mm; sourceTree = "<group>"; };
     7006                2BE8E2C612A589EC00FAD550 /* HTMLMetaCharsetParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HTMLMetaCharsetParser.h; path = parser/HTMLMetaCharsetParser.h; sourceTree = "<group>"; };
     7007                2BE8E2C812A58A0100FAD550 /* HTMLMetaCharsetParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = HTMLMetaCharsetParser.cpp; path = parser/HTMLMetaCharsetParser.cpp; sourceTree = "<group>"; };
    70047008                2D9066040BE141D400956998 /* LayoutState.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = LayoutState.cpp; sourceTree = "<group>"; };
    70057009                2D9066050BE141D400956998 /* LayoutState.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LayoutState.h; sourceTree = "<group>"; };
     
    1564315647                                977B3856122883E900B81FF8 /* HTMLFormattingElementList.h */,
    1564415648                                97BC849A12370A4B000C6161 /* HTMLInputStream.h */,
     15649                                2BE8E2C812A58A0100FAD550 /* HTMLMetaCharsetParser.cpp */,
     15650                                2BE8E2C612A589EC00FAD550 /* HTMLMetaCharsetParser.h */,
    1564515651                                93E2A304123E9DC0009FE12A /* HTMLParserIdioms.cpp */,
    1564615652                                93E2A305123E9DC0009FE12A /* HTMLParserIdioms.h */,
     
    2049020496                                E44613A50CD6331000FADA75 /* HTMLMediaElement.h in Headers */,
    2049120497                                A8EA79F40A1916DF00A8EF5F /* HTMLMenuElement.h in Headers */,
     20498                                2BE8E2C712A589EC00FAD550 /* HTMLMetaCharsetParser.h in Headers */,
    2049220499                                A871DC240A15205700B12A68 /* HTMLMetaElement.h in Headers */,
    2049320500                                A454424B119B3661009BE912 /* HTMLMeterElement.h in Headers */,
     
    2321823225                                E44613A40CD6331000FADA75 /* HTMLMediaElement.cpp in Sources */,
    2321923226                                A8EA79F80A1916DF00A8EF5F /* HTMLMenuElement.cpp in Sources */,
     23227                                2BE8E2C912A58A0100FAD550 /* HTMLMetaCharsetParser.cpp in Sources */,
    2322023228                                A871DC270A15205700B12A68 /* HTMLMetaElement.cpp in Sources */,
    2322123229                                A454424A119B3661009BE912 /* HTMLMeterElement.cpp in Sources */,
  • trunk/WebCore/loader/TextResourceDecoder.cpp

    r64817 r73756  
    2525
    2626#include "DOMImplementation.h"
     27#include "HTMLMetaCharsetParser.h"
    2728#include "HTMLNames.h"
    2829#include "TextCodec.h"
     
    5253        for (size_t j = 0; j < targetLength; ++j) {
    5354            if (subject[i + j] != target[j]) {
    54                 match = false;
    55                 break;
    56             }
    57         }
    58         if (match)
    59             return i;
    60     }
    61     return -1;
    62 }
    63 
    64 static int findIgnoringCase(const char* subject, size_t subjectLength, const char* target)
    65 {
    66     size_t targetLength = strlen(target);
    67     if (targetLength > subjectLength)
    68         return -1;
    69 #ifndef NDEBUG
    70     for (size_t i = 0; i < targetLength; ++i)
    71         ASSERT(isASCIILower(target[i]));
    72 #endif
    73     for (size_t i = 0; i <= subjectLength - targetLength; ++i) {
    74         bool match = true;
    75         for (size_t j = 0; j < targetLength; ++j) {
    76             if (toASCIILower(subject[i + j]) != target[j]) {
    7755                match = false;
    7856                break;
     
    535513}
    536514
    537 const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.
    538 
    539515bool TextResourceDecoder::checkForHeadCharset(const char* data, size_t len, bool& movedDataToBuffer)
    540516{
     
    552528
    553529    movedDataToBuffer = true;
     530
     531    // Continue with checking for an HTML meta tag if we were already doing so.
     532    if (m_charsetParser)
     533        return checkForMetaCharset(data, len);
    554534
    555535    const char* ptr = m_buffer.data();
     
    588568    }
    589569
    590     // we still don't have an encoding, and are in the head
    591     // the following tags are allowed in <head>:
    592     // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
    593    
    594     // We stop scanning when a tag that is not permitted in <head>
    595     // is seen, rather when </head> is seen, because that more closely
    596     // matches behavior in other browsers; more details in
    597     // <http://bugs.webkit.org/show_bug.cgi?id=3590>.
    598    
    599     // Additionally, we ignore things that looks like tags in <title>, <script> and <noscript>; see
    600     // <http://bugs.webkit.org/show_bug.cgi?id=4560>, <http://bugs.webkit.org/show_bug.cgi?id=12165>
    601     // and <http://bugs.webkit.org/show_bug.cgi?id=12389>.
    602 
    603     // Since many sites have charset declarations after <body> or other tags that are disallowed in <head>,
    604     // we don't bail out until we've checked at least bytesToCheckUnconditionally bytes of input.
    605 
    606     AtomicStringImpl* enclosingTagName = 0;
    607     bool inHeadSection = true; // Becomes false when </head> or any tag not allowed in head is encountered.
    608 
    609     // the HTTP-EQUIV meta has no effect on XHTML
     570    // The HTTP-EQUIV meta has no effect on XHTML.
    610571    if (m_contentType == XML)
    611572        return true;
    612573
    613     while (ptr + 3 < pEnd) { // +3 guarantees that "<!--" fits in the buffer - and certainly we aren't going to lose any "charset" that way.
    614         if (*ptr == '<') {
    615             bool end = false;
    616             ptr++;
    617 
    618             // Handle comments.
    619             if (ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') {
    620                 ptr += 3;
    621                 skipComment(ptr, pEnd);
    622                 if (ptr - m_buffer.data() >= bytesToCheckUnconditionally && !inHeadSection) {
    623                     // Some pages that test bandwidth from within the browser do it by having
    624                     // huge comments and measuring the time they take to load. Repeatedly scanning
    625                     // these comments can take a lot of CPU time.
    626                     m_checkedForHeadCharset = true;
    627                     return true;
    628                 }
    629                 continue;
    630             }
    631 
    632             if (*ptr == '/') {
    633                 ++ptr;
    634                 end = true;
    635             }
    636 
    637             // Grab the tag name, but mostly ignore namespaces.
    638             bool sawNamespace = false;
    639             char tagBuffer[20];
    640             int len = 0;
    641             while (len < 19) {
    642                 if (ptr == pEnd)
    643                     return false;
    644                 char c = *ptr;
    645                 if (c == ':') {
    646                     len = 0;
    647                     sawNamespace = true;
    648                     ptr++;
    649                     continue;
    650                 }
    651                 if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
    652                     ;
    653                 else if (c >= 'A' && c <= 'Z')
    654                     c += 'a' - 'A';
    655                 else
    656                     break;
    657                 tagBuffer[len++] = c;
    658                 ptr++;
    659             }
    660             tagBuffer[len] = 0;
    661             AtomicString tag(tagBuffer);
    662            
    663             if (enclosingTagName) {
    664                 if (end && tag.impl() == enclosingTagName)
    665                     enclosingTagName = 0;
    666             } else {
    667                 if (tag == titleTag)
    668                     enclosingTagName = titleTag.localName().impl();
    669                 else if (tag == scriptTag)
    670                     enclosingTagName = scriptTag.localName().impl();
    671                 else if (tag == noscriptTag)
    672                     enclosingTagName = noscriptTag.localName().impl();
    673             }
    674            
    675             // Find where the opening tag ends.
    676             const char* tagContentStart = ptr;
    677             if (!end) {
    678                 while (ptr != pEnd && *ptr != '>') {
    679                     if (*ptr == '\'' || *ptr == '"') {
    680                         char quoteMark = *ptr;
    681                         ++ptr;
    682                         while (ptr != pEnd && *ptr != quoteMark)
    683                             ++ptr;
    684                         if (ptr == pEnd)
    685                             return false;
    686                     }
    687                     ++ptr;
    688                 }
    689                 if (ptr == pEnd)
    690                     return false;
    691                 ++ptr;
    692             }
    693            
    694             if (!end && tag == metaTag && !sawNamespace) {
    695                 const char* str = tagContentStart;
    696                 int length = ptr - tagContentStart;
    697                 int pos = 0;
    698                 while (pos < length) {
    699                     int charsetPos = findIgnoringCase(str + pos, length - pos, "charset");
    700                     if (charsetPos == -1)
    701                         break;
    702                     pos += charsetPos + 7;
    703                     // skip whitespace
    704                     while (pos < length && str[pos] <= ' ')
    705                         pos++;
    706                     if (pos == length)
    707                         break;
    708                     if (str[pos++] != '=')
    709                         continue;
    710                     while ((pos < length) &&
    711                             (str[pos] <= ' ' || str[pos] == '=' || str[pos] == '"' || str[pos] == '\''))
    712                         pos++;
    713 
    714                     // end ?
    715                     if (pos == length)
    716                         break;
    717                     int end = pos;
    718                     while (end < length &&
    719                            str[end] != ' ' && str[end] != '"' && str[end] != '\'' &&
    720                            str[end] != ';' && str[end] != '>')
    721                         end++;
    722                     setEncoding(findTextEncoding(str + pos, end - pos), EncodingFromMetaTag);
    723                     if (m_source == EncodingFromMetaTag)
    724                         return true;
    725 
    726                     if (end >= length || str[end] == '/' || str[end] == '>')
    727                         break;
    728 
    729                     pos = end + 1;
    730                 }
    731             } else {
    732                 if (!enclosingTagName && tag != scriptTag && tag != noscriptTag && tag != styleTag
    733                     && tag != linkTag && tag != metaTag && tag != objectTag && tag != titleTag && tag != baseTag
    734                     && (end || tag != htmlTag) && (end || tag != headTag) && isASCIIAlpha(tagBuffer[0])) {
    735                     inHeadSection = false;
    736                 }
    737 
    738                 if (ptr - m_buffer.data() >= bytesToCheckUnconditionally && !inHeadSection) {
    739                     m_checkedForHeadCharset = true;
    740                     return true;
    741                 }
    742             }
    743         } else
    744             ++ptr;
    745     }
    746     return false;
     574    m_charsetParser = HTMLMetaCharsetParser::create();
     575    return checkForMetaCharset(data, len);
     576}
     577
     578bool TextResourceDecoder::checkForMetaCharset(const char* data, size_t length)
     579{
     580    if (!m_charsetParser->checkForMetaCharset(data, length))
     581        return false;
     582
     583    setEncoding(m_charsetParser->encoding(), EncodingFromMetaTag);
     584    m_charsetParser.clear();
     585    m_checkedForHeadCharset = true;
     586    return true;
    747587}
    748588
  • trunk/WebCore/loader/TextResourceDecoder.h

    r42026 r73756  
    2727
    2828namespace WebCore {
     29
     30class HTMLMetaCharsetParser;
    2931
    3032class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
     
    7577    bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
    7678    bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
     79    bool checkForMetaCharset(const char*, size_t);
    7780    void detectJapaneseEncoding(const char*, size_t);
    7881    bool shouldAutoDetect() const;
     
    9093    bool m_sawError;
    9194    bool m_usesEncodingDetector;
     95
     96    OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
    9297};
    9398
Note: See TracChangeset for help on using the changeset viewer.