Changeset 64817 in webkit
- Timestamp: Aug 5, 2010 10:36:28 PM (14 years ago)
- Location: trunk
- Files: 3 added, 9 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r64815 r64817 1 2010-08-05 Alexey Proskuryakov <ap@apple.com> 2 3 Reviewed by Darin Adler. 4 5 https://bugs.webkit.org/show_bug.cgi?id=43554 6 Way too many encoding aliases are treated as valid 7 8 <rdar://problem/7863399> Garbage characters displayed in some yesky.com pages. 9 10 <rdar://problem/7859068> Garbage characters displayed for most text at ceping.zhaopin.com 11 12 * fast/encoding/char-encoding-expected.txt: 13 * fast/encoding/char-encoding.html: 14 Use a correct name for GB_2312-80. At least Firefox doesn't know GB-2312-80. 15 16 * http/tests/misc/bad-charset-alias-expected.txt: Added. 17 * http/tests/misc/bad-charset-alias.html: Added. 18 * http/tests/misc/resources/bad-charset-alias.php: Added. 19 Check that certain encoding names are unknown. Both Firefox and IE don't know these. 20 1 21 2010-08-05 W. James MacLean <wjmaclean@chromium.org> 2 22 -
trunk/LayoutTests/fast/encoding/char-encoding-expected.txt
r39787 r64817
 7  7   PASS encode('GBK', 'U+00A5') is '%A3%A4'
 8  8   PASS encode('gb2312', 'U+00A5') is '%A3%A4'
 9    - PASS encode('GB-2312-80', 'U+00A5') is '%A3%A4'
    9 + PASS encode('GB_2312-80', 'U+00A5') is '%A3%A4'
10 10   PASS encode('EUC-CN', 'U+00A5') is '%A3%A4'
11 11   PASS encode('GBK', 'U+20AC') is '%80'
12 12   PASS encode('gb2312', 'U+20AC') is '%80'
13    - PASS encode('GB-2312-80', 'U+20AC') is '%80'
   13 + PASS encode('GB_2312-80', 'U+20AC') is '%80'
14 14   PASS encode('EUC-CN', 'U+20AC') is '%80'
15 15   PASS encode('GBK', 'U+01F9') is '%A8%BF'
trunk/LayoutTests/fast/encoding/char-encoding.html
r51088 r64817
26 26   testEncode('GBK', 'U+00A5', '%A3%A4');
27 27   testEncode('gb2312', 'U+00A5', '%A3%A4');
28    - testEncode('GB-2312-80', 'U+00A5', '%A3%A4');
   28 + testEncode('GB_2312-80', 'U+00A5', '%A3%A4');
29 29   testEncode('EUC-CN', 'U+00A5', '%A3%A4');
30 30   //Euro symbol in gbk
31 31   testEncode('GBK', 'U+20AC', '%80');
32 32   testEncode('gb2312', 'U+20AC', '%80');
33    - testEncode('GB-2312-80', 'U+20AC', '%80');
   33 + testEncode('GB_2312-80', 'U+20AC', '%80');
34 34   testEncode('EUC-CN', 'U+20AC', '%80');
35 35   //Misc symbols from TEC specific GBK translation
trunk/WebCore/ChangeLog
r64816 r64817 1 2010-08-05 Alexey Proskuryakov <ap@apple.com> 2 3 Reviewed by Darin Adler. 4 5 https://bugs.webkit.org/show_bug.cgi?id=43554 6 Way too many encoding aliases are treated as valid 7 8 <rdar://problem/7863399> Garbage characters displayed in some yesky.com pages. 9 10 <rdar://problem/7859068> Garbage characters displayed for most text at ceping.zhaopin.com 11 12 Test: http/tests/misc/bad-charset-alias.html 13 14 * loader/TextResourceDecoder.cpp: (WebCore::TextResourceDecoder::checkForCSSCharset): 15 Fix encoding name length computation. Previously, a trailing quote was ignored by 16 TextEncodingRegistry. 17 18 * platform/text/TextCodecICU.cpp: (WebCore::TextCodecICU::registerExtendedEncodingNames): 19 Added dashes to alias names that didn't have them. Added aliases prompted by regression tests. 20 21 * platform/text/TextCodecLatin1.cpp: (WebCore::TextCodecLatin1::registerEncodingNames): 22 Don't register 8859-1, other browsers do not support this encoding name. 23 24 * platform/text/TextEncoding.cpp: (WebCore::Latin1Encoding): 25 "Latin-1" is not a real encoding name, it's not known to Firefox or IE. 26 27 * platform/text/TextEncodingRegistry.cpp: 28 (WebCore::TextEncodingNameHash::equal): Changed to no longer ignore non-alphanumeric characters. 29 There is a good chance that we'll be missing support for some necessary alias names, but other 30 browsers don't ignore any characters when matching names. 31 (WebCore::TextEncodingNameHash::hash): Ditto. 32 (WebCore::checkExistingName): Re-formatted a line. 33 (WebCore::isUndesiredAlias): Added a filter to reject "8859_1" and any names containing commas. 34 (WebCore::addToTextEncodingNameMap): Used it. 35 (WebCore::atomicCanonicalTextEncodingName): Changed to no longer ignore non-alphanumeric characters. 36 1 37 2010-08-05 Simon Hausmann <simon.hausmann@nokia.com> 2 38 -
trunk/WebCore/loader/TextResourceDecoder.cpp
r62551 r64817
489 489   return false;
490 490
491     - int encodingNameLength = pos - dataStart + 1;
    491 + int encodingNameLength = pos - dataStart;
492 492
493 493   ++pos;
trunk/WebCore/platform/text/TextCodecICU.cpp
r56825 r64817 71 71 } 72 72 73 // FIXME: Registering all the encodings we get from ucnv_getAvailableName74 // includes encodings we don't want or need. For example, all75 // the encodings with commas and version numbers.76 77 73 void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar) 78 74 { … … 137 133 // Perhaps we can prove these are not used on the web and remove them. 138 134 // Or perhaps we can get them added to ICU. 139 registrar("x macroman", "macintosh");140 registrar("x macukrainian", "x-mac-cyrillic");141 registrar("cn big5", "Big5");142 registrar("x xbig5", "Big5");143 registrar("cn gb", "GBK");135 registrar("x-mac-roman", "macintosh"); 136 registrar("x-mac-ukrainian", "x-mac-cyrillic"); 137 registrar("cn-big5", "Big5"); 138 registrar("x-x-big5", "Big5"); 139 registrar("cn-gb", "GBK"); 144 140 registrar("csgb231280", "GBK"); 145 registrar("x euccn", "GBK");146 registrar("x gbk", "GBK");147 registrar("csISO88598I", "ISO _8859-8-I");141 registrar("x-euc-cn", "GBK"); 142 registrar("x-gbk", "GBK"); 143 registrar("csISO88598I", "ISO-8859-8-I"); 148 144 registrar("koi", "KOI8-R"); 149 145 registrar("logical", "ISO-8859-8-I"); 150 146 registrar("unicode11utf8", "UTF-8"); 151 147 registrar("unicode20utf8", "UTF-8"); 152 registrar("x unicode20utf8", "UTF-8");148 registrar("x-unicode20utf8", "UTF-8"); 153 149 registrar("visual", "ISO-8859-8"); 154 150 registrar("winarabic", "windows-1256"); 155 151 registrar("winbaltic", "windows-1257"); 156 152 registrar("wincyrillic", "windows-1251"); 157 registrar("iso885911", "windows-874"); 158 registrar("dos874", "windows-874"); 153 registrar("iso-8859-11", "windows-874"); 154 registrar("iso8859-11", "windows-874"); 155 registrar("dos-874", "windows-874"); 159 156 registrar("wingreek", "windows-1253"); 160 157 registrar("winhebrew", "windows-1255"); … … 162 159 registrar("winturkish", "windows-1254"); 163 160 registrar("winvietnamese", "windows-1258"); 164 registrar("xcp1250", "windows-1250"); 165 
registrar("xcp1251", "windows-1251"); 166 registrar("xeuc", "EUC-JP"); 167 registrar("xwindows949", "windows-949"); 168 registrar("xuhc", "windows-949"); 161 registrar("x-cp1250", "windows-1250"); 162 registrar("x-cp1251", "windows-1251"); 163 registrar("x-euc", "EUC-JP"); 164 registrar("x-windows-949", "windows-949"); 165 registrar("x-uhc", "windows-949"); 166 registrar("utf8", "UTF-8"); 169 167 170 168 // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names. 171 169 // They are not present in ICU 3.2. 172 registrar("dos 720", "cp864");170 registrar("dos-720", "cp864"); 173 171 registrar("jis7", "ISO-2022-JP"); 172 173 // Alternative spelling of ISO encoding names. 174 registrar("ISO8859-1", "ISO-8859-1"); 175 registrar("ISO8859-2", "ISO-8859-2"); 176 registrar("ISO8859-3", "ISO-8859-3"); 177 registrar("ISO8859-4", "ISO-8859-4"); 178 registrar("ISO8859-5", "ISO-8859-5"); 179 registrar("ISO8859-6", "ISO-8859-6"); 180 registrar("ISO8859-7", "ISO-8859-7"); 181 registrar("ISO8859-8", "ISO-8859-8"); 182 registrar("ISO8859-8-I", "ISO-8859-8-I"); 183 registrar("ISO8859-9", "ISO-8859-9"); 184 registrar("ISO8859-10", "ISO-8859-10"); 185 registrar("ISO8859-13", "ISO-8859-13"); 186 registrar("ISO8859-14", "ISO-8859-14"); 187 registrar("ISO8859-15", "ISO-8859-15"); 188 registrar("ISO8859-16", "ISO-8859-16"); 174 189 } 175 190 -
trunk/WebCore/platform/text/TextCodecLatin1.cpp
r56825 r64817
80 80   registrar("ibm-1252_P100-2000", "windows-1252");
81 81
82    - registrar("8859-1", "ISO-8859-1");
83 82   registrar("CP819", "ISO-8859-1");
84 83   registrar("IBM819", "ISO-8859-1");
trunk/WebCore/platform/text/TextEncoding.cpp
r56825 r64817
249 249   const TextEncoding& Latin1Encoding()
250 250   {
251     - static TextEncoding globalLatin1Encoding("Latin-1");
    251 + static TextEncoding globalLatin1Encoding("latin1");
252 252   return globalLatin1Encoding;
253 253   }
trunk/WebCore/platform/text/TextEncodingRegistry.cpp
r63036 r64817 62 62 const size_t maxEncodingNameLength = 63; 63 63 64 // Hash for all-ASCII strings that does case folding and skips any characters 65 // that are not alphanumeric. If passed any non-ASCII characters, depends on 66 // the behavior of isalnum -- if that returns false as it does on OS X, then 67 // it will properly skip those characters too. 64 // Hash for all-ASCII strings that does case folding. 68 65 struct TextEncodingNameHash { 69 66 … … 73 70 char c2; 74 71 do { 75 do 76 c1 = *s1++; 77 while (c1 && !isASCIIAlphanumeric(c1)); 78 do 79 c2 = *s2++; 80 while (c2 && !isASCIIAlphanumeric(c2)); 72 c1 = *s1++; 73 c2 = *s2++; 81 74 if (toASCIILower(c1) != toASCIILower(c2)) 82 75 return false; … … 92 85 unsigned h = WTF::stringHashingStartValue; 93 86 for (;;) { 94 char c; 95 do { 96 c = *s++; 97 if (!c) { 98 h += (h << 3); 99 h ^= (h >> 11); 100 h += (h << 15); 101 return h; 102 } 103 } while (!isASCIIAlphanumeric(c)); 87 char c = *s++; 88 if (!c) { 89 h += (h << 3); 90 h ^= (h >> 11); 91 h += (h << 15); 92 return h; 93 } 104 94 h += toASCIILower(c); 105 95 h += (h << 10); … … 155 145 && strcasecmp(atomicName, "iso-8859-8") == 0) 156 146 return; 157 LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", 158 alias, oldAtomicName, atomicName); 159 } 160 161 #endif 147 LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName); 148 } 149 150 #endif 151 152 static bool isUndesiredAlias(const char* alias) 153 { 154 // Reject aliases with version numbers that are supported by some back-ends (such as "ISO_2022,locale=ja,version=0" in ICU). 155 for (const char* p = alias; *p; ++p) { 156 if (*p == ',') 157 return true; 158 } 159 // 8859_1 is known to (at least) ICU, but other browsers don't support this name - and having it caused a compatibility 160 // problem, see bug 43554. 
161 if (0 == strcmp(alias, "8859_1")) 162 return true; 163 return false; 164 } 162 165 163 166 static void addToTextEncodingNameMap(const char* alias, const char* name) 164 167 { 165 168 ASSERT(strlen(alias) <= maxEncodingNameLength); 169 if (isUndesiredAlias(alias)) 170 return; 166 171 const char* atomicName = textEncodingNameMap->get(name); 167 172 ASSERT(strcmp(alias, name) == 0 || atomicName); … … 301 306 for (size_t i = 0; i < length; ++i) { 302 307 UChar c = characters[i]; 303 if (isASCIIAlphanumeric(c)) { 304 if (j == maxEncodingNameLength) 305 return 0; 306 buffer[j++] = c; 307 } 308 if (j == maxEncodingNameLength) 309 return 0; 310 buffer[j++] = c; 308 311 } 309 312 buffer[j] = 0;
Note: See TracChangeset for help on using the changeset viewer.