Changeset 266528 in webkit
- Timestamp:
- Sep 3, 2020, 9:57:49 AM (5 years ago)
- Location:
- trunk
- Files:
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/imported/w3c/ChangeLog
r266527 r266528 1 2020-09-03 Alex Christensen <achristensen@webkit.org> 2 3 TextDecoder should ignore byte-order-mark like other browsers and spec 4 https://bugs.webkit.org/show_bug.cgi?id=216108 5 6 Reviewed by Darin Adler. 7 8 * web-platform-tests/encoding/streams/decode-ignore-bom.any-expected.txt: 9 * web-platform-tests/encoding/textdecoder-ignorebom.any-expected.txt: 10 1 11 2020-09-03 Alex Christensen <achristensen@webkit.org> 2 12 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-ignore-bom.any-expected.txt
r266348 r266528 1 1 2 2 PASS ignoreBOM should work for encoding utf-8, split at character 0 3 FAIL ignoreBOM should work for encoding utf-8, split at character 1 assert_equals: BOM should be stripped expected "abc" but got "abc" 4 FAIL ignoreBOM should work for encoding utf-8, split at character 2 assert_equals: BOM should be stripped expected "abc" but got "abc" 3 PASS ignoreBOM should work for encoding utf-8, split at character 1 4 PASS ignoreBOM should work for encoding utf-8, split at character 2 5 5 PASS ignoreBOM should work for encoding utf-8, split at character 3 6 6 PASS ignoreBOM should work for encoding utf-16le, split at character 0 7 FAIL ignoreBOM should work for encoding utf-16le, split at character 1 assert_equals: BOM should be stripped expected "abc" but got "abc" 7 PASS ignoreBOM should work for encoding utf-16le, split at character 1 8 8 PASS ignoreBOM should work for encoding utf-16le, split at character 2 9 FAIL ignoreBOM should work for encoding utf-16le, split at character 3 assert_equals: BOM should be preserved expected "abc" but got "abc" 9 PASS ignoreBOM should work for encoding utf-16le, split at character 3 10 10 PASS ignoreBOM should work for encoding utf-16be, split at character 0 11 FAIL ignoreBOM should work for encoding utf-16be, split at character 1 assert_equals: BOM should be stripped expected "abc" but got "abc" 11 PASS ignoreBOM should work for encoding utf-16be, split at character 1 12 12 PASS ignoreBOM should work for encoding utf-16be, split at character 2 13 FAIL ignoreBOM should work for encoding utf-16be, split at character 3 assert_equals: BOM should be preserved expected "abc" but got "abc" 13 PASS ignoreBOM should work for encoding utf-16be, split at character 3 14 14 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-ignore-bom.any.worker-expected.txt
r266348 r266528 1 1 2 2 PASS ignoreBOM should work for encoding utf-8, split at character 0 3 FAIL ignoreBOM should work for encoding utf-8, split at character 1 assert_equals: BOM should be stripped expected "abc" but got "abc" 4 FAIL ignoreBOM should work for encoding utf-8, split at character 2 assert_equals: BOM should be stripped expected "abc" but got "abc" 3 PASS ignoreBOM should work for encoding utf-8, split at character 1 4 PASS ignoreBOM should work for encoding utf-8, split at character 2 5 5 PASS ignoreBOM should work for encoding utf-8, split at character 3 6 6 PASS ignoreBOM should work for encoding utf-16le, split at character 0 7 FAIL ignoreBOM should work for encoding utf-16le, split at character 1 assert_equals: BOM should be stripped expected "abc" but got "abc" 7 PASS ignoreBOM should work for encoding utf-16le, split at character 1 8 8 PASS ignoreBOM should work for encoding utf-16le, split at character 2 9 FAIL ignoreBOM should work for encoding utf-16le, split at character 3 assert_equals: BOM should be preserved expected "abc" but got "abc" 9 PASS ignoreBOM should work for encoding utf-16le, split at character 3 10 10 PASS ignoreBOM should work for encoding utf-16be, split at character 0 11 FAIL ignoreBOM should work for encoding utf-16be, split at character 1 assert_equals: BOM should be stripped expected "abc" but got "abc" 11 PASS ignoreBOM should work for encoding utf-16be, split at character 1 12 12 PASS ignoreBOM should work for encoding utf-16be, split at character 2 13 FAIL ignoreBOM should work for encoding utf-16be, split at character 3 assert_equals: BOM should be preserved expected "abc" but got "abc" 13 PASS ignoreBOM should work for encoding utf-16be, split at character 3 14 14 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-copy.any-expected.txt
r264561 r266528 1 1 2 FAIL Modify buffer after passing it in (ArrayBuffer) assert_equals: expected "@" but got "@" 3 FAIL Modify buffer after passing it in (SharedArrayBuffer) assert_equals: expected "@" but got "@" 2 PASS Modify buffer after passing it in (ArrayBuffer) 3 PASS Modify buffer after passing it in (SharedArrayBuffer) 4 4 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-copy.any.worker-expected.txt
r264561 r266528 1 1 2 FAIL Modify buffer after passing it in (ArrayBuffer) assert_equals: expected "@" but got "@" 3 FAIL Modify buffer after passing it in (SharedArrayBuffer) assert_equals: expected "@" but got "@" 2 PASS Modify buffer after passing it in (ArrayBuffer) 3 PASS Modify buffer after passing it in (SharedArrayBuffer) 4 4 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-ignorebom.any-expected.txt
r256730 r266528 1 1 2 FAIL BOM is ignored if ignoreBOM option is specified: utf-8 assert_equals: utf-8: BOM should be present in decoded string if ignored by a reused decoder expected "abc" but got "abc" 3 FAIL BOM is ignored if ignoreBOM option is specified: utf-16le assert_equals: utf-16le: BOM should be present in decoded string if ignored by a reused decoder expected "abc" but got "abc" 4 FAIL BOM is ignored if ignoreBOM option is specified: utf-16be assert_equals: utf-16be: BOM should be present in decoded string if ignored by a reused decoder expected "abc" but got "abc" 2 PASS BOM is ignored if ignoreBOM option is specified: utf-8 3 PASS BOM is ignored if ignoreBOM option is specified: utf-16le 4 PASS BOM is ignored if ignoreBOM option is specified: utf-16be 5 5 PASS The ignoreBOM attribute of TextDecoder 6 6 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-ignorebom.any.worker-expected.txt
r256730 r266528 1 1 2 FAIL BOM is ignored if ignoreBOM option is specified: utf-8 assert_equals: utf-8: BOM should be present in decoded string if ignored by a reused decoder expected "abc" but got "abc" 3 FAIL BOM is ignored if ignoreBOM option is specified: utf-16le assert_equals: utf-16le: BOM should be present in decoded string if ignored by a reused decoder expected "abc" but got "abc" 4 FAIL BOM is ignored if ignoreBOM option is specified: utf-16be assert_equals: utf-16be: BOM should be present in decoded string if ignored by a reused decoder expected "abc" but got "abc" 2 PASS BOM is ignored if ignoreBOM option is specified: utf-8 3 PASS BOM is ignored if ignoreBOM option is specified: utf-16le 4 PASS BOM is ignored if ignoreBOM option is specified: utf-16be 5 5 PASS The ignoreBOM attribute of TextDecoder 6 6 -
trunk/Source/WebCore/ChangeLog
r266527 r266528 1 2020-09-03 Alex Christensen <achristensen@webkit.org> 2 3 TextDecoder should ignore byte-order-mark like other browsers and spec 4 https://bugs.webkit.org/show_bug.cgi?id=216108 5 6 Reviewed by Darin Adler. 7 8 Covered by newly passing web platform tests. 9 10 * dom/TextDecoder.cpp: 11 (WebCore::TextDecoder::ignoreBOMIfNecessary): 12 (WebCore::TextDecoder::decode): 13 (WebCore::TextDecoder::prependBOMIfNecessary): Deleted. 14 * dom/TextDecoder.h: 15 1 16 2020-09-03 Alex Christensen <achristensen@webkit.org> 2 17 -
trunk/Source/WebCore/dom/TextDecoder.cpp
r243163 r266528 49 49 } 50 50 51 void TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length) 51 constexpr uint8_t utf8BOMBytes[3] { 0xEF, 0xBB, 0xBF }; 52 constexpr uint8_t utf16BEBOMBytes[2] { 0xFE, 0xFF }; 53 constexpr uint8_t utf16LEBOMBytes[2] { 0xFF, 0xFE }; 54 55 size_t TextDecoder::bytesNeededForFullBOMIgnoreCheck() const 52 56 { 53 const uint8_t utf8BOMBytes[3] = {0xEF, 0xBB, 0xBF}; 54 const uint8_t utf16BEBOMBytes[2] = {0xFE, 0xFF}; 55 const uint8_t utf16LEBOMBytes[2] = {0xFF, 0xFE}; 57 if (m_textEncoding == UTF8Encoding()) 58 return sizeof(utf8BOMBytes); 59 if (m_textEncoding == UTF16BigEndianEncoding()) 60 return sizeof(utf16BEBOMBytes); 61 if (m_textEncoding == UTF16LittleEndianEncoding()) 62 return sizeof(utf16LEBOMBytes); 63 return 0; 64 } 65 66 bool TextDecoder::isBeginningOfIncompleteBOM(const uint8_t* bytes, size_t length) const 67 { 68 if (!length) 69 return true; 70 71 if (m_textEncoding == UTF8Encoding()) { 72 if (length == 1) 73 return bytes[0] == utf8BOMBytes[0]; 74 return length == 2 && bytes[0] == utf8BOMBytes[0] && bytes[1] == utf8BOMBytes[1]; 75 } 76 if (m_textEncoding == UTF16BigEndianEncoding()) 77 return length == 1 && bytes[0] == utf16BEBOMBytes[0]; 78 if (m_textEncoding == UTF16LittleEndianEncoding()) 79 return length == 1 && bytes[0] == utf16LEBOMBytes[0]; 80 81 return false; 82 } 83 84 auto TextDecoder::ignoreBOMIfNecessary(const uint8_t*& data, size_t& length, bool stream) -> WaitForMoreBOMBytes 85 { 86 if (m_bomIgnoredIfNecessary || m_options.ignoreBOM) 87 return WaitForMoreBOMBytes::No; 88 89 if (stream && length < bytesNeededForFullBOMIgnoreCheck()) { 90 if (isBeginningOfIncompleteBOM(data, length)) 91 return WaitForMoreBOMBytes::Yes; 92 m_bomIgnoredIfNecessary = true; 93 return WaitForMoreBOMBytes::No; 94 } 56 95 57 96 if (m_textEncoding == UTF8Encoding() … … 75 114 length -= sizeof(utf16LEBOMBytes); 76 115 } 77 } 78 79 String TextDecoder::prependBOMIfNecessary(const String& decoded) 80 { 81 if (m_hasDecoded || 
!m_options.ignoreBOM) 82 return decoded; 83 const UChar utf16BEBOM[2] = {0xFEFF, '\0'}; 84 85 // FIXME: Make TextCodec::decode take a flag for prepending BOM so we don't need to do this extra allocation and copy. 86 return makeString(utf16BEBOM, decoded); 116 m_bomIgnoredIfNecessary = true; 117 return WaitForMoreBOMBytes::No; 87 118 } 88 119 … … 103 134 } 104 135 105 ignoreBOMIfNecessary(data, length); 136 if (!options.stream) 137 m_bomIgnoredIfNecessary = false; 106 138 139 bool alreadyBuffered = false; 107 140 if (m_buffer.size()) { 108 141 m_buffer.append(data, length); 109 142 data = m_buffer.data(); 110 143 length = m_buffer.size(); 144 alreadyBuffered = true; 145 } 146 147 if (ignoreBOMIfNecessary(data, length, options.stream) == WaitForMoreBOMBytes::Yes) { 148 ASSERT(options.stream); 149 if (!alreadyBuffered) 150 m_buffer.append(data, length); 151 return String(); 111 152 } 112 153 … … 118 159 String result; 119 160 if (!sawError) 120 result = prependBOMIfNecessary(m_textEncoding.decode(charData, length, stopOnError, sawError));161 result = m_textEncoding.decode(charData, length, stopOnError, sawError); 121 162 122 163 if (sawError) { … … 128 169 if (m_options.fatal) 129 170 return Exception { TypeError }; 130 result = prependBOMIfNecessary(m_textEncoding.decode(charData, length));171 result = m_textEncoding.decode(charData, length); 131 172 } 132 173 } else 133 174 m_buffer.clear(); 134 175 135 m_hasDecoded = true;136 176 return result; 137 177 } -
trunk/Source/WebCore/dom/TextDecoder.h
r242776 r266528 51 51 52 52 private: 53 String prependBOMIfNecessary(const String&);54 void ignoreBOMIfNecessary(const uint8_t*& data, size_t& length);55 53 TextDecoder(const char*, Options); 54 55 enum class WaitForMoreBOMBytes : bool { No, Yes }; 56 WaitForMoreBOMBytes ignoreBOMIfNecessary(const uint8_t*& data, size_t& length, bool stream); 57 size_t bytesNeededForFullBOMIgnoreCheck() const; 58 bool isBeginningOfIncompleteBOM(const uint8_t*, size_t) const; 59 56 60 TextEncoding m_textEncoding; 57 61 Options m_options; 58 bool m_hasDecoded { false };59 62 Vector<uint8_t> m_buffer; 63 bool m_bomIgnoredIfNecessary { false }; 60 64 }; 61 65
Note: See TracChangeset for help on using the changeset viewer.