Changeset 266457 in webkit
- Timestamp: Sep 2, 2020, 7:45:32 AM (5 years ago)
- Location: trunk
- Files: 13 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r266453 r266457 1 2020-09-02 Alex Christensen <achristensen@webkit.org> 2 3 Align UTF-16 decoder with Chrome, Firefox, and specification 4 https://bugs.webkit.org/show_bug.cgi?id=216058 5 6 Reviewed by Youenn Fablet. 7 8 * imported/blink/fast/encoding/utf-16-odd-byte-expected.txt: 9 1 10 2020-09-02 Diego Pino Garcia <dpino@igalia.com> 2 11 -
trunk/LayoutTests/fast/parser/test-unicode-characters-in-attribute-name-expected.txt
r28908 r266457 4 4 1. title : http://www.315.com.cn/newwebsite/images0821/315_ca_03.gif 5 5 2. la ng : zh-cn 6 6 � -
trunk/LayoutTests/imported/blink/fast/encoding/utf-16-odd-byte-expected.txt
r190629 r266457 1 This document has an odd number of bytes - the UTF-16 decoder should emit a replacement character here: 1 This document has an odd number of bytes - the UTF-16 decoder should emit a replacement character here: � -
trunk/LayoutTests/imported/w3c/ChangeLog
r266448 r266457 1 2020-09-02 Alex Christensen <achristensen@webkit.org> 2 3 Align UTF-16 decoder with Chrome, Firefox, and specification 4 https://bugs.webkit.org/show_bug.cgi?id=216058 5 6 Reviewed by Youenn Fablet. 7 8 * web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt: 9 * web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt: 10 * web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt: 11 * web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt: 12 * web-platform-tests/encoding/textdecoder-utf16-surrogates.any-expected.txt: 13 * web-platform-tests/encoding/textdecoder-utf16-surrogates.any.worker-expected.txt: 14 1 15 2020-09-01 Alex Christensen <achristensen@webkit.org> 2 16 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt
r266348 r266457 1 1 2 2 PASS TextDecoderStream should be able to decode UTF-16BE 3 FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16BE assert_equals: output should be replacement character expected "\ufffd" but got "�" 4 FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16BE assert_unreached: Should have rejected: readable should be errored Reached unreachable code 3 PASS TextDecoderStream should be able to decode invalid sequences in UTF-16BE 4 PASS TextDecoderStream should be able to reject invalid sequences in UTF-16BE 5 5 PASS TextDecoderStream should be able to decode UTF-16LE 6 FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16LE assert_equals: output should be replacement character expected "\ufffd" but got "�" 7 FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16LE assert_unreached: Should have rejected: readable should be errored Reached unreachable code 6 PASS TextDecoderStream should be able to decode invalid sequences in UTF-16LE 7 PASS TextDecoderStream should be able to reject invalid sequences in UTF-16LE 8 8 PASS TextDecoderStream should be able to decode Shift_JIS 9 9 FAIL TextDecoderStream should be able to decode invalid sequences in Shift_JIS assert_equals: output should be replacement character expected "\ufffd" but got "\x1a" -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt
r266348 r266457 1 1 2 2 PASS TextDecoderStream should be able to decode UTF-16BE 3 FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16BE assert_equals: output should be replacement character expected "\ufffd" but got "�" 4 FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16BE assert_unreached: Should have rejected: readable should be errored Reached unreachable code 3 PASS TextDecoderStream should be able to decode invalid sequences in UTF-16BE 4 PASS TextDecoderStream should be able to reject invalid sequences in UTF-16BE 5 5 PASS TextDecoderStream should be able to decode UTF-16LE 6 FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16LE assert_equals: output should be replacement character expected "\ufffd" but got "�" 7 FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16LE assert_unreached: Should have rejected: readable should be errored Reached unreachable code 6 PASS TextDecoderStream should be able to decode invalid sequences in UTF-16LE 7 PASS TextDecoderStream should be able to reject invalid sequences in UTF-16LE 8 8 PASS TextDecoderStream should be able to decode Shift_JIS 9 9 FAIL TextDecoderStream should be able to decode invalid sequences in Shift_JIS assert_equals: output should be replacement character expected "\ufffd" but got "\x1a" -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt
r256730 r266457 1 1 2 FAIL Fatal flag, non-streaming cases assert_equals: Unterminated UTF-8 sequence should emit replacement character if fatal flag is unset expected "\ufffd" but got "" 2 PASS Fatal flag, non-streaming cases 3 3 FAIL Fatal flag, streaming cases assert_equals: expected "\0" but got "" 4 4 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt
r256730 r266457 1 1 2 FAIL Fatal flag, non-streaming cases assert_equals: Unterminated UTF-8 sequence should emit replacement character if fatal flag is unset expected "\ufffd" but got "" 2 PASS Fatal flag, non-streaming cases 3 3 FAIL Fatal flag, streaming cases assert_equals: expected "\0" but got "" 4 4 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any-expected.txt
r256730 r266457 1 1 2 FAIL utf-16le - lone surrogate lead assert_equals: expected "\ufffd" but got "�" 3 FAIL utf-16le - lone surrogate lead (fatal flag set) assert_throws_js: function "function () { 4 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 5 }" did not throw 6 FAIL utf-16le - lone surrogate trail assert_equals: expected "\ufffd" but got "�" 7 FAIL utf-16le - lone surrogate trail (fatal flag set) assert_throws_js: function "function () { 8 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 9 }" did not throw 10 FAIL utf-16le - unmatched surrogate lead assert_equals: expected "\ufffd\0" but got "�\0" 11 FAIL utf-16le - unmatched surrogate lead (fatal flag set) assert_throws_js: function "function () { 12 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 13 }" did not throw 14 FAIL utf-16le - unmatched surrogate trail assert_equals: expected "\ufffd\0" but got "�\0" 15 FAIL utf-16le - unmatched surrogate trail (fatal flag set) assert_throws_js: function "function () { 16 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 17 }" did not throw 18 FAIL utf-16le - swapped surrogate pair assert_equals: expected "\ufffd\ufffd" but got "��" 19 FAIL utf-16le - swapped surrogate pair (fatal flag set) assert_throws_js: function "function () { 20 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 21 }" did not throw 2 PASS utf-16le - lone surrogate lead 3 PASS utf-16le - lone surrogate lead (fatal flag set) 4 PASS utf-16le - lone surrogate trail 5 PASS utf-16le - lone surrogate trail (fatal flag set) 6 PASS utf-16le - unmatched surrogate lead 7 PASS utf-16le - unmatched surrogate lead (fatal flag set) 8 PASS utf-16le - unmatched surrogate trail 9 PASS utf-16le - unmatched surrogate trail (fatal flag set) 10 PASS utf-16le - swapped surrogate pair 11 PASS utf-16le - swapped surrogate pair (fatal flag set) 22 12 -
trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any.worker-expected.txt
r256730 r266457 1 1 2 FAIL utf-16le - lone surrogate lead assert_equals: expected "\ufffd" but got "�" 3 FAIL utf-16le - lone surrogate lead (fatal flag set) assert_throws_js: function "function () { 4 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 5 }" did not throw 6 FAIL utf-16le - lone surrogate trail assert_equals: expected "\ufffd" but got "�" 7 FAIL utf-16le - lone surrogate trail (fatal flag set) assert_throws_js: function "function () { 8 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 9 }" did not throw 10 FAIL utf-16le - unmatched surrogate lead assert_equals: expected "\ufffd\0" but got "�\0" 11 FAIL utf-16le - unmatched surrogate lead (fatal flag set) assert_throws_js: function "function () { 12 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 13 }" did not throw 14 FAIL utf-16le - unmatched surrogate trail assert_equals: expected "\ufffd\0" but got "�\0" 15 FAIL utf-16le - unmatched surrogate trail (fatal flag set) assert_throws_js: function "function () { 16 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 17 }" did not throw 18 FAIL utf-16le - swapped surrogate pair assert_equals: expected "\ufffd\ufffd" but got "��" 19 FAIL utf-16le - swapped surrogate pair (fatal flag set) assert_throws_js: function "function () { 20 new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) 21 }" did not throw 2 PASS utf-16le - lone surrogate lead 3 PASS utf-16le - lone surrogate lead (fatal flag set) 4 PASS utf-16le - lone surrogate trail 5 PASS utf-16le - lone surrogate trail (fatal flag set) 6 PASS utf-16le - unmatched surrogate lead 7 PASS utf-16le - unmatched surrogate lead (fatal flag set) 8 PASS utf-16le - unmatched surrogate trail 9 PASS utf-16le - unmatched surrogate trail (fatal flag set) 10 PASS utf-16le - swapped surrogate pair 11 PASS utf-16le - swapped surrogate pair (fatal flag set) 22 12 -
trunk/Source/WebCore/ChangeLog
r266454 r266457 1 2020-09-02 Alex Christensen <achristensen@webkit.org> 2 3 Align UTF-16 decoder with Chrome, Firefox, and specification 4 https://bugs.webkit.org/show_bug.cgi?id=216058 5 6 Reviewed by Youenn Fablet. 7 8 Covered by web platform tests. 9 10 * platform/text/TextCodecUTF16.cpp: 11 (WebCore::TextCodecUTF16::decode): 12 * platform/text/TextCodecUTF16.h: 13 (): Deleted. 14 1 15 2020-09-02 Youenn Fablet <youenn@apple.com> 2 16 -
trunk/Source/WebCore/platform/text/TextCodecUTF16.cpp
r248846 r266457
 28  28
 29  29   #include <wtf/text/CString.h>
     30 + #include <wtf/text/StringBuilder.h>
 30  31   #include <wtf/text/WTFString.h>
 31  32
  …   …
 62  63   }
 63  64
 64     - String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool&)
     65 + // https://encoding.spec.whatwg.org/#shared-utf-16-decoder
     66 + String TextCodecUTF16::decode(const char* bytes, size_t length, bool flush, bool, bool& sawError)
 65  67   {
 66     -     if (!length)
 67     -         return String();
     68 +     const auto* p = reinterpret_cast<const uint8_t*>(bytes);
     69 +     const auto* const end = p + length;
     70 +     const auto* const endMinusOneOrNull = end ? end - 1 : nullptr;
 68  71
 69     -     // FIXME: This should generate an error if there is an unpaired surrogate.
     72 +     StringBuilder result;
     73 +     result.reserveCapacity(length / 2);
 70  74
 71     -     const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
 72     -     size_t numBytes = length + m_haveBufferedByte;
 73     -     size_t numCodeUnits = numBytes / 2;
 74     -     RELEASE_ASSERT(numCodeUnits <= std::numeric_limits<unsigned>::max());
     75 +     Function<void(UChar)> processBytesShared;
     76 +     processBytesShared = [&] (UChar codeUnit) {
     77 +         if (m_leadSurrogate) {
     78 +             auto leadSurrogate = *std::exchange(m_leadSurrogate, WTF::nullopt);
     79 +             if (codeUnit >= 0xDC00 && codeUnit <= 0xDFFF) {
     80 +                 result.appendCharacter(0x10000 + ((leadSurrogate - 0xD800) << 10) + codeUnit - 0xDC00);
     81 +                 return;
     82 +             }
     83 +             sawError = true;
     84 +             result.append(replacementCharacter);
     85 +             processBytesShared(codeUnit);
     86 +             return;
     87 +         }
     88 +         if (codeUnit >= 0xD800 && codeUnit <= 0xDBFF) {
     89 +             m_leadSurrogate = codeUnit;
     90 +             return;
     91 +         }
     92 +         if (codeUnit >= 0xDC00 && codeUnit <=0xDFFF) {
     93 +             sawError = true;
     94 +             result.append(replacementCharacter);
     95 +             return;
     96 +         }
     97 +         result.append(codeUnit);
     98 +     };
     99 +     auto processBytesLE = [&] (uint8_t first, uint8_t second) {
    100 +         processBytesShared(first | (second << 8));
    101 +     };
    102 +     auto processBytesBE = [&] (uint8_t first, uint8_t second) {
    103 +         processBytesShared((first << 8) | second);
    104 +     };
 75 105
 76     -     UChar* q;
 77     -     auto result = String::createUninitialized(numCodeUnits, q);
 78     -
 79     -     if (m_haveBufferedByte) {
 80     -         UChar c;
    106 +     if (m_leadByte && p < end) {
    107 +         auto leadByte = *std::exchange(m_leadByte, WTF::nullopt);
 81 108           if (m_littleEndian)
 82     -             c = m_bufferedByte | (p[0] << 8);
    109 +             processBytesLE(leadByte, p[0]);
 83 110           else
 84     -             c = (m_bufferedByte << 8) | p[0];
 85     -         *q++ = c;
 86     -         m_haveBufferedByte = false;
 87     -         p += 1;
 88     -         numCodeUnits -= 1;
    111 +             processBytesBE(leadByte, p[0]);
    112 +         p++;
 89 113       }
 90 114
 91 115       if (m_littleEndian) {
 92     -         for (size_t i = 0; i < numCodeUnits; ++i) {
 93     -             UChar c = p[0] | (p[1] << 8);
    116 +         while (p < endMinusOneOrNull) {
    117 +             processBytesLE(p[0], p[1]);
 94 118               p += 2;
 95     -             *q++ = c;
 96 119           }
 97 120       } else {
 98     -         for (size_t i = 0; i < numCodeUnits; ++i) {
 99     -             UChar c = (p[0] << 8) | p[1];
    121 +         while (p < endMinusOneOrNull) {
    122 +             processBytesBE(p[0], p[1]);
100 123               p += 2;
101     -             *q++ = c;
102 124           }
103 125       }
104 126
105     -     if (numBytes & 1) {
106     -         ASSERT(!m_haveBufferedByte);
107     -         m_haveBufferedByte = true;
108     -         m_bufferedByte = p[0];
    127 +     if (p && p == endMinusOneOrNull) {
    128 +         ASSERT(!m_leadByte);
    129 +         m_leadByte = p[0];
    130 +     } else
    131 +         ASSERT(!p || p == end);
    132 +
    133 +     if (flush && (m_leadByte || m_leadSurrogate)) {
    134 +         m_leadByte = WTF::nullopt;
    135 +         m_leadSurrogate = WTF::nullopt;
    136 +         sawError = true;
    137 +         result.append(replacementCharacter);
109 138       }
110 139
111     -     return result;
    140 +     return result.toString();
112 141   }
113 142
-
trunk/Source/WebCore/platform/text/TextCodecUTF16.h
r225618 r266457
 42  42
 43  43       bool m_littleEndian;
 44     -     bool m_haveBufferedByte { false };
 45     -     unsigned char m_bufferedByte;
     44 +     Optional<uint8_t> m_leadByte;
     45 +     Optional<UChar> m_leadSurrogate;
 46  46   };
 47  47
Note: See TracChangeset for help on using the changeset viewer.