Changeset 24052 in webkit
- Timestamp: Jul 6, 2007 3:00:45 AM (17 years ago)
- Location: trunk
- Files: 8 added, 7 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r24048 r24052 1 2007-07-06 Jungshik Shin <jungshik.shin@gmail.com> 2 3 Reviewed by Alexey. 4 5 - test for http://bugs.webkit.org/show_bug.cgi?id=13415 6 7 * fast/encoding/utf-32-big-endian-bom-expected.txt: Added. 8 * fast/encoding/utf-32-big-endian-bom.html: Added. 9 * fast/encoding/utf-32-big-endian-nobom-expected.txt: Added. 10 * fast/encoding/utf-32-big-endian-nobom.xml: Added. 11 * fast/encoding/utf-32-little-endian-bom-expected.txt: Added. 12 * fast/encoding/utf-32-little-endian-bom.html: Added. 13 * fast/encoding/utf-32-little-endian-nobom-expected.txt: Added. 14 * fast/encoding/utf-32-little-endian-nobom.xml: Added. 15 1 16 2007-07-06 Rob Buis <buis@kde.org> 2 17 -
trunk/WebCore/ChangeLog
r24051 r24052 1 2007-07-06 Jungshik Shin <jungshik.shin@gmail.com> 2 3 Reviewed by Alexey. 4 5 - Add UTF-32 encoding support 6 http://bugs.webkit.org/show_bug.cgi?id=13415 7 8 Test: 9 - fast/encoding/utf-32-big-endian-bom.html 10 - fast/encoding/utf-32-big-endian-nobom.xml 11 - fast/encoding/utf-32-little-endian-bom.html 12 - fast/encoding/utf-32-little-endian-nobom.xml 13 14 * loader/TextResourceDecoder.cpp: 15 (WebCore::TextResourceDecoder::checkForBOM): 16 (WebCore::TextResourceDecoder::checkForHeadCharset): 17 * platform/TextDecoder.cpp: 18 (WebCore::TextDecoder::checkForBOM): 19 * platform/TextDecoder.h: 20 * platform/TextEncoding.cpp: 21 (WebCore::UTF32BigEndianEncoding): 22 (WebCore::UTF32LittleEndianEncoding): 23 * platform/TextEncoding.h: 24 1 25 2007-07-06 Holger Hans Peter Freyther <zecke@selfish.org> 2 26 -
trunk/WebCore/loader/TextResourceDecoder.cpp
r23906 r24052 347 347 void TextResourceDecoder::checkForBOM(const char* data, size_t len) 348 348 { 349 // Check for UTF-16 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.349 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding. 350 350 351 351 if (m_source == UserChosenEncoding) { … … 357 357 // Check if we have enough data. 358 358 size_t bufferLength = m_buffer.size(); 359 if (bufferLength + len < 3)359 if (bufferLength + len < 4) 360 360 return; 361 361 362 362 m_checkedForBOM = true; 363 363 364 // Extract the first threebytes.364 // Extract the first four bytes. 365 365 // Handle the case where some of bytes are already in the buffer. 366 366 // The last byte is always guaranteed to not be in the buffer. … … 368 368 unsigned char c1 = bufferLength >= 1 ? m_buffer[0] : *udata++; 369 369 unsigned char c2 = bufferLength >= 2 ? m_buffer[1] : *udata++; 370 ASSERT(bufferLength < 3); 371 unsigned char c3 = *udata; 370 unsigned char c3 = bufferLength >= 3 ? m_buffer[2] : *udata++; 371 ASSERT(bufferLength < 4); 372 unsigned char c4 = *udata; 372 373 373 374 // Check for the BOM. 
374 if (c1 == 0xFE && c2 == 0xFF) 375 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); 376 else if (c1 == 0xFF && c2 == 0xFE) 377 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); 375 if (c1 == 0xFF && c2 == 0xFE) { 376 if (c3 !=0 || c4 != 0) 377 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); 378 else 379 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); 380 } 378 381 else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) 379 382 setEncoding(UTF8Encoding(), AutoDetectedEncoding); 383 else if (c1 == 0xFE && c2 == 0xFF) 384 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); 385 else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) 386 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); 380 387 } 381 388 … … 520 527 } else if (ptr[0] == 0 && ptr[1] == '?' && ptr[2] == 0 && ptr[3] == 'x' && ptr[4] == 0 && ptr[5] == 'm' && ptr[6] == 0 && ptr[7] == 'l') { 521 528 // UTF-16 without BOM 522 setEncoding(((ptr - m_buffer.data()) % 2) ? "UTF-16LE" : "UTF-16BE", AutoDetectedEncoding); 529 setEncoding(((ptr - m_buffer.data()) % 2) ? UTF16LittleEndianEncoding() : UTF16BigEndianEncoding(), AutoDetectedEncoding); 530 return true; 531 } else if (ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == '?' && ptr[4] == 0 && ptr[5] == 0 && ptr[6] == 0 && ptr[7] == 'x') { 532 // UTF-32 without BOM 533 setEncoding(((ptr - m_buffer.data()) % 4) ? UTF32LittleEndianEncoding() : UTF32BigEndianEncoding(), AutoDetectedEncoding); 523 534 return true; 524 535 } -
trunk/WebCore/platform/TextDecoder.cpp
r16245 r24052 58 58 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 59 59 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 60 unsigned char c3 = buf2Len ? (--buf2Len, *buf2++) : 0; 60 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 61 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; 61 62 62 63 const TextEncoding* encodingConsideringBOM = &m_encoding; 63 if (c1 == 0xFF && c2 == 0xFE) 64 encodingConsideringBOM = &UTF16LittleEndianEncoding(); 64 bool foundBOM = true; 65 if (c1 == 0xFF && c2 == 0xFE) { 66 if (c3 != 0 || c4 != 0) 67 encodingConsideringBOM = &UTF16LittleEndianEncoding(); 68 else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) 69 encodingConsideringBOM = &UTF32LittleEndianEncoding(); 70 else 71 foundBOM = false; 72 } 73 else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) 74 encodingConsideringBOM = &UTF8Encoding(); 65 75 else if (c1 == 0xFE && c2 == 0xFF) 66 76 encodingConsideringBOM = &UTF16BigEndianEncoding(); 67 else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) 68 encodingConsideringBOM = &UTF8Encoding(); 69 else if (numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) { 77 else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) 78 encodingConsideringBOM = &UTF32BigEndianEncoding(); 79 else 80 foundBOM = false; 81 if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) { 70 82 // Continue to look for the BOM. 71 83 memcpy(&m_bufferedBytes[numBufferedBytes], data, length); -
trunk/WebCore/platform/TextDecoder.h
r17431 r24052 57 57 bool m_checkedForBOM; 58 58 unsigned char m_numBufferedBytes; 59 unsigned char m_bufferedBytes[ 2];59 unsigned char m_bufferedBytes[3]; 60 60 }; 61 61 -
trunk/WebCore/platform/TextEncoding.cpp
r21227 r24052 186 186 } 187 187 188 const TextEncoding& UTF32BigEndianEncoding() 189 { 190 static TextEncoding globalUTF32BigEndianEncoding("UTF-32BE"); 191 return globalUTF32BigEndianEncoding; 192 } 193 194 const TextEncoding& UTF32LittleEndianEncoding() 195 { 196 static TextEncoding globalUTF32LittleEndianEncoding("UTF-32LE"); 197 return globalUTF32LittleEndianEncoding; 198 } 199 200 188 201 const TextEncoding& UTF8Encoding() 189 202 { -
trunk/WebCore/platform/TextEncoding.h
r18712 r24052 61 61 const TextEncoding& UTF16BigEndianEncoding(); 62 62 const TextEncoding& UTF16LittleEndianEncoding(); 63 const TextEncoding& UTF32BigEndianEncoding(); 64 const TextEncoding& UTF32LittleEndianEncoding(); 63 65 const TextEncoding& UTF8Encoding(); 64 66 const TextEncoding& WindowsLatin1Encoding();
Note: See TracChangeset for help on using the changeset viewer.