Changeset 25066 in webkit
- Timestamp: Aug 13, 2007 10:24:05 PM (17 years ago)
- Location: trunk
- Files: 2 added, 3 edited
Legend:
- Unmodified
- Added
- Removed
trunk/LayoutTests/ChangeLog
r25062 → r25066
+2007-08-13  Alexey Proskuryakov  <ap@webkit.org>
+
+        Reviewed by Darin.
+
+        http://bugs.webkit.org/show_bug.cgi?id=14951
+        REGRESSION: page interpreted as UTF-8 because of stray <?xml> after <head>
+
+        * fast/encoding/misplaced-xml-declaration-expected.txt: Added.
+        * fast/encoding/misplaced-xml-declaration.html: Added.
+
 2007-08-13  Sam Weinig  <sam@webkit.org>
trunk/WebCore/ChangeLog
r25065 → r25066
+2007-08-13  Alexey Proskuryakov  <ap@webkit.org>
+
+        Reviewed by Darin.
+
+        http://bugs.webkit.org/show_bug.cgi?id=14951
+        REGRESSION: page interpreted as UTF-8 because of stray <?xml> after <head>
+
+        Test: fast/encoding/misplaced-xml-declaration.html
+
+        * loader/TextResourceDecoder.cpp:
+        (WebCore::TextResourceDecoder::checkForHeadCharset): Only honor XML declaration
+        at the very beginning of the file.
+
 2007-08-13  Oliver Hunt  <oliver@apple.com>
trunk/WebCore/loader/TextResourceDecoder.cpp
r24052 r25066 4 4 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 5 5 Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. 6 Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)6 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) 7 7 8 8 This library is free software; you can redistribute it and/or … … 481 481 482 482 movedDataToBuffer = true; 483 483 484 const char* ptr = m_buffer.data(); 485 const char* pEnd = ptr + m_buffer.size(); 486 487 // Is there enough data available to check for XML declaration? 488 if (m_buffer.size() < 8) 489 return false; 490 491 // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents. 492 // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case. 493 if (ptr[0] == '<' && ptr[1] == '?' && ptr[2] == 'x' && ptr[3] == 'm' && ptr[4] == 'l') { 494 const char* xmlDeclarationEnd = ptr; 495 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') 496 ++xmlDeclarationEnd; 497 if (xmlDeclarationEnd == pEnd) 498 return false; 499 DeprecatedCString str(ptr, xmlDeclarationEnd - ptr); // No need for +1, because we have an extra "?" to lose at the end of XML declaration. 500 int len = 0; 501 int pos = findXMLEncoding(str, len); 502 if (pos != -1) 503 setEncoding(TextEncoding(str.mid(pos, len)), EncodingFromXMLHeader); 504 // continue looking for a charset - it may be specified in an HTTP-Equiv meta 505 } else if (ptr[0] == '<' && ptr[1] == 0 && ptr[2] == '?' && ptr[3] == 0 && ptr[4] == 'x' && ptr[5] == 0) { 506 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); 507 return true; 508 } else if (ptr[0] == 0 && ptr[1] == '<' && ptr[2] == 0 && ptr[3] == '?' && ptr[4] == 0 && ptr[5] == 'x') { 509 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); 510 return true; 511 } else if (ptr[0] == '<' && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0 && ptr[4] == '?' 
&& ptr[5] == 0 && ptr[6] == 0 && ptr[7] == 0) { 512 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); 513 return true; 514 } else if (ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == '<' && ptr[4] == 0 && ptr[5] == 0 && ptr[6] == 0 && ptr[7] == '?') { 515 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); 516 return true; 517 } 518 484 519 // we still don't have an encoding, and are in the head 485 520 // the following tags are allowed in <head>: … … 497 532 AtomicStringImpl* enclosingTagName = 0; 498 533 499 const char* ptr = m_buffer.data(); 500 const char* pEnd = ptr + m_buffer.size(); 501 while (ptr + 7 < pEnd) { // +7 guarantees that "<!--" and "<?xml" fit in the buffer - and certainly we aren't going to lose any "charset" that way. 534 while (ptr + 3 < pEnd) { // +3 guarantees that "<!--" fits in the buffer - and certainly we aren't going to lose any "charset" that way. 502 535 if (*ptr == '<') { 503 536 bool end = false; … … 509 542 skipComment(ptr, pEnd); 510 543 continue; 511 }512 513 // Handle XML declaration, which can have encoding in it.514 // This encoding is honored even for HTML documents.515 if (ptr[0] == '?' && ptr[1] == 'x' && ptr[2] == 'm' && ptr[3] == 'l') {516 const char* xmlDeclarationEnd = ptr;517 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>')518 ++xmlDeclarationEnd;519 if (xmlDeclarationEnd == pEnd)520 return false;521 DeprecatedCString str(ptr, xmlDeclarationEnd - ptr); // No need for +1, because we have an extra "?" to lose at the end of XML declaration.522 int len = 0;523 int pos = findXMLEncoding(str, len);524 if (pos != -1)525 setEncoding(TextEncoding(str.mid(pos, len)), EncodingFromXMLHeader);526 // continue looking for a charset - it may be specified in an HTTP-Equiv meta527 } else if (ptr[0] == 0 && ptr[1] == '?' && ptr[2] == 0 && ptr[3] == 'x' && ptr[4] == 0 && ptr[5] == 'm' && ptr[6] == 0 && ptr[7] == 'l') {528 // UTF-16 without BOM529 setEncoding(((ptr - m_buffer.data()) % 2) ? 
UTF16LittleEndianEncoding() : UTF16BigEndianEncoding(), AutoDetectedEncoding);530 return true;531 } else if (ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == '?' && ptr[4] == 0 && ptr[5] == 0 && ptr[6] == 0 && ptr[7] == 'x') {532 // UTF-32 without BOM533 setEncoding(((ptr - m_buffer.data()) % 4) ? UTF32LittleEndianEncoding() : UTF32BigEndianEncoding(), AutoDetectedEncoding);534 return true;535 544 } 536 545
Note: See TracChangeset for help on using the changeset viewer.