Changeset 33380 in webkit


Ignore:
Timestamp:
May 13, 2008 11:36:58 AM (16 years ago)
Author:
ap@webkit.org
Message:

Reviewed by Eric Seidel.

https://bugs.webkit.org/show_bug.cgi?id=18681
<rdar://problem/5888130> WebKit should not remove BOM characters from content.

We were only trying to match Firefox, and it doesn't do this any more.

Tests: fast/encoding/bom-in-content.html
       fast/encoding/bom-in-content-utf16.html

  • platform/text/TextDecoder.cpp: (WebCore::TextDecoder::checkForBOM): Skip the BOM if it's at the start of the input stream.
  • platform/text/TextCodec.cpp:
  • platform/text/TextCodec.h:
  • platform/text/TextCodecICU.cpp: (WebCore::TextCodecICU::decode):
  • platform/text/TextCodecUTF16.cpp: (WebCore::TextCodecUTF16::decode):
  • platform/text/mac/TextCodecMac.cpp: (WebCore::TextCodecMac::decode): Don't remove the BOM.
Location:
trunk
Files:
7 added
1 deleted
8 edited
1 moved

Legend:

Unmodified
Added
Removed
  • trunk/LayoutTests/ChangeLog

    r33379 r33380  
     12008-05-13  Alexey Proskuryakov  <ap@webkit.org>
     2
     3        Reviewed by Eric Seidel.
     4
     5        https://bugs.webkit.org/show_bug.cgi?id=18681
     6        <rdar://problem/5888130> WebKit should not remove BOM characters from content.
     7
     8        * fast/encoding/bom-in-content-expected.txt: Added.
     9        * fast/encoding/bom-in-content.html: Added.
     10        * fast/encoding/bom-in-content-utf16-expected.txt: Added.
     11        * fast/encoding/bom-in-content-utf16.html: Added.
     12
     13        * http/tests/incremental/resources: Added.
     14        * http/tests/incremental/resources/slow-utf8-css.pl: Copied from LayoutTests/http/tests/incremental/slow-utf8-css.pl.
     15        * http/tests/incremental/slow-utf8-css-expected.txt: Added.
     16        * http/tests/incremental/slow-utf8-css.html: Added.
     17        * http/tests/incremental/slow-utf8-css.pl: Removed.
     18        * platform/mac/http/tests/incremental: Removed.
     19        * platform/mac/http/tests/incremental/slow-utf8-css-expected.checksum: Removed.
     20        * platform/mac/http/tests/incremental/slow-utf8-css-expected.png: Removed.
     21        * platform/mac/http/tests/incremental/slow-utf8-css-expected.txt: Removed.
     22        This test was relying on BOM characters being removed, but this was not what it tested for.
     23        Rewrote it and made text-only.
     24
    1252008-05-13  Alexey Proskuryakov  <ap@webkit.org>
    226
  • trunk/LayoutTests/http/tests/incremental/resources/slow-utf8-css.pl

    r33378 r33380  
    1111print "\n";
    1212
    13 print "\xef\xbb\xbfTest for bug 10753: The beginning of a CSS file is missing.\n\n";
    14 # Dump some BOMs to bypass CFNetwork buffering.
     13print "\xef\xbb\xbf#result {color:green;}\n";
     14# Dump some spaces to bypass CFNetwork buffering.
    1515for ($count = 1; $count < 4000; $count++) {
    16     print "\xef\xbb\xbf";
     16    print "   ";
    1717}
    1818
    1919# Delay to force the second line of text to be decoded as a separate chunk.
    2020sleep 1;
    21 print "You should see a bug description on a separate line above this one.";
     21print "body {}";
  • trunk/WebCore/ChangeLog

    r33378 r33380  
     12008-05-13  Alexey Proskuryakov  <ap@webkit.org>
     2
     3        Reviewed by Eric Seidel.
     4
     5        https://bugs.webkit.org/show_bug.cgi?id=18681
     6        <rdar://problem/5888130> WebKit should not remove BOM characters from content.
     7
     8        We were only trying to match Firefox, and it doesn't do this any more.
     9
     10        Tests: fast/encoding/bom-in-content.html
     11               fast/encoding/bom-in-content-utf16.html
     12
     13        * platform/text/TextDecoder.cpp: (WebCore::TextDecoder::checkForBOM): Skip the BOM if it's
     14        at the start of input stream.
     15
     16        * platform/text/TextCodec.cpp:
     17        * platform/text/TextCodec.h:
     18        * platform/text/TextCodecICU.cpp:
     19        (WebCore::TextCodecICU::decode):
     20        * platform/text/TextCodecUTF16.cpp:
     21        (WebCore::TextCodecUTF16::decode):
     22        * platform/text/mac/TextCodecMac.cpp:
     23        (WebCore::TextCodecMac::decode):
     24        Don't remove the BOM.
     25
    1262008-05-13  Anders Carlsson  <andersca@apple.com>
    227
  • trunk/WebCore/platform/text/TextCodec.cpp

    r31089 r33380  
    3333namespace WebCore {
    3434
    35 const UChar BOM = 0xFEFF;
    36 
    3735TextCodec::~TextCodec()
    3836{
    39 }
    40 
    41 // We strip BOM characters because they can show up both at the start of content
    42 // and inside content, and we never want them to end up in the decoded text.
    43 void TextCodec::appendOmittingBOM(Vector<UChar>& v, const UChar* characters, size_t length)
    44 {
    45     size_t start = 0;
    46     for (size_t i = 0; i != length; ++i) {
    47         if (BOM == characters[i]) {
    48             if (start != i)
    49                 v.append(&characters[start], i - start);
    50             start = i + 1;
    51         }
    52     }
    53     if (start != length)
    54         v.append(&characters[start], length - start);
    5537}
    5638
  • trunk/WebCore/platform/text/TextCodec.h

    r31316 r33380  
    7373        // The length of the string (not including the null) will be returned.
    7474        static int getUnencodableReplacement(unsigned codePoint, UnencodableHandling, UnencodableReplacementArray);
    75 
    76     protected:
    77         static void appendOmittingBOM(Vector<UChar>&, const UChar*, size_t length);
    7875    };
    7976
  • trunk/WebCore/platform/text/TextCodecICU.cpp

    r33377 r33380  
    283283    do {
    284284        int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush, err);
    285         appendOmittingBOM(result, buffer, ucharsDecoded);
     285        result.append(buffer, ucharsDecoded);
    286286    } while (err == U_BUFFER_OVERFLOW_ERROR);
    287287
  • trunk/WebCore/platform/text/TextCodecUTF16.cpp

    r31316 r33380  
    3434
    3535namespace WebCore {
    36 
    37 const UChar BOM = 0xFEFF;
    3836
    3937void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
     
    8684        else
    8785            c = (m_bufferedByte << 8) | p[0];
    88         if (c != BOM)
    89             *q++ = c;
     86        *q++ = c;
    9087        m_haveBufferedByte = false;
    9188        p += 1;
     
    9794            UChar c = p[0] | (p[1] << 8);
    9895            p += 2;
    99             if (c != BOM)
    100                 *q++ = c;
     96            *q++ = c;
    10197        }
    10298    else
     
    104100            UChar c = (p[0] << 8) | p[1];
    105101            p += 2;
    106             if (c != BOM)
    107                 *q++ = c;
     102            *q++ = c;
    108103        }
    109104
  • trunk/WebCore/platform/text/TextDecoder.cpp

    r31316 r33380  
    5050String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
    5151{
     52    ASSERT(!m_checkedForBOM);
     53
    5254    // Check to see if we found a BOM.
    5355    size_t numBufferedBytes = m_numBufferedBytes;
     
    6365    const TextEncoding* encodingConsideringBOM = &m_encoding;
    6466    bool foundBOM = true;
     67    size_t lengthOfBOM = 0;
    6568    if (c1 == 0xFF && c2 == 0xFE) {
    66         if (c3 != 0 || c4 != 0)
     69        if (c3 != 0 || c4 != 0)  {
    6770            encodingConsideringBOM = &UTF16LittleEndianEncoding();
    68         else if (numBufferedBytes + length > sizeof(m_bufferedBytes))
     71            lengthOfBOM = 2;
     72        } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) {
    6973            encodingConsideringBOM = &UTF32LittleEndianEncoding();
    70         else
     74            lengthOfBOM = 4;
     75        } else
    7176            foundBOM = false;
    72     }
    73     else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF)
     77    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
    7478        encodingConsideringBOM = &UTF8Encoding();
    75     else if (c1 == 0xFE && c2 == 0xFF)
     79        lengthOfBOM = 3;
     80    } else if (c1 == 0xFE && c2 == 0xFF) {
    7681        encodingConsideringBOM = &UTF16BigEndianEncoding();
    77     else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF)
     82        lengthOfBOM = 2;
     83    } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
    7884        encodingConsideringBOM = &UTF32BigEndianEncoding();
    79     else
     85        lengthOfBOM = 4;
     86    } else
    8087        foundBOM = false;
     88
    8189    if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
    8290        // Continue to look for the BOM.
     
    9199        return String();
    92100    m_checkedForBOM = true;
     101
     102    // Skip the BOM.
     103    if (foundBOM) {
     104        ASSERT(numBufferedBytes < lengthOfBOM);
     105        size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes;
     106        ASSERT(numUnbufferedBOMBytes <= length);
     107
     108        data += numUnbufferedBOMBytes;
     109        length -= numUnbufferedBOMBytes;
     110        numBufferedBytes = 0;
     111        m_numBufferedBytes = 0;
     112    }
    93113
    94114    // Handle case where we have some buffered bytes to deal with.
  • trunk/WebCore/platform/text/mac/TextCodecMac.cpp

    r31349 r33380  
    244244
    245245        ASSERT(!(bytesWritten % sizeof(UChar)));
    246         appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
     246        result.append(buffer, bytesWritten / sizeof(UChar));
    247247
    248248        bufferWasFull = status == kTECOutputBufferFullStatus;
     
    253253        TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
    254254        ASSERT(!(bytesWritten % sizeof(UChar)));
    255         appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
     255        result.append(buffer, bytesWritten / sizeof(UChar));
    256256    }
    257257
Note: See TracChangeset for help on using the changeset viewer.