Changeset 31316

Show
Ignore:
Timestamp:
2008-03-26 10:19:07 (6 months ago)
Author:
eric@webkit.org
Message:

Reviewed by darin.

Fix, makes us pass Test 70
XML documents should be strict about encoding checks
http://bugs.webkit.org/show_bug.cgi?id=17079

Test: fast/encoding/invalid-xml.html

  • WebCore.base.exp:
  • dom/XMLTokenizer.cpp: (WebCore::XMLTokenizer::write):
  • loader/CachedFont.cpp: (WebCore::CachedFont::ensureSVGFontData):
  • loader/TextResourceDecoder.cpp: (WebCore::TextResourceDecoder::TextResourceDecoder): (WebCore::TextResourceDecoder::decode):
  • loader/TextResourceDecoder.h:
  • platform/text/TextCodec.h: (WebCore::TextCodec::decode):
  • platform/text/TextCodecICU.cpp: (WebCore::TextCodecICU::decodeToBuffer): (WebCore::ErrorCallbackSetter::ErrorCallbackSetter): (WebCore::ErrorCallbackSetter::~ErrorCallbackSetter): (WebCore::TextCodecICU::decode):
  • platform/text/TextCodecICU.h:
  • platform/text/TextCodecLatin1.cpp:
  • platform/text/TextCodecLatin1.h:
  • platform/text/TextCodecUTF16.cpp:
  • platform/text/TextCodecUTF16.h:
  • platform/text/TextCodecUserDefined.cpp:
  • platform/text/TextCodecUserDefined.h:
  • platform/text/TextDecoder.cpp: (WebCore::TextDecoder::checkForBOM):
  • platform/text/TextDecoder.h: (WebCore::TextDecoder::decode):
  • platform/text/TextEncoding.cpp: (WebCore::TextEncoding::decode):
  • platform/text/TextEncoding.h: (WebCore::TextEncoding::decode):
  • platform/text/mac/TextCodecMac.cpp: (WebCore::TextCodecMac::decode):
  • platform/text/mac/TextCodecMac.h:
Location:
trunk
Files:
7 added
22 modified

Legend:

Unmodified
Added
Removed
  • trunk/LayoutTests/ChangeLog

    r31313 r31316  
     1 2008-03-26  Eric Seidel  <eric@webkit.org> 
     2 
     3        Reviewed by darin. 
     4 
     5        Fix, makes us pass Test 70 
     6        XML documents should be strict about encoding checks 
     7        http://bugs.webkit.org/show_bug.cgi?id=17079 
     8 
     9        * fast/encoding/invalid-xml-expected.txt: Added. 
     10        * fast/encoding/invalid-xml.html: Copied from LayoutTests/fast/dom/resources/TEMPLATE.html. 
     11        * fast/encoding/resources/invalid-xml-shift-jis.xml: Added. 
     12        * fast/encoding/resources/invalid-xml-utf16.xml: Added. 
     13        * fast/encoding/resources/invalid-xml-utf8.xml: Added. 
     14        * fast/encoding/resources/invalid-xml-x-mac-thai.xml: Added. 
     15        * fast/encoding/resources/invalid-xml.js: Added. 
     16 
    1 17 2008-03-26  Dan Bernstein  <mitz@apple.com> 
    218 
  • trunk/WebCore/ChangeLog

    r31315 r31316  
     1 2008-03-26  Eric Seidel  <eric@webkit.org> 
     2 
     3        Reviewed by darin. 
     4 
     5        Fix, makes us pass Test 70 
     6        XML documents should be strict about encoding checks 
     7        http://bugs.webkit.org/show_bug.cgi?id=17079 
     8 
     9        Test: fast/encoding/invalid-xml.html 
     10 
     11        * WebCore.base.exp: 
     12        * dom/XMLTokenizer.cpp: 
     13        (WebCore::XMLTokenizer::write): 
     14        * loader/CachedFont.cpp: 
     15        (WebCore::CachedFont::ensureSVGFontData): 
     16        * loader/TextResourceDecoder.cpp: 
     17        (WebCore::TextResourceDecoder::TextResourceDecoder): 
     18        (WebCore::TextResourceDecoder::decode): 
     19        * loader/TextResourceDecoder.h: 
     20        * platform/text/TextCodec.h: 
     21        (WebCore::TextCodec::decode): 
     22        * platform/text/TextCodecICU.cpp: 
     23        (WebCore::TextCodecICU::decodeToBuffer): 
     24        (WebCore::ErrorCallbackSetter::ErrorCallbackSetter): 
     25        (WebCore::ErrorCallbackSetter::~ErrorCallbackSetter): 
     26        (WebCore::TextCodecICU::decode): 
     27        * platform/text/TextCodecICU.h: 
     28        * platform/text/TextCodecLatin1.cpp: 
     29        * platform/text/TextCodecLatin1.h: 
     30        * platform/text/TextCodecUTF16.cpp: 
     31        * platform/text/TextCodecUTF16.h: 
     32        * platform/text/TextCodecUserDefined.cpp: 
     33        * platform/text/TextCodecUserDefined.h: 
     34        * platform/text/TextDecoder.cpp: 
     35        (WebCore::TextDecoder::checkForBOM): 
     36        * platform/text/TextDecoder.h: 
     37        (WebCore::TextDecoder::decode): 
     38        * platform/text/TextEncoding.cpp: 
     39        (WebCore::TextEncoding::decode): 
     40        * platform/text/TextEncoding.h: 
     41        (WebCore::TextEncoding::decode): 
     42        * platform/text/mac/TextCodecMac.cpp: 
     43        (WebCore::TextCodecMac::decode): 
     44        * platform/text/mac/TextCodecMac.h: 
     45 
    1 46 2008-03-26  Antti Koivisto  <antti@apple.com> 
    247 
  • trunk/WebCore/WebCore.base.exp

    r31293 r31316  
    659659__ZNK7WebCore12SharedBuffer4dataEv 
    660660__ZNK7WebCore12SharedBuffer4sizeEv 
    661 __ZNK7WebCore12TextEncoding6decodeEPKcm 
     661__ZNK7WebCore12TextEncoding6decodeEPKcmbRb 
    662662__ZNK7WebCore13HitTestResult10isLiveLinkEv 
    663663__ZNK7WebCore13HitTestResult10isSelectedEv 
  • trunk/WebCore/dom/XMLTokenizer.cpp

    r30584 r31316  
    4747#include "ResourceRequest.h" 
    4848#include "ResourceResponse.h" 
     49#include "TextResourceDecoder.h" 
    4950#ifndef USE_QXMLSTREAM 
    5051#include <libxml/parser.h> 
     
    669670#endif 
    670671     
     672    if (m_doc->decoder() && m_doc->decoder()->sawError()) 
     673        // If the decoder saw an error, report it as fatal (stops parsing) 
     674        handleError(fatal, "Encoding error", lineNumber(), columnNumber()); 
     675 
    671676    return false; 
    672677} 
  • trunk/WebCore/loader/CachedFont.cpp

    r31287 r31316  
    138138        TextResourceDecoder decoder("application/xml"); 
    139139        m_externalSVGDocument->write(decoder.decode(m_data->data(), m_data->size())); 
     140        if (decoder.sawError()) { 
     141            m_externalSVGDocument.clear(); 
     142            return 0; 
     143        } 
    140144 
    141145        m_externalSVGDocument->finishParsing(); 
  • trunk/WebCore/loader/TextResourceDecoder.cpp

    r30545 r31316  
    328328    , m_checkedForCSSCharset(false) 
    329329    , m_checkedForHeadCharset(false) 
     330    , m_sawError(false) 
    330331{ 
    331332} 
     
    759760 
    760761    if (m_buffer.isEmpty()) 
    761         return m_decoder.decode(data, len); 
     762        return m_decoder.decode(data, len, false, m_contentType == XML, m_sawError); 
    762763 
    763764    if (!movedDataToBuffer) { 
     
    767768    } 
    768769 
    769     String result = m_decoder.decode(m_buffer.data(), m_buffer.size()); 
     770    String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), false, m_contentType == XML, m_sawError); 
    770771    m_buffer.clear(); 
    771772    return result; 
     
    774775String TextResourceDecoder::flush() 
    775776{ 
    776     String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), true); 
     777    String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML, m_sawError); 
    777778    m_buffer.clear(); 
    778779    return result; 
  • trunk/WebCore/loader/TextResourceDecoder.h

    r27776 r31316  
    5353    String decode(const char* data, size_t length); 
    5454    String flush(); 
     55     
     56    bool sawError() const { return m_sawError; } 
    5557 
    5658private: 
     
    7173    bool m_checkedForCSSCharset; 
    7274    bool m_checkedForHeadCharset; 
     75    bool m_sawError; 
    7376}; 
    7477 
  • trunk/WebCore/platform/text/TextCodec.h

    r31089 r31316  
    3333#include <wtf/unicode/Unicode.h> 
    3434 
     35#include "PlatformString.h" 
     36 
    3537namespace WebCore { 
    36  
    37     class CString; 
    38     class String; 
    3938    class TextEncoding; 
    4039 
     
    6160        virtual ~TextCodec(); 
    6261 
    63         virtual String decode(const char*, size_t length, bool flush = false) = 0; 
     62        String decode(const char* str, size_t length, bool flush = false) 
     63        { 
     64            bool ignored; 
     65            return decode(str, length, flush, false, ignored); 
     66        } 
     67         
     68        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0; 
    6469        virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0; 
    6570 
  • trunk/WebCore/platform/text/TextCodecICU.cpp

    r31089 r31316  
    216216} 
    217217 
    218 String TextCodecICU::decode(const char* bytes, size_t length, bool flush) 
     218int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err) 
     219{ 
     220    UChar* targetStart = target; 
     221    err = U_ZERO_ERROR; 
     222    ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err); 
     223    return target - targetStart; 
     224} 
     225 
     226class ErrorCallbackSetter { 
     227public: 
     228    ErrorCallbackSetter(UConverter* converter, bool stopOnError) 
     229        : m_converter(converter) 
     230        , m_shouldStopOnEncodingErrors(stopOnError) 
     231    { 
     232        if (m_shouldStopOnEncodingErrors) { 
     233            UErrorCode err = U_ZERO_ERROR; 
     234            ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE, 
     235                           UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction, 
     236                           &m_savedContext, &err); 
     237            ASSERT(err == U_ZERO_ERROR); 
     238        } 
     239    } 
     240    ~ErrorCallbackSetter() 
     241    { 
     242        if (m_shouldStopOnEncodingErrors) { 
     243            UErrorCode err = U_ZERO_ERROR; 
     244            const void* oldContext; 
     245            UConverterToUCallback oldAction; 
     246            ucnv_setToUCallBack(m_converter, m_savedAction, 
     247                   m_savedContext, &oldAction, 
     248                   &oldContext, &err); 
     249            ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE); 
     250            ASSERT(oldContext == UCNV_SUB_STOP_ON_ILLEGAL); 
     251            ASSERT(err == U_ZERO_ERROR); 
     252        } 
     253    } 
     254private: 
     255    UConverter* m_converter; 
     256    bool m_shouldStopOnEncodingErrors; 
     257    const void* m_savedContext; 
     258    UConverterToUCallback m_savedAction; 
     259}; 
     260 
     261String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) 
    219262{ 
    220263    // Get a converter for the passed-in encoding. 
     
    227270        } 
    228271    } 
     272     
     273    ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError); 
    229274 
    230275    Vector<UChar> result; 
    231276 
    232277    UChar buffer[ConversionBufferSize]; 
     278    UChar* bufferLimit = buffer + ConversionBufferSize; 
    233279    const char* source = reinterpret_cast<const char*>(bytes); 
    234280    const char* sourceLimit = source + length; 
    235281    int32_t* offsets = NULL; 
    236     UErrorCode err; 
     282    UErrorCode err = U_ZERO_ERROR; 
    237283 
    238284    do { 
    239         UChar* target = buffer; 
    240         const UChar* targetLimit = target + ConversionBufferSize; 
    241         err = U_ZERO_ERROR; 
    242         ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err); 
    243         int count = target - buffer; 
    244         appendOmittingBOM(result, buffer, count); 
     285        int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush, err); 
     286        appendOmittingBOM(result, buffer, ucharsDecoded); 
    245287    } while (err == U_BUFFER_OVERFLOW_ERROR); 
    246288 
     
    248290        // flush the converter so it can be reused, and not be bothered by this error. 
    249291        do { 
    250             UChar *target = buffer; 
    251             const UChar *targetLimit = target + ConversionBufferSize; 
    252             err = U_ZERO_ERROR; 
    253             ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, true, &err); 
     292            decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err); 
    254293        } while (source < sourceLimit); 
     294        sawError = true; 
    255295        LOG_ERROR("ICU conversion error"); 
    256         return String(); 
    257296    } 
    258297 
  • trunk/WebCore/platform/text/TextCodecICU.h

    r31089 r31316  
    4646        virtual ~TextCodecICU(); 
    4747 
    48         virtual String decode(const char*, size_t length, bool flush = false); 
     48        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); 
    4949        virtual CString encode(const UChar*, size_t length, UnencodableHandling); 
    5050 
     
    5454        bool needsGBKFallbacks() const { return m_needsGBKFallbacks; } 
    5555        void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; } 
     56         
     57        int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source, 
     58            const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err); 
    5659 
    5760        TextEncoding m_encoding; 
  • trunk/WebCore/platform/text/TextCodecLatin1.cpp

    r31089 r31316  
    119119} 
    120120 
    121 String TextCodecLatin1::decode(const char* bytes, size_t length, bool) 
     121String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&) 
    122122{ 
    123123    StringBuffer characters(length); 
  • trunk/WebCore/platform/text/TextCodecLatin1.h

    r31089 r31316  
    3636        static void registerCodecs(TextCodecRegistrar); 
    3737 
    38         virtual String decode(const char*, size_t length, bool flush = false); 
     38        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); 
    3939        virtual CString encode(const UChar*, size_t length, UnencodableHandling); 
    4040    }; 
  • trunk/WebCore/platform/text/TextCodecUTF16.cpp

    r31089 r31316  
    6868} 
    6969 
    70 String TextCodecUTF16::decode(const char* bytes, size_t length, bool) 
     70String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool stopOnError, bool& sawError) 
    7171{ 
    7272    if (!length) 
  • trunk/WebCore/platform/text/TextCodecUTF16.h

    r31089 r31316  
    3838        TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { } 
    3939 
    40         virtual String decode(const char*, size_t length, bool flush = false); 
     40        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); 
    4141        virtual CString encode(const UChar*, size_t length, UnencodableHandling); 
    4242 
  • trunk/WebCore/platform/text/TextCodecUserDefined.cpp

    r31089 r31316  
    5151} 
    5252 
    53 String TextCodecUserDefined::decode(const char* bytes, size_t length, bool) 
     53String TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&) 
    5454{ 
    5555    StringBuffer buffer(length); 
  • trunk/WebCore/platform/text/TextCodecUserDefined.h

    r31089 r31316  
    3636        static void registerCodecs(TextCodecRegistrar); 
    3737 
    38         virtual String decode(const char*, size_t length, bool flush = false); 
     38        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); 
    3939        virtual CString encode(const UChar*, size_t length, UnencodableHandling); 
    4040    }; 
  • trunk/WebCore/platform/text/TextDecoder.cpp

    r28234 r31316  
    4848} 
    4949 
    50 String TextDecoder::checkForBOM(const char* data, size_t length, bool flush) 
     50String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError) 
    5151{ 
    5252    // Check to see if we found a BOM. 
     
    9797        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes); 
    9898        m_numBufferedBytes = 0; 
    99         return m_codec->decode(bufferedBytes, numBufferedBytes, false) 
    100             + m_codec->decode(data, length, flush); 
     99 
     100        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError); 
     101        if (stopOnError && sawError) 
     102            return bufferedResult; 
     103        return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError); 
    101104    } 
    102105 
    103     return m_codec->decode(data, length, flush); 
     106    return m_codec->decode(data, length, flush, stopOnError, sawError); 
    104107} 
    105108 
  • trunk/WebCore/platform/text/TextDecoder.h

    r28234 r31316  
    4242        const TextEncoding& encoding() const { return m_encoding; }; 
    4343 
    44         String decode(const char* data, size_t length, bool flush = false) 
     44        String decode(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError) 
    4545        { 
    4646            if (!m_checkedForBOM) 
    47                 return checkForBOM(data, length, flush); 
    48             return m_codec->decode(data, length, flush); 
     47                return checkForBOM(data, length, flush, stopOnError, sawError); 
     48            return m_codec->decode(data, length, flush, stopOnError, sawError); 
    4949        } 
    5050 
    5151    private: 
    52         String checkForBOM(const char*, size_t length, bool flush); 
     52        String checkForBOM(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); 
    5353 
    5454        TextEncoding m_encoding; 
  • trunk/WebCore/platform/text/TextEncoding.cpp

    r31089 r31316  
    6060} 
    6161 
    62 String TextEncoding::decode(const char* data, size_t length) const 
     62String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const 
    6363{ 
    6464    if (!m_name) 
    6565        return String(); 
    6666 
    67     return TextDecoder(*this).decode(data, length, true); 
     67    return TextDecoder(*this).decode(data, length, true, stopOnError, sawError); 
    6868} 
    6969 
  • trunk/WebCore/platform/text/TextEncoding.h

    r31089 r31316  
    4848        const TextEncoding& closest8BitEquivalent() const; 
    4949 
    50         String decode(const char*, size_t length) const; 
     50        String decode(const char* str, size_t length) const 
     51        { 
     52            bool ignored; 
     53            return decode(str, length, false, ignored); 
     54        } 
     55        String decode(const char*, size_t length, bool stopOnError, bool& sawError) const; 
    5156        CString encode(const UChar*, size_t length, UnencodableHandling) const; 
    5257 
  • trunk/WebCore/platform/text/mac/TextCodecMac.cpp

    r31089 r31316  
    7979TextCodecMac::TextCodecMac(TECTextEncodingID encoding) 
    8080    : m_encoding(encoding) 
    81     , m_error(false) 
    8281    , m_numBufferedBytes(0) 
    8382    , m_converterTEC(0) 
     
    180179 
    181180    // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus. 
    182     if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) { 
     181    if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) 
    183182        status = kTECOutputBufferFullStatus; 
    184     } 
    185183 
    186184    inputLength = bytesRead; 
     
    189187} 
    190188 
    191 String TextCodecMac::decode(const char* bytes, size_t length, bool flush) 
     189String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) 
    192190{ 
    193191    // Get a converter for the passed-in encoding. 
     
    202200    UniChar buffer[ConversionBufferSize]; 
    203201 
    204     while (sourceLength || bufferWasFull) { 
     202    while ((sourceLength || bufferWasFull) && !sawError) { 
    205203        int bytesRead = 0; 
    206204        int bytesWritten = 0; 
     
    218216                // FIXME: Put FFFD character into the output string in this case? 
    219217                TECClearConverterContextInfo(m_converterTEC); 
     218                if (stopOnError) { 
     219                    sawError = true; 
     220                    break; 
     221                } 
    220222                if (sourceLength) { 
    221223                    sourcePointer += 1; 
     
    238240            default: 
    239241                LOG_ERROR("text decoding failed with error %ld", static_cast<long>(status)); 
    240                 m_error = true; 
     242                sawError = true; 
    241243                return String(); 
    242244        } 
  • trunk/WebCore/platform/text/mac/TextCodecMac.h

    r31089 r31316  
    4444        virtual ~TextCodecMac(); 
    4545 
    46         virtual String decode(const char*, size_t length, bool flush = false); 
     46        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); 
    4747        virtual CString encode(const UChar*, size_t length, UnencodableHandling); 
    4848 
     
    5656        TECTextEncodingID m_encoding; 
    5757        UChar m_backslashAsCurrencySymbol; 
    58         bool m_error; 
    5958        unsigned m_numBufferedBytes; 
    6059        unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character