Changeset 31316 in webkit


Ignore:
Timestamp:
Mar 26, 2008 10:19:07 AM (16 years ago)
Author:
eric@webkit.org
Message:

Reviewed by darin.

Fix, makes us pass Test 70
XML documents should be strict about encoding checks
http://bugs.webkit.org/show_bug.cgi?id=17079

Test: fast/encoding/invalid-xml.html

  • WebCore.base.exp:
  • dom/XMLTokenizer.cpp: (WebCore::XMLTokenizer::write):
  • loader/CachedFont.cpp: (WebCore::CachedFont::ensureSVGFontData):
  • loader/TextResourceDecoder.cpp: (WebCore::TextResourceDecoder::TextResourceDecoder): (WebCore::TextResourceDecoder::decode):
  • loader/TextResourceDecoder.h:
  • platform/text/TextCodec.h: (WebCore::TextCodec::decode):
  • platform/text/TextCodecICU.cpp: (WebCore::TextCodecICU::decodeToBuffer): (WebCore::ErrorCallbackSetter::ErrorCallbackSetter): (WebCore::ErrorCallbackSetter::~ErrorCallbackSetter): (WebCore::TextCodecICU::decode):
  • platform/text/TextCodecICU.h:
  • platform/text/TextCodecLatin1.cpp:
  • platform/text/TextCodecLatin1.h:
  • platform/text/TextCodecUTF16.cpp:
  • platform/text/TextCodecUTF16.h:
  • platform/text/TextCodecUserDefined.cpp:
  • platform/text/TextCodecUserDefined.h:
  • platform/text/TextDecoder.cpp: (WebCore::TextDecoder::checkForBOM):
  • platform/text/TextDecoder.h: (WebCore::TextDecoder::decode):
  • platform/text/TextEncoding.cpp: (WebCore::TextEncoding::decode):
  • platform/text/TextEncoding.h: (WebCore::TextEncoding::decode):
  • platform/text/mac/TextCodecMac.cpp: (WebCore::TextCodecMac::decode):
  • platform/text/mac/TextCodecMac.h:
Location:
trunk
Files:
7 added
22 edited

Legend:

Unmodified
Added
Removed
  • trunk/LayoutTests/ChangeLog

    r31313 r31316  
     1 2008-03-26  Eric Seidel  <eric@webkit.org>
     2
     3        Reviewed by darin.
     4
     5        Fix, makes us pass Test 70
     6        XML documents should be strict about encoding checks
     7        http://bugs.webkit.org/show_bug.cgi?id=17079
     8
     9        * fast/encoding/invalid-xml-expected.txt: Added.
     10        * fast/encoding/invalid-xml.html: Copied from LayoutTests/fast/dom/resources/TEMPLATE.html.
     11        * fast/encoding/resources/invalid-xml-shift-jis.xml: Added.
     12        * fast/encoding/resources/invalid-xml-utf16.xml: Added.
     13        * fast/encoding/resources/invalid-xml-utf8.xml: Added.
     14        * fast/encoding/resources/invalid-xml-x-mac-thai.xml: Added.
     15        * fast/encoding/resources/invalid-xml.js: Added.
     16
    1 17 2008-03-26  Dan Bernstein  <mitz@apple.com>
    2 18
  • trunk/WebCore/ChangeLog

    r31315 r31316  
     1 2008-03-26  Eric Seidel  <eric@webkit.org>
     2
     3        Reviewed by darin.
     4
     5        Fix, makes us pass Test 70
     6        XML documents should be strict about encoding checks
     7        http://bugs.webkit.org/show_bug.cgi?id=17079
     8
     9        Test: fast/encoding/invalid-xml.html
     10
     11        * WebCore.base.exp:
     12        * dom/XMLTokenizer.cpp:
     13        (WebCore::XMLTokenizer::write):
     14        * loader/CachedFont.cpp:
     15        (WebCore::CachedFont::ensureSVGFontData):
     16        * loader/TextResourceDecoder.cpp:
     17        (WebCore::TextResourceDecoder::TextResourceDecoder):
     18        (WebCore::TextResourceDecoder::decode):
     19        * loader/TextResourceDecoder.h:
     20        * platform/text/TextCodec.h:
     21        (WebCore::TextCodec::decode):
     22        * platform/text/TextCodecICU.cpp:
     23        (WebCore::TextCodecICU::decodeToBuffer):
     24        (WebCore::ErrorCallbackSetter::ErrorCallbackSetter):
     25        (WebCore::ErrorCallbackSetter::~ErrorCallbackSetter):
     26        (WebCore::TextCodecICU::decode):
     27        * platform/text/TextCodecICU.h:
     28        * platform/text/TextCodecLatin1.cpp:
     29        * platform/text/TextCodecLatin1.h:
     30        * platform/text/TextCodecUTF16.cpp:
     31        * platform/text/TextCodecUTF16.h:
     32        * platform/text/TextCodecUserDefined.cpp:
     33        * platform/text/TextCodecUserDefined.h:
     34        * platform/text/TextDecoder.cpp:
     35        (WebCore::TextDecoder::checkForBOM):
     36        * platform/text/TextDecoder.h:
     37        (WebCore::TextDecoder::decode):
     38        * platform/text/TextEncoding.cpp:
     39        (WebCore::TextEncoding::decode):
     40        * platform/text/TextEncoding.h:
     41        (WebCore::TextEncoding::decode):
     42        * platform/text/mac/TextCodecMac.cpp:
     43        (WebCore::TextCodecMac::decode):
     44        * platform/text/mac/TextCodecMac.h:
     45
    1 46 2008-03-26  Antti Koivisto  <antti@apple.com>
    2 47
  • trunk/WebCore/WebCore.base.exp

    r31293 r31316  
    659 659 __ZNK7WebCore12SharedBuffer4dataEv
    660 660 __ZNK7WebCore12SharedBuffer4sizeEv
    661 __ZNK7WebCore12TextEncoding6decodeEPKcm
     661 __ZNK7WebCore12TextEncoding6decodeEPKcmbRb
    662 662 __ZNK7WebCore13HitTestResult10isLiveLinkEv
    663 663 __ZNK7WebCore13HitTestResult10isSelectedEv
  • trunk/WebCore/dom/XMLTokenizer.cpp

    r30584 r31316  
    4747#include "ResourceRequest.h"
    4848#include "ResourceResponse.h"
     49#include "TextResourceDecoder.h"
    4950#ifndef USE_QXMLSTREAM
    5051#include <libxml/parser.h>
     
    669670#endif
    670671   
     672    if (m_doc->decoder() && m_doc->decoder()->sawError())
     673        // If the decoder saw an error, report it as fatal (stops parsing)
     674        handleError(fatal, "Encoding error", lineNumber(), columnNumber());
     675
    671676    return false;
    672677}
  • trunk/WebCore/loader/CachedFont.cpp

    r31287 r31316  
    138138        TextResourceDecoder decoder("application/xml");
    139139        m_externalSVGDocument->write(decoder.decode(m_data->data(), m_data->size()));
     140        if (decoder.sawError()) {
     141            m_externalSVGDocument.clear();
     142            return 0;
     143        }
    140144
    141145        m_externalSVGDocument->finishParsing();
  • trunk/WebCore/loader/TextResourceDecoder.cpp

    r30545 r31316  
    328328    , m_checkedForCSSCharset(false)
    329329    , m_checkedForHeadCharset(false)
     330    , m_sawError(false)
    330331{
    331332}
     
    759760
    760761    if (m_buffer.isEmpty())
    761         return m_decoder.decode(data, len);
     762        return m_decoder.decode(data, len, false, m_contentType == XML, m_sawError);
    762763
    763764    if (!movedDataToBuffer) {
     
    767768    }
    768769
    769     String result = m_decoder.decode(m_buffer.data(), m_buffer.size());
     770    String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), false, m_contentType == XML, m_sawError);
    770771    m_buffer.clear();
    771772    return result;
     
    774775String TextResourceDecoder::flush()
    775776{
    776     String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), true);
     777    String result = m_decoder.decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML, m_sawError);
    777778    m_buffer.clear();
    778779    return result;
  • trunk/WebCore/loader/TextResourceDecoder.h

    r27776 r31316  
    5353    String decode(const char* data, size_t length);
    5454    String flush();
     55   
     56    bool sawError() const { return m_sawError; }
    5557
    5658private:
     
    7173    bool m_checkedForCSSCharset;
    7274    bool m_checkedForHeadCharset;
     75    bool m_sawError;
    7376};
    7477
  • trunk/WebCore/platform/text/TextCodec.h

    r31089 r31316  
    3333#include <wtf/unicode/Unicode.h>
    3434
     35#include "PlatformString.h"
     36
    3537namespace WebCore {
    36 
    37     class CString;
    38     class String;
    3938    class TextEncoding;
    4039
     
    6160        virtual ~TextCodec();
    6261
    63         virtual String decode(const char*, size_t length, bool flush = false) = 0;
     62        String decode(const char* str, size_t length, bool flush = false)
     63        {
     64            bool ignored;
     65            return decode(str, length, flush, false, ignored);
     66        }
     67       
     68        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0;
    6469        virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0;
    6570
  • trunk/WebCore/platform/text/TextCodecICU.cpp

    r31089 r31316  
    216216}
    217217
    218 String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
     218int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err)
     219{
     220    UChar* targetStart = target;
     221    err = U_ZERO_ERROR;
     222    ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
     223    return target - targetStart;
     224}
     225
     226class ErrorCallbackSetter {
     227public:
     228    ErrorCallbackSetter(UConverter* converter, bool stopOnError)
     229        : m_converter(converter)
     230        , m_shouldStopOnEncodingErrors(stopOnError)
     231    {
     232        if (m_shouldStopOnEncodingErrors) {
     233            UErrorCode err = U_ZERO_ERROR;
     234            ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
     235                           UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
     236                           &m_savedContext, &err);
     237            ASSERT(err == U_ZERO_ERROR);
     238        }
     239    }
     240    ~ErrorCallbackSetter()
     241    {
     242        if (m_shouldStopOnEncodingErrors) {
     243            UErrorCode err = U_ZERO_ERROR;
     244            const void* oldContext;
     245            UConverterToUCallback oldAction;
     246            ucnv_setToUCallBack(m_converter, m_savedAction,
     247                   m_savedContext, &oldAction,
     248                   &oldContext, &err);
     249            ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);
     250            ASSERT(oldContext == UCNV_SUB_STOP_ON_ILLEGAL);
     251            ASSERT(err == U_ZERO_ERROR);
     252        }
     253    }
     254private:
     255    UConverter* m_converter;
     256    bool m_shouldStopOnEncodingErrors;
     257    const void* m_savedContext;
     258    UConverterToUCallback m_savedAction;
     259};
     260
     261String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
    219262{
    220263    // Get a converter for the passed-in encoding.
     
    227270        }
    228271    }
     272   
     273    ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);
    229274
    230275    Vector<UChar> result;
    231276
    232277    UChar buffer[ConversionBufferSize];
     278    UChar* bufferLimit = buffer + ConversionBufferSize;
    233279    const char* source = reinterpret_cast<const char*>(bytes);
    234280    const char* sourceLimit = source + length;
    235281    int32_t* offsets = NULL;
    236     UErrorCode err;
     282    UErrorCode err = U_ZERO_ERROR;
    237283
    238284    do {
    239         UChar* target = buffer;
    240         const UChar* targetLimit = target + ConversionBufferSize;
    241         err = U_ZERO_ERROR;
    242         ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
    243         int count = target - buffer;
    244         appendOmittingBOM(result, buffer, count);
     285        int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush, err);
     286        appendOmittingBOM(result, buffer, ucharsDecoded);
    245287    } while (err == U_BUFFER_OVERFLOW_ERROR);
    246288
     
    248290        // flush the converter so it can be reused, and not be bothered by this error.
    249291        do {
    250             UChar *target = buffer;
    251             const UChar *targetLimit = target + ConversionBufferSize;
    252             err = U_ZERO_ERROR;
    253             ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, true, &err);
     292            decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err);
    254293        } while (source < sourceLimit);
     294        sawError = true;
    255295        LOG_ERROR("ICU conversion error");
    256         return String();
    257296    }
    258297
  • trunk/WebCore/platform/text/TextCodecICU.h

    r31089 r31316  
    4646        virtual ~TextCodecICU();
    4747
    48         virtual String decode(const char*, size_t length, bool flush = false);
     48        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
    4949        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
    5050
     
    5454        bool needsGBKFallbacks() const { return m_needsGBKFallbacks; }
    5555        void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
     56       
     57        int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source,
     58            const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err);
    5659
    5760        TextEncoding m_encoding;
  • trunk/WebCore/platform/text/TextCodecLatin1.cpp

    r31089 r31316  
    119119}
    120120
    121 String TextCodecLatin1::decode(const char* bytes, size_t length, bool)
     121String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
    122122{
    123123    StringBuffer characters(length);
  • trunk/WebCore/platform/text/TextCodecLatin1.h

    r31089 r31316  
    3636        static void registerCodecs(TextCodecRegistrar);
    3737
    38         virtual String decode(const char*, size_t length, bool flush = false);
     38        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
    3939        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
    4040    };
  • trunk/WebCore/platform/text/TextCodecUTF16.cpp

    r31089 r31316  
    6868}
    6969
    70 String TextCodecUTF16::decode(const char* bytes, size_t length, bool)
     70String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool stopOnError, bool& sawError)
    7171{
    7272    if (!length)
  • trunk/WebCore/platform/text/TextCodecUTF16.h

    r31089 r31316  
    3838        TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { }
    3939
    40         virtual String decode(const char*, size_t length, bool flush = false);
     40        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
    4141        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
    4242
  • trunk/WebCore/platform/text/TextCodecUserDefined.cpp

    r31089 r31316  
    5151}
    5252
    53 String TextCodecUserDefined::decode(const char* bytes, size_t length, bool)
     53String TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&)
    5454{
    5555    StringBuffer buffer(length);
  • trunk/WebCore/platform/text/TextCodecUserDefined.h

    r31089 r31316  
    3636        static void registerCodecs(TextCodecRegistrar);
    3737
    38         virtual String decode(const char*, size_t length, bool flush = false);
     38        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
    3939        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
    4040    };
  • trunk/WebCore/platform/text/TextDecoder.cpp

    r28234 r31316  
    4848}
    4949
    50 String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
     50String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
    5151{
    5252    // Check to see if we found a BOM.
     
    9797        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
    9898        m_numBufferedBytes = 0;
    99         return m_codec->decode(bufferedBytes, numBufferedBytes, false)
    100             + m_codec->decode(data, length, flush);
     99
     100        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError);
     101        if (stopOnError && sawError)
     102            return bufferedResult;
     103        return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError);
    101104    }
    102105
    103     return m_codec->decode(data, length, flush);
     106    return m_codec->decode(data, length, flush, stopOnError, sawError);
    104107}
    105108
  • trunk/WebCore/platform/text/TextDecoder.h

    r28234 r31316  
    4242        const TextEncoding& encoding() const { return m_encoding; };
    4343
    44         String decode(const char* data, size_t length, bool flush = false)
     44        String decode(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
    4545        {
    4646            if (!m_checkedForBOM)
    47                 return checkForBOM(data, length, flush);
    48             return m_codec->decode(data, length, flush);
     47                return checkForBOM(data, length, flush, stopOnError, sawError);
     48            return m_codec->decode(data, length, flush, stopOnError, sawError);
    4949        }
    5050
    5151    private:
    52         String checkForBOM(const char*, size_t length, bool flush);
     52        String checkForBOM(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
    5353
    5454        TextEncoding m_encoding;
  • trunk/WebCore/platform/text/TextEncoding.cpp

    r31089 r31316  
    6060}
    6161
    62 String TextEncoding::decode(const char* data, size_t length) const
     62String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const
    6363{
    6464    if (!m_name)
    6565        return String();
    6666
    67     return TextDecoder(*this).decode(data, length, true);
     67    return TextDecoder(*this).decode(data, length, true, stopOnError, sawError);
    6868}
    6969
  • trunk/WebCore/platform/text/TextEncoding.h

    r31089 r31316  
    4848        const TextEncoding& closest8BitEquivalent() const;
    4949
    50         String decode(const char*, size_t length) const;
     50        String decode(const char* str, size_t length) const
     51        {
     52            bool ignored;
     53            return decode(str, length, false, ignored);
     54        }
     55        String decode(const char*, size_t length, bool stopOnError, bool& sawError) const;
    5156        CString encode(const UChar*, size_t length, UnencodableHandling) const;
    5257
  • trunk/WebCore/platform/text/mac/TextCodecMac.cpp

    r31089 r31316  
    7979TextCodecMac::TextCodecMac(TECTextEncodingID encoding)
    8080    : m_encoding(encoding)
    81     , m_error(false)
    8281    , m_numBufferedBytes(0)
    8382    , m_converterTEC(0)
     
    180179
    181180    // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
    182     if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) {
     181    if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0)
    183182        status = kTECOutputBufferFullStatus;
    184     }
    185183
    186184    inputLength = bytesRead;
     
    189187}
    190188
    191 String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
     189String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
    192190{
    193191    // Get a converter for the passed-in encoding.
     
    202200    UniChar buffer[ConversionBufferSize];
    203201
    204     while (sourceLength || bufferWasFull) {
     202    while ((sourceLength || bufferWasFull) && !sawError) {
    205203        int bytesRead = 0;
    206204        int bytesWritten = 0;
     
    218216                // FIXME: Put FFFD character into the output string in this case?
    219217                TECClearConverterContextInfo(m_converterTEC);
     218                if (stopOnError) {
     219                    sawError = true;
     220                    break;
     221                }
    220222                if (sourceLength) {
    221223                    sourcePointer += 1;
     
    238240            default:
    239241                LOG_ERROR("text decoding failed with error %ld", static_cast<long>(status));
    240                 m_error = true;
     242                sawError = true;
    241243                return String();
    242244        }
  • trunk/WebCore/platform/text/mac/TextCodecMac.h

    r31089 r31316  
    4444        virtual ~TextCodecMac();
    4545
    46         virtual String decode(const char*, size_t length, bool flush = false);
     46        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
    4747        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
    4848
     
    5656        TECTextEncodingID m_encoding;
    5757        UChar m_backslashAsCurrencySymbol;
    58         bool m_error;
    5958        unsigned m_numBufferedBytes;
    6059        unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
Note: See TracChangeset for help on using the changeset viewer.