Changeset 131836 in webkit


Ignore:
Timestamp:
Oct 18, 2012, 6:22:21 PM (13 years ago)
Author:
msaboff@apple.com
Message:

convertUTF8ToUTF16() Should Check for ASCII Input
https://bugs.webkit.org/show_bug.cgi?id=99739

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

Using the updated convertUTF8ToUTF16(), we can determine if it makes more sense to
create a string using the 8-bit source. Added a new OpaqueJSString::create(LChar*, unsigned).
Had to add a cast in JSStringCreateWithCFString to differentiate which create() to call.

  • API/JSStringRef.cpp:

(JSStringCreateWithUTF8CString):

  • API/JSStringRefCF.cpp:

(JSStringCreateWithCFString):

  • API/OpaqueJSString.h:

(OpaqueJSString::create):
(OpaqueJSString):
(OpaqueJSString::OpaqueJSString):

Source/WTF:

Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion. This is
used to check to see if all characters are ASCII and is returned via a bool*.

  • wtf/unicode/UTF8.cpp:

(WTF::Unicode::convertUTF8ToUTF16):

  • wtf/unicode/UTF8.h:
Location:
trunk/Source
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/API/JSStringRef.cpp

    r71375 r131836  
    4747        Vector<UChar, 1024> buffer(length);
    4848        UChar* p = buffer.data();
    49         if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length))
     49        bool sourceIsAllASCII;
     50        if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length, &sourceIsAllASCII)) {
     51            if (sourceIsAllASCII)
     52                return OpaqueJSString::create(reinterpret_cast<const LChar*>(string), length).leakRef();
    5053            return OpaqueJSString::create(buffer.data(), p - buffer.data()).leakRef();
     54        }
    5155    }
    5256
  • trunk/Source/JavaScriptCore/API/JSStringRefCF.cpp

    r127191 r131836  
    4747        return OpaqueJSString::create(reinterpret_cast<UChar*>(buffer.get()), length).leakRef();
    4848    } else {
    49         return OpaqueJSString::create(0, 0).leakRef();
     49        return OpaqueJSString::create(static_cast<const LChar*>(0), 0).leakRef();
    5050    }
    5151}
  • trunk/Source/JavaScriptCore/API/OpaqueJSString.h

    r131777 r131836  
    4242    }
    4343
     44    static PassRefPtr<OpaqueJSString> create(const LChar* characters, unsigned length)
     45    {
     46        return adoptRef(new OpaqueJSString(characters, length));
     47    }
     48
    4449    static PassRefPtr<OpaqueJSString> create(const UChar* characters, unsigned length)
    4550    {
     
    7176    }
    7277
     78    OpaqueJSString(const LChar* characters, unsigned length)
     79    {
     80        m_string = String(characters, length);
     81    }
     82
    7383    OpaqueJSString(const UChar* characters, unsigned length)
    7484    {
  • trunk/Source/JavaScriptCore/ChangeLog

    r131830 r131836  
     12012-10-18  Michael Saboff  <msaboff@apple.com>
     2
     3        convertUTF8ToUTF16() Should Check for ASCII Input
     4        https://bugs.webkit.org/show_bug.cgi?id=99739
     5
     6        Reviewed by Geoffrey Garen.
     7
     8        Using the updated convertUTF8ToUTF16(), we can determine if it makes more sense to
     9        create a string using the 8-bit source.  Added a new OpaqueJSString::create(LChar*, unsigned).
     10        Had to add a cast in JSStringCreateWithCFString to differentiate which create() to call.
     11
     12        * API/JSStringRef.cpp:
     13        (JSStringCreateWithUTF8CString):
     14        * API/JSStringRefCF.cpp:
     15        (JSStringCreateWithCFString):
     16        * API/OpaqueJSString.h:
     17        (OpaqueJSString::create):
     18        (OpaqueJSString):
     19        (OpaqueJSString::OpaqueJSString):
     20
    1212012-10-18  Oliver Hunt  <oliver@apple.com>
    222
  • trunk/Source/WTF/ChangeLog

    r131833 r131836  
     12012-10-18  Michael Saboff  <msaboff@apple.com>
     2
     3        convertUTF8ToUTF16() Should Check for ASCII Input
     4        https://bugs.webkit.org/show_bug.cgi?id=99739
     5
     6        Reviewed by Geoffrey Garen.
     7
     8        Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion.  This is
     9        used to check to see if all characters are ASCII and is returned via a bool*.
     10
     11        * wtf/unicode/UTF8.cpp:
     12        (WTF::Unicode::convertUTF8ToUTF16):
     13        * wtf/unicode/UTF8.h:
     14
    1152012-10-18  Michael Saboff  <msaboff@apple.com>
    216
  • trunk/Source/WTF/wtf/unicode/UTF8.cpp

    r124268 r131836  
    298298ConversionResult convertUTF8ToUTF16(
    299299    const char** sourceStart, const char* sourceEnd,
    300     UChar** targetStart, UChar* targetEnd, bool strict)
     300    UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict)
    301301{
    302302    ConversionResult result = conversionOK;
    303303    const char* source = *sourceStart;
    304304    UChar* target = *targetStart;
     305    UChar orAllData = 0;
    305306    while (source < sourceEnd) {
    306307        int utf8SequenceLength = inlineUTF8SequenceLength(*source);
     
    330331                    result = sourceIllegal;
    331332                    break;
    332                 } else
     333                } else {
    333334                    *target++ = replacementCharacter;
    334             } else
     335                    orAllData |= replacementCharacter;
     336                }
     337            } else {
    335338                *target++ = character; // normal case
     339                orAllData |= character;
     340            }
    336341        } else if (U_IS_SUPPLEMENTARY(character)) {
    337342            // target is a character in range 0xFFFF - 0x10FFFF
     
    343348            *target++ = U16_LEAD(character);
    344349            *target++ = U16_TRAIL(character);
     350            orAllData = 0xffff;
    345351        } else {
    346352            if (strict) {
     
    348354                result = sourceIllegal;
    349355                break; // Bail out; shouldn't continue
    350             } else
     356            } else {
    351357                *target++ = replacementCharacter;
     358                orAllData |= replacementCharacter;
     359            }
    352360        }
    353361    }
    354362    *sourceStart = source;
    355363    *targetStart = target;
     364
     365    if (sourceAllASCII)
     366        *sourceAllASCII = !(orAllData & 0x7f);
     367
    356368    return result;
    357369}
  • trunk/Source/WTF/wtf/unicode/UTF8.h

    r124320 r131836  
    6565    WTF_EXPORT_PRIVATE ConversionResult convertUTF8ToUTF16(
    6666                    const char** sourceStart, const char* sourceEnd,
    67                     UChar** targetStart, UChar* targetEnd, bool strict = true);
     67                    UChar** targetStart, UChar* targetEnd, bool* isSourceAllASCII = 0, bool strict = true);
    6868
    6969    WTF_EXPORT_PRIVATE ConversionResult convertLatin1ToUTF8(
Note: See TracChangeset for help on using the changeset viewer.