Changeset 72662 in webkit


Ignore:
Timestamp:
Nov 24, 2010 4:51:02 AM (13 years ago)
Author:
commit-queue@webkit.org
Message:

2010-11-24 Carlos Garcia Campos <cgarcia@igalia.com>

Reviewed by Xan Lopez.

[GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
https://bugs.webkit.org/show_bug.cgi?id=48625

GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
UTF-8 to perform the case operations and then convert the result back to
UTF-16. GLib conversion methods return a newly allocated string, so we
also have to memcpy the result into the destination buffer. By using our
own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h
we don't need that memcpy, since they take an already allocated buffer
rather than returning a new one. There's another optimization for the
case when the destination buffer is not large enough. In that case, the
methods should return the required destination buffer size and are then
called again with a new buffer. We can avoid the conversion to UTF-16 in
that case by pre-calculating the required size for the destination buffer.

  • wtf/unicode/glib/UnicodeGLib.cpp: (WTF::Unicode::getUTF16LengthFromUTF8): (WTF::Unicode::convertCase): (WTF::Unicode::foldCase): (WTF::Unicode::toLower): (WTF::Unicode::toUpper):
Location:
trunk/JavaScriptCore
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/ChangeLog

    r72592 r72662  
     1 2010-11-24  Carlos Garcia Campos  <cgarcia@igalia.com>
     2
     3        Reviewed by Xan Lopez.
     4
     5        [GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
     6        https://bugs.webkit.org/show_bug.cgi?id=48625
     7
     8        GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
     9        UTF-8 to perform the case operations and then convert back the result to
     10        UTF-16. GLib conversion methods return a new allocated string, so we
     11        have to memcpy the result into the destination buffer too. Using our
     12        own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h
     13        we don't need such memcpy, since they take an already allocated buffer
     14        rather than returning a new one. There's another optimization for the
     15        case when the destination buffer is not large enough. In that case,
     16        methods should return the expected destination buffer size and are
     17        called again with a new buffer. We can avoid the conversion to UTF-16 by
     18        pre-calculating the required size for the destination buffer.
     19
     20        * wtf/unicode/glib/UnicodeGLib.cpp:
     21        (WTF::Unicode::getUTF16LengthFromUTF8):
     22        (WTF::Unicode::convertCase):
     23        (WTF::Unicode::foldCase):
     24        (WTF::Unicode::toLower):
     25        (WTF::Unicode::toUpper):
     26
    1 27 2010-11-23  Patrick Gansterer  <paroga@webkit.org>
    228
  • trunk/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp

    r51336 r72662  
    22 *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
    33 *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
     4 *  Copyright (C) 2010 Igalia S.L.
    45 *
    56 *  This library is free software; you can redistribute it and/or
     
    2223#include "config.h"
    2324#include "UnicodeGLib.h"
     25
     26#include <wtf/Vector.h>
     27#include <wtf/unicode/UTF8.h>
     28
     29#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF)
    2430
    2531namespace WTF {
     
    4450}
    4551
    46 int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
     52static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
     53{
     54    int utf16Length = 0;
     55    const gchar* inputString = utf8String;
     56
     57    while ((utf8String + length - inputString > 0) && *inputString) {
     58        gunichar character = g_utf8_get_char(inputString);
     59
     60        utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1;
     61        inputString = g_utf8_next_char(inputString);
     62    }
     63
     64    return utf16Length;
     65}
     66
     67typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length);
     68
     69static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
    4770{
    4871    *error = false;
    49     GOwnPtr<GError> gerror;
    5072
    51     GOwnPtr<char> utf8src;
    52     utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
    53     if (gerror) {
     73    // Allocate a buffer big enough to hold all the characters.
     74    Vector<char> buffer(srcLength * 3);
     75    char* utf8Target = buffer.data();
     76    const UChar* utf16Source = src;
     77    ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
     78    if (conversionResult != conversionOK) {
    5479        *error = true;
    5580        return -1;
    5681    }
     82    buffer.shrink(utf8Target - buffer.data());
    5783
    58     GOwnPtr<char> utf8result;
    59     utf8result.set(g_utf8_casefold(utf8src.get(), -1));
     84    GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
     85    long utf8ResultLength = strlen(utf8Result.get());
    6086
    61     long utf16resultLength = -1;
    62     GOwnPtr<UChar> utf16result;
    63     utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
    64     if (gerror) {
     87    // Calculate the destination buffer size.
     88    int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
     89    if (realLength > resultLength) {
    6590        *error = true;
    66         return -1;
     91        return realLength;
    6792    }
    6893
    69     if (utf16resultLength > resultLength) {
     94    // Convert the result to UTF-16.
     95    UChar* utf16Target = result;
     96    const char* utf8Source = utf8Result.get();
     97    conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true);
     98    long utf16ResultLength = utf16Target - result;
     99    if (conversionResult != conversionOK)
    70100        *error = true;
    71         return utf16resultLength;
    72     }
    73     memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
    74101
    75     return utf16resultLength;
     102    return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
     103}
     104int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
     105{
     106    return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold);
    76107}
    77108
    78109int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
    79110{
    80     *error = false;
    81     GOwnPtr<GError> gerror;
    82 
    83     GOwnPtr<char> utf8src;
    84     utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
    85     if (gerror) {
    86         *error = true;
    87         return -1;
    88     }
    89 
    90     GOwnPtr<char> utf8result;
    91     utf8result.set(g_utf8_strdown(utf8src.get(), -1));
    92 
    93     long utf16resultLength = -1;
    94     GOwnPtr<UChar> utf16result;
    95     utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
    96     if (gerror) {
    97         *error = true;
    98         return -1;
    99     }
    100 
    101     if (utf16resultLength > resultLength) {
    102         *error = true;
    103         return utf16resultLength;
    104     }
    105     memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
    106 
    107     return utf16resultLength;
     111    return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown);
    108112}
    109113
    110114int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
    111115{
    112     *error = false;
    113     GOwnPtr<GError> gerror;
    114 
    115     GOwnPtr<char> utf8src;
    116     utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
    117     if (gerror) {
    118         *error = true;
    119         return -1;
    120     }
    121 
    122     GOwnPtr<char> utf8result;
    123     utf8result.set(g_utf8_strup(utf8src.get(), -1));
    124 
    125     long utf16resultLength = -1;
    126     GOwnPtr<UChar> utf16result;
    127     utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
    128     if (gerror) {
    129         *error = true;
    130         return -1;
    131     }
    132 
    133     if (utf16resultLength > resultLength) {
    134         *error = true;
    135         return utf16resultLength;
    136     }
    137     memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
    138 
    139     return utf16resultLength;
     116    return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup);
    140117}
    141118
Note: See TracChangeset for help on using the changeset viewer.