Context Navigation

← Previous Changeset
Next Changeset →

Changeset 72662 in webkit

Timestamp:

Nov 24, 2010, 4:51:02 AM (15 years ago)

Author:

commit-queue@webkit.org

Message:

2010-11-24 Carlos Garcia Campos <cgarcia@igalia.com>

Reviewed by Xan Lopez.

[GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
https://bugs.webkit.org/show_bug.cgi?id=48625

GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
UTF-8 to perform the case operations and then convert the result back to
UTF-16. GLib conversion methods return a newly allocated string, so we
have to memcpy the result into the destination buffer too. Using our
own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h
we don't need such memcpy, since they take an already allocated buffer
rather than returning a new one. There's another optimization for the
case when the destination buffer is not large enough. In that case,
methods should return the expected destination buffer size and are
called again with a new buffer. We can avoid the conversion to UTF-16 by
pre-calculating the required size for the destination buffer.

wtf/unicode/glib/UnicodeGLib.cpp: (WTF::Unicode::getUTF16LengthFromUTF8): (WTF::Unicode::convertCase): (WTF::Unicode::foldCase): (WTF::Unicode::toLower): (WTF::Unicode::toUpper):

Location:

trunk/JavaScriptCore

Files:

2 edited

ChangeLog (modified) (1 diff)
wtf/unicode/glib/UnicodeGLib.cpp (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/JavaScriptCore/ChangeLog

-              r72592
+              r72662
+2010-11-24  Carlos Garcia Campos  <cgarcia@igalia.com>
+        Reviewed by Xan Lopez.
+        [GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
+        https://bugs.webkit.org/show_bug.cgi?id=48625
+        GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
+        UTF-8 to perform the case operations and then convert the result back to
+        UTF-16. GLib conversion methods return a newly allocated string, so we
+        have to memcpy the result into the destination buffer too. Using our
+        own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h
+        we don't need such memcpy, since they take an already allocated buffer
+        rather than returning a new one. There's another optimization for the
+        case when the destination buffer is not large enough. In that case,
+        methods should return the expected destination buffer size and are
+        called again with a new buffer. We can avoid the conversion to UTF-16 by
+        pre-calculating the required size for the destination buffer.
+        * wtf/unicode/glib/UnicodeGLib.cpp:
+        (WTF::Unicode::getUTF16LengthFromUTF8):
+        (WTF::Unicode::convertCase):
+        (WTF::Unicode::foldCase):
+        (WTF::Unicode::toLower):
+        (WTF::Unicode::toUpper):
 2010-11-23  Patrick Gansterer  <paroga@webkit.org>

trunk/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp

-              r51336
+              r72662
  *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
  *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *  Copyright (C) 2010 Igalia S.L.
+ *
  *  This library is free software; you can redistribute it and/or
 …
 #include "config.h"
 #include "UnicodeGLib.h"
+#include <wtf/Vector.h>
+#include <wtf/unicode/UTF8.h>
+#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF)
 namespace WTF {
 …
+}
+int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
+{
+    int utf16Length = 0;
+    const gchar* inputString = utf8String;
+    while ((utf8String + length - inputString > 0) && *inputString) {
+        gunichar character = g_utf8_get_char(inputString);
+        utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1;
+        inputString = g_utf8_next_char(inputString);
+    }
+    return utf16Length;
+}
+typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length);
+static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
+{
     *error = false;
-    GOwnPtr<GError> gerror;
+    GOwnPtr<char> utf8src;
+    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
+    if (gerror) {
+    // Allocate a buffer big enough to hold all the characters.
+    Vector<char> buffer(srcLength * 3);
+    char* utf8Target = buffer.data();
+    const UChar* utf16Source = src;
+    ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
+    if (conversionResult != conversionOK) {
         *error = true;
         return -1;
+    }
+    buffer.shrink(utf8Target - buffer.data());
     GOwnPtr<char> utf8result;
     utf8result.set(g_utf8_casefold(utf8src.get(), -1));
+    GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
+    long utf8ResultLength = strlen(utf8Result.get());
+    long utf16resultLength = -1;
+    GOwnPtr<UChar> utf16result;
+    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
+    if (gerror) {
+    // Calculate the destination buffer size.
+    int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
+    if (realLength > resultLength) {
         *error = true;
         return -1;
+        return realLength;
+    }
+    if (utf16resultLength > resultLength) {
+    // Convert the result to UTF-16.
+    UChar* utf16Target = result;
+    const char* utf8Source = utf8Result.get();
+    conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true);
+    long utf16ResultLength = utf16Target - result;
+    if (conversionResult != conversionOK)
         *error = true;
-        return utf16resultLength;
+    }
-    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
+    return utf16resultLength;
+    return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
+}
+int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold);
+}
 int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    *error = false;
+    GOwnPtr<GError> gerror;
+    GOwnPtr<char> utf8src;
+    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
+    if (gerror) {
+        *error = true;
+        return -1;
+    }
+    GOwnPtr<char> utf8result;
+    utf8result.set(g_utf8_strdown(utf8src.get(), -1));
+    long utf16resultLength = -1;
+    GOwnPtr<UChar> utf16result;
+    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
+    if (gerror) {
+        *error = true;
+        return -1;
+    }
+    if (utf16resultLength > resultLength) {
+        *error = true;
+        return utf16resultLength;
+    }
+    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
+    return utf16resultLength;
+    return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown);
+}
 int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    *error = false;
+    GOwnPtr<GError> gerror;
+    GOwnPtr<char> utf8src;
+    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
+    if (gerror) {
+        *error = true;
+        return -1;
+    }
+    GOwnPtr<char> utf8result;
+    utf8result.set(g_utf8_strup(utf8src.get(), -1));
+    long utf16resultLength = -1;
+    GOwnPtr<UChar> utf16result;
+    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
+    if (gerror) {
+        *error = true;
+        return -1;
+    }
+    if (utf16resultLength > resultLength) {
+        *error = true;
+        return utf16resultLength;
+    }
+    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
+    return utf16resultLength;
+    return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup);
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 72662 in webkit

Legend:

trunk/JavaScriptCore/ChangeLog

trunk/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp

Download in other formats: