Changeset 72662 in webkit
- Timestamp:
- Nov 24, 2010 4:51:02 AM (13 years ago)
- Location:
- trunk/JavaScriptCore
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/ChangeLog
r72592 r72662 1 2010-11-24 Carlos Garcia Campos <cgarcia@igalia.com> 2 3 Reviewed by Xan Lopez. 4 5 [GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend 6 https://bugs.webkit.org/show_bug.cgi?id=48625 7 8 GLib methods use UTF-8 strings, so we have to convert from UTF-16 to 9 UTF-8 to perform the case operations and then convert the result back to 10 UTF-16. GLib conversion methods return a newly allocated string, so we 11 have to memcpy the result into the destination buffer too. Using our 12 own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h 13 we don't need such a memcpy, since they take an already allocated buffer 14 rather than returning a new one. There's another optimization for the 15 case when the destination buffer is not large enough. In that case, 16 the methods should return the expected destination buffer size and are 17 called again with a new buffer. We can avoid the conversion to UTF-16 by 18 pre-calculating the required size for the destination buffer. 19 20 * wtf/unicode/glib/UnicodeGLib.cpp: 21 (WTF::Unicode::getUTF16LengthFromUTF8): 22 (WTF::Unicode::convertCase): 23 (WTF::Unicode::foldCase): 24 (WTF::Unicode::toLower): 25 (WTF::Unicode::toUpper): 26 1 27 2010-11-23 Patrick Gansterer <paroga@webkit.org> 2 28 -
trunk/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
r51336 r72662 2 2 * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> 3 3 * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> 4 * Copyright (C) 2010 Igalia S.L. 4 5 * 5 6 * This library is free software; you can redistribute it and/or … … 22 23 #include "config.h" 23 24 #include "UnicodeGLib.h" 25 26 #include <wtf/Vector.h> 27 #include <wtf/unicode/UTF8.h> 28 29 #define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF) 24 30 25 31 namespace WTF { … … 44 50 } 45 51 46 int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 52 static int getUTF16LengthFromUTF8(const gchar* utf8String, int length) 53 { 54 int utf16Length = 0; 55 const gchar* inputString = utf8String; 56 57 while ((utf8String + length - inputString > 0) && *inputString) { 58 gunichar character = g_utf8_get_char(inputString); 59 60 utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1; 61 inputString = g_utf8_next_char(inputString); 62 } 63 64 return utf16Length; 65 } 66 67 typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length); 68 69 static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction) 47 70 { 48 71 *error = false; 49 GOwnPtr<GError> gerror; 50 72 51 GOwnPtr<char> utf8src; 52 utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); 53 if (gerror) { 73 // Allocate a buffer big enough to hold all the characters. 
74 Vector<char> buffer(srcLength * 3); 75 char* utf8Target = buffer.data(); 76 const UChar* utf16Source = src; 77 ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true); 78 if (conversionResult != conversionOK) { 54 79 *error = true; 55 80 return -1; 56 81 } 82 buffer.shrink(utf8Target - buffer.data()); 57 83 58 GOwnPtr<char> utf8result; 59 utf8result.set(g_utf8_casefold(utf8src.get(), -1)); 84 GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size())); 85 long utf8ResultLength = strlen(utf8Result.get()); 60 86 61 long utf16resultLength = -1; 62 GOwnPtr<UChar> utf16result; 63 utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); 64 if (gerror) { 87 // Calculate the destination buffer size. 88 int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength); 89 if (realLength > resultLength) { 65 90 *error = true; 66 return -1; 91 return realLength; 67 92 } 68 93 69 if (utf16resultLength > resultLength) { 94 // Convert the result to UTF-16. 95 UChar* utf16Target = result; 96 const char* utf8Source = utf8Result.get(); 97 conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true); 98 long utf16ResultLength = utf16Target - result; 99 if (conversionResult != conversionOK) 70 100 *error = true; 71 return utf16resultLength; 72 } 73 memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); 74 101 75 return utf16resultLength; 102 return utf16ResultLength <= 0 ? 
-1 : utf16ResultLength; 103 } 104 int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 105 { 106 return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold); 76 107 } 77 108 78 109 int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 79 110 { 80 *error = false; 81 GOwnPtr<GError> gerror; 82 83 GOwnPtr<char> utf8src; 84 utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); 85 if (gerror) { 86 *error = true; 87 return -1; 88 } 89 90 GOwnPtr<char> utf8result; 91 utf8result.set(g_utf8_strdown(utf8src.get(), -1)); 92 93 long utf16resultLength = -1; 94 GOwnPtr<UChar> utf16result; 95 utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); 96 if (gerror) { 97 *error = true; 98 return -1; 99 } 100 101 if (utf16resultLength > resultLength) { 102 *error = true; 103 return utf16resultLength; 104 } 105 memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); 106 107 return utf16resultLength; 111 return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown); 108 112 } 109 113 110 114 int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) 111 115 { 112 *error = false; 113 GOwnPtr<GError> gerror; 114 115 GOwnPtr<char> utf8src; 116 utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr())); 117 if (gerror) { 118 *error = true; 119 return -1; 120 } 121 122 GOwnPtr<char> utf8result; 123 utf8result.set(g_utf8_strup(utf8src.get(), -1)); 124 125 long utf16resultLength = -1; 126 GOwnPtr<UChar> utf16result; 127 utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr())); 128 if (gerror) { 129 *error = true; 130 return -1; 131 } 132 133 if (utf16resultLength > resultLength) { 134 *error = true; 135 return utf16resultLength; 136 } 137 memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar)); 138 139 return 
utf16resultLength; 116 return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup); 140 117 } 141 118
Note: See TracChangeset
for help on using the changeset viewer.