Changeset 46343 in webkit


Ignore:
Timestamp:
Jul 24, 2009 1:42:49 AM (15 years ago)
Author:
xan@webkit.org
Message:

2009-07-24 Xan Lopez <xlopez@igalia.com>

Reviewed by Jan Alonzo.

https://bugs.webkit.org/show_bug.cgi?id=25415
[GTK][ATK] Please implement support for get_text_at_offset

Use TextEncoding facilities to convert between UTF16 and UTF8
instead of rolling our own solution.

  • accessibility/gtk/AccessibilityObjectWrapperAtk.cpp: (convertUniCharToUTF8):
Location:
trunk/WebCore
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/WebCore/ChangeLog

    r46342 r46343  
     1 2009-07-24  Xan Lopez  <xlopez@igalia.com>
     2
     3        Reviewed by Jan Alonzo.
     4
     5        https://bugs.webkit.org/show_bug.cgi?id=25415
     6        [GTK][ATK] Please implement support for get_text_at_offset
     7
     8        Use TextEncoding facilities to convert between UTF16 and UTF8
     9        instead of rolling our own solution.
     10
     11        * accessibility/gtk/AccessibilityObjectWrapperAtk.cpp:
     12        (convertUniCharToUTF8):
     13
    1 14 2009-07-24  Xan Lopez  <xlopez@igalia.com>
    2 15
  • trunk/WebCore/accessibility/gtk/AccessibilityObjectWrapperAtk.cpp

    r46342 r46343  
    4848#include "NotImplemented.h"
    4949#include "RenderText.h"
     50#include "TextEncoding.h"
    5051
    5152#include <atk/atk.h>
     
    518519}
    519520
    520 #define IS_HIGH_SURROGATE(u)  ((UChar)(u) >= (UChar)0xd800 && (UChar)(u) <= (UChar)0xdbff)
    521 #define IS_LOW_SURROGATE(u)   ((UChar)(u) >= (UChar)0xdc00 && (UChar)(u) <= (UChar)0xdfff)
    522 
    523 static void UTF16ToUTF8(const UChar* aText, gint aLength, char* &text, gint &length)
    524 {
    525     gboolean needCopy = FALSE;
    526     int i;
    527 
    528     for (i = 0; i < aLength; i++) {
    529         if (!aText[i] || IS_LOW_SURROGATE(aText[i])) {
    530             needCopy = TRUE;
    531             break;
    532         } else if (IS_HIGH_SURROGATE(aText[i])) {
    533             if (i < aLength - 1 && IS_LOW_SURROGATE(aText[i+1]))
    534                 i++;
    535             else {
    536                 needCopy = TRUE;
    537                 break;
    538             }
    539         }
    540     }
    541 
    542     if (needCopy) {
    543         /* Pango doesn't correctly handle nuls.  We convert them to 0xff. */
    544         /* Also "validate" UTF-16 text to make sure conversion doesn't fail. */
    545 
    546         UChar* p = (UChar*)g_memdup(aText, aLength * sizeof(aText[0]));
    547 
    548         /* don't need to reset i */
    549         for (i = 0; i < aLength; i++) {
    550             if (!p[i] || IS_LOW_SURROGATE(p[i]))
    551                 p[i] = 0xFFFD;
    552             else if (IS_HIGH_SURROGATE(p[i])) {
    553                 if (i < aLength - 1 && IS_LOW_SURROGATE(aText[i+1]))
    554                     i++;
    555                 else
    556                     p[i] = 0xFFFD;
    557             }
    558         }
    559 
    560         aText = p;
    561     }
    562 
    563     glong items_written;
    564     text = g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(aText), aLength, NULL, &items_written, NULL);
    565     length = items_written;
    566 
    567     if (needCopy)
    568         g_free((gpointer)aText);
    569 
    570 }
    571 
    572 static gchar* g_substr(const gchar* string, gint start, gint end)
     521static gchar* utf8Substr(const gchar* string, gint start, gint end)
    573522{
    574523    ASSERT(string);
     
    586535static gchar* convertUniCharToUTF8(const UChar* characters, gint length, int from, int to)
    587536{
    588     gchar* utf8 = 0;
    589     gint newLength = 0;
    590     UTF16ToUTF8(characters, length, utf8, newLength);
    591     if (!utf8)
    592         return NULL;
    593 
    594     gchar *pos = g_substr(utf8, from, to);
    595     g_free(utf8);
    596     gint len = strlen(pos);
     537    CString stringUTF8 = UTF8Encoding().encode(characters, length, QuestionMarksForUnencodables);
     538    gchar* utf8String = utf8Substr(stringUTF8.data(), from, to);
     539    if (!g_utf8_validate(utf8String, -1, NULL)) {
     540        g_free(utf8String);
     541        return 0;
     542    }
     543    gsize len = strlen(utf8String);
    597544    GString* ret = g_string_new_len(NULL, len);
    598545
     
    601548    while (len > 0) {
    602549        gint index, start;
    603         pango_find_paragraph_boundary(pos, len, &index, &start);
    604         g_string_append_len(ret, pos, index);
     550        pango_find_paragraph_boundary(utf8String, len, &index, &start);
     551        g_string_append_len(ret, utf8String, index);
    605552        if (index == start)
    606553            break;
    607554        g_string_append_c(ret, ' ');
    608         pos += start;
     555        utf8String += start;
    609556        len -= start;
    610557    }
    611558
     559    g_free(utf8String);
    612560    return g_string_free(ret, FALSE);
    613561}
Note: See TracChangeset for help on using the changeset viewer.