Changeset 119113 in webkit
- Timestamp:
- May 31, 2012 9:36:29 AM (12 years ago)
- Location:
- trunk/Source/WebKit/gtk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebKit/gtk/ChangeLog
r119063 r119113 1 2012-05-31 commit-queue@webkit.org <commit-queue@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc> 2 3 Spell checker doesn't recognize contractions (apostrophes) 4 https://bugs.webkit.org/show_bug.cgi?id=86118 5 6 Reviewed by Gustavo Noronha Silva. 7 8 The Enchant spell checker was breaking words on apostrophes, because 9 apparently they were always being detected as Pango word-end 10 characters. To know whether or not the apostrophe is a 11 word-end character requires looking at a string with a larger 12 granularity than one character. 13 14 Simplify the way we break strings, by searching for non-graphable 15 characters manually to find word starts and ends. This has the side 16 effect of removing the dependency on Pango and eliminating one copy. 17 18 This change also cleans up some misbehavior on the part of the 19 WebCoreSupport layer which was not converting from Unicode character 20 offsets to UTF-16. These offsets can be different if any of the 21 characters in the UTF-16 string are surrogate pairs (non-BMP 22 characters). 23 24 * WebCoreSupport/TextCheckerClientGtk.cpp: 25 (WebKit::TextCheckerClientGtk::checkSpellingOfString): Properly 26 convert from Unicode offsets to UTF-16 offsets. 27 * webkit/webkitspellcheckerenchant.cpp: 28 (findByteOffsetToFirstNonGraphableCharacter): Added this helper. 29 (getExtentsOfNextWord): Ditto. 30 (wordIsSpelledCorrectlyInAtLeastOneDictionary): Ditto. 31 (checkSpellingOfString): Don't split words on apostrophes. 32 1 33 2012-05-30 Sudarsana Nagineni <sudarsana.nagineni@linux.intel.com> 2 34 -
trunk/Source/WebKit/gtk/WebCoreSupport/TextCheckerClientGtk.cpp
r117671 r119113 64 64 GOwnPtr<gchar> utf8Text(g_utf16_to_utf8(const_cast<gunichar2*>(text), length, 0, 0, 0)); 65 65 webkit_spell_checker_check_spelling_of_string(m_spellChecker.get(), utf8Text.get(), misspellingLocation, misspellingLength); 66 67 // We have the offset and length in Unicode characters, but we need to convert them to UTF-16 offsets. 68 // Unfortunately there doesn't seem to be a simple way to do this. 69 if (!*misspellingLength) 70 return; 71 72 bool pastStartOfWord = false; 73 for (int i = 0; i < length; i++) { 74 if (i >= *misspellingLocation + *misspellingLength) 75 return; 76 if (!pastStartOfWord && i > *misspellingLocation) 77 pastStartOfWord = true; 78 79 // If this character is part of a surrogate pair, we need to skip the next character (the trail) 80 // and to increase our offsets. 81 if (!U16_IS_SINGLE(text[i])) { 82 i++; 83 (*misspellingLength)++; 84 *misspellingLocation += pastStartOfWord ? 0 : 1; 85 } 86 } 66 87 } 67 88 -
trunk/Source/WebKit/gtk/webkit/webkitspellcheckerenchant.cpp
r117671 r119113 89 89 } 90 90 91 static size_t findByteOffsetToFirstNonGraphableCharacter(const char* utf8String) 92 { 93 const char* firstNonGraphableCharacter = utf8String; 94 while (firstNonGraphableCharacter && *firstNonGraphableCharacter && g_unichar_isgraph(g_utf8_get_char(firstNonGraphableCharacter))) 95 firstNonGraphableCharacter = g_utf8_find_next_char(firstNonGraphableCharacter, 0); 96 97 if (!firstNonGraphableCharacter) // If there was an error, the offset will be zero. 98 return 0; 99 100 return firstNonGraphableCharacter - utf8String; 101 } 102 103 static void getExtentsOfNextWord(const char* utf8String, const char*& wordStart, size_t& length) 104 { 105 wordStart = utf8String; 106 while (wordStart && *wordStart && !g_unichar_isgraph(g_utf8_get_char(wordStart))) 107 wordStart = g_utf8_find_next_char(wordStart, NULL); 108 109 if (!wordStart || !*wordStart) { 110 length = 0; 111 return; 112 } 113 114 length = findByteOffsetToFirstNonGraphableCharacter(wordStart); 115 return; 116 } 117 118 static bool wordIsSpelledCorrectlyInAtLeastOneDictionary(GSList* dictionaries, const char* word, size_t length, int* misspellingLocation, int* misspellingLength) 119 { 120 for (; dictionaries; dictionaries = dictionaries->next) { 121 EnchantDict* dictionary = static_cast<EnchantDict*>(dictionaries->data); 122 int result = enchant_dict_check(dictionary, word, length); 123 124 if (result < 0) // Error during checking. 125 continue; 126 if (!result) // Stop checking, as this word is correct for at least one dictionary. 
127 return true; 128 } 129 130 return false; 131 } 132 91 133 static void checkSpellingOfString(WebKitSpellChecker* checker, const char* string, int* misspellingLocation, int* misspellingLength) 92 134 { 93 135 WebKitSpellCheckerEnchantPrivate* priv = WEBKIT_SPELL_CHECKER_ENCHANT(checker)->priv; 94 136 95 GSList* dicts = priv->enchantDicts; 96 if (!dicts) 137 *misspellingLocation = -1; 138 *misspellingLength = 0; 139 140 GSList* dictionaries = priv->enchantDicts; 141 if (!dictionaries) 97 142 return; 98 143 99 int length = g_utf8_strlen(string, -1); 100 101 PangoLanguage* language(pango_language_get_default()); 102 GOwnPtr<PangoLogAttr> attrs(g_new(PangoLogAttr, length + 1)); 103 104 // pango_get_log_attrs uses an aditional position at the end of the text. 105 pango_get_log_attrs(string, -1, -1, language, attrs.get(), length + 1); 106 107 for (int i = 0; i < length + 1; i++) { 108 // We go through each character until we find an is_word_start, 109 // then we get into an inner loop to find the is_word_end corresponding 110 // to it. 111 if (attrs.get()[i].is_word_start) { 112 int start = i; 113 int end = i; 114 int wordLength; 115 116 while (attrs.get()[end].is_word_end < 1) 117 end++; 118 119 wordLength = end - start; 120 // Set the iterator to be at the current word end, so we don't 121 // check characters twice. 122 i = end; 123 124 gchar* cstart = g_utf8_offset_to_pointer(string, start); 125 gint bytes = static_cast<gint>(g_utf8_offset_to_pointer(string, end) - cstart); 126 GOwnPtr<gchar> word(g_new0(gchar, bytes + 1)); 127 128 g_utf8_strncpy(word.get(), cstart, wordLength); 129 130 for (; dicts; dicts = dicts->next) { 131 EnchantDict* dict = static_cast<EnchantDict*>(dicts->data); 132 if (enchant_dict_check(dict, word.get(), wordLength)) { 133 *misspellingLocation = start; 134 *misspellingLength = wordLength; 135 } else { 136 // Stop checking, this word is ok in at least one dict. 
137 *misspellingLocation = -1; 138 *misspellingLength = 0; 139 break; 140 } 141 } 142 } 144 const char* wordStart = string; 145 size_t wordLength; 146 getExtentsOfNextWord(wordStart, wordStart, wordLength); 147 while (wordLength > 0) { 148 if (!wordIsSpelledCorrectlyInAtLeastOneDictionary(dictionaries, wordStart, wordLength, misspellingLocation, misspellingLength)) { 149 *misspellingLocation = g_utf8_pointer_to_offset(string, wordStart); 150 *misspellingLength = g_utf8_pointer_to_offset(string, wordStart + wordLength) - *misspellingLocation; 151 return; 152 } 153 154 getExtentsOfNextWord(wordStart + wordLength, wordStart, wordLength); 143 155 } 144 156 }
Note: See TracChangeset
for help on using the changeset viewer.