Changeset 49899 in webkit


Ignore:
Timestamp:
Oct 21, 2009 12:11:08 AM (14 years ago)
Author:
eric@webkit.org
Message:

2009-10-21 Satoshi Nakagawa <psychs@limechat.net>

Reviewed by Darin Adler.

Fixed Japanese text search problems.
Treat small kana letters and kana letters as different characters in search.
Do not ignore diacritic marks in search for Japanese texts.

https://bugs.webkit.org/show_bug.cgi?id=30437

  • fast/text/international/japanese-kana-letters-expected.txt: Added.
  • fast/text/international/japanese-kana-letters.html: Added.

2009-10-21 Satoshi Nakagawa <psychs@limechat.net>

Reviewed by Darin Adler.

Fixed Japanese text search problems.
Treat small kana letters and kana letters as different characters in search.
Do not ignore diacritic marks in search for Japanese texts.

https://bugs.webkit.org/show_bug.cgi?id=30437

Test: fast/text/international/japanese-kana-letters.html

  • editing/TextIterator.cpp: (WebCore::createCollator): (WebCore::collator): (WebCore::createSearcher):
Location:
trunk
Files:
2 added
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/LayoutTests/ChangeLog

    r49897 r49899  
     1 2009-10-21  Satoshi Nakagawa  <psychs@limechat.net>
     2
     3        Reviewed by Darin Adler.
     4
     5        Fixed Japanese text search problems.
     6        Treat small kana letters and kana letters as different characters in search.
     7        Do not ignore diacritic marks in search for Japanese texts.
     8
     9        https://bugs.webkit.org/show_bug.cgi?id=30437
     10
     11        * fast/text/international/japanese-kana-letters-expected.txt: Added.
     12        * fast/text/international/japanese-kana-letters.html: Added.
     13
    1 14 2009-10-20  Pavel Feldman  <pfeldman@chromium.org>
    2 15
  • trunk/WebCore/ChangeLog

    r49898 r49899  
     1 2009-10-21  Satoshi Nakagawa  <psychs@limechat.net>
     2
     3        Reviewed by Darin Adler.
     4
     5        Fixed Japanese text search problems.
     6        Treat small kana letters and kana letters as different characters in search.
     7        Do not ignore diacritic marks in search for Japanese texts.
     8
     9        https://bugs.webkit.org/show_bug.cgi?id=30437
     10
     11        Test: fast/text/international/japanese-kana-letters.html
     12
     13        * editing/TextIterator.cpp:
     14        (WebCore::createCollator):
     15        (WebCore::collator):
     16        (WebCore::createSearcher):
     17
    1 18 2009-10-20  Eric Z. Ayers  <zundel@google.com>
    2 19
  • trunk/WebCore/editing/TextIterator.cpp

    r49895 r49899  
    14331433#endif
    14341434
     1435// Tailored collation rules for Japanese text search.
     1436// The default Unicode Collation Algorithm is unnatural in Japanese.
     1437// These rules intend to treat the following characters as different characters.
     1438//
     1439// - Small kana letters and normal kana letters
     1440// - Voiceless letters, voiced letters and semi-voiced letters
     1441//
     1442// This is original work built in reference to the following Unicode standard documents.
     1443//
     1444// - http://unicode.org/reports/tr10/
     1445// - http://unicode.org/Public/UCA/latest/allkeys.txt
     1446//
     // ICU collation rule string, stored as an array of UTF-16 code units.
     // In ICU rule syntax '&' names the reset (anchor) position, '=' declares the
     // next character identical to the previous one, and '<' declares a primary
     // (base-letter) difference from the previous one.
     // Each '='-joined group is one kana letter in its hiragana (U+30xx),
     // katakana (U+30xx) and, where one exists, halfwidth katakana (U+FFxx) form.
     // Small letters and voiced/semi-voiced letters sit in their own groups,
     // separated from the plain letter by '<' (e.g. 0x3041 vs 0x3042, and
     // 0x304b vs 0x304c).
     // The trailing 0 terminates the string; createCollator() passes -1 as the
     // length and so relies on that terminator.
     1447static const UChar japaneseKanaCollationRules[] = {
     1448    '&', 0x3041, '=', 0x30a1, '=', 0xff67, '<', 0x3042,
     1449    '=', 0x30a2, '=', 0xff71, '<', 0x3043, '=', 0x30a3,
     1450    '=', 0xff68, '<', 0x3044, '=', 0x30a4, '=', 0xff72,
     1451    '<', 0x3045, '=', 0x30a5, '=', 0xff69, '<', 0x3046,
     1452    '=', 0x30a6, '=', 0xff73, '<', 0x3094, '=', 0x30f4,
     1453    '<', 0x3047, '=', 0x30a7, '=', 0xff6a, '<', 0x3048,
     1454    '=', 0x30a8, '=', 0xff74, '<', 0x3049, '=', 0x30a9,
     1455    '=', 0xff6b, '<', 0x304a, '=', 0x30aa, '=', 0xff75,
     1456    '<', 0x3095, '=', 0x30f5, '<', 0x304b, '=', 0x30ab,
     1457    '=', 0xff76, '<', 0x304c, '=', 0x30ac, '<', 0x304d,
     1458    '=', 0x30ad, '=', 0xff77, '<', 0x304e, '=', 0x30ae,
     1459    '<', 0x304f, '=', 0x30af, '=', 0xff78, '<', 0x3050,
     1460    '=', 0x30b0, '<', 0x3096, '=', 0x30f6, '<', 0x3051,
     1461    '=', 0x30b1, '=', 0xff79, '<', 0x3052, '=', 0x30b2,
     1462    '<', 0x3053, '=', 0x30b3, '=', 0xff7a, '<', 0x3054,
     1463    '=', 0x30b4, '<', 0x3055, '=', 0x30b5, '=', 0xff7b,
     1464    '<', 0x3056, '=', 0x30b6, '<', 0x3057, '=', 0x30b7,
     1465    '=', 0xff7c, '<', 0x3058, '=', 0x30b8, '<', 0x3059,
     1466    '=', 0x30b9, '=', 0xff7d, '<', 0x305a, '=', 0x30ba,
     1467    '<', 0x305b, '=', 0x30bb, '=', 0xff7e, '<', 0x305c,
     1468    '=', 0x30bc, '<', 0x305d, '=', 0x30bd, '=', 0xff7f,
     1469    '<', 0x305e, '=', 0x30be, '<', 0x305f, '=', 0x30bf,
     1470    '=', 0xff80, '<', 0x3060, '=', 0x30c0, '<', 0x3061,
     1471    '=', 0x30c1, '=', 0xff81, '<', 0x3062, '=', 0x30c2,
     1472    '<', 0x3063, '=', 0x30c3, '=', 0xff6f, '<', 0x3064,
     1473    '=', 0x30c4, '=', 0xff82, '<', 0x3065, '=', 0x30c5,
     1474    '<', 0x3066, '=', 0x30c6, '=', 0xff83, '<', 0x3067,
     1475    '=', 0x30c7, '<', 0x3068, '=', 0x30c8, '=', 0xff84,
     1476    '<', 0x3069, '=', 0x30c9, '<', 0x306a, '=', 0x30ca,
     1477    '=', 0xff85, '<', 0x306b, '=', 0x30cb, '=', 0xff86,
     1478    '<', 0x306c, '=', 0x30cc, '=', 0xff87, '<', 0x306d,
     1479    '=', 0x30cd, '=', 0xff88, '<', 0x306e, '=', 0x30ce,
     1480    '=', 0xff89, '<', 0x306f, '=', 0x30cf, '=', 0xff8a,
     1481    '<', 0x3070, '=', 0x30d0, '<', 0x3071, '=', 0x30d1,
     1482    '<', 0x3072, '=', 0x30d2, '=', 0xff8b, '<', 0x3073,
     1483    '=', 0x30d3, '<', 0x3074, '=', 0x30d4, '<', 0x3075,
     1484    '=', 0x30d5, '=', 0xff8c, '<', 0x3076, '=', 0x30d6,
     1485    '<', 0x3077, '=', 0x30d7, '<', 0x3078, '=', 0x30d8,
     1486    '=', 0xff8d, '<', 0x3079, '=', 0x30d9, '<', 0x307a,
     1487    '=', 0x30da, '<', 0x307b, '=', 0x30db, '=', 0xff8e,
     1488    '<', 0x307c, '=', 0x30dc, '<', 0x307d, '=', 0x30dd,
     1489    '<', 0x307e, '=', 0x30de, '=', 0xff8f, '<', 0x307f,
     1490    '=', 0x30df, '=', 0xff90, '<', 0x3080, '=', 0x30e0,
     1491    '=', 0xff91, '<', 0x3081, '=', 0x30e1, '=', 0xff92,
     1492    '<', 0x3082, '=', 0x30e2, '=', 0xff93, '<', 0x3083,
     1493    '=', 0x30e3, '=', 0xff6c, '<', 0x3084, '=', 0x30e4,
     1494    '=', 0xff94, '<', 0x3085, '=', 0x30e5, '=', 0xff6d,
     1495    '<', 0x3086, '=', 0x30e6, '=', 0xff95, '<', 0x3087,
     1496    '=', 0x30e7, '=', 0xff6e, '<', 0x3088, '=', 0x30e8,
     1497    '=', 0xff96, '<', 0x3089, '=', 0x30e9, '=', 0xff97,
     1498    '<', 0x308a, '=', 0x30ea, '=', 0xff98, '<', 0x308b,
     1499    '=', 0x30eb, '=', 0xff99, '<', 0x308c, '=', 0x30ec,
     1500    '=', 0xff9a, '<', 0x308d, '=', 0x30ed, '=', 0xff9b,
     1501    '<', 0x308e, '=', 0x30ee, '<', 0x308f, '=', 0x30ef,
     1502    '=', 0xff9c, '<', 0x30f7, '<', 0x3090, '=', 0x30f0,
     1503    '<', 0x30f8, '<', 0x3091, '=', 0x30f1, '<', 0x3092,
     1504    '=', 0x30f2, '=', 0xff66, '<', 0x3093, '=', 0x30f3,
     1505    '=', 0xff9d, 0
     1506};
     1507
     // Opens an ICU collator built from japaneseKanaCollationRules and returns it.
     // The result of ucol_openRules() is returned as-is; the status is inspected
     // only by ASSERT, so there is no failure handling here beyond that assertion
     // (NOTE(review): ASSERT is presumably debug-only — confirm).
     // No ucol_close() for the returned collator is visible in this excerpt.
     1508static UCollator* createCollator()
     1509{
     1510    // Set tailored collation rules to fix Japanese text search.
     1511    // See the comments before japaneseKanaCollationRules for details.
     1512    UErrorCode status = U_ZERO_ERROR;
     // Arguments: rule string, length -1 (rules are NUL-terminated), default
     // normalization mode, default strength, no UParseError out-parameter, status.
     1513    UCollator* collator = ucol_openRules(japaneseKanaCollationRules, -1, UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, 0, &status);
     // The two warning codes are accepted alongside U_ZERO_ERROR.
     1514    ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
     1515    return collator;
     1516}
     1517
     // Returns the shared collator used by createSearcher().
     // The function-local static is initialized from createCollator() the first
     // time this function runs; every later call returns that same pointer.
     1518static UCollator* collator()
     1519{
     1520    static UCollator* collator = createCollator();
     1521    return collator;
     1522}
     1523   
    14351524static UStringSearch* createSearcher()
    14361525{
     
    14411530    UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status);
    14421531    ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
     1532    status = U_ZERO_ERROR;
     1533    usearch_setCollator(searcher, collator(), &status);
     1534    ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
     1535    usearch_reset(searcher);
    14431536    return searcher;
    14441537}
Note: See TracChangeset for help on using the changeset viewer.