Changeset 281375 in webkit


Ignore:
Timestamp:
Aug 21, 2021, 9:17:02 AM (4 years ago)
Author:
ysuzuki@apple.com
Message:

[JSC] Intl.DisplayNames v2
https://bugs.webkit.org/show_bug.cgi?id=227832

Reviewed by Ross Kirsling.

JSTests:

  • stress/intl-displaynames-v2.js: Added.

(shouldBe):
(shouldThrow):
(vm.icuVersion):

  • stress/intl-displaynames.js:

(vm.icuVersion):

  • test262/config.yaml:
  • test262/expectations.yaml:

Source/JavaScriptCore:

This patch implements Intl.DisplayNames v2[1].
Newly added names are calendar names and date time field names.
For the language name, a language display option is added.

[1]: https://github.com/tc39/intl-displaynames-v2

  • runtime/CommonIdentifiers.h:
  • runtime/IntlCache.cpp:

(JSC::IntlCache::getFieldDisplayName):

  • runtime/IntlCache.h:
  • runtime/IntlDisplayNames.cpp:

(JSC::IntlDisplayNames::initializeDisplayNames):
(JSC::IntlDisplayNames::of const):
(JSC::IntlDisplayNames::resolvedOptions const):
(JSC::IntlDisplayNames::typeString):
(JSC::IntlDisplayNames::languageDisplayString):

  • runtime/IntlDisplayNames.h:
  • runtime/IntlObject.cpp:

(JSC::isUnicodeLocaleIdentifierType):
(JSC::canonicalizeUnicodeLocaleID):
(JSC::canonicalizeLocaleList):
(JSC::defaultLocale):
(JSC::mapBCP47ToICUCalendarKeyword):
(JSC::mapICUCollationKeywordToBCP47):
(JSC::canonicalizeLanguageTag): Deleted.

  • runtime/IntlObject.h:
Location:
trunk
Files:
1 added
12 edited

Legend:

Unmodified
Added
Removed
  • trunk/JSTests/ChangeLog

    r281374 r281375  
     12021-08-21  Yusuke Suzuki  <ysuzuki@apple.com>
     2
     3        [JSC] Intl.DisplayNames v2
     4        https://bugs.webkit.org/show_bug.cgi?id=227832
     5
     6        Reviewed by Ross Kirsling.
     7
     8        * stress/intl-displaynames-v2.js: Added.
     9        (shouldBe):
     10        (shouldThrow):
     11        (vm.icuVersion):
     12        * stress/intl-displaynames.js:
     13        (vm.icuVersion):
     14        * test262/config.yaml:
     15        * test262/expectations.yaml:
     16
    1172021-08-21  Yusuke Suzuki  <ysuzuki@apple.com>
    218
  • trunk/JSTests/stress/intl-displaynames.js

    r275881 r281375  
    179179            }
    180180        };
    181         shouldBe(languageNames.of(object), object);
     181        shouldBe(languageNames.of(object), 'en-AA');
    182182    }
    183183
  • trunk/JSTests/test262/config.yaml

    r281374 r281375  
    3131    - json-modules
    3232    - class-static-block
    33     - Intl.DisplayNames-v2
    3433    - callable-boundary-realms
    3534  paths:
  • trunk/JSTests/test262/expectations.yaml

    r280825 r281375  
    610610test/annexB/language/global-code/switch-dflt-global-skip-early-err.js:
    611611  default: "SyntaxError: Cannot declare a function that shadows a let/const/class/function variable 'f' in strict mode."
    612 test/built-ins/Array/prototype/Symbol.unscopables/value.js:
    613   default: 'Test262Error: `findLast` property value Expected SameValue(«undefined», «true») to be true'
    614   strict mode: 'Test262Error: `findLast` property value Expected SameValue(«undefined», «true») to be true'
    615612test/built-ins/Date/UTC/fp-evaluation-order.js:
    616613  default: 'Test262Error: order of operations / precision in MakeTime Expected SameValue(«NaN», «29312») to be true'
  • trunk/Source/JavaScriptCore/ChangeLog

    r281374 r281375  
     12021-08-21  Yusuke Suzuki  <ysuzuki@apple.com>
     2
     3        [JSC] Intl.DisplayNames v2
     4        https://bugs.webkit.org/show_bug.cgi?id=227832
     5
     6        Reviewed by Ross Kirsling.
     7
     8        This patch implements Intl.DisplayNames v2[1].
     9        Newly added names are calendar names and date time field names.
     10        For the language name, language display option is added.
     11
     12        [1]: https://github.com/tc39/intl-displaynames-v2
     13
     14        * runtime/CommonIdentifiers.h:
     15        * runtime/IntlCache.cpp:
     16        (JSC::IntlCache::getFieldDisplayName):
     17        * runtime/IntlCache.h:
     18        * runtime/IntlDisplayNames.cpp:
     19        (JSC::IntlDisplayNames::initializeDisplayNames):
     20        (JSC::IntlDisplayNames::of const):
     21        (JSC::IntlDisplayNames::resolvedOptions const):
     22        (JSC::IntlDisplayNames::typeString):
     23        (JSC::IntlDisplayNames::languageDisplayString):
     24        * runtime/IntlDisplayNames.h:
     25        * runtime/IntlObject.cpp:
     26        (JSC::isUnicodeLocaleIdentifierType):
     27        (JSC::canonicalizeUnicodeLocaleID):
     28        (JSC::canonicalizeLocaleList):
     29        (JSC::defaultLocale):
     30        (JSC::mapBCP47ToICUCalendarKeyword):
     31        (JSC::mapICUCollationKeywordToBCP47):
     32        (JSC::canonicalizeLanguageTag): Deleted.
     33        * runtime/IntlObject.h:
     34
    1352021-08-21  Yusuke Suzuki  <ysuzuki@apple.com>
    236
  • trunk/Source/JavaScriptCore/runtime/CommonIdentifiers.h

    r279630 r281375  
    117117    macro(executionCount) \
    118118    macro(exitKind) \
     119    macro(fallback) \
    119120    macro(flags) \
    120121    macro(forEach) \
     
    155156    macro(join) \
    156157    macro(language) \
     158    macro(languageDisplay) \
    157159    macro(lastIndex) \
    158160    macro(length) \
  • trunk/Source/JavaScriptCore/runtime/IntlCache.cpp

    r267143 r281375  
    5454}
    5555
     56Vector<UChar, 32> IntlCache::getFieldDisplayName(const CString& locale, UDateTimePatternField field, UDateTimePGDisplayWidth width, UErrorCode& status)
     57{
     58    auto sharedGenerator = getSharedPatternGenerator(locale, status);
     59    if (U_FAILURE(status))
     60        return { };
     61    Vector<UChar, 32> buffer;
     62    status = callBufferProducingFunction(udatpg_getFieldDisplayName, sharedGenerator, field, width, buffer);
     63    if (U_FAILURE(status))
     64        return { };
     65    return buffer;
     66}
     67
    5668} // namespace JSC
  • trunk/Source/JavaScriptCore/runtime/IntlCache.h

    r267143 r281375  
    4040
    4141    Vector<UChar, 32> getBestDateTimePattern(const CString& locale, const UChar* skeleton, unsigned skeletonSize, UErrorCode&);
     42    Vector<UChar, 32> getFieldDisplayName(const CString& locale, UDateTimePatternField, UDateTimePGDisplayWidth, UErrorCode&);
    4243
    4344private:
  • trunk/Source/JavaScriptCore/runtime/IntlDisplayNames.cpp

    r278253 r281375  
    2727#include "IntlDisplayNames.h"
    2828
     29#include "IntlCache.h"
    2930#include "IntlObjectInlines.h"
    3031#include "JSCInlines.h"
     
    9697    RETURN_IF_EXCEPTION(scope, void());
    9798
    98     auto type = intlOption<std::optional<Type>>(globalObject, options, vm.propertyNames->type, { { "language"_s, Type::Language }, { "region"_s, Type::Region }, { "script"_s, Type::Script }, { "currency"_s, Type::Currency } }, "type must be either \"language\", \"region\", \"script\", or \"currency\""_s, std::nullopt);
     99    auto type = intlOption<std::optional<Type>>(globalObject, options, vm.propertyNames->type, { { "language"_s, Type::Language }, { "region"_s, Type::Region }, { "script"_s, Type::Script }, { "currency"_s, Type::Currency }, { "calendar"_s, Type::Calendar }, { "dateTimeField"_s, Type::DateTimeField } }, "type must be either \"language\", \"region\", \"script\", \"currency\", \"calendar\", or \"dateTimeField\""_s, std::nullopt);
    99100    RETURN_IF_EXCEPTION(scope, void());
    100101    if (!type) {
     
    104105    m_type = type.value();
    105106
    106     m_fallback = intlOption<Fallback>(globalObject, options, Identifier::fromString(vm, "fallback"), { { "code"_s, Fallback::Code }, { "none"_s, Fallback::None } }, "fallback must be either \"code\" or \"none\""_s, Fallback::Code);
     107    m_fallback = intlOption<Fallback>(globalObject, options, vm.propertyNames->fallback, { { "code"_s, Fallback::Code }, { "none"_s, Fallback::None } }, "fallback must be either \"code\" or \"none\""_s, Fallback::Code);
     108    RETURN_IF_EXCEPTION(scope, void());
     109
     110    m_languageDisplay = intlOption<LanguageDisplay>(globalObject, options, vm.propertyNames->languageDisplay, { { "dialect"_s, LanguageDisplay::Dialect }, { "standard"_s, LanguageDisplay::Standard } }, "languageDisplay must be either \"dialect\" or \"standard\""_s, LanguageDisplay::Dialect);
    107111    RETURN_IF_EXCEPTION(scope, void());
    108112
     
    112116    UDisplayContext contexts[] = {
    113117        // en_GB displays as 'English (United Kingdom)' (Standard Names) or 'British English' (Dialect Names).
    114         // We use Dialect Names here, aligned to the examples in the spec draft and V8's behavior.
    115118        // https://github.com/tc39/proposal-intl-displaynames#language-display-names
    116         UDISPCTX_DIALECT_NAMES,
     119        (m_type == Type::Language && m_languageDisplay == LanguageDisplay::Standard) ? UDISPCTX_STANDARD_NAMES : UDISPCTX_DIALECT_NAMES,
    117120
    118121        // Capitalization mode can be picked from several options. Possibly either UDISPCTX_CAPITALIZATION_NONE or UDISPCTX_CAPITALIZATION_FOR_STANDALONE is
     
    154157    RETURN_IF_EXCEPTION(scope, { });
    155158
     159    // https://tc39.es/proposal-intl-displaynames/#sec-canonicalcodefordisplaynames
     160    auto canonicalizeCodeForDisplayNames = [](Type type, String&& code) -> CString {
     161        ASSERT(code.isAllASCII());
     162        switch (type) {
     163        case Type::Language: {
     164            return canonicalizeUnicodeLocaleID(code.ascii()).ascii();
     165        }
     166        case Type::Region: {
     167            // Let code be the result of mapping code to upper case as described in 6.1.
     168            auto result = code.ascii();
     169            char* mutableData = result.mutableData();
     170            for (unsigned index = 0; index < result.length(); ++index)
     171                mutableData[index] = toASCIIUpper(mutableData[index]);
     172            return result;
     173        }
     174        case Type::Script: {
     175            // Let code be the result of mapping the first character in code to upper case, and mapping the second, third and fourth character in code to lower case, as described in 6.1.
     176            auto result = code.ascii();
     177            char* mutableData = result.mutableData();
     178            if (result.length() >= 1)
     179                mutableData[0] = toASCIIUpper(mutableData[0]);
     180            for (unsigned index = 1; index < result.length(); ++index)
     181                mutableData[index] = toASCIILower(mutableData[index]);
     182            return result;
     183        }
     184        case Type::Currency:
     185            ASSERT_NOT_REACHED();
     186            break;
     187        case Type::Calendar: {
     188            // Let code be the result of mapping code to lower case as described in 6.1.
     189            String lowered = code.convertToASCIILowercase();
     190            if (auto mapped = mapBCP47ToICUCalendarKeyword(lowered))
     191                lowered = WTFMove(mapped.value());
     192            return lowered.ascii();
     193        }
     194        case Type::DateTimeField: {
     195            ASSERT_NOT_REACHED();
     196            break;
     197        }
     198        }
     199        return { };
     200    };
     201
    156202    Vector<UChar, 32> buffer;
    157203    UErrorCode status = U_ZERO_ERROR;
    158 
    159     if (m_type == Type::Currency) {
     204    CString canonicalCode;
     205    switch (m_type) {
     206    case Type::Language: {
     207        if (!isUnicodeLanguageId(code)) {
     208            throwRangeError(globalObject, scope, "argument is not a language id"_s);
     209            return { };
     210        }
     211        canonicalCode = canonicalizeCodeForDisplayNames(m_type, WTFMove(code));
     212        // Do not use uldn_languageDisplayName since its result is not the one expected for this "language" type: it returns "en-US" for the "en-US" code, instead of "American English".
     213        status = callBufferProducingFunction(uldn_localeDisplayName, m_displayNames.get(), canonicalCode.data(), buffer);
     214        break;
     215    }
     216    case Type::Region: {
     217        if (!isUnicodeRegionSubtag(code)) {
     218            throwRangeError(globalObject, scope, "argument is not a region subtag"_s);
     219            return { };
     220        }
     221        canonicalCode = canonicalizeCodeForDisplayNames(m_type, WTFMove(code));
     222        status = callBufferProducingFunction(uldn_regionDisplayName, m_displayNames.get(), canonicalCode.data(), buffer);
     223        break;
     224    }
     225    case Type::Script: {
     226        if (!isUnicodeScriptSubtag(code)) {
     227            throwRangeError(globalObject, scope, "argument is not a script subtag"_s);
     228            return { };
     229        }
     230        canonicalCode = canonicalizeCodeForDisplayNames(m_type, WTFMove(code));
     231        status = callBufferProducingFunction(uldn_scriptDisplayName, m_displayNames.get(), canonicalCode.data(), buffer);
     232        break;
     233    }
     234    case Type::Currency: {
    160235        // We do not use uldn_keyValueDisplayName + "currency". This is because of the following reasons.
    161236        //     1. ICU does not respect UDISPCTX_LENGTH_FULL / UDISPCTX_LENGTH_SHORT in its implementation.
     
    203278        // > Returns pointer to display string of 'len' UChars. If the resource data contains no entry for 'currency', then 'currency' itself is returned.
    204279        if (status == U_USING_DEFAULT_WARNING && result == currency)
    205             return (m_fallback == Fallback::None) ? jsUndefined() : codeValue;
     280            return (m_fallback == Fallback::None) ? jsUndefined() : jsString(vm, String(currency, 3));
    206281        return jsString(vm, String(result, length));
    207282    }
    208 
    209     // https://tc39.es/proposal-intl-displaynames/#sec-canonicalcodefordisplaynames
    210     auto canonicalizeCodeForDisplayNames = [](Type type, const String& code) -> CString {
    211         ASSERT(code.isAllASCII());
    212         auto result = code.ascii();
    213         char* mutableData = result.mutableData();
    214         switch (type) {
    215         case Type::Language: {
    216             // Let code be the result of mapping code to lower case as described in 6.1.
    217             for (unsigned index = 0; index < result.length(); ++index)
    218                 mutableData[index] = toASCIILower(mutableData[index]);
    219             break;
    220         }
    221         case Type::Region: {
    222             // Let code be the result of mapping code to upper case as described in 6.1.
    223             for (unsigned index = 0; index < result.length(); ++index)
    224                 mutableData[index] = toASCIIUpper(mutableData[index]);
    225             break;
    226         }
    227         case Type::Script: {
    228             // Let code be the result of mapping the first character in code to upper case, and mapping the second, third and fourth character in code to lower case, as described in 6.1.
    229             if (result.length() >= 1)
    230                 mutableData[0] = toASCIIUpper(mutableData[0]);
    231             for (unsigned index = 1; index < result.length(); ++index)
    232                 mutableData[index] = toASCIILower(mutableData[index]);
    233             break;
    234         }
    235         case Type::Currency:
    236             ASSERT_NOT_REACHED();
    237             break;
    238         }
    239         return result;
    240     };
    241 
    242     switch (m_type) {
    243     case Type::Language: {
    244         // If code does not matches the unicode_language_id production, throw a RangeError exception
    245         if (!isUnicodeLanguageId(code)) {
    246             throwRangeError(globalObject, scope, "argument is not a language id"_s);
    247             return { };
    248         }
    249         auto language = canonicalizeCodeForDisplayNames(m_type, code);
    250         // Do not use uldn_languageDisplayName since it is not expected one for this "language" type. It returns "en-US" for "en-US" code, instead of "American English".
    251         status = callBufferProducingFunction(uldn_localeDisplayName, m_displayNames.get(), language.data(), buffer);
     283    case Type::Calendar: {
     284        // a. If code does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
     285        if (!isUnicodeLocaleIdentifierType(code)) {
     286            throwRangeError(globalObject, scope, "argument is not a calendar code"_s);
     287            return { };
     288        }
     289        canonicalCode = canonicalizeCodeForDisplayNames(m_type, WTFMove(code));
     290        status = callBufferProducingFunction(uldn_keyValueDisplayName, m_displayNames.get(), "calendar", canonicalCode.data(), buffer);
    252291        break;
    253292    }
    254     case Type::Region: {
    255         // If code does not matches the unicode_region_subtag production, throw a RangeError exception
    256         if (!isUnicodeRegionSubtag(code)) {
    257             throwRangeError(globalObject, scope, "argument is not a region subtag"_s);
    258             return { };
    259         }
    260         auto region = canonicalizeCodeForDisplayNames(m_type, code);
    261         status = callBufferProducingFunction(uldn_regionDisplayName, m_displayNames.get(), region.data(), buffer);
    262         break;
    263     }
    264     case Type::Script: {
    265         // If code does not matches the unicode_script_subtag production, throw a RangeError exception
    266         if (!isUnicodeScriptSubtag(code)) {
    267             throwRangeError(globalObject, scope, "argument is not a script subtag"_s);
    268             return { };
    269         }
    270         auto script = canonicalizeCodeForDisplayNames(m_type, code);
    271         status = callBufferProducingFunction(uldn_scriptDisplayName, m_displayNames.get(), script.data(), buffer);
    272         break;
    273     }
    274     case Type::Currency:
    275         ASSERT_NOT_REACHED();
    276         break;
     293    case Type::DateTimeField: {
     294        // We do not use uldn_keyValueDisplayName since it cannot handle narrow length.
     295        // Instead, we use udatpg_getFieldDisplayName.
     296
     297        // https://tc39.es/intl-displaynames-v2/#sec-isvaliddatetimefieldcode
     298        auto isValidDateTimeFieldCode = [](const String& code) -> std::optional<UDateTimePatternField> {
     299            if (code == "era"_s)
     300                return UDATPG_ERA_FIELD;
     301            if (code == "year"_s)
     302                return UDATPG_YEAR_FIELD;
     303            if (code == "quarter"_s)
     304                return UDATPG_QUARTER_FIELD;
     305            if (code == "month"_s)
     306                return UDATPG_MONTH_FIELD;
     307            if (code == "weekOfYear"_s)
     308                return UDATPG_WEEK_OF_YEAR_FIELD;
     309            if (code == "weekday"_s)
     310                return UDATPG_WEEKDAY_FIELD;
     311            if (code == "day"_s)
     312                return UDATPG_DAY_FIELD;
     313            if (code == "dayPeriod"_s)
     314                return UDATPG_DAYPERIOD_FIELD;
     315            if (code == "hour"_s)
     316                return UDATPG_HOUR_FIELD;
     317            if (code == "minute"_s)
     318                return UDATPG_MINUTE_FIELD;
     319            if (code == "second"_s)
     320                return UDATPG_SECOND_FIELD;
     321            if (code == "timeZoneName"_s)
     322                return UDATPG_ZONE_FIELD;
     323            return std::nullopt;
     324        };
     325
     326        auto field = isValidDateTimeFieldCode(code);
     327        if (!field) {
     328            throwRangeError(globalObject, scope, "argument is not a dateTimeField code"_s);
     329            return { };
     330        }
     331
     332        UDateTimePGDisplayWidth style = UDATPG_WIDE;
     333        switch (m_style) {
     334        case Style::Long:
     335            style = UDATPG_WIDE;
     336            break;
     337        case Style::Short:
     338            style = UDATPG_ABBREVIATED;
     339            break;
     340        case Style::Narrow:
     341            style = UDATPG_NARROW;
     342            break;
     343        }
     344
     345        buffer = vm.intlCache().getFieldDisplayName(m_localeCString.data(), field.value(), style, status);
     346        if (U_FAILURE(status))
     347            return (m_fallback == Fallback::None) ? jsUndefined() : jsString(vm, code);
     348        return jsString(vm, String(buffer));
     349    }
    277350    }
    278351    if (U_FAILURE(status)) {
     
    280353        // We should return undefined if fallback is "none". Otherwise, we should return input value.
    281354        if (status == U_ILLEGAL_ARGUMENT_ERROR)
    282             return (m_fallback == Fallback::None) ? jsUndefined() : codeValue;
     355            return (m_fallback == Fallback::None) ? jsUndefined() : jsString(vm, String(canonicalCode.data(), canonicalCode.length()));
    283356        return throwTypeError(globalObject, scope, "Failed to query a display name."_s);
    284357    }
     
    299372    options->putDirect(vm, vm.propertyNames->style, jsNontrivialString(vm, styleString(m_style)));
    300373    options->putDirect(vm, vm.propertyNames->type, jsNontrivialString(vm, typeString(m_type)));
    301     options->putDirect(vm, Identifier::fromString(vm, "fallback"), jsNontrivialString(vm, fallbackString(m_fallback)));
     374    options->putDirect(vm, vm.propertyNames->fallback, jsNontrivialString(vm, fallbackString(m_fallback)));
     375    if (m_type == Type::Language)
     376        options->putDirect(vm, vm.propertyNames->languageDisplay, jsNontrivialString(vm, languageDisplayString(m_languageDisplay)));
    302377    return options;
    303378}
     
    328403    case Type::Currency:
    329404        return "currency"_s;
     405    case Type::Calendar:
     406        return "calendar"_s;
     407    case Type::DateTimeField:
     408        return "dateTimeField"_s;
    330409    }
    331410    ASSERT_NOT_REACHED();
     
    345424}
    346425
     426ASCIILiteral IntlDisplayNames::languageDisplayString(LanguageDisplay languageDisplay)
     427{
     428    switch (languageDisplay) {
     429    case LanguageDisplay::Dialect:
     430        return "dialect"_s;
     431    case LanguageDisplay::Standard:
     432        return "standard"_s;
     433    }
     434    ASSERT_NOT_REACHED();
     435    return ASCIILiteral::null();
     436}
     437
    347438} // namespace JSC
  • trunk/Source/JavaScriptCore/runtime/IntlDisplayNames.h

    r264639 r281375  
    7373
    7474    enum class Style : uint8_t { Narrow, Short, Long };
    75     enum class Type : uint8_t { Language, Region, Script, Currency };
     75    enum class Type : uint8_t { Language, Region, Script, Currency, Calendar, DateTimeField };
    7676    enum class Fallback : uint8_t { Code, None };
     77    enum class LanguageDisplay : uint8_t { Dialect, Standard };
    7778
    7879    static ASCIILiteral styleString(Style);
    7980    static ASCIILiteral typeString(Type);
    8081    static ASCIILiteral fallbackString(Fallback);
     82    static ASCIILiteral languageDisplayString(LanguageDisplay);
    8183
    8284    using ULocaleDisplayNamesDeleter = ICUDeleter<uldn_close>;
     
    8991    Type m_type { Type::Language };
    9092    Fallback m_fallback { Fallback::Code };
     93    LanguageDisplay m_languageDisplay { LanguageDisplay::Dialect };
    9194};
    9295
  • trunk/Source/JavaScriptCore/runtime/IntlObject.cpp

    r281374 r281375  
    6868#include <wtf/text/StringBuilder.h>
    6969#include <wtf/text/StringImpl.h>
     70#include <wtf/text/StringParsingBuffer.h>
    7071#include <wtf/unicode/icu/ICUHelpers.h>
    7172
     
    610611bool isUnicodeLocaleIdentifierType(StringView string)
    611612{
    612     ASSERT(!string.isNull());
    613 
    614     for (auto part : string.splitAllowingEmptyEntries('-')) {
    615         auto length = part.length();
    616         if (length < 3 || length > 8)
    617             return false;
    618 
    619         for (auto character : part.codeUnits()) {
    620             if (!isASCIIAlphanumeric(character))
     613    // Matching the Unicode Locale Identifier type nonterminal.
     614    // Because the spec's abstract operation does not mention BCP-47 conformance for this matching,
     615    // both '-' and '_' separators are allowed, whereas BCP-47 only accepts '-'.
     616    // In contrast, IsStructurallyValidLanguageTag explicitly refers to BCP-47.
     617    return readCharactersForParsing(string, [](auto buffer) -> bool {
     618        while (true) {
     619            auto begin = buffer.position();
     620            while (buffer.hasCharactersRemaining() && isASCIIAlphanumeric(*buffer))
     621                ++buffer;
     622            unsigned length = buffer.position() - begin;
     623            if (length < 3 || length > 8)
    621624                return false;
    622         }
    623     }
    624 
    625     return true;
     625            if (!buffer.hasCharactersRemaining())
     626                return true;
     627            if (*buffer != '-' && *buffer != '_')
     628                return false;
     629            ++buffer;
     630        }
     631    });
    626632}
    627633
    628634// https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
    629 static String canonicalizeLanguageTag(const CString& tag)
     635String canonicalizeUnicodeLocaleID(const CString& tag)
    630636{
    631637    auto buffer = localeIDBufferForLanguageTagWithNullTerminator(tag);
     
    703709            if (isStructurallyValidLanguageTag(tag)) {
    704710                ASSERT(tag.isAllASCII());
    705                 String canonicalizedTag = canonicalizeLanguageTag(tag.ascii());
     711                String canonicalizedTag = canonicalizeUnicodeLocaleID(tag.ascii());
    706712                if (!canonicalizedTag.isNull()) {
    707713                    if (seenSet.add(canonicalizedTag).isNewEntry)
     
    740746    // same thing as userPreferredLanguages()[0].
    741747    if (auto defaultLanguage = globalObject->globalObjectMethodTable()->defaultLanguage) {
    742         String locale = canonicalizeLanguageTag(defaultLanguage().utf8());
     748        String locale = canonicalizeUnicodeLocaleID(defaultLanguage().utf8());
    743749        if (!locale.isEmpty())
    744750            return locale;
     
    747753    Vector<String> languages = userPreferredLanguages();
    748754    for (const auto& language : languages) {
    749         String locale = canonicalizeLanguageTag(language.utf8());
     755        String locale = canonicalizeUnicodeLocaleID(language.utf8());
    750756        if (!locale.isEmpty())
    751757            return locale;
     
    14701476}
    14711477
     1478std::optional<String> mapBCP47ToICUCalendarKeyword(const String& calendar)
     1479{
     1480    if (calendar == "gregory"_s)
     1481        return "gregorian"_s;
     1482    if (calendar == "islamicc"_s)
     1483        return "islamic-civil"_s;
     1484    if (calendar == "ethioaa"_s)
     1485        return "ethiopic-amete-alem"_s;
     1486    return std::nullopt;
     1487}
     1488
    14721489std::optional<String> mapICUCollationKeywordToBCP47(const String& collation)
    14731490{
    1474     // Map keyword values to BCP 47 equivalents.
    14751491    if (collation == "dictionary"_s)
    14761492        return "dict"_s;
  • trunk/Source/JavaScriptCore/runtime/IntlObject.h

    r281374 r281375  
    128128bool isUnicodeLanguageId(StringView);
    129129bool isStructurallyValidLanguageTag(StringView);
     130String canonicalizeUnicodeLocaleID(const CString& languageTag);
    130131
    131132bool isWellFormedCurrencyCode(StringView);
     
    139140std::optional<String> mapICUCollationKeywordToBCP47(const String&);
    140141std::optional<String> mapICUCalendarKeywordToBCP47(const String&);
     142std::optional<String> mapBCP47ToICUCalendarKeyword(const String&);
    141143
    142144} // namespace JSC
Note: See TracChangeset for help on using the changeset viewer.