Changeset 241233 in webkit


Ignore:
Timestamp:
Feb 8, 2019 8:40:22 PM (5 years ago)
Author:
ysuzuki@apple.com
Message:

[JSC] String.fromCharCode's slow path always generates 16bit string
https://bugs.webkit.org/show_bug.cgi?id=194466

Reviewed by Keith Miller.

JSTests:

  • stress/string-from-char-code-slow-path.js: Added.

(shouldBe):
(testWithLength):

Source/JavaScriptCore:

String.fromCharCode(a1) has a fast path and is the most frequently used form, while String.fromCharCode(a1, a2, ...)
goes to the slow path. However, in the slow path, we always create a 16bit string. A 16bit string takes 2x the memory
and, even worse, taints a rope as 16bit if the 16bit string is included in that rope. We found that acorn-wtb
creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
a 16bit string, even though only a few of those strings actually need 16bit characters. This patch attempts to
produce 8bit strings as much as possible.

It improves non-JIT acorn-wtb's peak and current memory footprints by 6% and 3%, respectively.

  • runtime/StringConstructor.cpp:

(JSC::stringFromCharCode):

Location:
trunk
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/JSTests/ChangeLog

    r241228 r241233  
     12019-02-08  Yusuke Suzuki  <ysuzuki@apple.com>
     2
     3        [JSC] String.fromCharCode's slow path always generates 16bit string
     4        https://bugs.webkit.org/show_bug.cgi?id=194466
     5
     6        Reviewed by Keith Miller.
     7
     8        * stress/string-from-char-code-slow-path.js: Added.
     9        (shouldBe):
     10        (testWithLength):
     11
    1122019-02-08  Saam barati  <sbarati@apple.com>
    213
  • trunk/Source/JavaScriptCore/ChangeLog

    r241230 r241233  
     12019-02-08  Yusuke Suzuki  <ysuzuki@apple.com>
     2
     3        [JSC] String.fromCharCode's slow path always generates 16bit string
     4        https://bugs.webkit.org/show_bug.cgi?id=194466
     5
     6        Reviewed by Keith Miller.
     7
     8        String.fromCharCode(a1) has a fast path and the most frequently used. And String.fromCharCode(a1, a2, ...)
     9        goes to the slow path. However, in the slow path, we always create 16bit string. 16bit string takes 2x memory,
     10        and even worse, taints ropes 16bit if 16bit string is included in the given rope. We find that acorn-wtb
     11        creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
     12        16bit string. However, only few strings are actually 16bit strings. This patch attempts to make 8bit string
     13        as much as possible.
     14
     15        It improves non JIT acorn-wtb's peak and current memory footprint by 6% and 3% respectively.
     16
     17        * runtime/StringConstructor.cpp:
     18        (JSC::stringFromCharCode):
     19
    1202019-02-08  Keith Miller  <keith_miller@apple.com>
    221
  • trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp

    r236697 r241233  
    8484    }
    8585
    86     UChar* buf;
    87     auto impl = StringImpl::createUninitialized(length, buf);
     86    LChar* buf8Bit;
     87    auto impl8Bit = StringImpl::createUninitialized(length, buf8Bit);
    8888    for (unsigned i = 0; i < length; ++i) {
    89         buf[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
     89        UChar character = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
    9090        RETURN_IF_EXCEPTION(scope, encodedJSValue());
     91        if (UNLIKELY(!isLatin1(character))) {
     92            UChar* buf16Bit;
     93            auto impl16Bit = StringImpl::createUninitialized(length, buf16Bit);
     94            StringImpl::copyCharacters(buf16Bit, buf8Bit, i);
     95            buf16Bit[i] = character;
     96            ++i;
     97            for (; i < length; ++i) {
     98                buf16Bit[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
     99                RETURN_IF_EXCEPTION(scope, encodedJSValue());
     100            }
     101            RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl16Bit))));
     102        }
     103        buf8Bit[i] = static_cast<LChar>(character);
    91104    }
    92     RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl))));
     105    RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl8Bit))));
    93106}
    94107
  • trunk/Source/WTF/wtf/text/StringImpl.cpp

    r239439 r241233  
    290290
    291291    for (size_t i = 0; i < length; ++i) {
    292         if (characters[i] & 0xFF00)
     292        if (!isLatin1(characters[i]))
    293293            return create(characters, length);
    294294        data[i] = static_cast<LChar>(characters[i]);
     
    415415            data8[i] = toASCIILower(character);
    416416        else {
    417             ASSERT(u_tolower(character) <= 0xFF);
     417            ASSERT(isLatin1(u_tolower(character)));
    418418            data8[i] = static_cast<LChar>(u_tolower(character));
    419419        }
     
    460460            ASSERT(u_toupper(character) <= 0xFFFF);
    461461            UChar upper = u_toupper(character);
    462             if (UNLIKELY(upper > 0xFF)) {
     462            if (UNLIKELY(!isLatin1(upper))) {
    463463                // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
    464464                goto upconvert;
     
    481481                *dest++ = 'S';
    482482            } else {
    483                 ASSERT(u_toupper(character) <= 0xFF);
     483                ASSERT(isLatin1(u_toupper(character)));
    484484                *dest++ = static_cast<LChar>(u_toupper(character));
    485485            }
     
    629629                    data8[i] = toASCIILower(character);
    630630                else {
    631                     ASSERT(u_foldCase(character, U_FOLD_CASE_DEFAULT) <= 0xFF);
     631                    ASSERT(isLatin1(u_foldCase(character, U_FOLD_CASE_DEFAULT)));
    632632                    data8[i] = static_cast<LChar>(u_foldCase(character, U_FOLD_CASE_DEFAULT));
    633633                }
     
    12541254
    12551255    if (is8Bit()) {
    1256         if (target > 0xFF) {
     1256        if (!isLatin1(target)) {
    12571257            // Looking for a 16-bit character in an 8-bit string, so we're done.
    12581258            return *this;
    12591259        }
    12601260
    1261         if (replacement <= 0xFF) {
     1261        if (isLatin1(replacement)) {
    12621262            LChar* data;
    12631263            LChar oldChar = static_cast<LChar>(target);
  • trunk/Source/WTF/wtf/text/StringImpl.h

    r239439 r241233  
    127127
    128128#endif
     129
     130template<typename CharacterType> inline bool isLatin1(CharacterType character)
     131{
     132    using UnsignedCharacterType = typename std::make_unsigned<CharacterType>::type;
     133    return static_cast<UnsignedCharacterType>(character) <= static_cast<UnsignedCharacterType>(0xFF);
     134}
    129135
    130136class StringImplShape {
     
    12271233using WTF::StringImpl;
    12281234using WTF::equal;
     1235using WTF::isLatin1;
  • trunk/Source/WTF/wtf/text/WTFString.cpp

    r239439 r241233  
    147147        return;
    148148    }
    149     if (character <= 0xFF && is8Bit()) {
     149    if (isLatin1(character) && is8Bit()) {
    150150        append(static_cast<LChar>(character));
    151151        return;
     
    830830    for (unsigned i = 0; i < length; ++i) {
    831831        UChar ch = characters[i];
    832         characterBuffer[i] = ch > 0xff ? '?' : ch;
     832        characterBuffer[i] = !isLatin1(ch) ? '?' : ch;
    833833    }
    834834
Note: See TracChangeset for help on using the changeset viewer.