Context Navigation

← Previous Changeset
Next Changeset →

Changeset 241233 in webkit

Timestamp:

Feb 8, 2019 8:40:22 PM (5 years ago)

Author:

ysuzuki@apple.com

Message:

[JSC] String.fromCharCode's slow path always generates 16bit string
https://bugs.webkit.org/show_bug.cgi?id=194466

Reviewed by Keith Miller.

JSTests:

stress/string-from-char-code-slow-path.js: Added.

(shouldBe):
(testWithLength):

Source/JavaScriptCore:

String.fromCharCode(a1) has a fast path and is the most frequently used form, while String.fromCharCode(a1, a2, ...)
goes to the slow path. However, in the slow path, we always create a 16bit string. A 16bit string takes 2x memory,
and even worse, taints ropes as 16bit if a 16bit string is included in the given rope. We find that acorn-wtb
creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
a 16bit string. However, only a few of these strings are actually 16bit strings. This patch attempts to produce
8bit strings as much as possible.

It improves non-JIT acorn-wtb's peak and current memory footprints by 6% and 3% respectively.

runtime/StringConstructor.cpp:

(JSC::stringFromCharCode):

Location:

trunk

Files:

: 1 added
: 6 edited

JSTests/ChangeLog (modified) (1 diff)
JSTests/stress/string-from-char-code-slow-path.js (added)
Source/JavaScriptCore/ChangeLog (modified) (1 diff)
Source/JavaScriptCore/runtime/StringConstructor.cpp (modified) (1 diff)
Source/WTF/wtf/text/StringImpl.cpp (modified) (6 diffs)
Source/WTF/wtf/text/StringImpl.h (modified) (2 diffs)
Source/WTF/wtf/text/WTFString.cpp (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/JSTests/ChangeLog

-                      r241228
+                      r241233
+2019-02-08  Yusuke Suzuki  <ysuzuki@apple.com>
+        [JSC] String.fromCharCode's slow path always generates 16bit string
+        https://bugs.webkit.org/show_bug.cgi?id=194466
+        Reviewed by Keith Miller.
+        * stress/string-from-char-code-slow-path.js: Added.
+        (shouldBe):
+        (testWithLength):
 2019-02-08  Saam barati  <sbarati@apple.com>

trunk/Source/JavaScriptCore/ChangeLog

-                      r241230
+                      r241233
+2019-02-08  Yusuke Suzuki  <ysuzuki@apple.com>
+        [JSC] String.fromCharCode's slow path always generates 16bit string
+        https://bugs.webkit.org/show_bug.cgi?id=194466
+        Reviewed by Keith Miller.
+        String.fromCharCode(a1) has a fast path and the most frequently used. And String.fromCharCode(a1, a2, ...)
+        goes to the slow path. However, in the slow path, we always create 16bit string. 16bit string takes 2x memory,
+        and even worse, taints ropes 16bit if 16bit string is included in the given rope. We find that acorn-wtb
+        creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
+        16bit string. However, only few strings are actually 16bit strings. This patch attempts to make 8bit string
+        as much as possible.
+        It improves non JIT acorn-wtb's peak and current memory footprint by 6% and 3% respectively.
+        * runtime/StringConstructor.cpp:
+        (JSC::stringFromCharCode):
 2019-02-08  Keith Miller  <keith_miller@apple.com>

trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp

-                      r236697
+                      r241233
+    }
     UChar* buf;
     auto impl = StringImpl::createUninitialized(length, buf);
+    LChar* buf8Bit;
+    auto impl8Bit = StringImpl::createUninitialized(length, buf8Bit);
     for (unsigned i = 0; i < length; ++i) {
         buf[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
+        UChar character = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
         RETURN_IF_EXCEPTION(scope, encodedJSValue());
+        if (UNLIKELY(!isLatin1(character))) {
+            UChar* buf16Bit;
+            auto impl16Bit = StringImpl::createUninitialized(length, buf16Bit);
+            StringImpl::copyCharacters(buf16Bit, buf8Bit, i);
+            buf16Bit[i] = character;
+            ++i;
+            for (; i < length; ++i) {
+                buf16Bit[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
+                RETURN_IF_EXCEPTION(scope, encodedJSValue());
+            }
+            RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl16Bit))));
+        }
+        buf8Bit[i] = static_cast<LChar>(character);
+    }
     RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl))));
+    RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl8Bit))));
+}

trunk/Source/WTF/wtf/text/StringImpl.cpp

-                      r239439
+                      r241233
     for (size_t i = 0; i < length; ++i) {
         if (characters[i] & 0xFF00)
+        if (!isLatin1(characters[i]))
             return create(characters, length);
         data[i] = static_cast<LChar>(characters[i]);
 …
             data8[i] = toASCIILower(character);
         else {
             ASSERT(u_tolower(character) <= 0xFF);
+            ASSERT(isLatin1(u_tolower(character)));
             data8[i] = static_cast<LChar>(u_tolower(character));
+        }
 …
             ASSERT(u_toupper(character) <= 0xFFFF);
             UChar upper = u_toupper(character);
             if (UNLIKELY(upper > 0xFF)) {
+            if (UNLIKELY(!isLatin1(upper))) {
                 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
                 goto upconvert;
 …
                 *dest++ = 'S';
             } else {
                 ASSERT(u_toupper(character) <= 0xFF);
+                ASSERT(isLatin1(u_toupper(character)));
                 *dest++ = static_cast<LChar>(u_toupper(character));
+            }
 …
                     data8[i] = toASCIILower(character);
                 else {
                     ASSERT(u_foldCase(character, U_FOLD_CASE_DEFAULT) <= 0xFF);
+                    ASSERT(isLatin1(u_foldCase(character, U_FOLD_CASE_DEFAULT)));
                     data8[i] = static_cast<LChar>(u_foldCase(character, U_FOLD_CASE_DEFAULT));
+                }
 …
     if (is8Bit()) {
         if (target > 0xFF) {
+        if (!isLatin1(target)) {
             // Looking for a 16-bit character in an 8-bit string, so we're done.
             return *this;
+        }
         if (replacement <= 0xFF) {
+        if (isLatin1(replacement)) {
             LChar* data;
             LChar oldChar = static_cast<LChar>(target);

trunk/Source/WTF/wtf/text/StringImpl.h

-                      r239439
+                      r241233
 #endif
+template<typename CharacterType> inline bool isLatin1(CharacterType character)
+{
+    using UnsignedCharacterType = typename std::make_unsigned<CharacterType>::type;
+    return static_cast<UnsignedCharacterType>(character) <= static_cast<UnsignedCharacterType>(0xFF);
+}
 class StringImplShape {
 …
 using WTF::StringImpl;
 using WTF::equal;
+using WTF::isLatin1;

trunk/Source/WTF/wtf/text/WTFString.cpp

-                      r239439
+                      r241233
         return;
+    }
     if (character <= 0xFF && is8Bit()) {
+    if (isLatin1(character) && is8Bit()) {
         append(static_cast<LChar>(character));
         return;
 …
     for (unsigned i = 0; i < length; ++i) {
         UChar ch = characters[i];
         characterBuffer[i] = ch > 0xff ? '?' : ch;
+        characterBuffer[i] = !isLatin1(ch) ? '?' : ch;
+    }

Note: See TracChangeset for help on using the changeset viewer.