Changeset 206177 in webkit
- Timestamp: Sep 20, 2016 2:50:30 PM (8 years ago)
- Location: trunk
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
trunk/Source/WebCore/ChangeLog
r206170 r206177 1 2016-09-20 Alex Christensen <achristensen@webkit.org> 2 3 Reduce allocations in URLParser 4 https://bugs.webkit.org/show_bug.cgi?id=162241 5 6 Reviewed by Chris Dumez. 7 8 Use Vectors instead of StringBuilders. This allows us to use the inline capacity on the stack 9 for short URLs (<2KB) and also allows us to skip branches because we know whether the 10 contained type is UChar or LChar at compile time. It also allows us to use uncheckedAppend. 11 12 Added new API tests for parts that were less tested, but there is 13 no change in behavior except for a performance improvement. 14 15 * platform/URLParser.cpp: 16 (WebCore::appendCodePoint): 17 (WebCore::encodeQuery): 18 (WebCore::URLParser::failure): 19 (WebCore::URLParser::parse): 20 (WebCore::percentDecode): 21 (WebCore::domainToASCII): 22 (WebCore::hasInvalidDomainCharacter): 23 (WebCore::URLParser::parseHost): 24 (WebCore::formURLDecode): 25 (WebCore::isC0Control): Deleted. 26 * platform/URLParser.h: 27 1 28 2016-09-20 Nan Wang <n_wang@apple.com> 2 29 -
trunk/Source/WebCore/platform/URLParser.cpp
r206169 r206177 31 31 #include <unicode/uidna.h> 32 32 #include <unicode/utypes.h> 33 #include <wtf/HashMap.h>34 #include <wtf/NeverDestroyed.h>35 #include <wtf/text/StringBuilder.h>36 #include <wtf/text/StringHash.h>37 33 38 34 namespace WebCore { … … 115 111 m_begin += i; 116 112 return *this; 113 } 114 115 static void appendCodePoint(Vector<UChar>& destination, UChar32 codePoint) 116 { 117 if (U_IS_BMP(codePoint)) { 118 destination.append(static_cast<UChar>(codePoint)); 119 return; 120 } 121 destination.reserveCapacity(destination.size() + 2); 122 destination.uncheckedAppend(U16_LEAD(codePoint)); 123 destination.uncheckedAppend(U16_TRAIL(codePoint)); 117 124 } 118 125 … … 505 512 } 506 513 507 inline static void encodeQuery(const StringBuilder& source, Vector<LChar>& destination, const TextEncoding& encoding)514 inline static void encodeQuery(const Vector<UChar>& source, Vector<LChar>& destination, const TextEncoding& encoding) 508 515 { 509 516 // FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested. 
510 CString encoded = encoding.encode( source.toStringPreserveCapacity(), URLEncodedEntitiesForUnencodables);517 CString encoded = encoding.encode(StringView(source.data(), source.size()), URLEncodedEntitiesForUnencodables); 511 518 const char* data = encoded.data(); 512 519 size_t length = encoded.length(); … … 913 920 return parse<serialized>(input.characters16(), input.length(), { }, UTF8Encoding()); 914 921 } 915 922 916 923 template<bool serialized, typename CharacterType> 917 924 URL URLParser::parse(const CharacterType* input, const unsigned length, const URL& base, const TextEncoding& encoding) … … 924 931 925 932 bool isUTF8Encoding = encoding == UTF8Encoding(); 926 StringBuilderqueryBuffer;933 Vector<UChar> queryBuffer; 927 934 928 935 unsigned endIndex = length; … … 1409 1416 utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer); 1410 1417 else 1411 queryBuffer.append(*c);1418 appendCodePoint(queryBuffer, *c); 1412 1419 ++c; 1413 1420 break; … … 1417 1424 m_asciiBuffer.append(*c); 1418 1425 else 1419 m_unicodeFragmentBuffer.append(*c);1426 appendCodePoint(m_unicodeFragmentBuffer, *c); 1420 1427 ++c; 1421 1428 break; … … 1927 1934 } 1928 1935 1929 // FIXME: This should return a CString. 1930 inline static String percentDecode(const LChar* input, size_t length) 1931 { 1932 StringBuilder output; 1936 const size_t defaultInlineBufferSize = 2048; 1937 1938 inline static Vector<LChar, defaultInlineBufferSize> percentDecode(const LChar* input, size_t length) 1939 { 1940 Vector<LChar, defaultInlineBufferSize> output; 1941 output.reserveInitialCapacity(length); 1933 1942 1934 1943 for (size_t i = 0; i < length; ++i) { 1935 1944 uint8_t byte = input[i]; 1936 1945 if (byte != '%') 1937 output. append(byte);1946 output.uncheckedAppend(byte); 1938 1947 else if (i < length - 2) { 1939 1948 if (isASCIIHexDigit(input[i + 1]) && isASCIIHexDigit(input[i + 2])) { 1940 output. 
append(toASCIIHexValue(input[i + 1], input[i + 2]));1949 output.uncheckedAppend(toASCIIHexValue(input[i + 1], input[i + 2])); 1941 1950 i += 2; 1942 1951 } else 1943 output. append(byte);1952 output.uncheckedAppend(byte); 1944 1953 } else 1945 output. append(byte);1946 } 1947 return output .toStringPreserveCapacity();1954 output.uncheckedAppend(byte); 1955 } 1956 return output; 1948 1957 } 1949 1958 … … 1955 1964 } 1956 1965 1957 inline static Optional<String> domainToASCII(const String& domain) 1958 { 1959 const unsigned hostnameBufferLength = 2048; 1960 1966 inline static Optional<Vector<LChar, defaultInlineBufferSize>> domainToASCII(const String& domain) 1967 { 1968 Vector<LChar, defaultInlineBufferSize> ascii; 1961 1969 if (containsOnlyASCII(domain)) { 1962 if (domain.is8Bit())1963 return domain.convertToASCIILowercase();1964 Vector<LChar, hostnameBufferLength> buffer;1965 1970 size_t length = domain.length(); 1966 buffer.reserveInitialCapacity(length); 1967 for (size_t i = 0; i < length; ++i) 1968 buffer.append(toASCIILower(domain[i])); 1969 return String(buffer.data(), length); 1971 if (domain.is8Bit()) { 1972 const LChar* characters = domain.characters8(); 1973 ascii.reserveInitialCapacity(length); 1974 for (size_t i = 0; i < length; ++i) 1975 ascii.uncheckedAppend(toASCIILower(characters[i])); 1976 } else { 1977 const UChar* characters = domain.characters16(); 1978 ascii.reserveInitialCapacity(length); 1979 for (size_t i = 0; i < length; ++i) 1980 ascii.uncheckedAppend(toASCIILower(characters[i])); 1981 } 1982 return ascii; 1970 1983 } 1971 1984 1972 UChar hostnameBuffer[ hostnameBufferLength];1985 UChar hostnameBuffer[defaultInlineBufferSize]; 1973 1986 UErrorCode error = U_ZERO_ERROR; 1974 1987 … … 1978 1991 #endif 1979 1992 // FIXME: This should use uidna_openUTS46 / uidna_close instead 1980 int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, hostnameBufferLength, 
UIDNA_ALLOW_UNASSIGNED, nullptr, &error);1993 int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, defaultInlineBufferSize, UIDNA_ALLOW_UNASSIGNED, nullptr, &error); 1981 1994 #if COMPILER(GCC) || COMPILER(CLANG) 1982 1995 #pragma GCC diagnostic pop 1983 1996 #endif 1997 ASSERT(numCharactersConverted <= static_cast<int32_t>(defaultInlineBufferSize)); 1984 1998 1985 1999 if (error == U_ZERO_ERROR) { 1986 LChar buffer[hostnameBufferLength];1987 2000 for (int32_t i = 0; i < numCharactersConverted; ++i) { 1988 2001 ASSERT(isASCII(hostnameBuffer[i])); 1989 buffer[i] = hostnameBuffer[i]; 1990 } 1991 return String(buffer, numCharactersConverted); 2002 ASSERT(!isASCIIUpper(hostnameBuffer[i])); 2003 } 2004 ascii.append(hostnameBuffer, numCharactersConverted); 2005 return ascii; 1992 2006 } 1993 2007 … … 1996 2010 } 1997 2011 1998 inline static bool hasInvalidDomainCharacter(const String& asciiDomain) 1999 { 2000 RELEASE_ASSERT(asciiDomain.is8Bit()); 2001 const LChar* characters = asciiDomain.characters8(); 2002 for (size_t i = 0; i < asciiDomain.length(); ++i) { 2003 if (isInvalidDomainCharacter(characters[i])) 2012 inline static bool hasInvalidDomainCharacter(const Vector<LChar, defaultInlineBufferSize>& asciiDomain) 2013 { 2014 for (size_t i = 0; i < asciiDomain.size(); ++i) { 2015 if (isInvalidDomainCharacter(asciiDomain[i])) 2004 2016 return true; 2005 2017 } … … 2096 2108 return true; 2097 2109 } 2098 2099 // FIXME: We probably don't need to make so many buffers and String copies. 
2100 StringBuilder utf8Encoded; 2110 2111 Vector<LChar, defaultInlineBufferSize> utf8Encoded; 2101 2112 for (; !iterator.atEnd(); ++iterator) { 2102 2113 if (!serialized && isTabOrNewline(*iterator)) … … 2112 2123 utf8Encoded.append(buffer, offset); 2113 2124 } 2114 RELEASE_ASSERT(utf8Encoded.is8Bit()); 2115 String percentDecoded = percentDecode(utf8Encoded.characters8(), utf8Encoded.length()); 2116 RELEASE_ASSERT(percentDecoded.is8Bit()); 2117 String domain = String::fromUTF8(percentDecoded.characters8(), percentDecoded.length()); 2125 Vector<LChar, defaultInlineBufferSize> percentDecoded = percentDecode(utf8Encoded.data(), utf8Encoded.size()); 2126 String domain = String::fromUTF8(percentDecoded.data(), percentDecoded.size()); 2118 2127 auto asciiDomain = domainToASCII(domain); 2119 2128 if (!asciiDomain || hasInvalidDomainCharacter(asciiDomain.value())) 2120 2129 return false; 2121 String& asciiDomainValue = asciiDomain.value(); 2122 RELEASE_ASSERT(asciiDomainValue.is8Bit()); 2123 const LChar* asciiDomainCharacters = asciiDomainValue.characters8(); 2124 2125 if (auto address = parseIPv4Host(CodePointIterator<LChar>(asciiDomainCharacters, asciiDomainCharacters + asciiDomainValue.length()))) { 2130 Vector<LChar, defaultInlineBufferSize>& asciiDomainValue = asciiDomain.value(); 2131 const LChar* asciiDomainCharacters = asciiDomainValue.data(); 2132 2133 if (auto address = parseIPv4Host(CodePointIterator<LChar>(asciiDomainValue.begin(), asciiDomainValue.end()))) { 2126 2134 serializeIPv4(address.value(), m_asciiBuffer); 2127 2135 m_url.m_hostEnd = m_asciiBuffer.size(); … … 2134 2142 } 2135 2143 2136 m_asciiBuffer.append(asciiDomainCharacters, asciiDomainValue. 
length());2144 m_asciiBuffer.append(asciiDomainCharacters, asciiDomainValue.size()); 2137 2145 m_url.m_hostEnd = m_asciiBuffer.size(); 2138 2146 if (!iterator.atEnd()) { … … 2151 2159 return Nullopt; 2152 2160 auto percentDecoded = percentDecode(reinterpret_cast<const LChar*>(utf8.data()), utf8.length()); 2153 RELEASE_ASSERT(percentDecoded.is8Bit()); 2154 return String::fromUTF8(percentDecoded.characters8(), percentDecoded.length()); 2161 return String::fromUTF8(percentDecoded.data(), percentDecoded.size()); 2155 2162 } 2156 2163 -
trunk/Source/WebCore/platform/URLParser.h
r206162 r206177 53 53 URL m_url; 54 54 Vector<LChar> m_asciiBuffer; 55 Vector<UChar 32> m_unicodeFragmentBuffer;55 Vector<UChar> m_unicodeFragmentBuffer; 56 56 bool m_urlIsSpecial { false }; 57 57 bool m_hostHasPercentOrNonASCII { false }; -
trunk/Tools/ChangeLog
r206162 r206177 1 2016-09-20 Alex Christensen <achristensen@webkit.org> 2 3 Reduce allocations in URLParser 4 https://bugs.webkit.org/show_bug.cgi?id=162241 5 6 Reviewed by Chris Dumez. 7 8 * TestWebKitAPI/Tests/WebCore/URLParser.cpp: 9 (TestWebKitAPI::TEST_F): 10 (TestWebKitAPI::checkURL): 11 1 12 2016-09-20 Alex Christensen <achristensen@webkit.org> 2 13 -
trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp
r206162 r206177 27 27 #include <WebCore/URLParser.h> 28 28 #include <wtf/MainThread.h> 29 #include <wtf/text/StringBuilder.h> 29 30 30 31 using namespace WebCore; … … 559 560 {"foo", "", "", "", 0, "/", "", "", "foo:///"}, 560 561 {"foo", "", "", "", 0, "//", "", "", "foo://"}); 562 checkURLDifferences(wideString(L"http://host?ß😍#ß😍"), 563 {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", wideString(L"ß😍"), wideString(L"http://host/?%C3%9F%F0%9F%98%8D#ß😍")}, 564 {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", "http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D"}); 561 565 562 566 // This matches the spec and web platform tests, but not Chrome, Firefox, or URL::parse. … … 650 654 {"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"}, 651 655 {"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"}); 656 checkURLDifferences("http://%48OsT", 657 {"http", "", "", "host", 0, "/", "", "", "http://host/"}, 658 {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"}); 659 652 660 } 653 661 … … 714 722 } 715 723 724 static void checkURL(const String& urlString, const TextEncoding& encoding, const ExpectedParts& parts) 725 { 726 URLParser parser; 727 auto url = parser.parse(urlString, { }, encoding); 728 EXPECT_TRUE(eq(parts.protocol, url.protocol())); 729 EXPECT_TRUE(eq(parts.user, url.user())); 730 EXPECT_TRUE(eq(parts.password, url.pass())); 731 EXPECT_TRUE(eq(parts.host, url.host())); 732 EXPECT_EQ(parts.port, url.port()); 733 EXPECT_TRUE(eq(parts.path, url.path())); 734 EXPECT_TRUE(eq(parts.query, url.query())); 735 EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier())); 736 EXPECT_TRUE(eq(parts.string, url.string())); 737 } 738 739 TEST_F(URLParserTest, QueryEncoding) 740 { 741 checkURL(wideString(L"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", wideString(L"ß😍"), wideString(L"http://host/?%C3%9F%F0%9F%98%8D#ß😍")}); 742 // FIXME: Add tests with other encodings. 
743 } 744 716 745 } // namespace TestWebKitAPI
Note: See TracChangeset
for help on using the changeset viewer.