Changeset 206177 in webkit


Ignore:
Timestamp:
Sep 20, 2016 2:50:30 PM (8 years ago)
Author:
achristensen@apple.com
Message:

Reduce allocations in URLParser
https://bugs.webkit.org/show_bug.cgi?id=162241

Reviewed by Chris Dumez.

Source/WebCore:

Use Vectors instead of StringBuilders. This allows us to use the inline capacity on the stack
for short URLs (<2KB) and also allows us to skip branches because we know whether the
contained type is UChar or LChar at compile time. It also allows us to use uncheckedAppend.

Added new API tests for parts that were less tested, but there is
no change in behavior except for a performance improvement.

  • platform/URLParser.cpp:

(WebCore::appendCodePoint):
(WebCore::encodeQuery):
(WebCore::URLParser::failure):
(WebCore::URLParser::parse):
(WebCore::percentDecode):
(WebCore::domainToASCII):
(WebCore::hasInvalidDomainCharacter):
(WebCore::URLParser::parseHost):
(WebCore::formURLDecode):
(WebCore::isC0Control): Deleted.

  • platform/URLParser.h:

Tools:

  • TestWebKitAPI/Tests/WebCore/URLParser.cpp:

(TestWebKitAPI::TEST_F):
(TestWebKitAPI::checkURL):

Location:
trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/ChangeLog

    r206170 r206177  
     1 2016-09-20  Alex Christensen  <achristensen@webkit.org>
     2
     3        Reduce allocations in URLParser
     4        https://bugs.webkit.org/show_bug.cgi?id=162241
     5
     6        Reviewed by Chris Dumez.
     7
     8        Use Vectors instead of StringBuilders.  This allows us to use the inline capacity on the stack
     9        for short URLs (<2KB) and also allows us to skip branches because we know whether the
     10        contained type is UChar or LChar at compile time.  It also allows us to use uncheckedAppend.
     11
     12        Added new API tests for parts that were less tested, but there is
     13        no change in behavior except for a performance improvement.
     14
     15        * platform/URLParser.cpp:
     16        (WebCore::appendCodePoint):
     17        (WebCore::encodeQuery):
     18        (WebCore::URLParser::failure):
     19        (WebCore::URLParser::parse):
     20        (WebCore::percentDecode):
     21        (WebCore::domainToASCII):
     22        (WebCore::hasInvalidDomainCharacter):
     23        (WebCore::URLParser::parseHost):
     24        (WebCore::formURLDecode):
     25        (WebCore::isC0Control): Deleted.
     26        * platform/URLParser.h:
     27
    1 28 2016-09-20  Nan Wang  <n_wang@apple.com>
    2 29
  • trunk/Source/WebCore/platform/URLParser.cpp

    r206169 r206177  
    3131#include <unicode/uidna.h>
    3232#include <unicode/utypes.h>
    33 #include <wtf/HashMap.h>
    34 #include <wtf/NeverDestroyed.h>
    35 #include <wtf/text/StringBuilder.h>
    36 #include <wtf/text/StringHash.h>
    3733
    3834namespace WebCore {
     
    115111    m_begin += i;
    116112    return *this;
     113}
     114   
     115static void appendCodePoint(Vector<UChar>& destination, UChar32 codePoint)
     116{
     117    if (U_IS_BMP(codePoint)) {
     118        destination.append(static_cast<UChar>(codePoint));
     119        return;
     120    }
     121    destination.reserveCapacity(destination.size() + 2);
     122    destination.uncheckedAppend(U16_LEAD(codePoint));
     123    destination.uncheckedAppend(U16_TRAIL(codePoint));
    117124}
    118125
     
    505512}
    506513   
    507 inline static void encodeQuery(const StringBuilder& source, Vector<LChar>& destination, const TextEncoding& encoding)
     514inline static void encodeQuery(const Vector<UChar>& source, Vector<LChar>& destination, const TextEncoding& encoding)
    508515{
    509516    // FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested.
    510     CString encoded = encoding.encode(source.toStringPreserveCapacity(), URLEncodedEntitiesForUnencodables);
     517    CString encoded = encoding.encode(StringView(source.data(), source.size()), URLEncodedEntitiesForUnencodables);
    511518    const char* data = encoded.data();
    512519    size_t length = encoded.length();
     
    913920    return parse<serialized>(input.characters16(), input.length(), { }, UTF8Encoding());
    914921}
    915    
     922
    916923template<bool serialized, typename CharacterType>
    917924URL URLParser::parse(const CharacterType* input, const unsigned length, const URL& base, const TextEncoding& encoding)
     
    924931   
    925932    bool isUTF8Encoding = encoding == UTF8Encoding();
    926     StringBuilder queryBuffer;
     933    Vector<UChar> queryBuffer;
    927934
    928935    unsigned endIndex = length;
     
    14091416                utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer);
    14101417            else
    1411                 queryBuffer.append(*c);
     1418                appendCodePoint(queryBuffer, *c);
    14121419            ++c;
    14131420            break;
     
    14171424                m_asciiBuffer.append(*c);
    14181425            else
    1419                 m_unicodeFragmentBuffer.append(*c);
     1426                appendCodePoint(m_unicodeFragmentBuffer, *c);
    14201427            ++c;
    14211428            break;
     
    19271934}
    19281935
    1929 // FIXME: This should return a CString.
    1930 inline static String percentDecode(const LChar* input, size_t length)
    1931 {
    1932     StringBuilder output;
     1936const size_t defaultInlineBufferSize = 2048;
     1937
     1938inline static Vector<LChar, defaultInlineBufferSize> percentDecode(const LChar* input, size_t length)
     1939{
     1940    Vector<LChar, defaultInlineBufferSize> output;
     1941    output.reserveInitialCapacity(length);
    19331942   
    19341943    for (size_t i = 0; i < length; ++i) {
    19351944        uint8_t byte = input[i];
    19361945        if (byte != '%')
    1937             output.append(byte);
     1946            output.uncheckedAppend(byte);
    19381947        else if (i < length - 2) {
    19391948            if (isASCIIHexDigit(input[i + 1]) && isASCIIHexDigit(input[i + 2])) {
    1940                 output.append(toASCIIHexValue(input[i + 1], input[i + 2]));
     1949                output.uncheckedAppend(toASCIIHexValue(input[i + 1], input[i + 2]));
    19411950                i += 2;
    19421951            } else
    1943                 output.append(byte);
     1952                output.uncheckedAppend(byte);
    19441953        } else
    1945             output.append(byte);
    1946     }
    1947     return output.toStringPreserveCapacity();
     1954            output.uncheckedAppend(byte);
     1955    }
     1956    return output;
    19481957}
    19491958
     
    19551964}
    19561965
    1957 inline static Optional<String> domainToASCII(const String& domain)
    1958 {
    1959     const unsigned hostnameBufferLength = 2048;
    1960 
     1966inline static Optional<Vector<LChar, defaultInlineBufferSize>> domainToASCII(const String& domain)
     1967{
     1968    Vector<LChar, defaultInlineBufferSize> ascii;
    19611969    if (containsOnlyASCII(domain)) {
    1962         if (domain.is8Bit())
    1963             return domain.convertToASCIILowercase();
    1964         Vector<LChar, hostnameBufferLength> buffer;
    19651970        size_t length = domain.length();
    1966         buffer.reserveInitialCapacity(length);
    1967         for (size_t i = 0; i < length; ++i)
    1968             buffer.append(toASCIILower(domain[i]));
    1969         return String(buffer.data(), length);
     1971        if (domain.is8Bit()) {
     1972            const LChar* characters = domain.characters8();
     1973            ascii.reserveInitialCapacity(length);
     1974            for (size_t i = 0; i < length; ++i)
     1975                ascii.uncheckedAppend(toASCIILower(characters[i]));
     1976        } else {
     1977            const UChar* characters = domain.characters16();
     1978            ascii.reserveInitialCapacity(length);
     1979            for (size_t i = 0; i < length; ++i)
     1980                ascii.uncheckedAppend(toASCIILower(characters[i]));
     1981        }
     1982        return ascii;
    19701983    }
    19711984   
    1972     UChar hostnameBuffer[hostnameBufferLength];
     1985    UChar hostnameBuffer[defaultInlineBufferSize];
    19731986    UErrorCode error = U_ZERO_ERROR;
    19741987
     
    19781991#endif
    19791992    // FIXME: This should use uidna_openUTS46 / uidna_close instead
    1980     int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, nullptr, &error);
     1993    int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, defaultInlineBufferSize, UIDNA_ALLOW_UNASSIGNED, nullptr, &error);
    19811994#if COMPILER(GCC) || COMPILER(CLANG)
    19821995#pragma GCC diagnostic pop
    19831996#endif
     1997    ASSERT(numCharactersConverted <= static_cast<int32_t>(defaultInlineBufferSize));
    19841998
    19851999    if (error == U_ZERO_ERROR) {
    1986         LChar buffer[hostnameBufferLength];
    19872000        for (int32_t i = 0; i < numCharactersConverted; ++i) {
    19882001            ASSERT(isASCII(hostnameBuffer[i]));
    1989             buffer[i] = hostnameBuffer[i];
    1990         }
    1991         return String(buffer, numCharactersConverted);
     2002            ASSERT(!isASCIIUpper(hostnameBuffer[i]));
     2003        }
     2004        ascii.append(hostnameBuffer, numCharactersConverted);
     2005        return ascii;
    19922006    }
    19932007
     
    19962010}
    19972011
    1998 inline static bool hasInvalidDomainCharacter(const String& asciiDomain)
    1999 {
    2000     RELEASE_ASSERT(asciiDomain.is8Bit());
    2001     const LChar* characters = asciiDomain.characters8();
    2002     for (size_t i = 0; i < asciiDomain.length(); ++i) {
    2003         if (isInvalidDomainCharacter(characters[i]))
     2012inline static bool hasInvalidDomainCharacter(const Vector<LChar, defaultInlineBufferSize>& asciiDomain)
     2013{
     2014    for (size_t i = 0; i < asciiDomain.size(); ++i) {
     2015        if (isInvalidDomainCharacter(asciiDomain[i]))
    20042016            return true;
    20052017    }
     
    20962108        return true;
    20972109    }
    2098 
    2099     // FIXME: We probably don't need to make so many buffers and String copies.
    2100     StringBuilder utf8Encoded;
     2110   
     2111    Vector<LChar, defaultInlineBufferSize> utf8Encoded;
    21012112    for (; !iterator.atEnd(); ++iterator) {
    21022113        if (!serialized && isTabOrNewline(*iterator))
     
    21122123        utf8Encoded.append(buffer, offset);
    21132124    }
    2114     RELEASE_ASSERT(utf8Encoded.is8Bit());
    2115     String percentDecoded = percentDecode(utf8Encoded.characters8(), utf8Encoded.length());
    2116     RELEASE_ASSERT(percentDecoded.is8Bit());
    2117     String domain = String::fromUTF8(percentDecoded.characters8(), percentDecoded.length());
     2125    Vector<LChar, defaultInlineBufferSize> percentDecoded = percentDecode(utf8Encoded.data(), utf8Encoded.size());
     2126    String domain = String::fromUTF8(percentDecoded.data(), percentDecoded.size());
    21182127    auto asciiDomain = domainToASCII(domain);
    21192128    if (!asciiDomain || hasInvalidDomainCharacter(asciiDomain.value()))
    21202129        return false;
    2121     String& asciiDomainValue = asciiDomain.value();
    2122     RELEASE_ASSERT(asciiDomainValue.is8Bit());
    2123     const LChar* asciiDomainCharacters = asciiDomainValue.characters8();
    2124 
    2125     if (auto address = parseIPv4Host(CodePointIterator<LChar>(asciiDomainCharacters, asciiDomainCharacters + asciiDomainValue.length()))) {
     2130    Vector<LChar, defaultInlineBufferSize>& asciiDomainValue = asciiDomain.value();
     2131    const LChar* asciiDomainCharacters = asciiDomainValue.data();
     2132
     2133    if (auto address = parseIPv4Host(CodePointIterator<LChar>(asciiDomainValue.begin(), asciiDomainValue.end()))) {
    21262134        serializeIPv4(address.value(), m_asciiBuffer);
    21272135        m_url.m_hostEnd = m_asciiBuffer.size();
     
    21342142    }
    21352143
    2136     m_asciiBuffer.append(asciiDomainCharacters, asciiDomainValue.length());
     2144    m_asciiBuffer.append(asciiDomainCharacters, asciiDomainValue.size());
    21372145    m_url.m_hostEnd = m_asciiBuffer.size();
    21382146    if (!iterator.atEnd()) {
     
    21512159        return Nullopt;
    21522160    auto percentDecoded = percentDecode(reinterpret_cast<const LChar*>(utf8.data()), utf8.length());
    2153     RELEASE_ASSERT(percentDecoded.is8Bit());
    2154     return String::fromUTF8(percentDecoded.characters8(), percentDecoded.length());
     2161    return String::fromUTF8(percentDecoded.data(), percentDecoded.size());
    21552162}
    21562163
  • trunk/Source/WebCore/platform/URLParser.h

    r206162 r206177  
    5353    URL m_url;
    5454    Vector<LChar> m_asciiBuffer;
    55     Vector<UChar32> m_unicodeFragmentBuffer;
     55    Vector<UChar> m_unicodeFragmentBuffer;
    5656    bool m_urlIsSpecial { false };
    5757    bool m_hostHasPercentOrNonASCII { false };
  • trunk/Tools/ChangeLog

    r206162 r206177  
     1 2016-09-20  Alex Christensen  <achristensen@webkit.org>
     2
     3        Reduce allocations in URLParser
     4        https://bugs.webkit.org/show_bug.cgi?id=162241
     5
     6        Reviewed by Chris Dumez.
     7
     8        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
     9        (TestWebKitAPI::TEST_F):
     10        (TestWebKitAPI::checkURL):
     11
    1 12 2016-09-20  Alex Christensen  <achristensen@webkit.org>
    2 13
  • trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

    r206162 r206177  
    2727#include <WebCore/URLParser.h>
    2828#include <wtf/MainThread.h>
     29#include <wtf/text/StringBuilder.h>
    2930
    3031using namespace WebCore;
     
    559560        {"foo", "", "", "", 0, "/", "", "", "foo:///"},
    560561        {"foo", "", "", "", 0, "//", "", "", "foo://"});
     562    checkURLDifferences(wideString(L"http://host?ß😍#ß😍"),
     563        {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", wideString(L"ß😍"), wideString(L"http://host/?%C3%9F%F0%9F%98%8D#ß😍")},
     564        {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", "http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D"});
    561565
    562566    // This matches the spec and web platform tests, but not Chrome, Firefox, or URL::parse.
     
    650654        {"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"},
    651655        {"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"});
     656    checkURLDifferences("http://%48OsT",
     657        {"http", "", "", "host", 0, "/", "", "", "http://host/"},
     658        {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
     659
    652660}
    653661   
     
    714722}
    715723
     724static void checkURL(const String& urlString, const TextEncoding& encoding, const ExpectedParts& parts)
     725{
     726    URLParser parser;
     727    auto url = parser.parse(urlString, { }, encoding);
     728    EXPECT_TRUE(eq(parts.protocol, url.protocol()));
     729    EXPECT_TRUE(eq(parts.user, url.user()));
     730    EXPECT_TRUE(eq(parts.password, url.pass()));
     731    EXPECT_TRUE(eq(parts.host, url.host()));
     732    EXPECT_EQ(parts.port, url.port());
     733    EXPECT_TRUE(eq(parts.path, url.path()));
     734    EXPECT_TRUE(eq(parts.query, url.query()));
     735    EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
     736    EXPECT_TRUE(eq(parts.string, url.string()));
     737}
     738
     739TEST_F(URLParserTest, QueryEncoding)
     740{
     741    checkURL(wideString(L"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", wideString(L"ß😍"), wideString(L"http://host/?%C3%9F%F0%9F%98%8D#ß😍")});
     742    // FIXME: Add tests with other encodings.
     743}
     744
    716745} // namespace TestWebKitAPI
Note: See TracChangeset for help on using the changeset viewer.