Changeset 206198 in webkit


Ignore:
Timestamp:
Sep 20, 2016 11:34:13 PM (8 years ago)
Author:
achristensen@apple.com
Message:

Optimize URLParser
https://bugs.webkit.org/show_bug.cgi?id=162105

Reviewed by Geoffrey Garen.

Source/WebCore:

Covered by new API tests.
This is about a 5% speedup on my URLParser benchmark.

  • platform/URLParser.cpp:

(WebCore::percentEncodeByte):
(WebCore::utf8PercentEncode):
(WebCore::utf8QueryEncode):
(WebCore::encodeQuery):
(WebCore::URLParser::parse):
(WebCore::serializeURLEncodedForm):
(WebCore::percentEncode): Deleted.
(WebCore::utf8PercentEncodeQuery): Deleted.

Tools:

  • TestWebKitAPI/Tests/WebCore/URLParser.cpp:

(TestWebKitAPI::TEST_F):

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/ChangeLog

    r206197 r206198  
     1 2016-09-20  Alex Christensen  <achristensen@webkit.org>
     2
     3        Optimize URLParser
     4        https://bugs.webkit.org/show_bug.cgi?id=162105
     5
     6        Reviewed by Geoffrey Garen.
     7
     8        Covered by new API tests.
     9        This is about a 5% speedup on my URLParser benchmark.
     10
     11        * platform/URLParser.cpp:
     12        (WebCore::percentEncodeByte):
     13        (WebCore::utf8PercentEncode):
     14        (WebCore::utf8QueryEncode):
     15        (WebCore::encodeQuery):
     16        (WebCore::URLParser::parse):
     17        (WebCore::serializeURLEncodedForm):
     18        (WebCore::percentEncode): Deleted.
     19        (WebCore::utf8PercentEncodeQuery): Deleted.
     20
    1 21 2016-09-20  Carlos Garcia Campos  <cgarcia@igalia.com>
    2 22
  • trunk/Source/WebCore/platform/URLParser.cpp

    r206196 r206198  
    458458}
    459459
    460 inline static void percentEncode(uint8_t byte, Vector<LChar>& buffer)
     460inline static void percentEncodeByte(uint8_t byte, Vector<LChar>& buffer)
    461461{
    462462    buffer.append('%');
     
    464464    buffer.append(lowerNibbleToASCIIHexDigit(byte));
    465465}
     466
     467const char* replacementCharacterUTF8PercentEncoded = "%EF%BF%BD";
     468const size_t replacementCharacterUTF8PercentEncodedLength = 9;
    466469
    467470template<bool serialized>
     
    473476        destination.append(codePoint);
    474477    } else {
    475         if (isInCodeSet(codePoint)) {
    476             uint8_t buffer[U8_MAX_LENGTH];
    477             int32_t offset = 0;
    478             UBool error = false;
    479             U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
    480             // FIXME: Check error.
    481             for (int32_t i = 0; i < offset; ++i)
    482                 percentEncode(buffer[i], destination);
    483         } else {
    484             ASSERT_WITH_MESSAGE(isASCII(codePoint), "isInCodeSet should always return true for non-ASCII characters");
    485             destination.append(codePoint);
    486         }
     478        if (isASCII(codePoint)) {
     479            if (isInCodeSet(codePoint))
     480                percentEncodeByte(codePoint, destination);
     481            else
     482                destination.append(codePoint);
     483            return;
     484        }
     485        ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");
     486       
     487        if (!U_IS_UNICODE_CHAR(codePoint)) {
     488            destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
     489            return;
     490        }
     491       
     492        uint8_t buffer[U8_MAX_LENGTH];
     493        int32_t offset = 0;
     494        U8_APPEND_UNSAFE(buffer, offset, codePoint);
     495        for (int32_t i = 0; i < offset; ++i)
     496            percentEncodeByte(buffer[i], destination);
    487497    }
    488498}
    489499
    490500template<bool serialized>
    491 inline static void utf8PercentEncodeQuery(UChar32 codePoint, Vector<LChar>& destination)
     501inline static void utf8QueryEncode(UChar32 codePoint, Vector<LChar>& destination)
    492502{
    493503    if (serialized) {
     
    496506        destination.append(codePoint);
    497507    } else {
     508        if (isASCII(codePoint)) {
     509            if (shouldPercentEncodeQueryByte(codePoint))
     510                percentEncodeByte(codePoint, destination);
     511            else
     512                destination.append(codePoint);
     513            return;
     514        }
     515       
     516        if (!U_IS_UNICODE_CHAR(codePoint)) {
     517            destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
     518            return;
     519        }
     520
    498521        uint8_t buffer[U8_MAX_LENGTH];
    499522        int32_t offset = 0;
    500         UBool error = false;
    501         U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
    502         ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
    503         // FIXME: Check error.
     523        U8_APPEND_UNSAFE(buffer, offset, codePoint);
    504524        for (int32_t i = 0; i < offset; ++i) {
    505525            auto byte = buffer[i];
    506526            if (shouldPercentEncodeQueryByte(byte))
    507                 percentEncode(byte, destination);
     527                percentEncodeByte(byte, destination);
    508528            else
    509529                destination.append(byte);
     
    521541        uint8_t byte = data[i];
    522542        if (shouldPercentEncodeQueryByte(byte))
    523             percentEncode(byte, destination);
     543            percentEncodeByte(byte, destination);
    524544        else
    525545            destination.append(byte);
     
    14141434            }
    14151435            if (isUTF8Encoding)
    1416                 utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer);
     1436                utf8QueryEncode<serialized>(*c, m_asciiBuffer);
    14171437            else
    14181438                appendCodePoint(queryBuffer, *c);
     
    21992219            output.append(byte);
    22002220        else
    2201             percentEncode(byte, output);
     2221            percentEncodeByte(byte, output);
    22022222    }
    22032223}
  • trunk/Tools/ChangeLog

    r206187 r206198  
     1 2016-09-20  Alex Christensen  <achristensen@webkit.org>
     2
     3        Optimize URLParser
     4        https://bugs.webkit.org/show_bug.cgi?id=162105
     5
     6        Reviewed by Geoffrey Garen.
     7
     8        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
     9        (TestWebKitAPI::TEST_F):
     10
    1 11 2016-09-20  Aakash Jain  <aakash_jain@apple.com>
    2 12
  • trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

    r206177 r206198  
    216216    checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
    217217    checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
    218     // FIXME: Fix and add a test with an invalid surrogate pair at the end with a space as the second code unit.
    219218
    220219    // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
     
    657656        {"http", "", "", "host", 0, "/", "", "", "http://host/"},
    658657        {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
    659 
     658    checkURLDifferences("http://host/`",
     659        {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
     660        {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
    660661}
    661662   
     
    720721        {"ws", "", "", "", 0, "s:", "", "", "ws:s:"});
    721722    checkRelativeURL("notspecial:", "http://example.org/foo/bar", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
     723   
     724    const wchar_t surrogateBegin = 0xD800;
     725    const wchar_t validSurrogateEnd = 0xDD55;
     726    const wchar_t invalidSurrogateEnd = 'A';
     727    checkURL(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, validSurrogateEnd, '\0'}),
     728        {"http", "", "", "w", 0, "/%F0%90%85%95", "", "", "http://w/%F0%90%85%95"});
     729   
     730    // URLParser matches Chrome and Firefox but not URL::parse.
     731    checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, invalidSurrogateEnd}),
     732        {"http", "", "", "w", 0, "/%EF%BF%BDA", "", "", "http://w/%EF%BF%BDA"},
     733        {"http", "", "", "w", 0, "/%ED%A0%80A", "", "", "http://w/%ED%A0%80A"});
     734    checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, invalidSurrogateEnd, '\0'}),
     735        {"http", "", "", "w", 0, "/", "%EF%BF%BDA", "", "http://w/?%EF%BF%BDA"},
     736        {"http", "", "", "w", 0, "/", "%ED%A0%80A", "", "http://w/?%ED%A0%80A"});
     737    checkURLDifferences(wideString<11>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, '\0'}),
     738        {"http", "", "", "w", 0, "/%EF%BF%BD", "", "", "http://w/%EF%BF%BD"},
     739        {"http", "", "", "w", 0, "/%ED%A0%80", "", "", "http://w/%ED%A0%80"});
     740    checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, '\0'}),
     741        {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
     742        {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
     743    checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, ' ', '\0'}),
     744        {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
     745        {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
    722746}
    723747
Note: See TracChangeset for help on using the changeset viewer.