Changeset 206198 in webkit
- Timestamp: Sep 20, 2016 11:34:13 PM (8 years ago)
- Location: trunk
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
- trunk/Source/WebCore/ChangeLog
r206197 r206198
        1  2016-09-20 Alex Christensen <achristensen@webkit.org>
        2
        3          Optimize URLParser
        4          https://bugs.webkit.org/show_bug.cgi?id=162105
        5
        6          Reviewed by Geoffrey Garen.
        7
        8          Covered by new API tests.
        9          This is about a 5% speedup on my URLParser benchmark.
       10
       11          * platform/URLParser.cpp:
       12          (WebCore::percentEncodeByte):
       13          (WebCore::utf8PercentEncode):
       14          (WebCore::utf8QueryEncode):
       15          (WebCore::encodeQuery):
       16          (WebCore::URLParser::parse):
       17          (WebCore::serializeURLEncodedForm):
       18          (WebCore::percentEncode): Deleted.
       19          (WebCore::utf8PercentEncodeQuery): Deleted.
       20
   1   21  2016-09-20 Carlos Garcia Campos <cgarcia@igalia.com>
   2   22
- trunk/Source/WebCore/platform/URLParser.cpp
r206196 r206198
 458  458  }
 459  459
 460       inline static void percentEncode(uint8_t byte, Vector<LChar>& buffer)
      460  inline static void percentEncodeByte(uint8_t byte, Vector<LChar>& buffer)
 461  461  {
 462  462      buffer.append('%');
   …    …
 464  464      buffer.append(lowerNibbleToASCIIHexDigit(byte));
 465  465  }
      466
      467  const char* replacementCharacterUTF8PercentEncoded = "%EF%BF%BD";
      468  const size_t replacementCharacterUTF8PercentEncodedLength = 9;
 466  469
 467  470  template<bool serialized>
   …    …
 473  476          destination.append(codePoint);
 474  477      } else {
 475               if (isInCodeSet(codePoint)) {
 476                   uint8_t buffer[U8_MAX_LENGTH];
 477                   int32_t offset = 0;
 478                   UBool error = false;
 479                   U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
 480                   // FIXME: Check error.
 481                   for (int32_t i = 0; i < offset; ++i)
 482                       percentEncode(buffer[i], destination);
 483               } else {
 484                   ASSERT_WITH_MESSAGE(isASCII(codePoint), "isInCodeSet should always return true for non-ASCII characters");
 485                   destination.append(codePoint);
 486               }
      478          if (isASCII(codePoint)) {
      479              if (isInCodeSet(codePoint))
      480                  percentEncodeByte(codePoint, destination);
      481              else
      482                  destination.append(codePoint);
      483              return;
      484          }
      485          ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");
      486
      487          if (!U_IS_UNICODE_CHAR(codePoint)) {
      488              destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
      489              return;
      490          }
      491
      492          uint8_t buffer[U8_MAX_LENGTH];
      493          int32_t offset = 0;
      494          U8_APPEND_UNSAFE(buffer, offset, codePoint);
      495          for (int32_t i = 0; i < offset; ++i)
      496              percentEncodeByte(buffer[i], destination);
 487  497      }
 488  498  }
 489  499
 490  500  template<bool serialized>
 491       inline static void utf8PercentEncodeQuery(UChar32 codePoint, Vector<LChar>& destination)
      501  inline static void utf8QueryEncode(UChar32 codePoint, Vector<LChar>& destination)
 492  502  {
 493  503      if (serialized) {
   …    …
 496  506          destination.append(codePoint);
 497  507      } else {
      508          if (isASCII(codePoint)) {
      509              if (shouldPercentEncodeQueryByte(codePoint))
      510                  percentEncodeByte(codePoint, destination);
      511              else
      512                  destination.append(codePoint);
      513              return;
      514          }
      515
      516          if (!U_IS_UNICODE_CHAR(codePoint)) {
      517              destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
      518              return;
      519          }
      520
 498  521          uint8_t buffer[U8_MAX_LENGTH];
 499  522          int32_t offset = 0;
 500               UBool error = false;
 501               U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
 502               ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
 503               // FIXME: Check error.
      523          U8_APPEND_UNSAFE(buffer, offset, codePoint);
 504  524          for (int32_t i = 0; i < offset; ++i) {
 505  525              auto byte = buffer[i];
 506  526              if (shouldPercentEncodeQueryByte(byte))
 507                       percentEncode(byte, destination);
      527                  percentEncodeByte(byte, destination);
 508  528              else
 509  529                  destination.append(byte);
   …    …
 521  541          uint8_t byte = data[i];
 522  542          if (shouldPercentEncodeQueryByte(byte))
 523                   percentEncode(byte, destination);
      543              percentEncodeByte(byte, destination);
 524  544          else
 525  545              destination.append(byte);
   …    …
1414 1434  }
1415 1435  if (isUTF8Encoding)
1416           utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer);
     1436      utf8QueryEncode<serialized>(*c, m_asciiBuffer);
1417 1437  else
1418 1438      appendCodePoint(queryBuffer, *c);
   …    …
2199 2219              output.append(byte);
2200 2220          else
2201                   percentEncode(byte, output);
     2221              percentEncodeByte(byte, output);
2202 2222      }
2203 2223  }
- trunk/Tools/ChangeLog
r206187 r206198
        1  2016-09-20 Alex Christensen <achristensen@webkit.org>
        2
        3          Optimize URLParser
        4          https://bugs.webkit.org/show_bug.cgi?id=162105
        5
        6          Reviewed by Geoffrey Garen.
        7
        8          * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
        9          (TestWebKitAPI::TEST_F):
       10
   1   11  2016-09-20 Aakash Jain <aakash_jain@apple.com>
   2   12
- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp
r206177 r206198
 216  216      checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
 217  217      checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
 218           // FIXME: Fix and add a test with an invalid surrogate pair at the end with a space as the second code unit.
 219  218
 220  219      // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
   …    …
 657  656          {"http", "", "", "host", 0, "/", "", "", "http://host/"},
 658  657          {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
 659
      658      checkURLDifferences("http://host/`",
      659          {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
      660          {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
 660  661  }
 661  662
   …    …
 720  721          {"ws", "", "", "", 0, "s:", "", "", "ws:s:"});
 721  722      checkRelativeURL("notspecial:", "http://example.org/foo/bar", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
      723
      724      const wchar_t surrogateBegin = 0xD800;
      725      const wchar_t validSurrogateEnd = 0xDD55;
      726      const wchar_t invalidSurrogateEnd = 'A';
      727      checkURL(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, validSurrogateEnd, '\0'}),
      728          {"http", "", "", "w", 0, "/%F0%90%85%95", "", "", "http://w/%F0%90%85%95"});
      729
      730      // URLParser matches Chrome and Firefox but not URL::parse.
      731      checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, invalidSurrogateEnd}),
      732          {"http", "", "", "w", 0, "/%EF%BF%BDA", "", "", "http://w/%EF%BF%BDA"},
      733          {"http", "", "", "w", 0, "/%ED%A0%80A", "", "", "http://w/%ED%A0%80A"});
      734      checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, invalidSurrogateEnd, '\0'}),
      735          {"http", "", "", "w", 0, "/", "%EF%BF%BDA", "", "http://w/?%EF%BF%BDA"},
      736          {"http", "", "", "w", 0, "/", "%ED%A0%80A", "", "http://w/?%ED%A0%80A"});
      737      checkURLDifferences(wideString<11>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, '\0'}),
      738          {"http", "", "", "w", 0, "/%EF%BF%BD", "", "", "http://w/%EF%BF%BD"},
      739          {"http", "", "", "w", 0, "/%ED%A0%80", "", "", "http://w/%ED%A0%80"});
      740      checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, '\0'}),
      741          {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
      742          {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
      743      checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, ' ', '\0'}),
      744          {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
      745          {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
 722  746  }
 723  747
Note: See TracChangeset for help on using the changeset viewer.