Changeset 206125 in webkit
- Timestamp: Sep 19, 2016 4:03:02 PM (8 years ago)
- Location: trunk/Source/WebCore
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
trunk/Source/WebCore/ChangeLog
r206122 r206125 1 2016-09-19 Alex Christensen <achristensen@webkit.org> 2 3 URLParser should parse serialized valid URLs faster than unknown input 4 https://bugs.webkit.org/show_bug.cgi?id=162228 5 6 Reviewed by Chris Dumez. 7 8 The URL constructor with ParsedURLStringTag is almost twice as fast as the other URL constructors. 9 Assuming there are no tabs or newlines, and assuming characters are already encoded decreases the URLParser 10 runtime by over 25% and adds infrastructure for more optimizations. 11 12 No new tests. No change in behaviour. 13 14 * platform/URL.cpp: 15 (WebCore::URL::URL): 16 * platform/URLParser.cpp: 17 (WebCore::utf8PercentEncode): 18 (WebCore::utf8PercentEncodeQuery): 19 (WebCore::URLParser::parse): 20 (WebCore::URLParser::parseSerializedURL): 21 (WebCore::incrementIterator): 22 (WebCore::URLParser::parseAuthority): 23 (WebCore::URLParser::parsePort): 24 (WebCore::URLParser::parseHost): 25 * platform/URLParser.h: 26 (WebCore::URLParser::parse): Deleted. 27 1 28 2016-09-19 Daniel Bates <dabates@apple.com> 2 29 -
trunk/Source/WebCore/platform/URL.cpp
r206076 r206125 443 443 if (URLParser::enabled()) { 444 444 URLParser parser; 445 *this = parser.parse(url); 446 ASSERT((url.isEmpty() && m_string.isEmpty()) || url == m_string); // FIXME: Investigate parsing non-null empty ParsedURLStrings. 447 } else { 445 *this = parser.parseSerializedURL(url); 446 } else 448 447 parse(url); 449 448 #if OS(WINDOWS) … … 453 452 ASSERT(url == m_string); 454 453 #endif 455 }456 454 } 457 455 -
trunk/Source/WebCore/platform/URLParser.cpp
r206076 r206125 436 436 } 437 437 438 template<bool serialized> 438 439 inline static void utf8PercentEncode(UChar32 codePoint, Vector<LChar>& destination, bool(*isInCodeSet)(UChar32)) 439 440 { 440 if (isInCodeSet(codePoint)) { 441 if (serialized) { 442 ASSERT_WITH_SECURITY_IMPLICATION(isASCII(codePoint)); 443 ASSERT_WITH_SECURITY_IMPLICATION(!isInCodeSet(codePoint)); 444 destination.append(codePoint); 445 } else { 446 if (isInCodeSet(codePoint)) { 447 uint8_t buffer[U8_MAX_LENGTH]; 448 int32_t offset = 0; 449 UBool error = false; 450 U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error); 451 // FIXME: Check error. 452 for (int32_t i = 0; i < offset; ++i) 453 percentEncode(buffer[i], destination); 454 } else { 455 ASSERT_WITH_MESSAGE(isASCII(codePoint), "isInCodeSet should always return true for non-ASCII characters"); 456 destination.append(codePoint); 457 } 458 } 459 } 460 461 template<bool serialized> 462 inline static void utf8PercentEncodeQuery(UChar32 codePoint, Vector<LChar>& destination) 463 { 464 if (serialized) { 465 ASSERT_WITH_SECURITY_IMPLICATION(isASCII(codePoint)); 466 ASSERT_WITH_SECURITY_IMPLICATION(!shouldPercentEncodeQueryByte(codePoint)); 467 destination.append(codePoint); 468 } else { 441 469 uint8_t buffer[U8_MAX_LENGTH]; 442 470 int32_t offset = 0; 443 471 UBool error = false; 444 472 U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error); 473 ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer))); 445 474 // FIXME: Check error. 
446 for (int32_t i = 0; i < offset; ++i) 447 percentEncode(buffer[i], destination); 448 } else { 449 ASSERT_WITH_MESSAGE(isASCII(codePoint), "isInCodeSet should always return true for non-ASCII characters"); 450 destination.append(codePoint); 451 } 452 } 453 454 inline static void utf8PercentEncodeQuery(UChar32 codePoint, Vector<LChar>& destination) 455 { 456 uint8_t buffer[U8_MAX_LENGTH]; 457 int32_t offset = 0; 458 UBool error = false; 459 U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error); 460 ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer))); 461 // FIXME: Check error. 462 for (int32_t i = 0; i < offset; ++i) { 463 auto byte = buffer[i]; 464 if (shouldPercentEncodeQueryByte(byte)) 465 percentEncode(byte, destination); 466 else 467 destination.append(byte); 475 for (int32_t i = 0; i < offset; ++i) { 476 auto byte = buffer[i]; 477 if (shouldPercentEncodeQueryByte(byte)) 478 percentEncode(byte, destination); 479 else 480 destination.append(byte); 481 } 468 482 } 469 483 } … … 864 878 URL URLParser::parse(const String& input, const URL& base, const TextEncoding& encoding) 865 879 { 880 const bool serialized = false; 866 881 if (input.is8Bit()) 867 return parse(input.characters8(), input.length(), base, encoding); 868 return parse(input.characters16(), input.length(), base, encoding); 869 } 870 871 template<typename CharacterType> 882 return parse<serialized>(input.characters8(), input.length(), base, encoding); 883 return parse<serialized>(input.characters16(), input.length(), base, encoding); 884 } 885 886 URL URLParser::parseSerializedURL(const String& input) 887 { 888 const bool serialized = true; 889 if (input.is8Bit()) 890 return parse<serialized>(input.characters8(), input.length(), { }, UTF8Encoding()); 891 return parse<serialized>(input.characters16(), input.length(), { }, UTF8Encoding()); 892 } 893 894 template<bool serialized, typename CharacterType> 895 void 
incrementIteratorSkippingTabAndNewLine(CodePointIterator<CharacterType>& iterator) 896 { 897 ++iterator; 898 while (!serialized && !iterator.atEnd() && isTabOrNewline(*iterator)) 899 ++iterator; 900 } 901 902 template<bool serialized, typename CharacterType> 872 903 URL URLParser::parse(const CharacterType* input, const unsigned length, const URL& base, const TextEncoding& encoding) 873 904 { … … 917 948 State state = State::SchemeStart; 918 949 while (!c.atEnd()) { 919 if ( isTabOrNewline(*c)) {950 if (!serialized && isTabOrNewline(*c)) { 920 951 ++c; 921 952 continue; … … 961 992 m_url.m_portEnd = m_url.m_userStart; 962 993 auto maybeSlash = c; 963 ++maybeSlash; 964 while (!maybeSlash.atEnd() && isTabOrNewline(*maybeSlash)) 965 ++maybeSlash; 994 incrementIteratorSkippingTabAndNewLine<serialized>(maybeSlash); 966 995 if (!maybeSlash.atEnd() && *maybeSlash == '/') { 967 996 m_asciiBuffer.append('/'); … … 984 1013 break; 985 1014 } 986 ++c; 987 while (!c.atEnd() && isTabOrNewline(*c)) 988 ++c; 1015 incrementIteratorSkippingTabAndNewLine<serialized>(c); 989 1016 if (c.atEnd()) { 990 1017 m_asciiBuffer.clear(); … … 1016 1043 if (*c == '/') { 1017 1044 m_asciiBuffer.append('/'); 1018 ++c; 1019 while (!c.atEnd() && isTabOrNewline(*c)) 1020 ++c; 1045 incrementIteratorSkippingTabAndNewLine<serialized>(c); 1021 1046 if (c.atEnd()) 1022 1047 return failure(input, length); … … 1084 1109 m_asciiBuffer.append("//", 2); 1085 1110 if (*c == '/' || *c == '\\') { 1086 ++c; 1087 while (!c.atEnd() && isTabOrNewline(*c)) 1088 ++c; 1111 incrementIteratorSkippingTabAndNewLine<serialized>(c); 1089 1112 if (!c.atEnd() && (*c == '/' || *c == '\\')) 1090 1113 ++c; … … 1106 1129 { 1107 1130 if (*c == '@') { 1108 parseAuthority(CodePointIterator<CharacterType>(authorityOrHostBegin, c)); 1109 ++c; 1110 while (!c.atEnd() && isTabOrNewline(*c)) 1111 ++c; 1131 parseAuthority<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)); 1132 
incrementIteratorSkippingTabAndNewLine<serialized>(c); 1112 1133 authorityOrHostBegin = c; 1113 1134 state = State::Host; … … 1119 1140 m_url.m_userEnd = m_asciiBuffer.size(); 1120 1141 m_url.m_passwordEnd = m_url.m_userEnd; 1121 if (!parseHost (CodePointIterator<CharacterType>(authorityOrHostBegin, c)))1142 if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) 1122 1143 return failure(input, length); 1123 1144 if (!isSlash) { … … 1136 1157 LOG_STATE("Host"); 1137 1158 if (*c == '/' || *c == '?' || *c == '#') { 1138 if (!parseHost (CodePointIterator<CharacterType>(authorityOrHostBegin, c)))1159 if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) 1139 1160 return failure(input, length); 1140 1161 state = State::Path; … … 1266 1287 break; 1267 1288 } 1268 if (!parseHost (CodePointIterator<CharacterType>(authorityOrHostBegin, c)))1289 if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) 1269 1290 return failure(input, length); 1270 1291 … … 1328 1349 break; 1329 1350 } 1330 utf8PercentEncode (*c, m_asciiBuffer, isInDefaultEncodeSet);1351 utf8PercentEncode<serialized>(*c, m_asciiBuffer, isInDefaultEncodeSet); 1331 1352 ++c; 1332 1353 break; … … 1341 1362 state = State::Fragment; 1342 1363 } else { 1343 utf8PercentEncode (*c, m_asciiBuffer, isInSimpleEncodeSet);1364 utf8PercentEncode<serialized>(*c, m_asciiBuffer, isInSimpleEncodeSet); 1344 1365 ++c; 1345 1366 } … … 1355 1376 } 1356 1377 if (isUTF8Encoding) 1357 utf8PercentEncodeQuery (*c, m_asciiBuffer);1378 utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer); 1358 1379 else 1359 1380 queryBuffer.append(*c); … … 1431 1452 if (state == State::Host) 1432 1453 LOG_FINAL_STATE("Host"); 1433 if (!parseHost (authorityOrHostBegin))1454 if (!parseHost<serialized>(authorityOrHostBegin)) 1434 1455 return failure(input, length); 1435 1456 m_asciiBuffer.append('/'); … … 1485 1506 } 1486 1507 1487 if (!parseHost 
(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))1508 if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) 1488 1509 return failure(input, length); 1489 1510 … … 1543 1564 } 1544 1565 1545 template< typename CharacterType>1566 template<bool serialized, typename CharacterType> 1546 1567 void URLParser::parseAuthority(CodePointIterator<CharacterType> iterator) 1547 1568 { … … 1564 1585 break; 1565 1586 } 1566 utf8PercentEncode (*iterator, m_asciiBuffer, isInUserInfoEncodeSet);1587 utf8PercentEncode<serialized>(*iterator, m_asciiBuffer, isInUserInfoEncodeSet); 1567 1588 } 1568 1589 for (; !iterator.atEnd(); ++iterator) 1569 utf8PercentEncode (*iterator, m_asciiBuffer, isInUserInfoEncodeSet);1590 utf8PercentEncode<serialized>(*iterator, m_asciiBuffer, isInUserInfoEncodeSet); 1570 1591 m_url.m_passwordEnd = m_asciiBuffer.size(); 1571 1592 if (!m_url.m_userEnd) … … 1930 1951 } 1931 1952 1932 template< typename CharacterType>1953 template<bool serialized, typename CharacterType> 1933 1954 bool URLParser::parsePort(CodePointIterator<CharacterType>& iterator) 1934 1955 { … … 1940 1961 m_asciiBuffer.append(':'); 1941 1962 for (; !iterator.atEnd(); ++iterator) { 1942 if ( isTabOrNewline(*iterator))1963 if (!serialized && isTabOrNewline(*iterator)) 1943 1964 continue; 1944 1965 if (isASCIIDigit(*iterator)) { … … 1960 1981 } 1961 1982 1962 template< typename CharacterType>1983 template<bool serialized, typename CharacterType> 1963 1984 bool URLParser::parseHost(CodePointIterator<CharacterType> iterator) 1964 1985 { … … 1977 1998 if (!ipv6End.atEnd() && *ipv6End == ':') { 1978 1999 ++ipv6End; 1979 return parsePort (ipv6End);2000 return parsePort<serialized>(ipv6End); 1980 2001 } 1981 2002 m_url.m_portEnd = m_asciiBuffer.size(); … … 1989 2010 auto hostIterator = iterator; 1990 2011 for (; !iterator.atEnd(); ++iterator) { 1991 if ( isTabOrNewline(*iterator))2012 if (!serialized && isTabOrNewline(*iterator)) 1992 2013 continue; 1993 2014 
if (*iterator == ':') … … 2004 2025 } 2005 2026 ++iterator; 2006 return parsePort (iterator);2027 return parsePort<serialized>(iterator); 2007 2028 } 2008 2029 for (; hostIterator != iterator; ++hostIterator) { 2009 if ( !isTabOrNewline(*hostIterator))2030 if (serialized || !isTabOrNewline(*hostIterator)) 2010 2031 m_asciiBuffer.append(toASCIILower(*hostIterator)); 2011 2032 } … … 2013 2034 if (!hostIterator.atEnd()) { 2014 2035 ASSERT(*hostIterator == ':'); 2015 ++hostIterator; 2016 while (!hostIterator.atEnd() && isTabOrNewline(*hostIterator)) 2017 ++hostIterator; 2018 return parsePort(hostIterator); 2036 incrementIteratorSkippingTabAndNewLine<serialized>(hostIterator); 2037 return parsePort<serialized>(hostIterator); 2019 2038 } 2020 2039 m_url.m_portEnd = m_asciiBuffer.size(); … … 2025 2044 StringBuilder utf8Encoded; 2026 2045 for (; !iterator.atEnd(); ++iterator) { 2027 if ( isTabOrNewline(*iterator))2046 if (!serialized && isTabOrNewline(*iterator)) 2028 2047 continue; 2029 2048 if (*iterator == ':') … … 2056 2075 } 2057 2076 ++iterator; 2058 return parsePort (iterator);2077 return parsePort<serialized>(iterator); 2059 2078 } 2060 2079 … … 2063 2082 if (!iterator.atEnd()) { 2064 2083 ASSERT(*iterator == ':'); 2065 ++iterator; 2066 while (!iterator.atEnd() && isTabOrNewline(*iterator)) 2067 ++iterator; 2068 return parsePort(iterator); 2084 incrementIteratorSkippingTabAndNewLine<serialized>(iterator); 2085 return parsePort<serialized>(iterator); 2069 2086 } 2070 2087 m_url.m_portEnd = m_asciiBuffer.size(); -
trunk/Source/WebCore/platform/URLParser.h
r206044 r206125 38 38 public: 39 39 WEBCORE_EXPORT URL parse(const String&, const URL& = { }, const TextEncoding& = UTF8Encoding()); 40 WEBCORE_EXPORT URL parseSerializedURL(const String&); 40 41 WEBCORE_EXPORT static bool allValuesEqual(const URL&, const URL&); 41 42 … … 54 55 bool m_hostHasPercentOrNonASCII { false }; 55 56 56 template< typename CharacterType> URL parse(const CharacterType*, const unsigned length, const URL&, const TextEncoding&);57 template< typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>);58 template< typename CharacterType> bool parseHost(CodePointIterator<CharacterType>);59 template< typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&);57 template<bool serialized, typename CharacterType> URL parse(const CharacterType*, const unsigned length, const URL&, const TextEncoding&); 58 template<bool serialized, typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>); 59 template<bool serialized, typename CharacterType> bool parseHost(CodePointIterator<CharacterType>); 60 template<bool serialized, typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&); 60 61 template<typename CharacterType> URL failure(const CharacterType*, unsigned length); 61 62
Note: See TracChangeset for help on using the changeset viewer.