Changeset 206223 in webkit
- Timestamp:
- Sep 21, 2016 11:33:30 AM (8 years ago)
- Location:
- trunk/Source/WebCore
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r206222 r206223
     1  2016-09-21 Alex Christensen <achristensen@webkit.org>
     2
     3  Optimize URLParser
     4  https://bugs.webkit.org/show_bug.cgi?id=162338
     5
     6  Reviewed by Tim Horton.
     7
     8  No change in behavior. Just a marginal performance improvement.
     9
    10  * platform/URLParser.cpp:
    11  (WebCore::isValidSchemeCharacter):
    12  Added to reduce branches when parsing the scheme of a URL. Now there is a table lookup instead of many range checks.
    13  (WebCore::URLParser::parse):
    14  Use appendVector, which uses memcpy.
    15  (WebCore::URLParser::parseHostAndPort):
    16  If we know that we are parsing the serialized output of a valid URL,
    17  we do not need to convert host characters to lower case.
    18  They are already lower case.
    19
 1  20  2016-09-21 Keith Miller <keith_miller@apple.com>
 2  21
-
trunk/Source/WebCore/platform/URLParser.cpp
r206219 r206223 130 130 QueryPercent = 0x8, 131 131 SlashQuestionOrHash = 0x10, 132 Scheme = 0x20, 132 133 }; 133 134 … … 176 177 0, // ')' 177 178 0, // '*' 178 0, // '+'179 Scheme, // '+' 179 180 0, // ',' 180 0, // '-'181 0, // '.'181 Scheme, // '-' 182 Scheme, // '.' 182 183 UserInfo | InvalidDomain | SlashQuestionOrHash, // '/' 183 0, // '0'184 0, // '1'185 0, // '2'186 0, // '3'187 0, // '4'188 0, // '5'189 0, // '6'190 0, // '7'191 0, // '8'192 0, // '9'184 Scheme, // '0' 185 Scheme, // '1' 186 Scheme, // '2' 187 Scheme, // '3' 188 Scheme, // '4' 189 Scheme, // '5' 190 Scheme, // '6' 191 Scheme, // '7' 192 Scheme, // '8' 193 Scheme, // '9' 193 194 UserInfo | InvalidDomain, // ':' 194 195 UserInfo, // ';' … … 198 199 UserInfo | Default | InvalidDomain | SlashQuestionOrHash, // '?' 199 200 UserInfo | InvalidDomain, // '@' 200 0, // 'A'201 0, // 'B'202 0, // 'C'203 0, // 'D'204 0, // 'E'205 0, // 'F'206 0, // 'G'207 0, // 'H'208 0, // 'I'209 0, // 'J'210 0, // 'K'211 0, // 'L'212 0, // 'M'213 0, // 'N'214 0, // 'O'215 0, // 'P'216 0, // 'Q'217 0, // 'R'218 0, // 'S'219 0, // 'T'220 0, // 'U'221 0, // 'V'222 0, // 'W'223 0, // 'X'224 0, // 'Y'225 0, // 'Z'201 Scheme, // 'A' 202 Scheme, // 'B' 203 Scheme, // 'C' 204 Scheme, // 'D' 205 Scheme, // 'E' 206 Scheme, // 'F' 207 Scheme, // 'G' 208 Scheme, // 'H' 209 Scheme, // 'I' 210 Scheme, // 'J' 211 Scheme, // 'K' 212 Scheme, // 'L' 213 Scheme, // 'M' 214 Scheme, // 'N' 215 Scheme, // 'O' 216 Scheme, // 'P' 217 Scheme, // 'Q' 218 Scheme, // 'R' 219 Scheme, // 'S' 220 Scheme, // 'T' 221 Scheme, // 'U' 222 Scheme, // 'V' 223 Scheme, // 'W' 224 Scheme, // 'X' 225 Scheme, // 'Y' 226 Scheme, // 'Z' 226 227 UserInfo | InvalidDomain, // '[' 227 228 UserInfo | InvalidDomain | SlashQuestionOrHash, // '\\' … … 230 231 0, // '_' 231 232 UserInfo | Default, // '`' 232 0, // 'a'233 0, // 'b'234 0, // 'c'235 0, // 'd'236 0, // 'e'237 0, // 'f'238 0, // 'g'239 0, // 'h'240 0, // 'i'241 0, // 'j'242 0, // 'k'243 0, // 'l'244 0, // 
'm'245 0, // 'n'246 0, // 'o'247 0, // 'p'248 0, // 'q'249 0, // 'r'250 0, // 's'251 0, // 't'252 0, // 'u'253 0, // 'v'254 0, // 'w'255 0, // 'x'256 0, // 'y'257 0, // 'z'233 Scheme, // 'a' 234 Scheme, // 'b' 235 Scheme, // 'c' 236 Scheme, // 'd' 237 Scheme, // 'e' 238 Scheme, // 'f' 239 Scheme, // 'g' 240 Scheme, // 'h' 241 Scheme, // 'i' 242 Scheme, // 'j' 243 Scheme, // 'k' 244 Scheme, // 'l' 245 Scheme, // 'm' 246 Scheme, // 'n' 247 Scheme, // 'o' 248 Scheme, // 'p' 249 Scheme, // 'q' 250 Scheme, // 'r' 251 Scheme, // 's' 252 Scheme, // 't' 253 Scheme, // 'u' 254 Scheme, // 'v' 255 Scheme, // 'w' 256 Scheme, // 'x' 257 Scheme, // 'y' 258 Scheme, // 'z' 258 259 UserInfo | Default, // '{' 259 260 UserInfo, // '|' … … 400 401 template<typename CharacterType> inline static bool isPercentOrNonASCII(CharacterType character) { return !isASCII(character) || character == '%'; } 401 402 template<typename CharacterType> inline static bool isSlashQuestionOrHash(CharacterType character) { return character <= '\\' && characterClassTable[character] & SlashQuestionOrHash; } 403 template<typename CharacterType> inline static bool isValidSchemeCharacter(CharacterType character) { return character <= 'z' && characterClassTable[character] & Scheme; } 402 404 static bool shouldPercentEncodeQueryByte(uint8_t byte) { return characterClassTable[byte] & QueryPercent; } 403 405 … … 946 948 LOG(URLParser, "Parsing URL <%s> base <%s>", String(input, length).utf8().data(), base.string().utf8().data()); 947 949 m_url = { }; 948 m_asciiBuffer.clear();949 m_unicodeFragmentBuffer.clear();950 m_asciiBuffer.reserve Capacity(length);950 ASSERT(m_asciiBuffer.isEmpty()); 951 ASSERT(m_unicodeFragmentBuffer.isEmpty()); 952 m_asciiBuffer.reserveInitialCapacity(length); 951 953 952 954 bool isUTF8Encoding = encoding == UTF8Encoding(); … … 1011 1013 case State::Scheme: 1012 1014 LOG_STATE("Scheme"); 1013 if (is ASCIIAlphanumeric(*c) || *c == '+' || *c == '-' || *c == '.')1015 if 
(isValidSchemeCharacter(*c)) 1014 1016 m_asciiBuffer.append(toASCIILower(*c)); 1015 1017 else if (*c == ':') { … … 1632 1634 m_url.m_string = String::adopt(WTFMove(m_asciiBuffer)); 1633 1635 else { 1634 // FIXME: This should use a Vector<UChar> and adopt it. 1635 StringBuilder builder; 1636 builder.reserveCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size()); 1637 builder.append(m_asciiBuffer.data(), m_asciiBuffer.size()); 1638 for (size_t i = 0; i < m_unicodeFragmentBuffer.size(); ++i) 1639 builder.append(m_unicodeFragmentBuffer[i]); 1640 m_url.m_string = builder.toString(); 1636 Vector<UChar> buffer; 1637 buffer.reserveInitialCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size()); 1638 buffer.appendVector(m_asciiBuffer); 1639 buffer.appendVector(m_unicodeFragmentBuffer); 1640 m_url.m_string = String::adopt(WTFMove(buffer)); 1641 1641 } 1642 1642 m_url.m_isValid = true; … … 2104 2104 } 2105 2105 2106 ASSERT(!serialized || m_hostHasPercentOrNonASCII); 2106 2107 if (!m_hostHasPercentOrNonASCII) { 2107 2108 auto hostIterator = iterator; … … 2125 2126 } 2126 2127 for (; hostIterator != iterator; ++hostIterator) { 2127 if (serialized || !isTabOrNewline(*hostIterator)) 2128 if (serialized) { 2129 ASSERT(!isASCIIUpper(*hostIterator)); 2130 m_asciiBuffer.append(*hostIterator); 2131 } else if (!isTabOrNewline(*hostIterator)) 2128 2132 m_asciiBuffer.append(toASCIILower(*hostIterator)); 2129 2133 }
Note: See TracChangeset for help on using the changeset viewer.