Changeset 206044 in webkit
- Timestamp: Sep 16, 2016 1:35:16 PM (8 years ago)
- Location: trunk/Source/WebCore
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r206043 r206044
      1  2016-09-16 Alex Christensen <achristensen@webkit.org>
      2
      3  Use Vector<LChar> instead of StringBuilder for the ASCII parts of URLParser
      4  https://bugs.webkit.org/show_bug.cgi?id=162035
      5
      6  Reviewed by Chris Dumez.
      7
      8  StringBuilder::append checks to see whether its StringBuffer is 8-bit or 16-bit each time it is called.
      9  When parsing URLs, almost all of the parsed URL is guaranteed to be 8-bit ASCII.
     10  Using a Vector<LChar> for this allows us to use uncheckedAppend in some places, and it always eliminates the 8-bit check.
     11  This is a ~20% speedup in url parsing.
     12
     13  Covered by existing API tests.
     14
     15  * platform/URLParser.cpp:
     16  (WebCore::isWindowsDriveLetter):
     17  (WebCore::percentEncode):
     18  (WebCore::utf8PercentEncode):
     19  (WebCore::utf8PercentEncodeQuery):
     20  (WebCore::encodeQuery):
     21  (WebCore::URLParser::copyURLPartsUntil):
     22  (WebCore::URLParser::popPath):
     23  (WebCore::URLParser::parse):
     24  (WebCore::URLParser::parseAuthority):
     25  (WebCore::appendNumber):
     26  (WebCore::serializeIPv4):
     27  (WebCore::serializeIPv6Piece):
     28  (WebCore::serializeIPv6):
     29  (WebCore::URLParser::parsePort):
     30  (WebCore::URLParser::parseHost):
     31  (WebCore::serializeURLEncodedForm):
     32  (WebCore::URLParser::serialize):
     33  (WebCore::bufferView): Deleted.
     34  * platform/URLParser.h:
     35
  1  36  2016-09-16 Dave Hyatt <hyatt@apple.com>
  2  37
-
trunk/Source/WebCore/platform/URLParser.cpp
r206036 r206044 110 110 { 111 111 ASSERT(!atEnd()); 112 if (U16_IS_LEAD(m_begin[0]) && m_begin < m_end && U16_IS_TRAIL(m_begin[1]))113 m_begin += 2;114 else115 m_begin++;112 unsigned i = 0; 113 size_t length = m_end - m_begin; 114 U16_FWD_1(m_begin, i, length); 115 m_begin += i; 116 116 return *this; 117 117 } … … 406 406 } 407 407 408 static bool isWindowsDriveLetter(const StringBuilder& builder, size_t index)409 { 410 if (bu ilder.length() < index + 2)411 return false; 412 return isASCIIAlpha(bu ilder[index]) && (builder[index + 1] == ':' || builder[index + 1] == '|');408 static bool isWindowsDriveLetter(const Vector<LChar>& buffer, size_t index) 409 { 410 if (buffer.size() < index + 2) 411 return false; 412 return isASCIIAlpha(buffer[index]) && (buffer[index + 1] == ':' || buffer[index + 1] == '|'); 413 413 } 414 414 … … 429 429 } 430 430 431 static void percentEncode(uint8_t byte, StringBuilder& builder)432 { 433 bu ilder.append('%');434 bu ilder.append(upperNibbleToASCIIHexDigit(byte));435 bu ilder.append(lowerNibbleToASCIIHexDigit(byte));436 } 437 438 static void utf8PercentEncode(UChar32 codePoint, StringBuilder& builder, bool(*isInCodeSet)(UChar32))431 static void percentEncode(uint8_t byte, Vector<LChar>& buffer) 432 { 433 buffer.append('%'); 434 buffer.append(upperNibbleToASCIIHexDigit(byte)); 435 buffer.append(lowerNibbleToASCIIHexDigit(byte)); 436 } 437 438 static void utf8PercentEncode(UChar32 codePoint, Vector<LChar>& destination, bool(*isInCodeSet)(UChar32)) 439 439 { 440 440 if (isInCodeSet(codePoint)) { … … 445 445 // FIXME: Check error. 
446 446 for (int32_t i = 0; i < offset; ++i) 447 percentEncode(buffer[i], builder); 448 } else 449 builder.append(codePoint); 450 } 451 452 static void utf8PercentEncodeQuery(UChar32 codePoint, StringBuilder& builder) 447 percentEncode(buffer[i], destination); 448 } else { 449 ASSERT_WITH_MESSAGE(isASCII(codePoint), "isInCodeSet should always return true for non-ASCII characters"); 450 destination.append(codePoint); 451 } 452 } 453 454 static void utf8PercentEncodeQuery(UChar32 codePoint, Vector<LChar>& destination) 453 455 { 454 456 uint8_t buffer[U8_MAX_LENGTH]; … … 461 463 auto byte = buffer[i]; 462 464 if (shouldPercentEncodeQueryByte(byte)) 463 percentEncode(byte, builder);465 percentEncode(byte, destination); 464 466 else 465 builder.append(byte);467 destination.append(byte); 466 468 } 467 469 } 468 470 469 static void encodeQuery(const StringBuilder& source, StringBuilder& destination, const TextEncoding& encoding)471 static void encodeQuery(const StringBuilder& source, Vector<LChar>& destination, const TextEncoding& encoding) 470 472 { 471 473 // FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested. 
… … 594 596 return false; 595 597 } 596 }597 598 template<typename T>599 static StringView bufferView(const T& buffer, unsigned start, unsigned length)600 {601 ASSERT(buffer.length() >= length);602 if (buffer.is8Bit())603 return StringView(buffer.characters8() + start, length);604 return StringView(buffer.characters16() + start, length);605 598 } 606 599 … … 645 638 return 0; 646 639 } 647 640 641 static void copyASCIIStringUntil(Vector<LChar>& destination, const String& string, size_t lengthIf8Bit, size_t lengthIf16Bit) 642 { 643 ASSERT(destination.isEmpty()); 644 if (string.is8Bit()) { 645 RELEASE_ASSERT(lengthIf8Bit <= string.length()); 646 destination.append(string.characters8(), lengthIf8Bit); 647 } else { 648 RELEASE_ASSERT(lengthIf16Bit <= string.length()); 649 destination.reserveCapacity(lengthIf16Bit); 650 const UChar* characters = string.characters16(); 651 for (size_t i = 0; i < lengthIf16Bit; ++i) { 652 UChar c = characters[i]; 653 ASSERT_WITH_SECURITY_IMPLICATION(isASCII(c)); 654 destination.uncheckedAppend(c); 655 } 656 } 657 } 658 648 659 void URLParser::copyURLPartsUntil(const URL& base, URLPart part) 649 660 { 650 m_buffer.clear(); 651 m_buffer.append(base.m_string.substring(0, urlLengthUntilPart(base, part))); 661 m_asciiBuffer.clear(); 662 m_unicodeFragmentBuffer.clear(); 663 if (part == URLPart::FragmentEnd) { 664 copyASCIIStringUntil(m_asciiBuffer, base.m_string, urlLengthUntilPart(base, URLPart::FragmentEnd), urlLengthUntilPart(base, URLPart::QueryEnd)); 665 if (!base.m_string.is8Bit()) { 666 const String& fragment = base.m_string; 667 bool seenUnicode = false; 668 for (size_t i = base.m_queryEnd; i < base.m_fragmentEnd; ++i) { 669 if (!seenUnicode && !isASCII(fragment[i])) 670 seenUnicode = true; 671 if (seenUnicode) 672 m_unicodeFragmentBuffer.uncheckedAppend(fragment[i]); 673 else 674 m_asciiBuffer.uncheckedAppend(fragment[i]); 675 } 676 } 677 } else { 678 size_t length = urlLengthUntilPart(base, part); 679 
copyASCIIStringUntil(m_asciiBuffer, base.m_string, length, length); 680 } 652 681 switch (part) { 653 682 case URLPart::FragmentEnd: … … 683 712 m_url.m_schemeEnd = base.m_schemeEnd; 684 713 } 685 m_urlIsSpecial = isSpecialScheme( bufferView(m_buffer, 0, m_url.m_schemeEnd));714 m_urlIsSpecial = isSpecialScheme(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd)); 686 715 } 687 716 … … 803 832 if (m_url.m_pathAfterLastSlash > m_url.m_portEnd + 1) { 804 833 m_url.m_pathAfterLastSlash--; 805 if (m_ buffer[m_url.m_pathAfterLastSlash] == '/')834 if (m_asciiBuffer[m_url.m_pathAfterLastSlash] == '/') 806 835 m_url.m_pathAfterLastSlash--; 807 while (m_url.m_pathAfterLastSlash > m_url.m_portEnd && m_ buffer[m_url.m_pathAfterLastSlash] != '/')836 while (m_url.m_pathAfterLastSlash > m_url.m_portEnd && m_asciiBuffer[m_url.m_pathAfterLastSlash] != '/') 808 837 m_url.m_pathAfterLastSlash--; 809 838 m_url.m_pathAfterLastSlash++; 810 839 } 811 m_ buffer.resize(m_url.m_pathAfterLastSlash);840 m_asciiBuffer.resize(m_url.m_pathAfterLastSlash); 812 841 } 813 842 … … 845 874 LOG(URLParser, "Parsing URL <%s> base <%s>", String(input, length).utf8().data(), base.string().utf8().data()); 846 875 m_url = { }; 847 m_buffer.clear(); 848 m_buffer.reserveCapacity(length); 876 m_asciiBuffer.clear(); 877 m_unicodeFragmentBuffer.clear(); 878 m_asciiBuffer.reserveCapacity(length); 849 879 850 880 bool isUTF8Encoding = encoding == UTF8Encoding(); … … 882 912 }; 883 913 884 #define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, buffer length %d", x, *c, m_buffer.length())914 #define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, asciiBuffer size %zu", x, *c, m_asciiBuffer.size()) 885 915 #define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x) 886 916 … … 896 926 LOG_STATE("SchemeStart"); 897 927 if (isASCIIAlpha(*c)) { 898 m_ buffer.append(toASCIILower(*c));928 m_asciiBuffer.uncheckedAppend(toASCIILower(*c)); 899 929 ++c; 900 930 state = State::Scheme; … … 905 935 
LOG_STATE("Scheme"); 906 936 if (isASCIIAlphanumeric(*c) || *c == '+' || *c == '-' || *c == '.') 907 m_ buffer.append(toASCIILower(*c));937 m_asciiBuffer.append(toASCIILower(*c)); 908 938 else if (*c == ':') { 909 m_url.m_schemeEnd = m_ buffer.length();910 StringView urlScheme = bufferView(m_buffer, 0, m_url.m_schemeEnd);939 m_url.m_schemeEnd = m_asciiBuffer.size(); 940 StringView urlScheme = StringView(m_asciiBuffer.data(), m_url.m_schemeEnd); 911 941 m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https"; 912 942 if (urlScheme == "file") { 913 943 m_urlIsSpecial = true; 914 944 state = State::File; 915 m_ buffer.append(':');945 m_asciiBuffer.append(':'); 916 946 ++c; 917 947 break; 918 948 } 919 m_ buffer.append(':');949 m_asciiBuffer.append(':'); 920 950 if (isSpecialScheme(urlScheme)) { 921 951 m_urlIsSpecial = true; … … 927 957 state = State::SpecialAuthoritySlashes; 928 958 } else { 929 m_url.m_userStart = m_ buffer.length();959 m_url.m_userStart = m_asciiBuffer.size(); 930 960 m_url.m_userEnd = m_url.m_userStart; 931 961 m_url.m_passwordEnd = m_url.m_userStart; … … 937 967 ++maybeSlash; 938 968 if (!maybeSlash.atEnd() && *maybeSlash == '/') { 939 m_ buffer.append('/');969 m_asciiBuffer.append('/'); 940 970 m_url.m_pathAfterLastSlash = m_url.m_userStart + 1; 941 971 state = State::PathOrAuthority; … … 951 981 break; 952 982 } else { 953 m_ buffer.clear();983 m_asciiBuffer.clear(); 954 984 state = State::NoScheme; 955 985 c = beginAfterControlAndSpace; … … 960 990 ++c; 961 991 if (c.atEnd()) { 962 m_ buffer.clear();992 m_asciiBuffer.clear(); 963 993 state = State::NoScheme; 964 994 c = beginAfterControlAndSpace; … … 972 1002 copyURLPartsUntil(base, URLPart::QueryEnd); 973 1003 state = State::Fragment; 974 m_ buffer.append('#');1004 m_asciiBuffer.append('#'); 975 1005 ++c; 976 1006 break; … … 981 1011 } 982 1012 copyURLPartsUntil(base, URLPart::SchemeEnd); 983 m_ buffer.append(':');1013 m_asciiBuffer.append(':'); 984 1014 state = 
State::File; 985 1015 break; … … 987 1017 LOG_STATE("SpecialRelativeOrAuthority"); 988 1018 if (*c == '/') { 989 m_ buffer.append('/');1019 m_asciiBuffer.append('/'); 990 1020 ++c; 991 1021 while (!c.atEnd() && isTabOrNewline(*c)) … … 994 1024 return failure(input, length); 995 1025 if (*c == '/') { 996 m_ buffer.append('/');1026 m_asciiBuffer.append('/'); 997 1027 state = State::SpecialAuthorityIgnoreSlashes; 998 1028 ++c; … … 1004 1034 LOG_STATE("PathOrAuthority"); 1005 1035 if (*c == '/') { 1006 m_ buffer.append('/');1007 m_url.m_userStart = m_ buffer.length();1036 m_asciiBuffer.append('/'); 1037 m_url.m_userStart = m_asciiBuffer.size(); 1008 1038 state = State::AuthorityOrHost; 1009 1039 ++c; … … 1022 1052 case '?': 1023 1053 copyURLPartsUntil(base, URLPart::PathEnd); 1024 m_ buffer.append('?');1054 m_asciiBuffer.append('?'); 1025 1055 state = State::Query; 1026 1056 ++c; … … 1028 1058 case '#': 1029 1059 copyURLPartsUntil(base, URLPart::QueryEnd); 1030 m_ buffer.append('#');1060 m_asciiBuffer.append('#'); 1031 1061 state = State::Fragment; 1032 1062 ++c; … … 1043 1073 ++c; 1044 1074 copyURLPartsUntil(base, URLPart::SchemeEnd); 1045 m_ buffer.append("://");1075 m_asciiBuffer.append("://", 3); 1046 1076 state = State::SpecialAuthorityIgnoreSlashes; 1047 1077 } else { 1048 1078 copyURLPartsUntil(base, URLPart::PortEnd); 1049 m_ buffer.append('/');1079 m_asciiBuffer.append('/'); 1050 1080 m_url.m_pathAfterLastSlash = base.m_portEnd + 1; 1051 1081 state = State::Path; … … 1054 1084 case State::SpecialAuthoritySlashes: 1055 1085 LOG_STATE("SpecialAuthoritySlashes"); 1056 m_ buffer.append("//");1086 m_asciiBuffer.append("//", 2); 1057 1087 if (*c == '/' || *c == '\\') { 1058 1088 ++c; … … 1067 1097 LOG_STATE("SpecialAuthorityIgnoreSlashes"); 1068 1098 if (*c == '/' || *c == '\\') { 1069 m_ buffer.append('/');1070 ++c; 1071 } 1072 m_url.m_userStart = m_ buffer.length();1099 m_asciiBuffer.append('/'); 1100 ++c; 1101 } 1102 m_url.m_userStart = m_asciiBuffer.size(); 1073 
1103 state = State::AuthorityOrHost; 1074 1104 authorityOrHostBegin = c; … … 1089 1119 bool isSlash = *c == '/' || (m_urlIsSpecial && *c == '\\'); 1090 1120 if (isSlash || *c == '?' || *c == '#') { 1091 m_url.m_userEnd = m_ buffer.length();1121 m_url.m_userEnd = m_asciiBuffer.size(); 1092 1122 m_url.m_passwordEnd = m_url.m_userEnd; 1093 1123 if (!parseHost(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) 1094 1124 return failure(input, length); 1095 1125 if (!isSlash) { 1096 m_ buffer.append('/');1097 m_url.m_pathAfterLastSlash = m_ buffer.length();1126 m_asciiBuffer.append('/'); 1127 m_url.m_pathAfterLastSlash = m_asciiBuffer.size(); 1098 1128 } 1099 1129 state = State::Path; … … 1122 1152 case '/': 1123 1153 case '\\': 1124 m_ buffer.append('/');1154 m_asciiBuffer.append('/'); 1125 1155 state = State::FileSlash; 1126 1156 ++c; … … 1129 1159 if (!base.isNull() && base.protocolIs("file")) 1130 1160 copyURLPartsUntil(base, URLPart::PathEnd); 1131 m_ buffer.append("///?");1132 m_url.m_userStart = m_ buffer.length() - 2;1161 m_asciiBuffer.append("///?", 4); 1162 m_url.m_userStart = m_asciiBuffer.size() - 2; 1133 1163 m_url.m_userEnd = m_url.m_userStart; 1134 1164 m_url.m_passwordEnd = m_url.m_userStart; … … 1143 1173 if (!base.isNull() && base.protocolIs("file")) 1144 1174 copyURLPartsUntil(base, URLPart::QueryEnd); 1145 m_ buffer.append("///#");1146 m_url.m_userStart = m_ buffer.length() - 2;1175 m_asciiBuffer.append("///#", 4); 1176 m_url.m_userStart = m_asciiBuffer.size() - 2; 1147 1177 m_url.m_userEnd = m_url.m_userStart; 1148 1178 m_url.m_passwordEnd = m_url.m_userStart; … … 1159 1189 copyURLPartsUntil(base, URLPart::PathAfterLastSlash); 1160 1190 else { 1161 m_ buffer.append("///");1162 m_url.m_userStart = m_ buffer.length() - 1;1191 m_asciiBuffer.append("///", 3); 1192 m_url.m_userStart = m_asciiBuffer.size() - 1; 1163 1193 m_url.m_userEnd = m_url.m_userStart; 1164 1194 m_url.m_passwordEnd = m_url.m_userStart; … … 1175 1205 if (*c == '/' || *c == 
'\\') { 1176 1206 ++c; 1177 m_ buffer.append('/');1178 m_url.m_userStart = m_ buffer.length();1207 m_asciiBuffer.append('/'); 1208 m_url.m_userStart = m_asciiBuffer.size(); 1179 1209 m_url.m_userEnd = m_url.m_userStart; 1180 1210 m_url.m_passwordEnd = m_url.m_userStart; … … 1193 1223 : isWindowsDriveLetter(CodePointIterator<UChar>(basePath.characters16(), basePath.characters16() + basePath.length())); 1194 1224 if (windowsQuirk) { 1195 m_ buffer.append(basePath[0]);1196 m_ buffer.append(basePath[1]);1225 m_asciiBuffer.append(basePath[0]); 1226 m_asciiBuffer.append(basePath[1]); 1197 1227 } 1198 1228 } … … 1200 1230 break; 1201 1231 } 1202 m_ buffer.append("//");1203 m_url.m_userStart = m_ buffer.length() - 1;1232 m_asciiBuffer.append("//", 2); 1233 m_url.m_userStart = m_asciiBuffer.size() - 1; 1204 1234 m_url.m_userEnd = m_url.m_userStart; 1205 1235 m_url.m_passwordEnd = m_url.m_userStart; … … 1212 1242 LOG_STATE("FileHost"); 1213 1243 if (isSlashQuestionOrHash(*c)) { 1214 if (isWindowsDriveLetter(m_ buffer, m_url.m_portEnd + 1)) {1244 if (isWindowsDriveLetter(m_asciiBuffer, m_url.m_portEnd + 1)) { 1215 1245 state = State::Path; 1216 1246 break; 1217 1247 } 1218 1248 if (authorityOrHostBegin == c) { 1219 ASSERT(m_ buffer[m_buffer.length() - 1] == '/');1249 ASSERT(m_asciiBuffer[m_asciiBuffer.size() - 1] == '/'); 1220 1250 if (*c == '?') { 1221 m_ buffer.append("/?");1222 m_url.m_pathAfterLastSlash = m_ buffer.length() - 1;1251 m_asciiBuffer.append("/?", 2); 1252 m_url.m_pathAfterLastSlash = m_asciiBuffer.size() - 1; 1223 1253 m_url.m_pathEnd = m_url.m_pathAfterLastSlash; 1224 1254 state = State::Query; … … 1227 1257 } 1228 1258 if (*c == '#') { 1229 m_ buffer.append("/#");1230 m_url.m_pathAfterLastSlash = m_ buffer.length() - 1;1259 m_asciiBuffer.append("/#", 2); 1260 m_url.m_pathAfterLastSlash = m_asciiBuffer.size() - 1; 1231 1261 m_url.m_pathEnd = m_url.m_pathAfterLastSlash; 1232 1262 m_url.m_queryEnd = m_url.m_pathAfterLastSlash; … … 1241 1271 return 
failure(input, length); 1242 1272 1243 if ( bufferView(m_buffer, m_url.m_passwordEnd, m_buffer.length() - m_url.m_passwordEnd) == "localhost") {1244 m_ buffer.resize(m_url.m_passwordEnd);1245 m_url.m_hostEnd = m_ buffer.length();1273 if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, m_asciiBuffer.size() - m_url.m_passwordEnd) == "localhost") { 1274 m_asciiBuffer.shrink(m_url.m_passwordEnd); 1275 m_url.m_hostEnd = m_asciiBuffer.size(); 1246 1276 m_url.m_portEnd = m_url.m_hostEnd; 1247 1277 } … … 1263 1293 LOG_STATE("Path"); 1264 1294 if (*c == '/' || (m_urlIsSpecial && *c == '\\')) { 1265 m_ buffer.append('/');1266 m_url.m_pathAfterLastSlash = m_ buffer.length();1267 ++c; 1268 break; 1269 } 1270 if (m_ buffer.length() && m_buffer[m_buffer.length() - 1] == '/') {1295 m_asciiBuffer.append('/'); 1296 m_url.m_pathAfterLastSlash = m_asciiBuffer.size(); 1297 ++c; 1298 break; 1299 } 1300 if (m_asciiBuffer.size() && m_asciiBuffer[m_asciiBuffer.size() - 1] == '/') { 1271 1301 if (isDoubleDotPathSegment(c)) { 1272 1302 consumeDoubleDotPathSegment(c); … … 1274 1304 break; 1275 1305 } 1276 if (m_ buffer[m_buffer.length() - 1] == '/' && isSingleDotPathSegment(c)) {1306 if (m_asciiBuffer[m_asciiBuffer.size() - 1] == '/' && isSingleDotPathSegment(c)) { 1277 1307 consumeSingleDotPathSegment(c); 1278 1308 break; … … 1280 1310 } 1281 1311 if (*c == '?') { 1282 m_url.m_pathEnd = m_ buffer.length();1312 m_url.m_pathEnd = m_asciiBuffer.size(); 1283 1313 state = State::Query; 1284 1314 break; 1285 1315 } 1286 1316 if (*c == '#') { 1287 m_url.m_pathEnd = m_ buffer.length();1317 m_url.m_pathEnd = m_asciiBuffer.size(); 1288 1318 m_url.m_queryEnd = m_url.m_pathEnd; 1289 1319 state = State::Fragment; … … 1291 1321 } 1292 1322 if (isPercentEncodedDot(c)) { 1293 m_ buffer.append('.');1323 m_asciiBuffer.append('.'); 1294 1324 ASSERT(*c == '%'); 1295 1325 ++c; … … 1300 1330 break; 1301 1331 } 1302 utf8PercentEncode(*c, m_ buffer, isInDefaultEncodeSet);1332 utf8PercentEncode(*c, 
m_asciiBuffer, isInDefaultEncodeSet); 1303 1333 ++c; 1304 1334 break; … … 1306 1336 LOG_STATE("CannotBeABaseURLPath"); 1307 1337 if (*c == '?') { 1308 m_url.m_pathEnd = m_ buffer.length();1338 m_url.m_pathEnd = m_asciiBuffer.size(); 1309 1339 state = State::Query; 1310 1340 } else if (*c == '#') { 1311 m_url.m_pathEnd = m_ buffer.length();1341 m_url.m_pathEnd = m_asciiBuffer.size(); 1312 1342 m_url.m_queryEnd = m_url.m_pathEnd; 1313 1343 state = State::Fragment; 1314 1344 } else { 1315 utf8PercentEncode(*c, m_ buffer, isInSimpleEncodeSet);1345 utf8PercentEncode(*c, m_asciiBuffer, isInSimpleEncodeSet); 1316 1346 ++c; 1317 1347 } … … 1321 1351 if (*c == '#') { 1322 1352 if (!isUTF8Encoding) 1323 encodeQuery(queryBuffer, m_ buffer, encoding);1324 m_url.m_queryEnd = m_ buffer.length();1353 encodeQuery(queryBuffer, m_asciiBuffer, encoding); 1354 m_url.m_queryEnd = m_asciiBuffer.size(); 1325 1355 state = State::Fragment; 1326 1356 break; 1327 1357 } 1328 1358 if (isUTF8Encoding) 1329 utf8PercentEncodeQuery(*c, m_ buffer);1359 utf8PercentEncodeQuery(*c, m_asciiBuffer); 1330 1360 else 1331 1361 queryBuffer.append(*c); … … 1334 1364 case State::Fragment: 1335 1365 LOG_STATE("Fragment"); 1336 m_buffer.append(*c); 1366 if (m_unicodeFragmentBuffer.isEmpty() && isASCII(*c)) 1367 m_asciiBuffer.append(*c); 1368 else 1369 m_unicodeFragmentBuffer.append(*c); 1337 1370 ++c; 1338 1371 break; … … 1343 1376 case State::SchemeStart: 1344 1377 LOG_FINAL_STATE("SchemeStart"); 1345 if (!m_ buffer.length() && !base.isNull())1378 if (!m_asciiBuffer.size() && !base.isNull()) 1346 1379 return base; 1347 1380 return failure(input, length); … … 1370 1403 LOG_FINAL_STATE("RelativeSlash"); 1371 1404 copyURLPartsUntil(base, URLPart::PortEnd); 1372 m_ buffer.append('/');1405 m_asciiBuffer.append('/'); 1373 1406 m_url.m_pathAfterLastSlash = base.m_portEnd + 1; 1374 1407 m_url.m_pathEnd = m_url.m_pathAfterLastSlash; … … 1378 1411 case State::SpecialAuthoritySlashes: 1379 1412 
LOG_FINAL_STATE("SpecialAuthoritySlashes"); 1380 m_url.m_userStart = m_ buffer.length();1413 m_url.m_userStart = m_asciiBuffer.size(); 1381 1414 m_url.m_userEnd = m_url.m_userStart; 1382 1415 m_url.m_passwordEnd = m_url.m_userStart; … … 1394 1427 case State::AuthorityOrHost: 1395 1428 LOG_FINAL_STATE("AuthorityOrHost"); 1396 m_url.m_userEnd = m_ buffer.length();1429 m_url.m_userEnd = m_asciiBuffer.size(); 1397 1430 m_url.m_passwordEnd = m_url.m_userEnd; 1398 1431 FALLTHROUGH; … … 1402 1435 if (!parseHost(authorityOrHostBegin)) 1403 1436 return failure(input, length); 1404 m_ buffer.append('/');1437 m_asciiBuffer.append('/'); 1405 1438 m_url.m_pathEnd = m_url.m_portEnd + 1; 1406 1439 m_url.m_pathAfterLastSlash = m_url.m_pathEnd; … … 1412 1445 if (!base.isNull() && base.protocol() == "file") { 1413 1446 copyURLPartsUntil(base, URLPart::QueryEnd); 1414 m_ buffer.append(':');1415 } 1416 m_ buffer.append("///");1417 m_url.m_userStart = m_ buffer.length() - 1;1447 m_asciiBuffer.append(':'); 1448 } 1449 m_asciiBuffer.append("///", 3); 1450 m_url.m_userStart = m_asciiBuffer.size() - 1; 1418 1451 m_url.m_userEnd = m_url.m_userStart; 1419 1452 m_url.m_passwordEnd = m_url.m_userStart; … … 1427 1460 case State::FileSlash: 1428 1461 LOG_FINAL_STATE("FileSlash"); 1429 m_ buffer.append("//");1430 m_url.m_userStart = m_ buffer.length() - 1;1462 m_asciiBuffer.append("//", 2); 1463 m_url.m_userStart = m_asciiBuffer.size() - 1; 1431 1464 m_url.m_userEnd = m_url.m_userStart; 1432 1465 m_url.m_passwordEnd = m_url.m_userStart; … … 1441 1474 LOG_FINAL_STATE("FileHost"); 1442 1475 if (authorityOrHostBegin == c) { 1443 m_ buffer.append('/');1444 m_url.m_userStart = m_ buffer.length() - 1;1476 m_asciiBuffer.append('/'); 1477 m_url.m_userStart = m_asciiBuffer.size() - 1; 1445 1478 m_url.m_userEnd = m_url.m_userStart; 1446 1479 m_url.m_passwordEnd = m_url.m_userStart; … … 1456 1489 if (!parseHost(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) 1457 1490 return failure(input, 
length); 1458 1459 if ( bufferView(m_buffer, m_url.m_passwordEnd, m_buffer.length() - m_url.m_passwordEnd) == "localhost") {1460 m_ buffer.resize(m_url.m_passwordEnd);1461 m_url.m_hostEnd = m_ buffer.length();1491 1492 if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, m_asciiBuffer.size() - m_url.m_passwordEnd) == "localhost") { 1493 m_asciiBuffer.shrink(m_url.m_passwordEnd); 1494 m_url.m_hostEnd = m_asciiBuffer.size(); 1462 1495 m_url.m_portEnd = m_url.m_hostEnd; 1463 1496 } 1464 m_ buffer.append('/');1497 m_asciiBuffer.append('/'); 1465 1498 m_url.m_pathAfterLastSlash = m_url.m_hostEnd + 1; 1466 1499 m_url.m_pathEnd = m_url.m_pathAfterLastSlash; … … 1473 1506 case State::Path: 1474 1507 LOG_FINAL_STATE("Path"); 1475 m_url.m_pathEnd = m_ buffer.length();1508 m_url.m_pathEnd = m_asciiBuffer.size(); 1476 1509 m_url.m_queryEnd = m_url.m_pathEnd; 1477 1510 m_url.m_fragmentEnd = m_url.m_pathEnd; … … 1479 1512 case State::CannotBeABaseURLPath: 1480 1513 LOG_FINAL_STATE("CannotBeABaseURLPath"); 1481 m_url.m_pathEnd = m_ buffer.length();1514 m_url.m_pathEnd = m_asciiBuffer.size(); 1482 1515 m_url.m_queryEnd = m_url.m_pathEnd; 1483 1516 m_url.m_fragmentEnd = m_url.m_pathEnd; … … 1486 1519 LOG_FINAL_STATE("Query"); 1487 1520 if (!isUTF8Encoding) 1488 encodeQuery(queryBuffer, m_ buffer, encoding);1489 m_url.m_queryEnd = m_ buffer.length();1521 encodeQuery(queryBuffer, m_asciiBuffer, encoding); 1522 m_url.m_queryEnd = m_asciiBuffer.size(); 1490 1523 m_url.m_fragmentEnd = m_url.m_queryEnd; 1491 1524 break; 1492 1525 case State::Fragment: 1493 1526 LOG_FINAL_STATE("Fragment"); 1494 m_url.m_fragmentEnd = m_ buffer.length();1527 m_url.m_fragmentEnd = m_asciiBuffer.size() + m_unicodeFragmentBuffer.size(); 1495 1528 break; 1496 1529 } 1497 1530 1498 m_url.m_string = m_buffer.toString(); 1531 if (m_unicodeFragmentBuffer.isEmpty()) { 1532 // FIXME: String::adopt should require a WTFMove. 
1533 m_url.m_string = String::adopt(m_asciiBuffer); 1534 } else { 1535 StringBuilder builder; 1536 builder.reserveCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size()); 1537 builder.append(m_asciiBuffer.data(), m_asciiBuffer.size()); 1538 for (size_t i = 0; i < m_unicodeFragmentBuffer.size(); ++i) 1539 builder.append(m_unicodeFragmentBuffer[i]); 1540 m_url.m_string = builder.toString(); 1541 } 1499 1542 m_url.m_isValid = true; 1500 1543 LOG(URLParser, "Parsed URL <%s>", m_url.m_string.utf8().data()); … … 1506 1549 { 1507 1550 if (iterator.atEnd()) { 1508 m_url.m_userEnd = m_ buffer.length();1551 m_url.m_userEnd = m_asciiBuffer.size(); 1509 1552 m_url.m_passwordEnd = m_url.m_userEnd; 1510 1553 return; … … 1513 1556 if (*iterator == ':') { 1514 1557 ++iterator; 1515 m_url.m_userEnd = m_ buffer.length();1558 m_url.m_userEnd = m_asciiBuffer.size(); 1516 1559 if (iterator.atEnd()) { 1517 1560 m_url.m_passwordEnd = m_url.m_userEnd; 1518 1561 if (m_url.m_userEnd > m_url.m_userStart) 1519 m_ buffer.append('@');1562 m_asciiBuffer.append('@'); 1520 1563 return; 1521 1564 } 1522 m_ buffer.append(':');1523 break; 1524 } 1525 utf8PercentEncode(*iterator, m_ buffer, isInUserInfoEncodeSet);1565 m_asciiBuffer.append(':'); 1566 break; 1567 } 1568 utf8PercentEncode(*iterator, m_asciiBuffer, isInUserInfoEncodeSet); 1526 1569 } 1527 1570 for (; !iterator.atEnd(); ++iterator) 1528 utf8PercentEncode(*iterator, m_ buffer, isInUserInfoEncodeSet);1529 m_url.m_passwordEnd = m_ buffer.length();1571 utf8PercentEncode(*iterator, m_asciiBuffer, isInUserInfoEncodeSet); 1572 m_url.m_passwordEnd = m_asciiBuffer.size(); 1530 1573 if (!m_url.m_userEnd) 1531 1574 m_url.m_userEnd = m_url.m_passwordEnd; 1532 m_buffer.append('@'); 1533 } 1534 1535 static void serializeIPv4(uint32_t address, StringBuilder& buffer) 1536 { 1537 buffer.appendNumber(address >> 24); 1575 m_asciiBuffer.append('@'); 1576 } 1577 1578 template<typename UnsignedIntegerType> 1579 void append(Vector<LChar>& destination, 
UnsignedIntegerType number) 1580 { 1581 LChar buf[sizeof(UnsignedIntegerType) * 3 + 1]; 1582 LChar* end = buf + WTF_ARRAY_LENGTH(buf); 1583 LChar* p = end; 1584 do { 1585 *--p = (number % 10) + '0'; 1586 number /= 10; 1587 } while (number); 1588 destination.append(p, end - p); 1589 } 1590 1591 static void serializeIPv4(uint32_t address, Vector<LChar>& buffer) 1592 { 1593 append<uint8_t>(buffer, address >> 24); 1538 1594 buffer.append('.'); 1539 buffer.appendNumber((address >> 16) & 0xFF);1595 append<uint8_t>(buffer, address >> 16); 1540 1596 buffer.append('.'); 1541 buffer.appendNumber((address >> 8) & 0xFF);1597 append<uint8_t>(buffer, address >> 8); 1542 1598 buffer.append('.'); 1543 buffer.appendNumber(address & 0xFF);1599 append<uint8_t>(buffer, address); 1544 1600 } 1545 1601 … … 1571 1627 } 1572 1628 1573 static void serializeIPv6Piece(uint16_t piece, StringBuilder& buffer)1629 static void serializeIPv6Piece(uint16_t piece, Vector<LChar>& buffer) 1574 1630 { 1575 1631 bool printed = false; … … 1589 1645 } 1590 1646 1591 static void serializeIPv6(std::array<uint16_t, 8> address, StringBuilder& buffer)1647 static void serializeIPv6(std::array<uint16_t, 8> address, Vector<LChar>& buffer) 1592 1648 { 1593 1649 buffer.append('['); … … 1599 1655 buffer.append(':'); 1600 1656 else 1601 buffer.append("::" );1657 buffer.append("::", 2); 1602 1658 while (piece < 8 && !address[piece]) 1603 1659 piece++; … … 1881 1937 uint32_t port = 0; 1882 1938 if (iterator.atEnd()) { 1883 m_url.m_portEnd = m_ buffer.length();1939 m_url.m_portEnd = m_asciiBuffer.size(); 1884 1940 return true; 1885 1941 } 1886 m_ buffer.append(':');1942 m_asciiBuffer.append(':'); 1887 1943 for (; !iterator.atEnd(); ++iterator) { 1888 1944 if (isTabOrNewline(*iterator)) … … 1895 1951 return false; 1896 1952 } 1897 1898 if (isDefaultPort( bufferView(m_buffer, 0, m_url.m_schemeEnd), port)) {1899 ASSERT(m_ buffer[m_buffer.length() - 1]== ':');1900 m_ buffer.resize(m_buffer.length() - 1);1953 1954 if 
(isDefaultPort(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd), port)) { 1955 ASSERT(m_asciiBuffer.last() == ':'); 1956 m_asciiBuffer.shrink(m_asciiBuffer.size() - 1); 1901 1957 } else 1902 m_buffer.appendNumber(port);1903 1904 m_url.m_portEnd = m_ buffer.length();1958 append<uint16_t>(m_asciiBuffer, static_cast<uint16_t>(port)); 1959 1960 m_url.m_portEnd = m_asciiBuffer.size(); 1905 1961 return true; 1906 1962 } … … 1917 1973 ++ipv6End; 1918 1974 if (auto address = parseIPv6Host(CodePointIterator<CharacterType>(iterator, ipv6End))) { 1919 serializeIPv6(address.value(), m_ buffer);1920 m_url.m_hostEnd = m_ buffer.length();1975 serializeIPv6(address.value(), m_asciiBuffer); 1976 m_url.m_hostEnd = m_asciiBuffer.size(); 1921 1977 if (!ipv6End.atEnd()) { 1922 1978 ++ipv6End; … … 1925 1981 return parsePort(ipv6End); 1926 1982 } 1927 m_url.m_portEnd = m_ buffer.length();1983 m_url.m_portEnd = m_asciiBuffer.size(); 1928 1984 return true; 1929 1985 } … … 1943 1999 } 1944 2000 if (auto address = parseIPv4Host(CodePointIterator<CharacterType>(hostIterator, iterator))) { 1945 serializeIPv4(address.value(), m_ buffer);1946 m_url.m_hostEnd = m_ buffer.length();2001 serializeIPv4(address.value(), m_asciiBuffer); 2002 m_url.m_hostEnd = m_asciiBuffer.size(); 1947 2003 if (iterator.atEnd()) { 1948 m_url.m_portEnd = m_ buffer.length();2004 m_url.m_portEnd = m_asciiBuffer.size(); 1949 2005 return true; 1950 2006 } … … 1954 2010 for (; hostIterator != iterator; ++hostIterator) { 1955 2011 if (!isTabOrNewline(*hostIterator)) 1956 m_ buffer.append(toASCIILower(*hostIterator));1957 } 1958 m_url.m_hostEnd = m_ buffer.length();2012 m_asciiBuffer.append(toASCIILower(*hostIterator)); 2013 } 2014 m_url.m_hostEnd = m_asciiBuffer.size(); 1959 2015 if (!hostIterator.atEnd()) { 1960 2016 ASSERT(*hostIterator == ':'); … … 1964 2020 return parsePort(hostIterator); 1965 2021 } 1966 m_url.m_portEnd = m_ buffer.length();2022 m_url.m_portEnd = m_asciiBuffer.size(); 1967 2023 return true; 1968 2024 
} … … 1993 2049 RELEASE_ASSERT(asciiDomainValue.is8Bit()); 1994 2050 const LChar* asciiDomainCharacters = asciiDomainValue.characters8(); 1995 2051 1996 2052 if (auto address = parseIPv4Host(CodePointIterator<LChar>(asciiDomainCharacters, asciiDomainCharacters + asciiDomainValue.length()))) { 1997 serializeIPv4(address.value(), m_ buffer);1998 m_url.m_hostEnd = m_ buffer.length();2053 serializeIPv4(address.value(), m_asciiBuffer); 2054 m_url.m_hostEnd = m_asciiBuffer.size(); 1999 2055 if (iterator.atEnd()) { 2000 m_url.m_portEnd = m_ buffer.length();2056 m_url.m_portEnd = m_asciiBuffer.size(); 2001 2057 return true; 2002 2058 } … … 2004 2060 return parsePort(iterator); 2005 2061 } 2006 2007 m_ buffer.append(asciiDomain.value());2008 m_url.m_hostEnd = m_ buffer.length();2062 2063 m_asciiBuffer.append(asciiDomainCharacters, asciiDomainValue.length()); 2064 m_url.m_hostEnd = m_asciiBuffer.size(); 2009 2065 if (!iterator.atEnd()) { 2010 2066 ASSERT(*iterator == ':'); … … 2014 2070 return parsePort(iterator); 2015 2071 } 2016 m_url.m_portEnd = m_ buffer.length();2072 m_url.m_portEnd = m_asciiBuffer.size(); 2017 2073 return true; 2018 2074 } … … 2048 2104 } 2049 2105 2050 static void serializeURLEncodedForm(const String& input, StringBuilder& output)2106 static void serializeURLEncodedForm(const String& input, Vector<LChar>& output) 2051 2107 { 2052 2108 auto utf8 = input.utf8(StrictConversion); … … 2071 2127 String URLParser::serialize(const URLEncodedForm& tuples) 2072 2128 { 2073 StringBuilderoutput;2129 Vector<LChar> output; 2074 2130 for (auto& tuple : tuples) { 2075 2131 if (!output.isEmpty()) … … 2079 2135 serializeURLEncodedForm(tuple.second, output); 2080 2136 } 2081 return output.toString();2137 return String::adopt(output); 2082 2138 } 2083 2139 -
trunk/Source/WebCore/platform/URLParser.h
r205986 r206044 49 49 private: 50 50 URL m_url; 51 StringBuilder m_buffer; 51 Vector<LChar> m_asciiBuffer; 52 Vector<UChar32> m_unicodeFragmentBuffer; 52 53 bool m_urlIsSpecial { false }; 53 54 bool m_hostHasPercentOrNonASCII { false };
Note: See TracChangeset for help on using the changeset viewer.