Changeset 236565 in webkit
- Timestamp:
- Sep 27, 2018, 1:05:52 PM (7 years ago)
- Location:
- trunk
- Files:
-
- 16 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r236563 r236565 1 2018-09-27 Alex Christensen <achristensen@webkit.org> 2 3 URLParser should use TextEncoding through an abstract class 4 https://bugs.webkit.org/show_bug.cgi?id=190027 5 6 Reviewed by Andy Estes. 7 8 URLParser uses TextEncoding for one call to encode, which is only used for encoding the query of URLs in documents with non-UTF encodings. 9 There are 3 call sites that specify the TextEncoding to use from the Document, and even those call sites use a UTF encoding most of the time. 10 All other URL parsing is done using a well-optimized path which assumes UTF-8 encoding and uses macros from ICU headers, not a TextEncoding. 11 Moving the logic in this way breaks URL and URLParser's dependency on TextEncoding, which makes it possible to use in a lower-level project 12 without also moving TextEncoding, TextCodec, TextCodecICU, ThreadGlobalData, and the rest of WebCore and JavaScriptCore. 13 14 There is no observable change in behavior. There is now one virtual function call in a code path in URLParser that is not performance-sensitive, 15 and TextEncodings now have a vtable, which uses a few more bytes of memory total for WebKit. 16 17 * css/parser/CSSParserContext.h: 18 (WebCore::CSSParserContext::completeURL const): 19 * css/parser/CSSParserIdioms.cpp: 20 (WebCore::completeURL): 21 * dom/Document.cpp: 22 (WebCore::Document::completeURL const): 23 * html/HTMLBaseElement.cpp: 24 (WebCore::HTMLBaseElement::href const): 25 Move the call to encodingForFormSubmission from the URL constructor to the 3 call sites that specify the encoding from the Document. 
26 * loader/FormSubmission.cpp: 27 (WebCore::FormSubmission::create): 28 * loader/TextResourceDecoder.cpp: 29 (WebCore::TextResourceDecoder::encodingForURLParsing): 30 * loader/TextResourceDecoder.h: 31 * platform/URL.cpp: 32 (WebCore::URL::URL): 33 * platform/URL.h: 34 (WebCore::URLTextEncoding::~URLTextEncoding): 35 * platform/URLParser.cpp: 36 (WebCore::URLParser::encodeNonUTF8Query): 37 (WebCore::URLParser::copyURLPartsUntil): 38 (WebCore::URLParser::URLParser): 39 (WebCore::URLParser::parse): 40 (WebCore::URLParser::encodeQuery): Deleted. 41 A pointer replaces the boolean isUTF8Encoding and the TextEncoding& which had a default value of UTF8Encoding. 42 Now the pointer being null means that we use UTF8, and the pointer being non-null means we use that encoding. 43 * platform/URLParser.h: 44 (WebCore::URLParser::URLParser): 45 * platform/text/TextEncoding.cpp: 46 (WebCore::UTF7Encoding): 47 (WebCore::TextEncoding::encodingForFormSubmissionOrURLParsing const): 48 (WebCore::ASCIIEncoding): 49 (WebCore::Latin1Encoding): 50 (WebCore::UTF16BigEndianEncoding): 51 (WebCore::UTF16LittleEndianEncoding): 52 (WebCore::UTF8Encoding): 53 (WebCore::WindowsLatin1Encoding): 54 (WebCore::TextEncoding::encodingForFormSubmission const): Deleted. 55 Use NeverDestroyed because TextEncoding now has a virtual destructor. 56 * platform/text/TextEncoding.h: 57 Rename encodingForFormSubmission to encodingForFormSubmissionOrURLParsing to make it more clear that we are intentionally using it for both. 58 1 59 2018-09-27 John Wilander <wilander@apple.com> 2 60 -
trunk/Source/WebCore/css/parser/CSSParserContext.h
r234215 r236565 70 70 if (charset.isEmpty()) 71 71 return URL(baseURL, url); 72 return URL(baseURL, url, TextEncoding(charset)); 72 TextEncoding encoding(charset); 73 auto& encodingForURLParsing = encoding.encodingForFormSubmissionOrURLParsing(); 74 return URL(baseURL, url, encodingForURLParsing == UTF8Encoding() ? nullptr : &encodingForURLParsing); 73 75 } 74 76 }; -
trunk/Source/WebCore/css/parser/CSSParserIdioms.cpp
r218890 r236565 48 48 URL completeURL(const CSSParserContext& context, const String& url) 49 49 { 50 if (url.isNull()) 51 return URL(); 52 if (context.charset.isEmpty()) 53 return URL(context.baseURL, url); 54 return URL(context.baseURL, url, context.charset); 50 return context.completeURL(url); 55 51 } 56 52 -
trunk/Source/WebCore/dom/Document.cpp
r236560 r236565 4895 4895 if (!m_decoder) 4896 4896 return URL(baseURL, url); 4897 return URL(baseURL, url, m_decoder->encoding()); 4897 return URL(baseURL, url, m_decoder->encodingForURLParsing()); 4898 4898 } 4899 4899 -
trunk/Source/WebCore/html/HTMLBaseElement.cpp
r229694 r236565 90 90 return document().url(); 91 91 92 URL url = !document().decoder() ? 93 URL(document().url(), stripLeadingAndTrailingHTMLSpaces(attributeValue)) : 94 URL(document().url(), stripLeadingAndTrailingHTMLSpaces(attributeValue), document().decoder()->encoding()); 92 auto* encoding = document().decoder() ? document().decoder()->encodingForURLParsing() : nullptr; 93 URL url(document().url(), stripLeadingAndTrailingHTMLSpaces(attributeValue), encoding); 95 94 96 95 if (!url.isValid()) -
trunk/Source/WebCore/loader/FormSubmission.cpp
r234278 r236565 176 176 177 177 auto dataEncoding = isMailtoForm ? UTF8Encoding() : encodingFromAcceptCharset(copiedAttributes.acceptCharset(), document); 178 auto domFormData = DOMFormData::create(dataEncoding.encodingForFormSubmission()); 178 auto domFormData = DOMFormData::create(dataEncoding.encodingForFormSubmissionOrURLParsing()); 179 179 StringPairVector formValues; 180 180 -
trunk/Source/WebCore/loader/TextResourceDecoder.cpp
r228594 r236565 660 660 } 661 661 662 } 662 const TextEncoding* TextResourceDecoder::encodingForURLParsing() 663 { 664 // For UTF-{7,16,32}, we want to use UTF-8 for the query part as 665 // we do when submitting a form. A form with GET method 666 // has its contents added to a URL as query params and it makes sense 667 // to be consistent. 668 auto& encoding = m_encoding.encodingForFormSubmissionOrURLParsing(); 669 if (encoding == UTF8Encoding()) 670 return nullptr; 671 return &encoding; 672 } 673 674 } -
trunk/Source/WebCore/loader/TextResourceDecoder.h
r225618 r236565 49 49 void setEncoding(const TextEncoding&, EncodingSource); 50 50 const TextEncoding& encoding() const { return m_encoding; } 51 const TextEncoding* encodingForURLParsing(); 51 52 52 53 bool hasEqualEncodingForCharset(const String& charset) const; -
trunk/Source/WebCore/platform/URL.cpp
r235949 r236565 104 104 } 105 105 106 URL::URL(const URL& base, const String& relative) 107 { 108 URLParser parser(relative, base); 109 *this = parser.result(); 110 } 111 112 URL::URL(const URL& base, const String& relative, const TextEncoding& encoding) 113 { 114 // For UTF-{7,16,32}, we want to use UTF-8 for the query part as 115 // we do when submitting a form. A form with GET method 116 // has its contents added to a URL as query params and it makes sense 117 // to be consistent. 118 URLParser parser(relative, base, encoding.encodingForFormSubmission()); 106 URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding) 107 { 108 URLParser parser(relative, base, encoding); 119 109 *this = parser.result(); 120 110 } -
trunk/Source/WebCore/platform/URL.h
r235949 r236565 48 48 namespace WebCore { 49 49 50 class TextEncoding; 50 class URLTextEncoding { 51 public: 52 virtual Vector<uint8_t> encodeForURLParsing(StringView) const = 0; 53 virtual ~URLTextEncoding() { }; 54 }; 55 51 56 struct URLHash; 52 57 … … 66 71 67 72 // Resolves the relative URL with the given base URL. If provided, the 68 // TextEncoding is used to encode non-ASCII characers. The base URL can be73 // URLTextEncoding is used to encode non-ASCII characers. The base URL can be 69 74 // null or empty, in which case the relative URL will be interpreted as 70 75 // absolute. … … 72 77 // URL. Instead I think it would be better to treat all invalid base URLs 73 78 // the same way we treate null and empty base URLs. 74 WEBCORE_EXPORT URL(const URL& base, const String& relative); 75 URL(const URL& base, const String& relative, const TextEncoding&); 79 WEBCORE_EXPORT URL(const URL& base, const String& relative, const URLTextEncoding* = nullptr); 76 80 77 81 WEBCORE_EXPORT static URL fakeURLWithRelativePart(const String&); … … 209 213 WEBCORE_EXPORT void invalidate(); 210 214 static bool protocolIs(const String&, const char*); 211 void init(const URL&, const String&, const TextEncoding&);212 215 void copyToBuffer(Vector<char, 512>& buffer) const; 213 216 unsigned hostStart() const; … … 304 307 // in it, the resulting string will have embedded null characters! 305 308 WEBCORE_EXPORT String decodeURLEscapeSequences(const String&); 309 class TextEncoding; 306 310 String decodeURLEscapeSequences(const String&, const TextEncoding&); 307 311 -
trunk/Source/WebCore/platform/URLParser.cpp
r236528 r236565 619 619 620 620 template<typename CharacterType> 621 void URLParser::encode Query(const Vector<UChar>& source, constTextEncoding& encoding, CodePointIterator<CharacterType> iterator)622 { 623 auto encoded = encoding.encode (StringView(source.data(), source.size()), UnencodableHandling::URLEncodedEntities);621 void URLParser::encodeNonUTF8Query(const Vector<UChar>& source, const URLTextEncoding& encoding, CodePointIterator<CharacterType> iterator) 622 { 623 auto encoded = encoding.encodeForURLParsing(StringView(source.data(), source.size())); 624 624 auto* data = encoded.data(); 625 625 size_t length = encoded.size(); … … 881 881 882 882 template<typename CharacterType> 883 void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator, bool& isUTF8Encoding)883 void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator, const URLTextEncoding*& nonUTF8QueryEncoding) 884 884 { 885 885 syntaxViolation(iterator); … … 920 920 case Scheme::WS: 921 921 case Scheme::WSS: 922 isUTF8Encoding = true;922 nonUTF8QueryEncoding = nullptr; 923 923 m_urlIsSpecial = true; 924 924 return; … … 934 934 case Scheme::NonSpecial: 935 935 m_urlIsSpecial = false; 936 isUTF8Encoding = true;936 nonUTF8QueryEncoding = nullptr; 937 937 return; 938 938 } … … 1153 1153 } 1154 1154 1155 URLParser::URLParser(const String& input, const URL& base, const TextEncoding& encoding)1155 URLParser::URLParser(const String& input, const URL& base, const URLTextEncoding* nonUTF8QueryEncoding) 1156 1156 : m_inputString(input) 1157 1157 { … … 1166 1166 if (input.is8Bit()) { 1167 1167 m_inputBegin = input.characters8(); 1168 parse(input.characters8(), input.length(), base, encoding);1168 parse(input.characters8(), input.length(), base, nonUTF8QueryEncoding); 1169 1169 } else { 1170 1170 m_inputBegin = input.characters16(); 1171 parse(input.characters16(), input.length(), base, encoding);1171 
parse(input.characters16(), input.length(), base, nonUTF8QueryEncoding); 1172 1172 } 1173 1173 … … 1180 1180 if (!m_didSeeSyntaxViolation) { 1181 1181 // Force a syntax violation at the beginning to make sure we get the same result. 1182 URLParser parser(makeString(" ", input), base, encoding);1182 URLParser parser(makeString(" ", input), base, nonUTF8QueryEncoding); 1183 1183 URL parsed = parser.result(); 1184 1184 if (parsed.isValid()) … … 1189 1189 1190 1190 template<typename CharacterType> 1191 void URLParser::parse(const CharacterType* input, const unsigned length, const URL& base, const TextEncoding& encoding)1192 { 1193 URL_PARSER_LOG("Parsing URL <%s> base <%s> encoding <%s>", String(input, length).utf8().data(), base.string().utf8().data(), encoding.name());1191 void URLParser::parse(const CharacterType* input, const unsigned length, const URL& base, const URLTextEncoding* nonUTF8QueryEncoding) 1192 { 1193 URL_PARSER_LOG("Parsing URL <%s> base <%s>", String(input, length).utf8().data(), base.string().utf8().data()); 1194 1194 m_url = { }; 1195 1195 ASSERT(m_asciiBuffer.isEmpty()); 1196 1197 bool isUTF8Encoding = encoding == UTF8Encoding(); 1196 1198 1197 Vector<UChar> queryBuffer; 1199 1198 … … 1288 1287 case Scheme::WS: 1289 1288 case Scheme::WSS: 1290 isUTF8Encoding = true;1289 nonUTF8QueryEncoding = nullptr; 1291 1290 m_urlIsSpecial = true; 1292 1291 if (base.protocolIs(urlScheme)) … … 1310 1309 break; 1311 1310 case Scheme::NonSpecial: 1312 isUTF8Encoding = true;1311 nonUTF8QueryEncoding = nullptr; 1313 1312 auto maybeSlash = c; 1314 1313 advance(maybeSlash); … … 1354 1353 } 1355 1354 if (base.m_cannotBeABaseURL && *c == '#') { 1356 copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);1355 copyURLPartsUntil(base, URLPart::QueryEnd, c, nonUTF8QueryEncoding); 1357 1356 state = State::Fragment; 1358 1357 appendToASCIIBuffer('#'); … … 1364 1363 break; 1365 1364 } 1366 copyURLPartsUntil(base, URLPart::SchemeEnd, c, isUTF8Encoding);1365 
copyURLPartsUntil(base, URLPart::SchemeEnd, c, nonUTF8QueryEncoding); 1367 1366 appendToASCIIBuffer(':'); 1368 1367 state = State::File; … … 1414 1413 break; 1415 1414 case '?': 1416 copyURLPartsUntil(base, URLPart::PathEnd, c, isUTF8Encoding);1415 copyURLPartsUntil(base, URLPart::PathEnd, c, nonUTF8QueryEncoding); 1417 1416 appendToASCIIBuffer('?'); 1418 1417 ++c; 1419 if (isUTF8Encoding) 1420 state = State::UTF8Query; 1421 else { 1418 if (nonUTF8QueryEncoding) { 1422 1419 queryBegin = c; 1423 1420 state = State::NonUTF8Query; 1424 } 1421 } else 1422 state = State::UTF8Query; 1425 1423 break; 1426 1424 case '#': 1427 copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);1425 copyURLPartsUntil(base, URLPart::QueryEnd, c, nonUTF8QueryEncoding); 1428 1426 appendToASCIIBuffer('#'); 1429 1427 state = State::Fragment; … … 1431 1429 break; 1432 1430 default: 1433 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c, isUTF8Encoding);1431 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c, nonUTF8QueryEncoding); 1434 1432 if (currentPosition(c) && parsedDataView(currentPosition(c) - 1) != '/') { 1435 1433 appendToASCIIBuffer('/'); … … 1444 1442 if (*c == '/' || *c == '\\') { 1445 1443 ++c; 1446 copyURLPartsUntil(base, URLPart::SchemeEnd, c, isUTF8Encoding);1444 copyURLPartsUntil(base, URLPart::SchemeEnd, c, nonUTF8QueryEncoding); 1447 1445 appendToASCIIBuffer("://", 3); 1448 1446 if (m_urlIsSpecial) … … 1454 1452 } 1455 1453 } else { 1456 copyURLPartsUntil(base, URLPart::PortEnd, c, isUTF8Encoding);1454 copyURLPartsUntil(base, URLPart::PortEnd, c, nonUTF8QueryEncoding); 1457 1455 appendToASCIIBuffer('/'); 1458 1456 m_url.m_pathAfterLastSlash = base.m_hostEnd + base.m_portLength + 1; … … 1585 1583 syntaxViolation(c); 1586 1584 if (base.isValid() && base.protocolIs("file")) { 1587 copyURLPartsUntil(base, URLPart::PathEnd, c, isUTF8Encoding);1585 copyURLPartsUntil(base, URLPart::PathEnd, c, nonUTF8QueryEncoding); 1588 1586 appendToASCIIBuffer('?'); 1589 1587 
++c; … … 1599 1597 m_url.m_pathEnd = m_url.m_pathAfterLastSlash; 1600 1598 } 1601 if (isUTF8Encoding) 1602 state = State::UTF8Query; 1603 else { 1599 if (nonUTF8QueryEncoding) { 1604 1600 queryBegin = c; 1605 1601 state = State::NonUTF8Query; 1606 } 1602 } else 1603 state = State::UTF8Query; 1607 1604 break; 1608 1605 case '#': 1609 1606 syntaxViolation(c); 1610 1607 if (base.isValid() && base.protocolIs("file")) { 1611 copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);1608 copyURLPartsUntil(base, URLPart::QueryEnd, c, nonUTF8QueryEncoding); 1612 1609 appendToASCIIBuffer('#'); 1613 1610 } else { … … 1628 1625 syntaxViolation(c); 1629 1626 if (base.isValid() && base.protocolIs("file") && shouldCopyFileURL(c)) 1630 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c, isUTF8Encoding);1627 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c, nonUTF8QueryEncoding); 1631 1628 else { 1632 1629 appendToASCIIBuffer("///", 3); … … 1694 1691 appendToASCIIBuffer("/?", 2); 1695 1692 ++c; 1696 if (isUTF8Encoding) 1697 state = State::UTF8Query; 1698 else { 1693 if (nonUTF8QueryEncoding) { 1699 1694 queryBegin = c; 1700 1695 state = State::NonUTF8Query; 1701 } 1696 } else 1697 state = State::UTF8Query; 1702 1698 m_url.m_pathAfterLastSlash = currentPosition(c) - 1; 1703 1699 m_url.m_pathEnd = m_url.m_pathAfterLastSlash; … … 1772 1768 appendToASCIIBuffer('?'); 1773 1769 ++c; 1774 if (isUTF8Encoding) 1775 state = State::UTF8Query; 1776 else { 1770 if (nonUTF8QueryEncoding) { 1777 1771 queryBegin = c; 1778 1772 state = State::NonUTF8Query; 1779 } 1773 } else 1774 state = State::UTF8Query; 1780 1775 break; 1781 1776 } … … 1795 1790 appendToASCIIBuffer('?'); 1796 1791 ++c; 1797 if (isUTF8Encoding) 1798 state = State::UTF8Query; 1799 else { 1792 if (nonUTF8QueryEncoding) { 1800 1793 queryBegin = c; 1801 1794 state = State::NonUTF8Query; 1802 } 1795 } else 1796 state = State::UTF8Query; 1803 1797 } else if (*c == '#') { 1804 1798 m_url.m_pathEnd = currentPosition(c); … … 
1822 1816 break; 1823 1817 } 1824 if (isUTF8Encoding) 1825 utf8QueryEncode(c); 1826 else 1827 appendCodePoint(queryBuffer, *c); 1818 ASSERT(!nonUTF8QueryEncoding); 1819 utf8QueryEncode(c); 1828 1820 ++c; 1829 1821 break; … … 1833 1825 ASSERT(queryBegin != CodePointIterator<CharacterType>()); 1834 1826 if (*c == '#') { 1835 encode Query(queryBuffer, encoding, CodePointIterator<CharacterType>(queryBegin, c));1827 encodeNonUTF8Query(queryBuffer, *nonUTF8QueryEncoding, CodePointIterator<CharacterType>(queryBegin, c)); 1836 1828 m_url.m_queryEnd = currentPosition(c); 1837 1829 state = State::Fragment; … … 1869 1861 case State::SpecialRelativeOrAuthority: 1870 1862 LOG_FINAL_STATE("SpecialRelativeOrAuthority"); 1871 copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);1863 copyURLPartsUntil(base, URLPart::QueryEnd, c, nonUTF8QueryEncoding); 1872 1864 break; 1873 1865 case State::PathOrAuthority: … … 1890 1882 case State::RelativeSlash: 1891 1883 LOG_FINAL_STATE("RelativeSlash"); 1892 copyURLPartsUntil(base, URLPart::PortEnd, c, isUTF8Encoding);1884 copyURLPartsUntil(base, URLPart::PortEnd, c, nonUTF8QueryEncoding); 1893 1885 appendToASCIIBuffer('/'); 1894 1886 m_url.m_pathAfterLastSlash = m_url.m_hostEnd + m_url.m_portLength + 1; … … 1953 1945 LOG_FINAL_STATE("File"); 1954 1946 if (base.isValid() && base.protocolIs("file")) { 1955 copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);1947 copyURLPartsUntil(base, URLPart::QueryEnd, c, nonUTF8QueryEncoding); 1956 1948 break; 1957 1949 } … … 2048 2040 LOG_FINAL_STATE("NonUTF8Query"); 2049 2041 ASSERT(queryBegin != CodePointIterator<CharacterType>()); 2050 encode Query(queryBuffer, encoding, CodePointIterator<CharacterType>(queryBegin, c));2042 encodeNonUTF8Query(queryBuffer, *nonUTF8QueryEncoding, CodePointIterator<CharacterType>(queryBegin, c)); 2051 2043 m_url.m_queryEnd = currentPosition(c); 2052 2044 break; -
trunk/Source/WebCore/platform/URLParser.h
r231337 r236565 26 26 #pragma once 27 27 28 #include "TextEncoding.h"29 28 #include "URL.h" 30 29 #include <wtf/Expected.h> … … 39 38 class URLParser { 40 39 public: 41 WEBCORE_EXPORT URLParser(const String&, const URL& = { }, const TextEncoding& = UTF8Encoding());40 WEBCORE_EXPORT URLParser(const String&, const URL& = { }, const URLTextEncoding* = nullptr); 42 41 URL result() { return m_url; } 43 42 … … 71 70 using LCharBuffer = Vector<LChar, defaultInlineBufferSize>; 72 71 73 template<typename CharacterType> void parse(const CharacterType*, const unsigned length, const URL&, const TextEncoding&);72 template<typename CharacterType> void parse(const CharacterType*, const unsigned length, const URL&, const URLTextEncoding*); 74 73 template<typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>); 75 74 template<typename CharacterType> bool parseHostAndPort(CodePointIterator<CharacterType>); … … 108 107 void appendToASCIIBuffer(const char*, size_t); 109 108 void appendToASCIIBuffer(const LChar* characters, size_t size) { appendToASCIIBuffer(reinterpret_cast<const char*>(characters), size); } 110 template<typename CharacterType> void encode Query(const Vector<UChar>& source, constTextEncoding&, CodePointIterator<CharacterType>);109 template<typename CharacterType> void encodeNonUTF8Query(const Vector<UChar>& source, const URLTextEncoding&, CodePointIterator<CharacterType>); 111 110 void copyASCIIStringUntil(const String&, size_t length); 112 111 bool copyBaseWindowsDriveLetter(const URL&); … … 128 127 129 128 enum class URLPart; 130 template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&, bool& isUTF8Encoding);129 template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&, const URLTextEncoding*&); 131 130 static size_t urlLengthUntilPart(const URL&, URLPart); 132 131 void popPath(); -
trunk/Source/WebCore/platform/text/TextEncoding.cpp
r235935 r236565 32 32 #include "TextEncodingRegistry.h" 33 33 #include <unicode/unorm.h> 34 #include <wtf/NeverDestroyed.h> 34 35 #include <wtf/StdLibExtras.h> 35 36 #include <wtf/text/CString.h> … … 40 41 static const TextEncoding& UTF7Encoding() 41 42 { 42 static TextEncodingglobalUTF7Encoding("UTF-7");43 static NeverDestroyed<TextEncoding> globalUTF7Encoding("UTF-7"); 43 44 return globalUTF7Encoding; 44 45 } … … 174 175 // should be done for UTF-32. In case of UTF-7, it is a byte-based encoding, 175 176 // but it's fraught with problems and we'd rather steer clear of it. 176 const TextEncoding& TextEncoding::encodingForFormSubmission () const177 const TextEncoding& TextEncoding::encodingForFormSubmissionOrURLParsing() const 177 178 { 178 179 if (isNonByteBasedEncoding() || isUTF7Encoding()) … … 183 184 const TextEncoding& ASCIIEncoding() 184 185 { 185 static TextEncodingglobalASCIIEncoding("ASCII");186 static NeverDestroyed<TextEncoding> globalASCIIEncoding("ASCII"); 186 187 return globalASCIIEncoding; 187 188 } … … 189 190 const TextEncoding& Latin1Encoding() 190 191 { 191 static TextEncodingglobalLatin1Encoding("latin1");192 static NeverDestroyed<TextEncoding> globalLatin1Encoding("latin1"); 192 193 return globalLatin1Encoding; 193 194 } … … 195 196 const TextEncoding& UTF16BigEndianEncoding() 196 197 { 197 static TextEncodingglobalUTF16BigEndianEncoding("UTF-16BE");198 static NeverDestroyed<TextEncoding> globalUTF16BigEndianEncoding("UTF-16BE"); 198 199 return globalUTF16BigEndianEncoding; 199 200 } … … 201 202 const TextEncoding& UTF16LittleEndianEncoding() 202 203 { 203 static TextEncodingglobalUTF16LittleEndianEncoding("UTF-16LE");204 static NeverDestroyed<TextEncoding> globalUTF16LittleEndianEncoding("UTF-16LE"); 204 205 return globalUTF16LittleEndianEncoding; 205 206 } … … 207 208 const TextEncoding& UTF8Encoding() 208 209 { 209 static TextEncodingglobalUTF8Encoding("UTF-8");210 ASSERT(globalUTF8Encoding. 
isValid());210 static NeverDestroyed<TextEncoding> globalUTF8Encoding("UTF-8"); 211 ASSERT(globalUTF8Encoding.get().isValid()); 211 212 return globalUTF8Encoding; 212 213 } … … 214 215 const TextEncoding& WindowsLatin1Encoding() 215 216 { 216 static TextEncodingglobalWindowsLatin1Encoding("WinLatin-1");217 static NeverDestroyed<TextEncoding> globalWindowsLatin1Encoding("WinLatin-1"); 217 218 return globalWindowsLatin1Encoding; 218 219 } -
trunk/Source/WebCore/platform/text/TextEncoding.h
r228594 r236565 26 26 #pragma once 27 27 28 #include "URL.h" 28 29 #include <pal/text/UnencodableHandling.h> 29 30 #include <wtf/text/WTFString.h> … … 31 32 namespace WebCore { 32 33 33 class TextEncoding { 34 class TextEncoding : public URLTextEncoding { 34 35 public: 35 36 TextEncoding() = default; … … 44 45 45 46 const TextEncoding& closestByteBasedEquivalent() const; 46 const TextEncoding& encodingForFormSubmission() const; 47 const TextEncoding& encodingForFormSubmissionOrURLParsing() const; 47 48 48 49 WEBCORE_EXPORT String decode(const char*, size_t length, bool stopOnError, bool& sawError) const; 49 50 String decode(const char*, size_t length) const; 50 Vector<uint8_t> encode(StringView, UnencodableHandling) const; 51 WEBCORE_EXPORT Vector<uint8_t> encode(StringView, UnencodableHandling) const; 52 Vector<uint8_t> encodeForURLParsing(StringView string) const final { return encode(string, UnencodableHandling::URLEncodedEntities); } 51 53 52 54 UChar backslashAsCurrencySymbol() const; -
trunk/Tools/ChangeLog
r236562 r236565 1 2018-09-27 Alex Christensen <achristensen@webkit.org> 2 3 URLParser should use TextEncoding through an abstract class 4 https://bugs.webkit.org/show_bug.cgi?id=190027 5 6 Reviewed by Andy Estes. 7 8 * TestWebKitAPI/Tests/WebCore/URLParser.cpp: 9 (TestWebKitAPI::checkURL): 10 (TestWebKitAPI::TEST_F): 11 1 12 2018-09-27 Ryan Haddad <ryanhaddad@apple.com> 2 13 -
trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp
r236528 r236565 26 26 #include "config.h" 27 27 #include "WTFStringUtilities.h" 28 #include <WebCore/TextEncoding.h> 28 29 #include <WebCore/URLParser.h> 29 30 #include <wtf/MainThread.h> … … 211 212 } 212 213 213 static void checkURL(const String& urlString, const TextEncoding &encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes)214 static void checkURL(const String& urlString, const TextEncoding* encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes) 214 215 { 215 216 URLParser parser(urlString, { }, encoding); … … 236 237 } 237 238 238 static void checkURL(const String& urlString, const String& baseURLString, const TextEncoding &encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes)239 static void checkURL(const String& urlString, const String& baseURLString, const TextEncoding* encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes) 239 240 { 240 241 URLParser baseParser(baseURLString, { }, encoding); … … 1286 1287 TEST_F(URLParserTest, QueryEncoding) 1287 1288 { 1288 checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D")}, testTabsValueForSurrogatePairs);1289 checkURL(utf16String(u"http://host?ß😍#ß😍"), nullptr, {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D")}, testTabsValueForSurrogatePairs); 1289 1290 1290 1291 TextEncoding latin1(String("latin1")); 1291 checkURL("http://host/?query with%20spaces", latin1, {"http", "", "", "host", 0, "/", "query%20with%20spaces", "", "http://host/?query%20with%20spaces"});1292 checkURL("http://host/?query", latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});1293 checkURL("http://host/?\tquery", latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});1294 
checkURL("http://host/?q\tuery", latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});1295 checkURL("http://host/?query with SpAcEs#fragment", latin1, {"http", "", "", "host", 0, "/", "query%20with%20SpAcEs", "fragment", "http://host/?query%20with%20SpAcEs#fragment"});1296 checkURL("http://host/?que\rry\t\r\n#fragment", latin1, {"http", "", "", "host", 0, "/", "query", "fragment", "http://host/?query#fragment"});1292 checkURL("http://host/?query with%20spaces", &latin1, {"http", "", "", "host", 0, "/", "query%20with%20spaces", "", "http://host/?query%20with%20spaces"}); 1293 checkURL("http://host/?query", &latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"}); 1294 checkURL("http://host/?\tquery", &latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"}); 1295 checkURL("http://host/?q\tuery", &latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"}); 1296 checkURL("http://host/?query with SpAcEs#fragment", &latin1, {"http", "", "", "host", 0, "/", "query%20with%20SpAcEs", "fragment", "http://host/?query%20with%20SpAcEs#fragment"}); 1297 checkURL("http://host/?que\rry\t\r\n#fragment", &latin1, {"http", "", "", "host", 0, "/", "query", "fragment", "http://host/?query#fragment"}); 1297 1298 1298 1299 TextEncoding unrecognized(String("unrecognized invalid encoding name")); 1299 checkURL("http://host/?query", unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"});1300 checkURL("http://host/?", unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"});1300 checkURL("http://host/?query", &unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"}); 1301 checkURL("http://host/?", &unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"}); 1301 1302 1302 1303 TextEncoding iso88591(String("ISO-8859-1")); 1303 1304 String withUmlauts = utf16String<4>({0xDC, 0x430, 0x451, '\0'}); 1304 checkURL(makeString("ws://host/path?", 
withUmlauts), iso88591, {"ws", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "ws://host/path?%C3%9C%D0%B0%D1%91"});1305 checkURL(makeString("wss://host/path?", withUmlauts), iso88591, {"wss", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "wss://host/path?%C3%9C%D0%B0%D1%91"});1306 checkURL(makeString("asdf://host/path?", withUmlauts), iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "asdf://host/path?%C3%9C%D0%B0%D1%91"});1307 checkURL(makeString("https://host/path?", withUmlauts), iso88591, {"https", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "https://host/path?%DC%26%231072%3B%26%231105%3B"});1308 checkURL(makeString("gopher://host/path?", withUmlauts), iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "gopher://host/path?%DC%26%231072%3B%26%231105%3B"});1309 checkURL(makeString("/path?", withUmlauts, "#fragment"), "ws://example.com/", iso88591, {"ws", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "ws://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});1310 checkURL(makeString("/path?", withUmlauts, "#fragment"), "wss://example.com/", iso88591, {"wss", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "wss://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});1311 checkURL(makeString("/path?", withUmlauts, "#fragment"), "asdf://example.com/", iso88591, {"asdf", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});1312 checkURL(makeString("/path?", withUmlauts, "#fragment"), "https://example.com/", iso88591, {"https", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "https://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"});1313 checkURL(makeString("/path?", withUmlauts, "#fragment"), "gopher://example.com/", iso88591, {"gopher", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", 
"gopher://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"});1314 checkURL(makeString("gopher://host/path?", withUmlauts, "#fragment"), "asdf://example.com/?doesntmatter", iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "gopher://host/path?%DC%26%231072%3B%26%231105%3B#fragment"});1315 checkURL(makeString("asdf://host/path?", withUmlauts, "#fragment"), "http://example.com/?doesntmatter", iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://host/path?%C3%9C%D0%B0%D1%91#fragment"});1316 1317 checkURL("http://host/pa'th?qu'ery#fr'agment", UTF8Encoding(), {"http", "", "", "host", 0, "/pa'th", "qu%27ery", "fr'agment", "http://host/pa'th?qu%27ery#fr'agment"});1318 checkURL("asdf://host/pa'th?qu'ery#fr'agment", UTF8Encoding(), {"asdf", "", "", "host", 0, "/pa'th", "qu'ery", "fr'agment", "asdf://host/pa'th?qu'ery#fr'agment"});1305 checkURL(makeString("ws://host/path?", withUmlauts), &iso88591, {"ws", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "ws://host/path?%C3%9C%D0%B0%D1%91"}); 1306 checkURL(makeString("wss://host/path?", withUmlauts), &iso88591, {"wss", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "wss://host/path?%C3%9C%D0%B0%D1%91"}); 1307 checkURL(makeString("asdf://host/path?", withUmlauts), &iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "asdf://host/path?%C3%9C%D0%B0%D1%91"}); 1308 checkURL(makeString("https://host/path?", withUmlauts), &iso88591, {"https", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "https://host/path?%DC%26%231072%3B%26%231105%3B"}); 1309 checkURL(makeString("gopher://host/path?", withUmlauts), &iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "gopher://host/path?%DC%26%231072%3B%26%231105%3B"}); 1310 checkURL(makeString("/path?", withUmlauts, "#fragment"), "ws://example.com/", &iso88591, {"ws", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", 
"fragment", "ws://example.com/path?%C3%9C%D0%B0%D1%91#fragment"}); 1311 checkURL(makeString("/path?", withUmlauts, "#fragment"), "wss://example.com/", &iso88591, {"wss", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "wss://example.com/path?%C3%9C%D0%B0%D1%91#fragment"}); 1312 checkURL(makeString("/path?", withUmlauts, "#fragment"), "asdf://example.com/", &iso88591, {"asdf", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://example.com/path?%C3%9C%D0%B0%D1%91#fragment"}); 1313 checkURL(makeString("/path?", withUmlauts, "#fragment"), "https://example.com/", &iso88591, {"https", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "https://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"}); 1314 checkURL(makeString("/path?", withUmlauts, "#fragment"), "gopher://example.com/", &iso88591, {"gopher", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "gopher://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"}); 1315 checkURL(makeString("gopher://host/path?", withUmlauts, "#fragment"), "asdf://example.com/?doesntmatter", &iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "gopher://host/path?%DC%26%231072%3B%26%231105%3B#fragment"}); 1316 checkURL(makeString("asdf://host/path?", withUmlauts, "#fragment"), "http://example.com/?doesntmatter", &iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://host/path?%C3%9C%D0%B0%D1%91#fragment"}); 1317 1318 checkURL("http://host/pa'th?qu'ery#fr'agment", nullptr, {"http", "", "", "host", 0, "/pa'th", "qu%27ery", "fr'agment", "http://host/pa'th?qu%27ery#fr'agment"}); 1319 checkURL("asdf://host/pa'th?qu'ery#fr'agment", nullptr, {"asdf", "", "", "host", 0, "/pa'th", "qu'ery", "fr'agment", "asdf://host/pa'th?qu'ery#fr'agment"}); 1319 1320 // FIXME: Add more tests with other encodings and things like non-ascii characters, emoji and unmatched 
surrogate pairs. 1320 1321 }
Note:
See TracChangeset
for help on using the changeset viewer.