Changeset 263722 in webkit
- Timestamp:
- Jun 29, 2020 8:52:16 PM (4 years ago)
- Location:
- trunk/Source/WebCore
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r263720 r263722 1 2020-06-29 Sam Weinig <weinig@apple.com> 2 3 Convert AppCache manifest parser over to using StringParsingBuffer 4 https://bugs.webkit.org/show_bug.cgi?id=213680 5 6 Reviewed by Darin Adler. 7 8 - Renames parseManifest to parseApplicationCacheManifest to differentiate between the manifest 9 for the application cache and the "application manifest", which is a different thing entirely. 10 Also renames the container struct from being called Manifest to ApplicationCacheManifest. 11 (The file should be renamed as well, but will do that in a separate pass). 12 - Update parser to return an Optional<ApplicationCacheManifest> rather than using bool + out 13 parameter. 14 - Adopt readCharactersForParsing to replace unnecessary call to StringView::upconvertedCharacters(). 15 - Adopt StringParsingBuffer and ParsingUtilities along with some refinements to the code 16 to make the intent more clear. 17 18 19 * html/parser/ParsingUtilities.h: 20 (WebCore::skipUntil): 21 Fix formatting, putting the whole signature on one line. 22 23 * loader/appcache/ApplicationCacheGroup.cpp: 24 (WebCore::ApplicationCacheGroup::didFinishLoadingManifest): 25 Update for new parser function name and Optional return type. 26 27 * loader/appcache/ManifestParser.cpp: 28 (WebCore::isManifestWhitespace): 29 (WebCore::isManifestNewline): 30 (WebCore::isManifestWhitespaceOrNewline): 31 (WebCore::makeManifestURL): 32 (WebCore::parseApplicationCacheManifest): 33 * loader/appcache/ManifestParser.h: 34 Update parsing logic to use readCharactersForParsing (to avoid upconversion) and rework 35 using StringParsingBuffer/ParsingUtilities to make things more clear. 36 1 37 2020-06-29 Zalan Bujtas <zalan@apple.com> 2 38 -
trunk/Source/WebCore/html/parser/ParsingUtilities.h
r263617 r263722 82 82 } 83 83 84 template<typename CharacterType, typename DelimiterType> 85 void skipUntil(StringParsingBuffer<CharacterType>& buffer, DelimiterType delimiter) 84 template<typename CharacterType, typename DelimiterType> void skipUntil(StringParsingBuffer<CharacterType>& buffer, DelimiterType delimiter) 86 85 { 87 86 while (buffer.hasCharactersRemaining() && *buffer != delimiter) -
trunk/Source/WebCore/loader/appcache/ApplicationCacheGroup.cpp
r262922 r263722 606 606 } 607 607 608 Manifest manifest;609 if (! parseManifest(m_manifestURL, m_manifestResource->response().mimeType(), m_manifestResource->data().data(), m_manifestResource->data().size(), manifest)) {608 auto manifest = parseApplicationCacheManifest(m_manifestURL, m_manifestResource->response().mimeType(), m_manifestResource->data().data(), m_manifestResource->data().size()); 609 if (!manifest) { 610 610 // At the time of this writing, lack of "CACHE MANIFEST" signature is the only reason for parseManifest to fail. 611 611 m_frame->document()->addConsoleMessage(MessageSource::AppCache, MessageLevel::Error, "Application Cache manifest could not be parsed. Does it start with CACHE MANIFEST?"_s); … … 636 636 } 637 637 638 for (const auto& explicitURL : manifest .explicitURLs)638 for (const auto& explicitURL : manifest->explicitURLs) 639 639 addEntry(explicitURL, ApplicationCacheResource::Explicit); 640 640 641 for (auto& fallbackURL : manifest .fallbackURLs)641 for (auto& fallbackURL : manifest->fallbackURLs) 642 642 addEntry(fallbackURL.second.string(), ApplicationCacheResource::Fallback); 643 643 644 m_cacheBeingUpdated->setOnlineAllowlist(manifest .onlineAllowedURLs);645 m_cacheBeingUpdated->setFallbackURLs(manifest .fallbackURLs);646 m_cacheBeingUpdated->setAllowsAllNetworkRequests(manifest .allowAllNetworkRequests);644 m_cacheBeingUpdated->setOnlineAllowlist(manifest->onlineAllowedURLs); 645 m_cacheBeingUpdated->setFallbackURLs(manifest->fallbackURLs); 646 m_cacheBeingUpdated->setAllowsAllNetworkRequests(manifest->allowAllNetworkRequests); 647 647 648 648 m_progressTotal = m_pendingEntries.size(); -
trunk/Source/WebCore/loader/appcache/ManifestParser.cpp
r262922 r263722 27 27 #include "ManifestParser.h" 28 28 29 #include "ParsingUtilities.h" 29 30 #include "TextResourceDecoder.h" 30 31 #include <wtf/URL.h> 32 #include <wtf/text/StringParsingBuffer.h> 31 33 #include <wtf/text/StringView.h> 32 #include <wtf/unicode/CharacterNames.h>33 34 34 35 namespace WebCore { 35 36 36 enum Mode { Explicit, Fallback, OnlineAllowlist, Unknown };37 enum class ApplicationCacheParserMode { Explicit, Fallback, OnlineAllowlist, Unknown }; 37 38 38 39 static StringView manifestPath(const URL& manifestURL) … … 45 46 } 46 47 47 bool parseManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length, Manifest& manifest) 48 { 49 ASSERT(manifest.explicitURLs.isEmpty()); 50 ASSERT(manifest.onlineAllowedURLs.isEmpty()); 51 ASSERT(manifest.fallbackURLs.isEmpty()); 52 manifest.allowAllNetworkRequests = false; 53 48 template<typename CharacterType> static constexpr bool isManifestWhitespace(CharacterType character) 49 { 50 return character == ' ' || character == '\t'; 51 } 52 53 template<typename CharacterType> static constexpr bool isManifestNewline(CharacterType character) 54 { 55 return character == '\n' || character == '\r'; 56 } 57 58 template<typename CharacterType> static constexpr bool isManifestWhitespaceOrNewline(CharacterType character) 59 { 60 return isManifestWhitespace(character) || isManifestNewline(character); 61 } 62 63 template<typename CharacterType> static URL makeManifestURL(const URL& manifestURL, const CharacterType* start, const CharacterType* end) 64 { 65 URL url(manifestURL, String(start, end - start)); 66 url.removeFragmentIdentifier(); 67 return url; 68 } 69 70 template<typename CharacterType> static constexpr CharacterType cacheManifestIdentifier[] = { 'C', 'A', 'C', 'H', 'E', ' ', 'M', 'A', 'N', 'I', 'F', 'E', 'S', 'T' }; 71 template<typename CharacterType> static constexpr CharacterType cacheModeIdentifier[] = { 'C', 'A', 'C', 'H', 'E' }; 72 template<typename CharacterType> static 
constexpr CharacterType fallbackModeIdentifier[] = { 'F', 'A', 'L', 'L', 'B', 'A', 'C', 'K' }; 73 template<typename CharacterType> static constexpr CharacterType networkModeIdentifier[] = { 'N', 'E', 'T', 'W', 'O', 'R', 'K' }; 74 75 Optional<ApplicationCacheManifest> parseApplicationCacheManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length) 76 { 77 static constexpr const char cacheManifestMIMEType[] = "text/cache-manifest"; 78 bool allowFallbackNamespaceOutsideManfestPath = equalLettersIgnoringASCIICase(manifestMIMEType, cacheManifestMIMEType); 54 79 auto manifestPath = WebCore::manifestPath(manifestURL); 55 80 56 const char cacheManifestMIMEType[] = "text/cache-manifest"; 57 bool allowFallbackNamespaceOutsideManfestPath = equalLettersIgnoringASCIICase(manifestMIMEType, cacheManifestMIMEType); 58 59 Mode mode = Explicit; 60 61 String manifestString = TextResourceDecoder::create(ASCIILiteral::fromLiteralUnsafe(cacheManifestMIMEType), "UTF-8")->decodeAndFlush(data, length); 81 auto manifestString = TextResourceDecoder::create(ASCIILiteral::fromLiteralUnsafe(cacheManifestMIMEType), "UTF-8")->decodeAndFlush(data, length); 82 83 return readCharactersForParsing(manifestString, [&](auto buffer) -> Optional<ApplicationCacheManifest> { 84 using CharacterType = typename decltype(buffer)::CharacterType; 62 85 63 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder). 64 // Example: "CACHE MANIFEST #comment" is a valid signature. 65 // Example: "CACHE MANIFEST;V2" is not. 66 const char manifestSignature[] = "CACHE MANIFEST"; 67 if (!manifestString.startsWith(manifestSignature)) 68 return false; 86 ApplicationCacheManifest manifest; 87 auto mode = ApplicationCacheParserMode::Explicit; 88 89 // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder). 90 // Example: "CACHE MANIFEST #comment" is a valid signature. 
91 // Example: "CACHE MANIFEST;V2" is not. 92 if (!skipCharactersExactly(buffer, cacheManifestIdentifier<CharacterType>)) 93 return WTF::nullopt; 69 94 70 StringView manifestAfterSignature = StringView(manifestString).substring(sizeof(manifestSignature) - 1); 71 auto upconvertedCharacters = manifestAfterSignature.upconvertedCharacters(); 72 const UChar* p = upconvertedCharacters; 73 const UChar* end = p + manifestAfterSignature.length(); 74 75 if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') 76 return false; 77 78 // Skip to the end of the line. 79 while (p < end && *p != '\r' && *p != '\n') 80 p++; 81 82 while (1) { 83 // Skip whitespace 84 while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t')) 85 p++; 86 87 if (p == end) 88 break; 89 90 const UChar* lineStart = p; 91 92 // Find the end of the line 93 while (p < end && *p != '\r' && *p != '\n') 94 p++; 95 96 // Check if we have a comment 97 if (*lineStart == '#') 98 continue; 99 100 // Get rid of trailing whitespace 101 const UChar* tmp = p - 1; 102 while (tmp > lineStart && (*tmp == ' ' || *tmp == '\t')) 103 tmp--; 104 105 String line(lineStart, tmp - lineStart + 1); 106 107 if (line == "CACHE:") 108 mode = Explicit; 109 else if (line == "FALLBACK:") 110 mode = Fallback; 111 else if (line == "NETWORK:") 112 mode = OnlineAllowlist; 113 else if (line.endsWith(':')) 114 mode = Unknown; 115 else if (mode == Unknown) 116 continue; 117 else if (mode == Explicit || mode == OnlineAllowlist) { 118 auto upconvertedLineCharacters = StringView(line).upconvertedCharacters(); 119 const UChar* p = upconvertedLineCharacters; 120 const UChar* lineEnd = p + line.length(); 121 122 // Look for whitespace separating the URL from subsequent ignored tokens. 123 while (p < lineEnd && *p != '\t' && *p != ' ') 124 p++; 125 126 if (mode == OnlineAllowlist && p - upconvertedLineCharacters == 1 && line[0] == '*') { 127 // Wildcard was found. 
128 manifest.allowAllNetworkRequests = true; 129 continue; 130 } 131 132 URL url(manifestURL, line.substring(0, p - upconvertedLineCharacters)); 133 134 if (!url.isValid()) 135 continue; 136 137 url.removeFragmentIdentifier(); 138 139 if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol())) 140 continue; 141 142 if (mode == Explicit && manifestURL.protocolIs("https") && !protocolHostAndPortAreEqual(manifestURL, url)) 143 continue; 144 145 if (mode == Explicit) 95 if (buffer.hasCharactersRemaining() && !isManifestWhitespaceOrNewline(*buffer)) 96 return WTF::nullopt; 97 98 // Skip to the end of the line. 99 skipUntil<CharacterType, isManifestNewline>(buffer); 100 101 while (1) { 102 // Skip whitespace 103 skipWhile<CharacterType, isManifestWhitespaceOrNewline>(buffer); 104 105 if (buffer.atEnd()) 106 break; 107 108 auto lineStart = buffer.position(); 109 110 // Find the end of the line 111 skipUntil<CharacterType, isManifestNewline>(buffer); 112 113 // Line is a comment, skip to the next line. 
114 if (*lineStart == '#') 115 continue; 116 117 // Get rid of trailing whitespace 118 auto lineEnd = buffer.position() - 1; 119 while (lineEnd > lineStart && isManifestWhitespace(*lineEnd)) 120 --lineEnd; 121 122 auto lineBuffer = StringParsingBuffer { lineStart, lineEnd + 1 }; 123 124 if (lineBuffer[lineBuffer.lengthRemaining() - 1] == ':') { 125 if (skipCharactersExactly(lineBuffer, cacheModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) { 126 mode = ApplicationCacheParserMode::Explicit; 127 continue; 128 } 129 if (skipCharactersExactly(lineBuffer, fallbackModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) { 130 mode = ApplicationCacheParserMode::Fallback; 131 continue; 132 } 133 if (skipCharactersExactly(lineBuffer, networkModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) { 134 mode = ApplicationCacheParserMode::OnlineAllowlist; 135 continue; 136 } 137 138 // If the line (excluding the trailing whitespace) ends with a ':' and isn't one of the known mode 139 // headers, transition to the 'Unknown' mode. 140 mode = ApplicationCacheParserMode::Unknown; 141 continue; 142 } 143 144 switch (mode) { 145 case ApplicationCacheParserMode::Unknown: 146 continue; 147 148 case ApplicationCacheParserMode::Explicit: { 149 // Look for whitespace separating the URL from subsequent ignored tokens. 150 skipUntil<CharacterType, isManifestWhitespace>(lineBuffer); 151 152 auto url = makeManifestURL(manifestURL, lineStart, lineBuffer.position()); 153 if (!url.isValid()) 154 continue; 155 156 if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol())) 157 continue; 158 159 if (manifestURL.protocolIs("https") && !protocolHostAndPortAreEqual(manifestURL, url)) 160 continue; 161 146 162 manifest.explicitURLs.add(url.string()); 147 else 163 continue; 164 } 165 166 case ApplicationCacheParserMode::OnlineAllowlist: { 167 // Look for whitespace separating the URL from subsequent ignored tokens. 
168 skipUntil<CharacterType, isManifestWhitespace>(lineBuffer); 169 170 if (lineBuffer.position() - lineStart == 1 && *lineStart == '*') { 171 // Wildcard was found. 172 manifest.allowAllNetworkRequests = true; 173 continue; 174 } 175 176 auto url = makeManifestURL(manifestURL, lineStart, lineBuffer.position()); 177 if (!url.isValid()) 178 continue; 179 180 if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol())) 181 continue; 182 148 183 manifest.onlineAllowedURLs.append(url); 149 150 } else if (mode == Fallback) { 151 auto upconvertedLineCharacters = StringView(line).upconvertedCharacters(); 152 const UChar* p = upconvertedLineCharacters; 153 const UChar* lineEnd = p + line.length(); 154 155 // Look for whitespace separating the two URLs 156 while (p < lineEnd && *p != '\t' && *p != ' ') 157 p++; 158 159 if (p == lineEnd) { 160 // There was no whitespace separating the URLs. 161 continue; 162 } 163 164 URL namespaceURL(manifestURL, line.substring(0, p - upconvertedLineCharacters)); 165 if (!namespaceURL.isValid()) 166 continue; 167 namespaceURL.removeFragmentIdentifier(); 168 169 if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL)) 170 continue; 171 172 // Although <https://html.spec.whatwg.org/multipage/offline.html#parsing-cache-manifests> (07/06/2017) saids 173 // that we should always prefix match the manifest path we only do so if the manifest was served with a non- 174 // standard HTTP Content-Type header for web compatibility. 175 if (!allowFallbackNamespaceOutsideManfestPath && !namespaceURL.path().startsWith(manifestPath)) 176 continue; 177 178 // Skip whitespace separating fallback namespace from URL. 179 while (p < lineEnd && (*p == '\t' || *p == ' ')) 180 p++; 181 182 // Look for whitespace separating the URL from subsequent ignored tokens. 
183 const UChar* fallbackStart = p; 184 while (p < lineEnd && *p != '\t' && *p != ' ') 185 p++; 186 187 URL fallbackURL(manifestURL, String(fallbackStart, p - fallbackStart)); 188 if (!fallbackURL.isValid()) 189 continue; 190 fallbackURL.removeFragmentIdentifier(); 191 192 if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL)) 193 continue; 194 195 manifest.fallbackURLs.append(std::make_pair(namespaceURL, fallbackURL)); 196 } else 184 continue; 185 } 186 187 case ApplicationCacheParserMode::Fallback: { 188 // Look for whitespace separating the two URLs 189 skipUntil<CharacterType, isManifestWhitespace>(lineBuffer); 190 191 if (lineBuffer.atEnd()) { 192 // There was no whitespace separating the URLs. 193 continue; 194 } 195 196 auto namespaceURL = makeManifestURL(manifestURL, lineStart, lineBuffer.position()); 197 if (!namespaceURL.isValid()) 198 continue; 199 200 if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL)) 201 continue; 202 203 // Although <https://html.spec.whatwg.org/multipage/offline.html#parsing-cache-manifests> (07/06/2017) saids 204 // that we should always prefix match the manifest path we only do so if the manifest was served with a non- 205 // standard HTTP Content-Type header for web compatibility. 206 if (!allowFallbackNamespaceOutsideManfestPath && !namespaceURL.path().startsWith(manifestPath)) 207 continue; 208 209 // Skip whitespace separating fallback namespace from URL. 210 skipWhile<CharacterType, isManifestWhitespace>(lineBuffer); 211 212 auto fallbackStart = lineBuffer.position(); 213 214 // Look for whitespace separating the URL from subsequent ignored tokens. 
215 skipUntil<CharacterType, isManifestWhitespace>(lineBuffer); 216 217 auto fallbackURL = makeManifestURL(manifestURL, fallbackStart, lineBuffer.position()); 218 if (!fallbackURL.isValid()) 219 continue; 220 221 if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL)) 222 continue; 223 224 manifest.fallbackURLs.append(std::make_pair(namespaceURL, fallbackURL)); 225 continue; 226 } 227 } 228 197 229 ASSERT_NOT_REACHED(); 198 } 199 200 return true; 201 } 202 203 } 230 } 231 232 return manifest; 233 }); 234 } 235 236 } -
trunk/Source/WebCore/loader/appcache/ManifestParser.h
r262922 r263722 31 31 namespace WebCore { 32 32 33 struct Manifest {33 struct ApplicationCacheManifest { 34 34 Vector<URL> onlineAllowedURLs; 35 35 HashSet<String> explicitURLs; 36 36 FallbackURLVector fallbackURLs; 37 bool allowAllNetworkRequests ; // Wildcard found in NETWORK section.37 bool allowAllNetworkRequests { false }; // Wildcard found in NETWORK section. 38 38 }; 39 39 40 bool parseManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length, Manifest&);40 Optional<ApplicationCacheManifest> parseApplicationCacheManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length); 41 41 42 42 } // namespace WebCore
Note: See TracChangeset
for help on using the changeset viewer.