Changeset 263722 in webkit


Ignore:
Timestamp:
Jun 29, 2020 8:52:16 PM (4 years ago)
Author:
weinig@apple.com
Message:

Convert AppCache manifest parser over to using StringParsingBuffer
https://bugs.webkit.org/show_bug.cgi?id=213680

Reviewed by Darin Adler.

  • Renames parseManifest to parseApplicationCacheManifest to differentiate between the manifest for the application cache and the "application manifest", which is a different thing entirely. Also renames the container struct from being called Manifest to ApplicationCacheManifest. (The file should be renamed as well, but will do that in a separate pass).
  • Update parser to return an Optional<ApplicationCacheManifest> rather than using bool + out parameter.
  • Adopt readCharactersForParsing to replace unnecessary call to StringView::upconvertedCharacters().
  • Adopt StringParsingBuffer and ParsingUtilities along with some refinements to the code to make the intent more clear.
  • html/parser/ParsingUtilities.h:

(WebCore::skipUntil):
Fix formatting, putting the whole signature on one line.

  • loader/appcache/ApplicationCacheGroup.cpp:

(WebCore::ApplicationCacheGroup::didFinishLoadingManifest):
Update for new parser function name and Optional return type.

  • loader/appcache/ManifestParser.cpp:

(WebCore::isManifestWhitespace):
(WebCore::isManifestNewline):
(WebCore::isManifestWhitespaceOrNewline):
(WebCore::makeManifestURL):
(WebCore::parseApplicationCacheManifest):

  • loader/appcache/ManifestParser.h:

Update parsing logic to use readCharactersForParsing (to avoid upconversion) and rework
using StringParsingBuffer/ParsingUtilities to make things more clear.

Location:
trunk/Source/WebCore
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/ChangeLog

    r263720 r263722  
     12020-06-29  Sam Weinig  <weinig@apple.com>
     2
     3        Convert AppCache manifest parser over to using StringParsingBuffer
     4        https://bugs.webkit.org/show_bug.cgi?id=213680
     5
     6        Reviewed by Darin Adler.
     7
     8        - Renames parseManifest to parseApplicationCacheManifest to differentiate between the manifest
     9          for the application cache and the "application manifest", which is a different thing entirely.
     10          Also renames the container struct from being called Manifest to ApplicationCacheManifest.
     11          (The file should be renamed as well, but will do that in a separate pass).
     12        - Update parser to return an Optional<ApplicationCacheManifest> rather than using bool + out
     13          parameter.
     14        - Adopt readCharactersForParsing to replace unnecessary call to StringView::upconvertedCharacters().
     15        - Adopt StringParsingBuffer and ParsingUtilities along with some refinements to the code
     16          to make the intent more clear.
     17
     18
     19        * html/parser/ParsingUtilities.h:
     20        (WebCore::skipUntil):
     21        Fix formatting, putting the whole signature on one line.
     22
     23        * loader/appcache/ApplicationCacheGroup.cpp:
     24        (WebCore::ApplicationCacheGroup::didFinishLoadingManifest):
     25        Update for new parser function name and Optional return type.
     26
     27        * loader/appcache/ManifestParser.cpp:
     28        (WebCore::isManifestWhitespace):
     29        (WebCore::isManifestNewline):
     30        (WebCore::isManifestWhitespaceOrNewline):
     31        (WebCore::makeManifestURL):
     32        (WebCore::parseApplicationCacheManifest):
     33        * loader/appcache/ManifestParser.h:
     34        Update parsing logic to use readCharactersForParsing (to avoid upconversion) and rework
     35        using StringParsingBuffer/ParsingUtilities to make things more clear.
     36
    1372020-06-29  Zalan Bujtas  <zalan@apple.com>
    238
  • trunk/Source/WebCore/html/parser/ParsingUtilities.h

    r263617 r263722  
    8282}
    8383
    84 template<typename CharacterType, typename DelimiterType>
    85 void skipUntil(StringParsingBuffer<CharacterType>& buffer, DelimiterType delimiter)
     84template<typename CharacterType, typename DelimiterType> void skipUntil(StringParsingBuffer<CharacterType>& buffer, DelimiterType delimiter)
    8685{
    8786    while (buffer.hasCharactersRemaining() && *buffer != delimiter)
  • trunk/Source/WebCore/loader/appcache/ApplicationCacheGroup.cpp

    r262922 r263722  
    606606    }
    607607   
    608     Manifest manifest;
    609     if (!parseManifest(m_manifestURL, m_manifestResource->response().mimeType(), m_manifestResource->data().data(), m_manifestResource->data().size(), manifest)) {
     608    auto manifest = parseApplicationCacheManifest(m_manifestURL, m_manifestResource->response().mimeType(), m_manifestResource->data().data(), m_manifestResource->data().size());
     609    if (!manifest) {
    610610        // At the time of this writing, lack of "CACHE MANIFEST" signature is the only reason for parseManifest to fail.
    611611        m_frame->document()->addConsoleMessage(MessageSource::AppCache, MessageLevel::Error, "Application Cache manifest could not be parsed. Does it start with CACHE MANIFEST?"_s);
     
    636636    }
    637637   
    638     for (const auto& explicitURL : manifest.explicitURLs)
     638    for (const auto& explicitURL : manifest->explicitURLs)
    639639        addEntry(explicitURL, ApplicationCacheResource::Explicit);
    640640
    641     for (auto& fallbackURL : manifest.fallbackURLs)
     641    for (auto& fallbackURL : manifest->fallbackURLs)
    642642        addEntry(fallbackURL.second.string(), ApplicationCacheResource::Fallback);
    643643   
    644     m_cacheBeingUpdated->setOnlineAllowlist(manifest.onlineAllowedURLs);
    645     m_cacheBeingUpdated->setFallbackURLs(manifest.fallbackURLs);
    646     m_cacheBeingUpdated->setAllowsAllNetworkRequests(manifest.allowAllNetworkRequests);
     644    m_cacheBeingUpdated->setOnlineAllowlist(manifest->onlineAllowedURLs);
     645    m_cacheBeingUpdated->setFallbackURLs(manifest->fallbackURLs);
     646    m_cacheBeingUpdated->setAllowsAllNetworkRequests(manifest->allowAllNetworkRequests);
    647647
    648648    m_progressTotal = m_pendingEntries.size();
  • trunk/Source/WebCore/loader/appcache/ManifestParser.cpp

    r262922 r263722  
    2727#include "ManifestParser.h"
    2828
     29#include "ParsingUtilities.h"
    2930#include "TextResourceDecoder.h"
    3031#include <wtf/URL.h>
     32#include <wtf/text/StringParsingBuffer.h>
    3133#include <wtf/text/StringView.h>
    32 #include <wtf/unicode/CharacterNames.h>
    3334
    3435namespace WebCore {
    3536
    36 enum Mode { Explicit, Fallback, OnlineAllowlist, Unknown };
     37enum class ApplicationCacheParserMode { Explicit, Fallback, OnlineAllowlist, Unknown };
    3738
    3839static StringView manifestPath(const URL& manifestURL)
     
    4546}
    4647
    47 bool parseManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length, Manifest& manifest)
    48 {
    49     ASSERT(manifest.explicitURLs.isEmpty());
    50     ASSERT(manifest.onlineAllowedURLs.isEmpty());
    51     ASSERT(manifest.fallbackURLs.isEmpty());
    52     manifest.allowAllNetworkRequests = false;
    53 
     48template<typename CharacterType> static constexpr bool isManifestWhitespace(CharacterType character)
     49{
     50    return character == ' ' || character == '\t';
     51}
     52
     53template<typename CharacterType> static constexpr bool isManifestNewline(CharacterType character)
     54{
     55    return character == '\n' || character == '\r';
     56}
     57
     58template<typename CharacterType> static constexpr bool isManifestWhitespaceOrNewline(CharacterType character)
     59{
     60    return isManifestWhitespace(character) || isManifestNewline(character);
     61}
     62
     63template<typename CharacterType> static URL makeManifestURL(const URL& manifestURL, const CharacterType* start, const CharacterType* end)
     64{
     65    URL url(manifestURL, String(start, end - start));
     66    url.removeFragmentIdentifier();
     67    return url;
     68}
     69
     70template<typename CharacterType> static constexpr CharacterType cacheManifestIdentifier[] = { 'C', 'A', 'C', 'H', 'E', ' ', 'M', 'A', 'N', 'I', 'F', 'E', 'S', 'T' };
     71template<typename CharacterType> static constexpr CharacterType cacheModeIdentifier[] = { 'C', 'A', 'C', 'H', 'E' };
     72template<typename CharacterType> static constexpr CharacterType fallbackModeIdentifier[] = { 'F', 'A', 'L', 'L', 'B', 'A', 'C', 'K' };
     73template<typename CharacterType> static constexpr CharacterType networkModeIdentifier[] = { 'N', 'E', 'T', 'W', 'O', 'R', 'K' };
     74
     75Optional<ApplicationCacheManifest> parseApplicationCacheManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length)
     76{
     77    static constexpr const char cacheManifestMIMEType[] = "text/cache-manifest";
     78    bool allowFallbackNamespaceOutsideManfestPath = equalLettersIgnoringASCIICase(manifestMIMEType, cacheManifestMIMEType);
    5479    auto manifestPath = WebCore::manifestPath(manifestURL);
    5580
    56     const char cacheManifestMIMEType[] = "text/cache-manifest";
    57     bool allowFallbackNamespaceOutsideManfestPath = equalLettersIgnoringASCIICase(manifestMIMEType, cacheManifestMIMEType);
    58 
    59     Mode mode = Explicit;
    60 
    61     String manifestString = TextResourceDecoder::create(ASCIILiteral::fromLiteralUnsafe(cacheManifestMIMEType), "UTF-8")->decodeAndFlush(data, length);
     81    auto manifestString = TextResourceDecoder::create(ASCIILiteral::fromLiteralUnsafe(cacheManifestMIMEType), "UTF-8")->decodeAndFlush(data, length);
     82
     83    return readCharactersForParsing(manifestString, [&](auto buffer) -> Optional<ApplicationCacheManifest> {
     84        using CharacterType = typename decltype(buffer)::CharacterType;
    6285   
    63     // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder).
    64     // Example: "CACHE MANIFEST #comment" is a valid signature.
    65     // Example: "CACHE MANIFEST;V2" is not.
    66     const char manifestSignature[] = "CACHE MANIFEST";
    67     if (!manifestString.startsWith(manifestSignature))
    68         return false;
     86        ApplicationCacheManifest manifest;
     87        auto mode = ApplicationCacheParserMode::Explicit;
     88
     89        // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder).
     90        // Example: "CACHE MANIFEST #comment" is a valid signature.
     91        // Example: "CACHE MANIFEST;V2" is not.
     92        if (!skipCharactersExactly(buffer, cacheManifestIdentifier<CharacterType>))
     93            return WTF::nullopt;
    6994   
    70     StringView manifestAfterSignature = StringView(manifestString).substring(sizeof(manifestSignature) - 1);
    71     auto upconvertedCharacters = manifestAfterSignature.upconvertedCharacters();
    72     const UChar* p = upconvertedCharacters;
    73     const UChar* end = p + manifestAfterSignature.length();
    74 
    75     if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
    76         return false;
    77 
    78     // Skip to the end of the line.
    79     while (p < end && *p != '\r' && *p != '\n')
    80         p++;
    81 
    82     while (1) {
    83         // Skip whitespace
    84         while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
    85             p++;
    86        
    87         if (p == end)
    88             break;
    89        
    90         const UChar* lineStart = p;
    91        
    92         // Find the end of the line
    93         while (p < end && *p != '\r' && *p != '\n')
    94             p++;
    95        
    96         // Check if we have a comment
    97         if (*lineStart == '#')
    98             continue;
    99        
    100         // Get rid of trailing whitespace
    101         const UChar* tmp = p - 1;
    102         while (tmp > lineStart && (*tmp == ' ' || *tmp == '\t'))
    103             tmp--;
    104        
    105         String line(lineStart, tmp - lineStart + 1);
    106 
    107         if (line == "CACHE:")
    108             mode = Explicit;
    109         else if (line == "FALLBACK:")
    110             mode = Fallback;
    111         else if (line == "NETWORK:")
    112             mode = OnlineAllowlist;
    113         else if (line.endsWith(':'))
    114             mode = Unknown;
    115         else if (mode == Unknown)
    116             continue;
    117         else if (mode == Explicit || mode == OnlineAllowlist) {
    118             auto upconvertedLineCharacters = StringView(line).upconvertedCharacters();
    119             const UChar* p = upconvertedLineCharacters;
    120             const UChar* lineEnd = p + line.length();
    121            
    122             // Look for whitespace separating the URL from subsequent ignored tokens.
    123             while (p < lineEnd && *p != '\t' && *p != ' ')
    124                 p++;
    125 
    126             if (mode == OnlineAllowlist && p - upconvertedLineCharacters == 1 && line[0] == '*') {
    127                 // Wildcard was found.
    128                 manifest.allowAllNetworkRequests = true;
    129                 continue;
    130             }
    131 
    132             URL url(manifestURL, line.substring(0, p - upconvertedLineCharacters));
    133            
    134             if (!url.isValid())
    135                 continue;
    136 
    137             url.removeFragmentIdentifier();
    138            
    139             if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol()))
    140                 continue;
    141            
    142             if (mode == Explicit && manifestURL.protocolIs("https") && !protocolHostAndPortAreEqual(manifestURL, url))
    143                 continue;
    144            
    145             if (mode == Explicit)
     95        if (buffer.hasCharactersRemaining() && !isManifestWhitespaceOrNewline(*buffer))
     96            return WTF::nullopt;
     97
     98        // Skip to the end of the line.
     99        skipUntil<CharacterType, isManifestNewline>(buffer);
     100
     101        while (1) {
     102            // Skip whitespace
     103            skipWhile<CharacterType, isManifestWhitespaceOrNewline>(buffer);
     104           
     105            if (buffer.atEnd())
     106                break;
     107           
     108            auto lineStart = buffer.position();
     109           
     110            // Find the end of the line
     111            skipUntil<CharacterType, isManifestNewline>(buffer);
     112           
     113            // Line is a comment, skip to the next line.
     114            if (*lineStart == '#')
     115                continue;
     116           
     117            // Get rid of trailing whitespace
     118            auto lineEnd = buffer.position() - 1;
     119            while (lineEnd > lineStart && isManifestWhitespace(*lineEnd))
     120                --lineEnd;
     121
     122            auto lineBuffer = StringParsingBuffer { lineStart, lineEnd + 1 };
     123
     124            if (lineBuffer[lineBuffer.lengthRemaining() - 1] == ':') {
     125                if (skipCharactersExactly(lineBuffer, cacheModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) {
     126                    mode = ApplicationCacheParserMode::Explicit;
     127                    continue;
     128                }
     129                if (skipCharactersExactly(lineBuffer, fallbackModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) {
     130                    mode = ApplicationCacheParserMode::Fallback;
     131                    continue;
     132                }
     133                if (skipCharactersExactly(lineBuffer, networkModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) {
     134                    mode = ApplicationCacheParserMode::OnlineAllowlist;
     135                    continue;
     136                }
     137
     138                // If the line (excluding the trailing whitespace) ends with a ':' and isn't one of the known mode
     139                // headers, transition to the 'Unknown' mode.
     140                mode = ApplicationCacheParserMode::Unknown;
     141                continue;
     142            }
     143   
     144            switch (mode) {
     145            case ApplicationCacheParserMode::Unknown:
     146                continue;
     147           
     148            case ApplicationCacheParserMode::Explicit: {
     149                // Look for whitespace separating the URL from subsequent ignored tokens.
     150                skipUntil<CharacterType, isManifestWhitespace>(lineBuffer);
     151
     152                auto url = makeManifestURL(manifestURL, lineStart, lineBuffer.position());
     153                if (!url.isValid())
     154                    continue;
     155               
     156                if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol()))
     157                    continue;
     158               
     159                if (manifestURL.protocolIs("https") && !protocolHostAndPortAreEqual(manifestURL, url))
     160                    continue;
     161               
    146162                manifest.explicitURLs.add(url.string());
    147             else
     163                continue;
     164            }
     165
     166            case ApplicationCacheParserMode::OnlineAllowlist: {
     167                // Look for whitespace separating the URL from subsequent ignored tokens.
     168                skipUntil<CharacterType, isManifestWhitespace>(lineBuffer);
     169
     170                if (lineBuffer.position() - lineStart == 1 && *lineStart == '*') {
     171                    // Wildcard was found.
     172                    manifest.allowAllNetworkRequests = true;
     173                    continue;
     174                }
     175               
     176                auto url = makeManifestURL(manifestURL, lineStart, lineBuffer.position());
     177                if (!url.isValid())
     178                    continue;
     179               
     180                if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol()))
     181                    continue;
     182
    148183                manifest.onlineAllowedURLs.append(url);
    149            
    150         } else if (mode == Fallback) {
    151             auto upconvertedLineCharacters = StringView(line).upconvertedCharacters();
    152             const UChar* p = upconvertedLineCharacters;
    153             const UChar* lineEnd = p + line.length();
    154            
    155             // Look for whitespace separating the two URLs
    156             while (p < lineEnd && *p != '\t' && *p != ' ')
    157                 p++;
    158 
    159             if (p == lineEnd) {
    160                 // There was no whitespace separating the URLs.
    161                 continue;
    162             }
    163            
    164             URL namespaceURL(manifestURL, line.substring(0, p - upconvertedLineCharacters));
    165             if (!namespaceURL.isValid())
    166                 continue;
    167             namespaceURL.removeFragmentIdentifier();
    168 
    169             if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL))
    170                 continue;
    171 
    172             // Although <https://html.spec.whatwg.org/multipage/offline.html#parsing-cache-manifests> (07/06/2017) saids
    173             // that we should always prefix match the manifest path we only do so if the manifest was served with a non-
    174             // standard HTTP Content-Type header for web compatibility.
    175             if (!allowFallbackNamespaceOutsideManfestPath && !namespaceURL.path().startsWith(manifestPath))
    176                 continue;
    177 
    178             // Skip whitespace separating fallback namespace from URL.
    179             while (p < lineEnd && (*p == '\t' || *p == ' '))
    180                 p++;
    181 
    182             // Look for whitespace separating the URL from subsequent ignored tokens.
    183             const UChar* fallbackStart = p;
    184             while (p < lineEnd && *p != '\t' && *p != ' ')
    185                 p++;
    186 
    187             URL fallbackURL(manifestURL, String(fallbackStart, p - fallbackStart));
    188             if (!fallbackURL.isValid())
    189                 continue;
    190             fallbackURL.removeFragmentIdentifier();
    191 
    192             if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL))
    193                 continue;
    194 
    195             manifest.fallbackURLs.append(std::make_pair(namespaceURL, fallbackURL));           
    196         } else
     184                continue;
     185            }
     186           
     187            case ApplicationCacheParserMode::Fallback: {
     188                // Look for whitespace separating the two URLs
     189                skipUntil<CharacterType, isManifestWhitespace>(lineBuffer);
     190
     191                if (lineBuffer.atEnd()) {
     192                    // There was no whitespace separating the URLs.
     193                    continue;
     194                }
     195
     196                auto namespaceURL = makeManifestURL(manifestURL, lineStart, lineBuffer.position());
     197                if (!namespaceURL.isValid())
     198                    continue;
     199
     200                if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL))
     201                    continue;
     202
     203                // Although <https://html.spec.whatwg.org/multipage/offline.html#parsing-cache-manifests> (07/06/2017) says
     204                // that we should always prefix match the manifest path we only do so if the manifest was served with a non-
     205                // standard HTTP Content-Type header for web compatibility.
     206                if (!allowFallbackNamespaceOutsideManfestPath && !namespaceURL.path().startsWith(manifestPath))
     207                    continue;
     208
     209                // Skip whitespace separating fallback namespace from URL.
     210                skipWhile<CharacterType, isManifestWhitespace>(lineBuffer);
     211
     212                auto fallbackStart = lineBuffer.position();
     213
     214                // Look for whitespace separating the URL from subsequent ignored tokens.
     215                skipUntil<CharacterType, isManifestWhitespace>(lineBuffer);
     216
     217                auto fallbackURL = makeManifestURL(manifestURL, fallbackStart, lineBuffer.position());
     218                if (!fallbackURL.isValid())
     219                    continue;
     220
     221                if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL))
     222                    continue;
     223
     224                manifest.fallbackURLs.append(std::make_pair(namespaceURL, fallbackURL));
     225                continue;
     226            }
     227            }
     228           
    197229            ASSERT_NOT_REACHED();
    198     }
    199 
    200     return true;
    201 }
    202 
    203 }
     230        }
     231
     232        return manifest;
     233    });
     234}
     235
     236}
  • trunk/Source/WebCore/loader/appcache/ManifestParser.h

    r262922 r263722  
    3131namespace WebCore {
    3232
    33 struct Manifest {
     33struct ApplicationCacheManifest {
    3434    Vector<URL> onlineAllowedURLs;
    3535    HashSet<String> explicitURLs;
    3636    FallbackURLVector fallbackURLs;
    37     bool allowAllNetworkRequests; // Wildcard found in NETWORK section.
     37    bool allowAllNetworkRequests { false }; // Wildcard found in NETWORK section.
    3838};
    3939
    40 bool parseManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length, Manifest&);
     40Optional<ApplicationCacheManifest> parseApplicationCacheManifest(const URL& manifestURL, const String& manifestMIMEType, const char* data, int length);
    4141
    4242} // namespace WebCore
Note: See TracChangeset for help on using the changeset viewer.