Changeset 241863 in webkit
- Timestamp:
- Feb 21, 2019 12:39:00 AM (5 years ago)
- Location:
- trunk
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/imported/w3c/ChangeLog
r241746 r241863 1 2019-02-21 Rob Buis <rbuis@igalia.com> 2 3 Update MIME type parser 4 https://bugs.webkit.org/show_bug.cgi?id=180526 5 6 Reviewed by Darin Adler. 7 8 Update improved test expectations. 9 10 * web-platform-tests/xhr/overridemimetype-blob-expected.txt: 11 1 12 2019-02-18 Oriol Brufau <obrufau@igalia.com> 2 13 -
trunk/LayoutTests/imported/w3c/web-platform-tests/xhr/overridemimetype-blob-expected.txt
r240706 r241863 60 60 PASS 54) MIME types need to be parsed and serialized: ÿ/ÿ 61 61 PASS 55) MIME types need to be parsed and serialized: text/html(;doesnot=matter 62 FAIL 56) MIME types need to be parsed and serialized: {/} assert_equals: expected "application/octet-stream" but got "{/}" 62 PASS 56) MIME types need to be parsed and serialized: {/} 63 63 PASS 57) MIME types need to be parsed and serialized: Ā/Ā 64 64 -
trunk/Source/WebCore/ChangeLog
r241860 r241863 1 2019-02-21 Rob Buis <rbuis@igalia.com> 2 3 Update MIME type parser 4 https://bugs.webkit.org/show_bug.cgi?id=180526 5 6 Reviewed by Darin Adler. 7 8 Further testing showed the MIME parser needs these fixes: 9 - stripWhitespace is wrong for removing HTTP whitespace, use 10 stripLeadingAndTrailingHTTPSpaces instead. 11 - HTTP Token code points checking for Rfc2045 and Mimesniff were 12 mixed up, use the dedicated isValidHTTPToken for Mimesniff mode. 13 - Quoted Strings were not unescaped/escaped, this seems ok for 14 serializing but is wrong when gettings individual parameter values. 15 Implement [1] and [2] Step 2.4 to properly unescape and escape. 16 17 This change also tries to avoid hard to read uses of find. 18 19 Test: ParsedContentType.Serialize 20 21 [1] https://fetch.spec.whatwg.org/#collect-an-http-quoted-string 22 [2] https://mimesniff.spec.whatwg.org/#serializing-a-mime-type 23 24 * platform/network/ParsedContentType.cpp: 25 (WebCore::skipSpaces): 26 (WebCore::parseToken): 27 (WebCore::isNotQuoteOrBackslash): 28 (WebCore::collectHTTPQuotedString): 29 (WebCore::containsNonTokenCharacters): 30 (WebCore::parseQuotedString): 31 (WebCore::ParsedContentType::parseContentType): 32 (WebCore::ParsedContentType::create): 33 (WebCore::ParsedContentType::setContentType): 34 (WebCore::containsNonQuoteStringTokenCharacters): 35 (WebCore::ParsedContentType::setContentTypeParameter): 36 (WebCore::ParsedContentType::serialize const): 37 (WebCore::substringForRange): Deleted. 38 (WebCore::isNonTokenCharacter): Deleted. 39 (WebCore::isNonQuotedStringTokenCharacter): Deleted. 40 * platform/network/ParsedContentType.h: 41 1 42 2019-02-20 Simon Fraser <simon.fraser@apple.com> 2 43 -
trunk/Source/WebCore/platform/network/ParsedContentType.cpp
r241291 r241863 39 39 namespace WebCore { 40 40 41 static void skipSpaces( const String&input, unsigned& startIndex)41 static void skipSpaces(StringView input, unsigned& startIndex) 42 42 { 43 43 while (startIndex < input.length() && isHTTPSpace(input[startIndex])) … … 57 57 using CharacterMeetsCondition = bool (*)(UChar); 58 58 59 static Optional<S ubstringRange> parseToken(const String&input, unsigned& startIndex, CharacterMeetsCondition characterMeetsCondition, Mode mode, bool skipTrailingWhitespace = false)59 static Optional<StringView> parseToken(StringView input, unsigned& startIndex, CharacterMeetsCondition characterMeetsCondition, Mode mode, bool skipTrailingWhitespace = false) 60 60 { 61 61 unsigned inputLength = input.length(); … … 78 78 --tokenEnd; 79 79 } 80 return SubstringRange(tokenStart, tokenEnd - tokenStart); 81 } 82 83 static bool containsNonTokenCharacters(const String& input, SubstringRange range) 84 { 85 for (unsigned index = 0; index < range.second; ++index) { 86 if (!isTokenCharacter(input[range.first + index])) 80 return input.substring(tokenStart, tokenEnd - tokenStart); 81 } 82 83 static bool isNotQuoteOrBackslash(UChar ch) 84 { 85 return ch != '"' && ch != '\\'; 86 } 87 88 static String collectHTTPQuotedString(StringView input, unsigned& startIndex) 89 { 90 ASSERT(input[startIndex] == '"'); 91 unsigned inputLength = input.length(); 92 unsigned& position = startIndex; 93 position++; 94 StringBuilder builder; 95 while (true) { 96 unsigned positionStart = position; 97 parseToken(input, position, isNotQuoteOrBackslash, Mode::MimeSniff); 98 builder.append(input.substring(positionStart, position - positionStart)); 99 if (position >= inputLength) 100 break; 101 UChar quoteOrBackslash = input[position++]; 102 if (quoteOrBackslash == '\\') { 103 if (position >= inputLength) { 104 builder.append(quoteOrBackslash); 105 break; 106 } 107 builder.append(input[position++]); 108 } else { 109 ASSERT(quoteOrBackslash == '"'); 110 break; 111 } 112 113 } 114 return builder.toString(); 115 } 116 117 static bool containsNonTokenCharacters(StringView input, Mode mode) 118 { 119 if (mode == Mode::MimeSniff) 120 return !isValidHTTPToken(input.toStringWithoutCopying()); 121 for (unsigned index = 0; index < input.length(); ++index) { 122 if (!isTokenCharacter(input[index])) 87 123 return true; 88 124 } … … 90 126 } 91 127 92 static Optional<S ubstringRange> parseQuotedString(const String& input, unsigned& startIndex, Mode mode)128 static Optional<StringView> parseQuotedString(StringView input, unsigned& startIndex) 93 129 { 94 130 unsigned inputLength = input.length(); … … 99 135 return WTF::nullopt; 100 136 101 if (input[quotedStringEnd++] != '"' || quotedStringEnd >= inputLength) { 102 if (mode == Mode::Rfc2045) 103 return WTF::nullopt; 104 return SubstringRange(quotedStringStart, quotedStringEnd - quotedStringStart); 105 } 137 if (input[quotedStringEnd++] != '"' || quotedStringEnd >= inputLength) 138 return WTF::nullopt; 106 139 107 140 bool lastCharacterWasBackslash = false; 108 141 char currentCharacter; 109 142 while ((currentCharacter = input[quotedStringEnd++]) != '"' || lastCharacterWasBackslash) { 110 if (quotedStringEnd >= inputLength) { 111 if (mode == Mode::Rfc2045) 112 return WTF::nullopt; 113 break; 114 } 143 if (quotedStringEnd >= inputLength) 144 return WTF::nullopt; 115 145 if (currentCharacter == '\\' && !lastCharacterWasBackslash) { 116 146 lastCharacterWasBackslash = true; … … 121 151 } 122 152 if (input[quotedStringEnd - 1] == '"') 123 return SubstringRange(quotedStringStart, quotedStringEnd - quotedStringStart - 1); 124 return SubstringRange(quotedStringStart, quotedStringEnd - quotedStringStart); 125 } 126 127 static String substringForRange(const String& string, const SubstringRange& range) 128 { 129 return string.substring(range.first, range.second); 153 quotedStringEnd++; 154 return input.substring(quotedStringStart, quotedStringEnd - quotedStringStart); 130 155 } 131 156 … … 210 235 unsigned contentTypeStart = index; 211 236 auto typeRange = parseToken(m_contentType, index, isNotForwardSlash, mode); 212 if (!typeRange || containsNonTokenCharacters( m_contentType, *typeRange)) {237 if (!typeRange || containsNonTokenCharacters(*typeRange, mode)) { 213 238 LOG_ERROR("Invalid Content-Type, invalid type value."); 214 239 return false; 215 240 } 216 241 217 if ( m_contentType[index++] != '/') {242 if (index >= contentTypeLength || m_contentType[index++] != '/') { 218 243 LOG_ERROR("Invalid Content-Type, missing '/'."); 219 244 return false; … … 221 246 222 247 auto subTypeRange = parseToken(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); 223 if (!subTypeRange || containsNonTokenCharacters( m_contentType, *subTypeRange)) {248 if (!subTypeRange || containsNonTokenCharacters(*subTypeRange, mode)) { 224 249 LOG_ERROR("Invalid Content-Type, invalid subtype value."); 225 250 return false; … … 229 254 size_t semiColonIndex = m_contentType.find(';', contentTypeStart); 230 255 if (semiColonIndex == notFound) { 231 setContentType( SubstringRange(contentTypeStart, contentTypeLength - contentTypeStart), mode);256 setContentType(m_contentType.substring(contentTypeStart, contentTypeLength - contentTypeStart), mode); 232 257 return true; 233 258 } 234 259 235 setContentType( SubstringRange(contentTypeStart, semiColonIndex - contentTypeStart), mode);260 setContentType(m_contentType.substring(contentTypeStart, semiColonIndex - contentTypeStart), mode); 236 261 index = semiColonIndex + 1; 237 262 while (true) { … … 245 270 // Should we tolerate spaces here? 246 271 if (mode == Mode::Rfc2045) { 247 if ( m_contentType[index++] != '=' || index >= contentTypeLength) {272 if (index >= contentTypeLength || m_contentType[index++] != '=') { 248 273 LOG_ERROR("Invalid Content-Type malformed parameter."); 249 274 return false; … … 259 284 continue; 260 285 } 261 262 String parameterName = substringForRange(m_contentType, *keyRange); 286 String parameterName = keyRange->toString(); 263 287 264 288 // Should we tolerate spaces here? 265 Optional<SubstringRange> valueRange; 266 if (m_contentType[index] == '"') { 267 valueRange = parseQuotedString(m_contentType, index, mode); 268 if (mode == Mode::MimeSniff) 289 String parameterValue; 290 Optional<StringView> valueRange; 291 if (index < contentTypeLength && m_contentType[index] == '"') { 292 if (mode == Mode::MimeSniff) { 293 parameterValue = collectHTTPQuotedString(m_contentType, index); 269 294 parseToken(m_contentType, index, isNotSemicolon, mode); 295 } else 296 valueRange = parseQuotedString(m_contentType, index); 270 297 } else 271 298 valueRange = parseToken(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); 272 299 273 if (!valueRange) { 274 if (mode == Mode::MimeSniff) 275 continue; 276 LOG_ERROR("Invalid Content-Type, invalid parameter value."); 277 return false; 278 } 279 280 String parameterValue = substringForRange(m_contentType, *valueRange); 300 301 if (parameterValue.isNull()) { 302 if (!valueRange) { 303 if (mode == Mode::MimeSniff) 304 continue; 305 LOG_ERROR("Invalid Content-Type, invalid parameter value."); 306 return false; 307 } 308 parameterValue = valueRange->toString(); 309 } 310 281 311 // Should we tolerate spaces here? 282 312 if (mode == Mode::Rfc2045 && index < contentTypeLength && m_contentType[index++] != ';') { … … 296 326 Optional<ParsedContentType> ParsedContentType::create(const String& contentType, Mode mode) 297 327 { 298 ParsedContentType parsedContentType(mode == Mode::Rfc2045 ? contentType : contentType.stripWhiteSpace());328 ParsedContentType parsedContentType(mode == Mode::Rfc2045 ? contentType : stripLeadingAndTrailingHTTPSpaces(contentType)); 299 329 if (!parsedContentType.parseContentType(mode)) 300 330 return WTF::nullopt; … … 327 357 } 328 358 329 void ParsedContentType::setContentType( const SubstringRange&contentRange, Mode mode)330 { 331 m_mimeType = substringForRange(m_contentType, contentRange).stripWhiteSpace();359 void ParsedContentType::setContentType(StringView contentRange, Mode mode) 360 { 361 m_mimeType = contentRange.toString(); 332 362 if (mode == Mode::MimeSniff) 333 m_mimeType = m_mimeType.convertToASCIILowercase(); 334 } 335 336 static bool isNonTokenCharacter(UChar ch) 337 { 338 return !isTokenCharacter(ch); 339 } 340 341 static bool isNonQuotedStringTokenCharacter(UChar ch) 342 { 343 return !isQuotedStringTokenCharacter(ch); 363 m_mimeType = stripLeadingAndTrailingHTTPSpaces(m_mimeType).convertToASCIILowercase(); 364 else 365 m_mimeType = m_mimeType.stripWhiteSpace(); 366 } 367 368 static bool containsNonQuoteStringTokenCharacters(const String& input) 369 { 370 for (unsigned index = 0; index < input.length(); ++index) { 371 if (!isQuotedStringTokenCharacter(input[index])) 372 return true; 373 } 374 return false; 344 375 } 345 376 … … 348 379 String name = keyName; 349 380 if (mode == Mode::MimeSniff) { 350 if (m_parameterValues.contains( keyName) || keyName.find(isNonTokenCharacter) != notFound || keyValue.find(isNonQuotedStringTokenCharacter) != notFound)381 if (m_parameterValues.contains(name) || !isValidHTTPToken(name) || containsNonQuoteStringTokenCharacters(keyValue)) 351 382 return; 352 383 name = name.convertToASCIILowercase(); … … 365 396 builder.append('='); 366 397 String value = m_parameterValues.get(name); 367 if (value.isEmpty() || value.find(isNonTokenCharacter) != notFound) {398 if (value.isEmpty() || !isValidHTTPToken(value)) { 368 399 builder.append('"'); 369 builder.append(value); 400 for (unsigned index = 0; index < value.length(); ++index) { 401 auto ch = value[index]; 402 if (ch == '\\' || ch =='"') 403 builder.append('\\'); 404 builder.append(ch); 405 } 370 406 builder.append('"'); 371 407 } else -
trunk/Source/WebCore/platform/network/ParsedContentType.h
r241291 r241863 41 41 MimeSniff 42 42 }; 43 // <index, length>44 typedef std::pair<unsigned, unsigned> SubstringRange;45 43 WEBCORE_EXPORT bool isValidContentType(const String&, Mode = Mode::MimeSniff); 46 44 … … 65 63 ParsedContentType& operator=(ParsedContentType const&) = delete; 66 64 bool parseContentType(Mode); 67 void setContentType( const SubstringRange&, Mode);65 void setContentType(StringView, Mode); 68 66 void setContentTypeParameter(const String&, const String&, Mode); 69 67 -
trunk/Tools/ChangeLog
r241858 r241863 1 2019-02-21 Rob Buis <rbuis@igalia.com> 2 3 Update MIME type parser 4 https://bugs.webkit.org/show_bug.cgi?id=180526 5 6 Reviewed by Darin Adler. 7 8 Add tests involving leading and trailing whitespace, non-token 9 characters and quoted strings. 10 11 * TestWebKitAPI/Tests/WebCore/ParsedContentType.cpp: 12 (TestWebKitAPI::TEST): 13 1 14 2019-02-20 Don Olmstead <don.olmstead@sony.com> 2 15 -
trunk/Tools/TestWebKitAPI/Tests/WebCore/ParsedContentType.cpp
r241291 r241863 150 150 EXPECT_STREQ(serializeIfValid("text/\0"), "NOTVALID"); 151 151 EXPECT_STREQ(serializeIfValid("text/ "), "NOTVALID"); 152 EXPECT_STREQ(serializeIfValid("{/}"), "NOTVALID"); 153 EXPECT_STREQ(serializeIfValid("x/x ;x=x"), "x/x;x=x"); 152 154 EXPECT_STREQ(serializeIfValid("text/plain"), "text/plain"); 153 155 EXPECT_STREQ(serializeIfValid("text/plain\0"), "text/plain"); … … 157 159 EXPECT_STREQ(serializeIfValid("\rtext/plain"), "text/plain"); 158 160 EXPECT_STREQ(serializeIfValid("\btext/plain"), "NOTVALID"); 161 EXPECT_STREQ(serializeIfValid("\x0Btext/plain"), "NOTVALID"); 162 EXPECT_STREQ(serializeIfValid("\u000Btext/plain"), "NOTVALID"); 159 163 EXPECT_STREQ(serializeIfValid("text/plain "), "text/plain"); 160 164 EXPECT_STREQ(serializeIfValid("text/plain\t"), "text/plain"); … … 162 166 EXPECT_STREQ(serializeIfValid("text/plain\r"), "text/plain"); 163 167 EXPECT_STREQ(serializeIfValid("text/plain\b"), "NOTVALID"); 168 EXPECT_STREQ(serializeIfValid("text/plain\x0B"), "NOTVALID"); 169 EXPECT_STREQ(serializeIfValid("text/plain\u000B"), "NOTVALID"); 164 170 EXPECT_STREQ(serializeIfValid(" text/plain "), "text/plain"); 165 171 EXPECT_STREQ(serializeIfValid("\ttext/plain\t"), "text/plain"); … … 237 243 EXPECT_STREQ(serializeIfValid("text/plain;test=\"value\"foo"), "text/plain;test=value"); 238 244 EXPECT_STREQ(serializeIfValid("text/plain;test=\"value\"foo;foo=bar"), "text/plain;test=value;foo=bar"); 239 EXPECT_STREQ(serializeIfValid("text/plain;test=\"val\\ue\""), "text/plain;test= \"val\\ue\"");245 EXPECT_STREQ(serializeIfValid("text/plain;test=\"val\\ue\""), "text/plain;test=value"); 240 246 EXPECT_STREQ(serializeIfValid("text/plain;test=\"val\\\"ue\""), "text/plain;test=\"val\\\"ue\""); 241 247 EXPECT_STREQ(serializeIfValid("text/plain;test='value'"), "text/plain;test='value'"); 242 248 EXPECT_STREQ(serializeIfValid("text/plain;test='value"), "text/plain;test='value"); 243 249 EXPECT_STREQ(serializeIfValid("text/plain;test=value'"), "text/plain;test=value'"); 250 EXPECT_STREQ(serializeIfValid("text/plain;test={value}"), "text/plain;test=\"{value}\""); 244 251 } 245 252
Note: See TracChangeset
for help on using the changeset viewer.