Changeset 39162 in webkit
- Timestamp:
- Dec 9, 2008 8:59:06 PM (15 years ago)
- Location:
- trunk
- Files:
-
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/ChangeLog
r39161 r39162
1 2008-12-09 Geoffrey Garen <ggaren@apple.com>
2
3 Reviewed by Cameron Zwarich.
4
5 In preparation for compiling WREC without PCRE:
6
7 Further relaxed WREC's parsing to be more web-compatible. Fixed PCRE to
8 match in cases where it didn't already.
9
10 Changed JavaScriptCore to report syntax errors detected by WREC, rather
11 than falling back on PCRE any time WREC sees an error.
12
13 * pcre/pcre_compile.cpp:
14 (checkEscape): Relaxed parsing of \c and \N escapes to be more
15 web-compatible.
16
17 * runtime/RegExp.cpp:
18 (JSC::RegExp::RegExp): Only fall back on PCRE if WREC has not reported
19 a syntax error.
20
21 * wrec/WREC.cpp:
22 (JSC::WREC::Generator::compileRegExp): Fixed some error reporting to
23 match PCRE.
24
25 * wrec/WRECParser.cpp: Added error messages that match PCRE.
26
27 (JSC::WREC::Parser::consumeGreedyQuantifier):
28 (JSC::WREC::Parser::parseParentheses):
29 (JSC::WREC::Parser::parseCharacterClass):
30 (JSC::WREC::Parser::parseNonCharacterEscape): Updated the above functions to
31 use the new setError API.
32
33 (JSC::WREC::Parser::consumeEscape): Relaxed parsing of \c \N \u \x \B
34 to be more web-compatible.
35
36 (JSC::WREC::Parser::parseAlternative): Distinguish between a malformed
37 quantifier and a quantifier with no prefix, like PCRE does.
38
39 (JSC::WREC::Parser::consumeParenthesesType): Updated to use the new setError API.
40
41 * wrec/WRECParser.h:
42 (JSC::WREC::Parser::error):
43 (JSC::WREC::Parser::syntaxError):
44 (JSC::WREC::Parser::parsePattern):
45 (JSC::WREC::Parser::reset):
46 (JSC::WREC::Parser::setError): Store error messages instead of error codes,
47 to provide for exception messages. Use a setter for reporting errors, so
48 errors detected early are not overwritten by errors detected later.
49
1 50 2008-12-09 Gavin Barraclough <barraclough@apple.com>
2 51
-
trunk/JavaScriptCore/pcre/pcre_compile.cpp
r34858 r39162 237 237 this is not octal. */ 238 238 239 if ((c = *ptr) >= '8') 239 if ((c = *ptr) >= '8') { 240 c = '\\'; 241 ptr -= 1; 240 242 break; 243 } 241 244 242 245 /* \0 always starts an octal number, but we may drop through to here with a … … 299 302 return 0; 300 303 } 304 301 305 c = *ptr; 302 306 if (!isASCIIAlpha(c)) { 307 c = '\\'; 308 ptr -= 2; 309 break; 310 } 311 303 312 /* A letter is upper-cased; then the 0x40 bit is flipped. This coding 304 313 is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */ -
trunk/JavaScriptCore/runtime/RegExp.cpp
r39089 r39162 48 48 #if ENABLE(WREC) 49 49 m_wrecFunction = Generator::compileRegExp(globalData, pattern, &m_numSubpatterns, &m_constructionError, m_executablePool); 50 if (m_wrecFunction )50 if (m_wrecFunction || m_constructionError) 51 51 return; 52 52 // Fall through to non-WREC case. … … 91 91 #if ENABLE(WREC) 92 92 m_wrecFunction = Generator::compileRegExp(globalData, pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, (m_flagBits & IgnoreCase), (m_flagBits & Multiline)); 93 if (m_wrecFunction )93 if (m_wrecFunction || m_constructionError) 94 94 return; 95 95 // Fall through to non-WREC case. -
trunk/JavaScriptCore/wrec/WREC.cpp
r39128 r39162 47 47 { 48 48 if (pattern.size() > MaxPatternSize) { 49 *error_ptr = " Regular expression too large.";49 *error_ptr = "regular expression too large"; 50 50 return 0; 51 51 } … … 76 76 77 77 if (parser.error()) { 78 *error_ptr = "Regular expression malformed.";78 *error_ptr = parser.syntaxError(); // NULL in the case of patterns that WREC doesn't support yet. 79 79 return 0; 80 80 } -
trunk/JavaScriptCore/wrec/WRECParser.cpp
r39130 r39162 36 36 namespace JSC { namespace WREC { 37 37 38 // These error messages match the error messages used by PCRE. 39 const char* Parser::QuantifierOutOfOrder = "numbers out of order in {} quantifier"; 40 const char* Parser::QuantifierWithoutAtom = "nothing to repeat"; 41 const char* Parser::ParenthesesUnmatched = "unmatched parentheses"; 42 const char* Parser::ParenthesesTypeInvalid = "unrecognized character after (?"; 43 const char* Parser::ParenthesesNotSupported = ""; // Not a user-visible syntax error -- just signals a syntax that WREC doesn't support yet. 44 const char* Parser::CharacterClassUnmatched = "missing terminating ] for character class"; 45 const char* Parser::CharacterClassOutOfOrder = "range out of order in character class"; 46 const char* Parser::EscapeUnterminated = "\\ at end of pattern"; 47 38 48 class PatternCharacterSequence { 39 49 typedef Generator::JumpList JumpList; … … 140 150 141 151 if (min > max) { 142 m_error = MalformedQuantifier;152 setError(QuantifierOutOfOrder); 143 153 return Quantifier(Quantifier::Error); 144 154 } … … 233 243 234 244 default: 235 m_error = UnsupportedParentheses;245 setError(ParenthesesNotSupported); 236 246 return false; 237 247 } 238 248 239 249 if (consume() != ')') { 240 m_error = MalformedParentheses;250 setError(ParenthesesUnmatched); 241 251 return false; 242 252 } … … 249 259 250 260 case Quantifier::Greedy: 251 m_error = UnsupportedParentheses;261 setError(ParenthesesNotSupported); 252 262 return false; 253 263 254 264 case Quantifier::NonGreedy: 255 m_error = UnsupportedParentheses;265 setError(ParenthesesNotSupported); 256 266 return false; 257 267 … … 274 284 CharacterClassConstructor constructor(m_ignoreCase); 275 285 276 UCharch;286 int ch; 277 287 while ((ch = peek()) != ']') { 278 288 switch (ch) { 279 289 case EndOfPattern: 280 m_error = MalformedCharacterClass;290 setError(CharacterClassUnmatched); 281 291 return false; 282 292 … … 299 309 break; 300 310 } 301 case Escape::Error: { 
302 m_error = MalformedEscape; 311 case Escape::Error: 303 312 return false; 304 }305 313 case Escape::Backreference: 306 314 case Escape::WordBoundaryAssertion: { … … 321 329 // lazily catch reversed ranges ([z-a])in character classes 322 330 if (constructor.isUpsideDown()) { 323 m_error = MalformedCharacterClass;331 setError(CharacterClassOutOfOrder); 324 332 return false; 325 333 } … … 348 356 349 357 case Escape::Error: 350 m_error = MalformedEscape;351 358 return false; 352 359 } … … 360 367 switch (peek()) { 361 368 case EndOfPattern: 369 setError(EscapeUnterminated); 362 370 return Escape(Escape::Error); 363 371 … … 371 379 consume(); 372 380 if (inCharacterClass) 373 return Escape(Escape::Error);381 return PatternCharacterEscape('B'); 374 382 return WordBoundaryAssertionEscape(true); // invert 375 383 … … 413 421 // To match Firefox, we parse an invalid backreference in the range [1-7] 414 422 // as an octal escape. 415 return peekDigit() > 7 ? Escape(Escape::Error) : PatternCharacterEscape(consumeOctal());423 return peekDigit() > 7 ? 
PatternCharacterEscape('\\') : PatternCharacterEscape(consumeOctal()); 416 424 } 417 425 … … 452 460 // ControlLetter 453 461 case 'c': { 454 consume(); 462 SavedState state(*this); 463 consume(); 464 455 465 int control = consume(); 456 if (!isASCIIAlpha(control)) 457 return Escape(Escape::Error); 466 if (!isASCIIAlpha(control)) { 467 state.restore(); 468 return PatternCharacterEscape('\\'); 469 } 458 470 return PatternCharacterEscape(control & 31); 459 471 } … … 462 474 case 'x': { 463 475 consume(); 476 477 SavedState state(*this); 464 478 int x = consumeHex(2); 465 if (x == -1) 466 return Escape(Escape::Error); 479 if (x == -1) { 480 state.restore(); 481 return PatternCharacterEscape('x'); 482 } 467 483 return PatternCharacterEscape(x); 468 484 } … … 471 487 case 'u': { 472 488 consume(); 489 490 SavedState state(*this); 473 491 int x = consumeHex(4); 474 if (x == -1) 475 return Escape(Escape::Error); 492 if (x == -1) { 493 state.restore(); 494 return PatternCharacterEscape('u'); 495 } 476 496 return PatternCharacterEscape(x); 477 497 } … … 506 526 } 507 527 508 if (q.type == Quantifier::Error || !sequence.size()) { 509 m_error = MalformedQuantifier; 528 if (q.type == Quantifier::Error) 529 return; 530 531 if (!sequence.size()) { 532 setError(QuantifierWithoutAtom); 510 533 return; 511 534 } … … 611 634 612 635 default: 613 m_error = MalformedParentheses;636 setError(ParenthesesTypeInvalid); 614 637 return Generator::Error; 615 638 } -
trunk/JavaScriptCore/wrec/WRECParser.h
r39130 r39162 48 48 49 49 public: 50 enum Error {51 NoError,52 MalformedCharacterClass,53 MalformedParentheses,54 MalformedPattern,55 MalformedQuantifier,56 MalformedEscape,57 UnsupportedParentheses,58 };59 60 50 Parser(const UString& pattern, bool ignoreCase, bool multiline) 61 51 : m_generator(*this) … … 76 66 unsigned numSubpatterns() const { return m_numSubpatterns; } 77 67 78 Error error() const { return m_error; } 68 const char* error() const { return m_error; } 69 const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; } 79 70 80 71 void parsePattern(JumpList& failures) … … 85 76 86 77 if (peek() != EndOfPattern) 87 m_error = MalformedPattern; // Parsing the pattern should fully consume it.78 setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it. 88 79 } 89 80 … … 120 111 m_index = 0; 121 112 m_numSubpatterns = 0; 122 m_error = NoError; 113 m_error = 0; 114 } 115 116 void setError(const char* error) 117 { 118 if (m_error) 119 return; 120 m_error = error; 123 121 } 124 122 … … 189 187 190 188 static const int EndOfPattern = -1; 189 190 // Error messages. 191 static const char* QuantifierOutOfOrder; 192 static const char* QuantifierWithoutAtom; 193 static const char* ParenthesesUnmatched; 194 static const char* ParenthesesTypeInvalid; 195 static const char* ParenthesesNotSupported; 196 static const char* CharacterClassUnmatched; 197 static const char* CharacterClassOutOfOrder; 198 static const char* EscapeUnterminated; 191 199 192 200 Generator m_generator; … … 197 205 bool m_multiline; 198 206 unsigned m_numSubpatterns; 199 Errorm_error;207 const char* m_error; 200 208 }; 201 209 -
trunk/LayoutTests/ChangeLog
r39159 r39162
1 2008-12-09 Geoffrey Garen <ggaren@apple.com>
2
3 Reviewed by Cameron Zwarich.
4
5 Updated regular expression layout tests to be agnostic between WREC
6 and PCRE quirks. Also, updated results to match new, more web-compatible
7 regular expression parsing.
8
9 * fast/js/regexp-charclass-crash-expected.txt:
10 * fast/js/regexp-charclass-crash.html:
11 * fast/js/regexp-no-extensions-expected.txt:
12 * fast/js/resources/regexp-no-extensions.js:
13 * fast/regex/test1-expected.txt:
14
1 15 2008-12-09 David Levin <levin@chromium.org>
2 16
-
trunk/LayoutTests/fast/js/regexp-charclass-crash-expected.txt
r24430 r39162 1 1 Tests a crash in the regular expression engine. If this stops with a single "regular expression too large" exception, then the test succeeded. 2 2 3 Got up to iteration 1872and then got this exception: SyntaxError: Invalid regular expression: regular expression too large.3 Got over 1000 iterations and then got this exception: SyntaxError: Invalid regular expression: regular expression too large. -
trunk/LayoutTests/fast/js/regexp-charclass-crash.html
r24430 r39162 12 12 new RegExp(string); 13 13 } catch (exception) { 14 if (/too large/.test(exception) ) {15 document.writeln("<div>Got up to iteration " + i + "and then got this exception: " + exception + ".</div>");14 if (/too large/.test(exception) && i > 1000) { 15 document.writeln("<div>Got over 1000 iterations and then got this exception: " + exception + ".</div>"); 16 16 break; 17 17 } -
trunk/LayoutTests/fast/js/regexp-no-extensions-expected.txt
r27752 r39162 11 11 PASS /\2147483648/.exec(String.fromCharCode(140) + "7483648").toString() is String.fromCharCode(140) + "7483648" 12 12 PASS /\4294967296/.exec("\"94967296").toString() is "\"94967296" 13 PASS /\8589934592/.exec(" 8589934592").toString() is "8589934592"13 PASS /\8589934592/.exec("\\8589934592").toString() is "\\8589934592" 14 14 PASS "\nAbc\n".replace(/(\n)[^\n]+$/, "$1") is "\nAbc\n" 15 15 PASS /x$/.exec("x\n") is null … … 34 34 PASS /[\1q]/.exec("y" + String.fromCharCode(1) + "q").toString() is String.fromCharCode(1) 35 35 PASS /[\1q]/.exec("yq").toString() is "q" 36 PASS /\8q/.exec(" y8q").toString() is "8q"36 PASS /\8q/.exec("\\8q").toString() is "\\8q" 37 37 PASS /[\8q]/.exec("y8q").toString() is "8" 38 38 PASS /[\8q]/.exec("yq").toString() is "q" -
trunk/LayoutTests/fast/js/resources/regexp-no-extensions.js
r27752 r39162 12 12 shouldBe('/\\2147483648/.exec(String.fromCharCode(140) + "7483648").toString()', 'String.fromCharCode(140) + "7483648"'); 13 13 shouldBe('/\\4294967296/.exec("\\"94967296").toString()', '"\\"94967296"'); 14 shouldBe('/\\8589934592/.exec(" 8589934592").toString()', '"8589934592"');14 shouldBe('/\\8589934592/.exec("\\\\8589934592").toString()', '"\\\\8589934592"'); 15 15 shouldBe('"\\nAbc\\n".replace(/(\\n)[^\\n]+$/, "$1")', '"\\nAbc\\n"'); 16 16 shouldBe('/x$/.exec("x\\n")', 'null'); … … 37 37 shouldBe('/[\\1q]/.exec("y" + String.fromCharCode(1) + "q").toString()', 'String.fromCharCode(1)'); 38 38 shouldBe('/[\\1q]/.exec("yq").toString()', '"q"'); 39 shouldBe('/\\8q/.exec(" y8q").toString()', '"8q"');39 shouldBe('/\\8q/.exec("\\\\8q").toString()', '"\\\\8q"'); 40 40 shouldBe('/[\\8q]/.exec("y8q").toString()', '"8"'); 41 41 shouldBe('/[\\8q]/.exec("yq").toString()', '"q"'); -
trunk/LayoutTests/fast/regex/test1-expected.txt
r30517 r39162 109 109 110 110 /^\ca\cA\c[\c{\c:/ 111 \e;z: FAIL. Actual results: "null" 111 FAILED TO COMPILE 112 112 113 113 /^[ab\]cde]/
Note: See TracChangeset
for help on using the changeset viewer.