Changeset 259262 in webkit
- Timestamp:
- Mar 30, 2020 6:27:10 PM (4 years ago)
- Location:
- trunk
- Files:
-
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JSTests/ChangeLog
r259246 r259262 1 2020-03-30 Alexey Shvayka <shvaikalesh@gmail.com> 2 3 Add support in named capture group identifiers for direct surrogate pairs 4 https://bugs.webkit.org/show_bug.cgi?id=178174 5 6 Reviewed by Darin Adler and Michael Saboff. 7 8 * test262/expectations.yaml: Mark 2 test cases as passing. 9 1 10 2020-03-30 Ross Kirsling <ross.kirsling@sony.com> 2 11 -
trunk/JSTests/test262/expectations.yaml
r259246 r259262 1247 1247 default: 'Test262Error: Expected [Symbol(b), Symbol(a)] and [Symbol(a), Symbol(b)] to have the same contents. ' 1248 1248 strict mode: 'Test262Error: Expected [Symbol(b), Symbol(a)] and [Symbol(a), Symbol(b)] to have the same contents. ' 1249 test/built-ins/RegExp/named-groups/unicode-property-names.js:1250 default: 'SyntaxError: Invalid regular expression: invalid group specifier name'1251 strict mode: 'SyntaxError: Invalid regular expression: invalid group specifier name'1252 1249 test/built-ins/RegExp/property-escapes/generated/Alphabetic.js: 1253 1250 default: 'Test262Error: `\p{Alphabetic}` should match U+001CFA (`ᳺ`)' -
trunk/LayoutTests/ChangeLog
r259261 r259262 1 2020-03-30 Alexey Shvayka <shvaikalesh@gmail.com> 2 3 Add support in named capture group identifiers for direct surrogate pairs 4 https://bugs.webkit.org/show_bug.cgi?id=178174 5 6 Reviewed by Darin Adler and Michael Saboff. 7 8 Adjusted tests for error message changes and added coverage for messages 9 of syntax errors due to invalid \u escapes inside named groups/references. 10 11 * js/regexp-named-capture-groups-expected.txt: 12 * js/regexp-unicode-expected.txt: 13 * js/regress-158080-expected.txt: 14 * js/script-tests/regexp-named-capture-groups.js: 15 * js/script-tests/regexp-unicode.js: 16 1 17 2020-03-30 Devin Rousso <drousso@apple.com> 2 18 -
trunk/LayoutTests/js/regexp-named-capture-groups-expected.txt
r259026 r259262 62 62 PASS let r = new RegExp("/(?<groupName1>abc)/u") threw exception SyntaxError: Invalid regular expression: invalid group specifier name. 63 63 PASS let r = new RegExp("/(?<groupName1>abc)/u") threw exception SyntaxError: Invalid regular expression: invalid group specifier name. 64 PASS /(?<\u>.)/u threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape. 65 PASS /\k<\uzzz>/u threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape. 66 PASS /(?<\u{>.)/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 67 PASS /\k<\u{0>/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 64 68 PASS "XzzXzz".match(/\k<z>X(?<z>z*)X\k<z>/) is ["XzzXzz", "zz"] 65 69 PASS "XzzXzz".match(/\k<z>X(?<z>z*)X\k<z>/u) is ["XzzXzz", "zz"] -
trunk/LayoutTests/js/regexp-unicode-expected.txt
r258976 r259262 179 179 PASS "this is ba test".match(/is b\cha test/u)[0].length is 11 180 180 PASS new RegExp("\\/", "u").source is "\\/" 181 PASS r = new RegExp("\\u{110000}", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.181 PASS r = new RegExp("\\u{110000}", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 182 182 PASS r = new RegExp("𐐅{2147483648}", "u") threw exception SyntaxError: Invalid regular expression: pattern exceeds string length limits. 183 183 PASS /{/u threw exception SyntaxError: Invalid regular expression: incomplete {} quantifier for Unicode pattern. … … 191 191 PASS r = new RegExp("\\x", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern. 192 192 PASS r = new RegExp("[\\x]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern. 193 PASS r = new RegExp("\\u", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.194 PASS r = new RegExp("[\\u]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.195 PASS r = new RegExp("\\u{", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.196 PASS r = new RegExp("\\u{\udead", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.193 PASS r = new RegExp("\\u", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape. 194 PASS r = new RegExp("[\\u]", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape. 195 PASS r = new RegExp("\\u{", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 
196 PASS r = new RegExp("\\u{\udead", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 197 197 PASS /\1/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern. 198 198 PASS /\2/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern. -
trunk/LayoutTests/js/regress-158080-expected.txt
r255452 r259262 4 4 5 5 6 PASS let r = /\u{|abc/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.7 PASS let r = /\u{/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.8 PASS let r = /\u{1/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.9 PASS let r = /\u{12/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.10 PASS let r = /\u{123/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.11 PASS let r = /\u{1234/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.12 PASS let r = /\u{abcde/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.13 PASS let r = /\u{abcdef/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.14 PASS let r = /\u{1111111}/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.15 PASS let r = /\u{fedbca98}/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.16 PASS let r = /\u{1{123}}/u threw exception SyntaxError: Invalid regular expression: invalid Unicode {} escape.6 PASS let r = /\u{|abc/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 7 PASS let r = /\u{/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 8 PASS let r = /\u{1/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 9 PASS let r = /\u{12/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 10 PASS let r = /\u{123/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 11 PASS let r = /\u{1234/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 
12 PASS let r = /\u{abcde/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 13 PASS let r = /\u{abcdef/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 14 PASS let r = /\u{1111111}/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 15 PASS let r = /\u{fedbca98}/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 16 PASS let r = /\u{1{123}}/u threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape. 17 17 PASS successfullyParsed is true 18 18 -
trunk/LayoutTests/js/script-tests/regexp-named-capture-groups.js
r259026 r259262 105 105 shouldThrow('let r = new RegExp("/(?<\u200dgroupName1>abc)/u")', '"SyntaxError: Invalid regular expression: invalid group specifier name"'); 106 106 107 // Check that invalid \u escape errors do not get overridden. 108 shouldThrow('/(?<\\u>.)/u', '"SyntaxError: Invalid regular expression: invalid Unicode \\\\u escape"'); 109 shouldThrow('/\\k<\\uzzz>/u', '"SyntaxError: Invalid regular expression: invalid Unicode \\\\u escape"'); 110 shouldThrow('/(?<\\u{>.)/u', '"SyntaxError: Invalid regular expression: invalid Unicode code point \\\\u{} escape"'); 111 shouldThrow('/\\k<\\u{0>/u', '"SyntaxError: Invalid regular expression: invalid Unicode code point \\\\u{} escape"'); 112 107 113 // Check the named forward references work 108 114 shouldBe('"XzzXzz".match(/\\\k<z>X(?<z>z*)X\\\k<z>/)', '["XzzXzz", "zz"]'); -
trunk/LayoutTests/js/script-tests/regexp-unicode.js
r258976 r259262 228 228 // Check that invalid unicode patterns throw exceptions 229 229 shouldBe('new RegExp("\\\\/", "u").source', '"\\\\/"'); 230 shouldThrow('r = new RegExp("\\\\u{110000}", "u")', '"SyntaxError: Invalid regular expression: invalid Unicode {} escape"');230 shouldThrow('r = new RegExp("\\\\u{110000}", "u")', '"SyntaxError: Invalid regular expression: invalid Unicode code point \\\\u{} escape"'); 231 231 shouldThrow('r = new RegExp("\u{10405}{2147483648}", "u")', '"SyntaxError: Invalid regular expression: pattern exceeds string length limits"'); 232 232 shouldThrow('/{/u', '"SyntaxError: Invalid regular expression: incomplete {} quantifier for Unicode pattern"'); … … 251 251 shouldThrowInvalidEscape("\\\\x"); 252 252 shouldThrowInvalidEscape("[\\\\x]"); 253 shouldThrowInvalidEscape("\\\\u" );254 shouldThrowInvalidEscape("[\\\\u]" );255 256 shouldThrowInvalidEscape("\\\\u{", '"SyntaxError: Invalid regular expression: invalid Unicode {} escape"');257 shouldThrowInvalidEscape("\\\\u{\\udead", '"SyntaxError: Invalid regular expression: invalid Unicode {} escape"');253 shouldThrowInvalidEscape("\\\\u", '"SyntaxError: Invalid regular expression: invalid Unicode \\\\u escape"'); 254 shouldThrowInvalidEscape("[\\\\u]", '"SyntaxError: Invalid regular expression: invalid Unicode \\\\u escape"'); 255 256 shouldThrowInvalidEscape("\\\\u{", '"SyntaxError: Invalid regular expression: invalid Unicode code point \\\\u{} escape"'); 257 shouldThrowInvalidEscape("\\\\u{\\udead", '"SyntaxError: Invalid regular expression: invalid Unicode code point \\\\u{} escape"'); 258 258 259 259 // Check that invalid backreferences in unicode patterns throw exceptions. -
trunk/Source/JavaScriptCore/ChangeLog
r259246 r259262 1 2020-03-30 Alexey Shvayka <shvaikalesh@gmail.com> 2 3 Add support in named capture group identifiers for direct surrogate pairs 4 https://bugs.webkit.org/show_bug.cgi?id=178174 5 6 Reviewed by Darin Adler and Michael Saboff. 7 8 This change: 9 10 a) Adds support for unescaped astral symbols in RegExp identifier names [1], 11 aligning JSC with V8. 12 13 b) Rewords InvalidUnicodeEscape error code to be used for \uXXXX escapes in 14 Unicode patterns and named groups/references instead of InvalidIdentityEscape, 15 matching error messages in V8 and SpiderMonkey. 16 17 c) Adds hasError() checks after tryConsumeGroupName() so errors generated in 18 tryConsumeIdentifierCharacter() would not get overridden. 19 20 d) Removes code duplication by using tryConsumeUnicodeEscape() for parsing \u 21 in parseEscape(); cleans up parsing \u{} escapes a bit, preferring ASSERTs 22 over hasError() checks. 23 24 [1]: https://tc39.es/ecma262/#prod-RegExpIdentifierName 25 26 * yarr/YarrErrorCode.cpp: 27 (JSC::Yarr::errorMessage): 28 (JSC::Yarr::errorToThrow): 29 * yarr/YarrErrorCode.h: 30 * yarr/YarrParser.h: 31 (JSC::Yarr::Parser::parseEscape): 32 (JSC::Yarr::Parser::parseParenthesesBegin): 33 (JSC::Yarr::Parser::tryConsumeUnicodeEscape): 34 (JSC::Yarr::Parser::tryConsumeIdentifierCharacter): 35 1 36 2020-03-30 Ross Kirsling <ross.kirsling@sony.com> 2 37 -
trunk/Source/JavaScriptCore/yarr/YarrErrorCode.cpp
r259026 r259262 52 52 REGEXP_ERROR_PREFIX "invalid range in character class for Unicode pattern", // CharacterClassRangeInvalid 53 53 REGEXP_ERROR_PREFIX "\\ at end of pattern", // EscapeUnterminated 54 REGEXP_ERROR_PREFIX "invalid Unicode {} escape", // InvalidUnicodeEscape 54 REGEXP_ERROR_PREFIX "invalid Unicode \\u escape", // InvalidUnicodeEscape 55 REGEXP_ERROR_PREFIX "invalid Unicode code point \\u{} escape", // InvalidUnicodeCodePointEscape 55 56 REGEXP_ERROR_PREFIX "invalid backreference for Unicode pattern", // InvalidBackreference 56 57 REGEXP_ERROR_PREFIX "invalid \\k<> named backreference", // InvalidNamedBackReference … … 88 89 case ErrorCode::EscapeUnterminated: 89 90 case ErrorCode::InvalidUnicodeEscape: 91 case ErrorCode::InvalidUnicodeCodePointEscape: 90 92 case ErrorCode::InvalidBackreference: 91 93 case ErrorCode::InvalidNamedBackReference: -
trunk/Source/JavaScriptCore/yarr/YarrErrorCode.h
r259026 r259262 52 52 EscapeUnterminated, 53 53 InvalidUnicodeEscape, 54 InvalidUnicodeCodePointEscape, 54 55 InvalidBackreference, 55 56 InvalidNamedBackReference, -
trunk/Source/JavaScriptCore/yarr/YarrParser.h
r259026 r259262 439 439 if (!inCharacterClass && tryConsume('<')) { 440 440 auto groupName = tryConsumeGroupName(); 441 if (hasError(m_errorCode)) 442 break; 443 441 444 if (groupName) { 442 445 if (m_captureGroupNames.contains(groupName.value())) { … … 488 491 // UnicodeEscape 489 492 case 'u': { 490 consume(); 491 if (atEndOfPattern()) { 492 if (isIdentityEscapeAnError('u')) 493 break; 494 495 delegate.atomPatternCharacter('u'); 496 break; 497 } 498 499 if (m_isUnicode && peek() == '{') { 500 consume(); 501 UChar32 codePoint = 0; 502 do { 503 if (atEndOfPattern() || !isASCIIHexDigit(peek())) { 504 m_errorCode = ErrorCode::InvalidUnicodeEscape; 505 break; 506 } 507 508 codePoint = (codePoint << 4) | toASCIIHexValue(consume()); 509 510 if (codePoint > UCHAR_MAX_VALUE) 511 m_errorCode = ErrorCode::InvalidUnicodeEscape; 512 } while (!atEndOfPattern() && peek() != '}'); 513 if (!atEndOfPattern() && peek() == '}') 514 consume(); 515 else if (!hasError(m_errorCode)) 516 m_errorCode = ErrorCode::InvalidUnicodeEscape; 517 if (hasError(m_errorCode)) 518 return false; 519 520 delegate.atomPatternCharacter(codePoint); 521 break; 522 } 523 int u = tryConsumeHex(4); 524 if (u == -1) { 525 if (isIdentityEscapeAnError('u')) 526 break; 527 528 delegate.atomPatternCharacter('u'); 529 } else { 530 // If we have the first of a surrogate pair, look for the second. 531 if (U16_IS_LEAD(u) && m_isUnicode && (patternRemaining() >= 6) && peek() == '\\') { 532 ParseState state = saveState(); 533 consume(); 534 535 if (tryConsume('u')) { 536 int surrogate2 = tryConsumeHex(4); 537 if (U16_IS_TRAIL(surrogate2)) { 538 u = U16_GET_SUPPLEMENTARY(u, surrogate2); 539 delegate.atomPatternCharacter(u); 540 break; 541 } 542 } 543 544 restoreState(state); 545 } 546 delegate.atomPatternCharacter(u); 547 } 493 int codePoint = tryConsumeUnicodeEscape<UnicodeEscapeContext::CharacterEscape>(); 494 if (hasError(m_errorCode)) 495 break; 496 497 delegate.atomPatternCharacter(codePoint == -1 ? 
'u' : codePoint); 548 498 break; 549 499 } … … 673 623 case '<': { 674 624 auto groupName = tryConsumeGroupName(); 625 if (hasError(m_errorCode)) 626 break; 627 675 628 if (groupName) { 676 629 if (m_kIdentityEscapeSeen) { … … 1010 963 } 1011 964 965 enum class UnicodeEscapeContext : uint8_t { CharacterEscape, IdentifierName }; 966 967 template<UnicodeEscapeContext context> 1012 968 int tryConsumeUnicodeEscape() 1013 969 { 1014 if (!tryConsume('u')) 970 ASSERT(!hasError(m_errorCode)); 971 972 if (!tryConsume('u') || atEndOfPattern()) { 973 if (m_isUnicode || context == UnicodeEscapeContext::IdentifierName) 974 m_errorCode = ErrorCode::InvalidUnicodeEscape; 1015 975 return -1; 976 } 1016 977 1017 978 if (m_isUnicode && tryConsume('{')) { … … 1019 980 do { 1020 981 if (atEndOfPattern() || !isASCIIHexDigit(peek())) { 1021 m_errorCode = ErrorCode::InvalidUnicode Escape;982 m_errorCode = ErrorCode::InvalidUnicodeCodePointEscape; 1022 983 return -1; 1023 984 } … … 1026 987 1027 988 if (codePoint > UCHAR_MAX_VALUE) { 1028 m_errorCode = ErrorCode::InvalidUnicode Escape;989 m_errorCode = ErrorCode::InvalidUnicodeCodePointEscape; 1029 990 return -1; 1030 991 } 1031 992 } while (!atEndOfPattern() && peek() != '}'); 1032 if (!atEndOfPattern() && peek() == '}') 1033 consume(); 1034 else if (!hasError(m_errorCode)) 993 994 if (!tryConsume('}')) { 995 m_errorCode = ErrorCode::InvalidUnicodeCodePointEscape; 996 return -1; 997 } 998 999 return codePoint; 1000 } 1001 1002 int codeUnit = tryConsumeHex(4); 1003 if (codeUnit == -1) { 1004 if (m_isUnicode || context == UnicodeEscapeContext::IdentifierName) 1035 1005 m_errorCode = ErrorCode::InvalidUnicodeEscape; 1036 if (hasError(m_errorCode))1037 return -1;1038 1039 return codePoint;1040 }1041 1042 int u = tryConsumeHex(4);1043 if (u == -1)1044 1006 return -1; 1007 } 1045 1008 1046 1009 // If we have the first of a surrogate pair, look for the second. 
1047 if (U16_IS_LEAD( u) && m_isUnicode && (patternRemaining() >= 6)&& peek() == '\\') {1010 if (U16_IS_LEAD(codeUnit) && m_isUnicode && patternRemaining() >= 6 && peek() == '\\') { 1048 1011 ParseState state = saveState(); 1049 1012 consume(); … … 1051 1014 if (tryConsume('u')) { 1052 1015 int surrogate2 = tryConsumeHex(4); 1053 if (U16_IS_TRAIL(surrogate2)) { 1054 u = U16_GET_SUPPLEMENTARY(u, surrogate2); 1055 return u; 1056 } 1016 if (U16_IS_TRAIL(surrogate2)) 1017 return U16_GET_SUPPLEMENTARY(codeUnit, surrogate2); 1057 1018 } 1058 1019 … … 1060 1021 } 1061 1022 1062 return u;1023 return codeUnit; 1063 1024 } 1064 1025 1065 1026 int tryConsumeIdentifierCharacter() 1066 1027 { 1067 int ch = peek(); 1068 1069 if (ch == '\\') { 1070 consume(); 1071 ch = tryConsumeUnicodeEscape(); 1072 } else 1073 consume(); 1074 1075 return ch; 1028 if (tryConsume('\\')) 1029 return tryConsumeUnicodeEscape<UnicodeEscapeContext::IdentifierName>(); 1030 1031 return consumePossibleSurrogatePair(); 1076 1032 } 1077 1033
Note: See TracChangeset
for help on using the changeset viewer.