Changeset 202490 in webkit
- Timestamp:
- Jun 27, 2016 10:38:55 AM (8 years ago)
- Location:
- trunk
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r202489 r202490 1 2016-06-27 Michael Saboff <msaboff@apple.com> 2 3 ES6 Change: Unify handling of RegExp CharacterClassEscapes \w and \W and Word Asserts \b and \B 4 https://bugs.webkit.org/show_bug.cgi?id=158505 5 6 Reviewed by Geoffrey Garen. 7 8 Updated and added test cases. 9 10 * js/regexp-unicode-expected.txt: 11 * js/script-tests/regexp-unicode.js: 12 1 13 2016-06-27 Frederic Wang <fwang@igalia.com> 2 14 -
trunk/LayoutTests/js/regexp-unicode-expected.txt
r201714 r202490 42 42 PASS /\w/iu.test("ſ") is true 43 43 PASS /\w/iu.test("K") is true 44 PASS /!\w/iu.test("ſ") is false 45 PASS /!\w/iu.test("K") is false 46 PASS /\W/iu.test("ſ") is true 47 PASS /\W/iu.test("K") is true 48 PASS /!\W/iu.test("ſ") is false 49 PASS /!\W/iu.test("K") is false 44 PASS /\W/iu.test("ſ") is false 45 PASS /\W/iu.test("K") is false 50 46 PASS /[\w\d]/iu.test("ſ") is true 51 47 PASS /[\w\d]/iu.test("K") is true 52 48 PASS /[^\w\d]/iu.test("ſ") is false 53 49 PASS /[^\w\d]/iu.test("K") is false 54 PASS /[\W\d]/iu.test("ſ") is true55 PASS /[\W\d]/iu.test("K") is true56 PASS /[^\W\d]/iu.test("ſ") is false57 PASS /[^\W\d]/iu.test("K") is false50 PASS /[\W\d]/iu.test("ſ") is false 51 PASS /[\W\d]/iu.test("K") is false 52 PASS /[^\W\d]/iu.test("ſ") is true 53 PASS /[^\W\d]/iu.test("K") is true 58 54 PASS /\w/iu.test("S") is true 59 55 PASS /\w/iu.test("K") is true 60 PASS /!\w/iu.test("S") is false 61 PASS /!\w/iu.test("K") is false 62 PASS /\W/iu.test("S") is true 63 PASS /\W/iu.test("K") is true 64 PASS /!\W/iu.test("S") is false 65 PASS /!\W/iu.test("K") is false 56 PASS /\W/iu.test("S") is false 57 PASS /\W/iu.test("K") is false 66 58 PASS /[\w\d]/iu.test("S") is true 67 59 PASS /[\w\d]/iu.test("K") is true 68 60 PASS /[^\w\d]/iu.test("S") is false 69 61 PASS /[^\w\d]/iu.test("K") is false 70 PASS /[\W\d]/iu.test("S") is true 71 PASS /[\W\d]/iu.test("K") is true 72 PASS /[^\W\d]/iu.test("S") is false 73 PASS /[^\W\d]/iu.test("K") is false 62 PASS /[\W\d]/iu.test("S") is false 63 PASS /[\W\d]/iu.test("K") is false 64 PASS /[^\W\d]/iu.test("S") is true 65 PASS /[^\W\d]/iu.test("K") is true 66 PASS "Grasſoden is old German for grass".match(/.*?\Bs\u017foden/iu)[0] is "Grasſoden" 67 PASS "Grasſoden is old German for grass".match(/.*?\B\u017foden/iu)[0] is "Grasſoden" 68 PASS "Grasſoden is old German for grass".match(/.*?\Boden/iu)[0] is "Grasſoden" 69 PASS "Grasſoden is old German for grass".match(/.*?\Bden/iu)[0] is "Grasſoden" 70 PASS "Water 
freezes at 273K which is 0C.".split(/\b\s/iu) is ["Water","freezes","at","273K","which","is","0C."] 74 71 PASS "𝌆".match(/^.$/u)[0].length is 2 75 72 PASS "It is 78°".match(/.*/u)[0].length is 9 -
trunk/LayoutTests/js/script-tests/regexp-unicode.js
r201714 r202490 46 46 shouldBeTrue('/\\w/iu.test("\u017f")'); 47 47 shouldBeTrue('/\\w/iu.test("\u212a")'); 48 shouldBeFalse('/!\\w/iu.test("\u017f")'); 49 shouldBeFalse('/!\\w/iu.test("\u212a")'); 50 shouldBeTrue('/\\W/iu.test("\u017f")'); 51 shouldBeTrue('/\\W/iu.test("\u212a")'); 52 shouldBeFalse('/!\\W/iu.test("\u017f")'); 53 shouldBeFalse('/!\\W/iu.test("\u212a")'); 48 shouldBeFalse('/\\W/iu.test("\u017f")'); 49 shouldBeFalse('/\\W/iu.test("\u212a")'); 54 50 shouldBeTrue('/[\\w\\d]/iu.test("\u017f")'); 55 51 shouldBeTrue('/[\\w\\d]/iu.test("\u212a")'); 56 52 shouldBeFalse('/[^\\w\\d]/iu.test("\u017f")'); 57 53 shouldBeFalse('/[^\\w\\d]/iu.test("\u212a")'); 58 shouldBeTrue('/[\\W\\d]/iu.test("\u017f")'); 59 shouldBeTrue('/[\\W\\d]/iu.test("\u212a")'); 60 shouldBeFalse('/[^\\W\\d]/iu.test("\u017f")'); 61 shouldBeFalse('/[^\\W\\d]/iu.test("\u212a")'); 54 shouldBeFalse('/[\\W\\d]/iu.test("\u017f")'); 55 shouldBeFalse('/[\\W\\d]/iu.test("\u212a")'); 56 shouldBeTrue('/[^\\W\\d]/iu.test("\u017f")'); 57 shouldBeTrue('/[^\\W\\d]/iu.test("\u212a")'); 62 58 shouldBeTrue('/\\w/iu.test("S")'); 63 59 shouldBeTrue('/\\w/iu.test("K")'); 64 shouldBeFalse('/!\\w/iu.test("S")'); 65 shouldBeFalse('/!\\w/iu.test("K")'); 66 shouldBeTrue('/\\W/iu.test("S")'); 67 shouldBeTrue('/\\W/iu.test("K")'); 68 shouldBeFalse('/!\\W/iu.test("S")'); 69 shouldBeFalse('/!\\W/iu.test("K")'); 60 shouldBeFalse('/\\W/iu.test("S")'); 61 shouldBeFalse('/\\W/iu.test("K")'); 70 62 shouldBeTrue('/[\\w\\d]/iu.test("S")'); 71 63 shouldBeTrue('/[\\w\\d]/iu.test("K")'); 72 64 shouldBeFalse('/[^\\w\\d]/iu.test("S")'); 73 65 shouldBeFalse('/[^\\w\\d]/iu.test("K")'); 74 shouldBeTrue('/[\\W\\d]/iu.test("S")'); 75 shouldBeTrue('/[\\W\\d]/iu.test("K")'); 76 shouldBeFalse('/[^\\W\\d]/iu.test("S")'); 77 shouldBeFalse('/[^\\W\\d]/iu.test("K")'); 78 66 shouldBeFalse('/[\\W\\d]/iu.test("S")'); 67 shouldBeFalse('/[\\W\\d]/iu.test("K")'); 68 shouldBeTrue('/[^\\W\\d]/iu.test("S")'); 69 
shouldBeTrue('/[^\\W\\d]/iu.test("K")'); 70 shouldBe('"Gras\u017foden is old German for grass".match(/.*?\\Bs\\u017foden/iu)[0]', '"Gras\u017foden"'); 71 shouldBe('"Gras\u017foden is old German for grass".match(/.*?\\B\\u017foden/iu)[0]', '"Gras\u017foden"'); 72 shouldBe('"Gras\u017foden is old German for grass".match(/.*?\\Boden/iu)[0]', '"Gras\u017foden"'); 73 shouldBe('"Gras\u017foden is old German for grass".match(/.*?\\Bden/iu)[0]', '"Gras\u017foden"'); 74 shouldBe('"Water freezes at 273\u212a which is 0C.".split(/\\b\\s/iu)', '["Water","freezes","at","273\u212a","which","is","0C."]'); 79 75 80 76 // Test . matches with Unicode flag -
trunk/Source/JavaScriptCore/ChangeLog
r202487 r202490 1 2016-06-27 Michael Saboff <msaboff@apple.com> 2 3 ES6 Change: Unify handling of RegExp CharacterClassEscapes \w and \W and Word Asserts \b and \B 4 https://bugs.webkit.org/show_bug.cgi?id=158505 5 6 Reviewed by Geoffrey Garen. 7 8 This change makes it so that the CharacterClassEscape \w matches the inverse of 9 \W and vice versa for unicode, ignore case RegExp's. 10 11 Before this change, both /\w/ui and /\W/ui RegExp's would match the characters 12 k, K, s, S, \u017f (Latin Small Letter Long S) and \u212a (Kelvin Sign). 13 This was due to how the ES6 standard defined matching of character classes 14 specifically that the abstract operation "Canonicalize()" is called for the 15 character to be matched AND for the characters in the character class we are 16 matching against. This change is to make \W always be the inverse of \w. 17 It is still the case that the characters that match against \w change 18 depending on a regular expression's flags. 19 20 The only real changes occur for regular expressions with both the unicode and 21 ignore case flags set. Updated the character class generator to make 22 nonwordUnicodeIgnoreCaseChar not include k, K, s, S, \u017f and \u212a. 23 Changed BytecodePattern.wordcharCharacterClass to use the correct 24 word character class for the flags. Simplified character class set up 25 in the pattern to use m_pattern.wordUnicodeIgnoreCaseCharCharacterClass and 26 invert as appropriate when unicode and ignore case are both set. 27 28 * create_regex_tables: 29 * yarr/YarrInterpreter.h: 30 (JSC::Yarr::BytecodePattern::BytecodePattern): 31 * yarr/YarrPattern.cpp: 32 (JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass): 33 1 34 2016-06-25 Keith Miller <keith_miller@apple.com> 2 35 -
trunk/Source/JavaScriptCore/create_regex_tables
r199523 r202490 28 28 "wordUnicodeIgnoreCaseChar": { "UseTable" : False, "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z'), 0x017f, 0x212a]}, 29 29 "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]}, 30 "nonwordUnicodeIgnoreCaseChar": { "UseTable" : False, "Inverse": "wordchar", "data": ['k', 'K', 's', 'S', '`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]}, 30 "nonwordUnicodeIgnoreCaseChar": { "UseTable" : False, "Inverse": "wordUnicodeIgnoreCaseChar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x017e), (0x0180, 0x2129), (0x212b, 0x10ffff)]}, 31 31 "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]}, 32 32 "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]}, -
trunk/Source/JavaScriptCore/yarr/YarrInterpreter.h
r199075 r202490 348 348 349 349 newlineCharacterClass = pattern.newlineCharacterClass(); 350 wordcharCharacterClass = pattern.wordcharCharacterClass(); 350 if (unicode() && ignoreCase()) 351 wordcharCharacterClass = pattern.wordUnicodeIgnoreCaseCharCharacterClass(); 352 else 353 wordcharCharacterClass = pattern.wordcharCharacterClass(); 351 354 352 355 m_allParenthesesInfo.swap(parenthesesInfoToAdopt); -
trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp
r201412 r202490 352 352 break; 353 353 case WordClassID: 354 if (m_pattern.unicode() && m_pattern.ignoreCase()) { 355 if (invert) 356 m_alternative->m_terms.append(PatternTerm(m_pattern.nonwordUnicodeIgnoreCaseCharCharacterClass(), false)); 357 else 358 m_alternative->m_terms.append(PatternTerm(m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(), false)); 359 } else 354 if (m_pattern.unicode() && m_pattern.ignoreCase()) 355 m_alternative->m_terms.append(PatternTerm(m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(), invert)); 356 else 360 357 m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert)); 361 358 break;
Note: See TracChangeset
for help on using the changeset viewer.