Changeset 197426 in webkit
- Timestamp:
- Mar 1, 2016 4:39:01 PM (8 years ago)
- Location:
- trunk
- Files:
-
- 3 added
- 27 edited
- 3 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r197425 r197426 1 2016-03-01 Michael Saboff <msaboff@apple.com> 2 3 [ES6] Add support for Unicode regular expressions 4 https://bugs.webkit.org/show_bug.cgi?id=154842 5 6 Reviewed by Filip Pizlo. 7 8 Added a new test for the added unicode regular expression processing. 9 10 Updated several tests for the u flag changes and "unicode" property. 11 12 * js/regexp-unicode-expected.txt: Added. 13 * js/regexp-unicode.html: Added. 14 * js/script-tests/regexp-unicode.js: Added. 15 New test. 16 17 * js/Object-getOwnPropertyNames-expected.txt: 18 * js/regexp-flags-expected.txt: 19 * js/script-tests/Object-getOwnPropertyNames.js: 20 * js/script-tests/regexp-flags.js: 21 (RegExp.prototype.hasOwnProperty): 22 Updated tests. 23 1 24 2016-03-01 Ryan Haddad <ryanhaddad@apple.com> 2 25 -
trunk/LayoutTests/js/Object-getOwnPropertyNames-expected.txt
r196498 r197426 57 57 PASS getSortedOwnPropertyNames(Date.prototype) is ['constructor', 'getDate', 'getDay', 'getFullYear', 'getHours', 'getMilliseconds', 'getMinutes', 'getMonth', 'getSeconds', 'getTime', 'getTimezoneOffset', 'getUTCDate', 'getUTCDay', 'getUTCFullYear', 'getUTCHours', 'getUTCMilliseconds', 'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds', 'getYear', 'setDate', 'setFullYear', 'setHours', 'setMilliseconds', 'setMinutes', 'setMonth', 'setSeconds', 'setTime', 'setUTCDate', 'setUTCFullYear', 'setUTCHours', 'setUTCMilliseconds', 'setUTCMinutes', 'setUTCMonth', 'setUTCSeconds', 'setYear', 'toDateString', 'toGMTString', 'toISOString', 'toJSON', 'toLocaleDateString', 'toLocaleString', 'toLocaleTimeString', 'toString', 'toTimeString', 'toUTCString', 'valueOf'] 58 58 PASS getSortedOwnPropertyNames(RegExp) is ['$&', "$'", '$*', '$+', '$1', '$2', '$3', '$4', '$5', '$6', '$7', '$8', '$9', '$_', '$`', 'input', 'lastMatch', 'lastParen', 'leftContext', 'length', 'multiline', 'name', 'prototype', 'rightContext'] 59 PASS getSortedOwnPropertyNames(RegExp.prototype) is ['compile', 'constructor', 'exec', 'flags', 'global', 'ignoreCase', 'lastIndex', 'multiline', 'source', 'test', 'toString' ]59 PASS getSortedOwnPropertyNames(RegExp.prototype) is ['compile', 'constructor', 'exec', 'flags', 'global', 'ignoreCase', 'lastIndex', 'multiline', 'source', 'test', 'toString', 'unicode'] 60 60 PASS getSortedOwnPropertyNames(Error) is ['length', 'name', 'prototype'] 61 61 PASS getSortedOwnPropertyNames(Error.prototype) is ['constructor', 'message', 'name', 'toString'] -
trunk/LayoutTests/js/regexp-flags-expected.txt
r185432 r197426 24 24 PASS flags.call({global: 1, multiline: 0, ignoreCase: 2}) is 'gi' 25 25 PASS flags.call({ __proto__: { multiline: true } }) is 'm' 26 unicode flag 27 PASS /a/uimg.flags is 'gimu' 28 PASS new RegExp('a', 'uimg').flags is 'gimu' 29 PASS flags.call({global: true, multiline: true, ignoreCase: true, unicode: true}) is 'gimu' 26 30 PASS successfullyParsed is true 27 31 -
trunk/LayoutTests/js/script-tests/Object-getOwnPropertyNames.js
r196498 r197426 66 66 "Date.prototype": "['constructor', 'getDate', 'getDay', 'getFullYear', 'getHours', 'getMilliseconds', 'getMinutes', 'getMonth', 'getSeconds', 'getTime', 'getTimezoneOffset', 'getUTCDate', 'getUTCDay', 'getUTCFullYear', 'getUTCHours', 'getUTCMilliseconds', 'getUTCMinutes', 'getUTCMonth', 'getUTCSeconds', 'getYear', 'setDate', 'setFullYear', 'setHours', 'setMilliseconds', 'setMinutes', 'setMonth', 'setSeconds', 'setTime', 'setUTCDate', 'setUTCFullYear', 'setUTCHours', 'setUTCMilliseconds', 'setUTCMinutes', 'setUTCMonth', 'setUTCSeconds', 'setYear', 'toDateString', 'toGMTString', 'toISOString', 'toJSON', 'toLocaleDateString', 'toLocaleString', 'toLocaleTimeString', 'toString', 'toTimeString', 'toUTCString', 'valueOf']", 67 67 "RegExp": "['$&', \"$'\", '$*', '$+', '$1', '$2', '$3', '$4', '$5', '$6', '$7', '$8', '$9', '$_', '$`', 'input', 'lastMatch', 'lastParen', 'leftContext', 'length', 'multiline', 'name', 'prototype', 'rightContext']", 68 "RegExp.prototype": "['compile', 'constructor', 'exec', 'flags', 'global', 'ignoreCase', 'lastIndex', 'multiline', 'source', 'test', 'toString' ]",68 "RegExp.prototype": "['compile', 'constructor', 'exec', 'flags', 'global', 'ignoreCase', 'lastIndex', 'multiline', 'source', 'test', 'toString', 'unicode']", 69 69 "Error": "['length', 'name', 'prototype']", 70 70 "Error.prototype": "['constructor', 'message', 'name', 'toString']", -
trunk/LayoutTests/js/script-tests/regexp-flags.js
r185432 r197426 29 29 shouldBe("flags.call({ __proto__: { multiline: true } })", "'m'"); 30 30 31 debug("unicode flag"); 32 shouldBe("/a/uimg.flags", "'gimu'"); 33 shouldBe("new RegExp('a', 'uimg').flags", "'gimu'"); 34 shouldBe("flags.call({global: true, multiline: true, ignoreCase: true, unicode: true})", "'gimu'"); 35 31 36 if (RegExp.prototype.hasOwnProperty('sticky')) { 32 37 debug("sticky flag"); … … 37 42 shouldBe("flags.call({global: true, multiline: true, ignoreCase: true, sticky: true})", "'gimy'"); 38 43 } 39 if (RegExp.prototype.hasOwnProperty('unicode')) {40 debug("unicode flag");41 // when the engine supports "unicode", these tests will fail by design.42 // Hopefully, only the expected output will need updating.43 shouldBe("/a/uimg.flags", "'gimu'");44 shouldBe("new RegExp('a', 'uimg').flags", "'gimu'");45 shouldBe("flags.call({global: true, multiline: true, ignoreCase: true, unicode: true})", "'gimu'");46 } -
trunk/Source/JavaScriptCore/CMakeLists.txt
r197406 r197426 827 827 828 828 yarr/RegularExpression.cpp 829 yarr/YarrCanonicalizeUCS2.cpp 829 yarr/YarrCanonicalizeUnicode.cpp 830 830 yarr/YarrInterpreter.cpp 831 831 yarr/YarrJIT.cpp -
trunk/Source/JavaScriptCore/ChangeLog
r197422 r197426 1 2016-03-01 Michael Saboff <msaboff@apple.com> 2 3 [ES6] Add support for Unicode regular expressions 4 https://bugs.webkit.org/show_bug.cgi?id=154842 5 6 Reviewed by Filip Pizlo. 7 8 Added processing of Unicode regular expressions to the Yarr interpreter. 9 10 Changed parsing of regular expression patterns and PatternTerms to process characters as 11 UChar32 in the Yarr code. The parser converts matched surrogate pairs into the appropriate 12 Unicode character when the expression is parsed. When matching a unicode expression and 13 reading source characters, we convert a proper surrogate pair into a Unicode character and 14 advance the source cursor, "pos", one more position. The exception to this is when we 15 know, when generating a fixed character atom, that we need to match a unicode character 16 that doesn't fit in 16 bits. The code calls this an extendedUnicodeCharacter and has a 17 helper to determine this. 18 19 Added 'u' flag and 'unicode' identifier to regular expression classes. Added an "isUnicode" 20 parameter to YarrPattern pattern() and internal users of that function. 21 22 Updated the generation of the canonicalization tables to include a new set of tables that 23 follow ES 6.0, 21.2.2.8.2 Step 2. Renamed the YarrCanonicalizeUCS2.* files to 24 YarrCanonicalizeUnicode.*. 25 26 Added a new LayoutTests/js test that tests the added functionality. Updated other tests that 27 have minor es6 unicode checks and look for valid flags. 28 29 Ran the ChakraCore Unicode regular expression tests as well. 30 31 * CMakeLists.txt: 32 * JavaScriptCore.vcxproj/JavaScriptCore.vcxproj: 33 * JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters: 34 * JavaScriptCore.xcodeproj/project.pbxproj: 35 36 * inspector/ContentSearchUtilities.cpp: 37 (Inspector::ContentSearchUtilities::findMagicComment): 38 * yarr/RegularExpression.cpp: 39 (JSC::Yarr::RegularExpression::Private::compile): 40 Updated use of pattern(). 
41 42 * runtime/CommonIdentifiers.h: 43 * runtime/RegExp.cpp: 44 (JSC::regExpFlags): 45 (JSC::RegExpFunctionalTestCollector::outputOneTest): 46 (JSC::RegExp::finishCreation): 47 (JSC::RegExp::compile): 48 (JSC::RegExp::compileMatchOnly): 49 * runtime/RegExp.h: 50 * runtime/RegExpKey.h: 51 * runtime/RegExpPrototype.cpp: 52 (JSC::regExpProtoFuncCompile): 53 (JSC::flagsString): 54 (JSC::regExpProtoGetterMultiline): 55 (JSC::regExpProtoGetterUnicode): 56 (JSC::regExpProtoGetterFlags): 57 Updated for new 'u' (unicode) flag. Added a check to use the interpreter for unicode regular expressions. 58 59 * tests/es6.yaml: 60 * tests/stress/static-getter-in-names.js: 61 Updated tests for new flag and for passing the minimal es6 regular expression processing. 62 63 * yarr/Yarr.h: Updated the size of information now kept for backtracking. 64 65 * yarr/YarrCanonicalizeUCS2.cpp: Removed. 66 * yarr/YarrCanonicalizeUCS2.h: Removed. 67 * yarr/YarrCanonicalizeUCS2.js: Removed. 68 * yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp. 69 * yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h. 70 (JSC::Yarr::canonicalCharacterSetInfo): 71 (JSC::Yarr::canonicalRangeInfoFor): 72 (JSC::Yarr::getCanonicalPair): 73 (JSC::Yarr::isCanonicallyUnique): 74 (JSC::Yarr::areCanonicallyEquivalent): 75 (JSC::Yarr::rangeInfoFor): Deleted. 76 * yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js. 77 (printHeader): 78 (printFooter): 79 (hex): 80 (canonicalize): 81 (canonicalizeUnicode): 82 (createUCS2CanonicalGroups): 83 (createUnicodeCanonicalGroups): 84 (cu.in.groupedCanonically.characters.sort): Deleted. 85 (cu.in.groupedCanonically.else): Deleted. 86 Refactored to output two sets of tables, one for UCS2 and one for Unicode. The UCS2 tables follow 87 the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3. 
The new Unicode 88 tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2. Eliminated the unused Latin1 tables. 89 90 * yarr/YarrInterpreter.cpp: 91 (JSC::Yarr::Interpreter::InputStream::InputStream): 92 (JSC::Yarr::Interpreter::InputStream::readChecked): 93 (JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked): 94 (JSC::Yarr::Interpreter::InputStream::reread): 95 (JSC::Yarr::Interpreter::InputStream::prev): 96 (JSC::Yarr::Interpreter::testCharacterClass): 97 (JSC::Yarr::Interpreter::checkCharacter): 98 (JSC::Yarr::Interpreter::checkSurrogatePair): 99 (JSC::Yarr::Interpreter::checkCasedCharacter): 100 (JSC::Yarr::Interpreter::tryConsumeBackReference): 101 (JSC::Yarr::Interpreter::backtrackPatternCharacter): 102 (JSC::Yarr::Interpreter::matchCharacterClass): 103 (JSC::Yarr::Interpreter::backtrackCharacterClass): 104 (JSC::Yarr::Interpreter::matchParenthesesTerminalEnd): 105 (JSC::Yarr::Interpreter::matchDisjunction): 106 (JSC::Yarr::Interpreter::Interpreter): 107 (JSC::Yarr::ByteCompiler::assertionWordBoundary): 108 (JSC::Yarr::ByteCompiler::atomPatternCharacter): 109 * yarr/YarrInterpreter.h: 110 (JSC::Yarr::ByteTerm::ByteTerm): 111 (JSC::Yarr::BytecodePattern::BytecodePattern): 112 * yarr/YarrJIT.cpp: 113 (JSC::Yarr::YarrGenerator::optimizeAlternative): 114 (JSC::Yarr::YarrGenerator::matchCharacterClassRange): 115 (JSC::Yarr::YarrGenerator::matchCharacterClass): 116 (JSC::Yarr::YarrGenerator::notAtEndOfInput): 117 (JSC::Yarr::YarrGenerator::jumpIfCharNotEquals): 118 (JSC::Yarr::YarrGenerator::generatePatternCharacterOnce): 119 (JSC::Yarr::YarrGenerator::generatePatternCharacterFixed): 120 (JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy): 121 (JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy): 122 * yarr/YarrParser.h: 123 (JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter): 124 (JSC::Yarr::Parser::Parser): 125 (JSC::Yarr::Parser::parseEscape): 126 (JSC::Yarr::Parser::consumePossibleSurrogatePair): 127 
(JSC::Yarr::Parser::parseCharacterClass): 128 (JSC::Yarr::Parser::parseTokens): 129 (JSC::Yarr::Parser::parse): 130 (JSC::Yarr::Parser::atEndOfPattern): 131 (JSC::Yarr::Parser::patternRemaining): 132 (JSC::Yarr::Parser::peek): 133 (JSC::Yarr::parse): 134 * yarr/YarrPattern.cpp: 135 (JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor): 136 (JSC::Yarr::CharacterClassConstructor::append): 137 (JSC::Yarr::CharacterClassConstructor::putChar): 138 (JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase): 139 (JSC::Yarr::CharacterClassConstructor::putRange): 140 (JSC::Yarr::CharacterClassConstructor::charClass): 141 (JSC::Yarr::CharacterClassConstructor::addSorted): 142 (JSC::Yarr::CharacterClassConstructor::addSortedRange): 143 (JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor): 144 (JSC::Yarr::YarrPatternConstructor::assertionWordBoundary): 145 (JSC::Yarr::YarrPatternConstructor::atomPatternCharacter): 146 (JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin): 147 (JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom): 148 (JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange): 149 (JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets): 150 (JSC::Yarr::YarrPattern::compile): 151 (JSC::Yarr::YarrPattern::YarrPattern): 152 * yarr/YarrPattern.h: 153 (JSC::Yarr::CharacterRange::CharacterRange): 154 (JSC::Yarr::CharacterClass::CharacterClass): 155 (JSC::Yarr::PatternTerm::PatternTerm): 156 (JSC::Yarr::YarrPattern::reset): 157 * yarr/YarrSyntaxChecker.cpp: 158 (JSC::Yarr::SyntaxChecker::assertionBOL): 159 (JSC::Yarr::SyntaxChecker::assertionEOL): 160 (JSC::Yarr::SyntaxChecker::assertionWordBoundary): 161 (JSC::Yarr::SyntaxChecker::atomPatternCharacter): 162 (JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass): 163 (JSC::Yarr::SyntaxChecker::atomCharacterClassBegin): 164 (JSC::Yarr::SyntaxChecker::atomCharacterClassAtom): 165 (JSC::Yarr::checkSyntax): 166 1 167 2016-03-01 Saam barati <sbarati@apple.com> 2 168 -
trunk/Source/JavaScriptCore/JavaScriptCore.vcxproj/JavaScriptCore.vcxproj
r197365 r197426 938 938 <ClCompile Include="..\wasm\WASMReader.cpp" /> 939 939 <ClCompile Include="..\yarr\RegularExpression.cpp" /> 940 <ClCompile Include="..\yarr\YarrCanonicalizeU CS2.cpp" />940 <ClCompile Include="..\yarr\YarrCanonicalizeUnicode.cpp" /> 941 941 <ClCompile Include="..\yarr\YarrInterpreter.cpp" /> 942 942 <ClCompile Include="..\yarr\YarrJIT.cpp" /> … … 1879 1879 <ClInclude Include="..\yarr\RegularExpression.h" /> 1880 1880 <ClInclude Include="..\yarr\Yarr.h" /> 1881 <ClInclude Include="..\yarr\YarrCanonicalizeU CS2.h" />1881 <ClInclude Include="..\yarr\YarrCanonicalizeUnicode.h" /> 1882 1882 <ClInclude Include="..\yarr\YarrInterpreter.h" /> 1883 1883 <ClInclude Include="..\yarr\YarrJIT.h" /> -
trunk/Source/JavaScriptCore/JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters
r197365 r197426 1060 1060 <Filter>yarr</Filter> 1061 1061 </ClCompile> 1062 <ClCompile Include="..\yarr\YarrCanonicalizeU CS2.cpp">1062 <ClCompile Include="..\yarr\YarrCanonicalizeUnicode.cpp"> 1063 1063 <Filter>yarr</Filter> 1064 1064 </ClCompile> … … 3317 3317 <Filter>yarr</Filter> 3318 3318 </ClInclude> 3319 <ClInclude Include="..\yarr\YarrCanonicalizeU CS2.h">3319 <ClInclude Include="..\yarr\YarrCanonicalizeUnicode.h"> 3320 3320 <Filter>yarr</Filter> 3321 3321 </ClInclude> -
trunk/Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
r197408 r197426 1325 1325 862553D216136E1A009F17D0 /* JSProxy.h in Headers */ = {isa = PBXBuildFile; fileRef = 862553CF16136AA5009F17D0 /* JSProxy.h */; settings = {ATTRIBUTES = (Private, ); }; }; 1326 1326 863B23E00FC6118900703AA4 /* MacroAssemblerCodeRef.h in Headers */ = {isa = PBXBuildFile; fileRef = 863B23DF0FC60E6200703AA4 /* MacroAssemblerCodeRef.h */; settings = {ATTRIBUTES = (Private, ); }; }; 1327 863C6D9C1521111A00585E4E /* YarrCanonicalizeU CS2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 863C6D981521111200585E4E /* YarrCanonicalizeUCS2.cpp */; };1327 863C6D9C1521111A00585E4E /* YarrCanonicalizeUnicode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 863C6D981521111200585E4E /* YarrCanonicalizeUnicode.cpp */; }; 1328 1328 8642C510151C06A90046D4EF /* RegExpCachedResult.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86F75EFB151C062F007C9BA3 /* RegExpCachedResult.cpp */; }; 1329 1329 8642C512151C083D0046D4EF /* RegExpMatchesArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 86F75EFD151C062F007C9BA3 /* RegExpMatchesArray.cpp */; }; … … 3490 3490 862553CF16136AA5009F17D0 /* JSProxy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSProxy.h; sourceTree = "<group>"; }; 3491 3491 863B23DF0FC60E6200703AA4 /* MacroAssemblerCodeRef.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MacroAssemblerCodeRef.h; sourceTree = "<group>"; }; 3492 863C6D981521111200585E4E /* YarrCanonicalizeU CS2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = YarrCanonicalizeUCS2.cpp; path = yarr/YarrCanonicalizeUCS2.cpp; sourceTree = "<group>"; };3493 863C6D991521111200585E4E /* YarrCanonicalizeU CS2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrCanonicalizeUCS2.h; path = yarr/YarrCanonicalizeUCS2.h; sourceTree = "<group>"; };3494 863C6D9A1521111200585E4E /* YarrCanonicalizeU CS2.js */ 
= {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.javascript; name = YarrCanonicalizeUCS2.js; path = yarr/YarrCanonicalizeUCS2.js; sourceTree = "<group>"; };3492 863C6D981521111200585E4E /* YarrCanonicalizeUnicode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = YarrCanonicalizeUnicode.cpp; path = yarr/YarrCanonicalizeUnicode.cpp; sourceTree = "<group>"; }; 3493 863C6D991521111200585E4E /* YarrCanonicalizeUnicode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrCanonicalizeUnicode.h; path = yarr/YarrCanonicalizeUnicode.h; sourceTree = "<group>"; }; 3494 863C6D9A1521111200585E4E /* YarrCanonicalizeUnicode.js */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.javascript; name = YarrCanonicalizeUnicode.js; path = yarr/YarrCanonicalizeUnicode.js; sourceTree = "<group>"; }; 3495 3495 8640923B156EED3B00566CB2 /* ARM64Assembler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ARM64Assembler.h; sourceTree = "<group>"; }; 3496 3496 8640923C156EED3B00566CB2 /* MacroAssemblerARM64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MacroAssemblerARM64.h; sourceTree = "<group>"; }; … … 5997 5997 A57D23EC1891B5540031C7FA /* RegularExpression.h */, 5998 5998 451539B812DC994500EF7AC4 /* Yarr.h */, 5999 863C6D981521111200585E4E /* YarrCanonicalizeU CS2.cpp */,6000 863C6D991521111200585E4E /* YarrCanonicalizeU CS2.h */,6001 863C6D9A1521111200585E4E /* YarrCanonicalizeU CS2.js */,5999 863C6D981521111200585E4E /* YarrCanonicalizeUnicode.cpp */, 6000 863C6D991521111200585E4E /* YarrCanonicalizeUnicode.h */, 6001 863C6D9A1521111200585E4E /* YarrCanonicalizeUnicode.js */, 6002 6002 86704B7D12DBA33700A9FE7B /* YarrInterpreter.cpp */, 6003 6003 86704B7E12DBA33700A9FE7B /* YarrInterpreter.h */, … … 9310 9310 A7E5AB3A1799E4B200D2833D /* X86Disassembler.cpp 
in Sources */, 9311 9311 0F2BBD971C5FF3F50023EF23 /* B3Variable.cpp in Sources */, 9312 863C6D9C1521111A00585E4E /* YarrCanonicalizeU CS2.cpp in Sources */,9312 863C6D9C1521111A00585E4E /* YarrCanonicalizeUnicode.cpp in Sources */, 9313 9313 86704B8412DBA33700A9FE7B /* YarrInterpreter.cpp in Sources */, 9314 9314 86704B8612DBA33700A9FE7B /* YarrJIT.cpp in Sources */, -
trunk/Source/JavaScriptCore/inspector/ContentSearchUtilities.cpp
r194496 r197426 177 177 ASSERT(!content.isNull()); 178 178 const char* error = nullptr; 179 JSC::Yarr::YarrPattern pattern(patternString, false, true, &error);179 JSC::Yarr::YarrPattern pattern(patternString, false, true, false, &error); 180 180 ASSERT(!error); 181 181 BumpPointerAllocator regexAllocator; -
trunk/Source/JavaScriptCore/runtime/CommonIdentifiers.h
r196950 r197426 212 212 macro(toString) \ 213 213 macro(top) \ 214 macro(unicode) \ 214 215 macro(usage) \ 215 216 macro(value) \ -
trunk/Source/JavaScriptCore/runtime/RegExp.cpp
r197379 r197426 67 67 break; 68 68 69 case 'u': 70 if (flags & FlagUnicode) 71 return InvalidFlags; 72 flags = static_cast<RegExpFlags>(flags | FlagUnicode); 73 break; 74 69 75 default: 70 76 return InvalidFlags; … … 127 133 if (regExp->multiline()) 128 134 fputc('m', m_file); 135 if (regExp->unicode()) 136 fputc('u', m_file); 129 137 fprintf(m_file, "\n"); 130 138 } … … 241 249 { 242 250 Base::finishCreation(vm); 243 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);251 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), unicode(), &m_constructionError); 244 252 if (m_constructionError) 245 253 m_state = ParseError; … … 281 289 void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize) 282 290 { 283 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);291 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), unicode(), &m_constructionError); 284 292 if (m_constructionError) { 285 293 RELEASE_ASSERT_NOT_REACHED(); … … 298 306 299 307 #if ENABLE(YARR_JIT) 300 if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) {308 if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && !unicode() && vm->canUseRegExpJIT()) { 301 309 Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode); 302 310 if (!m_regExpJITCode.isFallBack()) { … … 400 408 void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize) 401 409 { 402 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);410 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), unicode(), &m_constructionError); 403 411 if (m_constructionError) { 404 412 RELEASE_ASSERT_NOT_REACHED(); … … 417 425 418 426 #if ENABLE(YARR_JIT) 419 if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && vm->canUseRegExpJIT()) {427 if (!pattern.m_containsBackreferences && 
!pattern.containsUnsignedLengthPattern() && !unicode() && vm->canUseRegExpJIT()) { 420 428 Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly); 421 429 if (!m_regExpJITCode.isFallBack()) { -
trunk/Source/JavaScriptCore/runtime/RegExp.h
r197379 r197426 56 56 bool ignoreCase() const { return m_flags & FlagIgnoreCase; } 57 57 bool multiline() const { return m_flags & FlagMultiline; } 58 bool unicode() const { return m_flags & FlagUnicode; } 58 59 59 60 const String& pattern() const { return m_patternString; } -
trunk/Source/JavaScriptCore/runtime/RegExpKey.h
r131913 r197426 39 39 FlagIgnoreCase = 2, 40 40 FlagMultiline = 4, 41 InvalidFlags = 8, 41 FlagUnicode = 8, 42 InvalidFlags = 16, 42 43 DeletedValueFlags = -1 43 44 }; -
trunk/Source/JavaScriptCore/runtime/RegExpPrototype.cpp
r196498 r197426 49 49 static EncodedJSValue JSC_HOST_CALL regExpProtoGetterIgnoreCase(ExecState*); 50 50 static EncodedJSValue JSC_HOST_CALL regExpProtoGetterMultiline(ExecState*); 51 static EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState*); 51 52 static EncodedJSValue JSC_HOST_CALL regExpProtoGetterSource(ExecState*); 52 53 static EncodedJSValue JSC_HOST_CALL regExpProtoGetterFlags(ExecState*); … … 69 70 ignoreCase regExpProtoGetterIgnoreCase DontEnum|Accessor 70 71 multiline regExpProtoGetterMultiline DontEnum|Accessor 72 unicode regExpProtoGetterUnicode DontEnum|Accessor 71 73 source regExpProtoGetterSource DontEnum|Accessor 72 74 flags regExpProtoGetterFlags DontEnum|Accessor … … 147 149 } 148 150 149 typedef std::array<char, 3 + 1> FlagsString; // 3different flags and a null character terminator.151 typedef std::array<char, 4 + 1> FlagsString; // 4 different flags and a null character terminator. 150 152 151 153 static inline FlagsString flagsString(ExecState* exec, JSObject* regexp) … … 160 162 return string; 161 163 JSValue multilineValue = regexp->get(exec, exec->propertyNames().multiline); 164 if (exec->hadException()) 165 return string; 166 JSValue unicodeValue = regexp->get(exec, exec->propertyNames().unicode); 162 167 163 168 unsigned index = 0; … … 168 173 if (multilineValue.toBoolean(exec)) 169 174 string[index++] = 'm'; 175 if (unicodeValue.toBoolean(exec)) 176 string[index++] = 'u'; 170 177 ASSERT(index < string.size()); 171 178 string[index] = 0; … … 224 231 225 232 return JSValue::encode(jsBoolean(asRegExpObject(thisValue)->regExp()->multiline())); 233 } 234 235 EncodedJSValue JSC_HOST_CALL regExpProtoGetterUnicode(ExecState* exec) 236 { 237 JSValue thisValue = exec->thisValue(); 238 if (!thisValue.inherits(RegExpObject::info())) 239 return throwVMTypeError(exec); 240 241 return JSValue::encode(jsBoolean(asRegExpObject(thisValue)->regExp()->unicode())); 226 242 } 227 243 -
trunk/Source/JavaScriptCore/tests/es6.yaml
r197420 r197426 1092 1092 cmd: runES6 :normal 1093 1093 - path: es6/RegExp_y_and_u_flags_u_flag.js 1094 cmd: runES6 : fail1094 cmd: runES6 :normal 1095 1095 - path: es6/RegExp_y_and_u_flags_u_flag_Unicode_code_point_escapes.js 1096 cmd: runES6 : fail1096 cmd: runES6 :normal 1097 1097 - path: es6/RegExp_y_and_u_flags_y_flag.js 1098 1098 cmd: runES6 :fail -
trunk/Source/JavaScriptCore/tests/stress/static-getter-in-names.js
r185432 r197426 4 4 } 5 5 6 shouldBe(JSON.stringify(Object.getOwnPropertyNames(RegExp.prototype).sort()), '["compile","constructor","exec","flags","global","ignoreCase","lastIndex","multiline","source","test","toString" ]');6 shouldBe(JSON.stringify(Object.getOwnPropertyNames(RegExp.prototype).sort()), '["compile","constructor","exec","flags","global","ignoreCase","lastIndex","multiline","source","test","toString","unicode"]'); 7 7 shouldBe(JSON.stringify(Object.getOwnPropertyNames(/Cocoa/).sort()), '["lastIndex"]'); -
trunk/Source/JavaScriptCore/yarr/RegularExpression.cpp
r185346 r197426 58 58 std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) 59 59 { 60 JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &m_constructionError);60 JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), false, &m_constructionError); 61 61 if (m_constructionError) { 62 62 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError); -
trunk/Source/JavaScriptCore/yarr/Yarr.h
r181343 r197426 34 34 namespace JSC { namespace Yarr { 35 35 36 #define YarrStackSpaceForBackTrackInfoPatternCharacter 1// Only for !fixed quantifiers.37 #define YarrStackSpaceForBackTrackInfoCharacterClass 1// Only for !fixed quantifiers.36 #define YarrStackSpaceForBackTrackInfoPatternCharacter 2 // Only for !fixed quantifiers. 37 #define YarrStackSpaceForBackTrackInfoCharacterClass 2 // Only for !fixed quantifiers. 38 38 #define YarrStackSpaceForBackTrackInfoBackReference 2 39 39 #define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative. -
trunk/Source/JavaScriptCore/yarr/YarrCanonicalizeUnicode.cpp
r197165 r197426 1 1 /* 2 * Copyright (C) 2012 Apple Inc. All rights reserved.2 * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 24 24 */ 25 25 26 // DO NOT EDIT! - this file autogenerated by YarrCanonicalizeU CS2.js26 // DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUnicode.js 27 27 28 28 #include "config.h" 29 #include "YarrCanonicalizeU CS2.h"29 #include "YarrCanonicalizeUnicode.h" 30 30 31 31 namespace JSC { namespace Yarr { … … 33 33 #include <stdint.h> 34 34 35 const uint16_t ucs2CharacterSet0[] = { 0x01c4u, 0x01c5u, 0x01c6u, 0 };36 const uint16_t ucs2CharacterSet1[] = { 0x01c7u, 0x01c8u, 0x01c9u, 0 };37 const uint16_t ucs2CharacterSet2[] = { 0x01cau, 0x01cbu, 0x01ccu, 0 };38 const uint16_t ucs2CharacterSet3[] = { 0x01f1u, 0x01f2u, 0x01f3u, 0 };39 const uint16_t ucs2CharacterSet4[] = { 0x0392u, 0x03b2u, 0x03d0u, 0 };40 const uint16_t ucs2CharacterSet5[] = { 0x0395u, 0x03b5u, 0x03f5u, 0 };41 const uint16_t ucs2CharacterSet6[] = { 0x0398u, 0x03b8u, 0x03d1u, 0 };42 const uint16_t ucs2CharacterSet7[] = { 0x0345u, 0x0399u, 0x03b9u, 0x1fbeu, 0 };43 const uint16_t ucs2CharacterSet8[] = { 0x039au, 0x03bau, 0x03f0u, 0 };44 const uint16_t ucs2CharacterSet9[] = { 0x00b5u, 0x039cu, 0x03bcu, 0 };45 const uint16_t ucs2CharacterSet10[] = { 0x03a0u, 0x03c0u, 0x03d6u, 0 };46 const uint16_t ucs2CharacterSet11[] = { 0x03a1u, 0x03c1u, 0x03f1u, 0 };47 const uint16_t ucs2CharacterSet12[] = { 0x03a3u, 0x03c2u, 0x03c3u, 0 };48 const uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 };49 const uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 };35 const UChar32 ucs2CharacterSet0[] = { 0x01c4, 0x01c5, 0x01c6, 0 }; 36 const UChar32 ucs2CharacterSet1[] = { 0x01c7, 0x01c8, 0x01c9, 0 }; 37 const UChar32 ucs2CharacterSet2[] = { 0x01ca, 0x01cb, 0x01cc, 0 }; 38 const UChar32 ucs2CharacterSet3[] = { 0x01f1, 0x01f2, 0x01f3, 0 }; 39 const UChar32 
ucs2CharacterSet4[] = { 0x0392, 0x03b2, 0x03d0, 0 }; 40 const UChar32 ucs2CharacterSet5[] = { 0x0395, 0x03b5, 0x03f5, 0 }; 41 const UChar32 ucs2CharacterSet6[] = { 0x0398, 0x03b8, 0x03d1, 0 }; 42 const UChar32 ucs2CharacterSet7[] = { 0x0345, 0x0399, 0x03b9, 0x1fbe, 0 }; 43 const UChar32 ucs2CharacterSet8[] = { 0x039a, 0x03ba, 0x03f0, 0 }; 44 const UChar32 ucs2CharacterSet9[] = { 0x00b5, 0x039c, 0x03bc, 0 }; 45 const UChar32 ucs2CharacterSet10[] = { 0x03a0, 0x03c0, 0x03d6, 0 }; 46 const UChar32 ucs2CharacterSet11[] = { 0x03a1, 0x03c1, 0x03f1, 0 }; 47 const UChar32 ucs2CharacterSet12[] = { 0x03a3, 0x03c2, 0x03c3, 0 }; 48 const UChar32 ucs2CharacterSet13[] = { 0x03a6, 0x03c6, 0x03d5, 0 }; 49 const UChar32 ucs2CharacterSet14[] = { 0x1e60, 0x1e61, 0x1e9b, 0 }; 50 50 51 51 static const size_t UCS2_CANONICALIZATION_SETS = 15; 52 const uint16_t* const characterSetInfo[UCS2_CANONICALIZATION_SETS] = {52 const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = { 53 53 ucs2CharacterSet0, 54 54 ucs2CharacterSet1, … … 68 68 }; 69 69 70 const size_t UCS2_CANONICALIZATION_RANGES = 364; 71 const UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = { 72 { 0x0000u, 0x0040u, 0x0000u, CanonicalizeUnique }, 73 { 0x0041u, 0x005au, 0x0020u, CanonicalizeRangeLo }, 74 { 0x005bu, 0x0060u, 0x0000u, CanonicalizeUnique }, 75 { 0x0061u, 0x007au, 0x0020u, CanonicalizeRangeHi }, 76 { 0x007bu, 0x00b4u, 0x0000u, CanonicalizeUnique }, 77 { 0x00b5u, 0x00b5u, 0x0009u, CanonicalizeSet }, 78 { 0x00b6u, 0x00bfu, 0x0000u, CanonicalizeUnique }, 79 { 0x00c0u, 0x00d6u, 0x0020u, CanonicalizeRangeLo }, 80 { 0x00d7u, 0x00d7u, 0x0000u, CanonicalizeUnique }, 81 { 0x00d8u, 0x00deu, 0x0020u, CanonicalizeRangeLo }, 82 { 0x00dfu, 0x00dfu, 0x0000u, CanonicalizeUnique }, 83 { 0x00e0u, 0x00f6u, 0x0020u, CanonicalizeRangeHi }, 84 { 0x00f7u, 0x00f7u, 0x0000u, CanonicalizeUnique }, 85 { 0x00f8u, 0x00feu, 0x0020u, CanonicalizeRangeHi }, 86 { 0x00ffu, 0x00ffu, 0x0079u, CanonicalizeRangeLo }, 87 { 
0x0100u, 0x012fu, 0x0000u, CanonicalizeAlternatingAligned }, 88 { 0x0130u, 0x0131u, 0x0000u, CanonicalizeUnique }, 89 { 0x0132u, 0x0137u, 0x0000u, CanonicalizeAlternatingAligned }, 90 { 0x0138u, 0x0138u, 0x0000u, CanonicalizeUnique }, 91 { 0x0139u, 0x0148u, 0x0000u, CanonicalizeAlternatingUnaligned }, 92 { 0x0149u, 0x0149u, 0x0000u, CanonicalizeUnique }, 93 { 0x014au, 0x0177u, 0x0000u, CanonicalizeAlternatingAligned }, 94 { 0x0178u, 0x0178u, 0x0079u, CanonicalizeRangeHi }, 95 { 0x0179u, 0x017eu, 0x0000u, CanonicalizeAlternatingUnaligned }, 96 { 0x017fu, 0x017fu, 0x0000u, CanonicalizeUnique }, 97 { 0x0180u, 0x0180u, 0x00c3u, CanonicalizeRangeLo }, 98 { 0x0181u, 0x0181u, 0x00d2u, CanonicalizeRangeLo }, 99 { 0x0182u, 0x0185u, 0x0000u, CanonicalizeAlternatingAligned }, 100 { 0x0186u, 0x0186u, 0x00ceu, CanonicalizeRangeLo }, 101 { 0x0187u, 0x0188u, 0x0000u, CanonicalizeAlternatingUnaligned }, 102 { 0x0189u, 0x018au, 0x00cdu, CanonicalizeRangeLo }, 103 { 0x018bu, 0x018cu, 0x0000u, CanonicalizeAlternatingUnaligned }, 104 { 0x018du, 0x018du, 0x0000u, CanonicalizeUnique }, 105 { 0x018eu, 0x018eu, 0x004fu, CanonicalizeRangeLo }, 106 { 0x018fu, 0x018fu, 0x00cau, CanonicalizeRangeLo }, 107 { 0x0190u, 0x0190u, 0x00cbu, CanonicalizeRangeLo }, 108 { 0x0191u, 0x0192u, 0x0000u, CanonicalizeAlternatingUnaligned }, 109 { 0x0193u, 0x0193u, 0x00cdu, CanonicalizeRangeLo }, 110 { 0x0194u, 0x0194u, 0x00cfu, CanonicalizeRangeLo }, 111 { 0x0195u, 0x0195u, 0x0061u, CanonicalizeRangeLo }, 112 { 0x0196u, 0x0196u, 0x00d3u, CanonicalizeRangeLo }, 113 { 0x0197u, 0x0197u, 0x00d1u, CanonicalizeRangeLo }, 114 { 0x0198u, 0x0199u, 0x0000u, CanonicalizeAlternatingAligned }, 115 { 0x019au, 0x019au, 0x00a3u, CanonicalizeRangeLo }, 116 { 0x019bu, 0x019bu, 0x0000u, CanonicalizeUnique }, 117 { 0x019cu, 0x019cu, 0x00d3u, CanonicalizeRangeLo }, 118 { 0x019du, 0x019du, 0x00d5u, CanonicalizeRangeLo }, 119 { 0x019eu, 0x019eu, 0x0082u, CanonicalizeRangeLo }, 120 { 0x019fu, 0x019fu, 0x00d6u, CanonicalizeRangeLo }, 
121 { 0x01a0u, 0x01a5u, 0x0000u, CanonicalizeAlternatingAligned }, 122 { 0x01a6u, 0x01a6u, 0x00dau, CanonicalizeRangeLo }, 123 { 0x01a7u, 0x01a8u, 0x0000u, CanonicalizeAlternatingUnaligned }, 124 { 0x01a9u, 0x01a9u, 0x00dau, CanonicalizeRangeLo }, 125 { 0x01aau, 0x01abu, 0x0000u, CanonicalizeUnique }, 126 { 0x01acu, 0x01adu, 0x0000u, CanonicalizeAlternatingAligned }, 127 { 0x01aeu, 0x01aeu, 0x00dau, CanonicalizeRangeLo }, 128 { 0x01afu, 0x01b0u, 0x0000u, CanonicalizeAlternatingUnaligned }, 129 { 0x01b1u, 0x01b2u, 0x00d9u, CanonicalizeRangeLo }, 130 { 0x01b3u, 0x01b6u, 0x0000u, CanonicalizeAlternatingUnaligned }, 131 { 0x01b7u, 0x01b7u, 0x00dbu, CanonicalizeRangeLo }, 132 { 0x01b8u, 0x01b9u, 0x0000u, CanonicalizeAlternatingAligned }, 133 { 0x01bau, 0x01bbu, 0x0000u, CanonicalizeUnique }, 134 { 0x01bcu, 0x01bdu, 0x0000u, CanonicalizeAlternatingAligned }, 135 { 0x01beu, 0x01beu, 0x0000u, CanonicalizeUnique }, 136 { 0x01bfu, 0x01bfu, 0x0038u, CanonicalizeRangeLo }, 137 { 0x01c0u, 0x01c3u, 0x0000u, CanonicalizeUnique }, 138 { 0x01c4u, 0x01c6u, 0x0000u, CanonicalizeSet }, 139 { 0x01c7u, 0x01c9u, 0x0001u, CanonicalizeSet }, 140 { 0x01cau, 0x01ccu, 0x0002u, CanonicalizeSet }, 141 { 0x01cdu, 0x01dcu, 0x0000u, CanonicalizeAlternatingUnaligned }, 142 { 0x01ddu, 0x01ddu, 0x004fu, CanonicalizeRangeHi }, 143 { 0x01deu, 0x01efu, 0x0000u, CanonicalizeAlternatingAligned }, 144 { 0x01f0u, 0x01f0u, 0x0000u, CanonicalizeUnique }, 145 { 0x01f1u, 0x01f3u, 0x0003u, CanonicalizeSet }, 146 { 0x01f4u, 0x01f5u, 0x0000u, CanonicalizeAlternatingAligned }, 147 { 0x01f6u, 0x01f6u, 0x0061u, CanonicalizeRangeHi }, 148 { 0x01f7u, 0x01f7u, 0x0038u, CanonicalizeRangeHi }, 149 { 0x01f8u, 0x021fu, 0x0000u, CanonicalizeAlternatingAligned }, 150 { 0x0220u, 0x0220u, 0x0082u, CanonicalizeRangeHi }, 151 { 0x0221u, 0x0221u, 0x0000u, CanonicalizeUnique }, 152 { 0x0222u, 0x0233u, 0x0000u, CanonicalizeAlternatingAligned }, 153 { 0x0234u, 0x0239u, 0x0000u, CanonicalizeUnique }, 154 { 0x023au, 0x023au, 0x2a2bu, 
CanonicalizeRangeLo }, 155 { 0x023bu, 0x023cu, 0x0000u, CanonicalizeAlternatingUnaligned }, 156 { 0x023du, 0x023du, 0x00a3u, CanonicalizeRangeHi }, 157 { 0x023eu, 0x023eu, 0x2a28u, CanonicalizeRangeLo }, 158 { 0x023fu, 0x0240u, 0x2a3fu, CanonicalizeRangeLo }, 159 { 0x0241u, 0x0242u, 0x0000u, CanonicalizeAlternatingUnaligned }, 160 { 0x0243u, 0x0243u, 0x00c3u, CanonicalizeRangeHi }, 161 { 0x0244u, 0x0244u, 0x0045u, CanonicalizeRangeLo }, 162 { 0x0245u, 0x0245u, 0x0047u, CanonicalizeRangeLo }, 163 { 0x0246u, 0x024fu, 0x0000u, CanonicalizeAlternatingAligned }, 164 { 0x0250u, 0x0250u, 0x2a1fu, CanonicalizeRangeLo }, 165 { 0x0251u, 0x0251u, 0x2a1cu, CanonicalizeRangeLo }, 166 { 0x0252u, 0x0252u, 0x2a1eu, CanonicalizeRangeLo }, 167 { 0x0253u, 0x0253u, 0x00d2u, CanonicalizeRangeHi }, 168 { 0x0254u, 0x0254u, 0x00ceu, CanonicalizeRangeHi }, 169 { 0x0255u, 0x0255u, 0x0000u, CanonicalizeUnique }, 170 { 0x0256u, 0x0257u, 0x00cdu, CanonicalizeRangeHi }, 171 { 0x0258u, 0x0258u, 0x0000u, CanonicalizeUnique }, 172 { 0x0259u, 0x0259u, 0x00cau, CanonicalizeRangeHi }, 173 { 0x025au, 0x025au, 0x0000u, CanonicalizeUnique }, 174 { 0x025bu, 0x025bu, 0x00cbu, CanonicalizeRangeHi }, 175 { 0x025cu, 0x025fu, 0x0000u, CanonicalizeUnique }, 176 { 0x0260u, 0x0260u, 0x00cdu, CanonicalizeRangeHi }, 177 { 0x0261u, 0x0262u, 0x0000u, CanonicalizeUnique }, 178 { 0x0263u, 0x0263u, 0x00cfu, CanonicalizeRangeHi }, 179 { 0x0264u, 0x0264u, 0x0000u, CanonicalizeUnique }, 180 { 0x0265u, 0x0265u, 0xa528u, CanonicalizeRangeLo }, 181 { 0x0266u, 0x0267u, 0x0000u, CanonicalizeUnique }, 182 { 0x0268u, 0x0268u, 0x00d1u, CanonicalizeRangeHi }, 183 { 0x0269u, 0x0269u, 0x00d3u, CanonicalizeRangeHi }, 184 { 0x026au, 0x026au, 0x0000u, CanonicalizeUnique }, 185 { 0x026bu, 0x026bu, 0x29f7u, CanonicalizeRangeLo }, 186 { 0x026cu, 0x026eu, 0x0000u, CanonicalizeUnique }, 187 { 0x026fu, 0x026fu, 0x00d3u, CanonicalizeRangeHi }, 188 { 0x0270u, 0x0270u, 0x0000u, CanonicalizeUnique }, 189 { 0x0271u, 0x0271u, 0x29fdu, 
CanonicalizeRangeLo }, 190 { 0x0272u, 0x0272u, 0x00d5u, CanonicalizeRangeHi }, 191 { 0x0273u, 0x0274u, 0x0000u, CanonicalizeUnique }, 192 { 0x0275u, 0x0275u, 0x00d6u, CanonicalizeRangeHi }, 193 { 0x0276u, 0x027cu, 0x0000u, CanonicalizeUnique }, 194 { 0x027du, 0x027du, 0x29e7u, CanonicalizeRangeLo }, 195 { 0x027eu, 0x027fu, 0x0000u, CanonicalizeUnique }, 196 { 0x0280u, 0x0280u, 0x00dau, CanonicalizeRangeHi }, 197 { 0x0281u, 0x0282u, 0x0000u, CanonicalizeUnique }, 198 { 0x0283u, 0x0283u, 0x00dau, CanonicalizeRangeHi }, 199 { 0x0284u, 0x0287u, 0x0000u, CanonicalizeUnique }, 200 { 0x0288u, 0x0288u, 0x00dau, CanonicalizeRangeHi }, 201 { 0x0289u, 0x0289u, 0x0045u, CanonicalizeRangeHi }, 202 { 0x028au, 0x028bu, 0x00d9u, CanonicalizeRangeHi }, 203 { 0x028cu, 0x028cu, 0x0047u, CanonicalizeRangeHi }, 204 { 0x028du, 0x0291u, 0x0000u, CanonicalizeUnique }, 205 { 0x0292u, 0x0292u, 0x00dbu, CanonicalizeRangeHi }, 206 { 0x0293u, 0x0344u, 0x0000u, CanonicalizeUnique }, 207 { 0x0345u, 0x0345u, 0x0007u, CanonicalizeSet }, 208 { 0x0346u, 0x036fu, 0x0000u, CanonicalizeUnique }, 209 { 0x0370u, 0x0373u, 0x0000u, CanonicalizeAlternatingAligned }, 210 { 0x0374u, 0x0375u, 0x0000u, CanonicalizeUnique }, 211 { 0x0376u, 0x0377u, 0x0000u, CanonicalizeAlternatingAligned }, 212 { 0x0378u, 0x037au, 0x0000u, CanonicalizeUnique }, 213 { 0x037bu, 0x037du, 0x0082u, CanonicalizeRangeLo }, 214 { 0x037eu, 0x0385u, 0x0000u, CanonicalizeUnique }, 215 { 0x0386u, 0x0386u, 0x0026u, CanonicalizeRangeLo }, 216 { 0x0387u, 0x0387u, 0x0000u, CanonicalizeUnique }, 217 { 0x0388u, 0x038au, 0x0025u, CanonicalizeRangeLo }, 218 { 0x038bu, 0x038bu, 0x0000u, CanonicalizeUnique }, 219 { 0x038cu, 0x038cu, 0x0040u, CanonicalizeRangeLo }, 220 { 0x038du, 0x038du, 0x0000u, CanonicalizeUnique }, 221 { 0x038eu, 0x038fu, 0x003fu, CanonicalizeRangeLo }, 222 { 0x0390u, 0x0390u, 0x0000u, CanonicalizeUnique }, 223 { 0x0391u, 0x0391u, 0x0020u, CanonicalizeRangeLo }, 224 { 0x0392u, 0x0392u, 0x0004u, CanonicalizeSet }, 225 { 0x0393u, 
0x0394u, 0x0020u, CanonicalizeRangeLo }, 226 { 0x0395u, 0x0395u, 0x0005u, CanonicalizeSet }, 227 { 0x0396u, 0x0397u, 0x0020u, CanonicalizeRangeLo }, 228 { 0x0398u, 0x0398u, 0x0006u, CanonicalizeSet }, 229 { 0x0399u, 0x0399u, 0x0007u, CanonicalizeSet }, 230 { 0x039au, 0x039au, 0x0008u, CanonicalizeSet }, 231 { 0x039bu, 0x039bu, 0x0020u, CanonicalizeRangeLo }, 232 { 0x039cu, 0x039cu, 0x0009u, CanonicalizeSet }, 233 { 0x039du, 0x039fu, 0x0020u, CanonicalizeRangeLo }, 234 { 0x03a0u, 0x03a0u, 0x000au, CanonicalizeSet }, 235 { 0x03a1u, 0x03a1u, 0x000bu, CanonicalizeSet }, 236 { 0x03a2u, 0x03a2u, 0x0000u, CanonicalizeUnique }, 237 { 0x03a3u, 0x03a3u, 0x000cu, CanonicalizeSet }, 238 { 0x03a4u, 0x03a5u, 0x0020u, CanonicalizeRangeLo }, 239 { 0x03a6u, 0x03a6u, 0x000du, CanonicalizeSet }, 240 { 0x03a7u, 0x03abu, 0x0020u, CanonicalizeRangeLo }, 241 { 0x03acu, 0x03acu, 0x0026u, CanonicalizeRangeHi }, 242 { 0x03adu, 0x03afu, 0x0025u, CanonicalizeRangeHi }, 243 { 0x03b0u, 0x03b0u, 0x0000u, CanonicalizeUnique }, 244 { 0x03b1u, 0x03b1u, 0x0020u, CanonicalizeRangeHi }, 245 { 0x03b2u, 0x03b2u, 0x0004u, CanonicalizeSet }, 246 { 0x03b3u, 0x03b4u, 0x0020u, CanonicalizeRangeHi }, 247 { 0x03b5u, 0x03b5u, 0x0005u, CanonicalizeSet }, 248 { 0x03b6u, 0x03b7u, 0x0020u, CanonicalizeRangeHi }, 249 { 0x03b8u, 0x03b8u, 0x0006u, CanonicalizeSet }, 250 { 0x03b9u, 0x03b9u, 0x0007u, CanonicalizeSet }, 251 { 0x03bau, 0x03bau, 0x0008u, CanonicalizeSet }, 252 { 0x03bbu, 0x03bbu, 0x0020u, CanonicalizeRangeHi }, 253 { 0x03bcu, 0x03bcu, 0x0009u, CanonicalizeSet }, 254 { 0x03bdu, 0x03bfu, 0x0020u, CanonicalizeRangeHi }, 255 { 0x03c0u, 0x03c0u, 0x000au, CanonicalizeSet }, 256 { 0x03c1u, 0x03c1u, 0x000bu, CanonicalizeSet }, 257 { 0x03c2u, 0x03c3u, 0x000cu, CanonicalizeSet }, 258 { 0x03c4u, 0x03c5u, 0x0020u, CanonicalizeRangeHi }, 259 { 0x03c6u, 0x03c6u, 0x000du, CanonicalizeSet }, 260 { 0x03c7u, 0x03cbu, 0x0020u, CanonicalizeRangeHi }, 261 { 0x03ccu, 0x03ccu, 0x0040u, CanonicalizeRangeHi }, 262 { 0x03cdu, 
0x03ceu, 0x003fu, CanonicalizeRangeHi }, 263 { 0x03cfu, 0x03cfu, 0x0008u, CanonicalizeRangeLo }, 264 { 0x03d0u, 0x03d0u, 0x0004u, CanonicalizeSet }, 265 { 0x03d1u, 0x03d1u, 0x0006u, CanonicalizeSet }, 266 { 0x03d2u, 0x03d4u, 0x0000u, CanonicalizeUnique }, 267 { 0x03d5u, 0x03d5u, 0x000du, CanonicalizeSet }, 268 { 0x03d6u, 0x03d6u, 0x000au, CanonicalizeSet }, 269 { 0x03d7u, 0x03d7u, 0x0008u, CanonicalizeRangeHi }, 270 { 0x03d8u, 0x03efu, 0x0000u, CanonicalizeAlternatingAligned }, 271 { 0x03f0u, 0x03f0u, 0x0008u, CanonicalizeSet }, 272 { 0x03f1u, 0x03f1u, 0x000bu, CanonicalizeSet }, 273 { 0x03f2u, 0x03f2u, 0x0007u, CanonicalizeRangeLo }, 274 { 0x03f3u, 0x03f4u, 0x0000u, CanonicalizeUnique }, 275 { 0x03f5u, 0x03f5u, 0x0005u, CanonicalizeSet }, 276 { 0x03f6u, 0x03f6u, 0x0000u, CanonicalizeUnique }, 277 { 0x03f7u, 0x03f8u, 0x0000u, CanonicalizeAlternatingUnaligned }, 278 { 0x03f9u, 0x03f9u, 0x0007u, CanonicalizeRangeHi }, 279 { 0x03fau, 0x03fbu, 0x0000u, CanonicalizeAlternatingAligned }, 280 { 0x03fcu, 0x03fcu, 0x0000u, CanonicalizeUnique }, 281 { 0x03fdu, 0x03ffu, 0x0082u, CanonicalizeRangeHi }, 282 { 0x0400u, 0x040fu, 0x0050u, CanonicalizeRangeLo }, 283 { 0x0410u, 0x042fu, 0x0020u, CanonicalizeRangeLo }, 284 { 0x0430u, 0x044fu, 0x0020u, CanonicalizeRangeHi }, 285 { 0x0450u, 0x045fu, 0x0050u, CanonicalizeRangeHi }, 286 { 0x0460u, 0x0481u, 0x0000u, CanonicalizeAlternatingAligned }, 287 { 0x0482u, 0x0489u, 0x0000u, CanonicalizeUnique }, 288 { 0x048au, 0x04bfu, 0x0000u, CanonicalizeAlternatingAligned }, 289 { 0x04c0u, 0x04c0u, 0x000fu, CanonicalizeRangeLo }, 290 { 0x04c1u, 0x04ceu, 0x0000u, CanonicalizeAlternatingUnaligned }, 291 { 0x04cfu, 0x04cfu, 0x000fu, CanonicalizeRangeHi }, 292 { 0x04d0u, 0x0527u, 0x0000u, CanonicalizeAlternatingAligned }, 293 { 0x0528u, 0x0530u, 0x0000u, CanonicalizeUnique }, 294 { 0x0531u, 0x0556u, 0x0030u, CanonicalizeRangeLo }, 295 { 0x0557u, 0x0560u, 0x0000u, CanonicalizeUnique }, 296 { 0x0561u, 0x0586u, 0x0030u, CanonicalizeRangeHi }, 297 { 
0x0587u, 0x109fu, 0x0000u, CanonicalizeUnique }, 298 { 0x10a0u, 0x10c5u, 0x1c60u, CanonicalizeRangeLo }, 299 { 0x10c6u, 0x1d78u, 0x0000u, CanonicalizeUnique }, 300 { 0x1d79u, 0x1d79u, 0x8a04u, CanonicalizeRangeLo }, 301 { 0x1d7au, 0x1d7cu, 0x0000u, CanonicalizeUnique }, 302 { 0x1d7du, 0x1d7du, 0x0ee6u, CanonicalizeRangeLo }, 303 { 0x1d7eu, 0x1dffu, 0x0000u, CanonicalizeUnique }, 304 { 0x1e00u, 0x1e5fu, 0x0000u, CanonicalizeAlternatingAligned }, 305 { 0x1e60u, 0x1e61u, 0x000eu, CanonicalizeSet }, 306 { 0x1e62u, 0x1e95u, 0x0000u, CanonicalizeAlternatingAligned }, 307 { 0x1e96u, 0x1e9au, 0x0000u, CanonicalizeUnique }, 308 { 0x1e9bu, 0x1e9bu, 0x000eu, CanonicalizeSet }, 309 { 0x1e9cu, 0x1e9fu, 0x0000u, CanonicalizeUnique }, 310 { 0x1ea0u, 0x1effu, 0x0000u, CanonicalizeAlternatingAligned }, 311 { 0x1f00u, 0x1f07u, 0x0008u, CanonicalizeRangeLo }, 312 { 0x1f08u, 0x1f0fu, 0x0008u, CanonicalizeRangeHi }, 313 { 0x1f10u, 0x1f15u, 0x0008u, CanonicalizeRangeLo }, 314 { 0x1f16u, 0x1f17u, 0x0000u, CanonicalizeUnique }, 315 { 0x1f18u, 0x1f1du, 0x0008u, CanonicalizeRangeHi }, 316 { 0x1f1eu, 0x1f1fu, 0x0000u, CanonicalizeUnique }, 317 { 0x1f20u, 0x1f27u, 0x0008u, CanonicalizeRangeLo }, 318 { 0x1f28u, 0x1f2fu, 0x0008u, CanonicalizeRangeHi }, 319 { 0x1f30u, 0x1f37u, 0x0008u, CanonicalizeRangeLo }, 320 { 0x1f38u, 0x1f3fu, 0x0008u, CanonicalizeRangeHi }, 321 { 0x1f40u, 0x1f45u, 0x0008u, CanonicalizeRangeLo }, 322 { 0x1f46u, 0x1f47u, 0x0000u, CanonicalizeUnique }, 323 { 0x1f48u, 0x1f4du, 0x0008u, CanonicalizeRangeHi }, 324 { 0x1f4eu, 0x1f50u, 0x0000u, CanonicalizeUnique }, 325 { 0x1f51u, 0x1f51u, 0x0008u, CanonicalizeRangeLo }, 326 { 0x1f52u, 0x1f52u, 0x0000u, CanonicalizeUnique }, 327 { 0x1f53u, 0x1f53u, 0x0008u, CanonicalizeRangeLo }, 328 { 0x1f54u, 0x1f54u, 0x0000u, CanonicalizeUnique }, 329 { 0x1f55u, 0x1f55u, 0x0008u, CanonicalizeRangeLo }, 330 { 0x1f56u, 0x1f56u, 0x0000u, CanonicalizeUnique }, 331 { 0x1f57u, 0x1f57u, 0x0008u, CanonicalizeRangeLo }, 332 { 0x1f58u, 0x1f58u, 0x0000u, 
CanonicalizeUnique }, 333 { 0x1f59u, 0x1f59u, 0x0008u, CanonicalizeRangeHi }, 334 { 0x1f5au, 0x1f5au, 0x0000u, CanonicalizeUnique }, 335 { 0x1f5bu, 0x1f5bu, 0x0008u, CanonicalizeRangeHi }, 336 { 0x1f5cu, 0x1f5cu, 0x0000u, CanonicalizeUnique }, 337 { 0x1f5du, 0x1f5du, 0x0008u, CanonicalizeRangeHi }, 338 { 0x1f5eu, 0x1f5eu, 0x0000u, CanonicalizeUnique }, 339 { 0x1f5fu, 0x1f5fu, 0x0008u, CanonicalizeRangeHi }, 340 { 0x1f60u, 0x1f67u, 0x0008u, CanonicalizeRangeLo }, 341 { 0x1f68u, 0x1f6fu, 0x0008u, CanonicalizeRangeHi }, 342 { 0x1f70u, 0x1f71u, 0x004au, CanonicalizeRangeLo }, 343 { 0x1f72u, 0x1f75u, 0x0056u, CanonicalizeRangeLo }, 344 { 0x1f76u, 0x1f77u, 0x0064u, CanonicalizeRangeLo }, 345 { 0x1f78u, 0x1f79u, 0x0080u, CanonicalizeRangeLo }, 346 { 0x1f7au, 0x1f7bu, 0x0070u, CanonicalizeRangeLo }, 347 { 0x1f7cu, 0x1f7du, 0x007eu, CanonicalizeRangeLo }, 348 { 0x1f7eu, 0x1fafu, 0x0000u, CanonicalizeUnique }, 349 { 0x1fb0u, 0x1fb1u, 0x0008u, CanonicalizeRangeLo }, 350 { 0x1fb2u, 0x1fb7u, 0x0000u, CanonicalizeUnique }, 351 { 0x1fb8u, 0x1fb9u, 0x0008u, CanonicalizeRangeHi }, 352 { 0x1fbau, 0x1fbbu, 0x004au, CanonicalizeRangeHi }, 353 { 0x1fbcu, 0x1fbdu, 0x0000u, CanonicalizeUnique }, 354 { 0x1fbeu, 0x1fbeu, 0x0007u, CanonicalizeSet }, 355 { 0x1fbfu, 0x1fc7u, 0x0000u, CanonicalizeUnique }, 356 { 0x1fc8u, 0x1fcbu, 0x0056u, CanonicalizeRangeHi }, 357 { 0x1fccu, 0x1fcfu, 0x0000u, CanonicalizeUnique }, 358 { 0x1fd0u, 0x1fd1u, 0x0008u, CanonicalizeRangeLo }, 359 { 0x1fd2u, 0x1fd7u, 0x0000u, CanonicalizeUnique }, 360 { 0x1fd8u, 0x1fd9u, 0x0008u, CanonicalizeRangeHi }, 361 { 0x1fdau, 0x1fdbu, 0x0064u, CanonicalizeRangeHi }, 362 { 0x1fdcu, 0x1fdfu, 0x0000u, CanonicalizeUnique }, 363 { 0x1fe0u, 0x1fe1u, 0x0008u, CanonicalizeRangeLo }, 364 { 0x1fe2u, 0x1fe4u, 0x0000u, CanonicalizeUnique }, 365 { 0x1fe5u, 0x1fe5u, 0x0007u, CanonicalizeRangeLo }, 366 { 0x1fe6u, 0x1fe7u, 0x0000u, CanonicalizeUnique }, 367 { 0x1fe8u, 0x1fe9u, 0x0008u, CanonicalizeRangeHi }, 368 { 0x1feau, 0x1febu, 0x0070u, 
CanonicalizeRangeHi }, 369 { 0x1fecu, 0x1fecu, 0x0007u, CanonicalizeRangeHi }, 370 { 0x1fedu, 0x1ff7u, 0x0000u, CanonicalizeUnique }, 371 { 0x1ff8u, 0x1ff9u, 0x0080u, CanonicalizeRangeHi }, 372 { 0x1ffau, 0x1ffbu, 0x007eu, CanonicalizeRangeHi }, 373 { 0x1ffcu, 0x2131u, 0x0000u, CanonicalizeUnique }, 374 { 0x2132u, 0x2132u, 0x001cu, CanonicalizeRangeLo }, 375 { 0x2133u, 0x214du, 0x0000u, CanonicalizeUnique }, 376 { 0x214eu, 0x214eu, 0x001cu, CanonicalizeRangeHi }, 377 { 0x214fu, 0x215fu, 0x0000u, CanonicalizeUnique }, 378 { 0x2160u, 0x216fu, 0x0010u, CanonicalizeRangeLo }, 379 { 0x2170u, 0x217fu, 0x0010u, CanonicalizeRangeHi }, 380 { 0x2180u, 0x2182u, 0x0000u, CanonicalizeUnique }, 381 { 0x2183u, 0x2184u, 0x0000u, CanonicalizeAlternatingUnaligned }, 382 { 0x2185u, 0x24b5u, 0x0000u, CanonicalizeUnique }, 383 { 0x24b6u, 0x24cfu, 0x001au, CanonicalizeRangeLo }, 384 { 0x24d0u, 0x24e9u, 0x001au, CanonicalizeRangeHi }, 385 { 0x24eau, 0x2bffu, 0x0000u, CanonicalizeUnique }, 386 { 0x2c00u, 0x2c2eu, 0x0030u, CanonicalizeRangeLo }, 387 { 0x2c2fu, 0x2c2fu, 0x0000u, CanonicalizeUnique }, 388 { 0x2c30u, 0x2c5eu, 0x0030u, CanonicalizeRangeHi }, 389 { 0x2c5fu, 0x2c5fu, 0x0000u, CanonicalizeUnique }, 390 { 0x2c60u, 0x2c61u, 0x0000u, CanonicalizeAlternatingAligned }, 391 { 0x2c62u, 0x2c62u, 0x29f7u, CanonicalizeRangeHi }, 392 { 0x2c63u, 0x2c63u, 0x0ee6u, CanonicalizeRangeHi }, 393 { 0x2c64u, 0x2c64u, 0x29e7u, CanonicalizeRangeHi }, 394 { 0x2c65u, 0x2c65u, 0x2a2bu, CanonicalizeRangeHi }, 395 { 0x2c66u, 0x2c66u, 0x2a28u, CanonicalizeRangeHi }, 396 { 0x2c67u, 0x2c6cu, 0x0000u, CanonicalizeAlternatingUnaligned }, 397 { 0x2c6du, 0x2c6du, 0x2a1cu, CanonicalizeRangeHi }, 398 { 0x2c6eu, 0x2c6eu, 0x29fdu, CanonicalizeRangeHi }, 399 { 0x2c6fu, 0x2c6fu, 0x2a1fu, CanonicalizeRangeHi }, 400 { 0x2c70u, 0x2c70u, 0x2a1eu, CanonicalizeRangeHi }, 401 { 0x2c71u, 0x2c71u, 0x0000u, CanonicalizeUnique }, 402 { 0x2c72u, 0x2c73u, 0x0000u, CanonicalizeAlternatingAligned }, 403 { 0x2c74u, 0x2c74u, 0x0000u, 
CanonicalizeUnique }, 404 { 0x2c75u, 0x2c76u, 0x0000u, CanonicalizeAlternatingUnaligned }, 405 { 0x2c77u, 0x2c7du, 0x0000u, CanonicalizeUnique }, 406 { 0x2c7eu, 0x2c7fu, 0x2a3fu, CanonicalizeRangeHi }, 407 { 0x2c80u, 0x2ce3u, 0x0000u, CanonicalizeAlternatingAligned }, 408 { 0x2ce4u, 0x2ceau, 0x0000u, CanonicalizeUnique }, 409 { 0x2cebu, 0x2ceeu, 0x0000u, CanonicalizeAlternatingUnaligned }, 410 { 0x2cefu, 0x2cffu, 0x0000u, CanonicalizeUnique }, 411 { 0x2d00u, 0x2d25u, 0x1c60u, CanonicalizeRangeHi }, 412 { 0x2d26u, 0xa63fu, 0x0000u, CanonicalizeUnique }, 413 { 0xa640u, 0xa66du, 0x0000u, CanonicalizeAlternatingAligned }, 414 { 0xa66eu, 0xa67fu, 0x0000u, CanonicalizeUnique }, 415 { 0xa680u, 0xa697u, 0x0000u, CanonicalizeAlternatingAligned }, 416 { 0xa698u, 0xa721u, 0x0000u, CanonicalizeUnique }, 417 { 0xa722u, 0xa72fu, 0x0000u, CanonicalizeAlternatingAligned }, 418 { 0xa730u, 0xa731u, 0x0000u, CanonicalizeUnique }, 419 { 0xa732u, 0xa76fu, 0x0000u, CanonicalizeAlternatingAligned }, 420 { 0xa770u, 0xa778u, 0x0000u, CanonicalizeUnique }, 421 { 0xa779u, 0xa77cu, 0x0000u, CanonicalizeAlternatingUnaligned }, 422 { 0xa77du, 0xa77du, 0x8a04u, CanonicalizeRangeHi }, 423 { 0xa77eu, 0xa787u, 0x0000u, CanonicalizeAlternatingAligned }, 424 { 0xa788u, 0xa78au, 0x0000u, CanonicalizeUnique }, 425 { 0xa78bu, 0xa78cu, 0x0000u, CanonicalizeAlternatingUnaligned }, 426 { 0xa78du, 0xa78du, 0xa528u, CanonicalizeRangeHi }, 427 { 0xa78eu, 0xa78fu, 0x0000u, CanonicalizeUnique }, 428 { 0xa790u, 0xa791u, 0x0000u, CanonicalizeAlternatingAligned }, 429 { 0xa792u, 0xa79fu, 0x0000u, CanonicalizeUnique }, 430 { 0xa7a0u, 0xa7a9u, 0x0000u, CanonicalizeAlternatingAligned }, 431 { 0xa7aau, 0xff20u, 0x0000u, CanonicalizeUnique }, 432 { 0xff21u, 0xff3au, 0x0020u, CanonicalizeRangeLo }, 433 { 0xff3bu, 0xff40u, 0x0000u, CanonicalizeUnique }, 434 { 0xff41u, 0xff5au, 0x0020u, CanonicalizeRangeHi }, 435 { 0xff5bu, 0xffffu, 0x0000u, CanonicalizeUnique }, 70 const size_t UCS2_CANONICALIZATION_RANGES = 391; 71 
const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = { 72 { 0x0000, 0x0040, 0x0000, CanonicalizeUnique }, 73 { 0x0041, 0x005a, 0x0020, CanonicalizeRangeLo }, 74 { 0x005b, 0x0060, 0x0000, CanonicalizeUnique }, 75 { 0x0061, 0x007a, 0x0020, CanonicalizeRangeHi }, 76 { 0x007b, 0x00b4, 0x0000, CanonicalizeUnique }, 77 { 0x00b5, 0x00b5, 0x0009, CanonicalizeSet }, 78 { 0x00b6, 0x00bf, 0x0000, CanonicalizeUnique }, 79 { 0x00c0, 0x00d6, 0x0020, CanonicalizeRangeLo }, 80 { 0x00d7, 0x00d7, 0x0000, CanonicalizeUnique }, 81 { 0x00d8, 0x00de, 0x0020, CanonicalizeRangeLo }, 82 { 0x00df, 0x00df, 0x0000, CanonicalizeUnique }, 83 { 0x00e0, 0x00f6, 0x0020, CanonicalizeRangeHi }, 84 { 0x00f7, 0x00f7, 0x0000, CanonicalizeUnique }, 85 { 0x00f8, 0x00fe, 0x0020, CanonicalizeRangeHi }, 86 { 0x00ff, 0x00ff, 0x0079, CanonicalizeRangeLo }, 87 { 0x0100, 0x012f, 0x0000, CanonicalizeAlternatingAligned }, 88 { 0x0130, 0x0131, 0x0000, CanonicalizeUnique }, 89 { 0x0132, 0x0137, 0x0000, CanonicalizeAlternatingAligned }, 90 { 0x0138, 0x0138, 0x0000, CanonicalizeUnique }, 91 { 0x0139, 0x0148, 0x0000, CanonicalizeAlternatingUnaligned }, 92 { 0x0149, 0x0149, 0x0000, CanonicalizeUnique }, 93 { 0x014a, 0x0177, 0x0000, CanonicalizeAlternatingAligned }, 94 { 0x0178, 0x0178, 0x0079, CanonicalizeRangeHi }, 95 { 0x0179, 0x017e, 0x0000, CanonicalizeAlternatingUnaligned }, 96 { 0x017f, 0x017f, 0x0000, CanonicalizeUnique }, 97 { 0x0180, 0x0180, 0x00c3, CanonicalizeRangeLo }, 98 { 0x0181, 0x0181, 0x00d2, CanonicalizeRangeLo }, 99 { 0x0182, 0x0185, 0x0000, CanonicalizeAlternatingAligned }, 100 { 0x0186, 0x0186, 0x00ce, CanonicalizeRangeLo }, 101 { 0x0187, 0x0188, 0x0000, CanonicalizeAlternatingUnaligned }, 102 { 0x0189, 0x018a, 0x00cd, CanonicalizeRangeLo }, 103 { 0x018b, 0x018c, 0x0000, CanonicalizeAlternatingUnaligned }, 104 { 0x018d, 0x018d, 0x0000, CanonicalizeUnique }, 105 { 0x018e, 0x018e, 0x004f, CanonicalizeRangeLo }, 106 { 0x018f, 0x018f, 0x00ca, CanonicalizeRangeLo }, 107 { 0x0190, 
0x0190, 0x00cb, CanonicalizeRangeLo }, 108 { 0x0191, 0x0192, 0x0000, CanonicalizeAlternatingUnaligned }, 109 { 0x0193, 0x0193, 0x00cd, CanonicalizeRangeLo }, 110 { 0x0194, 0x0194, 0x00cf, CanonicalizeRangeLo }, 111 { 0x0195, 0x0195, 0x0061, CanonicalizeRangeLo }, 112 { 0x0196, 0x0196, 0x00d3, CanonicalizeRangeLo }, 113 { 0x0197, 0x0197, 0x00d1, CanonicalizeRangeLo }, 114 { 0x0198, 0x0199, 0x0000, CanonicalizeAlternatingAligned }, 115 { 0x019a, 0x019a, 0x00a3, CanonicalizeRangeLo }, 116 { 0x019b, 0x019b, 0x0000, CanonicalizeUnique }, 117 { 0x019c, 0x019c, 0x00d3, CanonicalizeRangeLo }, 118 { 0x019d, 0x019d, 0x00d5, CanonicalizeRangeLo }, 119 { 0x019e, 0x019e, 0x0082, CanonicalizeRangeLo }, 120 { 0x019f, 0x019f, 0x00d6, CanonicalizeRangeLo }, 121 { 0x01a0, 0x01a5, 0x0000, CanonicalizeAlternatingAligned }, 122 { 0x01a6, 0x01a6, 0x00da, CanonicalizeRangeLo }, 123 { 0x01a7, 0x01a8, 0x0000, CanonicalizeAlternatingUnaligned }, 124 { 0x01a9, 0x01a9, 0x00da, CanonicalizeRangeLo }, 125 { 0x01aa, 0x01ab, 0x0000, CanonicalizeUnique }, 126 { 0x01ac, 0x01ad, 0x0000, CanonicalizeAlternatingAligned }, 127 { 0x01ae, 0x01ae, 0x00da, CanonicalizeRangeLo }, 128 { 0x01af, 0x01b0, 0x0000, CanonicalizeAlternatingUnaligned }, 129 { 0x01b1, 0x01b2, 0x00d9, CanonicalizeRangeLo }, 130 { 0x01b3, 0x01b6, 0x0000, CanonicalizeAlternatingUnaligned }, 131 { 0x01b7, 0x01b7, 0x00db, CanonicalizeRangeLo }, 132 { 0x01b8, 0x01b9, 0x0000, CanonicalizeAlternatingAligned }, 133 { 0x01ba, 0x01bb, 0x0000, CanonicalizeUnique }, 134 { 0x01bc, 0x01bd, 0x0000, CanonicalizeAlternatingAligned }, 135 { 0x01be, 0x01be, 0x0000, CanonicalizeUnique }, 136 { 0x01bf, 0x01bf, 0x0038, CanonicalizeRangeLo }, 137 { 0x01c0, 0x01c3, 0x0000, CanonicalizeUnique }, 138 { 0x01c4, 0x01c6, 0x0000, CanonicalizeSet }, 139 { 0x01c7, 0x01c9, 0x0001, CanonicalizeSet }, 140 { 0x01ca, 0x01cc, 0x0002, CanonicalizeSet }, 141 { 0x01cd, 0x01dc, 0x0000, CanonicalizeAlternatingUnaligned }, 142 { 0x01dd, 0x01dd, 0x004f, CanonicalizeRangeHi }, 
143 { 0x01de, 0x01ef, 0x0000, CanonicalizeAlternatingAligned }, 144 { 0x01f0, 0x01f0, 0x0000, CanonicalizeUnique }, 145 { 0x01f1, 0x01f3, 0x0003, CanonicalizeSet }, 146 { 0x01f4, 0x01f5, 0x0000, CanonicalizeAlternatingAligned }, 147 { 0x01f6, 0x01f6, 0x0061, CanonicalizeRangeHi }, 148 { 0x01f7, 0x01f7, 0x0038, CanonicalizeRangeHi }, 149 { 0x01f8, 0x021f, 0x0000, CanonicalizeAlternatingAligned }, 150 { 0x0220, 0x0220, 0x0082, CanonicalizeRangeHi }, 151 { 0x0221, 0x0221, 0x0000, CanonicalizeUnique }, 152 { 0x0222, 0x0233, 0x0000, CanonicalizeAlternatingAligned }, 153 { 0x0234, 0x0239, 0x0000, CanonicalizeUnique }, 154 { 0x023a, 0x023a, 0x2a2b, CanonicalizeRangeLo }, 155 { 0x023b, 0x023c, 0x0000, CanonicalizeAlternatingUnaligned }, 156 { 0x023d, 0x023d, 0x00a3, CanonicalizeRangeHi }, 157 { 0x023e, 0x023e, 0x2a28, CanonicalizeRangeLo }, 158 { 0x023f, 0x0240, 0x2a3f, CanonicalizeRangeLo }, 159 { 0x0241, 0x0242, 0x0000, CanonicalizeAlternatingUnaligned }, 160 { 0x0243, 0x0243, 0x00c3, CanonicalizeRangeHi }, 161 { 0x0244, 0x0244, 0x0045, CanonicalizeRangeLo }, 162 { 0x0245, 0x0245, 0x0047, CanonicalizeRangeLo }, 163 { 0x0246, 0x024f, 0x0000, CanonicalizeAlternatingAligned }, 164 { 0x0250, 0x0250, 0x2a1f, CanonicalizeRangeLo }, 165 { 0x0251, 0x0251, 0x2a1c, CanonicalizeRangeLo }, 166 { 0x0252, 0x0252, 0x2a1e, CanonicalizeRangeLo }, 167 { 0x0253, 0x0253, 0x00d2, CanonicalizeRangeHi }, 168 { 0x0254, 0x0254, 0x00ce, CanonicalizeRangeHi }, 169 { 0x0255, 0x0255, 0x0000, CanonicalizeUnique }, 170 { 0x0256, 0x0257, 0x00cd, CanonicalizeRangeHi }, 171 { 0x0258, 0x0258, 0x0000, CanonicalizeUnique }, 172 { 0x0259, 0x0259, 0x00ca, CanonicalizeRangeHi }, 173 { 0x025a, 0x025a, 0x0000, CanonicalizeUnique }, 174 { 0x025b, 0x025b, 0x00cb, CanonicalizeRangeHi }, 175 { 0x025c, 0x025c, 0xa54f, CanonicalizeRangeLo }, 176 { 0x025d, 0x025f, 0x0000, CanonicalizeUnique }, 177 { 0x0260, 0x0260, 0x00cd, CanonicalizeRangeHi }, 178 { 0x0261, 0x0261, 0xa54b, CanonicalizeRangeLo }, 179 { 0x0262, 0x0262, 
0x0000, CanonicalizeUnique }, 180 { 0x0263, 0x0263, 0x00cf, CanonicalizeRangeHi }, 181 { 0x0264, 0x0264, 0x0000, CanonicalizeUnique }, 182 { 0x0265, 0x0265, 0xa528, CanonicalizeRangeLo }, 183 { 0x0266, 0x0266, 0xa544, CanonicalizeRangeLo }, 184 { 0x0267, 0x0267, 0x0000, CanonicalizeUnique }, 185 { 0x0268, 0x0268, 0x00d1, CanonicalizeRangeHi }, 186 { 0x0269, 0x0269, 0x00d3, CanonicalizeRangeHi }, 187 { 0x026a, 0x026a, 0x0000, CanonicalizeUnique }, 188 { 0x026b, 0x026b, 0x29f7, CanonicalizeRangeLo }, 189 { 0x026c, 0x026c, 0xa541, CanonicalizeRangeLo }, 190 { 0x026d, 0x026e, 0x0000, CanonicalizeUnique }, 191 { 0x026f, 0x026f, 0x00d3, CanonicalizeRangeHi }, 192 { 0x0270, 0x0270, 0x0000, CanonicalizeUnique }, 193 { 0x0271, 0x0271, 0x29fd, CanonicalizeRangeLo }, 194 { 0x0272, 0x0272, 0x00d5, CanonicalizeRangeHi }, 195 { 0x0273, 0x0274, 0x0000, CanonicalizeUnique }, 196 { 0x0275, 0x0275, 0x00d6, CanonicalizeRangeHi }, 197 { 0x0276, 0x027c, 0x0000, CanonicalizeUnique }, 198 { 0x027d, 0x027d, 0x29e7, CanonicalizeRangeLo }, 199 { 0x027e, 0x027f, 0x0000, CanonicalizeUnique }, 200 { 0x0280, 0x0280, 0x00da, CanonicalizeRangeHi }, 201 { 0x0281, 0x0282, 0x0000, CanonicalizeUnique }, 202 { 0x0283, 0x0283, 0x00da, CanonicalizeRangeHi }, 203 { 0x0284, 0x0286, 0x0000, CanonicalizeUnique }, 204 { 0x0287, 0x0287, 0xa52a, CanonicalizeRangeLo }, 205 { 0x0288, 0x0288, 0x00da, CanonicalizeRangeHi }, 206 { 0x0289, 0x0289, 0x0045, CanonicalizeRangeHi }, 207 { 0x028a, 0x028b, 0x00d9, CanonicalizeRangeHi }, 208 { 0x028c, 0x028c, 0x0047, CanonicalizeRangeHi }, 209 { 0x028d, 0x0291, 0x0000, CanonicalizeUnique }, 210 { 0x0292, 0x0292, 0x00db, CanonicalizeRangeHi }, 211 { 0x0293, 0x029d, 0x0000, CanonicalizeUnique }, 212 { 0x029e, 0x029e, 0xa512, CanonicalizeRangeLo }, 213 { 0x029f, 0x0344, 0x0000, CanonicalizeUnique }, 214 { 0x0345, 0x0345, 0x0007, CanonicalizeSet }, 215 { 0x0346, 0x036f, 0x0000, CanonicalizeUnique }, 216 { 0x0370, 0x0373, 0x0000, CanonicalizeAlternatingAligned }, 217 { 0x0374, 
0x0375, 0x0000, CanonicalizeUnique }, 218 { 0x0376, 0x0377, 0x0000, CanonicalizeAlternatingAligned }, 219 { 0x0378, 0x037a, 0x0000, CanonicalizeUnique }, 220 { 0x037b, 0x037d, 0x0082, CanonicalizeRangeLo }, 221 { 0x037e, 0x037e, 0x0000, CanonicalizeUnique }, 222 { 0x037f, 0x037f, 0x0074, CanonicalizeRangeLo }, 223 { 0x0380, 0x0385, 0x0000, CanonicalizeUnique }, 224 { 0x0386, 0x0386, 0x0026, CanonicalizeRangeLo }, 225 { 0x0387, 0x0387, 0x0000, CanonicalizeUnique }, 226 { 0x0388, 0x038a, 0x0025, CanonicalizeRangeLo }, 227 { 0x038b, 0x038b, 0x0000, CanonicalizeUnique }, 228 { 0x038c, 0x038c, 0x0040, CanonicalizeRangeLo }, 229 { 0x038d, 0x038d, 0x0000, CanonicalizeUnique }, 230 { 0x038e, 0x038f, 0x003f, CanonicalizeRangeLo }, 231 { 0x0390, 0x0390, 0x0000, CanonicalizeUnique }, 232 { 0x0391, 0x0391, 0x0020, CanonicalizeRangeLo }, 233 { 0x0392, 0x0392, 0x0004, CanonicalizeSet }, 234 { 0x0393, 0x0394, 0x0020, CanonicalizeRangeLo }, 235 { 0x0395, 0x0395, 0x0005, CanonicalizeSet }, 236 { 0x0396, 0x0397, 0x0020, CanonicalizeRangeLo }, 237 { 0x0398, 0x0398, 0x0006, CanonicalizeSet }, 238 { 0x0399, 0x0399, 0x0007, CanonicalizeSet }, 239 { 0x039a, 0x039a, 0x0008, CanonicalizeSet }, 240 { 0x039b, 0x039b, 0x0020, CanonicalizeRangeLo }, 241 { 0x039c, 0x039c, 0x0009, CanonicalizeSet }, 242 { 0x039d, 0x039f, 0x0020, CanonicalizeRangeLo }, 243 { 0x03a0, 0x03a0, 0x000a, CanonicalizeSet }, 244 { 0x03a1, 0x03a1, 0x000b, CanonicalizeSet }, 245 { 0x03a2, 0x03a2, 0x0000, CanonicalizeUnique }, 246 { 0x03a3, 0x03a3, 0x000c, CanonicalizeSet }, 247 { 0x03a4, 0x03a5, 0x0020, CanonicalizeRangeLo }, 248 { 0x03a6, 0x03a6, 0x000d, CanonicalizeSet }, 249 { 0x03a7, 0x03ab, 0x0020, CanonicalizeRangeLo }, 250 { 0x03ac, 0x03ac, 0x0026, CanonicalizeRangeHi }, 251 { 0x03ad, 0x03af, 0x0025, CanonicalizeRangeHi }, 252 { 0x03b0, 0x03b0, 0x0000, CanonicalizeUnique }, 253 { 0x03b1, 0x03b1, 0x0020, CanonicalizeRangeHi }, 254 { 0x03b2, 0x03b2, 0x0004, CanonicalizeSet }, 255 { 0x03b3, 0x03b4, 0x0020, 
CanonicalizeRangeHi }, 256 { 0x03b5, 0x03b5, 0x0005, CanonicalizeSet }, 257 { 0x03b6, 0x03b7, 0x0020, CanonicalizeRangeHi }, 258 { 0x03b8, 0x03b8, 0x0006, CanonicalizeSet }, 259 { 0x03b9, 0x03b9, 0x0007, CanonicalizeSet }, 260 { 0x03ba, 0x03ba, 0x0008, CanonicalizeSet }, 261 { 0x03bb, 0x03bb, 0x0020, CanonicalizeRangeHi }, 262 { 0x03bc, 0x03bc, 0x0009, CanonicalizeSet }, 263 { 0x03bd, 0x03bf, 0x0020, CanonicalizeRangeHi }, 264 { 0x03c0, 0x03c0, 0x000a, CanonicalizeSet }, 265 { 0x03c1, 0x03c1, 0x000b, CanonicalizeSet }, 266 { 0x03c2, 0x03c3, 0x000c, CanonicalizeSet }, 267 { 0x03c4, 0x03c5, 0x0020, CanonicalizeRangeHi }, 268 { 0x03c6, 0x03c6, 0x000d, CanonicalizeSet }, 269 { 0x03c7, 0x03cb, 0x0020, CanonicalizeRangeHi }, 270 { 0x03cc, 0x03cc, 0x0040, CanonicalizeRangeHi }, 271 { 0x03cd, 0x03ce, 0x003f, CanonicalizeRangeHi }, 272 { 0x03cf, 0x03cf, 0x0008, CanonicalizeRangeLo }, 273 { 0x03d0, 0x03d0, 0x0004, CanonicalizeSet }, 274 { 0x03d1, 0x03d1, 0x0006, CanonicalizeSet }, 275 { 0x03d2, 0x03d4, 0x0000, CanonicalizeUnique }, 276 { 0x03d5, 0x03d5, 0x000d, CanonicalizeSet }, 277 { 0x03d6, 0x03d6, 0x000a, CanonicalizeSet }, 278 { 0x03d7, 0x03d7, 0x0008, CanonicalizeRangeHi }, 279 { 0x03d8, 0x03ef, 0x0000, CanonicalizeAlternatingAligned }, 280 { 0x03f0, 0x03f0, 0x0008, CanonicalizeSet }, 281 { 0x03f1, 0x03f1, 0x000b, CanonicalizeSet }, 282 { 0x03f2, 0x03f2, 0x0007, CanonicalizeRangeLo }, 283 { 0x03f3, 0x03f3, 0x0074, CanonicalizeRangeHi }, 284 { 0x03f4, 0x03f4, 0x0000, CanonicalizeUnique }, 285 { 0x03f5, 0x03f5, 0x0005, CanonicalizeSet }, 286 { 0x03f6, 0x03f6, 0x0000, CanonicalizeUnique }, 287 { 0x03f7, 0x03f8, 0x0000, CanonicalizeAlternatingUnaligned }, 288 { 0x03f9, 0x03f9, 0x0007, CanonicalizeRangeHi }, 289 { 0x03fa, 0x03fb, 0x0000, CanonicalizeAlternatingAligned }, 290 { 0x03fc, 0x03fc, 0x0000, CanonicalizeUnique }, 291 { 0x03fd, 0x03ff, 0x0082, CanonicalizeRangeHi }, 292 { 0x0400, 0x040f, 0x0050, CanonicalizeRangeLo }, 293 { 0x0410, 0x042f, 0x0020, 
CanonicalizeRangeLo }, 294 { 0x0430, 0x044f, 0x0020, CanonicalizeRangeHi }, 295 { 0x0450, 0x045f, 0x0050, CanonicalizeRangeHi }, 296 { 0x0460, 0x0481, 0x0000, CanonicalizeAlternatingAligned }, 297 { 0x0482, 0x0489, 0x0000, CanonicalizeUnique }, 298 { 0x048a, 0x04bf, 0x0000, CanonicalizeAlternatingAligned }, 299 { 0x04c0, 0x04c0, 0x000f, CanonicalizeRangeLo }, 300 { 0x04c1, 0x04ce, 0x0000, CanonicalizeAlternatingUnaligned }, 301 { 0x04cf, 0x04cf, 0x000f, CanonicalizeRangeHi }, 302 { 0x04d0, 0x052f, 0x0000, CanonicalizeAlternatingAligned }, 303 { 0x0530, 0x0530, 0x0000, CanonicalizeUnique }, 304 { 0x0531, 0x0556, 0x0030, CanonicalizeRangeLo }, 305 { 0x0557, 0x0560, 0x0000, CanonicalizeUnique }, 306 { 0x0561, 0x0586, 0x0030, CanonicalizeRangeHi }, 307 { 0x0587, 0x109f, 0x0000, CanonicalizeUnique }, 308 { 0x10a0, 0x10c5, 0x1c60, CanonicalizeRangeLo }, 309 { 0x10c6, 0x10c6, 0x0000, CanonicalizeUnique }, 310 { 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo }, 311 { 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique }, 312 { 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo }, 313 { 0x10ce, 0x1d78, 0x0000, CanonicalizeUnique }, 314 { 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo }, 315 { 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique }, 316 { 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo }, 317 { 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique }, 318 { 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned }, 319 { 0x1e60, 0x1e61, 0x000e, CanonicalizeSet }, 320 { 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned }, 321 { 0x1e96, 0x1e9a, 0x0000, CanonicalizeUnique }, 322 { 0x1e9b, 0x1e9b, 0x000e, CanonicalizeSet }, 323 { 0x1e9c, 0x1e9f, 0x0000, CanonicalizeUnique }, 324 { 0x1ea0, 0x1eff, 0x0000, CanonicalizeAlternatingAligned }, 325 { 0x1f00, 0x1f07, 0x0008, CanonicalizeRangeLo }, 326 { 0x1f08, 0x1f0f, 0x0008, CanonicalizeRangeHi }, 327 { 0x1f10, 0x1f15, 0x0008, CanonicalizeRangeLo }, 328 { 0x1f16, 0x1f17, 0x0000, CanonicalizeUnique }, 329 { 0x1f18, 0x1f1d, 0x0008, CanonicalizeRangeHi }, 330 { 
0x1f1e, 0x1f1f, 0x0000, CanonicalizeUnique }, 331 { 0x1f20, 0x1f27, 0x0008, CanonicalizeRangeLo }, 332 { 0x1f28, 0x1f2f, 0x0008, CanonicalizeRangeHi }, 333 { 0x1f30, 0x1f37, 0x0008, CanonicalizeRangeLo }, 334 { 0x1f38, 0x1f3f, 0x0008, CanonicalizeRangeHi }, 335 { 0x1f40, 0x1f45, 0x0008, CanonicalizeRangeLo }, 336 { 0x1f46, 0x1f47, 0x0000, CanonicalizeUnique }, 337 { 0x1f48, 0x1f4d, 0x0008, CanonicalizeRangeHi }, 338 { 0x1f4e, 0x1f50, 0x0000, CanonicalizeUnique }, 339 { 0x1f51, 0x1f51, 0x0008, CanonicalizeRangeLo }, 340 { 0x1f52, 0x1f52, 0x0000, CanonicalizeUnique }, 341 { 0x1f53, 0x1f53, 0x0008, CanonicalizeRangeLo }, 342 { 0x1f54, 0x1f54, 0x0000, CanonicalizeUnique }, 343 { 0x1f55, 0x1f55, 0x0008, CanonicalizeRangeLo }, 344 { 0x1f56, 0x1f56, 0x0000, CanonicalizeUnique }, 345 { 0x1f57, 0x1f57, 0x0008, CanonicalizeRangeLo }, 346 { 0x1f58, 0x1f58, 0x0000, CanonicalizeUnique }, 347 { 0x1f59, 0x1f59, 0x0008, CanonicalizeRangeHi }, 348 { 0x1f5a, 0x1f5a, 0x0000, CanonicalizeUnique }, 349 { 0x1f5b, 0x1f5b, 0x0008, CanonicalizeRangeHi }, 350 { 0x1f5c, 0x1f5c, 0x0000, CanonicalizeUnique }, 351 { 0x1f5d, 0x1f5d, 0x0008, CanonicalizeRangeHi }, 352 { 0x1f5e, 0x1f5e, 0x0000, CanonicalizeUnique }, 353 { 0x1f5f, 0x1f5f, 0x0008, CanonicalizeRangeHi }, 354 { 0x1f60, 0x1f67, 0x0008, CanonicalizeRangeLo }, 355 { 0x1f68, 0x1f6f, 0x0008, CanonicalizeRangeHi }, 356 { 0x1f70, 0x1f71, 0x004a, CanonicalizeRangeLo }, 357 { 0x1f72, 0x1f75, 0x0056, CanonicalizeRangeLo }, 358 { 0x1f76, 0x1f77, 0x0064, CanonicalizeRangeLo }, 359 { 0x1f78, 0x1f79, 0x0080, CanonicalizeRangeLo }, 360 { 0x1f7a, 0x1f7b, 0x0070, CanonicalizeRangeLo }, 361 { 0x1f7c, 0x1f7d, 0x007e, CanonicalizeRangeLo }, 362 { 0x1f7e, 0x1faf, 0x0000, CanonicalizeUnique }, 363 { 0x1fb0, 0x1fb1, 0x0008, CanonicalizeRangeLo }, 364 { 0x1fb2, 0x1fb7, 0x0000, CanonicalizeUnique }, 365 { 0x1fb8, 0x1fb9, 0x0008, CanonicalizeRangeHi }, 366 { 0x1fba, 0x1fbb, 0x004a, CanonicalizeRangeHi }, 367 { 0x1fbc, 0x1fbd, 0x0000, CanonicalizeUnique }, 368 
{ 0x1fbe, 0x1fbe, 0x0007, CanonicalizeSet }, 369 { 0x1fbf, 0x1fc7, 0x0000, CanonicalizeUnique }, 370 { 0x1fc8, 0x1fcb, 0x0056, CanonicalizeRangeHi }, 371 { 0x1fcc, 0x1fcf, 0x0000, CanonicalizeUnique }, 372 { 0x1fd0, 0x1fd1, 0x0008, CanonicalizeRangeLo }, 373 { 0x1fd2, 0x1fd7, 0x0000, CanonicalizeUnique }, 374 { 0x1fd8, 0x1fd9, 0x0008, CanonicalizeRangeHi }, 375 { 0x1fda, 0x1fdb, 0x0064, CanonicalizeRangeHi }, 376 { 0x1fdc, 0x1fdf, 0x0000, CanonicalizeUnique }, 377 { 0x1fe0, 0x1fe1, 0x0008, CanonicalizeRangeLo }, 378 { 0x1fe2, 0x1fe4, 0x0000, CanonicalizeUnique }, 379 { 0x1fe5, 0x1fe5, 0x0007, CanonicalizeRangeLo }, 380 { 0x1fe6, 0x1fe7, 0x0000, CanonicalizeUnique }, 381 { 0x1fe8, 0x1fe9, 0x0008, CanonicalizeRangeHi }, 382 { 0x1fea, 0x1feb, 0x0070, CanonicalizeRangeHi }, 383 { 0x1fec, 0x1fec, 0x0007, CanonicalizeRangeHi }, 384 { 0x1fed, 0x1ff7, 0x0000, CanonicalizeUnique }, 385 { 0x1ff8, 0x1ff9, 0x0080, CanonicalizeRangeHi }, 386 { 0x1ffa, 0x1ffb, 0x007e, CanonicalizeRangeHi }, 387 { 0x1ffc, 0x2131, 0x0000, CanonicalizeUnique }, 388 { 0x2132, 0x2132, 0x001c, CanonicalizeRangeLo }, 389 { 0x2133, 0x214d, 0x0000, CanonicalizeUnique }, 390 { 0x214e, 0x214e, 0x001c, CanonicalizeRangeHi }, 391 { 0x214f, 0x215f, 0x0000, CanonicalizeUnique }, 392 { 0x2160, 0x216f, 0x0010, CanonicalizeRangeLo }, 393 { 0x2170, 0x217f, 0x0010, CanonicalizeRangeHi }, 394 { 0x2180, 0x2182, 0x0000, CanonicalizeUnique }, 395 { 0x2183, 0x2184, 0x0000, CanonicalizeAlternatingUnaligned }, 396 { 0x2185, 0x24b5, 0x0000, CanonicalizeUnique }, 397 { 0x24b6, 0x24cf, 0x001a, CanonicalizeRangeLo }, 398 { 0x24d0, 0x24e9, 0x001a, CanonicalizeRangeHi }, 399 { 0x24ea, 0x2bff, 0x0000, CanonicalizeUnique }, 400 { 0x2c00, 0x2c2e, 0x0030, CanonicalizeRangeLo }, 401 { 0x2c2f, 0x2c2f, 0x0000, CanonicalizeUnique }, 402 { 0x2c30, 0x2c5e, 0x0030, CanonicalizeRangeHi }, 403 { 0x2c5f, 0x2c5f, 0x0000, CanonicalizeUnique }, 404 { 0x2c60, 0x2c61, 0x0000, CanonicalizeAlternatingAligned }, 405 { 0x2c62, 0x2c62, 0x29f7, 
CanonicalizeRangeHi }, 406 { 0x2c63, 0x2c63, 0x0ee6, CanonicalizeRangeHi }, 407 { 0x2c64, 0x2c64, 0x29e7, CanonicalizeRangeHi }, 408 { 0x2c65, 0x2c65, 0x2a2b, CanonicalizeRangeHi }, 409 { 0x2c66, 0x2c66, 0x2a28, CanonicalizeRangeHi }, 410 { 0x2c67, 0x2c6c, 0x0000, CanonicalizeAlternatingUnaligned }, 411 { 0x2c6d, 0x2c6d, 0x2a1c, CanonicalizeRangeHi }, 412 { 0x2c6e, 0x2c6e, 0x29fd, CanonicalizeRangeHi }, 413 { 0x2c6f, 0x2c6f, 0x2a1f, CanonicalizeRangeHi }, 414 { 0x2c70, 0x2c70, 0x2a1e, CanonicalizeRangeHi }, 415 { 0x2c71, 0x2c71, 0x0000, CanonicalizeUnique }, 416 { 0x2c72, 0x2c73, 0x0000, CanonicalizeAlternatingAligned }, 417 { 0x2c74, 0x2c74, 0x0000, CanonicalizeUnique }, 418 { 0x2c75, 0x2c76, 0x0000, CanonicalizeAlternatingUnaligned }, 419 { 0x2c77, 0x2c7d, 0x0000, CanonicalizeUnique }, 420 { 0x2c7e, 0x2c7f, 0x2a3f, CanonicalizeRangeHi }, 421 { 0x2c80, 0x2ce3, 0x0000, CanonicalizeAlternatingAligned }, 422 { 0x2ce4, 0x2cea, 0x0000, CanonicalizeUnique }, 423 { 0x2ceb, 0x2cee, 0x0000, CanonicalizeAlternatingUnaligned }, 424 { 0x2cef, 0x2cf1, 0x0000, CanonicalizeUnique }, 425 { 0x2cf2, 0x2cf3, 0x0000, CanonicalizeAlternatingAligned }, 426 { 0x2cf4, 0x2cff, 0x0000, CanonicalizeUnique }, 427 { 0x2d00, 0x2d25, 0x1c60, CanonicalizeRangeHi }, 428 { 0x2d26, 0x2d26, 0x0000, CanonicalizeUnique }, 429 { 0x2d27, 0x2d27, 0x1c60, CanonicalizeRangeHi }, 430 { 0x2d28, 0x2d2c, 0x0000, CanonicalizeUnique }, 431 { 0x2d2d, 0x2d2d, 0x1c60, CanonicalizeRangeHi }, 432 { 0x2d2e, 0xa63f, 0x0000, CanonicalizeUnique }, 433 { 0xa640, 0xa66d, 0x0000, CanonicalizeAlternatingAligned }, 434 { 0xa66e, 0xa67f, 0x0000, CanonicalizeUnique }, 435 { 0xa680, 0xa69b, 0x0000, CanonicalizeAlternatingAligned }, 436 { 0xa69c, 0xa721, 0x0000, CanonicalizeUnique }, 437 { 0xa722, 0xa72f, 0x0000, CanonicalizeAlternatingAligned }, 438 { 0xa730, 0xa731, 0x0000, CanonicalizeUnique }, 439 { 0xa732, 0xa76f, 0x0000, CanonicalizeAlternatingAligned }, 440 { 0xa770, 0xa778, 0x0000, CanonicalizeUnique }, 441 { 0xa779, 
0xa77c, 0x0000, CanonicalizeAlternatingUnaligned }, 442 { 0xa77d, 0xa77d, 0x8a04, CanonicalizeRangeHi }, 443 { 0xa77e, 0xa787, 0x0000, CanonicalizeAlternatingAligned }, 444 { 0xa788, 0xa78a, 0x0000, CanonicalizeUnique }, 445 { 0xa78b, 0xa78c, 0x0000, CanonicalizeAlternatingUnaligned }, 446 { 0xa78d, 0xa78d, 0xa528, CanonicalizeRangeHi }, 447 { 0xa78e, 0xa78f, 0x0000, CanonicalizeUnique }, 448 { 0xa790, 0xa793, 0x0000, CanonicalizeAlternatingAligned }, 449 { 0xa794, 0xa795, 0x0000, CanonicalizeUnique }, 450 { 0xa796, 0xa7a9, 0x0000, CanonicalizeAlternatingAligned }, 451 { 0xa7aa, 0xa7aa, 0xa544, CanonicalizeRangeHi }, 452 { 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi }, 453 { 0xa7ac, 0xa7ac, 0xa54b, CanonicalizeRangeHi }, 454 { 0xa7ad, 0xa7ad, 0xa541, CanonicalizeRangeHi }, 455 { 0xa7ae, 0xa7af, 0x0000, CanonicalizeUnique }, 456 { 0xa7b0, 0xa7b0, 0xa512, CanonicalizeRangeHi }, 457 { 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi }, 458 { 0xa7b2, 0xff20, 0x0000, CanonicalizeUnique }, 459 { 0xff21, 0xff3a, 0x0020, CanonicalizeRangeLo }, 460 { 0xff3b, 0xff40, 0x0000, CanonicalizeUnique }, 461 { 0xff41, 0xff5a, 0x0020, CanonicalizeRangeHi }, 462 { 0xff5b, 0xffff, 0x0000, CanonicalizeUnique }, 436 463 }; 437 464 438 const size_t LATIN_CANONICALIZATION_RANGES = 20; 439 LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = { 440 { 0x0000u, 0x0040u, 0x0000u, CanonicalizeLatinSelf }, 441 { 0x0041u, 0x005au, 0x0000u, CanonicalizeLatinMask0x20 }, 442 { 0x005bu, 0x0060u, 0x0000u, CanonicalizeLatinSelf }, 443 { 0x0061u, 0x007au, 0x0000u, CanonicalizeLatinMask0x20 }, 444 { 0x007bu, 0x00bfu, 0x0000u, CanonicalizeLatinSelf }, 445 { 0x00c0u, 0x00d6u, 0x0000u, CanonicalizeLatinMask0x20 }, 446 { 0x00d7u, 0x00d7u, 0x0000u, CanonicalizeLatinSelf }, 447 { 0x00d8u, 0x00deu, 0x0000u, CanonicalizeLatinMask0x20 }, 448 { 0x00dfu, 0x00dfu, 0x0000u, CanonicalizeLatinSelf }, 449 { 0x00e0u, 0x00f6u, 0x0000u, CanonicalizeLatinMask0x20 }, 450 { 0x00f7u, 0x00f7u, 0x0000u, 
CanonicalizeLatinSelf }, 451 { 0x00f8u, 0x00feu, 0x0000u, CanonicalizeLatinMask0x20 }, 452 { 0x00ffu, 0x00ffu, 0x0000u, CanonicalizeLatinSelf }, 453 { 0x0100u, 0x0177u, 0x0000u, CanonicalizeLatinInvalid }, 454 { 0x0178u, 0x0178u, 0x00ffu, CanonicalizeLatinOther }, 455 { 0x0179u, 0x039bu, 0x0000u, CanonicalizeLatinInvalid }, 456 { 0x039cu, 0x039cu, 0x00b5u, CanonicalizeLatinOther }, 457 { 0x039du, 0x03bbu, 0x0000u, CanonicalizeLatinInvalid }, 458 { 0x03bcu, 0x03bcu, 0x00b5u, CanonicalizeLatinOther }, 459 { 0x03bdu, 0xffffu, 0x0000u, CanonicalizeLatinInvalid }, 465 const UChar32 unicodeCharacterSet0[] = { 0x0041, 0x0061, 0x1e9a, 0 }; 466 const UChar32 unicodeCharacterSet1[] = { 0x0046, 0x0066, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0 }; 467 const UChar32 unicodeCharacterSet2[] = { 0x0048, 0x0068, 0x1e96, 0 }; 468 const UChar32 unicodeCharacterSet3[] = { 0x0049, 0x0069, 0x0131, 0 }; 469 const UChar32 unicodeCharacterSet4[] = { 0x004a, 0x006a, 0x01f0, 0 }; 470 const UChar32 unicodeCharacterSet5[] = { 0x0053, 0x0073, 0x00df, 0x017f, 0xfb05, 0xfb06, 0 }; 471 const UChar32 unicodeCharacterSet6[] = { 0x0054, 0x0074, 0x1e97, 0 }; 472 const UChar32 unicodeCharacterSet7[] = { 0x0057, 0x0077, 0x1e98, 0 }; 473 const UChar32 unicodeCharacterSet8[] = { 0x0059, 0x0079, 0x1e99, 0 }; 474 const UChar32 unicodeCharacterSet9[] = { 0x01c4, 0x01c5, 0x01c6, 0 }; 475 const UChar32 unicodeCharacterSet10[] = { 0x01c7, 0x01c8, 0x01c9, 0 }; 476 const UChar32 unicodeCharacterSet11[] = { 0x01ca, 0x01cb, 0x01cc, 0 }; 477 const UChar32 unicodeCharacterSet12[] = { 0x01f1, 0x01f2, 0x01f3, 0 }; 478 const UChar32 unicodeCharacterSet13[] = { 0x0386, 0x03ac, 0x1fb4, 0 }; 479 const UChar32 unicodeCharacterSet14[] = { 0x0389, 0x03ae, 0x1fc4, 0 }; 480 const UChar32 unicodeCharacterSet15[] = { 0x038f, 0x03ce, 0x1ff4, 0 }; 481 const UChar32 unicodeCharacterSet16[] = { 0x0391, 0x03b1, 0x1fb3, 0x1fb6, 0x1fb7, 0x1fbc, 0 }; 482 const UChar32 unicodeCharacterSet17[] = { 0x0392, 0x03b2, 0x03d0, 0 }; 483 const 
UChar32 unicodeCharacterSet18[] = { 0x0395, 0x03b5, 0x03f5, 0 }; 484 const UChar32 unicodeCharacterSet19[] = { 0x0397, 0x03b7, 0x1fc3, 0x1fc6, 0x1fc7, 0x1fcc, 0 }; 485 const UChar32 unicodeCharacterSet20[] = { 0x0398, 0x03b8, 0x03d1, 0 }; 486 const UChar32 unicodeCharacterSet21[] = { 0x0345, 0x0390, 0x0399, 0x03b9, 0x1fbe, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0 }; 487 const UChar32 unicodeCharacterSet22[] = { 0x039a, 0x03ba, 0x03f0, 0 }; 488 const UChar32 unicodeCharacterSet23[] = { 0x00b5, 0x039c, 0x03bc, 0 }; 489 const UChar32 unicodeCharacterSet24[] = { 0x03a0, 0x03c0, 0x03d6, 0 }; 490 const UChar32 unicodeCharacterSet25[] = { 0x03a1, 0x03c1, 0x03f1, 0x1fe4, 0 }; 491 const UChar32 unicodeCharacterSet26[] = { 0x03a3, 0x03c2, 0x03c3, 0 }; 492 const UChar32 unicodeCharacterSet27[] = { 0x03a5, 0x03b0, 0x03c5, 0x1f50, 0x1f52, 0x1f54, 0x1f56, 0x1fe2, 0x1fe3, 0x1fe6, 0x1fe7, 0 }; 493 const UChar32 unicodeCharacterSet28[] = { 0x03a6, 0x03c6, 0x03d5, 0 }; 494 const UChar32 unicodeCharacterSet29[] = { 0x03a9, 0x03c9, 0x1ff3, 0x1ff6, 0x1ff7, 0x1ffc, 0 }; 495 const UChar32 unicodeCharacterSet30[] = { 0x0535, 0x0565, 0x0587, 0 }; 496 const UChar32 unicodeCharacterSet31[] = { 0x0544, 0x0574, 0xfb13, 0xfb14, 0xfb15, 0xfb17, 0 }; 497 const UChar32 unicodeCharacterSet32[] = { 0x054e, 0x057e, 0xfb16, 0 }; 498 const UChar32 unicodeCharacterSet33[] = { 0x1e60, 0x1e61, 0x1e9b, 0 }; 499 const UChar32 unicodeCharacterSet34[] = { 0x1f00, 0x1f08, 0x1f80, 0x1f88, 0 }; 500 const UChar32 unicodeCharacterSet35[] = { 0x1f01, 0x1f09, 0x1f81, 0x1f89, 0 }; 501 const UChar32 unicodeCharacterSet36[] = { 0x1f02, 0x1f0a, 0x1f82, 0x1f8a, 0 }; 502 const UChar32 unicodeCharacterSet37[] = { 0x1f03, 0x1f0b, 0x1f83, 0x1f8b, 0 }; 503 const UChar32 unicodeCharacterSet38[] = { 0x1f04, 0x1f0c, 0x1f84, 0x1f8c, 0 }; 504 const UChar32 unicodeCharacterSet39[] = { 0x1f05, 0x1f0d, 0x1f85, 0x1f8d, 0 }; 505 const UChar32 unicodeCharacterSet40[] = { 0x1f06, 0x1f0e, 0x1f86, 0x1f8e, 0 }; 506 const UChar32 
unicodeCharacterSet41[] = { 0x1f07, 0x1f0f, 0x1f87, 0x1f8f, 0 }; 507 const UChar32 unicodeCharacterSet42[] = { 0x1f20, 0x1f28, 0x1f90, 0x1f98, 0 }; 508 const UChar32 unicodeCharacterSet43[] = { 0x1f21, 0x1f29, 0x1f91, 0x1f99, 0 }; 509 const UChar32 unicodeCharacterSet44[] = { 0x1f22, 0x1f2a, 0x1f92, 0x1f9a, 0 }; 510 const UChar32 unicodeCharacterSet45[] = { 0x1f23, 0x1f2b, 0x1f93, 0x1f9b, 0 }; 511 const UChar32 unicodeCharacterSet46[] = { 0x1f24, 0x1f2c, 0x1f94, 0x1f9c, 0 }; 512 const UChar32 unicodeCharacterSet47[] = { 0x1f25, 0x1f2d, 0x1f95, 0x1f9d, 0 }; 513 const UChar32 unicodeCharacterSet48[] = { 0x1f26, 0x1f2e, 0x1f96, 0x1f9e, 0 }; 514 const UChar32 unicodeCharacterSet49[] = { 0x1f27, 0x1f2f, 0x1f97, 0x1f9f, 0 }; 515 const UChar32 unicodeCharacterSet50[] = { 0x1f60, 0x1f68, 0x1fa0, 0x1fa8, 0 }; 516 const UChar32 unicodeCharacterSet51[] = { 0x1f61, 0x1f69, 0x1fa1, 0x1fa9, 0 }; 517 const UChar32 unicodeCharacterSet52[] = { 0x1f62, 0x1f6a, 0x1fa2, 0x1faa, 0 }; 518 const UChar32 unicodeCharacterSet53[] = { 0x1f63, 0x1f6b, 0x1fa3, 0x1fab, 0 }; 519 const UChar32 unicodeCharacterSet54[] = { 0x1f64, 0x1f6c, 0x1fa4, 0x1fac, 0 }; 520 const UChar32 unicodeCharacterSet55[] = { 0x1f65, 0x1f6d, 0x1fa5, 0x1fad, 0 }; 521 const UChar32 unicodeCharacterSet56[] = { 0x1f66, 0x1f6e, 0x1fa6, 0x1fae, 0 }; 522 const UChar32 unicodeCharacterSet57[] = { 0x1f67, 0x1f6f, 0x1fa7, 0x1faf, 0 }; 523 const UChar32 unicodeCharacterSet58[] = { 0x1f70, 0x1fb2, 0x1fba, 0 }; 524 const UChar32 unicodeCharacterSet59[] = { 0x1f74, 0x1fc2, 0x1fca, 0 }; 525 const UChar32 unicodeCharacterSet60[] = { 0x1f7c, 0x1ff2, 0x1ffa, 0 }; 526 527 static const size_t UNICODE_CANONICALIZATION_SETS = 61; 528 const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = { 529 unicodeCharacterSet0, 530 unicodeCharacterSet1, 531 unicodeCharacterSet2, 532 unicodeCharacterSet3, 533 unicodeCharacterSet4, 534 unicodeCharacterSet5, 535 unicodeCharacterSet6, 536 unicodeCharacterSet7, 537 unicodeCharacterSet8, 
538 unicodeCharacterSet9, 539 unicodeCharacterSet10, 540 unicodeCharacterSet11, 541 unicodeCharacterSet12, 542 unicodeCharacterSet13, 543 unicodeCharacterSet14, 544 unicodeCharacterSet15, 545 unicodeCharacterSet16, 546 unicodeCharacterSet17, 547 unicodeCharacterSet18, 548 unicodeCharacterSet19, 549 unicodeCharacterSet20, 550 unicodeCharacterSet21, 551 unicodeCharacterSet22, 552 unicodeCharacterSet23, 553 unicodeCharacterSet24, 554 unicodeCharacterSet25, 555 unicodeCharacterSet26, 556 unicodeCharacterSet27, 557 unicodeCharacterSet28, 558 unicodeCharacterSet29, 559 unicodeCharacterSet30, 560 unicodeCharacterSet31, 561 unicodeCharacterSet32, 562 unicodeCharacterSet33, 563 unicodeCharacterSet34, 564 unicodeCharacterSet35, 565 unicodeCharacterSet36, 566 unicodeCharacterSet37, 567 unicodeCharacterSet38, 568 unicodeCharacterSet39, 569 unicodeCharacterSet40, 570 unicodeCharacterSet41, 571 unicodeCharacterSet42, 572 unicodeCharacterSet43, 573 unicodeCharacterSet44, 574 unicodeCharacterSet45, 575 unicodeCharacterSet46, 576 unicodeCharacterSet47, 577 unicodeCharacterSet48, 578 unicodeCharacterSet49, 579 unicodeCharacterSet50, 580 unicodeCharacterSet51, 581 unicodeCharacterSet52, 582 unicodeCharacterSet53, 583 unicodeCharacterSet54, 584 unicodeCharacterSet55, 585 unicodeCharacterSet56, 586 unicodeCharacterSet57, 587 unicodeCharacterSet58, 588 unicodeCharacterSet59, 589 unicodeCharacterSet60, 590 }; 591 592 const size_t UNICODE_CANONICALIZATION_RANGES = 585; 593 const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = { 594 { 0x0000, 0x0040, 0x0000, CanonicalizeUnique }, 595 { 0x0041, 0x0041, 0x0000, CanonicalizeSet }, 596 { 0x0042, 0x0045, 0x0020, CanonicalizeRangeLo }, 597 { 0x0046, 0x0046, 0x0001, CanonicalizeSet }, 598 { 0x0047, 0x0047, 0x0020, CanonicalizeRangeLo }, 599 { 0x0048, 0x0048, 0x0002, CanonicalizeSet }, 600 { 0x0049, 0x0049, 0x0003, CanonicalizeSet }, 601 { 0x004a, 0x004a, 0x0004, CanonicalizeSet }, 602 { 0x004b, 0x0052, 0x0020, 
CanonicalizeRangeLo }, 603 { 0x0053, 0x0053, 0x0005, CanonicalizeSet }, 604 { 0x0054, 0x0054, 0x0006, CanonicalizeSet }, 605 { 0x0055, 0x0056, 0x0020, CanonicalizeRangeLo }, 606 { 0x0057, 0x0057, 0x0007, CanonicalizeSet }, 607 { 0x0058, 0x0058, 0x0020, CanonicalizeRangeLo }, 608 { 0x0059, 0x0059, 0x0008, CanonicalizeSet }, 609 { 0x005a, 0x005a, 0x0020, CanonicalizeRangeLo }, 610 { 0x005b, 0x0060, 0x0000, CanonicalizeUnique }, 611 { 0x0061, 0x0061, 0x0000, CanonicalizeSet }, 612 { 0x0062, 0x0065, 0x0020, CanonicalizeRangeHi }, 613 { 0x0066, 0x0066, 0x0001, CanonicalizeSet }, 614 { 0x0067, 0x0067, 0x0020, CanonicalizeRangeHi }, 615 { 0x0068, 0x0068, 0x0002, CanonicalizeSet }, 616 { 0x0069, 0x0069, 0x0003, CanonicalizeSet }, 617 { 0x006a, 0x006a, 0x0004, CanonicalizeSet }, 618 { 0x006b, 0x0072, 0x0020, CanonicalizeRangeHi }, 619 { 0x0073, 0x0073, 0x0005, CanonicalizeSet }, 620 { 0x0074, 0x0074, 0x0006, CanonicalizeSet }, 621 { 0x0075, 0x0076, 0x0020, CanonicalizeRangeHi }, 622 { 0x0077, 0x0077, 0x0007, CanonicalizeSet }, 623 { 0x0078, 0x0078, 0x0020, CanonicalizeRangeHi }, 624 { 0x0079, 0x0079, 0x0008, CanonicalizeSet }, 625 { 0x007a, 0x007a, 0x0020, CanonicalizeRangeHi }, 626 { 0x007b, 0x00b4, 0x0000, CanonicalizeUnique }, 627 { 0x00b5, 0x00b5, 0x0017, CanonicalizeSet }, 628 { 0x00b6, 0x00bf, 0x0000, CanonicalizeUnique }, 629 { 0x00c0, 0x00d6, 0x0020, CanonicalizeRangeLo }, 630 { 0x00d7, 0x00d7, 0x0000, CanonicalizeUnique }, 631 { 0x00d8, 0x00de, 0x0020, CanonicalizeRangeLo }, 632 { 0x00df, 0x00df, 0x0005, CanonicalizeSet }, 633 { 0x00e0, 0x00f6, 0x0020, CanonicalizeRangeHi }, 634 { 0x00f7, 0x00f7, 0x0000, CanonicalizeUnique }, 635 { 0x00f8, 0x00fe, 0x0020, CanonicalizeRangeHi }, 636 { 0x00ff, 0x00ff, 0x0079, CanonicalizeRangeLo }, 637 { 0x0100, 0x012f, 0x0000, CanonicalizeAlternatingAligned }, 638 { 0x0130, 0x0130, 0x0000, CanonicalizeUnique }, 639 { 0x0131, 0x0131, 0x0003, CanonicalizeSet }, 640 { 0x0132, 0x0137, 0x0000, CanonicalizeAlternatingAligned }, 641 { 
0x0138, 0x0138, 0x0000, CanonicalizeUnique }, 642 { 0x0139, 0x0148, 0x0000, CanonicalizeAlternatingUnaligned }, 643 { 0x0149, 0x0149, 0x0173, CanonicalizeRangeLo }, 644 { 0x014a, 0x0177, 0x0000, CanonicalizeAlternatingAligned }, 645 { 0x0178, 0x0178, 0x0079, CanonicalizeRangeHi }, 646 { 0x0179, 0x017e, 0x0000, CanonicalizeAlternatingUnaligned }, 647 { 0x017f, 0x017f, 0x0005, CanonicalizeSet }, 648 { 0x0180, 0x0180, 0x00c3, CanonicalizeRangeLo }, 649 { 0x0181, 0x0181, 0x00d2, CanonicalizeRangeLo }, 650 { 0x0182, 0x0185, 0x0000, CanonicalizeAlternatingAligned }, 651 { 0x0186, 0x0186, 0x00ce, CanonicalizeRangeLo }, 652 { 0x0187, 0x0188, 0x0000, CanonicalizeAlternatingUnaligned }, 653 { 0x0189, 0x018a, 0x00cd, CanonicalizeRangeLo }, 654 { 0x018b, 0x018c, 0x0000, CanonicalizeAlternatingUnaligned }, 655 { 0x018d, 0x018d, 0x0000, CanonicalizeUnique }, 656 { 0x018e, 0x018e, 0x004f, CanonicalizeRangeLo }, 657 { 0x018f, 0x018f, 0x00ca, CanonicalizeRangeLo }, 658 { 0x0190, 0x0190, 0x00cb, CanonicalizeRangeLo }, 659 { 0x0191, 0x0192, 0x0000, CanonicalizeAlternatingUnaligned }, 660 { 0x0193, 0x0193, 0x00cd, CanonicalizeRangeLo }, 661 { 0x0194, 0x0194, 0x00cf, CanonicalizeRangeLo }, 662 { 0x0195, 0x0195, 0x0061, CanonicalizeRangeLo }, 663 { 0x0196, 0x0196, 0x00d3, CanonicalizeRangeLo }, 664 { 0x0197, 0x0197, 0x00d1, CanonicalizeRangeLo }, 665 { 0x0198, 0x0199, 0x0000, CanonicalizeAlternatingAligned }, 666 { 0x019a, 0x019a, 0x00a3, CanonicalizeRangeLo }, 667 { 0x019b, 0x019b, 0x0000, CanonicalizeUnique }, 668 { 0x019c, 0x019c, 0x00d3, CanonicalizeRangeLo }, 669 { 0x019d, 0x019d, 0x00d5, CanonicalizeRangeLo }, 670 { 0x019e, 0x019e, 0x0082, CanonicalizeRangeLo }, 671 { 0x019f, 0x019f, 0x00d6, CanonicalizeRangeLo }, 672 { 0x01a0, 0x01a5, 0x0000, CanonicalizeAlternatingAligned }, 673 { 0x01a6, 0x01a6, 0x00da, CanonicalizeRangeLo }, 674 { 0x01a7, 0x01a8, 0x0000, CanonicalizeAlternatingUnaligned }, 675 { 0x01a9, 0x01a9, 0x00da, CanonicalizeRangeLo }, 676 { 0x01aa, 0x01ab, 0x0000, 
CanonicalizeUnique }, 677 { 0x01ac, 0x01ad, 0x0000, CanonicalizeAlternatingAligned }, 678 { 0x01ae, 0x01ae, 0x00da, CanonicalizeRangeLo }, 679 { 0x01af, 0x01b0, 0x0000, CanonicalizeAlternatingUnaligned }, 680 { 0x01b1, 0x01b2, 0x00d9, CanonicalizeRangeLo }, 681 { 0x01b3, 0x01b6, 0x0000, CanonicalizeAlternatingUnaligned }, 682 { 0x01b7, 0x01b7, 0x00db, CanonicalizeRangeLo }, 683 { 0x01b8, 0x01b9, 0x0000, CanonicalizeAlternatingAligned }, 684 { 0x01ba, 0x01bb, 0x0000, CanonicalizeUnique }, 685 { 0x01bc, 0x01bd, 0x0000, CanonicalizeAlternatingAligned }, 686 { 0x01be, 0x01be, 0x0000, CanonicalizeUnique }, 687 { 0x01bf, 0x01bf, 0x0038, CanonicalizeRangeLo }, 688 { 0x01c0, 0x01c3, 0x0000, CanonicalizeUnique }, 689 { 0x01c4, 0x01c6, 0x0009, CanonicalizeSet }, 690 { 0x01c7, 0x01c9, 0x000a, CanonicalizeSet }, 691 { 0x01ca, 0x01cc, 0x000b, CanonicalizeSet }, 692 { 0x01cd, 0x01dc, 0x0000, CanonicalizeAlternatingUnaligned }, 693 { 0x01dd, 0x01dd, 0x004f, CanonicalizeRangeHi }, 694 { 0x01de, 0x01ef, 0x0000, CanonicalizeAlternatingAligned }, 695 { 0x01f0, 0x01f0, 0x0004, CanonicalizeSet }, 696 { 0x01f1, 0x01f3, 0x000c, CanonicalizeSet }, 697 { 0x01f4, 0x01f5, 0x0000, CanonicalizeAlternatingAligned }, 698 { 0x01f6, 0x01f6, 0x0061, CanonicalizeRangeHi }, 699 { 0x01f7, 0x01f7, 0x0038, CanonicalizeRangeHi }, 700 { 0x01f8, 0x021f, 0x0000, CanonicalizeAlternatingAligned }, 701 { 0x0220, 0x0220, 0x0082, CanonicalizeRangeHi }, 702 { 0x0221, 0x0221, 0x0000, CanonicalizeUnique }, 703 { 0x0222, 0x0233, 0x0000, CanonicalizeAlternatingAligned }, 704 { 0x0234, 0x0239, 0x0000, CanonicalizeUnique }, 705 { 0x023a, 0x023a, 0x2a2b, CanonicalizeRangeLo }, 706 { 0x023b, 0x023c, 0x0000, CanonicalizeAlternatingUnaligned }, 707 { 0x023d, 0x023d, 0x00a3, CanonicalizeRangeHi }, 708 { 0x023e, 0x023e, 0x2a28, CanonicalizeRangeLo }, 709 { 0x023f, 0x0240, 0x2a3f, CanonicalizeRangeLo }, 710 { 0x0241, 0x0242, 0x0000, CanonicalizeAlternatingUnaligned }, 711 { 0x0243, 0x0243, 0x00c3, CanonicalizeRangeHi }, 712 { 
0x0244, 0x0244, 0x0045, CanonicalizeRangeLo }, 713 { 0x0245, 0x0245, 0x0047, CanonicalizeRangeLo }, 714 { 0x0246, 0x024f, 0x0000, CanonicalizeAlternatingAligned }, 715 { 0x0250, 0x0250, 0x2a1f, CanonicalizeRangeLo }, 716 { 0x0251, 0x0251, 0x2a1c, CanonicalizeRangeLo }, 717 { 0x0252, 0x0252, 0x2a1e, CanonicalizeRangeLo }, 718 { 0x0253, 0x0253, 0x00d2, CanonicalizeRangeHi }, 719 { 0x0254, 0x0254, 0x00ce, CanonicalizeRangeHi }, 720 { 0x0255, 0x0255, 0x0000, CanonicalizeUnique }, 721 { 0x0256, 0x0257, 0x00cd, CanonicalizeRangeHi }, 722 { 0x0258, 0x0258, 0x0000, CanonicalizeUnique }, 723 { 0x0259, 0x0259, 0x00ca, CanonicalizeRangeHi }, 724 { 0x025a, 0x025a, 0x0000, CanonicalizeUnique }, 725 { 0x025b, 0x025b, 0x00cb, CanonicalizeRangeHi }, 726 { 0x025c, 0x025c, 0xa54f, CanonicalizeRangeLo }, 727 { 0x025d, 0x025f, 0x0000, CanonicalizeUnique }, 728 { 0x0260, 0x0260, 0x00cd, CanonicalizeRangeHi }, 729 { 0x0261, 0x0261, 0xa54b, CanonicalizeRangeLo }, 730 { 0x0262, 0x0262, 0x0000, CanonicalizeUnique }, 731 { 0x0263, 0x0263, 0x00cf, CanonicalizeRangeHi }, 732 { 0x0264, 0x0264, 0x0000, CanonicalizeUnique }, 733 { 0x0265, 0x0265, 0xa528, CanonicalizeRangeLo }, 734 { 0x0266, 0x0266, 0xa544, CanonicalizeRangeLo }, 735 { 0x0267, 0x0267, 0x0000, CanonicalizeUnique }, 736 { 0x0268, 0x0268, 0x00d1, CanonicalizeRangeHi }, 737 { 0x0269, 0x0269, 0x00d3, CanonicalizeRangeHi }, 738 { 0x026a, 0x026a, 0x0000, CanonicalizeUnique }, 739 { 0x026b, 0x026b, 0x29f7, CanonicalizeRangeLo }, 740 { 0x026c, 0x026c, 0xa541, CanonicalizeRangeLo }, 741 { 0x026d, 0x026e, 0x0000, CanonicalizeUnique }, 742 { 0x026f, 0x026f, 0x00d3, CanonicalizeRangeHi }, 743 { 0x0270, 0x0270, 0x0000, CanonicalizeUnique }, 744 { 0x0271, 0x0271, 0x29fd, CanonicalizeRangeLo }, 745 { 0x0272, 0x0272, 0x00d5, CanonicalizeRangeHi }, 746 { 0x0273, 0x0274, 0x0000, CanonicalizeUnique }, 747 { 0x0275, 0x0275, 0x00d6, CanonicalizeRangeHi }, 748 { 0x0276, 0x027c, 0x0000, CanonicalizeUnique }, 749 { 0x027d, 0x027d, 0x29e7, 
CanonicalizeRangeLo }, 750 { 0x027e, 0x027f, 0x0000, CanonicalizeUnique }, 751 { 0x0280, 0x0280, 0x00da, CanonicalizeRangeHi }, 752 { 0x0281, 0x0282, 0x0000, CanonicalizeUnique }, 753 { 0x0283, 0x0283, 0x00da, CanonicalizeRangeHi }, 754 { 0x0284, 0x0286, 0x0000, CanonicalizeUnique }, 755 { 0x0287, 0x0287, 0xa52a, CanonicalizeRangeLo }, 756 { 0x0288, 0x0288, 0x00da, CanonicalizeRangeHi }, 757 { 0x0289, 0x0289, 0x0045, CanonicalizeRangeHi }, 758 { 0x028a, 0x028b, 0x00d9, CanonicalizeRangeHi }, 759 { 0x028c, 0x028c, 0x0047, CanonicalizeRangeHi }, 760 { 0x028d, 0x0291, 0x0000, CanonicalizeUnique }, 761 { 0x0292, 0x0292, 0x00db, CanonicalizeRangeHi }, 762 { 0x0293, 0x029d, 0x0000, CanonicalizeUnique }, 763 { 0x029e, 0x029e, 0xa512, CanonicalizeRangeLo }, 764 { 0x029f, 0x02bb, 0x0000, CanonicalizeUnique }, 765 { 0x02bc, 0x02bc, 0x0173, CanonicalizeRangeHi }, 766 { 0x02bd, 0x0344, 0x0000, CanonicalizeUnique }, 767 { 0x0345, 0x0345, 0x0015, CanonicalizeSet }, 768 { 0x0346, 0x036f, 0x0000, CanonicalizeUnique }, 769 { 0x0370, 0x0373, 0x0000, CanonicalizeAlternatingAligned }, 770 { 0x0374, 0x0375, 0x0000, CanonicalizeUnique }, 771 { 0x0376, 0x0377, 0x0000, CanonicalizeAlternatingAligned }, 772 { 0x0378, 0x037a, 0x0000, CanonicalizeUnique }, 773 { 0x037b, 0x037d, 0x0082, CanonicalizeRangeLo }, 774 { 0x037e, 0x037e, 0x0000, CanonicalizeUnique }, 775 { 0x037f, 0x037f, 0x0074, CanonicalizeRangeLo }, 776 { 0x0380, 0x0385, 0x0000, CanonicalizeUnique }, 777 { 0x0386, 0x0386, 0x000d, CanonicalizeSet }, 778 { 0x0387, 0x0387, 0x0000, CanonicalizeUnique }, 779 { 0x0388, 0x0388, 0x0025, CanonicalizeRangeLo }, 780 { 0x0389, 0x0389, 0x000e, CanonicalizeSet }, 781 { 0x038a, 0x038a, 0x0025, CanonicalizeRangeLo }, 782 { 0x038b, 0x038b, 0x0000, CanonicalizeUnique }, 783 { 0x038c, 0x038c, 0x0040, CanonicalizeRangeLo }, 784 { 0x038d, 0x038d, 0x0000, CanonicalizeUnique }, 785 { 0x038e, 0x038e, 0x003f, CanonicalizeRangeLo }, 786 { 0x038f, 0x038f, 0x000f, CanonicalizeSet }, 787 { 0x0390, 0x0390, 
0x0015, CanonicalizeSet }, 788 { 0x0391, 0x0391, 0x0010, CanonicalizeSet }, 789 { 0x0392, 0x0392, 0x0011, CanonicalizeSet }, 790 { 0x0393, 0x0394, 0x0020, CanonicalizeRangeLo }, 791 { 0x0395, 0x0395, 0x0012, CanonicalizeSet }, 792 { 0x0396, 0x0396, 0x0020, CanonicalizeRangeLo }, 793 { 0x0397, 0x0397, 0x0013, CanonicalizeSet }, 794 { 0x0398, 0x0398, 0x0014, CanonicalizeSet }, 795 { 0x0399, 0x0399, 0x0015, CanonicalizeSet }, 796 { 0x039a, 0x039a, 0x0016, CanonicalizeSet }, 797 { 0x039b, 0x039b, 0x0020, CanonicalizeRangeLo }, 798 { 0x039c, 0x039c, 0x0017, CanonicalizeSet }, 799 { 0x039d, 0x039f, 0x0020, CanonicalizeRangeLo }, 800 { 0x03a0, 0x03a0, 0x0018, CanonicalizeSet }, 801 { 0x03a1, 0x03a1, 0x0019, CanonicalizeSet }, 802 { 0x03a2, 0x03a2, 0x0000, CanonicalizeUnique }, 803 { 0x03a3, 0x03a3, 0x001a, CanonicalizeSet }, 804 { 0x03a4, 0x03a4, 0x0020, CanonicalizeRangeLo }, 805 { 0x03a5, 0x03a5, 0x001b, CanonicalizeSet }, 806 { 0x03a6, 0x03a6, 0x001c, CanonicalizeSet }, 807 { 0x03a7, 0x03a8, 0x0020, CanonicalizeRangeLo }, 808 { 0x03a9, 0x03a9, 0x001d, CanonicalizeSet }, 809 { 0x03aa, 0x03ab, 0x0020, CanonicalizeRangeLo }, 810 { 0x03ac, 0x03ac, 0x000d, CanonicalizeSet }, 811 { 0x03ad, 0x03ad, 0x0025, CanonicalizeRangeHi }, 812 { 0x03ae, 0x03ae, 0x000e, CanonicalizeSet }, 813 { 0x03af, 0x03af, 0x0025, CanonicalizeRangeHi }, 814 { 0x03b0, 0x03b0, 0x001b, CanonicalizeSet }, 815 { 0x03b1, 0x03b1, 0x0010, CanonicalizeSet }, 816 { 0x03b2, 0x03b2, 0x0011, CanonicalizeSet }, 817 { 0x03b3, 0x03b4, 0x0020, CanonicalizeRangeHi }, 818 { 0x03b5, 0x03b5, 0x0012, CanonicalizeSet }, 819 { 0x03b6, 0x03b6, 0x0020, CanonicalizeRangeHi }, 820 { 0x03b7, 0x03b7, 0x0013, CanonicalizeSet }, 821 { 0x03b8, 0x03b8, 0x0014, CanonicalizeSet }, 822 { 0x03b9, 0x03b9, 0x0015, CanonicalizeSet }, 823 { 0x03ba, 0x03ba, 0x0016, CanonicalizeSet }, 824 { 0x03bb, 0x03bb, 0x0020, CanonicalizeRangeHi }, 825 { 0x03bc, 0x03bc, 0x0017, CanonicalizeSet }, 826 { 0x03bd, 0x03bf, 0x0020, CanonicalizeRangeHi }, 827 { 
0x03c0, 0x03c0, 0x0018, CanonicalizeSet }, 828 { 0x03c1, 0x03c1, 0x0019, CanonicalizeSet }, 829 { 0x03c2, 0x03c3, 0x001a, CanonicalizeSet }, 830 { 0x03c4, 0x03c4, 0x0020, CanonicalizeRangeHi }, 831 { 0x03c5, 0x03c5, 0x001b, CanonicalizeSet }, 832 { 0x03c6, 0x03c6, 0x001c, CanonicalizeSet }, 833 { 0x03c7, 0x03c8, 0x0020, CanonicalizeRangeHi }, 834 { 0x03c9, 0x03c9, 0x001d, CanonicalizeSet }, 835 { 0x03ca, 0x03cb, 0x0020, CanonicalizeRangeHi }, 836 { 0x03cc, 0x03cc, 0x0040, CanonicalizeRangeHi }, 837 { 0x03cd, 0x03cd, 0x003f, CanonicalizeRangeHi }, 838 { 0x03ce, 0x03ce, 0x000f, CanonicalizeSet }, 839 { 0x03cf, 0x03cf, 0x0008, CanonicalizeRangeLo }, 840 { 0x03d0, 0x03d0, 0x0011, CanonicalizeSet }, 841 { 0x03d1, 0x03d1, 0x0014, CanonicalizeSet }, 842 { 0x03d2, 0x03d4, 0x0000, CanonicalizeUnique }, 843 { 0x03d5, 0x03d5, 0x001c, CanonicalizeSet }, 844 { 0x03d6, 0x03d6, 0x0018, CanonicalizeSet }, 845 { 0x03d7, 0x03d7, 0x0008, CanonicalizeRangeHi }, 846 { 0x03d8, 0x03ef, 0x0000, CanonicalizeAlternatingAligned }, 847 { 0x03f0, 0x03f0, 0x0016, CanonicalizeSet }, 848 { 0x03f1, 0x03f1, 0x0019, CanonicalizeSet }, 849 { 0x03f2, 0x03f2, 0x0007, CanonicalizeRangeLo }, 850 { 0x03f3, 0x03f3, 0x0074, CanonicalizeRangeHi }, 851 { 0x03f4, 0x03f4, 0x0000, CanonicalizeUnique }, 852 { 0x03f5, 0x03f5, 0x0012, CanonicalizeSet }, 853 { 0x03f6, 0x03f6, 0x0000, CanonicalizeUnique }, 854 { 0x03f7, 0x03f8, 0x0000, CanonicalizeAlternatingUnaligned }, 855 { 0x03f9, 0x03f9, 0x0007, CanonicalizeRangeHi }, 856 { 0x03fa, 0x03fb, 0x0000, CanonicalizeAlternatingAligned }, 857 { 0x03fc, 0x03fc, 0x0000, CanonicalizeUnique }, 858 { 0x03fd, 0x03ff, 0x0082, CanonicalizeRangeHi }, 859 { 0x0400, 0x040f, 0x0050, CanonicalizeRangeLo }, 860 { 0x0410, 0x042f, 0x0020, CanonicalizeRangeLo }, 861 { 0x0430, 0x044f, 0x0020, CanonicalizeRangeHi }, 862 { 0x0450, 0x045f, 0x0050, CanonicalizeRangeHi }, 863 { 0x0460, 0x0481, 0x0000, CanonicalizeAlternatingAligned }, 864 { 0x0482, 0x0489, 0x0000, CanonicalizeUnique }, 865 { 
0x048a, 0x04bf, 0x0000, CanonicalizeAlternatingAligned }, 866 { 0x04c0, 0x04c0, 0x000f, CanonicalizeRangeLo }, 867 { 0x04c1, 0x04ce, 0x0000, CanonicalizeAlternatingUnaligned }, 868 { 0x04cf, 0x04cf, 0x000f, CanonicalizeRangeHi }, 869 { 0x04d0, 0x052f, 0x0000, CanonicalizeAlternatingAligned }, 870 { 0x0530, 0x0530, 0x0000, CanonicalizeUnique }, 871 { 0x0531, 0x0534, 0x0030, CanonicalizeRangeLo }, 872 { 0x0535, 0x0535, 0x001e, CanonicalizeSet }, 873 { 0x0536, 0x0543, 0x0030, CanonicalizeRangeLo }, 874 { 0x0544, 0x0544, 0x001f, CanonicalizeSet }, 875 { 0x0545, 0x054d, 0x0030, CanonicalizeRangeLo }, 876 { 0x054e, 0x054e, 0x0020, CanonicalizeSet }, 877 { 0x054f, 0x0556, 0x0030, CanonicalizeRangeLo }, 878 { 0x0557, 0x0560, 0x0000, CanonicalizeUnique }, 879 { 0x0561, 0x0564, 0x0030, CanonicalizeRangeHi }, 880 { 0x0565, 0x0565, 0x001e, CanonicalizeSet }, 881 { 0x0566, 0x0573, 0x0030, CanonicalizeRangeHi }, 882 { 0x0574, 0x0574, 0x001f, CanonicalizeSet }, 883 { 0x0575, 0x057d, 0x0030, CanonicalizeRangeHi }, 884 { 0x057e, 0x057e, 0x0020, CanonicalizeSet }, 885 { 0x057f, 0x0586, 0x0030, CanonicalizeRangeHi }, 886 { 0x0587, 0x0587, 0x001e, CanonicalizeSet }, 887 { 0x0588, 0x109f, 0x0000, CanonicalizeUnique }, 888 { 0x10a0, 0x10c5, 0x1c60, CanonicalizeRangeLo }, 889 { 0x10c6, 0x10c6, 0x0000, CanonicalizeUnique }, 890 { 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo }, 891 { 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique }, 892 { 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo }, 893 { 0x10ce, 0x1d78, 0x0000, CanonicalizeUnique }, 894 { 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo }, 895 { 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique }, 896 { 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo }, 897 { 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique }, 898 { 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned }, 899 { 0x1e60, 0x1e61, 0x0021, CanonicalizeSet }, 900 { 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned }, 901 { 0x1e96, 0x1e96, 0x0002, CanonicalizeSet }, 902 { 0x1e97, 0x1e97, 0x0006, 
CanonicalizeSet }, 903 { 0x1e98, 0x1e98, 0x0007, CanonicalizeSet }, 904 { 0x1e99, 0x1e99, 0x0008, CanonicalizeSet }, 905 { 0x1e9a, 0x1e9a, 0x0000, CanonicalizeSet }, 906 { 0x1e9b, 0x1e9b, 0x0021, CanonicalizeSet }, 907 { 0x1e9c, 0x1e9f, 0x0000, CanonicalizeUnique }, 908 { 0x1ea0, 0x1eff, 0x0000, CanonicalizeAlternatingAligned }, 909 { 0x1f00, 0x1f00, 0x0022, CanonicalizeSet }, 910 { 0x1f01, 0x1f01, 0x0023, CanonicalizeSet }, 911 { 0x1f02, 0x1f02, 0x0024, CanonicalizeSet }, 912 { 0x1f03, 0x1f03, 0x0025, CanonicalizeSet }, 913 { 0x1f04, 0x1f04, 0x0026, CanonicalizeSet }, 914 { 0x1f05, 0x1f05, 0x0027, CanonicalizeSet }, 915 { 0x1f06, 0x1f06, 0x0028, CanonicalizeSet }, 916 { 0x1f07, 0x1f07, 0x0029, CanonicalizeSet }, 917 { 0x1f08, 0x1f08, 0x0022, CanonicalizeSet }, 918 { 0x1f09, 0x1f09, 0x0023, CanonicalizeSet }, 919 { 0x1f0a, 0x1f0a, 0x0024, CanonicalizeSet }, 920 { 0x1f0b, 0x1f0b, 0x0025, CanonicalizeSet }, 921 { 0x1f0c, 0x1f0c, 0x0026, CanonicalizeSet }, 922 { 0x1f0d, 0x1f0d, 0x0027, CanonicalizeSet }, 923 { 0x1f0e, 0x1f0e, 0x0028, CanonicalizeSet }, 924 { 0x1f0f, 0x1f0f, 0x0029, CanonicalizeSet }, 925 { 0x1f10, 0x1f15, 0x0008, CanonicalizeRangeLo }, 926 { 0x1f16, 0x1f17, 0x0000, CanonicalizeUnique }, 927 { 0x1f18, 0x1f1d, 0x0008, CanonicalizeRangeHi }, 928 { 0x1f1e, 0x1f1f, 0x0000, CanonicalizeUnique }, 929 { 0x1f20, 0x1f20, 0x002a, CanonicalizeSet }, 930 { 0x1f21, 0x1f21, 0x002b, CanonicalizeSet }, 931 { 0x1f22, 0x1f22, 0x002c, CanonicalizeSet }, 932 { 0x1f23, 0x1f23, 0x002d, CanonicalizeSet }, 933 { 0x1f24, 0x1f24, 0x002e, CanonicalizeSet }, 934 { 0x1f25, 0x1f25, 0x002f, CanonicalizeSet }, 935 { 0x1f26, 0x1f26, 0x0030, CanonicalizeSet }, 936 { 0x1f27, 0x1f27, 0x0031, CanonicalizeSet }, 937 { 0x1f28, 0x1f28, 0x002a, CanonicalizeSet }, 938 { 0x1f29, 0x1f29, 0x002b, CanonicalizeSet }, 939 { 0x1f2a, 0x1f2a, 0x002c, CanonicalizeSet }, 940 { 0x1f2b, 0x1f2b, 0x002d, CanonicalizeSet }, 941 { 0x1f2c, 0x1f2c, 0x002e, CanonicalizeSet }, 942 { 0x1f2d, 0x1f2d, 0x002f, 
CanonicalizeSet }, 943 { 0x1f2e, 0x1f2e, 0x0030, CanonicalizeSet }, 944 { 0x1f2f, 0x1f2f, 0x0031, CanonicalizeSet }, 945 { 0x1f30, 0x1f37, 0x0008, CanonicalizeRangeLo }, 946 { 0x1f38, 0x1f3f, 0x0008, CanonicalizeRangeHi }, 947 { 0x1f40, 0x1f45, 0x0008, CanonicalizeRangeLo }, 948 { 0x1f46, 0x1f47, 0x0000, CanonicalizeUnique }, 949 { 0x1f48, 0x1f4d, 0x0008, CanonicalizeRangeHi }, 950 { 0x1f4e, 0x1f4f, 0x0000, CanonicalizeUnique }, 951 { 0x1f50, 0x1f50, 0x001b, CanonicalizeSet }, 952 { 0x1f51, 0x1f51, 0x0008, CanonicalizeRangeLo }, 953 { 0x1f52, 0x1f52, 0x001b, CanonicalizeSet }, 954 { 0x1f53, 0x1f53, 0x0008, CanonicalizeRangeLo }, 955 { 0x1f54, 0x1f54, 0x001b, CanonicalizeSet }, 956 { 0x1f55, 0x1f55, 0x0008, CanonicalizeRangeLo }, 957 { 0x1f56, 0x1f56, 0x001b, CanonicalizeSet }, 958 { 0x1f57, 0x1f57, 0x0008, CanonicalizeRangeLo }, 959 { 0x1f58, 0x1f58, 0x0000, CanonicalizeUnique }, 960 { 0x1f59, 0x1f59, 0x0008, CanonicalizeRangeHi }, 961 { 0x1f5a, 0x1f5a, 0x0000, CanonicalizeUnique }, 962 { 0x1f5b, 0x1f5b, 0x0008, CanonicalizeRangeHi }, 963 { 0x1f5c, 0x1f5c, 0x0000, CanonicalizeUnique }, 964 { 0x1f5d, 0x1f5d, 0x0008, CanonicalizeRangeHi }, 965 { 0x1f5e, 0x1f5e, 0x0000, CanonicalizeUnique }, 966 { 0x1f5f, 0x1f5f, 0x0008, CanonicalizeRangeHi }, 967 { 0x1f60, 0x1f60, 0x0032, CanonicalizeSet }, 968 { 0x1f61, 0x1f61, 0x0033, CanonicalizeSet }, 969 { 0x1f62, 0x1f62, 0x0034, CanonicalizeSet }, 970 { 0x1f63, 0x1f63, 0x0035, CanonicalizeSet }, 971 { 0x1f64, 0x1f64, 0x0036, CanonicalizeSet }, 972 { 0x1f65, 0x1f65, 0x0037, CanonicalizeSet }, 973 { 0x1f66, 0x1f66, 0x0038, CanonicalizeSet }, 974 { 0x1f67, 0x1f67, 0x0039, CanonicalizeSet }, 975 { 0x1f68, 0x1f68, 0x0032, CanonicalizeSet }, 976 { 0x1f69, 0x1f69, 0x0033, CanonicalizeSet }, 977 { 0x1f6a, 0x1f6a, 0x0034, CanonicalizeSet }, 978 { 0x1f6b, 0x1f6b, 0x0035, CanonicalizeSet }, 979 { 0x1f6c, 0x1f6c, 0x0036, CanonicalizeSet }, 980 { 0x1f6d, 0x1f6d, 0x0037, CanonicalizeSet }, 981 { 0x1f6e, 0x1f6e, 0x0038, CanonicalizeSet }, 982 
{ 0x1f6f, 0x1f6f, 0x0039, CanonicalizeSet }, 983 { 0x1f70, 0x1f70, 0x003a, CanonicalizeSet }, 984 { 0x1f71, 0x1f71, 0x004a, CanonicalizeRangeLo }, 985 { 0x1f72, 0x1f73, 0x0056, CanonicalizeRangeLo }, 986 { 0x1f74, 0x1f74, 0x003b, CanonicalizeSet }, 987 { 0x1f75, 0x1f75, 0x0056, CanonicalizeRangeLo }, 988 { 0x1f76, 0x1f77, 0x0064, CanonicalizeRangeLo }, 989 { 0x1f78, 0x1f79, 0x0080, CanonicalizeRangeLo }, 990 { 0x1f7a, 0x1f7b, 0x0070, CanonicalizeRangeLo }, 991 { 0x1f7c, 0x1f7c, 0x003c, CanonicalizeSet }, 992 { 0x1f7d, 0x1f7d, 0x007e, CanonicalizeRangeLo }, 993 { 0x1f7e, 0x1f7f, 0x0000, CanonicalizeUnique }, 994 { 0x1f80, 0x1f80, 0x0022, CanonicalizeSet }, 995 { 0x1f81, 0x1f81, 0x0023, CanonicalizeSet }, 996 { 0x1f82, 0x1f82, 0x0024, CanonicalizeSet }, 997 { 0x1f83, 0x1f83, 0x0025, CanonicalizeSet }, 998 { 0x1f84, 0x1f84, 0x0026, CanonicalizeSet }, 999 { 0x1f85, 0x1f85, 0x0027, CanonicalizeSet }, 1000 { 0x1f86, 0x1f86, 0x0028, CanonicalizeSet }, 1001 { 0x1f87, 0x1f87, 0x0029, CanonicalizeSet }, 1002 { 0x1f88, 0x1f88, 0x0022, CanonicalizeSet }, 1003 { 0x1f89, 0x1f89, 0x0023, CanonicalizeSet }, 1004 { 0x1f8a, 0x1f8a, 0x0024, CanonicalizeSet }, 1005 { 0x1f8b, 0x1f8b, 0x0025, CanonicalizeSet }, 1006 { 0x1f8c, 0x1f8c, 0x0026, CanonicalizeSet }, 1007 { 0x1f8d, 0x1f8d, 0x0027, CanonicalizeSet }, 1008 { 0x1f8e, 0x1f8e, 0x0028, CanonicalizeSet }, 1009 { 0x1f8f, 0x1f8f, 0x0029, CanonicalizeSet }, 1010 { 0x1f90, 0x1f90, 0x002a, CanonicalizeSet }, 1011 { 0x1f91, 0x1f91, 0x002b, CanonicalizeSet }, 1012 { 0x1f92, 0x1f92, 0x002c, CanonicalizeSet }, 1013 { 0x1f93, 0x1f93, 0x002d, CanonicalizeSet }, 1014 { 0x1f94, 0x1f94, 0x002e, CanonicalizeSet }, 1015 { 0x1f95, 0x1f95, 0x002f, CanonicalizeSet }, 1016 { 0x1f96, 0x1f96, 0x0030, CanonicalizeSet }, 1017 { 0x1f97, 0x1f97, 0x0031, CanonicalizeSet }, 1018 { 0x1f98, 0x1f98, 0x002a, CanonicalizeSet }, 1019 { 0x1f99, 0x1f99, 0x002b, CanonicalizeSet }, 1020 { 0x1f9a, 0x1f9a, 0x002c, CanonicalizeSet }, 1021 { 0x1f9b, 0x1f9b, 0x002d, 
CanonicalizeSet }, 1022 { 0x1f9c, 0x1f9c, 0x002e, CanonicalizeSet }, 1023 { 0x1f9d, 0x1f9d, 0x002f, CanonicalizeSet }, 1024 { 0x1f9e, 0x1f9e, 0x0030, CanonicalizeSet }, 1025 { 0x1f9f, 0x1f9f, 0x0031, CanonicalizeSet }, 1026 { 0x1fa0, 0x1fa0, 0x0032, CanonicalizeSet }, 1027 { 0x1fa1, 0x1fa1, 0x0033, CanonicalizeSet }, 1028 { 0x1fa2, 0x1fa2, 0x0034, CanonicalizeSet }, 1029 { 0x1fa3, 0x1fa3, 0x0035, CanonicalizeSet }, 1030 { 0x1fa4, 0x1fa4, 0x0036, CanonicalizeSet }, 1031 { 0x1fa5, 0x1fa5, 0x0037, CanonicalizeSet }, 1032 { 0x1fa6, 0x1fa6, 0x0038, CanonicalizeSet }, 1033 { 0x1fa7, 0x1fa7, 0x0039, CanonicalizeSet }, 1034 { 0x1fa8, 0x1fa8, 0x0032, CanonicalizeSet }, 1035 { 0x1fa9, 0x1fa9, 0x0033, CanonicalizeSet }, 1036 { 0x1faa, 0x1faa, 0x0034, CanonicalizeSet }, 1037 { 0x1fab, 0x1fab, 0x0035, CanonicalizeSet }, 1038 { 0x1fac, 0x1fac, 0x0036, CanonicalizeSet }, 1039 { 0x1fad, 0x1fad, 0x0037, CanonicalizeSet }, 1040 { 0x1fae, 0x1fae, 0x0038, CanonicalizeSet }, 1041 { 0x1faf, 0x1faf, 0x0039, CanonicalizeSet }, 1042 { 0x1fb0, 0x1fb1, 0x0008, CanonicalizeRangeLo }, 1043 { 0x1fb2, 0x1fb2, 0x003a, CanonicalizeSet }, 1044 { 0x1fb3, 0x1fb3, 0x0010, CanonicalizeSet }, 1045 { 0x1fb4, 0x1fb4, 0x000d, CanonicalizeSet }, 1046 { 0x1fb5, 0x1fb5, 0x0000, CanonicalizeUnique }, 1047 { 0x1fb6, 0x1fb7, 0x0010, CanonicalizeSet }, 1048 { 0x1fb8, 0x1fb9, 0x0008, CanonicalizeRangeHi }, 1049 { 0x1fba, 0x1fba, 0x003a, CanonicalizeSet }, 1050 { 0x1fbb, 0x1fbb, 0x004a, CanonicalizeRangeHi }, 1051 { 0x1fbc, 0x1fbc, 0x0010, CanonicalizeSet }, 1052 { 0x1fbd, 0x1fbd, 0x0000, CanonicalizeUnique }, 1053 { 0x1fbe, 0x1fbe, 0x0015, CanonicalizeSet }, 1054 { 0x1fbf, 0x1fc1, 0x0000, CanonicalizeUnique }, 1055 { 0x1fc2, 0x1fc2, 0x003b, CanonicalizeSet }, 1056 { 0x1fc3, 0x1fc3, 0x0013, CanonicalizeSet }, 1057 { 0x1fc4, 0x1fc4, 0x000e, CanonicalizeSet }, 1058 { 0x1fc5, 0x1fc5, 0x0000, CanonicalizeUnique }, 1059 { 0x1fc6, 0x1fc7, 0x0013, CanonicalizeSet }, 1060 { 0x1fc8, 0x1fc9, 0x0056, CanonicalizeRangeHi }, 
1061 { 0x1fca, 0x1fca, 0x003b, CanonicalizeSet }, 1062 { 0x1fcb, 0x1fcb, 0x0056, CanonicalizeRangeHi }, 1063 { 0x1fcc, 0x1fcc, 0x0013, CanonicalizeSet }, 1064 { 0x1fcd, 0x1fcf, 0x0000, CanonicalizeUnique }, 1065 { 0x1fd0, 0x1fd1, 0x0008, CanonicalizeRangeLo }, 1066 { 0x1fd2, 0x1fd3, 0x0015, CanonicalizeSet }, 1067 { 0x1fd4, 0x1fd5, 0x0000, CanonicalizeUnique }, 1068 { 0x1fd6, 0x1fd7, 0x0015, CanonicalizeSet }, 1069 { 0x1fd8, 0x1fd9, 0x0008, CanonicalizeRangeHi }, 1070 { 0x1fda, 0x1fdb, 0x0064, CanonicalizeRangeHi }, 1071 { 0x1fdc, 0x1fdf, 0x0000, CanonicalizeUnique }, 1072 { 0x1fe0, 0x1fe1, 0x0008, CanonicalizeRangeLo }, 1073 { 0x1fe2, 0x1fe3, 0x001b, CanonicalizeSet }, 1074 { 0x1fe4, 0x1fe4, 0x0019, CanonicalizeSet }, 1075 { 0x1fe5, 0x1fe5, 0x0007, CanonicalizeRangeLo }, 1076 { 0x1fe6, 0x1fe7, 0x001b, CanonicalizeSet }, 1077 { 0x1fe8, 0x1fe9, 0x0008, CanonicalizeRangeHi }, 1078 { 0x1fea, 0x1feb, 0x0070, CanonicalizeRangeHi }, 1079 { 0x1fec, 0x1fec, 0x0007, CanonicalizeRangeHi }, 1080 { 0x1fed, 0x1ff1, 0x0000, CanonicalizeUnique }, 1081 { 0x1ff2, 0x1ff2, 0x003c, CanonicalizeSet }, 1082 { 0x1ff3, 0x1ff3, 0x001d, CanonicalizeSet }, 1083 { 0x1ff4, 0x1ff4, 0x000f, CanonicalizeSet }, 1084 { 0x1ff5, 0x1ff5, 0x0000, CanonicalizeUnique }, 1085 { 0x1ff6, 0x1ff7, 0x001d, CanonicalizeSet }, 1086 { 0x1ff8, 0x1ff9, 0x0080, CanonicalizeRangeHi }, 1087 { 0x1ffa, 0x1ffa, 0x003c, CanonicalizeSet }, 1088 { 0x1ffb, 0x1ffb, 0x007e, CanonicalizeRangeHi }, 1089 { 0x1ffc, 0x1ffc, 0x001d, CanonicalizeSet }, 1090 { 0x1ffd, 0x2131, 0x0000, CanonicalizeUnique }, 1091 { 0x2132, 0x2132, 0x001c, CanonicalizeRangeLo }, 1092 { 0x2133, 0x214d, 0x0000, CanonicalizeUnique }, 1093 { 0x214e, 0x214e, 0x001c, CanonicalizeRangeHi }, 1094 { 0x214f, 0x215f, 0x0000, CanonicalizeUnique }, 1095 { 0x2160, 0x216f, 0x0010, CanonicalizeRangeLo }, 1096 { 0x2170, 0x217f, 0x0010, CanonicalizeRangeHi }, 1097 { 0x2180, 0x2182, 0x0000, CanonicalizeUnique }, 1098 { 0x2183, 0x2184, 0x0000, 
CanonicalizeAlternatingUnaligned }, 1099 { 0x2185, 0x24b5, 0x0000, CanonicalizeUnique }, 1100 { 0x24b6, 0x24cf, 0x001a, CanonicalizeRangeLo }, 1101 { 0x24d0, 0x24e9, 0x001a, CanonicalizeRangeHi }, 1102 { 0x24ea, 0x2bff, 0x0000, CanonicalizeUnique }, 1103 { 0x2c00, 0x2c2e, 0x0030, CanonicalizeRangeLo }, 1104 { 0x2c2f, 0x2c2f, 0x0000, CanonicalizeUnique }, 1105 { 0x2c30, 0x2c5e, 0x0030, CanonicalizeRangeHi }, 1106 { 0x2c5f, 0x2c5f, 0x0000, CanonicalizeUnique }, 1107 { 0x2c60, 0x2c61, 0x0000, CanonicalizeAlternatingAligned }, 1108 { 0x2c62, 0x2c62, 0x29f7, CanonicalizeRangeHi }, 1109 { 0x2c63, 0x2c63, 0x0ee6, CanonicalizeRangeHi }, 1110 { 0x2c64, 0x2c64, 0x29e7, CanonicalizeRangeHi }, 1111 { 0x2c65, 0x2c65, 0x2a2b, CanonicalizeRangeHi }, 1112 { 0x2c66, 0x2c66, 0x2a28, CanonicalizeRangeHi }, 1113 { 0x2c67, 0x2c6c, 0x0000, CanonicalizeAlternatingUnaligned }, 1114 { 0x2c6d, 0x2c6d, 0x2a1c, CanonicalizeRangeHi }, 1115 { 0x2c6e, 0x2c6e, 0x29fd, CanonicalizeRangeHi }, 1116 { 0x2c6f, 0x2c6f, 0x2a1f, CanonicalizeRangeHi }, 1117 { 0x2c70, 0x2c70, 0x2a1e, CanonicalizeRangeHi }, 1118 { 0x2c71, 0x2c71, 0x0000, CanonicalizeUnique }, 1119 { 0x2c72, 0x2c73, 0x0000, CanonicalizeAlternatingAligned }, 1120 { 0x2c74, 0x2c74, 0x0000, CanonicalizeUnique }, 1121 { 0x2c75, 0x2c76, 0x0000, CanonicalizeAlternatingUnaligned }, 1122 { 0x2c77, 0x2c7d, 0x0000, CanonicalizeUnique }, 1123 { 0x2c7e, 0x2c7f, 0x2a3f, CanonicalizeRangeHi }, 1124 { 0x2c80, 0x2ce3, 0x0000, CanonicalizeAlternatingAligned }, 1125 { 0x2ce4, 0x2cea, 0x0000, CanonicalizeUnique }, 1126 { 0x2ceb, 0x2cee, 0x0000, CanonicalizeAlternatingUnaligned }, 1127 { 0x2cef, 0x2cf1, 0x0000, CanonicalizeUnique }, 1128 { 0x2cf2, 0x2cf3, 0x0000, CanonicalizeAlternatingAligned }, 1129 { 0x2cf4, 0x2cff, 0x0000, CanonicalizeUnique }, 1130 { 0x2d00, 0x2d25, 0x1c60, CanonicalizeRangeHi }, 1131 { 0x2d26, 0x2d26, 0x0000, CanonicalizeUnique }, 1132 { 0x2d27, 0x2d27, 0x1c60, CanonicalizeRangeHi }, 1133 { 0x2d28, 0x2d2c, 0x0000, CanonicalizeUnique }, 
1134 { 0x2d2d, 0x2d2d, 0x1c60, CanonicalizeRangeHi }, 1135 { 0x2d2e, 0xa63f, 0x0000, CanonicalizeUnique }, 1136 { 0xa640, 0xa66d, 0x0000, CanonicalizeAlternatingAligned }, 1137 { 0xa66e, 0xa67f, 0x0000, CanonicalizeUnique }, 1138 { 0xa680, 0xa69b, 0x0000, CanonicalizeAlternatingAligned }, 1139 { 0xa69c, 0xa721, 0x0000, CanonicalizeUnique }, 1140 { 0xa722, 0xa72f, 0x0000, CanonicalizeAlternatingAligned }, 1141 { 0xa730, 0xa731, 0x0000, CanonicalizeUnique }, 1142 { 0xa732, 0xa76f, 0x0000, CanonicalizeAlternatingAligned }, 1143 { 0xa770, 0xa778, 0x0000, CanonicalizeUnique }, 1144 { 0xa779, 0xa77c, 0x0000, CanonicalizeAlternatingUnaligned }, 1145 { 0xa77d, 0xa77d, 0x8a04, CanonicalizeRangeHi }, 1146 { 0xa77e, 0xa787, 0x0000, CanonicalizeAlternatingAligned }, 1147 { 0xa788, 0xa78a, 0x0000, CanonicalizeUnique }, 1148 { 0xa78b, 0xa78c, 0x0000, CanonicalizeAlternatingUnaligned }, 1149 { 0xa78d, 0xa78d, 0xa528, CanonicalizeRangeHi }, 1150 { 0xa78e, 0xa78f, 0x0000, CanonicalizeUnique }, 1151 { 0xa790, 0xa793, 0x0000, CanonicalizeAlternatingAligned }, 1152 { 0xa794, 0xa795, 0x0000, CanonicalizeUnique }, 1153 { 0xa796, 0xa7a9, 0x0000, CanonicalizeAlternatingAligned }, 1154 { 0xa7aa, 0xa7aa, 0xa544, CanonicalizeRangeHi }, 1155 { 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi }, 1156 { 0xa7ac, 0xa7ac, 0xa54b, CanonicalizeRangeHi }, 1157 { 0xa7ad, 0xa7ad, 0xa541, CanonicalizeRangeHi }, 1158 { 0xa7ae, 0xa7af, 0x0000, CanonicalizeUnique }, 1159 { 0xa7b0, 0xa7b0, 0xa512, CanonicalizeRangeHi }, 1160 { 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi }, 1161 { 0xa7b2, 0xfaff, 0x0000, CanonicalizeUnique }, 1162 { 0xfb00, 0xfb04, 0x0001, CanonicalizeSet }, 1163 { 0xfb05, 0xfb06, 0x0005, CanonicalizeSet }, 1164 { 0xfb07, 0xfb12, 0x0000, CanonicalizeUnique }, 1165 { 0xfb13, 0xfb15, 0x001f, CanonicalizeSet }, 1166 { 0xfb16, 0xfb16, 0x0020, CanonicalizeSet }, 1167 { 0xfb17, 0xfb17, 0x001f, CanonicalizeSet }, 1168 { 0xfb18, 0xff20, 0x0000, CanonicalizeUnique }, 1169 { 0xff21, 0xff3a, 0x0020, 
CanonicalizeRangeLo }, 1170 { 0xff3b, 0xff40, 0x0000, CanonicalizeUnique }, 1171 { 0xff41, 0xff5a, 0x0020, CanonicalizeRangeHi }, 1172 { 0xff5b, 0x103ff, 0x0000, CanonicalizeUnique }, 1173 { 0x10400, 0x10427, 0x0028, CanonicalizeRangeLo }, 1174 { 0x10428, 0x1044f, 0x0028, CanonicalizeRangeHi }, 1175 { 0x10450, 0x1189f, 0x0000, CanonicalizeUnique }, 1176 { 0x118a0, 0x118bf, 0x0020, CanonicalizeRangeLo }, 1177 { 0x118c0, 0x118df, 0x0020, CanonicalizeRangeHi }, 1178 { 0x118e0, 0x10ffff, 0x0000, CanonicalizeUnique }, 460 1179 }; 461 1180 -
trunk/Source/JavaScriptCore/yarr/YarrCanonicalizeUnicode.h
r197165 r197426 1 1 /* 2 * Copyright (C) 2012 Apple Inc. All rights reserved.2 * Copyright (C) 2012-2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 24 24 */ 25 25 26 #ifndef YarrCanonicalizeU CS2_H27 #define YarrCanonicalizeU CS2_H26 #ifndef YarrCanonicalizeUnicode_h 27 #define YarrCanonicalizeUnicode_h 28 28 29 29 #include <stdint.h> … … 32 32 namespace JSC { namespace Yarr { 33 33 34 // This set of data (autogenerated using YarrCanonicalizeU CS2.js into YarrCanonicalizeUCS2.cpp)34 // This set of data (autogenerated using YarrCanonicalizeUnicode.js into YarrCanonicalizeUnicode.cpp) 35 35 // provides information for each UCS2 code point as to the set of code points that it should 36 36 // match under the ES5.1 case insensitive RegExp matching rules, specified in 15.10.2.8. … … 43 43 CanonicalizeAlternatingUnaligned, // Unaligned consecutive pair, e.g. 0x241,0x242. 44 44 }; 45 struct UCS2CanonicalizationRange { uint16_t begin, end, value, type; }; 45 struct CanonicalizationRange { 46 UChar32 begin; 47 UChar32 end; 48 UChar32 value; 49 UCS2CanonicalizationType type; 50 }; 51 46 52 extern const size_t UCS2_CANONICALIZATION_RANGES; 47 extern const uint16_t* const characterSetInfo[];48 extern const UCS2CanonicalizationRange rangeInfo[];53 extern const UChar32* const ucs2CharacterSetInfo[]; 54 extern const CanonicalizationRange ucs2RangeInfo[]; 49 55 50 // This table is similar to the full rangeInfo table, however this maps from UCS2 codepoints to 51 // the set of Latin1 codepoints that could match. 52 enum LatinCanonicalizationType { 53 CanonicalizeLatinSelf, // This character is in the Latin1 range, but has no canonical equivalent in the range. 54 CanonicalizeLatinMask0x20, // One of a pair of characters, under the mask 0x20. 55 CanonicalizeLatinOther, // This character is not in the Latin1 range, but canonicalizes to another that is. 
56 CanonicalizeLatinInvalid, // Cannot match against Latin1 input. 57 }; 58 struct LatinCanonicalizationRange { uint16_t begin, end, value, type; }; 59 extern const size_t LATIN_CANONICALIZATION_RANGES; 60 extern LatinCanonicalizationRange latinRangeInfo[]; 56 extern const size_t UNICODE_CANONICALIZATION_RANGES; 57 extern const UChar32* const unicodeCharacterSetInfo[]; 58 extern const CanonicalizationRange unicodeRangeInfo[]; 61 59 62 // This searches in log2 time over ~364 entries, so should typically result in 8 compares. 63 inline const UCS2CanonicalizationRange* rangeInfoFor(UChar ch) 60 enum class CanonicalMode { UCS2, Unicode }; 61 62 inline const UChar32* canonicalCharacterSetInfo(unsigned index, CanonicalMode canonicalMode) 64 63 { 65 const UCS2CanonicalizationRange* info = rangeInfo; 66 size_t entries = UCS2_CANONICALIZATION_RANGES; 64 const UChar32* const* rangeInfo = canonicalMode == CanonicalMode::UCS2 ? ucs2CharacterSetInfo : unicodeCharacterSetInfo; 65 return rangeInfo[index]; 66 } 67 68 // This searches in log2 time over ~400-600 entries, so should typically result in 9 compares. 69 inline const CanonicalizationRange* canonicalRangeInfoFor(UChar32 ch, CanonicalMode canonicalMode = CanonicalMode::UCS2) 70 { 71 const CanonicalizationRange* info = canonicalMode == CanonicalMode::UCS2 ? ucs2RangeInfo : unicodeRangeInfo; 72 size_t entries = canonicalMode == CanonicalMode::UCS2 ? UCS2_CANONICALIZATION_RANGES : UNICODE_CANONICALIZATION_RANGES; 67 73 68 74 while (true) { 69 75 size_t candidate = entries >> 1; 70 const UCS2CanonicalizationRange* candidateInfo = info + candidate;76 const CanonicalizationRange* candidateInfo = info + candidate; 71 77 if (ch < candidateInfo->begin) 72 78 entries = candidate; … … 81 87 82 88 // Should only be called for characters that have one canonically matching value. 
83 inline UChar getCanonicalPair(const UCS2CanonicalizationRange* info, UCharch)89 inline UChar32 getCanonicalPair(const CanonicalizationRange* info, UChar32 ch) 84 90 { 85 91 ASSERT(ch >= info->begin && ch <= info->end); … … 101 107 102 108 // Returns true if no other UCS2 codepoint can match this value. 103 inline bool isCanonicallyUnique(UChar ch)109 inline bool isCanonicallyUnique(UChar32 ch, CanonicalMode canonicalMode = CanonicalMode::UCS2) 104 110 { 105 return rangeInfoFor(ch)->type == CanonicalizeUnique;111 return canonicalRangeInfoFor(ch, canonicalMode)->type == CanonicalizeUnique; 106 112 } 107 113 108 114 // Returns true if values are equal, under the canonicalization rules. 109 inline bool areCanonicallyEquivalent(UChar a, UChar b)115 inline bool areCanonicallyEquivalent(UChar32 a, UChar32 b, CanonicalMode canonicalMode = CanonicalMode::UCS2) 110 116 { 111 const UCS2CanonicalizationRange* info = rangeInfoFor(a);117 const CanonicalizationRange* info = canonicalRangeInfoFor(a, canonicalMode); 112 118 switch (info->type) { 113 119 case CanonicalizeUnique: 114 120 return a == b; 115 121 case CanonicalizeSet: { 116 for (const uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) {122 for (const UChar32* set = canonicalCharacterSetInfo(info->value, canonicalMode); (a = *set); ++set) { 117 123 if (a == b) 118 124 return true; -
trunk/Source/JavaScriptCore/yarr/YarrCanonicalizeUnicode.js
r197165 r197426 1 1 /* 2 * Copyright (C) 2012 Apple Inc. All rights reserved.2 * Copyright (C) 2012, 2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 24 24 */ 25 25 26 // See ES 5.1, 15.10.2.8 26 function printHeader() 27 { 28 var copyright = ( 29 "/*" + "\n" + 30 " * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved." + "\n" + 31 " *" + "\n" + 32 " * Redistribution and use in source and binary forms, with or without" + "\n" + 33 " * modification, are permitted provided that the following conditions" + "\n" + 34 " * are met:" + "\n" + 35 " * 1. Redistributions of source code must retain the above copyright" + "\n" + 36 " * notice, this list of conditions and the following disclaimer." + "\n" + 37 " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" + 38 " * notice, this list of conditions and the following disclaimer in the" + "\n" + 39 " * documentation and/or other materials provided with the distribution." + "\n" + 40 " *" + "\n" + 41 " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" + 42 " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" + 43 " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" + 44 " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" + 45 " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" + 46 " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" + 47 " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" + 48 " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" + 49 " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" + 50 " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" + 51 " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
" + "\n" + 52 " */"); 53 54 print(copyright); 55 print(); 56 print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUnicode.js"); 57 print(); 58 print('#include "config.h"'); 59 print('#include "YarrCanonicalizeUnicode.h"'); 60 print(); 61 print("namespace JSC { namespace Yarr {"); 62 print(); 63 print("#include <stdint.h>"); 64 print(); 65 } 66 67 function printFooter() 68 { 69 print("} } // JSC::Yarr"); 70 print(); 71 } 72 73 // Helper function to convert a number to a fixed width hex representation of a UChar32. 74 function hex(x) 75 { 76 var s = Number(x).toString(16); 77 while (s.length < 4) 78 s = 0 + s; 79 return "0x" + s; 80 } 81 82 // See ES 6.0, 21.2.2.8.2 Steps 3 27 83 function canonicalize(ch) 28 84 { … … 36 92 } 37 93 94 // See ES 6.0, 21.2.2.8.2 Step 2 95 function canonicalizeUnicode(ch) 96 { 97 if (ch < 128) 98 return canonicalize(ch); 99 100 return String.fromCodePoint(ch).toUpperCase().codePointAt(0); 101 } 102 38 103 var MAX_UCS2 = 0xFFFF; 39 var MAX_LATIN = 0xFF; 40 41 var groupedCanonically = []; 42 // Pass 1: populate groupedCanonically - this is mapping from canonicalized 43 // values back to the set of character code that canonicalize to them. 44 for (var i = 0; i <= MAX_UCS2; ++i) { 45 var ch = canonicalize(i); 46 if (!groupedCanonically[ch]) 47 groupedCanonically[ch] = []; 48 groupedCanonically[ch].push(i); 49 } 50 51 var typeInfo = []; 52 var latinTypeInfo = []; 53 var characterSetInfo = []; 54 // Pass 2: populate typeInfo & characterSetInfo. For every character calculate 55 // a typeInfo value, described by the types above, and a value payload. 56 for (cu in groupedCanonically) { 57 // The set of characters that canonicalize to cu 58 var characters = groupedCanonically[cu]; 59 60 // If there is only one, it is unique. 61 if (characters.length == 1) { 62 typeInfo[characters[0]] = "CanonicalizeUnique:0"; 63 latinTypeInfo[characters[0]] = characters[0] <= MAX_LATIN ? 
"CanonicalizeLatinSelf:0" : "CanonicalizeLatinInvalid:0"; 64 continue; 65 } 66 67 // Sort the array. 68 characters.sort(function(x,y){return x-y;}); 69 70 // If there are more than two characters, create an entry in characterSetInfo. 71 if (characters.length > 2) { 72 for (i in characters) 73 typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length; 74 characterSetInfo.push(characters); 75 76 if (characters[1] <= MAX_LATIN) 77 throw new Error("sets with more than one latin character not supported!"); 78 if (characters[0] <= MAX_LATIN) { 104 var MAX_UNICODE = 0x10FFFF; 105 106 function createUCS2CanonicalGroups() 107 { 108 var groupedCanonically = []; 109 // Pass 1: populate groupedCanonically - this is mapping from canonicalized 110 // values back to the set of character code that canonicalize to them. 111 for (var i = 0; i <= MAX_UCS2; ++i) { 112 var ch = canonicalize(i); 113 if (!groupedCanonically[ch]) 114 groupedCanonically[ch] = []; 115 groupedCanonically[ch].push(i); 116 } 117 118 return groupedCanonically; 119 } 120 121 function createUnicodeCanonicalGroups() 122 { 123 var groupedCanonically = []; 124 // Pass 1: populate groupedCanonically - this is mapping from canonicalized 125 // values back to the set of character code that canonicalize to them. 126 for (var i = 0; i <= MAX_UNICODE; ++i) { 127 var ch = canonicalizeUnicode(i); 128 if (!groupedCanonically[ch]) 129 groupedCanonically[ch] = []; 130 groupedCanonically[ch].push(i); 131 } 132 133 return groupedCanonically; 134 } 135 136 function createTables(prefix, maxValue, canonicalGroups) 137 { 138 var prefixLower = prefix.toLowerCase(); 139 var prefixUpper = prefix.toUpperCase(); 140 var typeInfo = []; 141 var characterSetInfo = []; 142 // Pass 2: populate typeInfo & characterSetInfo. For every character calculate 143 // a typeInfo value, described by the types above, and a value payload. 
144 for (cu in canonicalGroups) { 145 // The set of characters that canonicalize to cu 146 var characters = canonicalGroups[cu]; 147 148 // If there is only one, it is unique. 149 if (characters.length == 1) { 150 typeInfo[characters[0]] = "CanonicalizeUnique:0"; 151 continue; 152 } 153 154 // Sort the array. 155 characters.sort(function(x,y){return x-y;}); 156 157 // If there are more than two characters, create an entry in characterSetInfo. 158 if (characters.length > 2) { 79 159 for (i in characters) 80 latinTypeInfo[characters[i]] = "CanonicalizeLatinOther:" + characters[0]; 81 latinTypeInfo[characters[0]] = "CanonicalizeLatinSelf:0"; 160 typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length; 161 characterSetInfo.push(characters); 162 163 continue; 164 } 165 166 // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner. 167 var lo = characters[0]; 168 var hi = characters[1]; 169 var delta = hi - lo; 170 if (delta == 1) { 171 var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0"; 172 typeInfo[lo] = type; 173 typeInfo[hi] = type; 82 174 } else { 83 for (i in characters)84 latinTypeInfo[characters[i]] = "CanonicalizeLatinInvalid:0";175 typeInfo[lo] = "CanonicalizeRangeLo:" + delta; 176 typeInfo[hi] = "CanonicalizeRangeHi:" + delta; 85 177 } 86 87 continue; 88 } 89 90 // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner. 91 var lo = characters[0]; 92 var hi = characters[1]; 93 var delta = hi - lo; 94 if (delta == 1) { 95 var type = lo & 1 ? 
"CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0"; 96 typeInfo[lo] = type; 97 typeInfo[hi] = type; 98 } else { 99 typeInfo[lo] = "CanonicalizeRangeLo:" + delta; 100 typeInfo[hi] = "CanonicalizeRangeHi:" + delta; 101 } 102 103 if (lo > MAX_LATIN) { 104 latinTypeInfo[lo] = "CanonicalizeLatinInvalid:0"; 105 latinTypeInfo[hi] = "CanonicalizeLatinInvalid:0"; 106 } else if (hi > MAX_LATIN) { 107 latinTypeInfo[lo] = "CanonicalizeLatinSelf:0"; 108 latinTypeInfo[hi] = "CanonicalizeLatinOther:" + lo; 109 } else { 110 if (delta != 0x20 || lo & 0x20) 111 throw new Error("pairs of latin characters that don't mask with 0x20 not supported!"); 112 latinTypeInfo[lo] = "CanonicalizeLatinMask0x20:0"; 113 latinTypeInfo[hi] = "CanonicalizeLatinMask0x20:0"; 114 } 115 } 116 117 var rangeInfo = []; 118 // Pass 3: coallesce types into ranges. 119 for (var end = 0; end <= MAX_UCS2; ++end) { 120 var begin = end; 121 var type = typeInfo[end]; 122 while (end < MAX_UCS2 && typeInfo[end + 1] == type) 123 ++end; 124 rangeInfo.push({begin:begin, end:end, type:type}); 125 } 126 127 var latinRangeInfo = []; 128 // Pass 4: coallesce latin-1 types into ranges. 129 for (var end = 0; end <= MAX_UCS2; ++end) { 130 var begin = end; 131 var type = latinTypeInfo[end]; 132 while (end < MAX_UCS2 && latinTypeInfo[end + 1] == type) 133 ++end; 134 latinRangeInfo.push({begin:begin, end:end, type:type}); 135 } 136 137 138 // Helper function to convert a number to a fixed width hex representation of a C uint16_t. 139 function hex(x) 140 { 141 var s = Number(x).toString(16); 142 while (s.length < 4) 143 s = 0 + s; 144 return "0x" + s + "u"; 145 } 146 147 var copyright = ( 148 "/*" + "\n" + 149 " * Copyright (C) 2012 Apple Inc. All rights reserved." + "\n" + 150 " *" + "\n" + 151 " * Redistribution and use in source and binary forms, with or without" + "\n" + 152 " * modification, are permitted provided that the following conditions" + "\n" + 153 " * are met:" + "\n" + 154 " * 1. 
Redistributions of source code must retain the above copyright" + "\n" + 155 " * notice, this list of conditions and the following disclaimer." + "\n" + 156 " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" + 157 " * notice, this list of conditions and the following disclaimer in the" + "\n" + 158 " * documentation and/or other materials provided with the distribution." + "\n" + 159 " *" + "\n" + 160 " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" + 161 " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" + 162 " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" + 163 " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" + 164 " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" + 165 " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" + 166 " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" + 167 " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" + 168 " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" + 169 " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" + 170 " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" + 171 " */"); 172 173 print(copyright); 174 print(); 175 print("// DO NOT EDIT! 
- this file autogenerated by YarrCanonicalizeUCS2.js"); 176 print(); 177 print('#include "config.h"'); 178 print('#include "YarrCanonicalizeUCS2.h"'); 179 print(); 180 print("namespace JSC { namespace Yarr {"); 181 print(); 182 print("#include <stdint.h>"); 183 print(); 184 185 for (i in characterSetInfo) { 186 var characters = "" 187 var set = characterSetInfo[i]; 188 for (var j in set) 189 characters += hex(set[j]) + ", "; 190 print("uint16_t ucs2CharacterSet" + i + "[] = { " + characters + "0 };"); 191 } 192 print(); 193 print("static const size_t UCS2_CANONICALIZATION_SETS = " + characterSetInfo.length + ";"); 194 print("uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {"); 195 for (i in characterSetInfo) 196 print(" ucs2CharacterSet" + i + ","); 197 print("};"); 198 print(); 199 print("const size_t UCS2_CANONICALIZATION_RANGES = " + rangeInfo.length + ";"); 200 print("UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {"); 201 for (i in rangeInfo) { 202 var info = rangeInfo[i]; 203 var typeAndValue = info.type.split(':'); 204 print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); 205 } 206 print("};"); 207 print(); 208 print("const size_t LATIN_CANONICALIZATION_RANGES = " + latinRangeInfo.length + ";"); 209 print("LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = {"); 210 for (i in latinRangeInfo) { 211 var info = latinRangeInfo[i]; 212 var typeAndValue = info.type.split(':'); 213 print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); 214 } 215 print("};"); 216 print(); 217 print("} } // JSC::Yarr"); 218 print(); 219 178 } 179 180 var rangeInfo = []; 181 // Pass 3: coalesce types into ranges. 
182 for (var end = 0; end <= maxValue; ++end) { 183 var begin = end; 184 var type = typeInfo[end]; 185 while (end < maxValue && typeInfo[end + 1] == type) 186 ++end; 187 rangeInfo.push({begin:begin, end:end, type:type}); 188 } 189 190 for (i in characterSetInfo) { 191 var characters = "" 192 var set = characterSetInfo[i]; 193 for (var j in set) 194 characters += hex(set[j]) + ", "; 195 print("const UChar32 " + prefixLower + "CharacterSet" + i + "[] = { " + characters + "0 };"); 196 } 197 print(); 198 print("static const size_t " + prefixUpper + "_CANONICALIZATION_SETS = " + characterSetInfo.length + ";"); 199 print("const UChar32* const " + prefixLower + "CharacterSetInfo[" + prefixUpper + "_CANONICALIZATION_SETS] = {"); 200 for (i in characterSetInfo) 201 print(" " + prefixLower + "CharacterSet" + i + ","); 202 print("};"); 203 print(); 204 print("const size_t " + prefixUpper + "_CANONICALIZATION_RANGES = " + rangeInfo.length + ";"); 205 print("const CanonicalizationRange " + prefixLower + "RangeInfo[" + prefixUpper + "_CANONICALIZATION_RANGES] = {"); 206 for (i in rangeInfo) { 207 var info = rangeInfo[i]; 208 var typeAndValue = info.type.split(':'); 209 print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); 210 } 211 print("};"); 212 print(); 213 } 214 215 printHeader(); 216 217 createTables("UCS2", MAX_UCS2, createUCS2CanonicalGroups()); 218 createTables("Unicode", MAX_UNICODE, createUnicodeCanonicalGroups()); 219 220 printFooter(); 221 -
trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
r194496 r197426 1 1 /* 2 * Copyright (C) 2009 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2013, 2016 Apple Inc. All rights reserved. 3 3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged 4 4 * … … 29 29 30 30 #include "Yarr.h" 31 #include "YarrCanonicalizeU CS2.h"31 #include "YarrCanonicalizeUnicode.h" 32 32 #include <wtf/BumpPointerAllocator.h> 33 33 #include <wtf/DataLog.h> … … 45 45 46 46 struct BackTrackInfoPatternCharacter { 47 uintptr_t begin; // Only needed for unicode patterns 47 48 uintptr_t matchAmount; 48 49 }; 49 50 struct BackTrackInfoCharacterClass { 51 uintptr_t begin; // Only needed for unicode patterns 50 52 uintptr_t matchAmount; 51 53 }; … … 168 170 class InputStream { 169 171 public: 170 InputStream(const CharType* input, unsigned start, unsigned length )172 InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs) 171 173 : input(input) 172 174 , pos(start) 173 175 , length(length) 176 , decodeSurrogatePairs(decodeSurrogatePairs) 174 177 { 175 178 } … … 205 208 unsigned p = pos - negativePositionOffest; 206 209 ASSERT(p < length); 207 return input[p]; 210 int result = input[p]; 211 if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length 212 && U16_IS_TRAIL(input[p + 1])) { 213 if (atEnd()) 214 return -1; 215 216 result = U16_GET_SUPPLEMENTARY(result, input[p + 1]); 217 next(); 218 } 219 return result; 220 } 221 222 int readSurrogatePairChecked(unsigned negativePositionOffest) 223 { 224 RELEASE_ASSERT(pos >= negativePositionOffest); 225 unsigned p = pos - negativePositionOffest; 226 ASSERT(p < length); 227 if (p + 1 >= length) 228 return -1; 229 230 int first = input[p]; 231 if (U16_IS_LEAD(first) && U16_IS_TRAIL(input[p + 1])) 232 return U16_GET_SUPPLEMENTARY(first, input[p + 1]); 233 234 return -1; 208 235 } 209 236 … … 211 238 { 212 239 ASSERT(from < length); 213 return input[from]; 240 int result = input[from]; 241 if (U16_IS_LEAD(result) && 
decodeSurrogatePairs && from + 1 < length 242 && U16_IS_TRAIL(input[from + 1])) { 243 244 result = U16_GET_SUPPLEMENTARY(result, input[from + 1]); 245 } 246 return result; 214 247 } 215 248 … … 282 315 unsigned pos; 283 316 unsigned length; 317 bool decodeSurrogatePairs; 284 318 }; 285 319 286 320 bool testCharacterClass(CharacterClass* characterClass, int ch) 287 321 { 288 if (ch & 0x FF80) {322 if (ch & 0x1FFF80) { 289 323 for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) 290 324 if (ch == characterClass->m_matchesUnicode[i]) … … 310 344 } 311 345 346 bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset) 347 { 348 return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset); 349 } 350 312 351 bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset) 313 352 { … … 329 368 return false; 330 369 331 if (pattern->m_ignoreCase) { 332 for (unsigned i = 0; i < matchSize; ++i) { 333 int oldCh = input.reread(matchBegin + i); 334 int ch = input.readChecked(negativeInputOffset + matchSize - i); 335 336 if (oldCh == ch) 337 continue; 338 339 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that 370 for (unsigned i = 0; i < matchSize; ++i) { 371 int oldCh = input.reread(matchBegin + i); 372 int ch; 373 if (!U_IS_BMP(oldCh)) { 374 ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i); 375 ++i; 376 } else 377 ch = input.readChecked(negativeInputOffset + matchSize - i); 378 379 if (oldCh == ch) 380 continue; 381 382 if (pattern->m_ignoreCase) { 383 // The definition for canonicalize (see ES 6.0, 15.10.2.8) means that 340 384 // unicode values are never allowed to match against ascii ones. 341 385 if (isASCII(oldCh) || isASCII(ch)) { 342 386 if (toASCIIUpper(oldCh) == toASCIIUpper(ch)) 343 387 continue; 344 } else if (areCanonicallyEquivalent(oldCh, ch ))388 } else if (areCanonicallyEquivalent(oldCh, ch, unicode ? 
CanonicalMode::Unicode : CanonicalMode::UCS2)) 345 389 continue; 346 347 input.uncheckInput(matchSize); 348 return false; 349 } 350 } else { 351 for (unsigned i = 0; i < matchSize; ++i) { 352 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) { 353 input.uncheckInput(matchSize); 354 return false; 355 } 356 } 390 } 391 392 input.uncheckInput(matchSize); 393 return false; 357 394 } 358 395 … … 397 434 if (backTrack->matchAmount) { 398 435 --backTrack->matchAmount; 399 input.uncheckInput(1); 436 if (unicode && !U_IS_BMP(term.atom.patternCharacter)) 437 input.uncheckInput(2); 438 else 439 input.uncheckInput(1); 400 440 return true; 401 441 } … … 408 448 return true; 409 449 } 410 input. uncheckInput(backTrack->matchAmount);450 input.setPos(backTrack->begin); 411 451 break; 412 452 } … … 447 487 { 448 488 ASSERT(term.type == ByteTerm::TypeCharacterClass); 449 BackTrackInfo PatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);489 BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation); 450 490 451 491 switch (term.atom.quantityType) { 452 492 case QuantifierFixedCount: { 493 if (unicode) { 494 backTrack->begin = input.getPos(); 495 unsigned matchAmount = 0; 496 for (matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { 497 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) { 498 input.setPos(backTrack->begin); 499 return false; 500 } 501 } 502 503 return true; 504 } 505 453 506 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { 454 507 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) … … 459 512 460 513 case QuantifierGreedy: { 514 backTrack->begin = input.getPos(); 461 515 unsigned matchAmount = 0; 462 516 while ((matchAmount < 
term.atom.quantityCount) && input.checkInput(1)) { … … 473 527 474 528 case QuantifierNonGreedy: 529 backTrack->begin = input.getPos(); 475 530 backTrack->matchAmount = 0; 476 531 return true; … … 484 539 { 485 540 ASSERT(term.type == ByteTerm::TypeCharacterClass); 486 BackTrackInfo PatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);541 BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation); 487 542 488 543 switch (term.atom.quantityType) { 489 544 case QuantifierFixedCount: 545 if (unicode) 546 input.setPos(backTrack->begin); 490 547 break; 491 548 492 549 case QuantifierGreedy: 493 550 if (backTrack->matchAmount) { 551 if (unicode) { 552 // Rematch one less match 553 input.setPos(backTrack->begin); 554 --backTrack->matchAmount; 555 for (unsigned matchAmount = 0; (matchAmount < backTrack->matchAmount) && input.checkInput(1); ++matchAmount) { 556 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { 557 input.uncheckInput(1); 558 break; 559 } 560 } 561 return true; 562 } 494 563 --backTrack->matchAmount; 495 564 input.uncheckInput(1); … … 504 573 return true; 505 574 } 506 input. uncheckInput(backTrack->matchAmount);575 input.setPos(backTrack->begin); 507 576 break; 508 577 } … … 774 843 return false; 775 844 776 // Successful match! Okay, what's next? - loop around and try to match mo ar!845 // Successful match! Okay, what's next? - loop around and try to match more! 
777 846 context->term -= (term.atom.parenthesesWidth + 1); 778 847 return true; … … 1155 1224 case ByteTerm::TypePatternCharacterOnce: 1156 1225 case ByteTerm::TypePatternCharacterFixed: { 1226 if (unicode) { 1227 if (!U_IS_BMP(currentTerm().atom.patternCharacter)) { 1228 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { 1229 if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) { 1230 BACKTRACK(); 1231 } 1232 } 1233 MATCH_NEXT(); 1234 } 1235 } 1236 unsigned position = input.getPos(); // May need to back out reading a surrogate pair. 1237 1157 1238 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { 1158 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) 1239 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) { 1240 input.setPos(position); 1159 1241 BACKTRACK(); 1242 } 1160 1243 } 1161 1244 MATCH_NEXT(); … … 1177 1260 case ByteTerm::TypePatternCharacterNonGreedy: { 1178 1261 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); 1262 backTrack->begin = input.getPos(); 1179 1263 backTrack->matchAmount = 0; 1180 1264 MATCH_NEXT(); … … 1183 1267 case ByteTerm::TypePatternCasedCharacterOnce: 1184 1268 case ByteTerm::TypePatternCasedCharacterFixed: { 1269 if (unicode) { 1270 // Case insensitive matching of unicode charaters are handled as TypeCharacterClass 1271 ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter)); 1272 1273 unsigned position = input.getPos(); // May need to back out reading a surrogate pair. 
1274 1275 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { 1276 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) { 1277 input.setPos(position); 1278 BACKTRACK(); 1279 } 1280 } 1281 MATCH_NEXT(); 1282 } 1283 1185 1284 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { 1186 1285 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) … … 1191 1290 case ByteTerm::TypePatternCasedCharacterGreedy: { 1192 1291 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); 1292 1293 // Case insensitive matching of unicode charaters are handled as TypeCharacterClass 1294 ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter)); 1295 1193 1296 unsigned matchAmount = 0; 1194 1297 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { … … 1205 1308 case ByteTerm::TypePatternCasedCharacterNonGreedy: { 1206 1309 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); 1310 1311 // Case insensitive matching of unicode charaters are handled as TypeCharacterClass 1312 ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter)); 1313 1207 1314 backTrack->matchAmount = 0; 1208 1315 MATCH_NEXT(); … … 1440 1547 Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start) 1441 1548 : pattern(pattern) 1549 , unicode(pattern->m_unicode) 1442 1550 , output(output) 1443 , input(input, start, length )1551 , input(input, start, length, pattern->m_unicode) 1444 1552 , allocatorPool(0) 1445 1553 , remainingMatchCount(matchLimit) … … 1449 1557 private: 1450 1558 
BytecodePattern* pattern; 1559 bool unicode; 1451 1560 unsigned* output; 1452 1561 InputStream input; … … 1507 1616 } 1508 1617 1509 void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)1618 void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 1510 1619 { 1511 1620 if (m_pattern.m_ignoreCase) { 1512 ASSERT(u_tolower(ch) <= 0xFFFF);1513 ASSERT(u_toupper(ch) <= 0xFFFF);1514 1515 UChar lo = u_tolower(ch);1516 UChar hi = u_toupper(ch);1621 ASSERT(u_tolower(ch) <= UCHAR_MAX_VALUE); 1622 ASSERT(u_toupper(ch) <= UCHAR_MAX_VALUE); 1623 1624 UChar32 lo = u_tolower(ch); 1625 UChar32 hi = u_toupper(ch); 1517 1626 1518 1627 if (lo != hi) { -
trunk/Source/JavaScriptCore/yarr/YarrInterpreter.h
r197379 r197426 1 1 /* 2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2010-2012, 2014, 2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 75 75 struct { 76 76 union { 77 UChar patternCharacter;77 UChar32 patternCharacter; 78 78 struct { 79 UChar lo;80 UChar hi;79 UChar32 lo; 80 UChar32 hi; 81 81 } casedCharacter; 82 82 CharacterClass* characterClass; … … 106 106 unsigned inputPosition; 107 107 108 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)108 ByteTerm(UChar32 ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 109 109 : frameLocation(frameLocation) 110 110 , m_capture(false) … … 129 129 } 130 130 131 ByteTerm(UChar lo, UCharhi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)131 ByteTerm(UChar32 lo, UChar32 hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 132 132 : frameLocation(frameLocation) 133 133 , m_capture(false) … … 342 342 , m_ignoreCase(pattern.m_ignoreCase) 343 343 , m_multiline(pattern.m_multiline) 344 , m_unicode(pattern.m_unicode) 344 345 , m_allocator(allocator) 345 346 { … … 361 362 bool m_ignoreCase; 362 363 bool m_multiline; 364 bool m_unicode; 363 365 // Each BytecodePattern is associated with a RegExp, each RegExp is associated 364 366 // with a VM. Cache a pointer to out VM's m_regExpAllocator. -
trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp
r194840 r197426 1 1 /* 2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2013, 2015-2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 31 31 #include "Options.h" 32 32 #include "Yarr.h" 33 #include "YarrCanonicalizeU CS2.h"33 #include "YarrCanonicalizeUnicode.h" 34 34 35 35 #if ENABLE(YARR_JIT) … … 141 141 } 142 142 143 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar * matches, unsigned matchCount)143 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount) 144 144 { 145 145 do { … … 201 201 if (charClass->m_matchesUnicode.size()) { 202 202 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { 203 UChar ch = charClass->m_matchesUnicode[i];203 UChar32 ch = charClass->m_matchesUnicode[i]; 204 204 matchDest.append(branch32(Equal, character, Imm32(ch))); 205 205 } … … 208 208 if (charClass->m_rangesUnicode.size()) { 209 209 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { 210 UChar lo = charClass->m_rangesUnicode[i].begin;211 UChar hi = charClass->m_rangesUnicode[i].end;210 UChar32 lo = charClass->m_rangesUnicode[i].begin; 211 UChar32 hi = charClass->m_rangesUnicode[i].end; 212 212 213 213 Jump below = branch32(LessThan, character, Imm32(lo)); … … 286 286 } 287 287 288 Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)288 Jump jumpIfCharNotEquals(UChar32 ch, int inputPosition, RegisterID character) 289 289 { 290 290 readCharacter(inputPosition, character); … … 767 767 768 768 PatternTerm* term = op.m_term; 769 UChar ch = term->patternCharacter;769 UChar32 ch = term->patternCharacter; 770 770 771 771 if ((ch > 0xff) && (m_charSize == Char8)) { 
… … 814 814 #endif 815 815 816 UChar currentCharacter = nextTerm->patternCharacter;816 UChar32 currentCharacter = nextTerm->patternCharacter; 817 817 818 818 if ((currentCharacter > 0xff) && (m_charSize == Char8)) { … … 883 883 YarrOp& op = m_ops[opIndex]; 884 884 PatternTerm* term = op.m_term; 885 UChar ch = term->patternCharacter;885 UChar32 ch = term->patternCharacter; 886 886 887 887 const RegisterID character = regT0; … … 920 920 YarrOp& op = m_ops[opIndex]; 921 921 PatternTerm* term = op.m_term; 922 UChar ch = term->patternCharacter;922 UChar32 ch = term->patternCharacter; 923 923 924 924 const RegisterID character = regT0; … … 978 978 YarrOp& op = m_ops[opIndex]; 979 979 PatternTerm* term = op.m_term; 980 UChar ch = term->patternCharacter;980 UChar32 ch = term->patternCharacter; 981 981 982 982 const RegisterID character = regT0; -
trunk/Source/JavaScriptCore/yarr/YarrParser.h
r163394 r197426 1 1 /* 2 * Copyright (C) 2009 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2014-2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 47 47 private: 48 48 template<class FriendDelegate> 49 friend const char* parse(FriendDelegate&, const String& pattern, unsigned backReferenceLimit);49 friend const char* parse(FriendDelegate&, const String& pattern, bool isUnicode, unsigned backReferenceLimit); 50 50 51 51 enum ErrorCode { … … 61 61 CharacterClassOutOfOrder, 62 62 EscapeUnterminated, 63 InvalidUnicodeEscape, 63 64 NumberOfErrorCodes 64 65 }; … … 102 103 * is different to /[a\-z]/). 103 104 */ 104 void atomPatternCharacter(UChar ch, bool hyphenIsRange = false)105 void atomPatternCharacter(UChar32 ch, bool hyphenIsRange = false) 105 106 { 106 107 switch (m_state) { … … 226 227 AfterCharacterClassHyphen, 227 228 } m_state; 228 UChar m_character;229 UChar32 m_character; 229 230 }; 230 231 231 Parser(Delegate& delegate, const String& pattern, unsigned backReferenceLimit)232 Parser(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit) 232 233 : m_delegate(delegate) 233 234 , m_backReferenceLimit(backReferenceLimit) … … 236 237 , m_size(pattern.length()) 237 238 , m_index(0) 239 , m_isUnicode(isUnicode) 238 240 , m_parenthesesNestingDepth(0) 239 241 { … … 412 414 case 'u': { 413 415 consume(); 416 if (atEndOfPattern()) { 417 delegate.atomPatternCharacter('u'); 418 break; 419 } 420 421 if (peek() == '{') { 422 consume(); 423 UChar32 codePoint = 0; 424 do { 425 if (atEndOfPattern()) 426 m_err = InvalidUnicodeEscape; 427 if (!WTF::isASCIIHexDigit(peek())) 428 m_err = InvalidUnicodeEscape; 429 430 codePoint = (codePoint << 4) | WTF::toASCIIHexValue(consume()); 431 432 if (codePoint > UCHAR_MAX_VALUE) 433 m_err = InvalidUnicodeEscape; 434 } while (!atEndOfPattern() && peek() != '}'); 435 if (!atEndOfPattern()) 436 consume(); 437 if (m_err) 438 return 
false; 439 440 delegate.atomPatternCharacter(codePoint); 441 break; 442 } 414 443 int u = tryConsumeHex(4); 415 444 if (u == -1) 416 445 delegate.atomPatternCharacter('u'); 417 else 446 else { 447 // If we have the first of a surrogate pair, look for the second. 448 if (U16_IS_LEAD(u) && m_isUnicode && (patternRemaining() >= 6) && peek() == '\\') { 449 ParseState state = saveState(); 450 consume(); 451 452 if (tryConsume('u')) { 453 int surrogate2 = tryConsumeHex(4); 454 if (U16_IS_TRAIL(surrogate2)) { 455 u = U16_GET_SUPPLEMENTARY(u, surrogate2); 456 delegate.atomPatternCharacter(u); 457 break; 458 } 459 } 460 461 restoreState(state); 462 } 418 463 delegate.atomPatternCharacter(u); 464 } 419 465 break; 420 466 } … … 426 472 427 473 return true; 474 } 475 476 UChar32 consumePossibleSurrogatePair() 477 { 478 UChar32 ch = consume(); 479 if (U16_IS_LEAD(ch) && m_isUnicode && (patternRemaining() > 0)) { 480 ParseState state = saveState(); 481 482 UChar32 surrogate2 = consume(); 483 if (U16_IS_TRAIL(surrogate2)) 484 ch = U16_GET_SUPPLEMENTARY(ch, surrogate2); 485 else 486 restoreState(state); 487 } 488 489 return ch; 428 490 } 429 491 … … 471 533 472 534 default: 473 characterClassConstructor.atomPatternCharacter(consume (), true);535 characterClassConstructor.atomPatternCharacter(consumePossibleSurrogatePair(), true); 474 536 } 475 537 … … 663 725 664 726 default: 665 m_delegate.atomPatternCharacter(consume ());727 m_delegate.atomPatternCharacter(consumePossibleSurrogatePair()); 666 728 lastTokenWasAnAtom = true; 667 729 } … … 702 764 REGEXP_ERROR_PREFIX "range out of order in character class", 703 765 REGEXP_ERROR_PREFIX "\\ at end of pattern" 766 REGEXP_ERROR_PREFIX "invalid unicode {} escape" 704 767 }; 705 768 … … 725 788 ASSERT(m_index <= m_size); 726 789 return m_index == m_size; 790 } 791 792 unsigned patternRemaining() 793 { 794 ASSERT(m_index <= m_size); 795 return m_size - m_index; 727 796 } 728 797 … … 806 875 unsigned m_size; 807 876 unsigned m_index; 877 
bool m_isUnicode; 808 878 unsigned m_parenthesesNestingDepth; 809 879 … … 826 896 * void assertionWordBoundary(bool invert); 827 897 * 828 * void atomPatternCharacter(UChar ch);898 * void atomPatternCharacter(UChar32 ch); 829 899 * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert); 830 900 * void atomCharacterClassBegin(bool invert) 831 * void atomCharacterClassAtom(UChar ch)832 * void atomCharacterClassRange(UChar begin, UCharend)901 * void atomCharacterClassAtom(UChar32 ch) 902 * void atomCharacterClassRange(UChar32 begin, UChar32 end) 833 903 * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) 834 904 * void atomCharacterClassEnd() … … 872 942 873 943 template<class Delegate> 874 const char* parse(Delegate& delegate, const String& pattern, unsigned backReferenceLimit = quantifyInfinite)944 const char* parse(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit = quantifyInfinite) 875 945 { 876 946 if (pattern.is8Bit()) 877 return Parser<Delegate, LChar>(delegate, pattern, backReferenceLimit).parse();878 return Parser<Delegate, UChar>(delegate, pattern, backReferenceLimit).parse();947 return Parser<Delegate, LChar>(delegate, pattern, isUnicode, backReferenceLimit).parse(); 948 return Parser<Delegate, UChar>(delegate, pattern, isUnicode, backReferenceLimit).parse(); 879 949 } 880 950 -
trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp
r194496 r197426 1 1 /* 2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2013-2016 Apple Inc. All rights reserved. 3 3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged 4 4 * … … 29 29 30 30 #include "Yarr.h" 31 #include "YarrCanonicalizeU CS2.h"31 #include "YarrCanonicalizeUnicode.h" 32 32 #include "YarrParser.h" 33 33 #include <wtf/Vector.h> … … 41 41 class CharacterClassConstructor { 42 42 public: 43 CharacterClassConstructor(bool isCaseInsensitive = false)43 CharacterClassConstructor(bool isCaseInsensitive, CanonicalMode canonicalMode) 44 44 : m_isCaseInsensitive(isCaseInsensitive) 45 , m_canonicalMode(canonicalMode) 45 46 { 46 47 } … … 66 67 } 67 68 68 void putChar(UChar ch)69 void putChar(UChar32 ch) 69 70 { 70 71 // Handle ascii cases. … … 85 86 86 87 // Add multiple matches, if necessary. 87 const UCS2CanonicalizationRange* info = rangeInfoFor(ch);88 const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_canonicalMode); 88 89 if (info->type == CanonicalizeUnique) 89 90 addSorted(m_matchesUnicode, ch); … … 92 93 } 93 94 94 void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info)95 void putUnicodeIgnoreCase(UChar32 ch, const CanonicalizationRange* info) 95 96 { 96 97 ASSERT(m_isCaseInsensitive); 97 ASSERT(ch > 0x7f);98 98 ASSERT(ch >= info->begin && ch <= info->end); 99 99 ASSERT(info->type != CanonicalizeUnique); 100 100 if (info->type == CanonicalizeSet) { 101 for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)102 addSorted( m_matchesUnicode,ch);101 for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set) 102 addSorted(ch); 103 103 } else { 104 addSorted( m_matchesUnicode,ch);105 addSorted( m_matchesUnicode,getCanonicalPair(info, ch));106 } 107 } 108 109 void putRange(UChar lo, UCharhi)104 addSorted(ch); 105 addSorted(getCanonicalPair(info, ch)); 106 } 107 } 108 109 void putRange(UChar32 lo, 
UChar32 hi) 110 110 { 111 111 if (lo <= 0x7f) { 112 112 char asciiLo = lo; 113 char asciiHi = std::min(hi, (UChar )0x7f);113 char asciiHi = std::min(hi, (UChar32)0x7f); 114 114 addSortedRange(m_ranges, lo, asciiHi); 115 115 … … 124 124 return; 125 125 126 lo = std::max(lo, (UChar )0x80);126 lo = std::max(lo, (UChar32)0x80); 127 127 addSortedRange(m_rangesUnicode, lo, hi); 128 128 … … 130 130 return; 131 131 132 const UCS2CanonicalizationRange* info = rangeInfoFor(lo);132 const CanonicalizationRange* info = canonicalRangeInfoFor(lo, m_canonicalMode); 133 133 while (true) { 134 134 // Handle the range [lo .. end] 135 UChar end = std::min<UChar>(info->end, hi);135 UChar32 end = std::min<UChar32>(info->end, hi); 136 136 137 137 switch (info->type) { … … 141 141 case CanonicalizeSet: { 142 142 UChar ch; 143 for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)143 for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set) 144 144 addSorted(m_matchesUnicode, ch); 145 145 break; … … 189 189 190 190 private: 191 void addSorted(Vector<UChar>& matches, UChar ch) 191 void addSorted(UChar32 ch) 192 { 193 addSorted(ch <= 0x7f ? 
m_matches : m_matchesUnicode, ch); 194 } 195 196 void addSorted(Vector<UChar32>& matches, UChar32 ch) 192 197 { 193 198 unsigned pos = 0; … … 215 220 } 216 221 217 void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UCharhi)222 void addSortedRange(Vector<CharacterRange>& ranges, UChar32 lo, UChar32 hi) 218 223 { 219 224 unsigned end = ranges.size(); … … 261 266 262 267 bool m_isCaseInsensitive; 263 264 Vector<UChar> m_matches; 268 CanonicalMode m_canonicalMode; 269 270 Vector<UChar32> m_matches; 265 271 Vector<CharacterRange> m_ranges; 266 Vector<UChar > m_matchesUnicode;272 Vector<UChar32> m_matchesUnicode; 267 273 Vector<CharacterRange> m_rangesUnicode; 268 274 }; … … 272 278 YarrPatternConstructor(YarrPattern& pattern) 273 279 : m_pattern(pattern) 274 , m_characterClassConstructor(pattern.m_ignoreCase )280 , m_characterClassConstructor(pattern.m_ignoreCase, pattern.m_unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2) 275 281 , m_invertParentheticalAssertion(false) 276 282 { … … 314 320 } 315 321 316 void atomPatternCharacter(UChar ch)322 void atomPatternCharacter(UChar32 ch) 317 323 { 318 324 // We handle case-insensitive checking of unicode characters which do have both 319 325 // cases by handling them as if they were defined using a CharacterClass. 320 if (!m_pattern.m_ignoreCase || isASCII(ch)) {326 if (!m_pattern.m_ignoreCase || (isASCII(ch) && !m_pattern.m_unicode)) { 321 327 m_alternative->m_terms.append(PatternTerm(ch)); 322 328 return; 323 329 } 324 330 325 const UCS2CanonicalizationRange* info = rangeInfoFor(ch);331 const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_pattern.m_unicode ? 
CanonicalMode::Unicode : CanonicalMode::UCS2); 326 332 if (info->type == CanonicalizeUnique) { 327 333 m_alternative->m_terms.append(PatternTerm(ch)); … … 358 364 } 359 365 360 void atomCharacterClassAtom(UChar ch)366 void atomCharacterClassAtom(UChar32 ch) 361 367 { 362 368 m_characterClassConstructor.putChar(ch); 363 369 } 364 370 365 void atomCharacterClassRange(UChar begin, UCharend)371 void atomCharacterClassRange(UChar32 begin, UChar32 end) 366 372 { 367 373 m_characterClassConstructor.putRange(begin, end); … … 597 603 currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter; 598 604 alternative->m_hasFixedSize = false; 605 } else if (m_pattern.m_unicode) { 606 currentInputPosition += (!U_IS_BMP(term.patternCharacter) ? 2 : 1) * term.quantityCount; 599 607 } else 600 608 currentInputPosition += term.quantityCount; … … 606 614 term.frameLocation = currentCallFrameSize; 607 615 currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass; 616 alternative->m_hasFixedSize = false; 617 } else if (m_pattern.m_unicode) { 618 term.frameLocation = currentCallFrameSize; 619 currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass; 620 currentInputPosition += term.quantityCount; 608 621 alternative->m_hasFixedSize = false; 609 622 } else … … 833 846 YarrPatternConstructor constructor(*this); 834 847 835 if (const char* error = parse(constructor, patternString ))848 if (const char* error = parse(constructor, patternString, m_unicode)) 836 849 return error; 837 850 … … 847 860 const char* error = 848 861 #endif 849 parse(constructor, patternString, numSubpatterns);862 parse(constructor, patternString, m_unicode, numSubpatterns); 850 863 851 864 ASSERT(!error); … … 862 875 } 863 876 864 YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error)877 YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, bool unicode, const char** error) 865 878 : m_ignoreCase(ignoreCase) 866 
879 , m_multiline(multiline) 880 , m_unicode(unicode) 867 881 , m_containsBackreferences(false) 868 882 , m_containsBOL(false) -
trunk/Source/JavaScriptCore/yarr/YarrPattern.h
r177854 r197426 1 1 /* 2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2013-2014, 2016 Apple Inc. All rights reserved. 3 3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged 4 4 * … … 38 38 39 39 struct CharacterRange { 40 UChar begin;41 UChar end;42 43 CharacterRange(UChar begin, UCharend)40 UChar32 begin; 41 UChar32 end; 42 43 CharacterRange(UChar32 begin, UChar32 end) 44 44 : begin(begin) 45 45 , end(end) … … 63 63 { 64 64 } 65 Vector<UChar > m_matches;65 Vector<UChar32> m_matches; 66 66 Vector<CharacterRange> m_ranges; 67 Vector<UChar > m_matchesUnicode;67 Vector<UChar32> m_matchesUnicode; 68 68 Vector<CharacterRange> m_rangesUnicode; 69 69 … … 94 94 bool m_invert :1; 95 95 union { 96 UChar patternCharacter;96 UChar32 patternCharacter; 97 97 CharacterClass* characterClass; 98 98 unsigned backReferenceSubpatternId; … … 114 114 unsigned frameLocation; 115 115 116 PatternTerm(UChar ch)116 PatternTerm(UChar32 ch) 117 117 : type(PatternTerm::TypePatternCharacter) 118 118 , m_capture(false) … … 301 301 302 302 struct YarrPattern { 303 JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error);303 JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, bool unicode, const char** error); 304 304 305 305 void reset() … … 393 393 bool m_ignoreCase : 1; 394 394 bool m_multiline : 1; 395 bool m_unicode : 1; 395 396 bool m_containsBackreferences : 1; 396 397 bool m_containsBOL : 1; -
trunk/Source/JavaScriptCore/yarr/YarrSyntaxChecker.cpp
r127191 r197426 1 1 /* 2 * Copyright (C) 2011 Apple Inc. All rights reserved.2 * Copyright (C) 2011, 2016 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 36 36 void assertionEOL() {} 37 37 void assertionWordBoundary(bool) {} 38 void atomPatternCharacter(UChar ) {}38 void atomPatternCharacter(UChar32) {} 39 39 void atomBuiltInCharacterClass(BuiltInCharacterClassID, bool) {} 40 40 void atomCharacterClassBegin(bool = false) {} … … 54 54 { 55 55 SyntaxChecker syntaxChecker; 56 return parse(syntaxChecker, pattern );56 return parse(syntaxChecker, pattern, false); 57 57 } 58 58
Note: See TracChangeset
for help on using the changeset viewer.