Changeset 275653 in webkit
- Timestamp:
- Apr 7, 2021 10:48:03 PM (3 years ago)
- Location:
- trunk
- Files:
-
- 1 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JSTests/ChangeLog
r275598 r275653 1 2021-04-07 Yusuke Suzuki <ysuzuki@apple.com> 2 3 [JSC] DUCET level-1 weighs are equal if characters are alphabets 4 https://bugs.webkit.org/show_bug.cgi?id=224047 5 6 Reviewed by Saam Barati and Mark Lam. 7 8 * stress/ducet-level-3-or-4-comparison.js: Added. 9 (shouldBe): 10 1 11 2021-04-07 Yusuke Suzuki <ysuzuki@apple.com> 2 12 -
trunk/Source/JavaScriptCore/ChangeLog
r275650 r275653 1 2021-04-07 Yusuke Suzuki <ysuzuki@apple.com> 2 3 [JSC] DUCET level-1 weighs are equal if characters are alphabets 4 https://bugs.webkit.org/show_bug.cgi?id=224047 5 6 Reviewed by Saam Barati and Mark Lam. 7 8 ASCII comparison optimization was based on that DUCET level-1 weights are all different (except for 0000 case), but this was wrong. 9 If we have the same latin letters with different capitalization, then they have the same level-1 weight ('A' v.s. 'a'). 10 In this patch, 11 12 1. If we found that the result of level-1 weight comparison is equal, and characters are not equal, then we do level-3 weight comparison. 13 We do not perform level-2 since they are all the same weight in ASCII (excluding control characters) region. 14 2. We do not perform level-4 weight comparison since level-1 and level-3 comparison must distinguish the strings. Level-1 weights are equal 15 only when characters are the same latin letters. And level-3 weight puts different weights for capital latin letters. Since we already know 16 that these strings are different while they are equal in level-1 weight comparison, the only case is that they have same latin letters in 17 the same position. In that case, level-3 weight must say different results for these characters so that we never meet "equal" status in 18 level-3 weight comparison if characters are different. 19 20 * runtime/IntlObject.cpp: 21 * runtime/IntlObject.h: 22 * runtime/IntlObjectInlines.h: 23 (JSC::canUseASCIIUCADUCETComparison): 24 (JSC::compareASCIIWithUCADUCETLevel3): 25 (JSC::compareASCIIWithUCADUCET): 26 1 27 2021-04-02 Darin Adler <darin@apple.com> 2 28 -
trunk/Source/JavaScriptCore/runtime/IntlObject.cpp
r275410 r275653 433 433 // 4. UCA step-3 handles 0000 weighted characters specially. And ASCII contains these characters. But 0000 elements are used only for rare control characters. 434 434 // We can ignore this special handling if ASCII strings do not include control characters. 435 // 5. Except 0000 cases, all characters' level-1 weights are different. And level-2 weights are always 0020, which is lower than any level-1 weights. 436 // This means that binary comparison in UCA step-4 do not need to check level 2~ weights. 435 // 5. Level-1 weights are different except for 0000 cases and capital / lower ASCII characters. All non-0000 elements are larger than 0000. 436 // 6. Level-2 weights are always 0020 except for 0000 cases. So if we include 0000 characters, we do not need to perform level-2 weight comparison. 437 // 7. In all levels, characters have non-0000 weights if it does not have 0000 weight in level-1. 438 // 8. In level-1, weights are the same only when characters are the same latin letters ('A' v.s. 'a'). If level-1 weight comparison says EQUAL, and if characters are not binary-equal, 439 // then, the only case is they are including the same latin letters with different capitalization at the same position. Level-3 weight comparison must distinguish them since level-3 440 // weight is set only for latin capital letters. Thus, we do not need to perform level-4 weight comparison. 437 441 // 438 // Based on the above observation, our fast path handles ASCII strings excluding control characters. The following weight is recomputed weights from level-1 weights. 439 const uint8_t ducetWeights[128] = { 442 // Based on the above observation, our fast path handles ASCII strings excluding control characters. We first compare strings with level-1 weights. And then, 443 // if we found they are the same and if we found they are not binary-equal strings, then we perform comparison with level-3 and level-4 weights. 444 const uint8_t ducetLevel1Weights[128] = { 440 445 0, 0, 0, 0, 0, 0, 0, 0, 441 446 0, 1, 2, 3, 4, 5, 0, 0, … … 446 451 39, 40, 41, 42, 43, 44, 45, 46, 447 452 47, 48, 11, 10, 33, 34, 35, 13, 448 23, 50, 52, 54, 56, 58, 60, 62, 449 64, 66, 68, 70, 72, 74, 76, 78, 450 80, 82, 84, 86, 88, 90, 92, 94, 451 96, 98, 100, 19, 26, 20, 31, 7, 452 30, 49, 51, 53, 55, 57, 59, 61, 453 63, 65, 67, 69, 71, 73, 75, 77, 454 79, 81, 83, 85, 87, 89, 91, 93, 455 95, 97, 99, 21, 36, 22, 37, 0, 453 23, 49, 50, 51, 52, 53, 54, 55, 454 56, 57, 58, 59, 60, 61, 62, 63, 455 64, 65, 66, 67, 68, 69, 70, 71, 456 72, 73, 74, 19, 26, 20, 31, 7, 457 30, 49, 50, 51, 52, 53, 54, 55, 458 56, 57, 58, 59, 60, 61, 62, 63, 459 64, 65, 66, 67, 68, 69, 70, 71, 460 72, 73, 74, 21, 36, 22, 37, 0, 461 }; 462 463 // Level 2 are all zeros. 464 465 const uint8_t ducetLevel3Weights[128] = { 466 0, 0, 0, 0, 0, 0, 0, 0, 467 0, 0, 0, 0, 0, 0, 0, 0, 468 0, 0, 0, 0, 0, 0, 0, 0, 469 0, 0, 0, 0, 0, 0, 0, 0, 470 0, 0, 0, 0, 0, 0, 0, 0, 471 0, 0, 0, 0, 0, 0, 0, 0, 472 0, 0, 0, 0, 0, 0, 0, 0, 473 0, 0, 0, 0, 0, 0, 0, 0, 474 0, 1, 1, 1, 1, 1, 1, 1, 475 1, 1, 1, 1, 1, 1, 1, 1, 476 1, 1, 1, 1, 1, 1, 1, 1, 477 1, 1, 1, 0, 0, 0, 0, 0, 478 0, 0, 0, 0, 0, 0, 0, 0, 479 0, 0, 0, 0, 0, 0, 0, 0, 480 0, 0, 0, 0, 0, 0, 0, 0, 481 0, 0, 0, 0, 0, 0, 0, 0, 456 482 }; 457 483 -
trunk/Source/JavaScriptCore/runtime/IntlObject.h
r275410 r275653 36 36 namespace JSC { 37 37 38 extern const uint8_t ducetWeights[128]; 38 extern const uint8_t ducetLevel1Weights[128]; 39 extern const uint8_t ducetLevel3Weights[128]; 39 40 40 41 enum class LocaleMatcher : uint8_t { -
trunk/Source/JavaScriptCore/runtime/IntlObjectInlines.h
r273187 r275653 155 155 ALWAYS_INLINE bool canUseASCIIUCADUCETComparison(UChar character) 156 156 { 157 return isASCII(character) && ducetWeights[character]; 157 return isASCII(character) && ducetLevel1Weights[character]; 158 } 159 160 template<typename CharacterType1, typename CharacterType2> 161 UCollationResult compareASCIIWithUCADUCETLevel3(const CharacterType1* characters1, const CharacterType2* characters2, unsigned length) 162 { 163 for (unsigned position = 0; position < length; ++position) { 164 auto lhs = characters1[position]; 165 auto rhs = characters2[position]; 166 uint8_t leftWeight = ducetLevel3Weights[lhs]; 167 uint8_t rightWeight = ducetLevel3Weights[rhs]; 168 if (leftWeight == rightWeight) 169 continue; 170 return leftWeight > rightWeight ? UCOL_GREATER : UCOL_LESS; 171 } 172 return UCOL_EQUAL; 158 173 } 159 174 … … 161 176 inline UCollationResult compareASCIIWithUCADUCET(const CharacterType1* characters1, unsigned length1, const CharacterType2* characters2, unsigned length2) 162 177 { 178 bool notSameCharacters = false; 163 179 unsigned commonLength = std::min(length1, length2); 164 180 for (unsigned position = 0; position < commonLength; ++position) { … … 167 183 ASSERT(canUseASCIIUCADUCETComparison(lhs)); 168 184 ASSERT(canUseASCIIUCADUCETComparison(rhs)); 169 uint8_t leftWeight = ducetWeights[lhs]; 170 uint8_t rightWeight = ducetWeights[rhs]; 185 if (lhs == rhs) 186 continue; 187 notSameCharacters = true; 188 uint8_t leftWeight = ducetLevel1Weights[lhs]; 189 uint8_t rightWeight = ducetLevel1Weights[rhs]; 171 190 if (leftWeight == rightWeight) 172 191 continue; … … 174 193 } 175 194 176 if (length1 == length2) 195 if (length1 == length2) { 196 if (notSameCharacters) 197 return compareASCIIWithUCADUCETLevel3(characters1, characters2, length1); 177 198 return UCOL_EQUAL; 199 } 178 200 return length1 > length2 ? UCOL_GREATER : UCOL_LESS; 179 201 }
Note: See TracChangeset
for help on using the changeset viewer.