Changeset 275554 in webkit
- Timestamp:
- Apr 6, 2021, 2:28:04 PM (4 years ago)
- Location:
- trunk
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WTF/ChangeLog
r275542 r275554 1 2021-04-06 Alex Christensen <achristensen@webkit.org> 2 3 Add U+0581 and U+0585 to list of Armenian characters that look like Latin characters 4 https://bugs.webkit.org/show_bug.cgi?id=224219 5 <rdar://75896365> 6 7 Reviewed by Brent Fulgham. 8 9 These code points are allowed in the context of other Armenian code points and punctuation, but not other script code points. 10 This was already implemented for the others, but I consolidated the list to one location to avoid having two locations for the list. 11 12 * wtf/URLHelpers.cpp: 13 (WTF::URLHelpers::isArmenianLookalikeCharacter): 14 (WTF::URLHelpers::isArmenianLookalikeSequence): 15 (WTF::URLHelpers::isLookalikeCharacter): 16 1 17 2021-04-06 Yusuke Suzuki <ysuzuki@apple.com> 2 18 -
trunk/Source/WTF/wtf/URLHelpers.cpp
r271499 r275554 70 70 static bool isArmenianLookalikeCharacter(UChar32 codePoint) 71 71 { 72 return codePoint == 0x0548 || codePoint == 0x054D || codePoint == 0x0578 || codePoint == 0x057D; 72 switch (codePoint) { 73 case 0x0548: /* ARMENIAN CAPITAL LETTER VO */ 74 case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */ 75 case 0x0551: /* ARMENIAN CAPITAL LETTER CO */ 76 case 0x0555: /* ARMENIAN CAPITAL LETTER OH */ 77 case 0x0578: /* ARMENIAN SMALL LETTER VO */ 78 case 0x057D: /* ARMENIAN SMALL LETTER SEH */ 79 case 0x0581: /* ARMENIAN SMALL LETTER CO */ 80 case 0x0585: /* ARMENIAN SMALL LETTER OH */ 81 return true; 82 default: 83 return false; 84 } 73 85 } 74 86 … … 107 119 } 108 120 109 static bool isLookalikeCharacter(const Optional<UChar32>& previousCodePoint, UChar32 charCode) 121 static bool isArmenianLookalikeSequence(const Optional<UChar32>& previousCodePoint, UChar32 codePoint) 122 { 123 if (!previousCodePoint || *previousCodePoint == '/') 124 return false; 125 126 auto isArmenianLookalikePair = [] (UChar first, UChar second) { 127 return isArmenianLookalikeCharacter(first) && !(isArmenianScriptCharacter(second) || isASCIIDigitOrValidHostCharacter(second)); 128 }; 129 return isArmenianLookalikePair(codePoint, *previousCodePoint) 130 || isArmenianLookalikePair(*previousCodePoint, codePoint); 131 } 132 133 static bool isLookalikeCharacter(const Optional<UChar32>& previousCodePoint, UChar32 codePoint) 110 134 { 111 135 // This function treats the following as unsafe, lookalike characters: … … 120 144 // on characters that have not been processed by ICU, so they are needed here. 
121 145 122 if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))146 if (!u_isprint(codePoint) || u_isUWhiteSpace(codePoint) || u_hasBinaryProperty(codePoint, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) 123 147 return true; 124 148 125 switch (charCode) {149 switch (codePoint) { 126 150 case 0x00BC: /* VULGAR FRACTION ONE QUARTER */ 127 151 case 0x00BD: /* VULGAR FRACTION ONE HALF */ … … 251 275 || previousCodePoint == 0x0131 /* LATIN SMALL LETTER DOTLESS I */ 252 276 || previousCodePoint == 0x05D5; /* HEBREW LETTER VAV */ 253 case 0x0548: /* ARMENIAN CAPITAL LETTER VO */254 case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */255 case 0x0578: /* ARMENIAN SMALL LETTER VO */256 case 0x057D: /* ARMENIAN SMALL LETTER SEH */257 return previousCodePoint258 && !isASCIIDigitOrValidHostCharacter(previousCodePoint.value())259 && !isArmenianScriptCharacter(previousCodePoint.value());260 277 case '.': 261 278 return false; 262 279 default: 263 return previousCodePoint 264 && isArmenianLookalikeCharacter(previousCodePoint.value()) 265 && !(isArmenianScriptCharacter(charCode) || isASCIIDigitOrValidHostCharacter(charCode)); 280 return isArmenianLookalikeSequence(previousCodePoint, codePoint); 266 281 } 267 282 } -
trunk/Tools/ChangeLog
r275546 r275554 1 2021-04-06 Alex Christensen <achristensen@webkit.org> 2 3 Add U+0581 and U+0585 to list of Armenian characters that look like Latin characters 4 https://bugs.webkit.org/show_bug.cgi?id=224219 5 6 Reviewed by Brent Fulgham. 7 8 * TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm: 9 (TestWebKitAPI::TEST): 10 1 11 2021-04-06 Wenson Hsieh <wenson_hsieh@apple.com> 2 12 -
trunk/Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm
r271499 r275554 116 116 "xn--koa", // U+0274 117 117 "xn--tma", // U+0237 118 "xn--o-pdc", // U+0585 'o' 119 "xn--o-qdc", // 'o' U+0585 120 "xn--g-hdc", // U+0581 'g' 121 "xn--g-idc", // 'g' U+0581 118 122 }; 119 123 for (const String& host : punycodedSpoofHosts) { … … 131 135 EXPECT_STREQ("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575"))); 132 136 EXPECT_STREQ("https://\u0573_\u0574\u05783.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573_\u0574\u05783.\u0570\u0561\u0575"))); 133 EXPECT_STREQ("https://got\u0551\u0535\u0543.com", userVisibleString(literalURL("https://got\u0551\u0535\u0543.com")));137 EXPECT_STREQ("https://got%D5%91\u0535\u0543.com", userVisibleString(literalURL("https://got\u0551\u0535\u0543.com"))); 134 138 EXPECT_STREQ("https://\u0551\u0535\u0543fans.net", userVisibleString(literalURL("https://\u0551\u0535\u0543fans.net"))); 135 139 EXPECT_STREQ("https://\u0551\u0535or\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535or\u0575\u0543.biz"))); 136 140 EXPECT_STREQ("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz"))); 141 EXPECT_STREQ("https://\u0551%67/", userVisibleString(literalURL("https://\u0551g/"))); 142 EXPECT_STREQ("https://\u0581%67/", userVisibleString(literalURL("https://\u0581g/"))); 143 EXPECT_STREQ("https://o%D5%95%2F", userVisibleString(literalURL("https://o\u0555/"))); 144 EXPECT_STREQ("https://o%D6%85%2F", userVisibleString(literalURL("https://o\u0585/"))); 137 145 } 138 146
Note:
See TracChangeset
for help on using the changeset viewer.