Changeset 240962 in webkit
- Timestamp:
- Feb 4, 2019 8:04:30 PM (5 years ago)
- Location:
- trunk
- Files:
-
- 7 added
- 21 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r240957 r240962 1 2019-02-04 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 * TestExpectations: Enable fast/url/user-visible/. 9 1 10 2019-02-04 Shawn Roberts <sroberts@apple.com> 2 11 -
trunk/LayoutTests/TestExpectations
r240829 r240962 70 70 71 71 # These tests don't have to be platform-specific, but they are only implemented on Mac now. 72 fast/url/user-visible [ Skip ] 73 72 fast/images/eps-as-image.html [ Skip ] 74 73 -
trunk/Source/WTF/ChangeLog
r240903 r240962 1 2019-02-04 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Translate userVisibleString and dependent code into platform-neutral C++ 9 in wtf/URLHelpers.{h,cpp}. 10 11 * WTF.xcodeproj/project.pbxproj: 12 * wtf/CMakeLists.txt: 13 * wtf/URLHelpers.cpp: Added. 14 (WTF::URLHelpers::loadIDNScriptWhiteList): 15 (WTF::URLHelpers::isArmenianLookalikeCharacter): 16 (WTF::URLHelpers::isArmenianScriptCharacter): 17 (WTF::URLHelpers::isASCIIDigitOrValidHostCharacter): 18 (WTF::URLHelpers::isLookalikeCharacter): 19 (WTF::URLHelpers::whiteListIDNScript): 20 (WTF::URLHelpers::initializeDefaultIDNScriptWhiteList): 21 (WTF::URLHelpers::allCharactersInIDNScriptWhiteList): 22 (WTF::URLHelpers::isSecondLevelDomainNameAllowedByTLDRules): 23 (WTF::URLHelpers::isRussianDomainNameCharacter): 24 (WTF::URLHelpers::allCharactersAllowedByTLDRules): 25 (WTF::URLHelpers::mapHostName): 26 (WTF::URLHelpers::collectRangesThatNeedMapping): 27 (WTF::URLHelpers::applyHostNameFunctionToMailToURLString): 28 (WTF::URLHelpers::applyHostNameFunctionToURLString): 29 (WTF::URLHelpers::mapHostNames): 30 (WTF::URLHelpers::createStringWithEscapedUnsafeCharacters): 31 (WTF::URLHelpers::toNormalizationFormC): 32 (WTF::URLHelpers::userVisibleURL): 33 * wtf/URLHelpers.h: Added. 34 * wtf/cocoa/NSURLExtras.mm: 35 (WTF::URLHelpers::loadIDNScriptWhiteList): 36 (WTF::decodePercentEscapes): 37 (WTF::decodeHostName): 38 (WTF::encodeHostName): 39 (WTF::URLWithUserTypedString): 40 (WTF::userVisibleString): 41 1 42 2019-02-03 Commit Queue <commit-queue@webkit.org> 2 43 -
trunk/Source/WTF/WTF.xcodeproj/project.pbxproj
r240661 r240962 77 77 5CC0EE892162BC2200A1A842 /* URLCocoa.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5CC0EE862162BC2200A1A842 /* URLCocoa.mm */; }; 78 78 5CC0EE8A2162BC2200A1A842 /* NSURLExtras.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5CC0EE882162BC2200A1A842 /* NSURLExtras.mm */; }; 79 5FAD3AE221B9636600BEE178 /* URLHelpers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5FAD3AE121B9636600BEE178 /* URLHelpers.cpp */; }; 79 80 70A993FE1AD7151300FA615B /* SymbolRegistry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70A993FC1AD7151300FA615B /* SymbolRegistry.cpp */; }; 80 81 70ECA60D1B02426800449739 /* AtomicStringImpl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70ECA60A1B02426800449739 /* AtomicStringImpl.cpp */; }; … … 388 389 5D247B7014689C4700E78B76 /* DebugRelease.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = DebugRelease.xcconfig; sourceTree = "<group>"; }; 389 390 5D247B7314689C4700E78B76 /* WTF.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = WTF.xcconfig; sourceTree = "<group>"; }; 391 5FAD3AE021B9636600BEE178 /* URLHelpers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = URLHelpers.h; sourceTree = "<group>"; }; 392 5FAD3AE121B9636600BEE178 /* URLHelpers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = URLHelpers.cpp; sourceTree = "<group>"; }; 390 393 6541CAF41630DB26006D0DEC /* CopyWTFHeaders.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = CopyWTFHeaders.xcconfig; sourceTree = "<group>"; }; 391 394 70A993FC1AD7151300FA615B /* SymbolRegistry.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SymbolRegistry.cpp; sourceTree = "<group>"; }; … … 1024 1027 A8A472CF151A825B004123FF /* MetaAllocatorHandle.h */, 1025 1028 FE7497ED209163060003565B /* MetaAllocatorPtr.h */, 1029 
5FAD3AE121B9636600BEE178 /* URLHelpers.cpp */, 1030 5FAD3AE021B9636600BEE178 /* URLHelpers.h */, 1026 1031 0F66B2821DC97BAB004A1D3F /* MonotonicTime.cpp */, 1027 1032 0F66B2831DC97BAB004A1D3F /* MonotonicTime.h */, … … 1553 1558 A8A473F4151A825B004123FF /* NumberOfCores.cpp in Sources */, 1554 1559 8348BA0E21FBC0D500FD3054 /* ObjectIdentifier.cpp in Sources */, 1560 5FAD3AE221B9636600BEE178 /* URLHelpers.cpp in Sources */, 1555 1561 A3EE5C3A21FFAC5F00FABD61 /* OSAllocatorPOSIX.cpp in Sources */, 1556 1562 A8A473F9151A825B004123FF /* OSRandomSource.cpp in Sources */, -
trunk/Source/WTF/wtf/CMakeLists.txt
r240661 r240962 244 244 TypeCasts.h 245 245 URL.h 246 URLHelpers.h 246 247 URLHash.h 247 248 URLParser.h … … 404 405 TimingScope.cpp 405 406 URL.cpp 407 URLHelpers.cpp 406 408 URLParser.cpp 407 409 UUID.cpp -
trunk/Source/WTF/wtf/cocoa/NSURLExtras.mm
r239970 r240962 31 31 #import "NSURLExtras.h" 32 32 33 #import <unicode/uchar.h> 34 #import <unicode/uidna.h> 35 #import <unicode/unorm.h> 36 #import <unicode/uscript.h> 37 33 #import <wtf/Function.h> 38 34 #import <wtf/HexNumber.h> 39 35 #import <wtf/ObjCRuntimeExtras.h> 40 36 #import <wtf/RetainPtr.h> 37 #import <wtf/URLHelpers.h> 41 38 #import <wtf/URLParser.h> 42 39 #import <wtf/Vector.h> 43 40 #import <wtf/cf/CFURLExtras.h> 44 41 45 // Needs to be big enough to hold an IDN-encoded name. 46 // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK. 47 #define HOST_NAME_BUFFER_LENGTH 2048 48 42 #define URL_BYTES_BUFFER_LENGTH 2048 49 43 50 typedef void (* StringRangeApplierFunction)(NSString *, NSRange, RetainPtr<NSMutableArray>&); 51 52 static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32]; 53 54 44 namespace WTF { 55 45 56 static bool isArmenianLookalikeCharacter(UChar32 codePoint) 57 { 58 return codePoint == 0x0548 || codePoint == 0x054D || codePoint == 0x0578 || codePoint == 0x057D; 59 } 60 61 static bool isArmenianScriptCharacter(UChar32 codePoint) 62 { 63 UErrorCode error = U_ZERO_ERROR; 64 UScriptCode script = uscript_getScript(codePoint, &error); 65 if (error != U_ZERO_ERROR) { 66 LOG_ERROR("got ICU error while trying to look at scripts: %d", error); 67 return false; 68 } 69 70 return script == USCRIPT_ARMENIAN; 71 } 72 73 74 template<typename CharacterType> inline bool isASCIIDigitOrValidHostCharacter(CharacterType charCode) 75 { 76 if (!isASCIIDigitOrPunctuation(charCode)) 77 return false; 78 79 // Things the URL Parser rejects: 80 switch (charCode) { 81 case '#': 82 case '%': 83 case '/': 84 case ':': 85 case '?': 86 case '@': 87 case '[': 88 case '\\': 89 case ']': 90 return false; 91 default: 92 return true; 93 } 94 } 95 96 static BOOL isLookalikeCharacter(Optional<UChar32> previousCodePoint, UChar32 charCode) 97 { 98 // This function treats the following as unsafe, lookalike characters: 99 // any non-printable 
character, any character considered as whitespace, 100 // any ignorable character, and emoji characters related to locks. 101 102 // We also considered the characters in Mozilla's blacklist <http://kb.mozillazine.org/Network.IDN.blacklist_chars>. 103 104 // Some of the characters here will never appear once ICU has encoded. 105 // For example, ICU transforms most spaces into an ASCII space and most 106 // slashes into an ASCII solidus. But one of the two callers uses this 107 // on characters that have not been processed by ICU, so they are needed here. 108 109 if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) 110 return YES; 111 112 switch (charCode) { 113 case 0x00BC: /* VULGAR FRACTION ONE QUARTER */ 114 case 0x00BD: /* VULGAR FRACTION ONE HALF */ 115 case 0x00BE: /* VULGAR FRACTION THREE QUARTERS */ 116 case 0x00ED: /* LATIN SMALL LETTER I WITH ACUTE */ 117 /* 0x0131 LATIN SMALL LETTER DOTLESS I is intentionally not considered a lookalike character because it is visually distinguishable from i and it has legitimate use in the Turkish language. 
*/ 118 case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */ 119 case 0x0251: /* LATIN SMALL LETTER ALPHA */ 120 case 0x0261: /* LATIN SMALL LETTER SCRIPT G */ 121 case 0x027E: /* LATIN SMALL LETTER R WITH FISHHOOK */ 122 case 0x02D0: /* MODIFIER LETTER TRIANGULAR COLON */ 123 case 0x0335: /* COMBINING SHORT STROKE OVERLAY */ 124 case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */ 125 case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */ 126 case 0x0589: /* ARMENIAN FULL STOP */ 127 case 0x05B4: /* HEBREW POINT HIRIQ */ 128 case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */ 129 case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */ 130 case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */ 131 case 0x0609: /* ARABIC-INDIC PER MILLE SIGN */ 132 case 0x060A: /* ARABIC-INDIC PER TEN THOUSAND SIGN */ 133 case 0x0650: /* ARABIC KASRA */ 134 case 0x0660: /* ARABIC INDIC DIGIT ZERO */ 135 case 0x066A: /* ARABIC PERCENT SIGN */ 136 case 0x06D4: /* ARABIC FULL STOP */ 137 case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */ 138 case 0x0701: /* SYRIAC SUPRALINEAR FULL STOP */ 139 case 0x0702: /* SYRIAC SUBLINEAR FULL STOP */ 140 case 0x0703: /* SYRIAC SUPRALINEAR COLON */ 141 case 0x0704: /* SYRIAC SUBLINEAR COLON */ 142 case 0x1735: /* PHILIPPINE SINGLE PUNCTUATION */ 143 case 0x1D04: /* LATIN LETTER SMALL CAPITAL C */ 144 case 0x1D0F: /* LATIN LETTER SMALL CAPITAL O */ 145 case 0x1D1C: /* LATIN LETTER SMALL CAPITAL U */ 146 case 0x1D20: /* LATIN LETTER SMALL CAPITAL V */ 147 case 0x1D21: /* LATIN LETTER SMALL CAPITAL W */ 148 case 0x1D22: /* LATIN LETTER SMALL CAPITAL Z */ 149 case 0x1ECD: /* LATIN SMALL LETTER O WITH DOT BELOW */ 150 case 0x2010: /* HYPHEN */ 151 case 0x2011: /* NON-BREAKING HYPHEN */ 152 case 0x2024: /* ONE DOT LEADER */ 153 case 0x2027: /* HYPHENATION POINT */ 154 case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ 155 case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ 156 case 0x2041: /* CARET INSERTION POINT */ 157 case 0x2044: /* FRACTION SLASH */ 158 case 
0x2052: /* COMMERCIAL MINUS SIGN */ 159 case 0x2153: /* VULGAR FRACTION ONE THIRD */ 160 case 0x2154: /* VULGAR FRACTION TWO THIRDS */ 161 case 0x2155: /* VULGAR FRACTION ONE FIFTH */ 162 case 0x2156: /* VULGAR FRACTION TWO FIFTHS */ 163 case 0x2157: /* VULGAR FRACTION THREE FIFTHS */ 164 case 0x2158: /* VULGAR FRACTION FOUR FIFTHS */ 165 case 0x2159: /* VULGAR FRACTION ONE SIXTH */ 166 case 0x215A: /* VULGAR FRACTION FIVE SIXTHS */ 167 case 0x215B: /* VULGAR FRACTION ONE EIGHT */ 168 case 0x215C: /* VULGAR FRACTION THREE EIGHTHS */ 169 case 0x215D: /* VULGAR FRACTION FIVE EIGHTHS */ 170 case 0x215E: /* VULGAR FRACTION SEVEN EIGHTHS */ 171 case 0x215F: /* FRACTION NUMERATOR ONE */ 172 case 0x2212: /* MINUS SIGN */ 173 case 0x2215: /* DIVISION SLASH */ 174 case 0x2216: /* SET MINUS */ 175 case 0x2236: /* RATIO */ 176 case 0x233F: /* APL FUNCTIONAL SYMBOL SLASH BAR */ 177 case 0x23AE: /* INTEGRAL EXTENSION */ 178 case 0x244A: /* OCR DOUBLE BACKSLASH */ 179 case 0x2571: /* DisplayType::Box DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */ 180 case 0x2572: /* DisplayType::Box DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT */ 181 case 0x29F6: /* SOLIDUS WITH OVERBAR */ 182 case 0x29F8: /* BIG SOLIDUS */ 183 case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */ 184 case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */ 185 case 0x2FF0: /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT */ 186 case 0x2FF1: /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW */ 187 case 0x2FF2: /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT */ 188 case 0x2FF3: /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW */ 189 case 0x2FF4: /* IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND */ 190 case 0x2FF5: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE */ 191 case 0x2FF6: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW */ 192 case 0x2FF7: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT */ 193 case 0x2FF8: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND 
FROM UPPER LEFT */ 194 case 0x2FF9: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT */ 195 case 0x2FFA: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT */ 196 case 0x2FFB: /* IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID */ 197 case 0x3002: /* IDEOGRAPHIC FULL STOP */ 198 case 0x3008: /* LEFT ANGLE BRACKET */ 199 case 0x3014: /* LEFT TORTOISE SHELL BRACKET */ 200 case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */ 201 case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */ 202 case 0x3035: /* VERTICAL KANA REPEAT MARK LOWER HALF */ 203 case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */ 204 case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */ 205 case 0x33AE: /* SQUARE RAD OVER S */ 206 case 0x33AF: /* SQUARE RAD OVER S SQUARED */ 207 case 0x33C6: /* SQUARE C OVER KG */ 208 case 0x33DF: /* SQUARE A OVER M */ 209 case 0x05B9: /* HEBREW POINT HOLAM */ 210 case 0x05BA: /* HEBREW POINT HOLAM HASER FOR VAV */ 211 case 0x05C1: /* HEBREW POINT SHIN DOT */ 212 case 0x05C2: /* HEBREW POINT SIN DOT */ 213 case 0x05C4: /* HEBREW MARK UPPER DOT */ 214 case 0xA731: /* LATIN LETTER SMALL CAPITAL S */ 215 case 0xA771: /* LATIN SMALL LETTER DUM */ 216 case 0xA789: /* MODIFIER LETTER COLON */ 217 case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */ 218 case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */ 219 case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */ 220 case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */ 221 case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */ 222 case 0xFF0E: /* FULLWIDTH FULL STOP */ 223 case 0xFF0F: /* FULL WIDTH SOLIDUS */ 224 case 0xFF61: /* HALFWIDTH IDEOGRAPHIC FULL STOP */ 225 case 0xFFFC: /* OBJECT REPLACEMENT CHARACTER */ 226 case 0xFFFD: /* REPLACEMENT CHARACTER */ 227 case 0x1F50F: /* LOCK WITH INK PEN */ 228 case 0x1F510: /* CLOSED LOCK WITH KEY */ 229 case 0x1F511: /* KEY */ 230 case 0x1F512: /* LOCK */ 231 case 0x1F513: /* OPEN LOCK */ 232 return YES; 233 case 0x0307: /* 
COMBINING DOT ABOVE */ 234 return previousCodePoint == 0x0237 /* LATIN SMALL LETTER DOTLESS J */ 235 || previousCodePoint == 0x0131 /* LATIN SMALL LETTER DOTLESS I */ 236 || previousCodePoint == 0x05D5; /* HEBREW LETTER VAV */ 237 case 0x0548: /* ARMENIAN CAPITAL LETTER VO */ 238 case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */ 239 case 0x0578: /* ARMENIAN SMALL LETTER VO */ 240 case 0x057D: /* ARMENIAN SMALL LETTER SEH */ 241 return previousCodePoint 242 && !isASCIIDigitOrValidHostCharacter(previousCodePoint.value()) 243 && !isArmenianScriptCharacter(previousCodePoint.value()); 244 case '.': 245 return NO; 246 default: 247 return previousCodePoint 248 && isArmenianLookalikeCharacter(previousCodePoint.value()) 249 && !(isArmenianScriptCharacter(charCode) || isASCIIDigitOrValidHostCharacter(charCode)); 250 } 251 } 252 253 static void whiteListIDNScript(const char* scriptName) 254 { 255 int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName); 256 if (script >= 0 && script < USCRIPT_CODE_LIMIT) { 257 size_t index = script / 32; 258 uint32_t mask = 1 << (script % 32); 259 IDNScriptWhiteList[index] |= mask; 260 } 261 } 46 using namespace URLHelpers; 262 47 263 48 static BOOL readIDNScriptWhiteListFile(NSString *filename) … … 292 77 } 293 78 294 static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length) 79 namespace URLHelpers { 80 81 void loadIDNScriptWhiteList() 295 82 { 296 83 static dispatch_once_t flag; … … 303 90 return; 304 91 } 305 const char* defaultIDNScriptWhiteList[20] = { 306 "Common", 307 "Inherited", 308 "Arabic", 309 "Armenian", 310 "Bopomofo", 311 "Canadian_Aboriginal", 312 "Devanagari", 313 "Deseret", 314 "Gujarati", 315 "Gurmukhi", 316 "Hangul", 317 "Han", 318 "Hebrew", 319 "Hiragana", 320 "Katakana_Or_Hiragana", 321 "Katakana", 322 "Latin", 323 "Tamil", 324 "Thai", 325 "Yi", 326 }; 327 for (const char* scriptName : defaultIDNScriptWhiteList) 328 whiteListIDNScript(scriptName); 92 initializeDefaultIDNScriptWhiteList(); 
329 93 }); 330 331 int32_t i = 0; 332 Optional<UChar32> previousCodePoint; 333 while (i < length) { 334 UChar32 c; 335 U16_NEXT(buffer, i, length, c) 336 UErrorCode error = U_ZERO_ERROR; 337 UScriptCode script = uscript_getScript(c, &error); 338 if (error != U_ZERO_ERROR) { 339 LOG_ERROR("got ICU error while trying to look at scripts: %d", error); 340 return NO; 341 } 342 if (script < 0) { 343 LOG_ERROR("got negative number for script code from ICU: %d", script); 344 return NO; 345 } 346 if (script >= USCRIPT_CODE_LIMIT) 347 return NO; 348 349 size_t index = script / 32; 350 uint32_t mask = 1 << (script % 32); 351 if (!(IDNScriptWhiteList[index] & mask)) 352 return NO; 353 354 if (isLookalikeCharacter(previousCodePoint, c)) 355 return NO; 356 previousCodePoint = c; 357 } 358 return YES; 359 } 360 361 static bool isSecondLevelDomainNameAllowedByTLDRules(const UChar* buffer, int32_t length, const WTF::Function<bool(UChar)>& characterIsAllowed) 362 { 363 ASSERT(length > 0); 364 365 for (int32_t i = length - 1; i >= 0; --i) { 366 UChar ch = buffer[i]; 367 368 if (characterIsAllowed(ch)) 369 continue; 370 371 // Only check the second level domain. Lower level registrars may have different rules. 372 if (ch == '.') 373 break; 374 375 return false; 376 } 377 return true; 378 } 379 380 #define CHECK_RULES_IF_SUFFIX_MATCHES(suffix, function) \ 381 { \ 382 static const int32_t suffixLength = sizeof(suffix) / sizeof(suffix[0]); \ 383 if (length > suffixLength && 0 == memcmp(buffer + length - suffixLength, suffix, sizeof(suffix))) \ 384 return isSecondLevelDomainNameAllowedByTLDRules(buffer, length - suffixLength, function); \ 385 } 386 387 static bool isRussianDomainNameCharacter(UChar ch) 388 { 389 // Only modern Russian letters, digits and dashes are allowed. 
390 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || isASCIIDigit(ch) || ch == '-'; 391 } 392 393 static BOOL allCharactersAllowedByTLDRules(const UChar* buffer, int32_t length) 394 { 395 // Skip trailing dot for root domain. 396 if (buffer[length - 1] == '.') 397 length--; 398 399 // http://cctld.ru/files/pdf/docs/rules_ru-rf.pdf 400 static const UChar cyrillicRF[] = { 401 '.', 402 0x0440, // CYRILLIC SMALL LETTER ER 403 0x0444 // CYRILLIC SMALL LETTER EF 404 }; 405 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicRF, isRussianDomainNameCharacter); 406 407 // http://rusnames.ru/rules.pl 408 static const UChar cyrillicRUS[] = { 409 '.', 410 0x0440, // CYRILLIC SMALL LETTER ER 411 0x0443, // CYRILLIC SMALL LETTER U 412 0x0441 // CYRILLIC SMALL LETTER ES 413 }; 414 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicRUS, isRussianDomainNameCharacter); 415 416 // http://ru.faitid.org/projects/moscow/documents/moskva/idn 417 static const UChar cyrillicMOSKVA[] = { 418 '.', 419 0x043C, // CYRILLIC SMALL LETTER EM 420 0x043E, // CYRILLIC SMALL LETTER O 421 0x0441, // CYRILLIC SMALL LETTER ES 422 0x043A, // CYRILLIC SMALL LETTER KA 423 0x0432, // CYRILLIC SMALL LETTER VE 424 0x0430 // CYRILLIC SMALL LETTER A 425 }; 426 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicMOSKVA, isRussianDomainNameCharacter); 427 428 // http://www.dotdeti.ru/foruser/docs/regrules.php 429 static const UChar cyrillicDETI[] = { 430 '.', 431 0x0434, // CYRILLIC SMALL LETTER DE 432 0x0435, // CYRILLIC SMALL LETTER IE 433 0x0442, // CYRILLIC SMALL LETTER TE 434 0x0438 // CYRILLIC SMALL LETTER I 435 }; 436 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicDETI, isRussianDomainNameCharacter); 437 438 // http://corenic.org - rules not published. The word is Russian, so only allowing Russian at this time, 439 // although we may need to revise the checks if this ends up being used with other languages spoken in Russia. 
440 static const UChar cyrillicONLAYN[] = { 441 '.', 442 0x043E, // CYRILLIC SMALL LETTER O 443 0x043D, // CYRILLIC SMALL LETTER EN 444 0x043B, // CYRILLIC SMALL LETTER EL 445 0x0430, // CYRILLIC SMALL LETTER A 446 0x0439, // CYRILLIC SMALL LETTER SHORT I 447 0x043D // CYRILLIC SMALL LETTER EN 448 }; 449 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicONLAYN, isRussianDomainNameCharacter); 450 451 // http://corenic.org - same as above. 452 static const UChar cyrillicSAYT[] = { 453 '.', 454 0x0441, // CYRILLIC SMALL LETTER ES 455 0x0430, // CYRILLIC SMALL LETTER A 456 0x0439, // CYRILLIC SMALL LETTER SHORT I 457 0x0442 // CYRILLIC SMALL LETTER TE 458 }; 459 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicSAYT, isRussianDomainNameCharacter); 460 461 // http://pir.org/products/opr-domain/ - rules not published. According to the registry site, 462 // the intended audience is "Russian and other Slavic-speaking markets". 463 // Chrome appears to only allow Russian, so sticking with that for now. 464 static const UChar cyrillicORG[] = { 465 '.', 466 0x043E, // CYRILLIC SMALL LETTER O 467 0x0440, // CYRILLIC SMALL LETTER ER 468 0x0433 // CYRILLIC SMALL LETTER GHE 469 }; 470 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicORG, isRussianDomainNameCharacter); 471 472 // http://cctld.by/rules.html 473 static const UChar cyrillicBEL[] = { 474 '.', 475 0x0431, // CYRILLIC SMALL LETTER BE 476 0x0435, // CYRILLIC SMALL LETTER IE 477 0x043B // CYRILLIC SMALL LETTER EL 478 }; 479 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicBEL, [](UChar ch) { 480 // Russian and Byelorussian letters, digits and dashes are allowed. 
481 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x0456 || ch == 0x045E || ch == 0x2019 || isASCIIDigit(ch) || ch == '-'; 482 }); 483 484 // http://www.nic.kz/docs/poryadok_vnedreniya_kaz_ru.pdf 485 static const UChar cyrillicKAZ[] = { 486 '.', 487 0x049B, // CYRILLIC SMALL LETTER KA WITH DESCENDER 488 0x0430, // CYRILLIC SMALL LETTER A 489 0x0437 // CYRILLIC SMALL LETTER ZE 490 }; 491 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicKAZ, [](UChar ch) { 492 // Kazakh letters, digits and dashes are allowed. 493 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x04D9 || ch == 0x0493 || ch == 0x049B || ch == 0x04A3 || ch == 0x04E9 || ch == 0x04B1 || ch == 0x04AF || ch == 0x04BB || ch == 0x0456 || isASCIIDigit(ch) || ch == '-'; 494 }); 495 496 // http://uanic.net/docs/documents-ukr/Rules%20of%20UKR_v4.0.pdf 497 static const UChar cyrillicUKR[] = { 498 '.', 499 0x0443, // CYRILLIC SMALL LETTER U 500 0x043A, // CYRILLIC SMALL LETTER KA 501 0x0440 // CYRILLIC SMALL LETTER ER 502 }; 503 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicUKR, [](UChar ch) { 504 // Russian and Ukrainian letters, digits and dashes are allowed. 505 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x0491 || ch == 0x0404 || ch == 0x0456 || ch == 0x0457 || isASCIIDigit(ch) || ch == '-'; 506 }); 507 508 // http://www.rnids.rs/data/DOKUMENTI/idn-srb-policy-termsofuse-v1.4-eng.pdf 509 static const UChar cyrillicSRB[] = { 510 '.', 511 0x0441, // CYRILLIC SMALL LETTER ES 512 0x0440, // CYRILLIC SMALL LETTER ER 513 0x0431 // CYRILLIC SMALL LETTER BE 514 }; 515 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicSRB, [](UChar ch) { 516 // Serbian letters, digits and dashes are allowed. 
517 return (ch >= 0x0430 && ch <= 0x0438) || (ch >= 0x043A && ch <= 0x0448) || ch == 0x0452 || ch == 0x0458 || ch == 0x0459 || ch == 0x045A || ch == 0x045B || ch == 0x045F || isASCIIDigit(ch) || ch == '-'; 518 }); 519 520 // http://marnet.mk/doc/pravilnik-mk-mkd.pdf 521 static const UChar cyrillicMKD[] = { 522 '.', 523 0x043C, // CYRILLIC SMALL LETTER EM 524 0x043A, // CYRILLIC SMALL LETTER KA 525 0x0434 // CYRILLIC SMALL LETTER DE 526 }; 527 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicMKD, [](UChar ch) { 528 // Macedonian letters, digits and dashes are allowed. 529 return (ch >= 0x0430 && ch <= 0x0438) || (ch >= 0x043A && ch <= 0x0448) || ch == 0x0453 || ch == 0x0455 || ch == 0x0458 || ch == 0x0459 || ch == 0x045A || ch == 0x045C || ch == 0x045F || isASCIIDigit(ch) || ch == '-'; 530 }); 531 532 // https://www.mon.mn/cs/ 533 static const UChar cyrillicMON[] = { 534 '.', 535 0x043C, // CYRILLIC SMALL LETTER EM 536 0x043E, // CYRILLIC SMALL LETTER O 537 0x043D // CYRILLIC SMALL LETTER EN 538 }; 539 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicMON, [](UChar ch) { 540 // Mongolian letters, digits and dashes are allowed. 541 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x04E9 || ch == 0x04AF || isASCIIDigit(ch) || ch == '-'; 542 }); 543 544 // https://www.icann.org/sites/default/files/packages/lgr/lgr-second-level-bulgarian-30aug16-en.html 545 static const UChar cyrillicBG[] = { 546 '.', 547 0x0431, // CYRILLIC SMALL LETTER BE 548 0x0433 // CYRILLIC SMALL LETTER GHE 549 }; 550 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicBG, [](UChar ch) { 551 return (ch >= 0x0430 && ch <= 0x044A) || ch == 0x044C || (ch >= 0x044E && ch <= 0x0450) || ch == 0x045D || isASCIIDigit(ch) || ch == '-'; 552 }); 553 554 // Not a known top level domain with special rules. 555 return NO; 556 } 557 558 // Return value of nil means no mapping is necessary. 559 // If makeString is NO, then return value is either nil or self to indicate mapping is necessary. 
560 // If makeString is YES, then return value is either nil or the mapped string. 561 static NSString *mapHostNameWithRange(NSString *string, NSRange range, BOOL encode, BOOL makeString, BOOL *error) 562 { 563 if (range.length > HOST_NAME_BUFFER_LENGTH) 564 return nil; 565 566 if (![string length]) 567 return nil; 568 569 UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH]; 570 UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH]; 571 572 if (encode && [string rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) { 573 NSString *substring = [string substringWithRange:range]; 574 substring = CFBridgingRelease(CFURLCreateStringByReplacingPercentEscapes(nullptr, (CFStringRef)substring, CFSTR(""))); 575 if (substring) { 576 string = substring; 577 range = NSMakeRange(0, [string length]); 578 } 579 } 580 581 int length = range.length; 582 [string getCharacters:sourceBuffer range:range]; 583 584 UErrorCode uerror = U_ZERO_ERROR; 585 UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER; 586 int32_t numCharactersConverted = (encode ? uidna_nameToASCII : uidna_nameToUnicode)(&URLParser::internationalDomainNameTranscoder(), sourceBuffer, length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, &processingDetails, &uerror); 587 if (length && (U_FAILURE(uerror) || processingDetails.errors)) { 588 *error = YES; 589 return nil; 590 } 591 592 if (numCharactersConverted == length && !memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar))) 593 return nil; 594 595 if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted) && !allCharactersAllowedByTLDRules(destinationBuffer, numCharactersConverted)) 596 return nil; 597 598 return makeString ? 
[NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : string; 599 } 600 601 static BOOL hostNameNeedsDecodingWithRange(NSString *string, NSRange range, BOOL *error) 602 { 603 return mapHostNameWithRange(string, range, NO, NO, error) != nil; 604 } 605 606 static BOOL hostNameNeedsEncodingWithRange(NSString *string, NSRange range, BOOL *error) 607 { 608 return mapHostNameWithRange(string, range, YES, NO, error) != nil; 609 } 610 611 static NSString *decodeHostNameWithRange(NSString *string, NSRange range) 612 { 613 BOOL error = NO; 614 NSString *host = mapHostNameWithRange(string, range, NO, YES, &error); 615 if (error) 616 return nil; 617 return !host ? string : host; 618 } 619 620 static NSString *encodeHostNameWithRange(NSString *string, NSRange range) 621 { 622 BOOL error = NO; 623 NSString *host = mapHostNameWithRange(string, range, YES, YES, &error); 624 if (error) 625 return nil; 626 return !host ? string : host; 94 } 95 96 } // namespace URLHelpers 97 98 static String decodePercentEscapes(const String& string) 99 { 100 NSString *substring = (NSString *)string; 101 substring = CFBridgingRelease(CFURLCreateStringByReplacingPercentEscapes(nullptr, (CFStringRef)substring, CFSTR(""))); 102 103 if (!substring) 104 return string; 105 106 return (String)substring; 627 107 } 628 108 629 109 NSString *decodeHostName(NSString *string) 630 110 { 631 BOOL error = NO; 632 NSString *host = mapHostNameWithRange(string, NSMakeRange(0, [string length]), NO, YES, &error); 633 if (error) 634 return nil; 635 return !host ? string : host; 111 Optional<String> host = mapHostName(string, nullopt); 112 if (!host) 113 return nil; 114 return !*host ? string : (NSString *)*host; 636 115 } 637 116 638 117 NSString *encodeHostName(NSString *string) 639 118 { 640 BOOL error = NO; 641 NSString *host = mapHostNameWithRange(string, NSMakeRange(0, [string length]), YES, YES, &error); 642 if (error) 643 return nil; 644 return !host ? 
string : host; 645 } 646 647 static void collectRangesThatNeedMapping(NSString *string, NSRange range, RetainPtr<NSMutableArray>& array, BOOL encode) 648 { 649 // Generally, we want to optimize for the case where there is one host name that does not need mapping. 650 // Therefore, we use nil to indicate no mapping here and an empty array to indicate error. 651 652 BOOL error = NO; 653 BOOL needsMapping = encode ? hostNameNeedsEncodingWithRange(string, range, &error) : hostNameNeedsDecodingWithRange(string, range, &error); 654 if (!error && !needsMapping) 655 return; 656 657 if (!array) 658 array = adoptNS([NSMutableArray new]); 659 660 if (!error) 661 [array addObject:[NSValue valueWithRange:range]]; 662 } 663 664 static void collectRangesThatNeedEncoding(NSString *string, NSRange range, RetainPtr<NSMutableArray>& array) 665 { 666 return collectRangesThatNeedMapping(string, range, array, YES); 667 } 668 669 static void collectRangesThatNeedDecoding(NSString *string, NSRange range, RetainPtr<NSMutableArray>& array) 670 { 671 return collectRangesThatNeedMapping(string, range, array, NO); 672 } 673 674 static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, RetainPtr<NSMutableArray>& array) 675 { 676 // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character. 677 // Skip quoted strings so that characters in them don't confuse us. 678 // When we find a '?' character, we are past the part of the URL that contains host names. 
679 680 static NeverDestroyed<RetainPtr<NSCharacterSet>> hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"]; 681 static NeverDestroyed<RetainPtr<NSCharacterSet>> hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"]; 682 static NeverDestroyed<RetainPtr<NSCharacterSet>> quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"]; 683 684 unsigned stringLength = [string length]; 685 NSRange remaining = NSMakeRange(0, stringLength); 686 687 while (1) { 688 // Find start of host name or of quoted string. 689 NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters.get().get() options:0 range:remaining]; 690 if (hostNameOrStringStart.location == NSNotFound) 691 return; 692 693 unichar c = [string characterAtIndex:hostNameOrStringStart.location]; 694 remaining.location = NSMaxRange(hostNameOrStringStart); 695 remaining.length = stringLength - remaining.location; 696 697 if (c == '?') 698 return; 699 700 if (c == '@') { 701 // Find end of host name. 702 unsigned hostNameStart = remaining.location; 703 NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters.get().get() options:0 range:remaining]; 704 BOOL done; 705 if (hostNameEnd.location == NSNotFound) { 706 hostNameEnd.location = stringLength; 707 done = YES; 708 } else { 709 remaining.location = hostNameEnd.location; 710 remaining.length = stringLength - remaining.location; 711 done = NO; 712 } 713 714 // Process host name range. 715 f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), array); 716 717 if (done) 718 return; 719 } else { 720 // Skip quoted string. 
721 ASSERT(c == '"'); 722 while (1) { 723 NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters.get().get() options:0 range:remaining]; 724 if (escapedCharacterOrStringEnd.location == NSNotFound) 725 return; 726 727 c = [string characterAtIndex:escapedCharacterOrStringEnd.location]; 728 remaining.location = NSMaxRange(escapedCharacterOrStringEnd); 729 remaining.length = stringLength - remaining.location; 730 731 // If we are the end of the string, then break from the string loop back to the host name loop. 732 if (c == '"') 733 break; 734 735 // Skip escaped character. 736 ASSERT(c == '\\'); 737 if (!remaining.length) 738 return; 739 740 remaining.location += 1; 741 remaining.length -= 1; 742 } 743 } 744 } 745 } 746 747 static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, RetainPtr<NSMutableArray>& array) 748 { 749 // Find hostnames. Too bad we can't use any real URL-parsing code to do this, 750 // but we have to do it before doing all the %-escaping, and this is the only 751 // code we have that parses mailto URLs anyway. 752 753 // Maybe we should implement this using a character buffer instead? 754 755 if (protocolIs(string, "mailto")) { 756 applyHostNameFunctionToMailToURLString(string, f, array); 757 return; 758 } 759 760 // Find the host name in a hierarchical URL. 761 // It comes after a "://" sequence, with scheme characters preceding. 762 // If ends with the end of the string or a ":", "/", or a "?". 763 // If there is a "@" character, the host part is just the part after the "@". 764 NSRange separatorRange = [string rangeOfString:@"://"]; 765 if (separatorRange.location == NSNotFound) 766 return; 767 768 // Check that all characters before the :// are valid scheme characters. 
769 static NeverDestroyed<RetainPtr<NSCharacterSet>> nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet]; 770 if ([string rangeOfCharacterFromSet:nonSchemeCharacters.get().get() options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) 771 return; 772 773 unsigned stringLength = [string length]; 774 775 static NeverDestroyed<RetainPtr<NSCharacterSet>> hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"]; 776 777 // Start after the separator. 778 unsigned authorityStart = NSMaxRange(separatorRange); 779 780 // Find terminating character. 781 NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators.get().get() options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)]; 782 unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location; 783 784 // Find "@" for the start of the host name. 785 NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)]; 786 unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator); 787 788 return f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), array); 789 } 790 791 static RetainPtr<NSString> mapHostNames(NSString *string, BOOL encode) 792 { 793 // Generally, we want to optimize for the case where there is one host name that does not need mapping. 794 795 if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding]) 796 return string; 797 798 // Make a list of ranges that actually need mapping. 799 RetainPtr<NSMutableArray> hostNameRanges; 800 StringRangeApplierFunction f = encode ? 
collectRangesThatNeedEncoding : collectRangesThatNeedDecoding; 801 applyHostNameFunctionToURLString(string, f, hostNameRanges); 802 if (!hostNameRanges) 803 return string; 804 805 if (![hostNameRanges count]) 806 return nil; 807 808 // Do the mapping. 809 auto mutableCopy = adoptNS([string mutableCopy]); 810 unsigned i = [hostNameRanges count]; 811 while (i--) { 812 NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue]; 813 NSString *mappedHostName = encode ? encodeHostNameWithRange(string, hostNameRange) : decodeHostNameWithRange(string, hostNameRange); 814 [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName]; 815 } 816 return mutableCopy; 119 Optional<String> host = mapHostName(string, decodePercentEscapes); 120 if (!host) 121 return nil; 122 return !*host ? string : (NSString *)*host; 817 123 } 818 124 … … 916 222 return nil; 917 223 918 auto mappedString = mapHostNames(stringByTrimmingWhitespace(string).get(), YES);224 auto mappedString = mapHostNames(stringByTrimmingWhitespace(string).get(), decodePercentEscapes); 919 225 if (!mappedString) 920 226 return nil; 921 227 922 228 // Let's check whether the URL is bogus. 923 URL url { URL { nsURL }, mappedString .get()};229 URL url { URL { nsURL }, mappedString }; 924 230 if (!url.createCFURL()) 925 231 return nil; … … 927 233 // FIXME: https://bugs.webkit.org/show_bug.cgi?id=186057 928 234 // We should be able to use url.createCFURL instead of using directly CFURL parsing routines. 
929 NSData *data = dataWithUserTypedString(mappedString .get());235 NSData *data = dataWithUserTypedString(mappedString); 930 236 if (!data) 931 237 return [NSURL URLWithString:@""]; … … 1066 372 } 1067 373 1068 static CFStringRef createStringWithEscapedUnsafeCharacters(CFStringRef string)1069 {1070 CFIndex length = CFStringGetLength(string);1071 Vector<UChar, URL_BYTES_BUFFER_LENGTH> sourceBuffer(length);1072 CFStringGetCharacters(string, CFRangeMake(0, length), sourceBuffer.data());1073 1074 Vector<UChar, URL_BYTES_BUFFER_LENGTH> outBuffer;1075 1076 Optional<UChar32> previousCodePoint;1077 CFIndex i = 0;1078 while (i < length) {1079 UChar32 c;1080 U16_NEXT(sourceBuffer, i, length, c)1081 1082 if (isLookalikeCharacter(previousCodePoint, c)) {1083 uint8_t utf8Buffer[4];1084 CFIndex offset = 0;1085 UBool failure = false;1086 U8_APPEND(utf8Buffer, offset, 4, c, failure)1087 ASSERT(!failure);1088 1089 for (CFIndex j = 0; j < offset; ++j) {1090 outBuffer.append('%');1091 outBuffer.append(upperNibbleToASCIIHexDigit(utf8Buffer[j]));1092 outBuffer.append(lowerNibbleToASCIIHexDigit(utf8Buffer[j]));1093 }1094 } else {1095 UChar utf16Buffer[2];1096 CFIndex offset = 0;1097 UBool failure = false;1098 U16_APPEND(utf16Buffer, offset, 2, c, failure)1099 ASSERT(!failure);1100 for (CFIndex j = 0; j < offset; ++j)1101 outBuffer.append(utf16Buffer[j]);1102 }1103 previousCodePoint = c;1104 }1105 1106 return CFStringCreateWithCharacters(nullptr, outBuffer.data(), outBuffer.size());1107 }1108 1109 static String toNormalizationFormC(const String& string)1110 {1111 auto sourceBuffer = string.charactersWithNullTermination();1112 ASSERT(sourceBuffer.last() == '\0');1113 sourceBuffer.removeLast();1114 1115 String result;1116 Vector<UChar, URL_BYTES_BUFFER_LENGTH> normalizedCharacters(sourceBuffer.size());1117 UErrorCode uerror = U_ZERO_ERROR;1118 int32_t normalizedLength = 0;1119 const UNormalizer2 *normalizer = unorm2_getNFCInstance(&uerror);1120 if (!U_FAILURE(uerror)) {1121 
normalizedLength = unorm2_normalize(normalizer, sourceBuffer.data(), sourceBuffer.size(), normalizedCharacters.data(), normalizedCharacters.size(), &uerror);1122 if (uerror == U_BUFFER_OVERFLOW_ERROR) {1123 uerror = U_ZERO_ERROR;1124 normalizedCharacters.resize(normalizedLength);1125 normalizedLength = unorm2_normalize(normalizer, sourceBuffer.data(), sourceBuffer.size(), normalizedCharacters.data(), normalizedLength, &uerror);1126 }1127 if (!U_FAILURE(uerror))1128 result = String(normalizedCharacters.data(), normalizedLength);1129 }1130 1131 return result;1132 }1133 1134 374 NSString *userVisibleString(NSURL *URL) 1135 375 { 1136 376 NSData *data = originalURLData(URL); 1137 const unsigned char *before = static_cast<const unsigned char*>([data bytes]); 1138 int length = [data length]; 1139 1140 bool mayNeedHostNameDecoding = false; 1141 1142 const unsigned char *p = before; 1143 int bufferLength = (length * 3) + 1; 1144 Vector<char, URL_BYTES_BUFFER_LENGTH> after(bufferLength); // large enough to %-escape every character 1145 char *q = after.data(); 1146 for (int i = 0; i < length; i++) { 1147 unsigned char c = p[i]; 1148 // unescape escape sequences that indicate bytes greater than 0x7f 1149 if (c == '%' && (i + 1 < length && isASCIIHexDigit(p[i + 1])) && i + 2 < length && isASCIIHexDigit(p[i + 2])) { 1150 auto u = toASCIIHexValue(p[i + 1], p[i + 2]); 1151 if (u > 0x7f) { 1152 // unescape 1153 *q++ = u; 1154 } else { 1155 // do not unescape 1156 *q++ = p[i]; 1157 *q++ = p[i + 1]; 1158 *q++ = p[i + 2]; 1159 } 1160 i += 2; 1161 } else { 1162 *q++ = c; 1163 1164 // Check for "xn--" in an efficient, non-case-sensitive, way. 
1165 if (c == '-' && i >= 3 && !mayNeedHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-') 1166 mayNeedHostNameDecoding = true; 1167 } 1168 } 1169 *q = '\0'; 1170 1171 // Check string to see if it can be converted to display using UTF-8 1172 RetainPtr<NSString> result = [NSString stringWithUTF8String:after.data()]; 1173 if (!result) { 1174 // Could not convert to UTF-8. 1175 // Convert characters greater than 0x7f to escape sequences. 1176 // Shift current string to the end of the buffer 1177 // then we will copy back bytes to the start of the buffer 1178 // as we convert. 1179 int afterlength = q - after.data(); 1180 char *p = after.data() + bufferLength - afterlength - 1; 1181 memmove(p, after.data(), afterlength + 1); // copies trailing '\0' 1182 char *q = after.data(); 1183 while (*p) { 1184 unsigned char c = *p; 1185 if (c > 0x7f) { 1186 *q++ = '%'; 1187 *q++ = upperNibbleToASCIIHexDigit(c); 1188 *q++ = lowerNibbleToASCIIHexDigit(c); 1189 } else 1190 *q++ = *p; 1191 p++; 1192 } 1193 *q = '\0'; 1194 result = [NSString stringWithUTF8String:after.data()]; 1195 } 1196 1197 if (mayNeedHostNameDecoding) { 1198 // FIXME: Is it good to ignore the failure of mapHostNames and keep result intact? 1199 auto mappedResult = mapHostNames(result.get(), NO); 1200 if (mappedResult) 1201 result = mappedResult; 1202 } 1203 1204 auto wtfString = String(result.get()); 1205 auto normalized = toNormalizationFormC(wtfString); 1206 result = static_cast<NSString *>(normalized); 1207 return CFBridgingRelease(createStringWithEscapedUnsafeCharacters((__bridge CFStringRef)result.get())); 377 CString string(static_cast<const char*>([data bytes]), [data length]); 378 return userVisibleURL(string); 1208 379 } 1209 380 -
trunk/Source/WebCore/ChangeLog
r240958 r240962 1 2019-02-04 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Tests: enabled fast/url/user-visible/. 9 10 * testing/Internals.cpp: 11 (WebCore::Internals::userVisibleString): Enable method on all platforms. 12 1 13 2019-02-04 Fujii Hironori <Hironori.Fujii@sony.com> 2 14 -
trunk/Source/WebCore/testing/Internals.cpp
r240912 r240962 186 186 #include <wtf/MemoryPressureHandler.h> 187 187 #include <wtf/MonotonicTime.h> 188 #include <wtf/URLHelpers.h> 188 189 #include <wtf/text/StringBuffer.h> 189 190 #include <wtf/text/StringBuilder.h> … … 4225 4226 #if !PLATFORM(COCOA) 4226 4227 4227 String Internals::userVisibleString(const DOMURL&) 4228 { 4229 // Cocoa-specific function. Could ASSERT_NOT_REACHED, but that's probably overkill. 4230 return String(); 4228 String Internals::userVisibleString(const DOMURL& url) 4229 { 4230 return WTF::URLHelpers::userVisibleURL(url.href().string().utf8()); 4231 4231 } 4232 4232 -
trunk/Source/WebKit/ChangeLog
r240956 r240962 1 2019-02-04 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Add webkit_uri_for_display for GTK and WPE. 9 10 * PlatformGTK.cmake: 11 * PlatformWPE.cmake: 12 * SourcesGTK.txt: 13 * SourcesWPE.txt: 14 * UIProcess/API/glib/WebKitURIUtilities.cpp: Added. 15 (webkit_uri_for_display): 16 * UIProcess/API/gtk/WebKitURIUtilities.h: Added. 17 * UIProcess/API/gtk/docs/webkit2gtk-4.0-sections.txt: 18 * UIProcess/API/gtk/docs/webkit2gtk-docs.sgml: 19 * UIProcess/API/gtk/webkit2.h: 20 * UIProcess/API/wpe/WebKitURIUtilities.h: Added. 21 * UIProcess/API/wpe/docs/wpe-0.1-sections.txt: 22 * UIProcess/API/wpe/docs/wpe-docs.sgml: 23 * UIProcess/API/wpe/webkit.h: 24 1 25 2019-02-04 Alex Christensen <achristensen@webkit.org> 2 26 -
trunk/Source/WebKit/PlatformGTK.cmake
r240683 r240962 104 104 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitURIResponse.h 105 105 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitURISchemeRequest.h 106 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitURIUtilities.h 106 107 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitUserContent.h 107 108 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitUserContentManager.h -
trunk/Source/WebKit/PlatformWPE.cmake
r240141 r240962 134 134 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitURIResponse.h 135 135 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitURISchemeRequest.h 136 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitURIUtilities.h 136 137 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitUserContent.h 137 138 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitUserContentManager.h -
trunk/Source/WebKit/SourcesGTK.txt
r240785 r240962 170 170 UIProcess/API/glib/WebKitUIClient.cpp @no-unify 171 171 UIProcess/API/glib/WebKitURISchemeRequest.cpp @no-unify 172 UIProcess/API/glib/WebKitURIUtilities.cpp @no-unify 172 173 UIProcess/API/glib/WebKitUserContent.cpp @no-unify 173 174 UIProcess/API/glib/WebKitUserContentManager.cpp @no-unify -
trunk/Source/WebKit/SourcesWPE.txt
r240785 r240962 155 155 UIProcess/API/glib/WebKitUIClient.cpp @no-unify 156 156 UIProcess/API/glib/WebKitURISchemeRequest.cpp @no-unify 157 UIProcess/API/glib/WebKitURIUtilities.cpp @no-unify 157 158 UIProcess/API/glib/WebKitUserContent.cpp @no-unify 158 159 UIProcess/API/glib/WebKitUserContentManager.cpp @no-unify -
trunk/Source/WebKit/UIProcess/API/gtk/docs/webkit2gtk-4.0-sections.txt
r240473 r240962 1647 1647 webkit_print_custom_widget_get_type 1648 1648 </SECTION> 1649 1650 <SECTION> 1651 <FILE>WebKitURIUtilities</FILE> 1652 webkit_uri_for_display 1653 </SECTION> -
trunk/Source/WebKit/UIProcess/API/gtk/docs/webkit2gtk-docs.sgml
r239278 r240962 75 75 </chapter> 76 76 77 <chapter> 78 <title>Utilities</title> 79 <xi:include href="xml/WebKitURIUtilities.xml"/> 80 </chapter> 81 77 82 <index id="index-all"> 78 83 <title>Index</title> -
trunk/Source/WebKit/UIProcess/API/gtk/webkit2.h
r239278 r240962 74 74 #include <webkit2/WebKitURIResponse.h> 75 75 #include <webkit2/WebKitURISchemeRequest.h> 76 #include <webkit2/WebKitURIUtilities.h> 76 77 #include <webkit2/WebKitUserContent.h> 77 78 #include <webkit2/WebKitUserContentManager.h> -
trunk/Source/WebKit/UIProcess/API/wpe/docs/wpe-0.1-sections.txt
r240473 r240962 1310 1310 WEBKIT_TYPE_APPLICATION_INFO 1311 1311 </SECTION> 1312 1313 <SECTION> 1314 <FILE>WebKitURIUtilities</FILE> 1315 webkit_uri_for_display 1316 </SECTION> -
trunk/Source/WebKit/UIProcess/API/wpe/docs/wpe-docs.sgml
r239278 r240962 57 57 </chapter> 58 58 59 <chapter> 60 <title>Utilities</title> 61 <xi:include href="xml/WebKitURIUtilities.xml"/> 62 </chapter> 63 59 64 <index id="index-all"> 60 65 <title>Index</title> -
trunk/Source/WebKit/UIProcess/API/wpe/webkit.h
r239278 r240962 68 68 #include <wpe/WebKitURIResponse.h> 69 69 #include <wpe/WebKitURISchemeRequest.h> 70 #include <wpe/WebKitURIUtilities.h> 70 71 #include <wpe/WebKitUserContent.h> 71 72 #include <wpe/WebKitUserContentManager.h> -
trunk/Tools/ChangeLog
r240953 r240962 1 2019-02-04 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Add tests for webkit_uri_for_display(). 9 10 * TestWebKitAPI/Tests/WebKitGLib/TestWebKitURIUtilities.cpp: Added. 11 (testURIForDisplayUnaffected): 12 (testURIForDisplayAffected): 13 (beforeAll): 14 (afterAll): 15 * TestWebKitAPI/glib/CMakeLists.txt: 16 1 17 2019-02-04 Jonathan Bedard <jbedard@apple.com> 2 18 -
trunk/Tools/TestWebKitAPI/glib/CMakeLists.txt
r240141 r240962 139 139 ADD_WK2_TEST(TestWebKitSecurityOrigin ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitSecurityOrigin.cpp) 140 140 ADD_WK2_TEST(TestWebKitSettings ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitSettings.cpp) 141 ADD_WK2_TEST(TestWebKitURIUtilities ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitURIUtilities.cpp) 141 142 ADD_WK2_TEST(TestWebKitWebContext ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitWebContext.cpp) 142 143 ADD_WK2_TEST(TestWebKitWebView ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitWebView.cpp)
Note: See TracChangeset
for help on using the changeset viewer.