Changeset 239265 in webkit
- Timestamp:
- Dec 17, 2018 6:08:46 AM (5 years ago)
- Location:
- trunk
- Files:
-
- 7 added
- 23 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/LayoutTests/ChangeLog
r239252 r239265 1 2018-12-17 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 * TestExpectations: Enable fast/url/user-visible/. 9 1 10 2018-12-15 Youenn Fablet <youenn@apple.com> 2 11 -
trunk/LayoutTests/TestExpectations
r239138 r239265 69 69 70 70 # These tests don't have to be platform-specific, but they are only implemented on Mac now. 71 fast/url/user-visible [ Skip ]72 71 fast/images/eps-as-image.html [ Skip ] 73 72 -
trunk/Source/WTF/ChangeLog
r239255 r239265 1 2018-12-17 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Translate userVisibleString and dependent code into platform-neutral C++ 9 in wtf/URLHelpers.{h,cpp}. 10 11 * WTF.xcodeproj/project.pbxproj: 12 * wtf/CMakeLists.txt: 13 * wtf/URLHelpers.cpp: Added. 14 (WTF::URLHelpers::loadIDNScriptWhiteList): 15 (WTF::URLHelpers::isArmenianLookalikeCharacter): 16 (WTF::URLHelpers::isArmenianScriptCharacter): 17 (WTF::URLHelpers::isASCIIDigitOrValidHostCharacter): 18 (WTF::URLHelpers::isLookalikeCharacter): 19 (WTF::URLHelpers::whiteListIDNScript): 20 (WTF::URLHelpers::initializeDefaultIDNScriptWhiteList): 21 (WTF::URLHelpers::allCharactersInIDNScriptWhiteList): 22 (WTF::URLHelpers::isSecondLevelDomainNameAllowedByTLDRules): 23 (WTF::URLHelpers::isRussianDomainNameCharacter): 24 (WTF::URLHelpers::allCharactersAllowedByTLDRules): 25 (WTF::URLHelpers::mapHostName): 26 (WTF::URLHelpers::collectRangesThatNeedMapping): 27 (WTF::URLHelpers::applyHostNameFunctionToMailToURLString): 28 (WTF::URLHelpers::applyHostNameFunctionToURLString): 29 (WTF::URLHelpers::mapHostNames): 30 (WTF::URLHelpers::createStringWithEscapedUnsafeCharacters): 31 (WTF::URLHelpers::userVisibleURL): 32 * wtf/URLHelpers.h: Added. 33 * wtf/cocoa/NSURLExtras.mm: 34 (WTF::URLHelpers::loadIDNScriptWhiteList): 35 (WTF::decodePercentEscapes): 36 (WTF::decodeHostName): 37 (WTF::encodeHostName): 38 (WTF::URLWithUserTypedString): 39 (WTF::userVisibleString): 40 1 41 2018-12-15 Darin Adler <darin@apple.com> 2 42 -
trunk/Source/WTF/WTF.xcodeproj/project.pbxproj
r238771 r239265 79 79 5CC0EE892162BC2200A1A842 /* URLCocoa.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5CC0EE862162BC2200A1A842 /* URLCocoa.mm */; }; 80 80 5CC0EE8A2162BC2200A1A842 /* NSURLExtras.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5CC0EE882162BC2200A1A842 /* NSURLExtras.mm */; }; 81 5FAD3AE221B9636600BEE178 /* URLHelpers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5FAD3AE121B9636600BEE178 /* URLHelpers.cpp */; }; 81 82 70A993FE1AD7151300FA615B /* SymbolRegistry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70A993FC1AD7151300FA615B /* SymbolRegistry.cpp */; }; 82 83 70ECA60D1B02426800449739 /* AtomicStringImpl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70ECA60A1B02426800449739 /* AtomicStringImpl.cpp */; }; … … 383 384 5D247B7014689C4700E78B76 /* DebugRelease.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = DebugRelease.xcconfig; sourceTree = "<group>"; }; 384 385 5D247B7314689C4700E78B76 /* WTF.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = WTF.xcconfig; sourceTree = "<group>"; }; 386 5FAD3AE021B9636600BEE178 /* URLHelpers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = URLHelpers.h; sourceTree = "<group>"; }; 387 5FAD3AE121B9636600BEE178 /* URLHelpers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = URLHelpers.cpp; sourceTree = "<group>"; }; 385 388 6541CAF41630DB26006D0DEC /* CopyWTFHeaders.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = CopyWTFHeaders.xcconfig; sourceTree = "<group>"; }; 386 389 70A993FC1AD7151300FA615B /* SymbolRegistry.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SymbolRegistry.cpp; sourceTree = "<group>"; }; … … 981 984 A8A472CF151A825B004123FF /* MetaAllocatorHandle.h */, 982 985 FE7497ED209163060003565B /* MetaAllocatorPtr.h */, 986 5FAD3AE121B9636600BEE178 /* URLHelpers.cpp */, 987 5FAD3AE021B9636600BEE178 /* URLHelpers.h */, 983 988 0F66B2821DC97BAB004A1D3F /* MonotonicTime.cpp */, 984 989 0F66B2831DC97BAB004A1D3F /* MonotonicTime.h */, … … 1503 1508 5CC0EE8A2162BC2200A1A842 /* NSURLExtras.mm in Sources */, 1504 1509 A8A473F4151A825B004123FF /* NumberOfCores.cpp in Sources */, 1510 5FAD3AE221B9636600BEE178 /* URLHelpers.cpp in Sources */, 1505 1511 A8A473F7151A825B004123FF /* OSAllocatorPosix.cpp in Sources */, 1506 1512 A8A473F9151A825B004123FF /* OSRandomSource.cpp in Sources */, -
trunk/Source/WTF/wtf/CMakeLists.txt
r238771 r239265 242 242 TypeCasts.h 243 243 URL.h 244 URLHelpers.h 244 245 URLHash.h 245 246 URLParser.h … … 399 400 TimingScope.cpp 400 401 URL.cpp 402 URLHelpers.cpp 401 403 URLParser.cpp 402 404 UUID.cpp -
trunk/Source/WTF/wtf/cocoa/NSURLExtras.mm
r238771 r239265 37 37 #import <wtf/ObjCRuntimeExtras.h> 38 38 #import <wtf/RetainPtr.h> 39 #import <wtf/URLHelpers.h> 39 40 #import <wtf/Vector.h> 40 #import <unicode/uchar.h> 41 #import <unicode/uidna.h> 42 #import <unicode/uscript.h> 43 44 // Needs to be big enough to hold an IDN-encoded name. 45 // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK. 46 #define HOST_NAME_BUFFER_LENGTH 2048 41 47 42 #define URL_BYTES_BUFFER_LENGTH 2048 48 43 49 typedef void (* StringRangeApplierFunction)(NSString *, NSRange, RetainPtr<NSMutableArray>&);50 51 static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];52 53 44 namespace WTF { 54 45 55 static bool isArmenianLookalikeCharacter(UChar32 codePoint) 56 { 57 return codePoint == 0x0548 || codePoint == 0x054D || codePoint == 0x0578 || codePoint == 0x057D; 58 } 59 60 static bool isArmenianScriptCharacter(UChar32 codePoint) 61 { 62 UErrorCode error = U_ZERO_ERROR; 63 UScriptCode script = uscript_getScript(codePoint, &error); 64 if (error != U_ZERO_ERROR) { 65 LOG_ERROR("got ICU error while trying to look at scripts: %d", error); 66 return false; 67 } 68 69 return script == USCRIPT_ARMENIAN; 70 } 71 72 73 template<typename CharacterType> inline bool isASCIIDigitOrValidHostCharacter(CharacterType charCode) 74 { 75 if (!isASCIIDigitOrPunctuation(charCode)) 76 return false; 77 78 // Things the URL Parser rejects: 79 switch (charCode) { 80 case '#': 81 case '%': 82 case '/': 83 case ':': 84 case '?': 85 case '@': 86 case '[': 87 case '\\': 88 case ']': 89 return false; 90 default: 91 return true; 92 } 93 } 94 95 96 97 static BOOL isLookalikeCharacter(std::optional<UChar32> previousCodePoint, UChar32 charCode) 98 { 99 // This function treats the following as unsafe, lookalike characters: 100 // any non-printable character, any character considered as whitespace, 101 // any ignorable character, and emoji characters related to locks. 102 103 // We also considered the characters in Mozilla's blacklist <http://kb.mozillazine.org/Network.IDN.blacklist_chars>. 104 105 // Some of the characters here will never appear once ICU has encoded. 106 // For example, ICU transforms most spaces into an ASCII space and most 107 // slashes into an ASCII solidus. But one of the two callers uses this 108 // on characters that have not been processed by ICU, so they are needed here. 109 110 if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) 111 return YES; 112 113 switch (charCode) { 114 case 0x00BC: /* VULGAR FRACTION ONE QUARTER */ 115 case 0x00BD: /* VULGAR FRACTION ONE HALF */ 116 case 0x00BE: /* VULGAR FRACTION THREE QUARTERS */ 117 case 0x00ED: /* LATIN SMALL LETTER I WITH ACUTE */ 118 case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */ 119 case 0x0251: /* LATIN SMALL LETTER ALPHA */ 120 case 0x0261: /* LATIN SMALL LETTER SCRIPT G */ 121 case 0x02D0: /* MODIFIER LETTER TRIANGULAR COLON */ 122 case 0x0335: /* COMBINING SHORT STROKE OVERLAY */ 123 case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */ 124 case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */ 125 case 0x0589: /* ARMENIAN FULL STOP */ 126 case 0x05B4: /* HEBREW POINT HIRIQ */ 127 case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */ 128 case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */ 129 case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */ 130 case 0x0609: /* ARABIC-INDIC PER MILLE SIGN */ 131 case 0x060A: /* ARABIC-INDIC PER TEN THOUSAND SIGN */ 132 case 0x0650: /* ARABIC KASRA */ 133 case 0x0660: /* ARABIC INDIC DIGIT ZERO */ 134 case 0x066A: /* ARABIC PERCENT SIGN */ 135 case 0x06D4: /* ARABIC FULL STOP */ 136 case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */ 137 case 0x0701: /* SYRIAC SUPRALINEAR FULL STOP */ 138 case 0x0702: /* SYRIAC SUBLINEAR FULL STOP */ 139 case 0x0703: /* SYRIAC SUPRALINEAR COLON */ 140 case 0x0704: /* SYRIAC SUBLINEAR COLON */ 141 case 0x1735: /* PHILIPPINE SINGLE PUNCTUATION */ 142 case 0x1D04: /* LATIN LETTER SMALL CAPITAL C */ 143 case 0x1D0F: /* LATIN LETTER SMALL CAPITAL O */ 144 case 0x1D1C: /* LATIN LETTER SMALL CAPITAL U */ 145 case 0x1D20: /* LATIN LETTER SMALL CAPITAL V */ 146 case 0x1D21: /* LATIN LETTER SMALL CAPITAL W */ 147 case 0x1D22: /* LATIN LETTER SMALL CAPITAL Z */ 148 case 0x1ECD: /* LATIN SMALL LETTER O WITH DOT BELOW */ 149 case 0x2010: /* HYPHEN */ 150 case 0x2011: /* NON-BREAKING HYPHEN */ 151 case 0x2024: /* ONE DOT LEADER */ 152 case 0x2027: /* HYPHENATION POINT */ 153 case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ 154 case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ 155 case 0x2041: /* CARET INSERTION POINT */ 156 case 0x2044: /* FRACTION SLASH */ 157 case 0x2052: /* COMMERCIAL MINUS SIGN */ 158 case 0x2153: /* VULGAR FRACTION ONE THIRD */ 159 case 0x2154: /* VULGAR FRACTION TWO THIRDS */ 160 case 0x2155: /* VULGAR FRACTION ONE FIFTH */ 161 case 0x2156: /* VULGAR FRACTION TWO FIFTHS */ 162 case 0x2157: /* VULGAR FRACTION THREE FIFTHS */ 163 case 0x2158: /* VULGAR FRACTION FOUR FIFTHS */ 164 case 0x2159: /* VULGAR FRACTION ONE SIXTH */ 165 case 0x215A: /* VULGAR FRACTION FIVE SIXTHS */ 166 case 0x215B: /* VULGAR FRACTION ONE EIGHT */ 167 case 0x215C: /* VULGAR FRACTION THREE EIGHTHS */ 168 case 0x215D: /* VULGAR FRACTION FIVE EIGHTHS */ 169 case 0x215E: /* VULGAR FRACTION SEVEN EIGHTHS */ 170 case 0x215F: /* FRACTION NUMERATOR ONE */ 171 case 0x2212: /* MINUS SIGN */ 172 case 0x2215: /* DIVISION SLASH */ 173 case 0x2216: /* SET MINUS */ 174 case 0x2236: /* RATIO */ 175 case 0x233F: /* APL FUNCTIONAL SYMBOL SLASH BAR */ 176 case 0x23AE: /* INTEGRAL EXTENSION */ 177 case 0x244A: /* OCR DOUBLE BACKSLASH */ 178 case 0x2571: /* DisplayType::Box DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */ 179 case 0x2572: /* DisplayType::Box DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT */ 180 case 0x29F6: /* SOLIDUS WITH OVERBAR */ 181 case 0x29F8: /* BIG SOLIDUS */ 182 case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */ 183 case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */ 184 case 0x2FF0: /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT */ 185 case 0x2FF1: /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW */ 186 case 0x2FF2: /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT */ 187 case 0x2FF3: /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW */ 188 case 0x2FF4: /* IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND */ 189 case 0x2FF5: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE */ 190 case 0x2FF6: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW */ 191 case 0x2FF7: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT */ 192 case 0x2FF8: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER LEFT */ 193 case 0x2FF9: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT */ 194 case 0x2FFA: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT */ 195 case 0x2FFB: /* IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID */ 196 case 0x3002: /* IDEOGRAPHIC FULL STOP */ 197 case 0x3008: /* LEFT ANGLE BRACKET */ 198 case 0x3014: /* LEFT TORTOISE SHELL BRACKET */ 199 case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */ 200 case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */ 201 case 0x3035: /* VERTICAL KANA REPEAT MARK LOWER HALF */ 202 case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */ 203 case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */ 204 case 0x33AE: /* SQUARE RAD OVER S */ 205 case 0x33AF: /* SQUARE RAD OVER S SQUARED */ 206 case 0x33C6: /* SQUARE C OVER KG */ 207 case 0x33DF: /* SQUARE A OVER M */ 208 case 0x05B9: /* HEBREW POINT HOLAM */ 209 case 0x05BA: /* HEBREW POINT HOLAM HASER FOR VAV */ 210 case 0x05C1: /* HEBREW POINT SHIN DOT */ 211 case 0x05C2: /* HEBREW POINT SIN DOT */ 212 case 0x05C4: /* HEBREW MARK UPPER DOT */ 213 case 0xA731: /* LATIN LETTER SMALL CAPITAL S */ 214 case 0xA771: /* LATIN SMALL LETTER DUM */ 215 case 0xA789: /* MODIFIER LETTER COLON */ 216 case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */ 217 case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */ 218 case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */ 219 case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */ 220 case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */ 221 case 0xFF0E: /* FULLWIDTH FULL STOP */ 222 case 0xFF0F: /* FULL WIDTH SOLIDUS */ 223 case 0xFF61: /* HALFWIDTH IDEOGRAPHIC FULL STOP */ 224 case 0xFFFC: /* OBJECT REPLACEMENT CHARACTER */ 225 case 0xFFFD: /* REPLACEMENT CHARACTER */ 226 case 0x1F50F: /* LOCK WITH INK PEN */ 227 case 0x1F510: /* CLOSED LOCK WITH KEY */ 228 case 0x1F511: /* KEY */ 229 case 0x1F512: /* LOCK */ 230 case 0x1F513: /* OPEN LOCK */ 231 return YES; 232 case 0x0307: /* COMBINING DOT ABOVE */ 233 return previousCodePoint == 0x0237 /* LATIN SMALL LETTER DOTLESS J */ 234 || previousCodePoint == 0x0131 /* LATIN SMALL LETTER DOTLESS I */ 235 || previousCodePoint == 0x05D5; /* HEBREW LETTER VAV */ 236 case 0x0548: /* ARMENIAN CAPITAL LETTER VO */ 237 case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */ 238 case 0x0578: /* ARMENIAN SMALL LETTER VO */ 239 case 0x057D: /* ARMENIAN SMALL LETTER SEH */ 240 return previousCodePoint 241 && !isASCIIDigitOrValidHostCharacter(previousCodePoint.value()) 242 && !isArmenianScriptCharacter(previousCodePoint.value()); 243 case '.': 244 return NO; 245 default: 246 return previousCodePoint 247 && isArmenianLookalikeCharacter(previousCodePoint.value()) 248 && !(isArmenianScriptCharacter(charCode) || isASCIIDigitOrValidHostCharacter(charCode)); 249 } 250 } 251 252 static void whiteListIDNScript(const char* scriptName) 253 { 254 int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName); 255 if (script >= 0 && script < USCRIPT_CODE_LIMIT) { 256 size_t index = script / 32; 257 uint32_t mask = 1 << (script % 32); 258 IDNScriptWhiteList[index] |= mask; 259 } 260 } 46 using namespace URLHelpers; 261 47 262 48 static BOOL readIDNScriptWhiteListFile(NSString *filename) … … 291 77 } 292 78 293 static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length) 79 namespace URLHelpers { 80 81 void loadIDNScriptWhiteList() 294 82 { 295 83 static dispatch_once_t flag; … … 302 90 return; 303 91 } 304 const char* defaultIDNScriptWhiteList[20] = { 305 "Common", 306 "Inherited", 307 "Arabic", 308 "Armenian", 309 "Bopomofo", 310 "Canadian_Aboriginal", 311 "Devanagari", 312 "Deseret", 313 "Gujarati", 314 "Gurmukhi", 315 "Hangul", 316 "Han", 317 "Hebrew", 318 "Hiragana", 319 "Katakana_Or_Hiragana", 320 "Katakana", 321 "Latin", 322 "Tamil", 323 "Thai", 324 "Yi", 325 }; 326 for (const char* scriptName : defaultIDNScriptWhiteList) 327 whiteListIDNScript(scriptName); 92 initializeDefaultIDNScriptWhiteList(); 328 93 }); 329 330 int32_t i = 0; 331 std::optional<UChar32> previousCodePoint; 332 while (i < length) { 333 UChar32 c; 334 U16_NEXT(buffer, i, length, c) 335 UErrorCode error = U_ZERO_ERROR; 336 UScriptCode script = uscript_getScript(c, &error); 337 if (error != U_ZERO_ERROR) { 338 LOG_ERROR("got ICU error while trying to look at scripts: %d", error); 339 return NO; 340 } 341 if (script < 0) { 342 LOG_ERROR("got negative number for script code from ICU: %d", script); 343 return NO; 344 } 345 if (script >= USCRIPT_CODE_LIMIT) 346 return NO; 347 348 size_t index = script / 32; 349 uint32_t mask = 1 << (script % 32); 350 if (!(IDNScriptWhiteList[index] & mask)) 351 return NO; 352 353 if (isLookalikeCharacter(previousCodePoint, c)) 354 return NO; 355 previousCodePoint = c; 356 } 357 return YES; 358 } 359 360 static bool isSecondLevelDomainNameAllowedByTLDRules(const UChar* buffer, int32_t length, const WTF::Function<bool(UChar)>& characterIsAllowed) 361 { 362 ASSERT(length > 0); 363 364 for (int32_t i = length - 1; i >= 0; --i) { 365 UChar ch = buffer[i]; 366 367 if (characterIsAllowed(ch)) 368 continue; 369 370 // Only check the second level domain. Lower level registrars may have different rules. 371 if (ch == '.') 372 break; 373 374 return false; 375 } 376 return true; 377 } 378 379 #define CHECK_RULES_IF_SUFFIX_MATCHES(suffix, function) \ 380 { \ 381 static const int32_t suffixLength = sizeof(suffix) / sizeof(suffix[0]); \ 382 if (length > suffixLength && 0 == memcmp(buffer + length - suffixLength, suffix, sizeof(suffix))) \ 383 return isSecondLevelDomainNameAllowedByTLDRules(buffer, length - suffixLength, function); \ 384 } 385 386 static bool isRussianDomainNameCharacter(UChar ch) 387 { 388 // Only modern Russian letters, digits and dashes are allowed. 389 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || isASCIIDigit(ch) || ch == '-'; 390 } 391 392 static BOOL allCharactersAllowedByTLDRules(const UChar* buffer, int32_t length) 393 { 394 // Skip trailing dot for root domain. 395 if (buffer[length - 1] == '.') 396 length--; 397 398 // http://cctld.ru/files/pdf/docs/rules_ru-rf.pdf 399 static const UChar cyrillicRF[] = { 400 '.', 401 0x0440, // CYRILLIC SMALL LETTER ER 402 0x0444 // CYRILLIC SMALL LETTER EF 403 }; 404 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicRF, isRussianDomainNameCharacter); 405 406 // http://rusnames.ru/rules.pl 407 static const UChar cyrillicRUS[] = { 408 '.', 409 0x0440, // CYRILLIC SMALL LETTER ER 410 0x0443, // CYRILLIC SMALL LETTER U 411 0x0441 // CYRILLIC SMALL LETTER ES 412 }; 413 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicRUS, isRussianDomainNameCharacter); 414 415 // http://ru.faitid.org/projects/moscow/documents/moskva/idn 416 static const UChar cyrillicMOSKVA[] = { 417 '.', 418 0x043C, // CYRILLIC SMALL LETTER EM 419 0x043E, // CYRILLIC SMALL LETTER O 420 0x0441, // CYRILLIC SMALL LETTER ES 421 0x043A, // CYRILLIC SMALL LETTER KA 422 0x0432, // CYRILLIC SMALL LETTER VE 423 0x0430 // CYRILLIC SMALL LETTER A 424 }; 425 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicMOSKVA, isRussianDomainNameCharacter); 426 427 // http://www.dotdeti.ru/foruser/docs/regrules.php 428 static const UChar cyrillicDETI[] = { 429 '.', 430 0x0434, // CYRILLIC SMALL LETTER DE 431 0x0435, // CYRILLIC SMALL LETTER IE 432 0x0442, // CYRILLIC SMALL LETTER TE 433 0x0438 // CYRILLIC SMALL LETTER I 434 }; 435 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicDETI, isRussianDomainNameCharacter); 436 437 // http://corenic.org - rules not published. The word is Russian, so only allowing Russian at this time, 438 // although we may need to revise the checks if this ends up being used with other languages spoken in Russia. 439 static const UChar cyrillicONLAYN[] = { 440 '.', 441 0x043E, // CYRILLIC SMALL LETTER O 442 0x043D, // CYRILLIC SMALL LETTER EN 443 0x043B, // CYRILLIC SMALL LETTER EL 444 0x0430, // CYRILLIC SMALL LETTER A 445 0x0439, // CYRILLIC SMALL LETTER SHORT I 446 0x043D // CYRILLIC SMALL LETTER EN 447 }; 448 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicONLAYN, isRussianDomainNameCharacter); 449 450 // http://corenic.org - same as above. 451 static const UChar cyrillicSAYT[] = { 452 '.', 453 0x0441, // CYRILLIC SMALL LETTER ES 454 0x0430, // CYRILLIC SMALL LETTER A 455 0x0439, // CYRILLIC SMALL LETTER SHORT I 456 0x0442 // CYRILLIC SMALL LETTER TE 457 }; 458 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicSAYT, isRussianDomainNameCharacter); 459 460 // http://pir.org/products/opr-domain/ - rules not published. According to the registry site, 461 // the intended audience is "Russian and other Slavic-speaking markets". 462 // Chrome appears to only allow Russian, so sticking with that for now. 463 static const UChar cyrillicORG[] = { 464 '.', 465 0x043E, // CYRILLIC SMALL LETTER O 466 0x0440, // CYRILLIC SMALL LETTER ER 467 0x0433 // CYRILLIC SMALL LETTER GHE 468 }; 469 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicORG, isRussianDomainNameCharacter); 470 471 // http://cctld.by/rules.html 472 static const UChar cyrillicBEL[] = { 473 '.', 474 0x0431, // CYRILLIC SMALL LETTER BE 475 0x0435, // CYRILLIC SMALL LETTER IE 476 0x043B // CYRILLIC SMALL LETTER EL 477 }; 478 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicBEL, [](UChar ch) { 479 // Russian and Byelorussian letters, digits and dashes are allowed. 480 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x0456 || ch == 0x045E || ch == 0x2019 || isASCIIDigit(ch) || ch == '-'; 481 }); 482 483 // http://www.nic.kz/docs/poryadok_vnedreniya_kaz_ru.pdf 484 static const UChar cyrillicKAZ[] = { 485 '.', 486 0x049B, // CYRILLIC SMALL LETTER KA WITH DESCENDER 487 0x0430, // CYRILLIC SMALL LETTER A 488 0x0437 // CYRILLIC SMALL LETTER ZE 489 }; 490 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicKAZ, [](UChar ch) { 491 // Kazakh letters, digits and dashes are allowed. 492 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x04D9 || ch == 0x0493 || ch == 0x049B || ch == 0x04A3 || ch == 0x04E9 || ch == 0x04B1 || ch == 0x04AF || ch == 0x04BB || ch == 0x0456 || isASCIIDigit(ch) || ch == '-'; 493 }); 494 495 // http://uanic.net/docs/documents-ukr/Rules%20of%20UKR_v4.0.pdf 496 static const UChar cyrillicUKR[] = { 497 '.', 498 0x0443, // CYRILLIC SMALL LETTER U 499 0x043A, // CYRILLIC SMALL LETTER KA 500 0x0440 // CYRILLIC SMALL LETTER ER 501 }; 502 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicUKR, [](UChar ch) { 503 // Russian and Ukrainian letters, digits and dashes are allowed. 504 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x0491 || ch == 0x0404 || ch == 0x0456 || ch == 0x0457 || isASCIIDigit(ch) || ch == '-'; 505 }); 506 507 // http://www.rnids.rs/data/DOKUMENTI/idn-srb-policy-termsofuse-v1.4-eng.pdf 508 static const UChar cyrillicSRB[] = { 509 '.', 510 0x0441, // CYRILLIC SMALL LETTER ES 511 0x0440, // CYRILLIC SMALL LETTER ER 512 0x0431 // CYRILLIC SMALL LETTER BE 513 }; 514 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicSRB, [](UChar ch) { 515 // Serbian letters, digits and dashes are allowed. 516 return (ch >= 0x0430 && ch <= 0x0438) || (ch >= 0x043A && ch <= 0x0448) || ch == 0x0452 || ch == 0x0458 || ch == 0x0459 || ch == 0x045A || ch == 0x045B || ch == 0x045F || isASCIIDigit(ch) || ch == '-'; 517 }); 518 519 // http://marnet.mk/doc/pravilnik-mk-mkd.pdf 520 static const UChar cyrillicMKD[] = { 521 '.', 522 0x043C, // CYRILLIC SMALL LETTER EM 523 0x043A, // CYRILLIC SMALL LETTER KA 524 0x0434 // CYRILLIC SMALL LETTER DE 525 }; 526 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicMKD, [](UChar ch) { 527 // Macedonian letters, digits and dashes are allowed. 528 return (ch >= 0x0430 && ch <= 0x0438) || (ch >= 0x043A && ch <= 0x0448) || ch == 0x0453 || ch == 0x0455 || ch == 0x0458 || ch == 0x0459 || ch == 0x045A || ch == 0x045C || ch == 0x045F || isASCIIDigit(ch) || ch == '-'; 529 }); 530 531 // https://www.mon.mn/cs/ 532 static const UChar cyrillicMON[] = { 533 '.', 534 0x043C, // CYRILLIC SMALL LETTER EM 535 0x043E, // CYRILLIC SMALL LETTER O 536 0x043D // CYRILLIC SMALL LETTER EN 537 }; 538 CHECK_RULES_IF_SUFFIX_MATCHES(cyrillicMON, [](UChar ch) { 539 // Mongolian letters, digits and dashes are allowed. 540 return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x04E9 || ch == 0x04AF || isASCIIDigit(ch) || ch == '-'; 541 }); 542 543 // Not a known top level domain with special rules. 544 return NO; 545 } 546 547 // Return value of nil means no mapping is necessary. 548 // If makeString is NO, then return value is either nil or self to indicate mapping is necessary. 549 // If makeString is YES, then return value is either nil or the mapped string. 550 static NSString *mapHostNameWithRange(NSString *string, NSRange range, BOOL encode, BOOL makeString, BOOL *error) 551 { 552 if (range.length > HOST_NAME_BUFFER_LENGTH) 553 return nil; 554 555 if (![string length]) 556 return nil; 557 558 UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH]; 559 UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH]; 560 561 if (encode && [string rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) { 562 NSString *substring = [string substringWithRange:range]; 563 substring = CFBridgingRelease(CFURLCreateStringByReplacingPercentEscapes(nullptr, (CFStringRef)substring, CFSTR(""))); 564 if (substring) { 565 string = substring; 566 range = NSMakeRange(0, [string length]); 567 } 568 } 569 570 int length = range.length; 571 [string getCharacters:sourceBuffer range:range]; 572 573 UErrorCode uerror = U_ZERO_ERROR; 574 UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER; 575 int32_t numCharactersConverted = (encode ? uidna_nameToASCII : uidna_nameToUnicode)(&URLParser::internationalDomainNameTranscoder(), sourceBuffer, length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, &processingDetails, &uerror); 576 if (length && (U_FAILURE(uerror) || processingDetails.errors)) { 577 *error = YES; 578 return nil; 579 } 580 581 if (numCharactersConverted == length && !memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar))) 582 return nil; 583 584 if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted) && !allCharactersAllowedByTLDRules(destinationBuffer, numCharactersConverted)) 585 return nil; 586 587 return makeString ? [NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : string; 588 } 589 590 static BOOL hostNameNeedsDecodingWithRange(NSString *string, NSRange range, BOOL *error) 591 { 592 return mapHostNameWithRange(string, range, NO, NO, error) != nil; 593 } 594 595 static BOOL hostNameNeedsEncodingWithRange(NSString *string, NSRange range, BOOL *error) 596 { 597 return mapHostNameWithRange(string, range, YES, NO, error) != nil; 598 } 599 600 static NSString *decodeHostNameWithRange(NSString *string, NSRange range) 601 { 602 BOOL error = NO; 603 NSString *host = mapHostNameWithRange(string, range, NO, YES, &error); 604 if (error) 605 return nil; 606 return !host ? string : host; 607 } 608 609 static NSString *encodeHostNameWithRange(NSString *string, NSRange range) 610 { 611 BOOL error = NO; 612 NSString *host = mapHostNameWithRange(string, range, YES, YES, &error); 613 if (error) 614 return nil; 615 return !host ? string : host; 94 } 95 96 } // namespace URLHelpers 97 98 static String decodePercentEscapes(const String& string) 99 { 100 NSString *substring = (NSString *)string; 101 substring = CFBridgingRelease(CFURLCreateStringByReplacingPercentEscapes(nullptr, (CFStringRef)substring, CFSTR(""))); 102 103 if (!substring) 104 return string; 105 106 return (String)substring; 616 107 } 617 108 618 109 NSString *decodeHostName(NSString *string) 619 110 { 620 BOOL error = NO; 621 NSString *host = mapHostNameWithRange(string, NSMakeRange(0, [string length]), NO, YES, &error); 622 if (error) 623 return nil; 624 return !host ? string : host; 111 std::optional<String> host = mapHostName(string, std::nullopt); 112 if (!host) 113 return nil; 114 return !*host ? string : (NSString *)*host; 625 115 } 626 116 627 117 NSString *encodeHostName(NSString *string) 628 118 { 629 BOOL error = NO; 630 NSString *host = mapHostNameWithRange(string, NSMakeRange(0, [string length]), YES, YES, &error); 631 if (error) 632 return nil; 633 return !host ? string : host; 634 } 635 636 static void collectRangesThatNeedMapping(NSString *string, NSRange range, RetainPtr<NSMutableArray>& array, BOOL encode) 637 { 638 // Generally, we want to optimize for the case where there is one host name that does not need mapping. 639 // Therefore, we use nil to indicate no mapping here and an empty array to indicate error. 640 641 BOOL error = NO; 642 BOOL needsMapping = encode ? hostNameNeedsEncodingWithRange(string, range, &error) : hostNameNeedsDecodingWithRange(string, range, &error); 643 if (!error && !needsMapping) 644 return; 645 646 if (!array) 647 array = adoptNS([NSMutableArray new]); 648 649 if (!error) 650 [array addObject:[NSValue valueWithRange:range]]; 651 } 652 653 static void collectRangesThatNeedEncoding(NSString *string, NSRange range, RetainPtr<NSMutableArray>& array) 654 { 655 return collectRangesThatNeedMapping(string, range, array, YES); 656 } 657 658 static void collectRangesThatNeedDecoding(NSString *string, NSRange range, RetainPtr<NSMutableArray>& array) 659 { 660 return collectRangesThatNeedMapping(string, range, array, NO); 661 } 662 663 static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, RetainPtr<NSMutableArray>& array) 664 { 665 // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character. 666 // Skip quoted strings so that characters in them don't confuse us. 667 // When we find a '?' character, we are past the part of the URL that contains host names. 668 669 static NeverDestroyed<RetainPtr<NSCharacterSet>> hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"]; 670 static NeverDestroyed<RetainPtr<NSCharacterSet>> hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"]; 671 static NeverDestroyed<RetainPtr<NSCharacterSet>> quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"]; 672 673 unsigned stringLength = [string length]; 674 NSRange remaining = NSMakeRange(0, stringLength); 675 676 while (1) { 677 // Find start of host name or of quoted string. 678 NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters.get().get() options:0 range:remaining]; 679 if (hostNameOrStringStart.location == NSNotFound) 680 return; 681 682 unichar c = [string characterAtIndex:hostNameOrStringStart.location]; 683 remaining.location = NSMaxRange(hostNameOrStringStart); 684 remaining.length = stringLength - remaining.location; 685 686 if (c == '?') 687 return; 688 689 if (c == '@') { 690 // Find end of host name. 691 unsigned hostNameStart = remaining.location; 692 NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters.get().get() options:0 range:remaining]; 693 BOOL done; 694 if (hostNameEnd.location == NSNotFound) { 695 hostNameEnd.location = stringLength; 696 done = YES; 697 } else { 698 remaining.location = hostNameEnd.location; 699 remaining.length = stringLength - remaining.location; 700 done = NO; 701 } 702 703 // Process host name range. 704 f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), array); 705 706 if (done) 707 return; 708 } else { 709 // Skip quoted string. 710 ASSERT(c == '"'); 711 while (1) { 712 NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters.get().get() options:0 range:remaining]; 713 if (escapedCharacterOrStringEnd.location == NSNotFound) 714 return; 715 716 c = [string characterAtIndex:escapedCharacterOrStringEnd.location]; 717 remaining.location = NSMaxRange(escapedCharacterOrStringEnd); 718 remaining.length = stringLength - remaining.location; 719 720 // If we are the end of the string, then break from the string loop back to the host name loop. 721 if (c == '"') 722 break; 723 724 // Skip escaped character. 725 ASSERT(c == '\\'); 726 if (!remaining.length) 727 return; 728 729 remaining.location += 1; 730 remaining.length -= 1; 731 } 732 } 733 } 734 } 735 736 static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, RetainPtr<NSMutableArray>& array) 737 { 738 // Find hostnames. Too bad we can't use any real URL-parsing code to do this, 739 // but we have to do it before doing all the %-escaping, and this is the only 740 // code we have that parses mailto URLs anyway. 741 742 // Maybe we should implement this using a character buffer instead? 743 744 if (protocolIs(string, "mailto")) { 745 applyHostNameFunctionToMailToURLString(string, f, array); 746 return; 747 } 748 749 // Find the host name in a hierarchical URL. 750 // It comes after a "://" sequence, with scheme characters preceding. 751 // If ends with the end of the string or a ":", "/", or a "?". 752 // If there is a "@" character, the host part is just the part after the "@". 753 NSRange separatorRange = [string rangeOfString:@"://"]; 754 if (separatorRange.location == NSNotFound) 755 return; 756 757 // Check that all characters before the :// are valid scheme characters. 758 static NeverDestroyed<RetainPtr<NSCharacterSet>> nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet]; 759 if ([string rangeOfCharacterFromSet:nonSchemeCharacters.get().get() options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) 760 return; 761 762 unsigned stringLength = [string length]; 763 764 static NeverDestroyed<RetainPtr<NSCharacterSet>> hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"]; 765 766 // Start after the separator. 767 unsigned authorityStart = NSMaxRange(separatorRange); 768 769 // Find terminating character. 770 NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators.get().get() options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)]; 771 unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location; 772 773 // Find "@" for the start of the host name. 774 NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)]; 775 unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator); 776 777 return f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), array); 778 } 779 780 static RetainPtr<NSString> mapHostNames(NSString *string, BOOL encode) 781 { 782 // Generally, we want to optimize for the case where there is one host name that does not need mapping. 783 784 if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding]) 785 return string; 786 787 // Make a list of ranges that actually need mapping. 788 RetainPtr<NSMutableArray> hostNameRanges; 789 StringRangeApplierFunction f = encode ? collectRangesThatNeedEncoding : collectRangesThatNeedDecoding; 790 applyHostNameFunctionToURLString(string, f, hostNameRanges); 791 if (!hostNameRanges) 792 return string; 793 794 if (![hostNameRanges count]) 795 return nil; 796 797 // Do the mapping. 798 auto mutableCopy = adoptNS([string mutableCopy]); 799 unsigned i = [hostNameRanges count]; 800 while (i--) { 801 NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue]; 802 NSString *mappedHostName = encode ? encodeHostNameWithRange(string, hostNameRange) : decodeHostNameWithRange(string, hostNameRange); 803 [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName]; 804 } 805 return mutableCopy; 119 std::optional<String> host = mapHostName(string, decodePercentEscapes); 120 if (!host) 121 return nil; 122 return !*host ? string : (NSString *)*host; 806 123 } 807 124 … … 905 222 return nil; 906 223 907 auto mappedString = mapHostNames(stringByTrimmingWhitespace(string).get(), YES);224 auto mappedString = mapHostNames(stringByTrimmingWhitespace(string).get(), decodePercentEscapes); 908 225 if (!mappedString) 909 226 return nil; 910 227 911 228 // Let's check whether the URL is bogus. 912 URL url { URL { nsURL }, mappedString .get()};229 URL url { URL { nsURL }, mappedString }; 913 230 if (!url.createCFURL()) 914 231 return nil; … … 916 233 // FIXME: https://bugs.webkit.org/show_bug.cgi?id=186057 917 234 // We should be able to use url.createCFURL instead of using directly CFURL parsing routines. 918 NSData *data = dataWithUserTypedString(mappedString .get());235 NSData *data = dataWithUserTypedString(mappedString); 919 236 if (!data) 920 237 return [NSURL URLWithString:@""]; … … 1055 372 } 1056 373 1057 static CFStringRef createStringWithEscapedUnsafeCharacters(CFStringRef string)1058 {1059 CFIndex length = CFStringGetLength(string);1060 Vector<UChar, URL_BYTES_BUFFER_LENGTH> sourceBuffer(length);1061 CFStringGetCharacters(string, CFRangeMake(0, length), sourceBuffer.data());1062 1063 Vector<UChar, URL_BYTES_BUFFER_LENGTH> outBuffer;1064 1065 std::optional<UChar32> previousCodePoint;1066 CFIndex i = 0;1067 while (i < length) {1068 UChar32 c;1069 U16_NEXT(sourceBuffer, i, length, c)1070 1071 if (isLookalikeCharacter(previousCodePoint, c)) {1072 uint8_t utf8Buffer[4];1073 CFIndex offset = 0;1074 UBool failure = false;1075 U8_APPEND(utf8Buffer, offset, 4, c, failure)1076 ASSERT(!failure);1077 1078 for (CFIndex j = 0; j < offset; ++j) {1079 outBuffer.append('%');1080 outBuffer.append(upperNibbleToASCIIHexDigit(utf8Buffer[j]));1081 outBuffer.append(lowerNibbleToASCIIHexDigit(utf8Buffer[j]));1082 }1083 } else {1084 UChar utf16Buffer[2];1085 CFIndex offset = 0;1086 UBool failure = false;1087 U16_APPEND(utf16Buffer, offset, 2, c, failure)1088 ASSERT(!failure);1089 for (CFIndex j = 0; j < offset; ++j)1090 outBuffer.append(utf16Buffer[j]);1091 }1092 previousCodePoint = c;1093 }1094 1095 return CFStringCreateWithCharacters(nullptr, outBuffer.data(), outBuffer.size());1096 }1097 1098 374 NSString *userVisibleString(NSURL *URL) 1099 375 { 1100 376 NSData *data = originalURLData(URL); 1101 const unsigned char *before = static_cast<const unsigned char*>([data bytes]); 1102 int length = [data length]; 1103 1104 bool mayNeedHostNameDecoding = false; 1105 1106 const unsigned char *p = before; 1107 int bufferLength = (length * 3) + 1; 1108 Vector<char, URL_BYTES_BUFFER_LENGTH> after(bufferLength); // large enough to %-escape every character 1109 char *q = after.data(); 1110 for (int i = 0; i < length; i++) { 1111 unsigned char c = p[i]; 1112 // unescape escape sequences that indicate bytes greater than 0x7f 1113 if (c == '%' && (i + 1 < length && isASCIIHexDigit(p[i + 1])) && i + 2 < length && isASCIIHexDigit(p[i + 2])) { 1114 auto u = toASCIIHexValue(p[i + 1], p[i + 2]); 1115 if (u > 0x7f) { 1116 // unescape 1117 *q++ = u; 1118 } else { 1119 // do not unescape 1120 *q++ = p[i]; 1121 *q++ = p[i + 1]; 1122 *q++ = p[i + 2]; 1123 } 1124 i += 2; 1125 } else { 1126 *q++ = c; 1127 1128 // Check for "xn--" in an efficient, non-case-sensitive, way. 1129 if (c == '-' && i >= 3 && !mayNeedHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-') 1130 mayNeedHostNameDecoding = true; 1131 } 1132 } 1133 *q = '\0'; 1134 1135 // Check string to see if it can be converted to display using UTF-8 1136 RetainPtr<NSString> result = [NSString stringWithUTF8String:after.data()]; 1137 if (!result) { 1138 // Could not convert to UTF-8. 1139 // Convert characters greater than 0x7f to escape sequences. 1140 // Shift current string to the end of the buffer 1141 // then we will copy back bytes to the start of the buffer 1142 // as we convert. 1143 int afterlength = q - after.data(); 1144 char *p = after.data() + bufferLength - afterlength - 1; 1145 memmove(p, after.data(), afterlength + 1); // copies trailing '\0' 1146 char *q = after.data(); 1147 while (*p) { 1148 unsigned char c = *p; 1149 if (c > 0x7f) { 1150 *q++ = '%'; 1151 *q++ = upperNibbleToASCIIHexDigit(c); 1152 *q++ = lowerNibbleToASCIIHexDigit(c); 1153 } else 1154 *q++ = *p; 1155 p++; 1156 } 1157 *q = '\0'; 1158 result = [NSString stringWithUTF8String:after.data()]; 1159 } 1160 1161 if (mayNeedHostNameDecoding) { 1162 // FIXME: Is it good to ignore the failure of mapHostNames and keep result intact? 1163 auto mappedResult = mapHostNames(result.get(), NO); 1164 if (mappedResult) 1165 result = mappedResult; 1166 } 1167 1168 result = [result precomposedStringWithCanonicalMapping]; 1169 return CFBridgingRelease(createStringWithEscapedUnsafeCharacters((__bridge CFStringRef)result.get())); 377 CString string(static_cast<const char*>([data bytes]), [data length]); 378 return userVisibleURL(string); 1170 379 } 1171 380 -
trunk/Source/WebCore/ChangeLog
r239256 r239265 1 2018-12-17 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Tests: enabled fast/url/user-visible/. 9 10 * testing/Internals.cpp: 11 (WebCore::Internals::userVisibleString): Enable method on all platforms. 12 1 13 2018-12-15 Yusuke Suzuki <yusukesuzuki@slowstart.org> 2 14 -
trunk/Source/WebCore/testing/Internals.cpp
r239145 r239265 186 186 #include <wtf/MemoryPressureHandler.h> 187 187 #include <wtf/MonotonicTime.h> 188 #include <wtf/URLHelpers.h> 188 189 #include <wtf/text/StringBuffer.h> 189 190 #include <wtf/text/StringBuilder.h> … … 4210 4211 #if !PLATFORM(COCOA) 4211 4212 4212 String Internals::userVisibleString(const DOMURL&) 4213 { 4214 // Cocoa-specific function. Could ASSERT_NOT_REACHED, but that's probably overkill. 4215 return String(); 4213 String Internals::userVisibleString(const DOMURL& url) 4214 { 4215 return WTF::URLHelpers::userVisibleURL(url.href().string().utf8()); 4216 4216 } 4217 4217 -
trunk/Source/WebKit/ChangeLog
r239264 r239265 1 2018-12-17 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Add webkit_uri_for_display for GTK and WPE. 9 10 * PlatformGTK.cmake: 11 * PlatformWPE.cmake: 12 * SourcesGTK.txt: 13 * SourcesWPE.txt: 14 * UIProcess/API/glib/WebKitURIUtilities.cpp: Added. 15 (webkit_uri_for_display): 16 * UIProcess/API/gtk/WebKitURIUtilities.h: Added. 17 * UIProcess/API/gtk/docs/webkit2gtk-4.0-sections.txt: 18 * UIProcess/API/gtk/docs/webkit2gtk-docs.sgml: 19 * UIProcess/API/gtk/webkit2.h: 20 * UIProcess/API/wpe/WebKitURIUtilities.h: Added. 21 * UIProcess/API/wpe/docs/wpe-0.1-sections.txt: 22 * UIProcess/API/wpe/docs/wpe-docs.sgml: 23 * UIProcess/API/wpe/webkit.h: 24 1 25 2018-12-17 Carlos Garcia Campos <cgarcia@igalia.com> 2 26 -
trunk/Source/WebKit/PlatformGTK.cmake
r238552 r239265 104 104 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitURIResponse.h 105 105 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitURISchemeRequest.h 106 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitURIUtilities.h 106 107 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitUserContent.h 107 108 ${WEBKIT_DIR}/UIProcess/API/gtk/WebKitUserContentManager.h -
trunk/Source/WebKit/PlatformWPE.cmake
r238853 r239265 133 133 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitURIResponse.h 134 134 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitURISchemeRequest.h 135 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitURIUtilities.h 135 136 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitUserContent.h 136 137 ${WEBKIT_DIR}/UIProcess/API/wpe/WebKitUserContentManager.h -
trunk/Source/WebKit/SourcesGTK.txt
r238350 r239265 168 168 UIProcess/API/glib/WebKitUIClient.cpp @no-unify 169 169 UIProcess/API/glib/WebKitURISchemeRequest.cpp @no-unify 170 UIProcess/API/glib/WebKitURIUtilities.cpp @no-unify 170 171 UIProcess/API/glib/WebKitUserContent.cpp @no-unify 171 172 UIProcess/API/glib/WebKitUserContentManager.cpp @no-unify -
trunk/Source/WebKit/SourcesWPE.txt
r238384 r239265 153 153 UIProcess/API/glib/WebKitUIClient.cpp @no-unify 154 154 UIProcess/API/glib/WebKitURISchemeRequest.cpp @no-unify 155 UIProcess/API/glib/WebKitURIUtilities.cpp @no-unify 155 156 UIProcess/API/glib/WebKitUserContent.cpp @no-unify 156 157 UIProcess/API/glib/WebKitUserContentManager.cpp @no-unify -
trunk/Source/WebKit/UIProcess/API/gtk/docs/webkit2gtk-4.0-sections.txt
r238371 r239265 1646 1646 webkit_print_custom_widget_get_type 1647 1647 </SECTION> 1648 1649 <SECTION> 1650 <FILE>WebKitURIUtilities</FILE> 1651 webkit_uri_for_display 1652 </SECTION> -
trunk/Source/WebKit/UIProcess/API/gtk/docs/webkit2gtk-docs.sgml
r238371 r239265 75 75 </chapter> 76 76 77 <chapter> 78 <title>Utilities</title> 79 <xi:include href="xml/WebKitURIUtilities.xml"/> 80 </chapter> 81 77 82 <index id="index-all"> 78 83 <title>Index</title> -
trunk/Source/WebKit/UIProcess/API/gtk/webkit2.h
r238277 r239265 74 74 #include <webkit2/WebKitURIResponse.h> 75 75 #include <webkit2/WebKitURISchemeRequest.h> 76 #include <webkit2/WebKitURIUtilities.h> 76 77 #include <webkit2/WebKitUserContent.h> 77 78 #include <webkit2/WebKitUserContentManager.h> -
trunk/Source/WebKit/UIProcess/API/wpe/docs/wpe-0.1-sections.txt
r239264 r239265 1309 1309 WEBKIT_TYPE_APPLICATION_INFO 1310 1310 </SECTION> 1311 1312 <SECTION> 1313 <FILE>WebKitURIUtilities</FILE> 1314 webkit_uri_for_display 1315 </SECTION> -
trunk/Source/WebKit/UIProcess/API/wpe/docs/wpe-docs.sgml
r238853 r239265 57 57 </chapter> 58 58 59 <chapter> 60 <title>Utilities</title> 61 <xi:include href="xml/WebKitURIUtilities.xml"/> 62 </chapter> 63 59 64 <index id="index-all"> 60 65 <title>Index</title> -
trunk/Source/WebKit/UIProcess/API/wpe/webkit.h
r238277 r239265 68 68 #include <wpe/WebKitURIResponse.h> 69 69 #include <wpe/WebKitURISchemeRequest.h> 70 #include <wpe/WebKitURIUtilities.h> 70 71 #include <wpe/WebKitUserContent.h> 71 72 #include <wpe/WebKitUserContentManager.h> -
trunk/Tools/ChangeLog
r239264 r239265 1 2018-12-17 Ms2ger <Ms2ger@igalia.com> 2 3 [GTK][WPE] Need a function to convert internal URI to display ("pretty") URI 4 https://bugs.webkit.org/show_bug.cgi?id=174816 5 6 Reviewed by Michael Catanzaro. 7 8 Add tests for userVisibleString() and (for GTK and WPE) webkit_uri_for_display(). 9 10 * TestWebKitAPI/CMakeLists.txt: 11 * TestWebKitAPI/TestWebKitAPI.xcodeproj/project.pbxproj: 12 * TestWebKitAPI/Tests/WTF/URLHelpers.cpp: Added. 13 (TestWebKitAPI::TEST): 14 * TestWebKitAPI/Tests/WebKitGLib/TestWebKitURIUtilities.cpp: Added. 15 (testURIForDisplayUnaffected): 16 (testURIForDisplayAffected): 17 (beforeAll): 18 (afterAll): 19 * TestWebKitAPI/glib/CMakeLists.txt: 20 1 21 2018-12-17 Carlos Garcia Campos <cgarcia@igalia.com> 2 22 -
trunk/Tools/TestWebKitAPI/CMakeLists.txt
r238779 r239265 178 178 ${TESTWEBKITAPI_DIR}/Tests/WTF/Time.cpp 179 179 ${TESTWEBKITAPI_DIR}/Tests/WTF/URL.cpp 180 ${TESTWEBKITAPI_DIR}/Tests/WTF/URLHelpers.cpp 180 181 ${TESTWEBKITAPI_DIR}/Tests/WTF/URLParser.cpp 181 182 ${TESTWEBKITAPI_DIR}/Tests/WTF/UniqueArray.cpp -
trunk/Tools/TestWebKitAPI/TestWebKitAPI.xcodeproj/project.pbxproj
r239167 r239265 316 316 5CEAB5E11FA939F400A77FAA /* _WKInputDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5CEAB5DF1FA937CB00A77FAA /* _WKInputDelegate.mm */; }; 317 317 5E4B1D2E1D404C6100053621 /* WKScrollViewDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5E4B1D2C1D404C6100053621 /* WKScrollViewDelegate.mm */; }; 318 5FAD3AE421B97EEE00BEE178 /* URLHelpers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5FAD3AE321B97C5000BEE178 /* URLHelpers.cpp */; }; 318 319 631EFFF61E7B5E8D00D2EBB8 /* Geolocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = 631EFFF51E7B5E8D00D2EBB8 /* Geolocation.mm */; }; 319 320 634910E01E9D3FF300880309 /* CoreLocation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 634910DF1E9D3FF300880309 /* CoreLocation.framework */; }; … … 1663 1664 5CEAB5DF1FA937CB00A77FAA /* _WKInputDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = _WKInputDelegate.mm; sourceTree = "<group>"; }; 1664 1665 5E4B1D2C1D404C6100053621 /* WKScrollViewDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = WKScrollViewDelegate.mm; path = ../ios/WKScrollViewDelegate.mm; sourceTree = "<group>"; }; 1666 5FAD3AE321B97C5000BEE178 /* URLHelpers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = URLHelpers.cpp; path = Tests/WTF/URLHelpers.cpp; sourceTree = "<group>"; }; 1665 1667 631EFFF51E7B5E8D00D2EBB8 /* Geolocation.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = Geolocation.mm; sourceTree = "<group>"; }; 1666 1668 634910DF1E9D3FF300880309 /* CoreLocation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreLocation.framework; path = System/Library/Frameworks/CoreLocation.framework; sourceTree = SDKROOT; }; … … 2287 2289 isa = PBXGroup; 2288 2290 children = ( 2291 5FAD3AE321B97C5000BEE178 /* URLHelpers.cpp */, 2289 2292 08FB7795FE84155DC02AAC07 /* Source */, 2290 2293 BCB9EB66112366D800A137E0 /* Tests */, … … 3815 3818 files = ( 3816 3819 7C83DFA21D0A5AE400FEBCF3 /* mainIOS.mm in Sources */, 3820 5FAD3AE421B97EEE00BEE178 /* URLHelpers.cpp in Sources */, 3817 3821 7C83DFAD1D0A5AE400FEBCF3 /* mainMac.mm in Sources */, 3818 3822 ); -
trunk/Tools/TestWebKitAPI/glib/CMakeLists.txt
r231004 r239265 135 135 ADD_WK2_TEST(TestWebKitSecurityOrigin ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitSecurityOrigin.cpp) 136 136 ADD_WK2_TEST(TestWebKitSettings ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitSettings.cpp) 137 ADD_WK2_TEST(TestWebKitURIUtilities ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitURIUtilities.cpp) 137 138 ADD_WK2_TEST(TestWebKitWebContext ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitWebContext.cpp) 138 139 ADD_WK2_TEST(TestWebKitWebView ${TOOLS_DIR}/TestWebKitAPI/Tests/WebKitGLib/TestWebKitWebView.cpp)
Note: See TracChangeset
for help on using the changeset viewer.