Changeset 261247 in webkit
- Timestamp:
- May 6, 2020 1:02:40 PM (4 years ago)
- Location:
- trunk
- Files:
-
- 1 added
- 11 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r261245 r261247
@@ -1,2 +1,53 @@
+2020-05-06 Wenson Hsieh <wenson_hsieh@apple.com>
+
+Cut and paste from Google Doc to Notes in several (non-Latin) languages doesn't work
+https://bugs.webkit.org/show_bug.cgi?id=211498
+<rdar://problem/56675345>
+
+Reviewed by Darin Adler.
+
+When copying text in Google Docs, the page uses `DataTransfer.setData` to write text/html data to the system
+pasteboard. This markup string includes a meta tag with `charset="utf-8"`, indicating that the HTML string that
+was copied should be interpreted as UTF-8 data.
+
+However, before we write this data to the system pasteboard, we first sanitize it by loading it in a separate
+page, and then build the final sanitized markup string to write by iterating over only visible content in the
+main document of this page. Importantly, this last step skips over the meta element containing the charset.
+
+Later, when pasting in Notes or TextEdit, both apps use `-[NSAttributedString initWithData:...:]` to convert the
+HTML data on the pasteboard into an NSAttributedString. This takes the NSPasteboard's HTML data (a blob of
+`NSData`) and synchronously loads it in a new legacy WebKit view by calling `-[WebFrame
+loadData:MIMEType:textEncodingName:baseURL:]`, passing in `nil` as the text encoding name. Since WebKit is only
+given a blob of data and no particular encoding, we fall back to default Latin-1 encoding, which produces
+gibberish for CJK text.
+
+To fix this, we automatically insert a `<meta charset="utf-8">` tag when writing HTML to the pasteboard, if the
+sanitized markup contains non-ASCII characters.
+
+Test: CopyHTML.SanitizationPreservesCharacterSet
+
+* Modules/async-clipboard/ClipboardItemBindingsDataSource.cpp:
+(WebCore::ClipboardItemBindingsDataSource::ClipboardItemTypeLoader::sanitizeDataIfNeeded):
+
+Pass in AddMetaCharsetIfNeeded::Yes.
+
+* dom/DataTransfer.cpp:
+(WebCore::DataTransfer::setDataFromItemList):
+
+Pass in AddMetaCharsetIfNeeded::Yes here too.
+
+* editing/cocoa/WebContentReaderCocoa.mm:
+(WebCore::sanitizeMarkupWithArchive):
+(WebCore::WebContentReader::readHTML):
+(WebCore::WebContentMarkupReader::readHTML):
+* editing/markup.cpp:
+(WebCore::sanitizeMarkup):
+
+Add a new enum so that we only add the extra meta tag when sanitizing content that is being written to the
+system pasteboard through one of the clipboard DOM APIs.
+
+(WebCore::sanitizedMarkupForFragmentInDocument):
+* editing/markup.h:
+
 2020-05-06 Tim Horton <timothy_horton@apple.com>
 
-
trunk/Source/WebCore/Modules/async-clipboard/ClipboardItemBindingsDataSource.cpp
r254893 r261247
@@ -270,5 +270,5 @@
 return;
 
-m_data = { sanitizeMarkup(markupToSanitize) };
+m_data = { sanitizeMarkup(markupToSanitize, AddMetaCharsetIfNeeded::Yes) };
 }
 
-
trunk/Source/WebCore/dom/DataTransfer.cpp
r259613 r261247
@@ -254,5 +254,5 @@
 String sanitizedData;
 if (type == "text/html")
-sanitizedData = sanitizeMarkup(data);
+sanitizedData = sanitizeMarkup(data, AddMetaCharsetIfNeeded::Yes);
 else if (type == "text/uri-list") {
 auto url = URL({ }, data);
-
trunk/Source/WebCore/editing/cocoa/WebContentReaderCocoa.mm
r261138 r261247 450 450 if (shouldReplaceRichContentWithAttachments()) { 451 451 replaceRichContentWithAttachments(frame, fragment, markupAndArchive.archive->subresources()); 452 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);452 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, AddMetaCharsetIfNeeded::No, msoListQuirks, markupAndArchive.markup); 453 453 } 454 454 … … 493 493 replaceSubresourceURLs(fragment.get(), WTFMove(blobURLMap)); 494 494 495 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, markupAndArchive.markup);495 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, AddMetaCharsetIfNeeded::No, msoListQuirks, markupAndArchive.markup); 496 496 } 497 497 … … 581 581 String markup; 582 582 if (RuntimeEnabledFeatures::sharedFeatures().customPasteboardDataEnabled() && shouldSanitize()) { 583 markup = sanitizeMarkup(stringOmittingMicrosoftPrefix, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {583 markup = sanitizeMarkup(stringOmittingMicrosoftPrefix, AddMetaCharsetIfNeeded::No, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) { 584 584 removeSubresourceURLAttributes(fragment, [] (const URL& url) { 585 585 return shouldReplaceSubresourceURL(url); … … 600 600 String rawHTML = stripMicrosoftPrefix(string); 601 601 if (shouldSanitize()) { 602 markup = sanitizeMarkup(rawHTML, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) {602 markup = sanitizeMarkup(rawHTML, AddMetaCharsetIfNeeded::No, msoListQuirksForMarkup(), WTF::Function<void (DocumentFragment&)> { [] (DocumentFragment& fragment) { 603 603 removeSubresourceURLAttributes(fragment, [] (const URL& url) { 604 604 return shouldReplaceSubresourceURL(url); -
trunk/Source/WebCore/editing/markup.cpp
r259401 r261247 202 202 } 203 203 204 String sanitizeMarkup(const String& rawHTML, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer)204 String sanitizeMarkup(const String& rawHTML, AddMetaCharsetIfNeeded addMetaCharsetIfNeeded, MSOListQuirks msoListQuirks, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer) 205 205 { 206 206 auto page = createPageForSanitizingWebContent(); … … 213 213 (*fragmentSanitizer)(fragment); 214 214 215 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, msoListQuirks, rawHTML);215 return sanitizedMarkupForFragmentInDocument(WTFMove(fragment), *stagingDocument, addMetaCharsetIfNeeded, msoListQuirks, rawHTML); 216 216 } 217 217 … … 946 946 } 947 947 948 String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, MSOListQuirks msoListQuirks, const String& originalMarkup)948 String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&& fragment, Document& document, AddMetaCharsetIfNeeded addMetaCharsetIfNeeded, MSOListQuirks msoListQuirks, const String& originalMarkup) 949 949 { 950 950 MSOListMode msoListMode = msoListQuirks == MSOListQuirks::CheckIfNeeded && shouldPreserveMSOLists(originalMarkup) … … 959 959 ResolveURLs::YesExcludingLocalFileURLsForPrivacy, SerializeComposedTree::No, AnnotateForInterchange::Yes, ConvertBlocksToInlines::No, StandardFontFamilySerializationMode::Strip, msoListMode); 960 960 961 StringBuilder builder; 961 962 if (msoListMode == MSOListMode::Preserve) { 962 StringBuilder builder;963 963 builder.appendLiteral("<html xmlns:o=\"urn:schemas-microsoft-com:office:office\"\n" 964 964 "xmlns:w=\"urn:schemas-microsoft-com:office:word\"\n" 965 965 "xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\"\n" 966 966 "xmlns=\"http://www.w3.org/TR/REC-html40\">"); 967 builder.append(result); 967 } 968 969 #if PLATFORM(COCOA) 970 if (addMetaCharsetIfNeeded == AddMetaCharsetIfNeeded::Yes && 
!result.isAllASCII()) { 971 // On Cocoa platforms, this markup is eventually persisted to the pasteboard and read back as UTF-8 data, 972 // so this meta tag is needed for clients that read this data in the future from the pasteboard and load it. 973 // This logic is used by both DataTransfer and Clipboard APIs to sanitize "text/html" from the page. 974 builder.appendLiteral("<meta charset=\"UTF-8\">"); 975 } 976 #else 977 UNUSED_PARAM(addMetaCharsetIfNeeded); 978 #endif 979 980 builder.append(result); 981 982 if (msoListMode == MSOListMode::Preserve) 968 983 builder.appendLiteral("</html>"); 969 return builder.toString(); 970 } 971 972 return result; 984 985 return builder.toString(); 973 986 } 974 987 -
trunk/Source/WebCore/editing/markup.h
r246490 r261247
@@ -53,8 +53,9 @@
 void removeSubresourceURLAttributes(Ref<DocumentFragment>&&, WTF::Function<bool(const URL&)> shouldRemoveURL);
 
-enum class MSOListQuirks { CheckIfNeeded, Disabled };
 std::unique_ptr<Page> createPageForSanitizingWebContent();
-String sanitizeMarkup(const String&, MSOListQuirks = MSOListQuirks::Disabled, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer = WTF::nullopt);
-String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&&, Document&, MSOListQuirks, const String& originalMarkup);
+enum class MSOListQuirks : bool { CheckIfNeeded, Disabled };
+enum class AddMetaCharsetIfNeeded : bool { No, Yes };
+String sanitizeMarkup(const String&, AddMetaCharsetIfNeeded = AddMetaCharsetIfNeeded::No, MSOListQuirks = MSOListQuirks::Disabled, Optional<WTF::Function<void(DocumentFragment&)>> fragmentSanitizer = WTF::nullopt);
+String sanitizedMarkupForFragmentInDocument(Ref<DocumentFragment>&&, Document&, AddMetaCharsetIfNeeded, MSOListQuirks, const String& originalMarkup);
 
 WEBCORE_EXPORT Ref<DocumentFragment> createFragmentFromText(Range& context, const String& text);
-
trunk/Source/WebKit/ChangeLog
r261246 r261247
@@ -1,2 +1,17 @@
+2020-05-06 Wenson Hsieh <wenson_hsieh@apple.com>
+
+Cut and paste from Google Doc to Notes in several (non-Latin) languages doesn't work
+https://bugs.webkit.org/show_bug.cgi?id=211498
+<rdar://problem/56675345>
+
+Reviewed by Darin Adler.
+
+Add a new header to allow Cocoa code to reason about UIColors and NSColors on iOS and macOS (respectively)
+without requiring platform ifdefs. A followup patch will adopt this in several places in WebKit, where we
+currently need ifdefs for iOS and macOS.
+
+* Platform/cocoa/CocoaColor.h: Added.
+* WebKit.xcodeproj/project.pbxproj:
+
 2020-05-06 Antoine Quint <graouts@apple.com>
 
-
trunk/Source/WebKit/WebKit.xcodeproj/project.pbxproj
r261163 r261247 1848 1848 F4DB54E62319E733009E3155 /* WKHighlightLongPressGestureRecognizer.h in Headers */ = {isa = PBXBuildFile; fileRef = F4DB54E42319E733009E3155 /* WKHighlightLongPressGestureRecognizer.h */; }; 1849 1849 F4EC94E32356CC57000BB614 /* ApplicationServicesSPI.h in Headers */ = {isa = PBXBuildFile; fileRef = 29D04E2821F7C73D0076741D /* ApplicationServicesSPI.h */; }; 1850 F4FE0A3B24632B60002631E1 /* CocoaColor.h in Headers */ = {isa = PBXBuildFile; fileRef = F4FE0A3A24632B10002631E1 /* CocoaColor.h */; }; 1850 1851 F6113E25126CE1820057D0A7 /* APIUserContentURLPattern.h in Headers */ = {isa = PBXBuildFile; fileRef = F6113E24126CE1820057D0A7 /* APIUserContentURLPattern.h */; }; 1851 1852 F6113E29126CE19B0057D0A7 /* WKUserContentURLPattern.h in Headers */ = {isa = PBXBuildFile; fileRef = F6113E27126CE19B0057D0A7 /* WKUserContentURLPattern.h */; settings = {ATTRIBUTES = (Private, ); }; }; … … 5373 5374 F4F59AD32065A5C9006CAA46 /* WKSelectMenuListViewController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = WKSelectMenuListViewController.mm; path = ios/forms/WKSelectMenuListViewController.mm; sourceTree = "<group>"; }; 5374 5375 F4F59AD42065A5CA006CAA46 /* WKSelectMenuListViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = WKSelectMenuListViewController.h; path = ios/forms/WKSelectMenuListViewController.h; sourceTree = "<group>"; }; 5376 F4FE0A3A24632B10002631E1 /* CocoaColor.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CocoaColor.h; sourceTree = "<group>"; }; 5375 5377 F6113E24126CE1820057D0A7 /* APIUserContentURLPattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = APIUserContentURLPattern.h; sourceTree = "<group>"; }; 5376 5378 F6113E26126CE19B0057D0A7 /* WKUserContentURLPattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.cpp; path = WKUserContentURLPattern.cpp; sourceTree = "<group>"; }; … … 7685 7687 isa = PBXGroup; 7686 7688 children = ( 7689 F4FE0A3A24632B10002631E1 /* CocoaColor.h */, 7687 7690 4482734624528F6000A95493 /* CocoaImage.h */, 7688 7691 BCE0937614FB128B001138D9 /* LayerHostingContext.h */, … … 10729 10732 1AA2E51D12E4C05E00BC4966 /* CGUtilities.h in Headers */, 10730 10733 57B4B46020B504AC00D4AD79 /* ClientCertificateAuthenticationXPCConstants.h in Headers */, 10734 F4FE0A3B24632B60002631E1 /* CocoaColor.h in Headers */, 10731 10735 4482734724528F6000A95493 /* CocoaImage.h in Headers */, 10732 10736 CE11AD521CBC482F00681EE5 /* CodeSigning.h in Headers */, -
trunk/Tools/ChangeLog
r261243 r261247
@@ -1,2 +1,22 @@
+2020-05-06 Wenson Hsieh <wenson_hsieh@apple.com>
+
+Cut and paste from Google Doc to Notes in several (non-Latin) languages doesn't work
+https://bugs.webkit.org/show_bug.cgi?id=211498
+<rdar://problem/56675345>
+
+Reviewed by Darin Adler.
+
+Add a test to verify that when writing markup to the clipboard via DOM API, if non-ASCII characters appear in
+the written markup, they can still be converted to `NSAttributedString`s containing the expected non-Latin text.
+
+* TestWebKitAPI/Configurations/Base.xcconfig:
+
+Adjust header search paths so that we can import CocoaColor.h in WebKit.
+
+* TestWebKitAPI/Tests/WebKitCocoa/CopyHTML.mm:
+(readHTMLDataFromPasteboard):
+(readHTMLStringFromPasteboard):
+(readHTMLFromPasteboard): Deleted.
+
 2020-05-06 Ryan Haddad <ryanhaddad@apple.com>
 
-
trunk/Tools/TestWebKitAPI/Configurations/Base.xcconfig
r259466 r261247
@@ -34,5 +34,5 @@
 CLANG_ENABLE_OBJC_WEAK = YES;
 CLANG_WARN_CXX0X_EXTENSIONS = NO;
-HEADER_SEARCH_PATHS = ${BUILT_PRODUCTS_DIR}/usr/local/include $(WEBCORE_PRIVATE_HEADERS_DIR)/ForwardingHeaders $(BUILT_PRODUCTS_DIR)/WebCoreTestSupport ${SRCROOT};
+HEADER_SEARCH_PATHS = ${BUILT_PRODUCTS_DIR}/usr/local/include $(WEBCORE_PRIVATE_HEADERS_DIR)/ForwardingHeaders $(BUILT_PRODUCTS_DIR)/WebCoreTestSupport ${SRCROOT} $(SRCROOT)/../../Source/WebKit/Platform/cocoa;
 
 GCC_NO_COMMON_BLOCKS = YES;
-
trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/CopyHTML.mm
r260366 r261247 29 29 #if PLATFORM(COCOA) 30 30 31 #import "CocoaColor.h" 31 32 #import "PlatformUtilities.h" 32 33 #import "TestWKWebView.h" … … 45 46 46 47 #if PLATFORM(MAC) 47 NSString *readHTMLFromPasteboard() 48 49 NSData *readHTMLDataFromPasteboard() 50 { 51 return [[NSPasteboard generalPasteboard] dataForType:NSHTMLPboardType]; 52 } 53 54 NSString *readHTMLStringFromPasteboard() 48 55 { 49 56 return [[NSPasteboard generalPasteboard] stringForType:NSHTMLPboardType]; 50 57 } 58 51 59 #else 52 NSString *readHTMLFromPasteboard() 60 61 NSData *readHTMLDataFromPasteboard() 62 { 63 return [[UIPasteboard generalPasteboard] dataForPasteboardType:(__bridge NSString *)kUTTypeHTML]; 64 } 65 66 NSString *readHTMLStringFromPasteboard() 53 67 { 54 68 id value = [[UIPasteboard generalPasteboard] valueForPasteboardType:(__bridge NSString *)kUTTypeHTML]; … … 58 72 return (NSString *)value; 59 73 } 74 60 75 #endif 61 76 … … 81 96 EXPECT_WK_STREQ("<meta content=\"secret\"><b onmouseover=\"dangerousCode()\">hello</b><!-- secret-->, world<script>dangerousCode()</script>", 82 97 [webView stringByEvaluatingJavaScript:@"pastedHTML"]); 83 String htmlInNativePasteboard = readHTML FromPasteboard();98 String htmlInNativePasteboard = readHTMLStringFromPasteboard(); 84 99 EXPECT_TRUE(htmlInNativePasteboard.contains("hello")); 85 100 EXPECT_TRUE(htmlInNativePasteboard.contains(", world")); 86 101 EXPECT_FALSE(htmlInNativePasteboard.contains("secret")); 87 102 EXPECT_FALSE(htmlInNativePasteboard.contains("dangerousCode")); 103 } 104 105 TEST(CopyHTML, SanitizationPreservesCharacterSet) 106 { 107 Vector<std::pair<RetainPtr<NSString>, RetainPtr<NSData>>, 3> markupStringsAndData; 108 auto webView = createWebViewWithCustomPasteboardDataEnabled(); 109 for (NSString *encodingName in @[ @"utf-8", @"windows-1252", @"bogus-encoding" ]) { 110 [webView synchronouslyLoadHTMLString:[NSString stringWithFormat:@"<!DOCTYPE html>" 111 "<body>" 112 "<meta charset='%@'>" 113 "<p id='copy'>Copy me</p>" 114 
"<script>" 115 "copy.addEventListener('copy', e => {" 116 " e.clipboardData.setData('text/html', `<span style='color: red;'>我叫謝文昇</span>`);" 117 " e.preventDefault();" 118 "});" 119 "getSelection().selectAllChildren(copy);" 120 "</script>" 121 "</body>", encodingName]]; 122 [webView copy:nil]; 123 [webView waitForNextPresentationUpdate]; 124 125 markupStringsAndData.append({ readHTMLStringFromPasteboard(), readHTMLDataFromPasteboard() }); 126 } 127 128 for (auto& [copiedMarkup, copiedData] : markupStringsAndData) { 129 EXPECT_TRUE([copiedMarkup containsString:@"<span "]); 130 EXPECT_TRUE([copiedMarkup containsString:@"color: red;"]); 131 EXPECT_TRUE([copiedMarkup containsString:@"我叫謝文昇"]); 132 EXPECT_TRUE([copiedMarkup containsString:@"</span>"]); 133 134 NSError *attributedStringConversionError = nil; 135 136 auto attributedString = adoptNS([[NSAttributedString alloc] initWithData:copiedData.get() options:@{ NSDocumentTypeDocumentOption: NSHTMLTextDocumentType } documentAttributes:nil error:&attributedStringConversionError]); 137 EXPECT_WK_STREQ("我叫謝文昇", [attributedString string]); 138 139 __block BOOL foundColorAttribute = NO; 140 [attributedString enumerateAttribute:NSForegroundColorAttributeName inRange:NSMakeRange(0, 5) options:0 usingBlock:^(CocoaColor *color, NSRange range, BOOL *) { 141 CGFloat redComponent = 0; 142 CGFloat greenComponent = 0; 143 CGFloat blueComponent = 0; 144 [color getRed:&redComponent green:&greenComponent blue:&blueComponent alpha:nil]; 145 146 EXPECT_EQ(1., redComponent); 147 EXPECT_EQ(0., greenComponent); 148 EXPECT_EQ(0., blueComponent); 149 foundColorAttribute = YES; 150 }]; 151 EXPECT_TRUE(foundColorAttribute); 152 } 88 153 } 89 154
Note: See TracChangeset
for help on using the changeset viewer.