Changeset 85244 in webkit


Ignore:
Timestamp:
Apr 28, 2011 3:03:09 PM (13 years ago)
Author:
abarth@webkit.org
Message:

2011-04-28 Jay Civelli <jcivelli@chromium.org>

Reviewed by Adam Barth.

Introducing a new class that allows serialization of a page back
to HTML/XML with all its sub-frames and also retrieves the page's
resources.
Tests: unit-tests in chromium/tests/WebPageNewSerializerTest.cpp
https://bugs.webkit.org/show_bug.cgi?id=58947

  • Android.mk:
  • CMakeLists.txt:
  • GNUmakefile.list.am:
  • WebCore.gypi:
  • WebCore.pro:
  • WebCore.vcproj/WebCore.vcproj:
  • dom/Document.cpp: (WebCore::Document::suggestedMIMEType):
  • dom/Document.h:
  • editing/MarkupAccumulator.cpp: (WebCore::MarkupAccumulator::appendCustomAttributes): (WebCore::MarkupAccumulator::appendElement):
  • editing/MarkupAccumulator.h:
  • html/parser/HTMLMetaCharsetParser.cpp: (WebCore::HTMLMetaCharsetParser::processMeta): (WebCore::HTMLMetaCharsetParser::encodingFromMetaAttributes):
  • html/parser/HTMLMetaCharsetParser.h:
  • page/PageSerializer.cpp: Added.
  • page/PageSerializer.h: Added.

2011-04-28 Jay Civelli <jcivelli@chromium.org>

Reviewed by Adam Barth.

Introducing a new API to serialize a WebView back to HTML.
https://bugs.webkit.org/show_bug.cgi?id=58947

  • WebKit.gyp:
  • public/WebPageSerializer.h:
  • src/WebPageSerializer.cpp: (WebKit::WebPageSerializer::serialize):
  • tests/WebPageNewSerializerTest.cpp: Added.
  • tests/data/pageserializer: Added.
  • tests/data/pageserializer/blank_frames.html: Added.
  • tests/data/pageserializer/blue_background.png: Added.
  • tests/data/pageserializer/css_test_page.html: Added.
  • tests/data/pageserializer/green_background.png: Added.
  • tests/data/pageserializer/iframe.html: Added.
  • tests/data/pageserializer/iframe2.html: Added.
  • tests/data/pageserializer/import_style_from_link.css: Added.
  • tests/data/pageserializer/import_styles.css: Added.
  • tests/data/pageserializer/link_styles.css: Added.
  • tests/data/pageserializer/ol-dot.png: Added.
  • tests/data/pageserializer/orange_background.png: Added.
  • tests/data/pageserializer/purple_background.png: Added.
  • tests/data/pageserializer/red_background.png: Added.
  • tests/data/pageserializer/simple.xhtml: Added.
  • tests/data/pageserializer/top_frame.html: Added.
  • tests/data/pageserializer/ul-dot.png: Added.
  • tests/data/pageserializer/yellow_background.png: Added.
Location:
trunk/Source
Files:
13 added
17 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/CMakeLists.txt

    r85180 r85244  
    10131013    page/PageGroup.cpp
    10141014    page/PageGroupLoadDeferrer.cpp
     1015    page/PageSerializer.cpp
    10151016    page/Performance.cpp
    10161017    page/PerformanceNavigation.cpp
  • trunk/Source/WebCore/ChangeLog

    r85238 r85244  
     12011-04-28  Jay Civelli  <jcivelli@chromium.org>
     2
     3        Reviewed by Adam Barth.
     4
     5        Introducing a new class that allows serialization of a page back
     6        to HTML/XML with all its sub-frames and also retrieves the page's
     7        resources.
     8        Tests: unit-tests in chromium/tests/WebPageNewSerializerTest.cpp
     9        https://bugs.webkit.org/show_bug.cgi?id=58947
     10
     11        * Android.mk:
     12        * CMakeLists.txt:
     13        * GNUmakefile.list.am:
     14        * WebCore.gypi:
     15        * WebCore.pro:
     16        * WebCore.vcproj/WebCore.vcproj:
     17        * dom/Document.cpp:
     18        (WebCore::Document::suggestedMIMEType):
     19        * dom/Document.h:
     20        * editing/MarkupAccumulator.cpp:
     21        (WebCore::MarkupAccumulator::appendCustomAttributes):
     22        (WebCore::MarkupAccumulator::appendElement):
     23        * editing/MarkupAccumulator.h:
     24        * html/parser/HTMLMetaCharsetParser.cpp:
     25        (WebCore::HTMLMetaCharsetParser::processMeta):
     26        (WebCore::HTMLMetaCharsetParser::encodingFromMetaAttributes):
     27        * html/parser/HTMLMetaCharsetParser.h:
     28        * page/PageSerializer.cpp: Added.
     29        * page/PageSerializer.h: Added.
     30
    1312011-04-28  Mark Pilgrim  <pilgrim@chromium.org>
    232
  • trunk/Source/WebCore/GNUmakefile.list.am

    r85214 r85244  
    22182218        Source/WebCore/page/PageGroupLoadDeferrer.cpp \
    22192219        Source/WebCore/page/PageGroupLoadDeferrer.h \
     2220        Source/WebCore/page/PageSerializer.cpp \
     2221        Source/WebCore/page/PageSerializer.h \
    22202222        Source/WebCore/page/Performance.cpp \
    22212223        Source/WebCore/page/Performance.h \
  • trunk/Source/WebCore/WebCore.gypi

    r85191 r85244  
    712712            'page/Page.h',
    713713            'page/PageGroup.h',
     714            'page/PageSerializer.h',
    714715            'page/PluginHalterClient.h',
    715716            'page/PositionCallback.h',
     
    34363437            'page/PageGroupLoadDeferrer.cpp',
    34373438            'page/PageGroupLoadDeferrer.h',
     3439            'page/PageSerializer.cpp',
    34383440            'page/Performance.cpp',
    34393441            'page/Performance.h',
  • trunk/Source/WebCore/WebCore.pro

    r85180 r85244  
    923923    page/PageGroup.cpp \
    924924    page/PageGroupLoadDeferrer.cpp \
     925    page/PageSerializer.cpp \
    925926    page/Performance.cpp \
    926927    page/PerformanceNavigation.cpp \
     
    18631864    page/PageGroupLoadDeferrer.h \
    18641865    page/Page.h \
     1866    page/PageSerializer.h \
    18651867    page/PluginHalter.h \
    18661868    page/PluginHalterClient.h \
  • trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj

    r85180 r85244  
    2483424834                                >
    2483524835                        </File>
     24836                        <File
     24837                                RelativePath="..\page\PageSerializer.cpp"
     24838                                >
     24839                        </File>
     24840                        <File
     24841                                RelativePath="..\page\PageSerializer.h"
     24842                                >
     24843                        </File>
    2483624844                        <File
    2483724845                                RelativePath="..\page\Performance.cpp"
  • trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj

    r85180 r85244  
    852852                33D0212D131DB37B004091A8 /* CookieStorage.h in Headers */ = {isa = PBXBuildFile; fileRef = E13F01EA1270E10D00DFBA71 /* CookieStorage.h */; settings = {ATTRIBUTES = (Private, ); }; };
    853853                371A67CB11C6C7DB00047B8B /* HyphenationCF.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 371A67CA11C6C7DB00047B8B /* HyphenationCF.cpp */; };
     854                371E65CC13661EDC00BEEDB0 /* PageSerializer.h in Headers */ = {isa = PBXBuildFile; fileRef = 371E65CB13661EDC00BEEDB0 /* PageSerializer.h */; };
     855                371E65CE13661EED00BEEDB0 /* PageSerializer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 371E65CD13661EED00BEEDB0 /* PageSerializer.cpp */; };
    854856                371F4F400D25B9AF00ECE0D5 /* FontData.h in Headers */ = {isa = PBXBuildFile; fileRef = 371F4F3E0D25B9AF00ECE0D5 /* FontData.h */; settings = {ATTRIBUTES = (Private, ); }; };
    855857                371F4F410D25B9AF00ECE0D5 /* FontData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 371F4F3F0D25B9AF00ECE0D5 /* FontData.cpp */; };
     
    73487350                33C0CCD3112C5E6200CE057D /* SecureTextInput.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SecureTextInput.h; sourceTree = "<group>"; };
    73497351                371A67CA11C6C7DB00047B8B /* HyphenationCF.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HyphenationCF.cpp; sourceTree = "<group>"; };
     7352                371E65CB13661EDC00BEEDB0 /* PageSerializer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PageSerializer.h; sourceTree = "<group>"; };
     7353                371E65CD13661EED00BEEDB0 /* PageSerializer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PageSerializer.cpp; sourceTree = "<group>"; };
    73507354                371F4F3E0D25B9AF00ECE0D5 /* FontData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FontData.h; sourceTree = "<group>"; };
    73517355                371F4F3F0D25B9AF00ECE0D5 /* FontData.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FontData.cpp; sourceTree = "<group>"; };
     
    1438514389                                7A674BD90F9EBF4E006CF099 /* PageGroupLoadDeferrer.cpp */,
    1438614390                                7A674BDA0F9EBF4E006CF099 /* PageGroupLoadDeferrer.h */,
     14391                                371E65CD13661EED00BEEDB0 /* PageSerializer.cpp */,
     14392                                371E65CB13661EDC00BEEDB0 /* PageSerializer.h */,
    1438714393                                8A844D0111D3C18E0014065C /* Performance.cpp */,
    1438814394                                8A844D0211D3C18E0014065C /* Performance.h */,
     
    2298122987                                F3810C1C1365A49600ED6E33 /* InspectorWorkerAgent.h in Headers */,
    2298222988                                F3810C1E1365A4D400ED6E33 /* WorkerContextInspectorProxy.h in Headers */,
     22989                                371E65CC13661EDC00BEEDB0 /* PageSerializer.h in Headers */,
    2298322990                        );
    2298422991                        runOnlyForDeploymentPostprocessing = 0;
     
    2567925686                                F3810C1B1365A49600ED6E33 /* InspectorWorkerAgent.cpp in Sources */,
    2568025687                                781755861365A1B00093BE2E /* DataTransferItems.cpp in Sources */,
     25688                                371E65CE13661EED00BEEDB0 /* PageSerializer.cpp in Sources */,
    2568125689                        );
    2568225690                        runOnlyForDeploymentPostprocessing = 0;
  • trunk/Source/WebCore/dom/Document.cpp

    r85036 r85244  
    10811081}
    10821082
     1083String Document::suggestedMIMEType() const
     1084{
     1085    if (m_document->isXHTMLDocument())
     1086        return "application/xhtml+xml";
     1087    if (m_document->isSVGDocument())
     1088        return "image/svg+xml";
     1089    if (m_document->xmlStandalone())
     1090        return "text/xml";
     1091    if (m_document->isHTMLDocument())
     1092        return "text/html";
     1093
     1094    return m_documentLoader->responseMIMEType();
     1095}
     1096
    10831097// FIXME: We need to discuss the DOM API here at some point. Ideas:
    10841098// * making it receive a rect as parameter, i.e. nodesFromRect(x, y, w, h);
  • trunk/Source/WebCore/dom/Document.h

    r84665 r85244  
    358358    void setContent(const String&);
    359359
     360    String suggestedMIMEType() const;
     361
    360362    String contentLanguage() const { return m_contentLanguage; }
    361363    void setContentLanguage(const String& lang) { m_contentLanguage = lang; }
  • trunk/Source/WebCore/editing/MarkupAccumulator.cpp

    r84520 r85244  
    159159}
    160160
     161void MarkupAccumulator::appendCustomAttributes(Vector<UChar>&, Element*, Namespaces*)
     162{
     163}
     164
    161165void MarkupAccumulator::appendQuotedURLAttributeValue(Vector<UChar>& result, const String& urlString)
    162166{
     
    328332        appendAttribute(out, element, *attributes->attributeItem(i), namespaces);
    329333
     334    // Give an opportunity to subclasses to add their own attributes.
     335    appendCustomAttributes(out, element, namespaces);
     336
    330337    appendCloseTag(out, element);
    331338}
  • trunk/Source/WebCore/editing/MarkupAccumulator.h

    r76723 r85244  
    7575    virtual void appendString(const String&);
    7676    void appendStartTag(Node*, Namespaces* = 0);
    77     void appendEndTag(Node*);
     77    virtual void appendEndTag(Node*);
    7878    static size_t totalLength(const Vector<String>&);
    7979    size_t length() const { return totalLength(m_succeedingMarkup); }
    8080    void concatenateMarkup(Vector<UChar>& out);
    8181    void appendAttributeValue(Vector<UChar>& result, const String& attribute, bool documentIsHTML);
     82    virtual void appendCustomAttributes(Vector<UChar>&, Element*, Namespaces*);
    8283    void appendQuotedURLAttributeValue(Vector<UChar>& result, const String& urlString);
    8384    void appendNodeValue(Vector<UChar>& out, const Node*, const Range*, EntityMask);
     
    8586    bool shouldAddNamespaceAttribute(const Attribute&, Namespaces&);
    8687    void appendNamespace(Vector<UChar>& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces&);
    87     EntityMask entityMaskForText(Text* text) const;
     88    EntityMask entityMaskForText(Text*) const;
    8889    virtual void appendText(Vector<UChar>& out, Text*);
    8990    void appendComment(Vector<UChar>& out, const String& comment);
     
    9192    void appendProcessingInstruction(Vector<UChar>& out, const String& target, const String& data);
    9293    virtual void appendElement(Vector<UChar>& out, Element*, Namespaces*);
    93     void appendOpenTag(Vector<UChar>& out, Element* element, Namespaces*);
    94     void appendCloseTag(Vector<UChar>& out, Element* element);
    95     void appendAttribute(Vector<UChar>& out, Element* element, const Attribute&, Namespaces*);
     94    void appendOpenTag(Vector<UChar>& out, Element*, Namespaces*);
     95    void appendCloseTag(Vector<UChar>& out, Element*);
     96    void appendAttribute(Vector<UChar>& out, Element*, const Attribute&, Namespaces*);
    9697    void appendCDATASection(Vector<UChar>& out, const String& section);
    9798    void appendStartMarkup(Vector<UChar>& result, const Node*, Namespaces*);
  • trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.cpp

    r73756 r85244  
    103103bool HTMLMetaCharsetParser::processMeta()
    104104{
     105    const HTMLToken::AttributeList& tokenAttributes = m_token.attributes();
     106    AttributeList attributes;
     107    for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin(); iter != tokenAttributes.end(); ++iter) {
     108        String attributeName(iter->m_name.data(), iter->m_name.size());
     109        String attributeValue(iter->m_value.data(), iter->m_value.size());
     110        attributes.append(make_pair(attributeName, attributeValue));
     111    }
     112
     113    m_encoding = encodingFromMetaAttributes(attributes);
     114    return m_encoding.isValid();
     115}
     116
     117TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const AttributeList& attributes)
     118{
    105119    bool gotPragma = false;
    106120    Mode mode = None;
    107121    String charset;
    108122
    109     const HTMLToken::AttributeList& attributes = m_token.attributes();
    110     for (HTMLToken::AttributeList::const_iterator iter = attributes.begin();
    111          iter != attributes.end(); ++iter) {
    112         AtomicString attributeName(iter->m_name.data(), iter->m_name.size());
    113         String attributeValue(iter->m_value.data(), iter->m_value.size());
     123    for (AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
     124        const AtomicString& attributeName = iter->first;
     125        const String& attributeValue = iter->second;
    114126
    115127        if (attributeName == http_equivAttr) {
     
    128140    }
    129141
    130     if (mode == Charset || (mode == Pragma && gotPragma)) {
    131         m_encoding = TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));
    132         if (m_encoding.isValid())
    133             return true;
    134     }
    135 
    136     return false;
     142    if (mode == Charset || (mode == Pragma && gotPragma))
     143        return TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));
     144
     145    return TextEncoding();
    137146}
    138147
  • trunk/Source/WebCore/html/parser/HTMLMetaCharsetParser.h

    r76248 r85244  
    4949    const TextEncoding& encoding() { return m_encoding; }
    5050
     51    typedef Vector<pair<String, String> > AttributeList;
     52    // The returned encoding might not be valid.
     53    static TextEncoding encodingFromMetaAttributes(const AttributeList&
     54);
     55
    5156private:
    5257    HTMLMetaCharsetParser();
    5358
    5459    bool processMeta();
    55     String extractCharset(const String&);
     60    static String extractCharset(const String&);
    5661
    5762    enum Mode {
  • trunk/Source/WebKit/chromium/ChangeLog

    r85241 r85244  
     12011-04-28  Jay Civelli  <jcivelli@chromium.org>
     2
     3        Reviewed by Adam Barth.
     4
     5        Introducing a new API to serialize a WebView back to HTML.
     6        https://bugs.webkit.org/show_bug.cgi?id=58947
     7
     8        * WebKit.gyp:
     9        * public/WebPageSerializer.h:
     10        * src/WebPageSerializer.cpp:
     11        (WebKit::WebPageSerializer::serialize):
     12        * tests/WebPageNewSerializerTest.cpp: Added.
     13        * tests/data/pageserializer: Added.
     14        * tests/data/pageserializer/blank_frames.html: Added.
     15        * tests/data/pageserializer/blue_background.png: Added.
     16        * tests/data/pageserializer/css_test_page.html: Added.
     17        * tests/data/pageserializer/green_background.png: Added.
     18        * tests/data/pageserializer/iframe.html: Added.
     19        * tests/data/pageserializer/iframe2.html: Added.
     20        * tests/data/pageserializer/import_style_from_link.css: Added.
     21        * tests/data/pageserializer/import_styles.css: Added.
     22        * tests/data/pageserializer/link_styles.css: Added.
     23        * tests/data/pageserializer/ol-dot.png: Added.
     24        * tests/data/pageserializer/orange_background.png: Added.
     25        * tests/data/pageserializer/purple_background.png: Added.
     26        * tests/data/pageserializer/red_background.png: Added.
     27        * tests/data/pageserializer/simple.xhtml: Added.
     28        * tests/data/pageserializer/top_frame.html: Added.
     29        * tests/data/pageserializer/ul-dot.png: Added.
     30        * tests/data/pageserializer/yellow_background.png: Added.
     31
    1322011-04-28  John Abd-El-Malek  <jam@chromium.org>
    233
  • trunk/Source/WebKit/chromium/WebKit.gypi

    r84960 r85244  
    7878                    'tests/TransparencyWinTest.cpp',
    7979                    'tests/UniscribeHelperTest.cpp',
     80                    'tests/WebPageNewSerializerTest.cpp',
    8081                    'tests/WebPageSerializerTest.cpp',
    8182                ],
  • trunk/Source/WebKit/chromium/public/WebPageSerializer.h

    r81846 r85244  
    3232#define WebPageSerializer_h
    3333
     34#include "WebCString.h"
    3435#include "WebCommon.h"
     36#include "WebURL.h"
    3537
    3638namespace WebKit {
    37 class WebCString;
    3839class WebFrame;
    3940class WebPageSerializerClient;
    4041class WebString;
    41 class WebURL;
    4242class WebView;
    4343template <typename T> class WebVector;
     
    4747class WebPageSerializer {
    4848public:
     49    struct Resource {
     50        WebURL url;
     51        WebCString mimeType;
     52        WebCString data;
     53    };
     54
     55    // Serializes all the frames from the WebView, retrieves the page's
     56    // resources (such as images and CSS) and adds them to the passed vector.
     57    // The first resource in that vector is the top frame contents.
     58    // Note that this also strips out any script tag or link to JavaScript.
     59    WEBKIT_API static void serialize(WebView*, WebVector<Resource>*);
     60
     61    // IMPORTANT:
     62    // The API below is an older implementation of page serialization that
     63    // will be removed soon.
     64
     65
    4966    // This function will find out all frames and serialize them to HTML data.
    5067    // We have a data buffer to temporarily save generated HTML data. We will
     
    6582    // The parameter localDirectoryName is relative path of directory which
    6683    // contain all saved auxiliary files included all sub frames and resources.
    67     WEBKIT_API static bool serialize(WebFrame* frame,
     84    WEBKIT_API static bool serialize(WebFrame*,
    6885                                     bool recursive,
    69                                      WebPageSerializerClient* client,
     86                                     WebPageSerializerClient*,
    7087                                     const WebVector<WebURL>& links,
    7188                                     const WebVector<WebString>& localPaths,
     
    86103    WEBKIT_API static WebString generateMetaCharsetDeclaration(const WebString& charset);
    87104    // Generate the MOTW declaration.
    88     WEBKIT_API static WebString generateMarkOfTheWebDeclaration(const WebURL& url);
     105    WEBKIT_API static WebString generateMarkOfTheWebDeclaration(const WebURL&);
    89106    // Generate the default base tag declaration.
    90107    WEBKIT_API static WebString generateBaseTagDeclaration(const WebString& baseTarget);
  • trunk/Source/WebKit/chromium/src/WebPageSerializer.cpp

    r82325 r85244  
    4040#include "HTMLNames.h"
    4141#include "KURL.h"
     42#include "PageSerializer.h"
    4243#include "Vector.h"
    4344
     
    5152#include "WebVector.h"
    5253#include "WebView.h"
     54#include "WebViewImpl.h"
    5355
    5456#include <wtf/text/StringConcatenate.h>
     
    180182namespace WebKit {
    181183
     184void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam)
     185{
     186    Vector<PageSerializer::Resource> resources;
     187    PageSerializer serializer(&resources);
     188    serializer.serialize(static_cast<WebViewImpl*>(view)->page());
     189
     190    Vector<Resource> result;
     191    for (Vector<PageSerializer::Resource>::const_iterator iter = resources.begin(); iter != resources.end(); ++iter) {
     192        Resource resource;
     193        resource.url = iter->url;
     194        resource.mimeType = iter->mimeType.ascii();
     195        // FIXME: we are copying all the resource data here. Ideally we would have a WebSharedData().
     196        resource.data = WebCString(iter->data->data(), iter->data->size());
     197        result.append(resource);
     198    }
     199
     200    *resourcesParam = result;         
     201}
     202
    182203bool WebPageSerializer::serialize(WebFrame* frame,
    183204                                  bool recursive,
Note: See TracChangeset for help on using the changeset viewer.