Changeset 81686 in webkit


Ignore:
Timestamp:
Mar 22, 2011 12:17:58 PM (13 years ago)
Author:
jcivelli@chromium.org
Message:

2011-03-22 Jay Civelli <jcivelli@chromium.org>

Reviewed by David Levin.

Adding a contains method to Vector.
https://bugs.webkit.org/show_bug.cgi?id=55859

  • wtf/Vector.h: (WTF::Vector::contains):

2011-03-22 Jay Civelli <jcivelli@chromium.org>

Reviewed by David Levin.

Relanding:
Moving the method that is used to retrieve all the resources in
a page from the Chromium code (dom_operations.cc) to WebKit.
https://bugs.webkit.org/show_bug.cgi?id=55859

  • WebKit.gyp:
  • public/WebPageSerializer.h:
  • public/WebURL.h: (WebKit::operator==): (WebKit::operator!=):
  • public/WebVector.h: (WebKit::WebVector::contains):
  • src/WebDataSourceImpl.cpp: (WebKit::WebDataSourceImpl::WebDataSourceImpl):
  • src/WebPageSerializer.cpp: (WebCore::getSubResourceURLFromElement): (WebCore::retrieveResourcesForElement): (WebCore::retrieveResourcesForFrame): (WebKit::WebPageSerializer::retrieveAllResources):
  • tests/WebFrameTest.cpp:
  • tests/WebPageSerializerTest.cpp: Added.
  • tests/data/pageserialization/awesome.png: Added.
  • tests/data/pageserialization/embed_iframe.html: Added.
  • tests/data/pageserialization/object_iframe.html: Added.
  • tests/data/pageserialization/simple_iframe.html: Added.
  • tests/data/pageserialization/simple_page.html: Added.
  • tests/data/pageserialization/top_frame.html: Added.
Location:
trunk/Source
Files:
7 added
10 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/ChangeLog

    r81682 r81686  
     12011-03-22  Jay Civelli  <jcivelli@chromium.org>
     2
     3        Reviewed by David Levin.
     4
     5        Adding a contains method to Vector.
     6        https://bugs.webkit.org/show_bug.cgi?id=55859
     7
     8        * wtf/Vector.h:
     9        (WTF::Vector::contains):
     10
    1112011-03-22  Gabor Loki  <loki@webkit.org>
    212
  • trunk/Source/JavaScriptCore/wtf/Vector.h

    r81609 r81686  
    567567        const T& last() const { return at(size() - 1); }
    568568
     569        template<typename U> bool contains(const U&) const;
    569570        template<typename U> size_t find(const U&) const;
    570571        template<typename U> size_t reverseFind(const U&) const;
     
    742743
    743744        return *this;
     745    }
     746
     747    template<typename T, size_t inlineCapacity>
     748    template<typename U>
     749    bool Vector<T, inlineCapacity>::contains(const U& value) const
     750    {
     751        return find(value) != notFound;
    744752    }
    745753
  • trunk/Source/WebKit/chromium/ChangeLog

    r81676 r81686  
     12011-03-22  Jay Civelli  <jcivelli@chromium.org>
     2
     3        Reviewed by David Levin.
     4
     5        Relanding:
     6        Moving the method that is used to retrieve all the resources in
     7        a page from the Chromium code (dom_operations.cc) to WebKit.
     8        https://bugs.webkit.org/show_bug.cgi?id=55859
     9
     10
     11        * WebKit.gyp:
     12        * public/WebPageSerializer.h:
     13        * public/WebURL.h:
     14        (WebKit::operator==):
     15        (WebKit::operator!=):
     16        * public/WebVector.h:
     17        (WebKit::WebVector::contains):
     18        * src/WebDataSourceImpl.cpp:
     19        (WebKit::WebDataSourceImpl::WebDataSourceImpl):
     20        * src/WebPageSerializer.cpp:
     21        (WebCore::getSubResourceURLFromElement):
     22        (WebCore::retrieveResourcesForElement):
     23        (WebCore::retrieveResourcesForFrame):
     24        (WebKit::WebPageSerializer::retrieveAllResources):
     25        * tests/WebFrameTest.cpp:
     26        * tests/WebPageSerializerTest.cpp: Added.
     27        * tests/data/pageserialization/awesome.png: Added.
     28        * tests/data/pageserialization/embed_iframe.html: Added.
     29        * tests/data/pageserialization/object_iframe.html: Added.
     30        * tests/data/pageserialization/simple_iframe.html: Added.
     31        * tests/data/pageserialization/simple_page.html: Added.
     32        * tests/data/pageserialization/top_frame.html: Added.
     33
    1342011-03-22  Noel Gordon  <noel.gordon@gmail.com>
    235
  • trunk/Source/WebKit/chromium/WebKit.gyp

    r81614 r81686  
    818818                                'tests/UniscribeHelperTest.cpp',
    819819                                'tests/WebFrameTest.cpp',
     820                                'tests/WebPageSerializerTest.cpp',
    820821                            ],
    821822                        }],
  • trunk/Source/WebKit/chromium/public/WebPageSerializer.h

    r81609 r81686  
    3535
    3636namespace WebKit {
     37class WebCString;
    3738class WebFrame;
    3839class WebPageSerializerClient;
    3940class WebString;
    4041class WebURL;
     42class WebView;
    4143template <typename T> class WebVector;
    4244
     
    7072                                     const WebString& localDirectoryName);
    7173
     74    // Retrieve all the resources for the passed view, including the main frame
     75    // and sub-frames. Returns true if all resources were retrieved
     76    // successfully.
     77    WEBKIT_API static bool retrieveAllResources(WebView*,
     78                                                const WebVector<WebCString>& supportedSchemes,
     79                                                WebVector<WebURL>* resources,
     80                                                WebVector<WebURL>* frames);
     81
    7282    // FIXME: The following are here for unit testing purposes. Consider
    7383    // changing the unit tests instead.
     
    8191};
    8292
    83 }  // namespace WebKit
     93} // namespace WebKit
    8494
    8595#endif
  • trunk/Source/WebKit/chromium/public/WebURL.h

    r81609 r81686  
    144144}
    145145
     146inline bool operator==(const WebURL& a, const WebURL& b)
     147{
     148    return !a.spec().compare(b.spec());
     149}
     150
     151inline bool operator!=(const WebURL& a, const WebURL& b)
     152{
     153    return !(a == b);
     154}
     155
    146156} // namespace WebKit
    147157
  • trunk/Source/WebKit/chromium/public/WebVector.h

    r81609 r81686  
    126126        return m_ptr[i];
    127127    }
     128   
     129    bool contains(const T& value) const
     130    {
     131        for (size_t i = 0; i < m_size; i++) {
     132            if (m_ptr[i] == value)
     133                return true;
     134        }
     135        return false;
     136    }
    128137
    129138    T* data() { return m_ptr; }
     
    173182};
    174183
    175 }  // namespace WebKit
     184} // namespace WebKit
    176185
    177186#endif
  • trunk/Source/WebKit/chromium/src/WebDataSourceImpl.cpp

    r81609 r81686  
    175175        // to wait to attach the WebPluginLoadObserver to that data source.
    176176        if (!request.url().isEmpty()) {
    177             ASSERT(m_nextPluginLoadObserver->url() == request.url());
     177            ASSERT(m_nextPluginLoadObserver->url() == WebURL(request.url()));
    178178            m_pluginLoadObserver.set(m_nextPluginLoadObserver);
    179179            m_nextPluginLoadObserver = 0;
  • trunk/Source/WebKit/chromium/src/WebPageSerializer.cpp

    r81609 r81686  
    3232#include "WebPageSerializer.h"
    3333
     34#include "DocumentLoader.h"
     35#include "Element.h"
     36#include "Frame.h"
     37#include "HTMLAllCollection.h"
     38#include "HTMLFrameOwnerElement.h"
     39#include "HTMLInputElement.h"
     40#include "HTMLNames.h"
    3441#include "KURL.h"
    35 
     42#include "Vector.h"
     43
     44#include "WebCString.h"
    3645#include "WebFrame.h"
     46#include "WebFrameImpl.h"
    3747#include "WebPageSerializerClient.h"
    3848#include "WebPageSerializerImpl.h"
     
    4050#include "WebURL.h"
    4151#include "WebVector.h"
     52#include "WebView.h"
    4253
    4354#include <wtf/text/StringConcatenate.h>
    4455
    4556using namespace WebCore;
     57
     58namespace {
     59
     60KURL getSubResourceURLFromElement(Element* element)
     61{
     62    ASSERT(element);
     63    const QualifiedName* attributeName = 0;
     64    if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag))
     65        attributeName = &HTMLNames::srcAttr;
     66    else if (element->hasTagName(HTMLNames::inputTag)) {
     67        HTMLInputElement* input = static_cast<HTMLInputElement*>(element);
     68        if (input->isImageButton())
     69            attributeName = &HTMLNames::srcAttr;
     70    } else if (element->hasTagName(HTMLNames::bodyTag)
     71               || element->hasTagName(HTMLNames::tableTag)
     72               || element->hasTagName(HTMLNames::trTag)
     73               || element->hasTagName(HTMLNames::tdTag))
     74        attributeName = &HTMLNames::backgroundAttr;
     75    else if (element->hasTagName(HTMLNames::blockquoteTag)
     76             || element->hasTagName(HTMLNames::qTag)
     77             || element->hasTagName(HTMLNames::delTag)
     78             || element->hasTagName(HTMLNames::insTag))
     79        attributeName = &HTMLNames::citeAttr;
     80    else if (element->hasTagName(HTMLNames::linkTag)) {
     81        // If the link element is not css, ignore it.
     82        if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) {
     83            // FIXME: Add support for extracting links of sub-resources which
     84            // are inside style-sheet such as @import, @font-face, url(), etc.
     85            attributeName = &HTMLNames::hrefAttr;
     86        }
     87    } else if (element->hasTagName(HTMLNames::objectTag))
     88        attributeName = &HTMLNames::dataAttr;
     89    else if (element->hasTagName(HTMLNames::embedTag))
     90        attributeName = &HTMLNames::srcAttr;
     91
     92    if (!attributeName)
     93        return KURL();
     94
     95    String value = element->getAttribute(*attributeName);
     96    // Ignore javascript content.
     97    if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false))
     98        return KURL();
     99 
     100    return element->document()->completeURL(value);
     101}
     102
     103void retrieveResourcesForElement(Element* element,
     104                                 Vector<Frame*>* visitedFrames,
     105                                 Vector<Frame*>* framesToVisit,
     106                                 Vector<KURL>* frameURLs,
     107                                 Vector<KURL>* resourceURLs)
     108{
     109    // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
     110    if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag)
     111        || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag))
     112            && element->isFrameOwnerElement()) {
     113        Frame* frame = static_cast<HTMLFrameOwnerElement*>(element)->contentFrame();
     114        if (frame) {
     115            if (!visitedFrames->contains(frame))
     116                framesToVisit->append(frame);
     117            return;
     118        }
     119    }
     120
     121    KURL url = getSubResourceURLFromElement(element);
     122    if (url.isEmpty() || !url.isValid())
     123        return; // No subresource for this node.
     124
     125    // Ignore URLs that have non-standard protocols. Since the FTP protocol
     126    // does not have a cache mechanism, we skip it as well.
     127    if (!url.protocolInHTTPFamily() && !url.isLocalFile())
     128        return;
     129
     130    if (!resourceURLs->contains(url))
     131        resourceURLs->append(url);
     132}
     133
     134void retrieveResourcesForFrame(Frame* frame,
     135                               const WebKit::WebVector<WebKit::WebCString>& supportedSchemes,
     136                               Vector<Frame*>* visitedFrames,
     137                               Vector<Frame*>* framesToVisit,
     138                               Vector<KURL>* frameURLs,
     139                               Vector<KURL>* resourceURLs)
     140{
     141    KURL frameURL = frame->loader()->documentLoader()->request().url();
     142
     143    // If the frame's URL is invalid, ignore it, it is not retrievable.
     144    if (!frameURL.isValid())
     145        return;
     146
     147    // Ignore frames from unsupported schemes.
     148    bool isValidScheme = false;
     149    for (size_t i = 0; i < supportedSchemes.size(); ++i) {
     150        if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) {
     151            isValidScheme = true;
     152            break;
     153        }
     154    }
     155    if (!isValidScheme)
     156        return;
     157
     158    // If we have already seen that frame, ignore it.
     159    if (visitedFrames->contains(frame))
     160        return;
     161    visitedFrames->append(frame);
     162    if (!frameURLs->contains(frameURL))
     163        frameURLs->append(frameURL);
     164 
     165    // Now get the resources associated with each node of the document.
     166    RefPtr<HTMLAllCollection> allNodes = frame->document()->all();
     167    for (unsigned i = 0; i < allNodes->length(); ++i) {
     168        Node* node = allNodes->item(i);
     169        // We are only interested in HTML resources.
     170        if (!node->isElementNode())
     171            continue;
     172        retrieveResourcesForElement(static_cast<Element*>(node),
     173                                    visitedFrames, framesToVisit,
     174                                    frameURLs, resourceURLs);
     175    }
     176}
     177
     178} // namespace
    46179
    47180namespace WebKit {
     
    59192}
    60193
     194bool WebPageSerializer::retrieveAllResources(WebView* view,
     195                                             const WebVector<WebCString>& supportedSchemes,
     196                                             WebVector<WebURL>* resourceURLs,
     197                                             WebVector<WebURL>* frameURLs) {
     198    WebFrameImpl* mainFrame = static_cast<WebFrameImpl*>(view->mainFrame());
     199    if (!mainFrame)
     200        return false;
     201
     202    Vector<Frame*> framesToVisit;
     203    Vector<Frame*> visitedFrames;
     204    Vector<KURL> frameKURLs;
     205    Vector<KURL> resourceKURLs;
     206   
     207    // Let's retrieve the resources from every frame in this page.
     208    framesToVisit.append(mainFrame->frame());
     209    while (!framesToVisit.isEmpty()) {
     210        Frame* frame = framesToVisit[0];
     211        framesToVisit.remove(0);
     212        retrieveResourcesForFrame(frame, supportedSchemes,
     213                                  &visitedFrames, &framesToVisit,
     214                                  &frameKURLs, &resourceKURLs);
     215    }
     216
     217    // Converts the results to WebURLs.
     218    WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
     219    for (size_t i = 0; i < resourceKURLs.size(); ++i) {
     220        resultResourceURLs[i] = resourceKURLs[i];
     221        // A frame's src can point to the same URL as another resource, keep the
     222        // resource URL only in such cases.
     223        size_t index = frameKURLs.find(resourceKURLs[i]);
     224        if (index != notFound)
     225            frameKURLs.remove(index);
     226    }
     227    *resourceURLs = resultResourceURLs;
     228    WebVector<WebURL> resultFrameURLs(frameKURLs.size());
     229    for (size_t i = 0; i < frameKURLs.size(); ++i)
     230        resultFrameURLs[i] = frameKURLs[i];
     231    *frameURLs = resultFrameURLs;
     232   
     233    return true;
     234}
     235
    61236WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& charset)
    62237{
     
    78253}
    79254
    80 }  // namespace WebKit
     255} // namespace WebKit
  • trunk/Source/WebKit/chromium/tests/WebFrameTest.cpp

    r81609 r81686  
    2828 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    2929 */
     30
     31#include "config.h"
    3032
    3133#include <googleurl/src/gurl.h>
Note: See TracChangeset for help on using the changeset viewer.