Changeset 56991 in webkit


Ignore:
Timestamp:
Apr 2, 2010 2:00:52 AM (14 years ago)
Author:
eric@webkit.org
Message:

2010-04-02 Adam Barth <abarth@webkit.org>

Reviewed by Daniel Bates.

Make XSSAuditor go fast with large POST data
https://bugs.webkit.org/show_bug.cgi?id=36694

There were two things slowing down this benchmark:

1) Searching the large POST data for each inline event handler. To
make this faster, we now use a suffix tree to fast-reject strings
that don't appear as substrings of the POST data.

2) The next largest cost was flattening the form data into a string.
To make this faster, we now use the form data object itself as a key
and only flatten once.

  • GNUmakefile.am:
  • WebCore.gypi:
  • WebCore.vcproj/WebCore.vcproj:
  • WebCore.xcodeproj/project.pbxproj:
  • page/XSSAuditor.cpp: (WebCore::XSSAuditor::CachingURLCanonicalizer::canonicalizeURL): (WebCore::XSSAuditor::CachingURLCanonicalizer::clear): (WebCore::XSSAuditor::XSSAuditor): (WebCore::XSSAuditor::findInRequest):
  • page/XSSAuditor.h: (WebCore::XSSAuditor::CachingURLCanonicalizer::CachingURLCanonicalizer): (WebCore::XSSAuditor::CachingURLCanonicalizer::generation):
  • platform/text/SuffixTree.h: Added. (WebCore::UnicodeCodebook::codeWord): (WebCore::UnicodeCodebook::): (WebCore::ASCIICodebook::codeWord): (WebCore::ASCIICodebook::): (WebCore::SuffixTree::SuffixTree): (WebCore::SuffixTree::mightContain): (WebCore::SuffixTree::Node::Node): (WebCore::SuffixTree::Node::~Node): (WebCore::SuffixTree::Node::at): (WebCore::SuffixTree::build):
Location:
trunk/WebCore
Files:
1 added
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/WebCore/ChangeLog

    r56989 r56991  
     12010-04-02  Adam Barth  <abarth@webkit.org>
     2
     3        Reviewed by Daniel Bates.
     4
     5        Make XSSAuditor go fast with large POST data
     6        https://bugs.webkit.org/show_bug.cgi?id=36694
     7
     8        There were two things slowing down this benchmark:
     9
     10        1) Searching the large POST data for each inline event handler.  To
     11           make this faster, we now use a suffix tree to fast-reject strings
     12           that don't appear as substrings of the POST data.
     13
     14        2) The next largest cost was flattening the form data into a string.
     15           To make this faster, we now use the form data object itself as a key
     16           and only flatten once.
     17
     18        * GNUmakefile.am:
     19        * WebCore.gypi:
     20        * WebCore.vcproj/WebCore.vcproj:
     21        * WebCore.xcodeproj/project.pbxproj:
     22        * page/XSSAuditor.cpp:
     23        (WebCore::XSSAuditor::CachingURLCanonicalizer::canonicalizeURL):
     24        (WebCore::XSSAuditor::CachingURLCanonicalizer::clear):
     25        (WebCore::XSSAuditor::XSSAuditor):
     26        (WebCore::XSSAuditor::findInRequest):
     27        * page/XSSAuditor.h:
     28        (WebCore::XSSAuditor::CachingURLCanonicalizer::CachingURLCanonicalizer):
     29        (WebCore::XSSAuditor::CachingURLCanonicalizer::generation):
     30        * platform/text/SuffixTree.h: Added.
     31        (WebCore::UnicodeCodebook::codeWord):
     32        (WebCore::UnicodeCodebook::):
     33        (WebCore::ASCIICodebook::codeWord):
     34        (WebCore::ASCIICodebook::):
     35        (WebCore::SuffixTree::SuffixTree):
     36        (WebCore::SuffixTree::mightContain):
     37        (WebCore::SuffixTree::Node::Node):
     38        (WebCore::SuffixTree::Node::~Node):
     39        (WebCore::SuffixTree::Node::at):
     40        (WebCore::SuffixTree::build):
     41
    1422010-04-02  Roland Steiner  <rolandsteiner@chromium.org>
    243
  • trunk/WebCore/GNUmakefile.am

    r56968 r56991  
    17241724        WebCore/platform/text/StringImpl.cpp \
    17251725        WebCore/platform/text/StringImpl.h \
     1726        WebCore/platform/text/SuffixTree.h \
    17261727        WebCore/platform/text/TextBoundaries.h \
    17271728        WebCore/platform/text/TextBoundaries.cpp \
  • trunk/WebCore/WebCore.gypi

    r56968 r56991  
    27022702            'platform/text/StringImpl.cpp',
    27032703            'platform/text/StringImpl.h',
     2704            'platform/text/SuffixTree.h',
    27042705            'platform/text/TextBoundaries.h',
    27052706            'platform/text/TextBoundaries.cpp',
  • trunk/WebCore/WebCore.vcproj/WebCore.vcproj

    r56968 r56991  
    2568125681                                </File>
    2568225682                                <File
     25683                                        RelativePath="..\platform\text\SuffixTree.h"
     25684                                        >
     25685                                </File>
     25686                                <File
    2568325687                                        RelativePath="..\platform\graphics\StringTruncator.cpp"
    2568425688                                        >
  • trunk/WebCore/WebCore.xcodeproj/project.pbxproj

    r56968 r56991  
    24822482                979F43D31075E44A0000F83B /* RedirectScheduler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 979F43D11075E44A0000F83B /* RedirectScheduler.cpp */; };
    24832483                979F43D41075E44A0000F83B /* RedirectScheduler.h in Headers */ = {isa = PBXBuildFile; fileRef = 979F43D21075E44A0000F83B /* RedirectScheduler.h */; settings = {ATTRIBUTES = (Private, ); }; };
     2484                97C078501165D5BE003A32EF /* SuffixTree.h in Headers */ = {isa = PBXBuildFile; fileRef = 97C0784F1165D5BE003A32EF /* SuffixTree.h */; };
    24842485                97DCE20110807C750057D394 /* HistoryController.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 97DCE1FF10807C750057D394 /* HistoryController.cpp */; };
    24852486                97DCE20210807C750057D394 /* HistoryController.h in Headers */ = {isa = PBXBuildFile; fileRef = 97DCE20010807C750057D394 /* HistoryController.h */; settings = {ATTRIBUTES = (Private, ); }; };
     
    79027903                979F43D11075E44A0000F83B /* RedirectScheduler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = RedirectScheduler.cpp; sourceTree = "<group>"; };
    79037904                979F43D21075E44A0000F83B /* RedirectScheduler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RedirectScheduler.h; sourceTree = "<group>"; };
     7905                97C0784F1165D5BE003A32EF /* SuffixTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SuffixTree.h; sourceTree = "<group>"; };
    79047906                97DCE1FF10807C750057D394 /* HistoryController.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HistoryController.cpp; sourceTree = "<group>"; };
    79057907                97DCE20010807C750057D394 /* HistoryController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HistoryController.h; sourceTree = "<group>"; };
     
    1470914711                                B2C3DA020D006C1D00EF6F26 /* StringImpl.cpp */,
    1471014712                                B2C3DA030D006C1D00EF6F26 /* StringImpl.h */,
     14713                                97C0784F1165D5BE003A32EF /* SuffixTree.h */,
    1471114714                                B2C3DA040D006C1D00EF6F26 /* TextBoundaries.h */,
    1471214715                                B2C3DA060D006C1D00EF6F26 /* TextBreakIterator.h */,
     
    1877618779                                2E3BBF081162DA1100B9409A /* UUID.h in Headers */,
    1877718780                                8952535311641B3400CABF00 /* FileThread.h in Headers */,
     18781                                97C078501165D5BE003A32EF /* SuffixTree.h in Headers */,
    1877818782                        );
    1877918783                        runOnlyForDeploymentPostprocessing = 0;
  • trunk/WebCore/page/XSSAuditor.cpp

    r56825 r56991  
    7171}
    7272
     73String XSSAuditor::CachingURLCanonicalizer::canonicalizeURL(FormData* formData, const TextEncoding& encoding, bool decodeEntities,
     74                                                            bool decodeURLEscapeSequencesTwice)
     75{
     76    if (decodeEntities == m_decodeEntities && decodeURLEscapeSequencesTwice == m_decodeURLEscapeSequencesTwice
     77        && encoding == m_encoding && formData == m_formData)
     78        return m_cachedCanonicalizedURL;
     79    m_formData = formData;
     80    return canonicalizeURL(formData->flattenToString(), encoding, decodeEntities, decodeURLEscapeSequencesTwice);
     81}
     82
    7383String XSSAuditor::CachingURLCanonicalizer::canonicalizeURL(const String& url, const TextEncoding& encoding, bool decodeEntities,
    7484                                                            bool decodeURLEscapeSequencesTwice)
     
    8393    m_decodeEntities = decodeEntities;
    8494    m_decodeURLEscapeSequencesTwice = decodeURLEscapeSequencesTwice;
     95    ++m_generation;
    8596    return m_cachedCanonicalizedURL;
     97}
     98
     99void XSSAuditor::CachingURLCanonicalizer::clear()
     100{
     101    m_formData.clear();
     102    m_inputURL = String();
    86103}
    87104
    88105XSSAuditor::XSSAuditor(Frame* frame)
    89106    : m_frame(frame)
     107    , m_generationOfSuffixTree(-1)
    90108{
    91109}
     
    349367    String pageURL = frame->document()->url().string();
    350368
     369    if (!hasFormData) {
     370        // We clear out our form data caches, in case we're holding onto a bunch of memory.
     371        m_formDataCache.clear();
     372        m_formDataSuffixTree.clear();
     373    }
     374
    351375    String canonicalizedString;
    352376    if (!hasFormData && task.string.length() > 2 * pageURL.length()) {
     
    385409
    386410    if (hasFormData) {
    387         String decodedFormData = m_formDataCache.canonicalizeURL(formDataObj->flattenToString(), frame->document()->decoder()->encoding(), task.decodeEntities, task.decodeURLEscapeSequencesTwice);
     411        String decodedFormData = m_formDataCache.canonicalizeURL(formDataObj, frame->document()->decoder()->encoding(), task.decodeEntities, task.decodeURLEscapeSequencesTwice);
     412
     413        if (m_generationOfSuffixTree != m_formDataCache.generation()) {
     414            m_formDataSuffixTree = new SuffixTree<ASCIICodebook>(decodedFormData, 5);
     415            m_generationOfSuffixTree = m_formDataCache.generation();
     416        }
     417
     418        // Try a fast-reject via the suffixTree.
     419        if (m_formDataSuffixTree && !m_formDataSuffixTree->mightContain(canonicalizedString))
     420            return false;
     421
    388422        if (decodedFormData.find(canonicalizedString, 0, false) != -1)
    389423            return true; // We found the string in the POST data.
  • trunk/WebCore/page/XSSAuditor.h

    r56295 r56991  
    3030#include "HTTPParsers.h"
    3131#include "PlatformString.h"
     32#include "SuffixTree.h"
    3233#include "TextEncoding.h"
    3334
    3435namespace WebCore {
    3536
     37    class FormData;
    3638    class Frame;
    3739    class ScriptSourceCode;
     
    107109        {
    108110        public:
    109             CachingURLCanonicalizer() : m_decodeEntities(false), m_decodeURLEscapeSequencesTwice(false) { }
     111            CachingURLCanonicalizer() : m_decodeEntities(false), m_decodeURLEscapeSequencesTwice(false), m_generation(0) { }
     112            String canonicalizeURL(FormData*, const TextEncoding& encoding, bool decodeEntities,
     113                                   bool decodeURLEscapeSequencesTwice);
    110114            String canonicalizeURL(const String& url, const TextEncoding& encoding, bool decodeEntities,
    111115                                   bool decodeURLEscapeSequencesTwice);
     116
     117            void clear();
     118
     119            int generation() const { return m_generation; }
    112120
    113121        private:
     
    117125            bool m_decodeEntities;
    118126            bool m_decodeURLEscapeSequencesTwice;
     127            RefPtr<FormData> m_formData;
     128
     129            // Incremented every time we see a new URL.
     130            int m_generation;
    119131
    120132            // The cached result.
     
    159171        mutable CachingURLCanonicalizer m_pageURLCache;
    160172        mutable CachingURLCanonicalizer m_formDataCache;
     173
     174        mutable OwnPtr<SuffixTree<ASCIICodebook> > m_formDataSuffixTree;
     175        mutable int m_generationOfSuffixTree;
    161176    };
    162177
Note: See TracChangeset for help on using the changeset viewer.