Changeset 89869 in webkit


Ignore:
Timestamp:
Jun 27, 2011 3:18:15 PM (13 years ago)
Author:
jcivelli@chromium.org
Message:

2011-06-27 Jay Civelli <jcivelli@chromium.org>

Reviewed by Darin Fisher.

Adding binary part support to MHTML.
https://bugs.webkit.org/show_bug.cgi?id=63310

  • mhtml/multi_frames_binary.mht: Added.
  • platform/chromium/mhtml/multi_frames_binary-expected.txt: Added.

2011-06-27 Jay Civelli <jcivelli@chromium.org>

Reviewed by Darin Fisher.

Adding binary part support to MHTML.
https://bugs.webkit.org/show_bug.cgi?id=63310

  • loader/archive/mhtml/MHTMLArchive.cpp: (WebCore::MHTMLArchive::generateMHTMLData): (WebCore::MHTMLArchive::generateMHTMLDataUsingBinaryEncoding):
  • loader/archive/mhtml/MHTMLArchive.h:
  • loader/archive/mhtml/MHTMLParser.cpp: (WebCore::MHTMLParser::parseNextPart):
  • platform/SharedBuffer.cpp: (WebCore::SharedBuffer::append):
  • platform/SharedBuffer.h:
  • platform/network/MIMEHeader.cpp: (WebCore::MIMEHeader::parseContentTransferEncoding):
  • platform/network/MIMEHeader.h:

2011-06-27 Jay Civelli <jcivelli@chromium.org>

Reviewed by Darin Fisher.

Adding binary part support to MHTML.
https://bugs.webkit.org/show_bug.cgi?id=63310

  • public/WebPageSerializer.h:
  • src/WebPageSerializer.cpp: (WebKit::WebPageSerializer::serializeToMHTML): (WebKit::WebPageSerializer::serializeToMHTMLUsingBinaryEncoding):
Location:
trunk
Files:
2 added
12 edited

Legend:

Unmodified
Added
Removed
  • trunk/LayoutTests/ChangeLog

    r89864 r89869  
     1 2011-06-27  Jay Civelli  <jcivelli@chromium.org>
     2
     3        Reviewed by Darin Fisher.
     4
     5        Adding binary part support to MHTML.
     6        https://bugs.webkit.org/show_bug.cgi?id=63310
     7
     8        * mhtml/multi_frames_binary.mht: Added.
     9        * platform/chromium/mhtml/multi_frames_binary-expected.txt: Added.
     10
    1 11 2011-06-27  Levi Weintraub  <leviw@chromium.org>
    212
  • trunk/Source/WebCore/ChangeLog

    r89864 r89869  
     1 2011-06-27  Jay Civelli  <jcivelli@chromium.org>
     2
     3        Reviewed by Darin Fisher.
     4
     5        Adding binary part support to MHTML.
     6        https://bugs.webkit.org/show_bug.cgi?id=63310
     7
     8        * loader/archive/mhtml/MHTMLArchive.cpp:
     9        (WebCore::MHTMLArchive::generateMHTMLData):
     10        (WebCore::MHTMLArchive::generateMHTMLDataUsingBinaryEncoding):
     11        * loader/archive/mhtml/MHTMLArchive.h:
     12        * loader/archive/mhtml/MHTMLParser.cpp:
     13        (WebCore::MHTMLParser::parseNextPart):
     14        * platform/SharedBuffer.cpp:
     15        (WebCore::SharedBuffer::append):
     16        * platform/SharedBuffer.h:
     17        * platform/network/MIMEHeader.cpp:
     18        (WebCore::MIMEHeader::parseContentTransferEncoding):
     19        * platform/network/MIMEHeader.h:
     20
    1 21 2011-06-27  Levi Weintraub  <leviw@chromium.org>
    222
  • trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp

    r87958 r89869  
    5858const char* const quotedPrintable = "quoted-printable";
    5959const char* const base64 = "base64";
     60const char* const binary = "binary";
    6061
    6162static String generateRandomBoundary()
     
    124125
    125126PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
     127{
     128    return generateMHTMLData(page, false);
     129}
     130
     131PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(Page* page)
     132{
     133    return generateMHTMLData(page, true);
     134}
     135
     136PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBinaryEncoding)
    126137{
    127138    Vector<PageSerializer::Resource> resources;
     
    168179        stringBuilder.append(resource.mimeType);
    169180
    170         const char* contentEncoding = MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType) ? quotedPrintable : base64;
     181        const char* contentEncoding = useBinaryEncoding ? binary : base64;
    171182        stringBuilder.append("\r\nContent-Transfer-Encoding: ");
    172183        stringBuilder.append(contentEncoding);
     
    178189        mhtmlData->append(asciiString.data(), asciiString.length());
    179190
    180         // FIXME: ideally we would encode the content as a stream without having to fetch it all.
    181         const char* data = resource.data->data();
    182         size_t dataLength = resource.data->size();
    183         Vector<char> encodedData;
    184         if (!strcmp(contentEncoding, quotedPrintable)) {
    185             quotedPrintableEncode(data, dataLength, encodedData);
    186             mhtmlData->append(encodedData.data(), encodedData.size());
    187             mhtmlData->append("\r\n", 2);
     191        if (!strcmp(contentEncoding, binary)) {
     192            const char* data;
     193            size_t position = 0;
     194            while (size_t length = resource.data->getSomeData(data, position)) {
     195                mhtmlData->append(data, length);
     196                position += length;
     197            }
    188198        } else {
    189             ASSERT(!strcmp(contentEncoding, base64));
    190             // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
    191             base64Encode(data, dataLength, encodedData);
    192             const size_t maximumLineLength = 76;
    193             size_t index = 0;
    194             size_t encodedDataLength = encodedData.size();
    195             do {
    196                 size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
    197                 mhtmlData->append(encodedData.data() + index, lineLength);
     199            // FIXME: ideally we would encode the content as a stream without having to fetch it all.
     200            const char* data = resource.data->data();
     201            size_t dataLength = resource.data->size();
     202            Vector<char> encodedData;
     203            if (!strcmp(contentEncoding, quotedPrintable)) {
     204                quotedPrintableEncode(data, dataLength, encodedData);
     205                mhtmlData->append(encodedData.data(), encodedData.size());
    198206                mhtmlData->append("\r\n", 2);
    199                 index += maximumLineLength;
    200             } while (index < encodedDataLength);
     207            } else {
     208                ASSERT(!strcmp(contentEncoding, base64));
     209                // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
     210                base64Encode(data, dataLength, encodedData);
     211                const size_t maximumLineLength = 76;
     212                size_t index = 0;
     213                size_t encodedDataLength = encodedData.size();
     214                do {
     215                    size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
     216                    mhtmlData->append(encodedData.data() + index, lineLength);
     217                    mhtmlData->append("\r\n", 2);
     218                    index += maximumLineLength;
     219                } while (index < encodedDataLength);
     220            }
    201221        }
    202222    }
  • trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h

    r87958 r89869  
    5050
    5151    static PassRefPtr<SharedBuffer> generateMHTMLData(Page*);
     52    // Binary encoding results in smaller MHTML files but they might not work in other browsers.
     53    static PassRefPtr<SharedBuffer> generateMHTMLDataUsingBinaryEncoding(Page*);
    5254
    5355private:
     56    static PassRefPtr<SharedBuffer> generateMHTMLData(Page*, bool useBinaryEncoding);
     57
    5458    friend class MHTMLParser;
    5559    MHTMLArchive();
  • trunk/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp

    r89599 r89869  
    147147    const bool checkBoundary = !endOfPartBoundary.isEmpty();
    148148    bool endOfPartReached = false;
    149     String line;
    150     while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
    151         if (checkBoundary && (line == endOfPartBoundary || line == endOfDocumentBoundary)) {
     149    if (mimeHeader.contentTransferEncoding() == MIMEHeader::Binary) {
     150        if (!checkBoundary) {
     151            LOG_ERROR("Binary contents requires end of part");
     152            return 0;
     153        }
     154        m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
     155        Vector<char> part;
     156        if (!m_lineReader.nextChunk(part)) {
     157            LOG_ERROR("Binary contents requires end of part");
     158            return 0;
     159         }
     160         content->append(part);
     161         m_lineReader.setSeparator("\r\n");
     162         Vector<char> nextChars;
     163         if (m_lineReader.peek(nextChars, 2) != 2) {
     164             LOG_ERROR("Invalid seperator.");
     165             return 0;
     166         }
     167         endOfPartReached = true;
     168         ASSERT(nextChars.size() == 2);
     169         endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
     170         if (!endOfArchiveReached) {
     171             String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
     172             if (!line.isEmpty()) {
     173                 LOG_ERROR("No CRLF at end of binary section.");
     174                 return 0;
     175             }
     176         }
     177    } else {
     178        String line;
     179        while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
    152180            endOfArchiveReached = (line == endOfDocumentBoundary);
    153             endOfPartReached = true;
    154             break;
    155         }
    156         // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
    157         content->append(line.utf8().data(), line.length());
    158         if (mimeHeader.contentTransferEncoding() == MIMEHeader::QuotedPrintable) {
    159             // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
    160             content->append("\r\n", 2);
     181            if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {
     182                endOfPartReached = true;
     183                break;
     184            }
     185            // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
     186            content->append(line.utf8().data(), line.length());
     187            if (mimeHeader.contentTransferEncoding() == MIMEHeader::QuotedPrintable) {
     188                // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
     189                content->append("\r\n", 2);
     190            }
    161191        }
    162192    }
     
    178208        break;
    179209    case MIMEHeader::SevenBit:
     210    case MIMEHeader::Binary:
    180211        data.append(content->data(), content->size());
    181212        break;
  • trunk/Source/WebCore/platform/SharedBuffer.cpp

    r87958 r89869  
    166166}
    167167
     168void SharedBuffer::append(const Vector<char>& data)
     169{
     170    append(data.data(), data.size());
     171}
     172
    168173void SharedBuffer::clear()
    169174{
  • trunk/Source/WebCore/platform/SharedBuffer.h

    r87958 r89869  
    8787    void append(SharedBuffer*);
    8888    void append(const char*, unsigned);
     89    void append(const Vector<char>&);
     90
    8991    void clear();
    9092    const char* platformData() const;
  • trunk/Source/WebCore/platform/network/MIMEHeader.cpp

    r89599 r89869  
    124124    if (encoding == "7bit")
    125125        return SevenBit;
     126    if (encoding == "binary")
     127        return Binary;
    126128    LOG_ERROR("Unknown encoding '%s' found in MIME header.", text.ascii().data());
    127129    return Unknown;
  • trunk/Source/WebCore/platform/network/MIMEHeader.h

    r89321 r89869  
    4747        Base64,
    4848        SevenBit,
     49        Binary,
    4950        Unknown
    5051    };
  • trunk/Source/WebKit/chromium/ChangeLog

    r89837 r89869  
     1 2011-06-27  Jay Civelli  <jcivelli@chromium.org>
     2
     3        Reviewed by Darin Fisher.
     4
     5        Adding binary part support to MHTML.
     6        https://bugs.webkit.org/show_bug.cgi?id=63310
     7
     8        * public/WebPageSerializer.h:
     9        * src/WebPageSerializer.cpp:
     10        (WebKit::WebPageSerializer::serializeToMHTML):
     11        (WebKit::WebPageSerializer::serializeToMHTMLUsingBinaryEncoding):
     12
    1 13 2011-05-17  Nat Duca  <nduca@chromium.org>
    214
  • trunk/Source/WebKit/chromium/public/WebPageSerializer.h

    r88495 r89869  
    6262    WEBKIT_API static WebCString serializeToMHTML(WebView*);
    6363
     64    // Similar to serializeToMHTML but uses binary encoding for the MHTML parts.
     65    // This results in a smaller MHTML file but it might not be supported by other browsers.
     66    WEBKIT_API static WebCString serializeToMHTMLUsingBinaryEncoding(WebView*);
     67
    6468    // IMPORTANT:
    6569    // The API below is an older implementation of a pageserialization that
  • trunk/Source/WebKit/chromium/src/WebPageSerializer.cpp

    r88495 r89869  
    209209}
    210210
     211WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
     212{
     213    RefPtr<SharedBuffer> mhtml = MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(static_cast<WebViewImpl*>(view)->page());
     214    // FIXME: we are copying all the data here. Ideally we would have a WebSharedData().
     215    return WebCString(mhtml->data(), mhtml->size());
     216}
     217
    211218bool WebPageSerializer::serialize(WebFrame* frame,
    212219                                  bool recursive,
Note: See TracChangeset for help on using the changeset viewer.