Changeset 55242 in webkit


Ignore:
Timestamp:
Feb 25, 2010 10:13:46 AM (14 years ago)
Author:
eric@webkit.org
Message:

2010-02-25 Andreas Kling <andreas.kling@nokia.com>

Reviewed by Darin Adler.

Optimize decoding of Latin-1 text by exploiting the fact that most of it will
be ASCII-only data.

https://bugs.webkit.org/show_bug.cgi?id=35233

  • platform/text/TextCodecLatin1.cpp: (WebCore::TextCodecLatin1::decode):
Location:
trunk/WebCore
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/WebCore/ChangeLog

    r55241 r55242  
     1 2010-02-25  Andreas Kling  <andreas.kling@nokia.com>
     2
     3        Reviewed by Darin Adler.
     4
     5        Optimize decoding of Latin-1 text by exploiting the fact that most of it will
     6        be ASCII-only data.
     7
     8        https://bugs.webkit.org/show_bug.cgi?id=35233
     9
     10        * platform/text/TextCodecLatin1.cpp:
     11        (WebCore::TextCodecLatin1::decode):
     12
    1 13 2010-02-25  Pavel Feldman  <pfeldman@chromium.org>
    2 14
  • trunk/WebCore/platform/text/TextCodecLatin1.cpp

    r44279 r55242  
    118118}
    119119
     // NonASCIIMask<size>::value() yields a size-byte constant with the high bit
     // (0x80) set in every byte. ANDing a word of packed bytes with it gives a
     // non-zero result exactly when at least one of those bytes is >= 0x80.
     // decode() instantiates it with sizeof(uintptr_t) to test a whole word at once.
     // Only the 4- and 8-byte sizes are specialized; the primary template is
     // declared but never defined, so any other size fails to compile when used.
     120template<size_t size> struct NonASCIIMask;
     // 4-byte words: one 0x80 per byte.
     121template<> struct NonASCIIMask<4> {
     122    static unsigned value() { return 0x80808080U; }
     123};
     // 8-byte words: same pattern, widened to eight bytes.
     124template<> struct NonASCIIMask<8> {
     125    static unsigned long long value() { return 0x8080808080808080ULL; }
     126};
     127
     // UCharByteFiller<size>::copy(dest, src) assigns the first `size` bytes at
     // `src` to dest[0..size-1], one byte per UChar element, with no table lookup.
     // The copy is written out element by element rather than as a loop.
     // decode() calls it with sizeof(uintptr_t) only after the mask test has shown
     // that the whole word contains no byte >= 0x80.
     // Only the 4- and 8-byte sizes are specialized; the primary template is
     // declared but never defined.
     128template<size_t size> struct UCharByteFiller;
     // 4-byte variant: the caller must supply at least 4 readable bytes at `src`
     // and 4 writable UChar slots at `dest`.
     129template<> struct UCharByteFiller<4> {
     130    static void copy(UChar* dest, const unsigned char* src)
     131    {
     132        dest[0] = src[0];
     133        dest[1] = src[1];
     134        dest[2] = src[2];
     135        dest[3] = src[3];
     136    }
     137};
     // 8-byte variant: the caller must supply at least 8 readable bytes at `src`
     // and 8 writable UChar slots at `dest`.
     138template<> struct UCharByteFiller<8> {
     139    static void copy(UChar* dest, const unsigned char* src)
     140    {
     141        dest[0] = src[0];
     142        dest[1] = src[1];
     143        dest[2] = src[2];
     144        dest[3] = src[3];
     145        dest[4] = src[4];
     146        dest[5] = src[5];
     147        dest[6] = src[6];
     148        dest[7] = src[7];
     149    }
     150};
     151
    120152String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
    121153{
     
    123155    String result = String::createUninitialized(length, characters);
    124156
    125     // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII.
    126     unsigned char ored = 0;
    127     for (size_t i = 0; i < length; ++i) {
    128         unsigned char c = bytes[i];
    129         characters[i] = c;
    130         ored |= c;
    131     }
    132 
    133     if (!(ored & 0x80))
    134         return result;
    135 
    136     // Convert the slightly slower way when there are non-ASCII characters.
    137     for (size_t i = 0; i < length; ++i) {
    138         unsigned char c = bytes[i];
    139         characters[i] = table[c];
     157    const unsigned char* src = reinterpret_cast<const unsigned char*>(bytes);
     158    const unsigned char* end = reinterpret_cast<const unsigned char*>(bytes + length);
     159    const unsigned char* alignedEnd = reinterpret_cast<const unsigned char*>(reinterpret_cast<ptrdiff_t>(end) & ~(sizeof(uintptr_t) - 1));
     160    UChar* dest = characters;
     161
     162    while (src < end) {
     163        if (*src < 0x80) {
     164            // Fast path for values < 0x80 (most Latin-1 text will be ASCII)
     165            // Wait until we're at a properly aligned address, then read full CPU words.
     166            if (!(reinterpret_cast<ptrdiff_t>(src) & (sizeof(uintptr_t) - 1))) {
     167                while (src < alignedEnd) {
     168                    uintptr_t chunk = *reinterpret_cast<const uintptr_t*>(src);
     169
     170                    if (chunk & NonASCIIMask<sizeof(uintptr_t)>::value())
     171                        goto useLookupTable;
     172
     173                    UCharByteFiller<sizeof(uintptr_t)>::copy(dest, src);
     174
     175                    src += sizeof(uintptr_t);
     176                    dest += sizeof(uintptr_t);
     177                }
     178
     179                if (src == end)
     180                    break;
     181            }
     182            *dest = *src;
     183        } else {
     184useLookupTable:
     185            *dest = table[*src];
     186        }
     187
     188        ++src;
     189        ++dest;
    140190    }
    141191
Note: See TracChangeset for help on using the changeset viewer.