Changeset 106217 in webkit


Ignore:
Timestamp:
Jan 29, 2012 11:30:14 PM (12 years ago)
Author:
zherczeg@webkit.org
Message:

Custom written CSS lexer
https://bugs.webkit.org/show_bug.cgi?id=70107

Reviewed by Antti Koivisto and Oliver Hunt.

.:

Remove tokenizer.cpp from intermediate sources.

  • wscript:

Source/JavaScriptCore:

Add new helper functions for the custom written CSS lexer.

  • wtf/ASCIICType.h:

(WTF::toASCIILowerUnchecked):
(WTF):
(WTF::isASCIIAlphaCaselessEqual):

Source/WebCore:

This patch replaces the flex-based CSS lexer with a
new, custom-written one. The new code is more
than 2 times faster according to oprofile and CPU
cycle counters.

The code structure is quite straightforward: it chooses
the possible token group based on the first character
and employs utility functions to parse the tokens that
are longer than one character. Most of the utilities are inline
to make the lexer fast.

All build systems are updated, including the removal of flex support.

Existing tests cover this feature.

  • CMakeLists.txt:
  • DerivedSources.make:
  • DerivedSources.pri:
  • GNUmakefile.am:
  • GNUmakefile.list.am:
  • WebCore.gyp/WebCore.gyp:
  • WebCore.gyp/scripts/action_maketokenizer.py: Removed.
  • WebCore.gypi:
  • WebCore.vcproj/WebCore.vcproj:
  • WebCore.xcodeproj/project.pbxproj:
  • css/CSSParser.cpp:

(WebCore::CSSParser::CSSParser):
(WebCore::CSSParser::setupParser):
(WebCore::parseSimpleLengthValue):
(WebCore::mightBeRGBA):
(WebCore::mightBeRGB):
():
(WebCore::isCSSLetter):
(WebCore):
(WebCore::isCSSEscape):
(WebCore::isURILetter):
(WebCore::isIdentifierStartAfterDash):
(WebCore::isEqualToCSSIdentifier):
(WebCore::checkAndSkipEscape):
(WebCore::skipWhiteSpace):
(WebCore::CSSParser::isIdentifierStart):
(WebCore::CSSParser::checkAndSkipString):
(WebCore::CSSParser::parseEscape):
(WebCore::CSSParser::parseIdentifier):
(WebCore::CSSParser::parseString):
(WebCore::CSSParser::parseURI):
(WebCore::CSSParser::parseUnicodeRange):
(WebCore::CSSParser::parseNthChild):
(WebCore::CSSParser::parseNthChildExtra):
(WebCore::CSSParser::detectFunctionTypeToken):
(WebCore::CSSParser::detectMediaQueryToken):
(WebCore::CSSParser::detectNumberToken):
(WebCore::CSSParser::detectDashToken):
(WebCore::CSSParser::detectAtToken):
(WebCore::CSSParser::lex):
(WebCore::CSSParser::markSelectorListStart):
(WebCore::CSSParser::markSelectorListEnd):
(WebCore::CSSParser::markRuleBodyStart):
(WebCore::CSSParser::markRuleBodyEnd):
(WebCore::CSSParser::markPropertyStart):
(WebCore::CSSParser::markPropertyEnd):

  • css/CSSParser.h:

(WebCore::CSSParser::token):
(CSSParser):
():

  • css/tokenizer.flex: Removed.
Location:
trunk
Files:
2 deleted
16 edited

Legend:

Unmodified
Added
Removed
  • trunk/ChangeLog

    r106146 r106217  
     12012-01-29  Zoltan Herczeg  <zherczeg@webkit.org>
     2
     3        Custom written CSS lexer
     4        https://bugs.webkit.org/show_bug.cgi?id=70107
     5
     6        Reviewed by Antti Koivisto and Oliver Hunt.
     7
     8        Remove tokenizer.cpp from intermediate sources.
     9
     10        * wscript:
     11
    1122012-01-27  Fady Samuel  <fsamuel@chromium.org>
    213
  • trunk/Source/JavaScriptCore/ChangeLog

    r106207 r106217  
     12012-01-29  Zoltan Herczeg  <zherczeg@webkit.org>
     2
     3        Custom written CSS lexer
     4        https://bugs.webkit.org/show_bug.cgi?id=70107
     5
     6        Reviewed by Antti Koivisto and Oliver Hunt.
     7
     8        Add new helper functions for the custom written CSS lexer.
     9
     10        * wtf/ASCIICType.h:
     11        (WTF::toASCIILowerUnchecked):
     12        (WTF):
     13        (WTF::isASCIIAlphaCaselessEqual):
     14
    1152012-01-29  Filip Pizlo  <fpizlo@apple.com>
    216
  • trunk/Source/JavaScriptCore/wtf/ASCIICType.h

    r103202 r106217  
    112112}
    113113
     114template<typename CharType> inline CharType toASCIILowerUnchecked(CharType character)
     115{
     116    // This function can be used for comparing any input character
     117    // to a lowercase English character. The isASCIIAlphaCaselessEqual
     118    // below should be used for regular comparison of ASCII alpha
     119    // characters, but switch statements in CSS tokenizer require
     120    // direct use of this function.
     121    return character | 0x20;
     122}
     123
    114124template<typename CharType> inline CharType toASCIIUpper(CharType c)
    115125{
     
    141151}
    142152
     153template<typename CharType> inline bool isASCIIAlphaCaselessEqual(CharType cssCharacter, char character)
     154{
     155    // This function compares a (preferably) constant ASCII
     156    // lowercase letter to any input character.
     157    ASSERT(character >= 'a' && character <= 'z');
     158    return LIKELY(toASCIILowerUnchecked(cssCharacter) == character);
     159}
     160
    143161}
    144162
     
    155173using WTF::toASCIIHexValue;
    156174using WTF::toASCIILower;
     175using WTF::toASCIILowerUnchecked;
    157176using WTF::toASCIIUpper;
    158177using WTF::lowerNibbleToASCIIHexDigit;
    159178using WTF::upperNibbleToASCIIHexDigit;
     179using WTF::isASCIIAlphaCaselessEqual;
    160180
    161181#endif
  • trunk/Source/WebCore/CMakeLists.txt

    r106166 r106217  
    23292329
    23302330
    2331 # Generate tokenizer
    2332 FILE(TO_NATIVE_PATH ${PERL_EXECUTABLE} PERL_EXECUTABLE_NATIVE_PATH)
    2333 ADD_CUSTOM_COMMAND(
    2334     OUTPUT ${DERIVED_SOURCES_WEBCORE_DIR}/tokenizer.cpp
    2335     MAIN_DEPENDENCY ${WEBCORE_DIR}/css/maketokenizer
    2336     DEPENDS ${WEBCORE_DIR}/css/tokenizer.flex
    2337     COMMAND ${FLEX_EXECUTABLE} -t ${WEBCORE_DIR}/css/tokenizer.flex | ${PERL_EXECUTABLE_NATIVE_PATH} ${WEBCORE_DIR}/css/maketokenizer > ${DERIVED_SOURCES_WEBCORE_DIR}/tokenizer.cpp
    2338     VERBATIM)
    2339 ADD_SOURCE_WEBCORE_DERIVED_DEPENDENCIES(${WEBCORE_DIR}/css/CSSParser.cpp tokenizer.cpp)
    2340 
    2341 
    23422331# Replace ";" with "space" in order to recognize feature definition in css files.
    23432332SET(FEATURE_DEFINES_WITH_SPACE_SEPARATOR "")
  • trunk/Source/WebCore/ChangeLog

    r106209 r106217  
     12012-01-29  Zoltan Herczeg  <zherczeg@webkit.org>
     2
     3        Custom written CSS lexer
     4        https://bugs.webkit.org/show_bug.cgi?id=70107
     5
     6        Reviewed by Antti Koivisto and Oliver Hunt.
     7
     8        This patch replaces the flex-based CSS lexer with a
     9        new, custom-written one. The new code is more
     10        than 2 times faster according to oprofile and CPU
     11        cycle counters.
     12
     13        The code structure is quite straightforward: it chooses
     14        the possible token group based on the first character
     15        and employs utility functions to parse the tokens that
     16        are longer than one character. Most of the utilities are inline
     17        to make the lexer fast.
     18
     19        All build systems are updated, including the removal of flex support.
     20
     21        Existing tests cover this feature.
     22
     23        * CMakeLists.txt:
     24        * DerivedSources.make:
     25        * DerivedSources.pri:
     26        * GNUmakefile.am:
     27        * GNUmakefile.list.am:
     28        * WebCore.gyp/WebCore.gyp:
     29        * WebCore.gyp/scripts/action_maketokenizer.py: Removed.
     30        * WebCore.gypi:
     31        * WebCore.vcproj/WebCore.vcproj:
     32        * WebCore.xcodeproj/project.pbxproj:
     33        * css/CSSParser.cpp:
     34        (WebCore::CSSParser::CSSParser):
     35        (WebCore::CSSParser::setupParser):
     36        (WebCore::parseSimpleLengthValue):
     37        (WebCore::mightBeRGBA):
     38        (WebCore::mightBeRGB):
     39        ():
     40        (WebCore::isCSSLetter):
     41        (WebCore):
     42        (WebCore::isCSSEscape):
     43        (WebCore::isURILetter):
     44        (WebCore::isIdentifierStartAfterDash):
     45        (WebCore::isEqualToCSSIdentifier):
     46        (WebCore::checkAndSkipEscape):
     47        (WebCore::skipWhiteSpace):
     48        (WebCore::CSSParser::isIdentifierStart):
     49        (WebCore::CSSParser::checkAndSkipString):
     50        (WebCore::CSSParser::parseEscape):
     51        (WebCore::CSSParser::parseIdentifier):
     52        (WebCore::CSSParser::parseString):
     53        (WebCore::CSSParser::parseURI):
     54        (WebCore::CSSParser::parseUnicodeRange):
     55        (WebCore::CSSParser::parseNthChild):
     56        (WebCore::CSSParser::parseNthChildExtra):
     57        (WebCore::CSSParser::detectFunctionTypeToken):
     58        (WebCore::CSSParser::detectMediaQueryToken):
     59        (WebCore::CSSParser::detectNumberToken):
     60        (WebCore::CSSParser::detectDashToken):
     61        (WebCore::CSSParser::detectAtToken):
     62        (WebCore::CSSParser::lex):
     63        (WebCore::CSSParser::markSelectorListStart):
     64        (WebCore::CSSParser::markSelectorListEnd):
     65        (WebCore::CSSParser::markRuleBodyStart):
     66        (WebCore::CSSParser::markRuleBodyEnd):
     67        (WebCore::CSSParser::markPropertyStart):
     68        (WebCore::CSSParser::markPropertyEnd):
     69        * css/CSSParser.h:
     70        (WebCore::CSSParser::token):
     71        (CSSParser):
     72        ():
     73        * css/tokenizer.flex: Removed.
     74
    1752012-01-29  Dale Curtis  <dalecurtis@chromium.org>
    276
  • trunk/Source/WebCore/DerivedSources.make

    r105947 r106217  
    619619    MathMLNames.cpp \
    620620    XPathGrammar.cpp \
    621     tokenizer.cpp \
    622621#
    623622
     
    713712ColorData.cpp : platform/ColorData.gperf $(WebCore)/make-hash-tools.pl
    714713        perl $(WebCore)/make-hash-tools.pl . $(WebCore)/platform/ColorData.gperf
    715 
    716 # --------
    717 
    718 # CSS tokenizer
    719 
    720 tokenizer.cpp : css/tokenizer.flex css/maketokenizer
    721         flex -t $< | perl $(WebCore)/css/maketokenizer > $@
    722714
    723715# --------
  • trunk/Source/WebCore/DerivedSources.pri

    r105947 r106217  
    2626
    2727XLINK_NAMES = $$PWD/svg/xlinkattrs.in
    28 
    29 TOKENIZER = $$PWD/css/tokenizer.flex
    3028
    3129CSSBISON = $$PWD/css/CSSGrammar.y
     
    762760GENERATORS += arrayBufferViewCustomScript
    763761
    764 # GENERATOR 3: tokenizer (flex)
    765 tokenizer.output = ${QMAKE_FILE_BASE}.cpp
    766 tokenizer.input = TOKENIZER
    767 tokenizer.script = $$PWD/css/maketokenizer
    768 tokenizer.commands = flex -t < ${QMAKE_FILE_NAME} | perl $$tokenizer.script > ${QMAKE_FILE_OUT}
    769 # tokenizer.cpp is included into CSSParser.cpp
    770 tokenizer.add_output_to_sources = false
    771 GENERATORS += tokenizer
    772 
    773762# GENERATOR 4: CSS grammar
    774763cssbison.output = ${QMAKE_FILE_BASE}.cpp
  • trunk/Source/WebCore/GNUmakefile.am

    r105922 r106217  
    642642        $(AM_V_GEN)$(PERL) $(WebCore)/make-hash-tools.pl $(GENSOURCES_WEBCORE) $(WebCore)/platform/ColorData.gperf
    643643
    644 # CSS tokenizer
    645 DerivedSources/WebCore/tokenizer.cpp : $(WebCore)/css/tokenizer.flex $(WebCore)/css/maketokenizer
    646         $(AM_V_GEN)$(FLEX) -t $< | $(PERL) $(WebCore)/css/maketokenizer > $@
    647 
    648644# CSS grammar
    649645
     
    937933        Source/WebCore/css/SVGCSSPropertyNames.in \
    938934        Source/WebCore/css/SVGCSSValueKeywords.in \
    939         Source/WebCore/css/tokenizer.flex \
    940935        Source/WebCore/css/view-source.css \
    941936        Source/WebCore/css/WebKitFontFamilyNames.in \
  • trunk/Source/WebCore/GNUmakefile.list.am

    r106166 r106217  
    1 webcore_built_nosources += \
    2         DerivedSources/WebCore/tokenizer.cpp
    3 
    41webcore_built_sources += \
    52        DerivedSources/WebCore/CSSGrammar.cpp \
  • trunk/Source/WebCore/WebCore.gyp/WebCore.gyp

    r106209 r106217  
    913913        },
    914914        {
    915           'action_name': 'tokenizer',
    916           'inputs': [
    917             '../css/maketokenizer',
    918             '../css/tokenizer.flex',
    919           ],
    920           'outputs': [
    921             '<(SHARED_INTERMEDIATE_DIR)/webkit/tokenizer.cpp',
    922           ],
    923           'action': [
    924             'python',
    925             'scripts/action_maketokenizer.py',
    926             '<@(_outputs)',
    927             '--',
    928             '<@(_inputs)'
    929           ],
    930         },
    931         {
    932915          'action_name': 'derived_sources_all_in_one',
    933916          'inputs': [
  • trunk/Source/WebCore/WebCore.gypi

    r106166 r106217  
    79647964            '<(PRODUCT_DIR)/DerivedSources/WebCore/XPathGrammar.cpp',
    79657965            '<(PRODUCT_DIR)/DerivedSources/WebCore/XPathGrammar.h',
    7966             '<(PRODUCT_DIR)/DerivedSources/WebCore/tokenizer.cpp',
    79677966        ],
    79687967        'export_file_generator_files': [
  • trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj

    r106166 r106217  
    3551135511                        </File>
    3551235512                        <File
    35513                                 RelativePath="..\css\tokenizer.flex"
    35514                                 >
    35515                         </File>
    35516                         <File
    3551735513                                RelativePath="..\css\WebKitCSSFilterValue.cpp"
    3551835514                                >
  • trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj

    r106166 r106217  
    85418541                6565814709D13043000E61D7 /* CSSValueKeywords.gperf */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = CSSValueKeywords.gperf; sourceTree = "<group>"; };
    85428542                6565814809D13043000E61D7 /* CSSValueKeywords.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = CSSValueKeywords.h; sourceTree = "<group>"; };
    8543                 6565814C09D13043000E61D7 /* tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = tokenizer.cpp; sourceTree = "<group>"; };
    85448543                656581AC09D14EE6000E61D7 /* CharsetData.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = CharsetData.cpp; sourceTree = "<group>"; };
    85458544                656581AE09D14EE6000E61D7 /* UserAgentStyleSheets.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = UserAgentStyleSheets.h; sourceTree = "<group>"; };
     
    1005910058                93CA4C9F09DF93FA00DF8677 /* quirks.css */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = quirks.css; sourceTree = "<group>"; };
    1006010059                93CA4CA209DF93FA00DF8677 /* svg.css */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = svg.css; sourceTree = "<group>"; };
    10061                 93CA4CA309DF93FA00DF8677 /* tokenizer.flex */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = tokenizer.flex; sourceTree = "<group>"; };
    1006210060                93CCF0260AF6C52900018E89 /* NavigationAction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NavigationAction.h; sourceTree = "<group>"; };
    1006310061                93CCF05F0AF6CA7600018E89 /* NavigationAction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = NavigationAction.cpp; sourceTree = "<group>"; };
     
    1483914837                                656581E809D1508D000E61D7 /* SVGNames.cpp */,
    1484014838                                656581E909D1508D000E61D7 /* SVGNames.h */,
    14841                                 6565814C09D13043000E61D7 /* tokenizer.cpp */,
    1484214839                                656581AE09D14EE6000E61D7 /* UserAgentStyleSheets.h */,
    1484314840                                656581AF09D14EE6000E61D7 /* UserAgentStyleSheetsData.cpp */,
     
    2009420091                                B2227B030D00BFF10071B782 /* SVGCSSStyleSelector.cpp */,
    2009520092                                B2227B040D00BFF10071B782 /* SVGCSSValueKeywords.in */,
    20096                                 93CA4CA309DF93FA00DF8677 /* tokenizer.flex */,
    2009720093                                BC5EC1760A507E3E006007F5 /* view-source.css */,
    2009820094                                3106036C14327D2E00ABF4BA /* WebKitCSSFilterValue.cpp */,
  • trunk/Source/WebCore/css/CSSParser.cpp

    r106166 r106217  
    202202    , m_ruleRangeMap(0)
    203203    , m_currentRuleData(0)
    204     , yy_start(1)
     204    , m_parsingMode(NormalMode)
     205    , m_currentCharacter(0)
     206    , m_token(0)
    205207    , m_lineNumber(0)
    206208    , m_lastSelectorLineNumber(0)
     
    244246void CSSParser::setupParser(const char* prefix, const String& string, const char* suffix)
    245247{
    246     int length = string.length() + strlen(prefix) + strlen(suffix) + 2;
    247 
    248     m_data = adoptArrayPtr(new UChar[length]);
     248    int length = string.length() + strlen(prefix) + strlen(suffix) + 1;
     249
     250    m_dataStart = adoptArrayPtr(new UChar[length]);
    249251    for (unsigned i = 0; i < strlen(prefix); i++)
    250         m_data[i] = prefix[i];
    251 
    252     memcpy(m_data.get() + strlen(prefix), string.characters(), string.length() * sizeof(UChar));
     252        m_dataStart[i] = prefix[i];
     253
     254    memcpy(m_dataStart.get() + strlen(prefix), string.characters(), string.length() * sizeof(UChar));
    253255
    254256    unsigned start = strlen(prefix) + string.length();
    255257    unsigned end = start + strlen(suffix);
    256258    for (unsigned i = start; i < end; i++)
    257         m_data[i] = suffix[i - start];
    258 
    259     m_data[length - 1] = 0;
    260     m_data[length - 2] = 0;
    261 
    262     yy_hold_char = 0;
    263     yyleng = 0;
    264     yytext = m_data.get();
    265     yy_c_buf_p = yytext;
    266     yy_hold_char = *yy_c_buf_p;
     259        m_dataStart[i] = suffix[i - start];
     260
     261    m_dataStart[length - 1] = 0;
     262
     263    m_currentCharacter = m_tokenStart = m_dataStart.get();
    267264    resetRuleBodyMarks();
    268265}
     
    424421
    425422    CSSPrimitiveValue::UnitTypes unit = CSSPrimitiveValue::CSS_NUMBER;
    426     if (length > 2 && (characters[length - 2] | 0x20) == 'p' && (characters[length - 1] | 0x20) == 'x') {
     423    if (length > 2 && isASCIIAlphaCaselessEqual(characters[length - 2], 'p') && isASCIIAlphaCaselessEqual(characters[length - 1], 'x')) {
    427424        length -= 2;
    428425        unit = CSSPrimitiveValue::CSS_PX;
     
    48514848        return false;
    48524849    return characters[4] == '('
    4853         && (characters[0] | 0x20) == 'r'
    4854         && (characters[1] | 0x20) == 'g'
    4855         && (characters[2] | 0x20) == 'b'
    4856         && (characters[3] | 0x20) == 'a';
     4850        && isASCIIAlphaCaselessEqual(characters[0], 'r')
     4851        && isASCIIAlphaCaselessEqual(characters[1], 'g')
     4852        && isASCIIAlphaCaselessEqual(characters[2], 'b')
     4853        && isASCIIAlphaCaselessEqual(characters[3], 'a');
    48574854}
    48584855
     
    48624859        return false;
    48634860    return characters[3] == '('
    4864         && (characters[0] | 0x20) == 'r'
    4865         && (characters[1] | 0x20) == 'g'
    4866         && (characters[2] | 0x20) == 'b';
     4861        && isASCIIAlphaCaselessEqual(characters[0], 'r')
     4862        && isASCIIAlphaCaselessEqual(characters[1], 'g')
     4863        && isASCIIAlphaCaselessEqual(characters[2], 'b');
    48674864}
    48684865
     
    74347431    return true;
    74357432}
    7436    
    7437 static inline int yyerror(const char*) { return 1; }
    74387433
    74397434#define END_TOKEN 0
     
    74417436#include "CSSGrammar.h"
    74427437
     7438enum CharacterType {
     7439    // Types for the main switch.
     7440
     7441    // The first 4 types must be grouped together, as they
     7442    // represent the allowed chars in an identifier.
     7443    CharacterCaselessU,
     7444    CharacterIdentifierStart,
     7445    CharacterNumber,
     7446    CharacterDash,
     7447
     7448    CharacterOther,
     7449    CharacterWhiteSpace,
     7450    CharacterEndMediaQuery,
     7451    CharacterEndNthChild,
     7452    CharacterQuote,
     7453    CharacterExclamationMark,
     7454    CharacterHashmark,
     7455    CharacterDollar,
     7456    CharacterAsterisk,
     7457    CharacterPlus,
     7458    CharacterDot,
     7459    CharacterSlash,
     7460    CharacterLess,
     7461    CharacterAt,
     7462    CharacterBackSlash,
     7463    CharacterXor,
     7464    CharacterVerticalBar,
     7465    CharacterTilde,
     7466};
     7467
     7468// 128 ASCII codes
     7469static const CharacterType typesOfASCIICharacters[128] = {
     7470/*   0 - Null               */ CharacterOther,
     7471/*   1 - Start of Heading   */ CharacterOther,
     7472/*   2 - Start of Text      */ CharacterOther,
     7473/*   3 - End of Text        */ CharacterOther,
     7474/*   4 - End of Transm.     */ CharacterOther,
     7475/*   5 - Enquiry            */ CharacterOther,
     7476/*   6 - Acknowledgment     */ CharacterOther,
     7477/*   7 - Bell               */ CharacterOther,
     7478/*   8 - Back Space         */ CharacterOther,
     7479/*   9 - Horizontal Tab     */ CharacterWhiteSpace,
     7480/*  10 - Line Feed          */ CharacterWhiteSpace,
     7481/*  11 - Vertical Tab       */ CharacterOther,
     7482/*  12 - Form Feed          */ CharacterWhiteSpace,
     7483/*  13 - Carriage Return    */ CharacterWhiteSpace,
     7484/*  14 - Shift Out          */ CharacterOther,
     7485/*  15 - Shift In           */ CharacterOther,
     7486/*  16 - Data Line Escape   */ CharacterOther,
     7487/*  17 - Device Control 1   */ CharacterOther,
     7488/*  18 - Device Control 2   */ CharacterOther,
     7489/*  19 - Device Control 3   */ CharacterOther,
     7490/*  20 - Device Control 4   */ CharacterOther,
     7491/*  21 - Negative Ack.      */ CharacterOther,
     7492/*  22 - Synchronous Idle   */ CharacterOther,
     7493/*  23 - End of Transmit    */ CharacterOther,
     7494/*  24 - Cancel             */ CharacterOther,
     7495/*  25 - End of Medium      */ CharacterOther,
     7496/*  26 - Substitute         */ CharacterOther,
     7497/*  27 - Escape             */ CharacterOther,
     7498/*  28 - File Separator     */ CharacterOther,
     7499/*  29 - Group Separator    */ CharacterOther,
     7500/*  30 - Record Separator   */ CharacterOther,
     7501/*  31 - Unit Separator     */ CharacterOther,
     7502/*  32 - Space              */ CharacterWhiteSpace,
     7503/*  33 - !                  */ CharacterExclamationMark,
     7504/*  34 - "                  */ CharacterQuote,
     7505/*  35 - #                  */ CharacterHashmark,
     7506/*  36 - $                  */ CharacterDollar,
     7507/*  37 - %                  */ CharacterOther,
     7508/*  38 - &                  */ CharacterOther,
     7509/*  39 - '                  */ CharacterQuote,
     7510/*  40 - (                  */ CharacterOther,
     7511/*  41 - )                  */ CharacterEndNthChild,
     7512/*  42 - *                  */ CharacterAsterisk,
     7513/*  43 - +                  */ CharacterPlus,
     7514/*  44 - ,                  */ CharacterOther,
     7515/*  45 - -                  */ CharacterDash,
     7516/*  46 - .                  */ CharacterDot,
     7517/*  47 - /                  */ CharacterSlash,
     7518/*  48 - 0                  */ CharacterNumber,
     7519/*  49 - 1                  */ CharacterNumber,
     7520/*  50 - 2                  */ CharacterNumber,
     7521/*  51 - 3                  */ CharacterNumber,
     7522/*  52 - 4                  */ CharacterNumber,
     7523/*  53 - 5                  */ CharacterNumber,
     7524/*  54 - 6                  */ CharacterNumber,
     7525/*  55 - 7                  */ CharacterNumber,
     7526/*  56 - 8                  */ CharacterNumber,
     7527/*  57 - 9                  */ CharacterNumber,
     7528/*  58 - :                  */ CharacterOther,
     7529/*  59 - ;                  */ CharacterEndMediaQuery,
     7530/*  60 - <                  */ CharacterLess,
     7531/*  61 - =                  */ CharacterOther,
     7532/*  62 - >                  */ CharacterOther,
     7533/*  63 - ?                  */ CharacterOther,
     7534/*  64 - @                  */ CharacterAt,
     7535/*  65 - A                  */ CharacterIdentifierStart,
     7536/*  66 - B                  */ CharacterIdentifierStart,
     7537/*  67 - C                  */ CharacterIdentifierStart,
     7538/*  68 - D                  */ CharacterIdentifierStart,
     7539/*  69 - E                  */ CharacterIdentifierStart,
     7540/*  70 - F                  */ CharacterIdentifierStart,
     7541/*  71 - G                  */ CharacterIdentifierStart,
     7542/*  72 - H                  */ CharacterIdentifierStart,
     7543/*  73 - I                  */ CharacterIdentifierStart,
     7544/*  74 - J                  */ CharacterIdentifierStart,
     7545/*  75 - K                  */ CharacterIdentifierStart,
     7546/*  76 - L                  */ CharacterIdentifierStart,
     7547/*  77 - M                  */ CharacterIdentifierStart,
     7548/*  78 - N                  */ CharacterIdentifierStart,
     7549/*  79 - O                  */ CharacterIdentifierStart,
     7550/*  80 - P                  */ CharacterIdentifierStart,
     7551/*  81 - Q                  */ CharacterIdentifierStart,
     7552/*  82 - R                  */ CharacterIdentifierStart,
     7553/*  83 - S                  */ CharacterIdentifierStart,
     7554/*  84 - T                  */ CharacterIdentifierStart,
     7555/*  85 - U                  */ CharacterCaselessU,
     7556/*  86 - V                  */ CharacterIdentifierStart,
     7557/*  87 - W                  */ CharacterIdentifierStart,
     7558/*  88 - X                  */ CharacterIdentifierStart,
     7559/*  89 - Y                  */ CharacterIdentifierStart,
     7560/*  90 - Z                  */ CharacterIdentifierStart,
     7561/*  91 - [                  */ CharacterOther,
     7562/*  92 - \                  */ CharacterBackSlash,
     7563/*  93 - ]                  */ CharacterOther,
     7564/*  94 - ^                  */ CharacterXor,
     7565/*  95 - _                  */ CharacterIdentifierStart,
     7566/*  96 - `                  */ CharacterOther,
     7567/*  97 - a                  */ CharacterIdentifierStart,
     7568/*  98 - b                  */ CharacterIdentifierStart,
     7569/*  99 - c                  */ CharacterIdentifierStart,
     7570/* 100 - d                  */ CharacterIdentifierStart,
     7571/* 101 - e                  */ CharacterIdentifierStart,
     7572/* 102 - f                  */ CharacterIdentifierStart,
     7573/* 103 - g                  */ CharacterIdentifierStart,
     7574/* 104 - h                  */ CharacterIdentifierStart,
     7575/* 105 - i                  */ CharacterIdentifierStart,
     7576/* 106 - j                  */ CharacterIdentifierStart,
     7577/* 107 - k                  */ CharacterIdentifierStart,
     7578/* 108 - l                  */ CharacterIdentifierStart,
     7579/* 109 - m                  */ CharacterIdentifierStart,
     7580/* 110 - n                  */ CharacterIdentifierStart,
     7581/* 111 - o                  */ CharacterIdentifierStart,
     7582/* 112 - p                  */ CharacterIdentifierStart,
     7583/* 113 - q                  */ CharacterIdentifierStart,
     7584/* 114 - r                  */ CharacterIdentifierStart,
     7585/* 115 - s                  */ CharacterIdentifierStart,
     7586/* 116 - t                  */ CharacterIdentifierStart,
     7587/* 117 - u                  */ CharacterCaselessU,
     7588/* 118 - v                  */ CharacterIdentifierStart,
     7589/* 119 - w                  */ CharacterIdentifierStart,
     7590/* 120 - x                  */ CharacterIdentifierStart,
     7591/* 121 - y                  */ CharacterIdentifierStart,
     7592/* 122 - z                  */ CharacterIdentifierStart,
     7593/* 123 - {                  */ CharacterEndMediaQuery,
     7594/* 124 - |                  */ CharacterVerticalBar,
     7595/* 125 - }                  */ CharacterOther,
     7596/* 126 - ~                  */ CharacterTilde,
     7597/* 127 - Delete             */ CharacterOther,
     7598};
     7599
     7600// Utility functions for the CSS tokenizer.
     7601
     7602static inline bool isCSSLetter(UChar character)
     7603{
     7604    return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash;
     7605}
     7606
     7607static inline bool isCSSEscape(UChar character)
     7608{
     7609    return character >= ' ' && character != 127;
     7610}
     7611
     7612static inline bool isURILetter(UChar character)
     7613{
     7614    return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!';
     7615}
     7616
     7617static inline bool isIdentifierStartAfterDash(UChar* currentCharacter)
     7618{
     7619    return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128
     7620        || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1]));
     7621}
     7622
     7623static inline bool isEqualToCSSIdentifier(UChar* cssString, const char* constantString)
     7624{
     7625    // Compare a UChar buffer with a zero-terminated string.
     7626    do {
     7627        // The input must be part of an identifier if constantString
     7628        // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'.
     7629        ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-');
     7630        ASSERT(*constantString != '-' || isCSSLetter(*cssString));
     7631        if (toASCIILowerUnchecked(*cssString++) != (*constantString++))
     7632            return false;
     7633    } while (*constantString);
     7634    return true;
     7635}
     7636
     7637static UChar* checkAndSkipEscape(UChar* currentCharacter)
     7638{
     7639    // Returns 0 if the escape check fails. Otherwise it
     7640    // returns a pointer to the character following the escape.
     7641    ASSERT(*currentCharacter == '\\');
     7642
     7643    ++currentCharacter;
     7644    if (!isCSSEscape(*currentCharacter))
     7645        return 0;
     7646
     7647    if (isASCIIHexDigit(*currentCharacter)) {
     7648        int length = 6;
     7649
     7650        do {
     7651            ++currentCharacter;
     7652        } while (isASCIIHexDigit(*currentCharacter) && --length);
     7653
     7654        // Optional space after the escape sequence.
     7655        if (isHTMLSpace(*currentCharacter))
     7656            ++currentCharacter;
     7657        return currentCharacter;
     7658    }
     7659    return currentCharacter + 1;
     7660}
     7661
     7662static inline UChar* skipWhiteSpace(UChar* currentCharacter)
     7663{
     7664    while (isHTMLSpace(*currentCharacter))
     7665        ++currentCharacter;
     7666    return currentCharacter;
     7667}
     7668
     7669// Main CSS tokenizer functions.
     7670
     7671inline bool CSSParser::isIdentifierStart()
     7672{
     7673    // Check whether an identifier is started.
     7674    return isIdentifierStartAfterDash((*m_currentCharacter != '-') ? m_currentCharacter : m_currentCharacter + 1);
     7675}
     7676
     7677inline UChar* CSSParser::checkAndSkipString(UChar* currentCharacter, UChar quote)
     7678{
     7679    // Returns with 0, if string check is failed. Otherwise
     7680    // it returns with the following character. This is necessary
     7681    // since we cannot revert escape sequences, thus strings
     7682    // must be validated before parsing.
     7683    while (true) {
     7684        if (UNLIKELY(*currentCharacter == quote)) {
     7685            // String parsing is successful.
     7686            return currentCharacter + 1;
     7687        }
     7688        if (UNLIKELY(*currentCharacter <= '\r' && (!*currentCharacter || *currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) {
     7689            // String parsing is failed for character '\0', '\n', '\f' or '\r'.
     7690            return 0;
     7691        }
     7692
     7693        if (LIKELY(currentCharacter[0] != '\\'))
     7694            ++currentCharacter;
     7695        else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f')
     7696            currentCharacter += 2;
     7697        else if (currentCharacter[1] == '\r')
     7698            currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
     7699        else {
     7700            currentCharacter = checkAndSkipEscape(currentCharacter);
     7701            if (!currentCharacter)
     7702                return 0;
     7703        }
     7704    }
     7705}
     7706
     7707void CSSParser::parseEscape(UChar*& result)
     7708{
     7709    ASSERT(*m_currentCharacter == '\\' && isCSSEscape(m_currentCharacter[1]));
     7710
     7711    ++m_currentCharacter;
     7712    if (isASCIIHexDigit(*m_currentCharacter)) {
     7713        unsigned unicode = 0;
     7714        int length = 6;
     7715
     7716        do {
     7717            unicode = (unicode << 4) + toASCIIHexValue(*m_currentCharacter++);
     7718        } while (--length && isASCIIHexDigit(*m_currentCharacter));
     7719
     7720        // Characters above 0xffff are not handled.
     7721        if (unicode > 0xffff)
     7722            unicode = 0xfffd;
     7723
     7724        // Optional space after the escape sequence.
     7725        if (isHTMLSpace(*m_currentCharacter))
     7726            ++m_currentCharacter;
     7727        *result = unicode;
     7728    } else
     7729        *result = *m_currentCharacter++;
     7730    ++result;
     7731}
     7732
     7733inline void CSSParser::parseIdentifier(UChar*& result, bool& hasEscape)
     7734{
     7735    // If a valid identifier start is found, we can safely
     7736    // parse the identifier until the next invalid character.
     7737    ASSERT(isIdentifierStart());
     7738    hasEscape = false;
     7739    do {
     7740        if (LIKELY(*m_currentCharacter != '\\'))
     7741            *result++ = *m_currentCharacter++;
     7742        else {
     7743            hasEscape = true;
     7744            parseEscape(result);
     7745        }
     7746    } while (isCSSLetter(m_currentCharacter[0]) || (m_currentCharacter[0] == '\\' && isCSSEscape(m_currentCharacter[1])));
     7747}
     7748
     7749inline void CSSParser::parseString(UChar*& result, UChar quote)
     7750{
     7751    while (true) {
     7752        if (UNLIKELY(*m_currentCharacter == quote)) {
     7753            // String parsing is done.
     7754            ++m_currentCharacter;
     7755            return;
     7756        }
     7757        ASSERT(*m_currentCharacter > '\r' || (*m_currentCharacter < '\n' && *m_currentCharacter) || *m_currentCharacter == '\v');
     7758
     7759        if (LIKELY(m_currentCharacter[0] != '\\'))
     7760            *result++ = *m_currentCharacter++;
     7761        else if (m_currentCharacter[1] == '\n' || m_currentCharacter[1] == '\f')
     7762            m_currentCharacter += 2;
     7763        else if (m_currentCharacter[1] == '\r')
     7764            m_currentCharacter += m_currentCharacter[2] == '\n' ? 3 : 2;
     7765        else
     7766            parseEscape(result);
     7767    }
     7768}
     7769
     7770inline void CSSParser::parseURI(UChar*& start, UChar*& result)
     7771{
     7772    UChar* uriStart = skipWhiteSpace(m_currentCharacter);
     7773
     7774    if (*uriStart == '"' || *uriStart == '\'') {
     7775        UChar quote = *uriStart;
     7776        ++uriStart;
     7777
     7778        UChar* stringEnd = checkAndSkipString(uriStart, quote);
     7779        if (!stringEnd)
     7780            return;
     7781        stringEnd = skipWhiteSpace(stringEnd);
     7782        if (*stringEnd != ')')
     7783            return;
     7784
     7785        start = result = m_currentCharacter = uriStart;
     7786        parseString(result, quote);
     7787
     7788        m_currentCharacter = stringEnd + 1;
     7789        m_token = URI;
     7790    } else {
     7791        UChar* stringEnd = uriStart;
     7792
     7793        while (isURILetter(*stringEnd)) {
     7794            if (*stringEnd != '\\')
     7795                ++stringEnd;
     7796            else {
     7797                stringEnd = checkAndSkipEscape(stringEnd);
     7798                if (!stringEnd)
     7799                    return;
     7800            }
     7801        }
     7802
     7803        stringEnd = skipWhiteSpace(stringEnd);
     7804        if (*stringEnd != ')')
     7805            return;
     7806
     7807        start = result = m_currentCharacter = uriStart;
     7808        while (isURILetter(*m_currentCharacter)) {
     7809            if (LIKELY(*m_currentCharacter != '\\'))
     7810                *result++ = *m_currentCharacter++;
     7811            else
     7812                parseEscape(result);
     7813        }
     7814
     7815        m_currentCharacter = stringEnd + 1;
     7816        m_token = URI;
     7817    }
     7818}
     7819
     7820inline bool CSSParser::parseUnicodeRange()
     7821{
     7822    UChar* currentCharacter = m_currentCharacter + 1;
     7823    int length = 6;
     7824    ASSERT(*m_currentCharacter == '+');
     7825
     7826    while (isASCIIHexDigit(*currentCharacter) && length) {
     7827        ++currentCharacter;
     7828        --length;
     7829    }
     7830
     7831    if (length && *currentCharacter == '?') {
     7832        // At most 5 hex digit followed by a question mark.
     7833        do {
     7834            ++currentCharacter;
     7835            --length;
     7836        } while (*currentCharacter == '?' && length);
     7837        m_currentCharacter = currentCharacter;
     7838        return true;
     7839    }
     7840
     7841    if (length < 6) {
     7842        // At least one hex digit.
     7843        if (currentCharacter[0] == '-' && isASCIIHexDigit(currentCharacter[1])) {
     7844            // Followed by a dash and a hex digit.
     7845            ++currentCharacter;
     7846            length = 6;
     7847            do {
     7848                ++currentCharacter;
     7849            } while (--length && isASCIIHexDigit(*currentCharacter));
     7850        }
     7851        m_currentCharacter = currentCharacter;
     7852        return true;
     7853    }
     7854    return false;
     7855}
     7856
     7857bool CSSParser::parseNthChild()
     7858{
     7859    UChar* currentCharacter = m_currentCharacter;
     7860
     7861    while (isASCIIDigit(*currentCharacter))
     7862        ++currentCharacter;
     7863    if (isASCIIAlphaCaselessEqual(*currentCharacter, 'n')) {
     7864        m_currentCharacter = currentCharacter + 1;
     7865        return true;
     7866    }
     7867    return false;
     7868}
     7869
     7870bool CSSParser::parseNthChildExtra()
     7871{
     7872    UChar* currentCharacter = skipWhiteSpace(m_currentCharacter);
     7873    if (*currentCharacter != '+' && *currentCharacter != '-')
     7874        return false;
     7875
     7876    currentCharacter = skipWhiteSpace(currentCharacter + 1);
     7877    if (!isASCIIDigit(*currentCharacter))
     7878        return false;
     7879
     7880    do {
     7881        ++currentCharacter;
     7882    } while (isASCIIDigit(*currentCharacter));
     7883
     7884    m_currentCharacter = currentCharacter;
     7885    return true;
     7886}
     7887
     7888inline void CSSParser::detectFunctionTypeToken(int length)
     7889{
     7890    ASSERT(length > 0);
     7891    UChar* name = m_tokenStart;
     7892
     7893    switch (length) {
     7894    case 3:
     7895        if (isASCIIAlphaCaselessEqual(name[0], 'n') && isASCIIAlphaCaselessEqual(name[1], 'o') && isASCIIAlphaCaselessEqual(name[2], 't'))
     7896            m_token = NOTFUNCTION;
     7897        else if (isASCIIAlphaCaselessEqual(name[0], 'u') && isASCIIAlphaCaselessEqual(name[1], 'r') && isASCIIAlphaCaselessEqual(name[2], 'l'))
     7898            m_token = URI;
     7899        return;
     7900
     7901    case 9:
     7902        if (isEqualToCSSIdentifier(name, "nth-child"))
     7903            m_parsingMode = NthChildMode;
     7904        return;
     7905
     7906    case 11:
     7907        if (isEqualToCSSIdentifier(name, "nth-of-type"))
     7908            m_parsingMode = NthChildMode;
     7909        return;
     7910
     7911    case 14:
     7912        if (isEqualToCSSIdentifier(name, "nth-last-child"))
     7913            m_parsingMode = NthChildMode;
     7914        return;
     7915
     7916    case 16:
     7917        if (isEqualToCSSIdentifier(name, "nth-last-of-type"))
     7918            m_parsingMode = NthChildMode;
     7919        return;
     7920    }
     7921}
     7922
     7923inline void CSSParser::detectMediaQueryToken(int length)
     7924{
     7925    ASSERT(m_parsingMode == MediaQueryMode);
     7926    UChar* name = m_tokenStart;
     7927
     7928    if (length == 3) {
     7929        if (isASCIIAlphaCaselessEqual(name[0], 'a') && isASCIIAlphaCaselessEqual(name[1], 'n') && isASCIIAlphaCaselessEqual(name[2], 'd'))
     7930            m_token = MEDIA_AND;
     7931        else if (isASCIIAlphaCaselessEqual(name[0], 'n') && isASCIIAlphaCaselessEqual(name[1], 'o') && isASCIIAlphaCaselessEqual(name[2], 't'))
     7932            m_token = MEDIA_NOT;
     7933    } else if (length == 4) {
     7934        if (isASCIIAlphaCaselessEqual(name[0], 'o') && isASCIIAlphaCaselessEqual(name[1], 'n')
     7935                && isASCIIAlphaCaselessEqual(name[2], 'l') && isASCIIAlphaCaselessEqual(name[3], 'y'))
     7936            m_token = MEDIA_ONLY;
     7937    }
     7938}
     7939
     7940inline void CSSParser::detectNumberToken(UChar* type, int length)
     7941{
     7942    ASSERT(length > 0);
     7943
     7944    switch (toASCIILowerUnchecked(type[0])) {
     7945    case 'c':
     7946        if (length == 2 && isASCIIAlphaCaselessEqual(type[1], 'm'))
     7947            m_token = CMS;
     7948        return;
     7949
     7950    case 'd':
     7951        if (length == 3 && isASCIIAlphaCaselessEqual(type[1], 'e') && isASCIIAlphaCaselessEqual(type[2], 'g'))
     7952            m_token = DEGS;
     7953        return;
     7954
     7955    case 'e':
     7956        if (length == 2) {
     7957            if (isASCIIAlphaCaselessEqual(type[1], 'm'))
     7958                m_token = EMS;
     7959            else if (isASCIIAlphaCaselessEqual(type[1], 'x'))
     7960                m_token = EXS;
     7961        }
     7962        return;
     7963
     7964    case 'g':
     7965        if (length == 4 && isASCIIAlphaCaselessEqual(type[1], 'r')
     7966                && isASCIIAlphaCaselessEqual(type[2], 'a') && isASCIIAlphaCaselessEqual(type[3], 'd'))
     7967            m_token = GRADS;
     7968        return;
     7969
     7970    case 'h':
     7971        if (length == 2 && isASCIIAlphaCaselessEqual(type[1], 'z'))
     7972            m_token = HERTZ;
     7973        return;
     7974
     7975    case 'i':
     7976        if (length == 2 && isASCIIAlphaCaselessEqual(type[1], 'n'))
     7977            m_token = INS;
     7978        return;
     7979
     7980    case 'k':
     7981        if (length == 3 && isASCIIAlphaCaselessEqual(type[1], 'h') && isASCIIAlphaCaselessEqual(type[2], 'z'))
     7982            m_token = KHERTZ;
     7983        return;
     7984
     7985    case 'm':
     7986        if (length == 2) {
     7987            if (isASCIIAlphaCaselessEqual(type[1], 'm'))
     7988                m_token = MMS;
     7989            else if (isASCIIAlphaCaselessEqual(type[1], 's'))
     7990                m_token = MSECS;
     7991        }
     7992        return;
     7993
     7994    case 'p':
     7995        if (length == 2) {
     7996            if (isASCIIAlphaCaselessEqual(type[1], 'x'))
     7997                m_token = PXS;
     7998            else if (isASCIIAlphaCaselessEqual(type[1], 't'))
     7999                m_token = PTS;
     8000            else if (isASCIIAlphaCaselessEqual(type[1], 'c'))
     8001                m_token = PCS;
     8002        }
     8003        return;
     8004
     8005    case 'r':
     8006        if (length == 3) {
     8007            if (isASCIIAlphaCaselessEqual(type[1], 'a') && isASCIIAlphaCaselessEqual(type[2], 'd'))
     8008                m_token = RADS;
     8009            else if (isASCIIAlphaCaselessEqual(type[1], 'e') && isASCIIAlphaCaselessEqual(type[2], 'm'))
     8010                m_token = REMS;
     8011        }
     8012        return;
     8013
     8014    case 's':
     8015        if (length == 1)
     8016            m_token = SECS;
     8017        return;
     8018
     8019    case 't':
     8020        if (length == 4 && isASCIIAlphaCaselessEqual(type[1], 'u')
     8021                && isASCIIAlphaCaselessEqual(type[2], 'r') && isASCIIAlphaCaselessEqual(type[3], 'n'))
     8022            m_token = TURNS;
     8023        return;
     8024
     8025    default:
     8026        if (type[0] == '_' && length == 5 && type[1] == '_' && isASCIIAlphaCaselessEqual(type[2], 'q')
     8027                && isASCIIAlphaCaselessEqual(type[3], 'e') && isASCIIAlphaCaselessEqual(type[4], 'm'))
     8028            m_token = QEMS;
     8029        return;
     8030    }
     8031}
     8032
     8033inline void CSSParser::detectDashToken(int length)
     8034{
     8035    UChar* name = m_tokenStart;
     8036
     8037    if (length == 11) {
     8038        if (isASCIIAlphaCaselessEqual(name[10], 'y') && isEqualToCSSIdentifier(name + 1, "webkit-an"))
     8039            m_token = ANYFUNCTION;
     8040        else if (isASCIIAlphaCaselessEqual(name[10], 'n') && isEqualToCSSIdentifier(name + 1, "webkit-mi"))
     8041            m_token = MINFUNCTION;
     8042        else if (isASCIIAlphaCaselessEqual(name[10], 'x') && isEqualToCSSIdentifier(name + 1, "webkit-ma"))
     8043            m_token = MAXFUNCTION;
     8044    } else if (length == 12 && isEqualToCSSIdentifier(name + 1, "webkit-calc"))
     8045        m_token = CALCFUNCTION;
     8046}
     8047
     8048inline void CSSParser::detectAtToken(int length, bool hasEscape)
     8049{
     8050    UChar* name = m_tokenStart;
     8051    ASSERT(name[0] == '@' && length >= 2);
     8052
     8053    // charset, font-face, import, media, namespace, page,
     8054    // -webkit-keyframes, and -webkit-mediaquery are not affected by hasEscape.
     8055    switch (toASCIILowerUnchecked(name[1])) {
     8056    case 'b':
     8057        if (hasEscape)
     8058            return;
     8059
     8060        switch (length) {
     8061        case 12:
     8062            if (isEqualToCSSIdentifier(name + 2, "ottom-left"))
     8063                m_token = BOTTOMLEFT_SYM;
     8064            return;
     8065
     8066        case 13:
     8067            if (isEqualToCSSIdentifier(name + 2, "ottom-right"))
     8068                m_token = BOTTOMRIGHT_SYM;
     8069            return;
     8070
     8071        case 14:
     8072            if (isEqualToCSSIdentifier(name + 2, "ottom-center"))
     8073                m_token = BOTTOMCENTER_SYM;
     8074            return;
     8075
     8076        case 19:
     8077            if (isEqualToCSSIdentifier(name + 2, "ottom-left-corner"))
     8078                m_token = BOTTOMLEFTCORNER_SYM;
     8079            return;
     8080
     8081        case 20:
     8082            if (isEqualToCSSIdentifier(name + 2, "ottom-right-corner"))
     8083                m_token = BOTTOMRIGHTCORNER_SYM;
     8084            return;
     8085        }
     8086        return;
     8087
     8088    case 'c':
     8089        if (length == 8 && isEqualToCSSIdentifier(name + 2, "harset"))
     8090            m_token = CHARSET_SYM;
     8091        return;
     8092
     8093    case 'f':
     8094        if (length == 10 && isEqualToCSSIdentifier(name + 2, "ont-face"))
     8095            m_token = FONT_FACE_SYM;
     8096        return;
     8097
     8098    case 'i':
     8099        if (length == 7 && isEqualToCSSIdentifier(name + 2, "mport")) {
     8100            m_parsingMode = MediaQueryMode;
     8101            m_token = IMPORT_SYM;
     8102        }
     8103        return;
     8104
     8105    case 'l':
     8106        if (hasEscape)
     8107            return;
     8108
     8109        if (length == 9) {
     8110            if (isEqualToCSSIdentifier(name + 2, "eft-top"))
     8111                m_token = LEFTTOP_SYM;
     8112        } else if (length == 12) {
     8113            // Checking the last character first could further reduce the possibile cases.
     8114            if (isASCIIAlphaCaselessEqual(name[11], 'e') && isEqualToCSSIdentifier(name + 2, "eft-middl"))
     8115                m_token = LEFTMIDDLE_SYM;
     8116            else if (isASCIIAlphaCaselessEqual(name[11], 'm') && isEqualToCSSIdentifier(name + 2, "eft-botto"))
     8117                m_token = LEFTBOTTOM_SYM;
     8118        }
     8119        return;
     8120
     8121    case 'm':
     8122        if (length == 6 && isEqualToCSSIdentifier(name + 2, "edia")) {
     8123            m_parsingMode = MediaQueryMode;
     8124            m_token = MEDIA_SYM;
     8125        }
     8126        return;
     8127
     8128    case 'n':
     8129        if (length == 10 && isEqualToCSSIdentifier(name + 2, "amespace"))
     8130            m_token = NAMESPACE_SYM;
     8131        return;
     8132
     8133    case 'p':
     8134        if (length == 5 && isEqualToCSSIdentifier(name + 2, "age"))
     8135            m_token = PAGE_SYM;
     8136        return;
     8137
     8138    case 'r':
     8139        if (hasEscape)
     8140            return;
     8141
     8142        if (length == 10) {
     8143            if (isEqualToCSSIdentifier(name + 2, "ight-top"))
     8144                m_token = RIGHTTOP_SYM;
     8145        } else if (length == 13) {
     8146            // Checking the last character first could further reduce the possibile cases.
     8147            if (isASCIIAlphaCaselessEqual(name[12], 'e') && isEqualToCSSIdentifier(name + 2, "ight-middl"))
     8148                m_token = RIGHTMIDDLE_SYM;
     8149            else if (isASCIIAlphaCaselessEqual(name[12], 'm') && isEqualToCSSIdentifier(name + 2, "ight-botto"))
     8150                m_token = RIGHTBOTTOM_SYM;
     8151        }
     8152        return;
     8153
     8154    case 't':
     8155        if (hasEscape)
     8156            return;
     8157
     8158        switch (length) {
     8159        case 9:
     8160            if (isEqualToCSSIdentifier(name + 2, "op-left"))
     8161                m_token = TOPLEFT_SYM;
     8162            return;
     8163
     8164        case 10:
     8165            if (isEqualToCSSIdentifier(name + 2, "op-right"))
     8166                m_token = TOPRIGHT_SYM;
     8167            return;
     8168
     8169        case 11:
     8170            if (isEqualToCSSIdentifier(name + 2, "op-center"))
     8171                m_token = TOPCENTER_SYM;
     8172            return;
     8173
     8174        case 16:
     8175            if (isEqualToCSSIdentifier(name + 2, "op-left-corner"))
     8176                m_token = TOPLEFTCORNER_SYM;
     8177            return;
     8178
     8179        case 17:
     8180            if (isEqualToCSSIdentifier(name + 2, "op-right-corner"))
     8181                m_token = TOPRIGHTCORNER_SYM;
     8182            return;
     8183        }
     8184        return;
     8185
     8186    case '-':
     8187        switch (length) {
     8188        case 13:
     8189            if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-rule"))
     8190                m_token = WEBKIT_RULE_SYM;
     8191            return;
     8192
     8193        case 14:
     8194            if (hasEscape)
     8195                return;
     8196
     8197            // Checking the last character first could further reduce the possibile cases.
     8198            if (isASCIIAlphaCaselessEqual(name[13], 's') && isEqualToCSSIdentifier(name + 2, "webkit-decl"))
     8199                m_token = WEBKIT_DECLS_SYM;
     8200            else if (isASCIIAlphaCaselessEqual(name[13], 'e') && isEqualToCSSIdentifier(name + 2, "webkit-valu"))
     8201                m_token = WEBKIT_VALUE_SYM;
     8202            return;
     8203
     8204        case 15:
     8205            if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-region"))
     8206                m_token = WEBKIT_REGION_RULE_SYM;
     8207            return;
     8208
     8209        case 17:
     8210            if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-selector"))
     8211                m_token = WEBKIT_SELECTOR_SYM;
     8212            return;
     8213
     8214        case 18:
     8215            if (isEqualToCSSIdentifier(name + 2, "webkit-keyframes"))
     8216                m_token = WEBKIT_KEYFRAMES_SYM;
     8217            return;
     8218
     8219        case 19:
     8220            if (isEqualToCSSIdentifier(name + 2, "webkit-mediaquery")) {
     8221                m_parsingMode = MediaQueryMode;
     8222                m_token = WEBKIT_MEDIAQUERY_SYM;
     8223            }
     8224            return;
     8225
     8226        case 22:
     8227            if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-keyframe-rule"))
     8228                m_token = WEBKIT_KEYFRAME_RULE_SYM;
     8229            return;
     8230        }
     8231    }
     8232}
     8233
    74438234int CSSParser::lex(void* yylvalWithoutType)
    74448235{
    74458236    YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);
    7446     int length;
    7447 
    7448     lex();
    7449 
    7450     UChar* t = text(&length);
    7451 
     8237    // Write pointer for the next character.
     8238    UChar* result;
     8239    bool hasEscape;
     8240
     8241    // The input buffer is terminated by two \0, so
     8242    // it is safe to read two characters ahead anytime.
     8243
     8244#ifndef NDEBUG
     8245    // In debug we check with an ASSERT that the length is > 0 for string types.
     8246    yylval->string.characters = 0;
     8247    yylval->string.length = 0;
     8248#endif
     8249
     8250restartAfterComment:
     8251    m_tokenStart = result = m_currentCharacter;
     8252    m_token = *m_currentCharacter;
     8253    ++m_currentCharacter;
     8254
     8255    switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
     8256    case CharacterCaselessU:
     8257        if (UNLIKELY(*m_currentCharacter == '+'))
     8258            if (parseUnicodeRange()) {
     8259                m_token = UNICODERANGE;
     8260                yylval->string.characters = m_tokenStart;
     8261                yylval->string.length = m_currentCharacter - m_tokenStart;
     8262                break;
     8263            }
     8264        // Fall through to CharacterIdentifierStart.
     8265
     8266    case CharacterIdentifierStart:
     8267        --m_currentCharacter;
     8268        parseIdentifier(result, hasEscape);
     8269        m_token = IDENT;
     8270
     8271        yylval->string.characters = m_tokenStart;
     8272        yylval->string.length = result - m_tokenStart;
     8273
     8274        if (UNLIKELY(*m_currentCharacter == '(')) {
     8275            m_token = FUNCTION;
     8276            if (!hasEscape)
     8277                detectFunctionTypeToken(result - m_tokenStart);
     8278            ++m_currentCharacter;
     8279            ++result;
     8280            ++yylval->string.length;
     8281
     8282            if (token() == URI) {
     8283                m_token = FUNCTION;
     8284                // Check whether it is really an URI.
     8285                parseURI(yylval->string.characters, result);
     8286                yylval->string.length = result - yylval->string.characters;
     8287            }
     8288        } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
     8289            if (m_parsingMode == MediaQueryMode)
     8290                detectMediaQueryToken(result - m_tokenStart);
     8291            else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(m_tokenStart[0], 'n')) {
     8292                if (result - m_tokenStart == 1) {
     8293                    // String "n" is IDENT but "n+1" is NTH.
     8294                    if (parseNthChildExtra()) {
     8295                        m_token = NTH;
     8296                        yylval->string.length = m_currentCharacter - m_tokenStart;
     8297                    }
     8298                } else if (result - m_tokenStart == 2 && m_tokenStart[1] == '-') {
     8299                    // String "n-" is IDENT but "n-1" is NTH.
     8300                    // Speculatively decrease m_currentCharacter to detect an nth-child token.
     8301                    m_currentCharacter--;
     8302                    if (parseNthChildExtra()) {
     8303                        m_token = NTH;
     8304                        yylval->string.length = m_currentCharacter - m_tokenStart;
     8305                    } else {
     8306                        // Revert the change to m_currentCharacter if unsuccessful.
     8307                        m_currentCharacter++;
     8308                    }
     8309                }
     8310            }
     8311        }
     8312        break;
     8313
     8314    case CharacterDot:
     8315        if (!isASCIIDigit(m_currentCharacter[0]))
     8316            break;
     8317        // Fall through to CharacterNumber.
     8318
     8319    case CharacterNumber: {
     8320        bool dotSeen = (m_token == '.');
     8321
     8322        while (true) {
     8323            if (!isASCIIDigit(m_currentCharacter[0])) {
     8324                // Only one dot is allowed for a number,
     8325                // and it must be followed by a digit.
     8326                if (m_currentCharacter[0] != '.' || dotSeen || !isASCIIDigit(m_currentCharacter[1]))
     8327                    break;
     8328                dotSeen = true;
     8329            }
     8330            ++m_currentCharacter;
     8331        }
     8332
     8333        if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*m_currentCharacter, 'n')) {
     8334            // "[0-9]+n" is always an NthChild.
     8335            ++m_currentCharacter;
     8336            parseNthChildExtra();
     8337            m_token = NTH;
     8338            yylval->string.characters = m_tokenStart;
     8339            yylval->string.length = m_currentCharacter - m_tokenStart;
     8340            break;
     8341        }
     8342
     8343        yylval->number = charactersToDouble(m_tokenStart, m_currentCharacter - m_tokenStart);
     8344
     8345        // Type of the function.
     8346        if (isIdentifierStart()) {
     8347            UChar* type = m_currentCharacter;
     8348            result = m_currentCharacter;
     8349
     8350            parseIdentifier(result, hasEscape);
     8351            if (*m_currentCharacter == '+') {
     8352                // Any identifier followed by a '+' sign is an invalid dimension.
     8353                ++m_currentCharacter;
     8354                m_token = INVALIDDIMEN;
     8355            } else {
     8356                m_token = DIMEN;
     8357                if (!hasEscape)
     8358                    detectNumberToken(type, m_currentCharacter - type);
     8359
     8360                if (m_token == DIMEN) {
     8361                    // The decoded number is overwritten, but this is intentional.
     8362                    yylval->string.characters = m_tokenStart;
     8363                    yylval->string.length = m_currentCharacter - m_tokenStart;
     8364                }
     8365            }
     8366        } else if (*m_currentCharacter == '%') {
     8367            // Although the CSS grammar says {num}% we follow
     8368            // webkit at the moment which uses {num}%+.
     8369            do {
     8370                ++m_currentCharacter;
     8371            } while (*m_currentCharacter == '%');
     8372            m_token = PERCENTAGE;
     8373        } else
     8374            m_token = dotSeen ? FLOATTOKEN : INTEGER;
     8375        break;
     8376    }
     8377
     8378    case CharacterDash:
     8379        if (isIdentifierStartAfterDash(m_currentCharacter)) {
     8380            --m_currentCharacter;
     8381            parseIdentifier(result, hasEscape);
     8382            m_token = IDENT;
     8383
     8384            if (*m_currentCharacter == '(') {
     8385                m_token = FUNCTION;
     8386                if (!hasEscape)
     8387                    detectDashToken(result - m_tokenStart);
     8388                ++m_currentCharacter;
     8389                ++result;
     8390            } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(m_tokenStart[1], 'n')) {
     8391                if (result - m_tokenStart == 2) {
     8392                    // String "-n" is IDENT but "-n+1" is NTH.
     8393                    if (parseNthChildExtra()) {
     8394                        m_token = NTH;
     8395                        result = m_currentCharacter;
     8396                    }
     8397                } else if (result - m_tokenStart == 3 && m_tokenStart[2] == '-') {
     8398                    // String "-n-" is IDENT but "-n-1" is NTH.
     8399                    // Speculatively decrease m_currentCharacter to detect an nth-child token.
     8400                    m_currentCharacter--;
     8401                    if (parseNthChildExtra()) {
     8402                        m_token = NTH;
     8403                        yylval->string.length = m_currentCharacter - m_tokenStart;
     8404                    } else {
     8405                        // Revert the change to m_currentCharacter if unsuccessful.
     8406                        m_currentCharacter++;
     8407                    }
     8408                }
     8409            }
     8410            yylval->string.characters = m_tokenStart;
     8411            yylval->string.length = result - m_tokenStart;
     8412        } else if (m_currentCharacter[0] == '-' && m_currentCharacter[1] == '>') {
     8413            m_currentCharacter += 2;
     8414            m_token = SGML_CD;
     8415        } else if (UNLIKELY(m_parsingMode == NthChildMode)) {
     8416            // "-[0-9]+n" is always an NthChild.
     8417            if (parseNthChild()) {
     8418                parseNthChildExtra();
     8419                m_token = NTH;
     8420                yylval->string.characters = m_tokenStart;
     8421                yylval->string.length = m_currentCharacter - m_tokenStart;
     8422            }
     8423        }
     8424        break;
     8425
     8426    case CharacterOther:
     8427        // m_token is simply the current character.
     8428        break;
     8429
     8430    case CharacterWhiteSpace:
     8431        m_token = WHITESPACE;
     8432        // Might start with a '\n'.
     8433        --m_currentCharacter;
     8434        do {
     8435            if (*m_currentCharacter == '\n')
     8436                ++m_lineNumber;
     8437            ++m_currentCharacter;
     8438        } while (*m_currentCharacter <= ' ' && (typesOfASCIICharacters[*m_currentCharacter] == CharacterWhiteSpace));
     8439        break;
     8440
     8441    case CharacterEndMediaQuery:
     8442        if (m_parsingMode == MediaQueryMode)
     8443            m_parsingMode = NormalMode;
     8444        break;
     8445
     8446    case CharacterEndNthChild:
     8447        if (m_parsingMode == NthChildMode)
     8448            m_parsingMode = NormalMode;
     8449        break;
     8450
     8451    case CharacterQuote:
     8452        if (checkAndSkipString(m_currentCharacter, m_token)) {
     8453            ++result;
     8454            parseString(result, m_token);
     8455            m_token = STRING;
     8456            yylval->string.characters = m_tokenStart + 1;
     8457            yylval->string.length = result - (m_tokenStart + 1);
     8458        }
     8459        break;
     8460
     8461    case CharacterExclamationMark: {
     8462        UChar* start = skipWhiteSpace(m_currentCharacter);
     8463        if (isEqualToCSSIdentifier(start, "important")) {
     8464            m_token = IMPORTANT_SYM;
     8465            m_currentCharacter = start + 9;
     8466        }
     8467        break;
     8468    }
     8469
     8470    case CharacterHashmark: {
     8471        UChar* start = m_currentCharacter;
     8472        result = m_currentCharacter;
     8473
     8474        if (isASCIIDigit(*m_currentCharacter)) {
     8475            // This must be a valid hex number token.
     8476            do {
     8477                ++m_currentCharacter;
     8478            } while (isASCIIHexDigit(*m_currentCharacter));
     8479            m_token = HEX;
     8480            yylval->string.characters = start;
     8481            yylval->string.length = m_currentCharacter - start;
     8482        } else if (isIdentifierStart()) {
     8483            m_token = IDSEL;
     8484            parseIdentifier(result, hasEscape);
     8485            if (!hasEscape) {
     8486                // Check whether the identifier is also a valid hex number.
     8487                UChar* current = start;
     8488                m_token = HEX;
     8489                do {
     8490                    if (!isASCIIHexDigit(*current)) {
     8491                        m_token = IDSEL;
     8492                        break;
     8493                    }
     8494                    ++current;
     8495                } while (current < result);
     8496            }
     8497            yylval->string.characters = start;
     8498            yylval->string.length = result - start;
     8499        }
     8500        break;
     8501    }
     8502
     8503    case CharacterSlash:
     8504        // Ignore comments. They are not even considered as white spaces.
     8505        if (*m_currentCharacter == '*') {
     8506            ++m_currentCharacter;
     8507            while (m_currentCharacter[0] != '*' || m_currentCharacter[1] != '/') {
     8508                if (m_currentCharacter[0] == '\n')
     8509                    ++m_lineNumber;
     8510                if (m_currentCharacter[0] == '\0' && m_currentCharacter[1] == '\0') {
     8511                    // Unterminated comments are simply ignored.
     8512                    m_currentCharacter -= 2;
     8513                    break;
     8514                }
     8515                ++m_currentCharacter;
     8516            }
     8517            m_currentCharacter += 2;
     8518            goto restartAfterComment;
     8519        }
     8520        break;
     8521
     8522    case CharacterDollar:
     8523        if (*m_currentCharacter == '=') {
     8524            ++m_currentCharacter;
     8525            m_token = ENDSWITH;
     8526        }
     8527        break;
     8528
     8529    case CharacterAsterisk:
     8530        if (*m_currentCharacter == '=') {
     8531            ++m_currentCharacter;
     8532            m_token = CONTAINS;
     8533        }
     8534        break;
     8535
     8536    case CharacterPlus:
     8537        if (UNLIKELY(m_parsingMode == NthChildMode)) {
     8538            // Simplest case. "+[0-9]*n" is always NthChild.
     8539            if (parseNthChild()) {
     8540                parseNthChildExtra();
     8541                m_token = NTH;
     8542                yylval->string.characters = m_tokenStart;
     8543                yylval->string.length = m_currentCharacter - m_tokenStart;
     8544            }
     8545        }
     8546        break;
     8547
     8548    case CharacterLess:
     8549        if (m_currentCharacter[0] == '!' && m_currentCharacter[1] == '-' && m_currentCharacter[2] == '-') {
     8550            m_currentCharacter += 3;
     8551            m_token = SGML_CD;
     8552        }
     8553        break;
     8554
     8555    case CharacterAt:
     8556        if (isIdentifierStart()) {
     8557            m_token = ATKEYWORD;
     8558            ++result;
     8559            parseIdentifier(result, hasEscape);
     8560            detectAtToken(result - m_tokenStart, hasEscape);
     8561        }
     8562        break;
     8563
     8564    case CharacterBackSlash:
     8565        if (isCSSEscape(*m_currentCharacter)) {
     8566            --m_currentCharacter;
     8567            parseIdentifier(result, hasEscape);
     8568            m_token = IDENT;
     8569            yylval->string.characters = m_tokenStart;
     8570            yylval->string.length = result - m_tokenStart;
     8571        }
     8572        break;
     8573
     8574    case CharacterXor:
     8575        if (*m_currentCharacter == '=') {
     8576            ++m_currentCharacter;
     8577            m_token = BEGINSWITH;
     8578        }
     8579        break;
     8580
     8581    case CharacterVerticalBar:
     8582        if (*m_currentCharacter == '=') {
     8583            ++m_currentCharacter;
     8584            m_token = DASHMATCH;
     8585        }
     8586        break;
     8587
     8588    case CharacterTilde:
     8589        if (*m_currentCharacter == '=') {
     8590            ++m_currentCharacter;
     8591            m_token = INCLUDES;
     8592        }
     8593        break;
     8594
     8595    default:
     8596        ASSERT_NOT_REACHED();
     8597        break;
     8598    }
     8599
     8600#ifndef NDEBUG
    74528601    switch (token()) {
    7453     case WHITESPACE:
    7454     case SGML_CD:
    7455     case INCLUDES:
    7456     case DASHMATCH:
    7457         break;
    7458 
    7459     case URI:
    74608602    case STRING:
     8603        ASSERT(yylval->string.characters == m_tokenStart + 1);
     8604        break;
     8605
    74618606    case IDENT:
    74628607    case NTH:
    7463     case HEX:
    7464     case IDSEL:
    74658608    case DIMEN:
    74668609    case UNICODERANGE:
     
    74718614    case MINFUNCTION:
    74728615    case MAXFUNCTION:
    7473         yylval->string.characters = t;
    7474         yylval->string.length = length;
    7475         break;
    7476 
    7477     case IMPORT_SYM:
    7478     case PAGE_SYM:
    7479     case MEDIA_SYM:
    7480     case FONT_FACE_SYM:
    7481     case CHARSET_SYM:
    7482     case NAMESPACE_SYM:
    7483     case WEBKIT_KEYFRAMES_SYM:
    7484 
    7485     case IMPORTANT_SYM:
    7486         break;
    7487 
    7488     case QEMS:
    7489         length--;
    7490     case GRADS:
    7491     case TURNS:
    7492         length--;
    7493     case DEGS:
    7494     case RADS:
    7495     case KHERTZ:
    7496     case REMS:
    7497         length--;
    7498     case MSECS:
    7499     case HERTZ:
    7500     case EMS:
    7501     case EXS:
    7502     case PXS:
    7503     case CMS:
    7504     case MMS:
    7505     case INS:
    7506     case PTS:
    7507     case PCS:
    7508         length--;
    7509     case SECS:
    7510     case PERCENTAGE:
    7511         length--;
    7512     case FLOATTOKEN:
    7513     case INTEGER:
    7514         yylval->number = charactersToDouble(t, length);
    7515         break;
    7516 
    7517     default:
    7518         break;
    7519     }
    7520 
    7521     return token();
    7522 }
    7523 
    7524 void CSSParser::recheckAtKeyword(const UChar* str, int len)
    7525 {
    7526     String ruleName(str, len);
    7527     if (equalIgnoringCase(ruleName, "@import"))
    7528         yyTok = IMPORT_SYM;
    7529     else if (equalIgnoringCase(ruleName, "@page"))
    7530         yyTok = PAGE_SYM;
    7531     else if (equalIgnoringCase(ruleName, "@media"))
    7532         yyTok = MEDIA_SYM;
    7533     else if (equalIgnoringCase(ruleName, "@font-face"))
    7534         yyTok = FONT_FACE_SYM;
    7535     else if (equalIgnoringCase(ruleName, "@charset"))
    7536         yyTok = CHARSET_SYM;
    7537     else if (equalIgnoringCase(ruleName, "@namespace"))
    7538         yyTok = NAMESPACE_SYM;
    7539     else if (equalIgnoringCase(ruleName, "@-webkit-keyframes"))
    7540         yyTok = WEBKIT_KEYFRAMES_SYM;
    7541     else if (equalIgnoringCase(ruleName, "@-webkit-mediaquery"))
    7542         yyTok = WEBKIT_MEDIAQUERY_SYM;
    7543 }
    7544 
    7545 UChar* CSSParser::text(int *length)
    7546 {
    7547     UChar* start = yytext;
    7548     int l = yyleng;
    7549     switch (yyTok) {
    7550     case STRING:
    7551         l--;
    7552         /* nobreak */
     8616        ASSERT(yylval->string.characters == m_tokenStart && yylval->string.length > 0);
     8617        break;
     8618
     8619    case URI:
     8620        ASSERT(yylval->string.characters && yylval->string.characters != m_tokenStart);
     8621        break;
     8622
    75538623    case HEX:
    75548624    case IDSEL:
    7555         start++;
    7556         l--;
    7557         break;
    7558     case URI:
    7559         // "url("{w}{string}{w}")"
    7560         // "url("{w}{url}{w}")"
    7561         // strip "url(" and ")"
    7562         start += 4;
    7563         l -= 5;
    7564         // strip {w}
    7565         while (l && isHTMLSpace(*start)) {
    7566             ++start;
    7567             --l;
    7568         }
    7569         while (l && isHTMLSpace(start[l - 1]))
    7570             --l;
    7571         if (l && (*start == '"' || *start == '\'')) {
    7572             ASSERT(l >= 2 && start[l - 1] == *start);
    7573             ++start;
    7574             l -= 2;
    7575         }
    7576         break;
    7577     default:
    7578         break;
    7579     }
    7580 
    7581     // process escapes
    7582     UChar* out = start;
    7583     UChar* escape = 0;
    7584 
    7585     bool sawEscape = false;
    7586 
    7587     for (int i = 0; i < l; i++) {
    7588         UChar* current = start + i;
    7589         if (escape == current - 1) {
    7590             if (isASCIIHexDigit(*current))
    7591                 continue;
    7592             if (yyTok == STRING &&
    7593                  (*current == '\n' || *current == '\r' || *current == '\f')) {
    7594                 // ### handle \r\n case
    7595                 if (*current != '\r')
    7596                     escape = 0;
    7597                 continue;
    7598             }
    7599             // in all other cases copy the char to output
    7600             // ###
    7601             *out++ = *current;
    7602             escape = 0;
    7603             continue;
    7604         }
    7605         if (escape == current - 2 && yyTok == STRING &&
    7606              *(current-1) == '\r' && *current == '\n') {
    7607             escape = 0;
    7608             continue;
    7609         }
    7610         if (escape > current - 7 && isASCIIHexDigit(*current))
    7611             continue;
    7612         if (escape) {
    7613             // add escaped char
    7614             unsigned uc = 0;
    7615             escape++;
    7616             while (escape < current) {
    7617                 uc *= 16;
    7618                 uc += toASCIIHexValue(*escape);
    7619                 escape++;
    7620             }
    7621             // can't handle chars outside ucs2
    7622             if (uc > 0xffff)
    7623                 uc = 0xfffd;
    7624             *out++ = uc;
    7625             escape = 0;
    7626             if (isHTMLSpace(*current))
    7627                 continue;
    7628         }
    7629         if (!escape && *current == '\\') {
    7630             escape = current;
    7631             sawEscape = true;
    7632             continue;
    7633         }
    7634         *out++ = *current;
    7635     }
    7636     if (escape) {
    7637         // add escaped char
    7638         unsigned uc = 0;
    7639         escape++;
    7640         while (escape < start+l) {
    7641             uc *= 16;
    7642             uc += toASCIIHexValue(*escape);
    7643             escape++;
    7644         }
    7645         // can't handle chars outside ucs2
    7646         if (uc > 0xffff)
    7647             uc = 0xfffd;
    7648         *out++ = uc;
    7649     }
    7650 
    7651     *length = out - start;
    7652 
    7653     // If we have an unrecognized @-keyword, and if we handled any escapes at all, then
    7654     // we should attempt to adjust yyTok to the correct type.
    7655     if (yyTok == ATKEYWORD && sawEscape)
    7656         recheckAtKeyword(start, *length);
    7657 
    7658     return start;
    7659 }
    7660 
    7661 void CSSParser::countLines()
    7662 {
    7663     for (UChar* current = yytext; current < yytext + yyleng; ++current) {
    7664         if (*current == '\n')
    7665             ++m_lineNumber;
    7666     }
     8625        ASSERT(yylval->string.characters == m_tokenStart + 1 && yylval->string.length > 0);
     8626        break;
     8627    }
     8628#endif
     8629
     8630    return token();
    76678631}
    76688632
     
    80709034void CSSParser::markSelectorListStart()
    80719035{
    8072     m_selectorListRange.start = yytext - m_data.get();
     9036    m_selectorListRange.start = m_tokenStart - m_dataStart.get();
    80739037}
    80749038
     
    80779041    if (!m_currentRuleData)
    80789042        return;
    8079     UChar* listEnd = yytext;
    8080     while (listEnd > m_data.get() + 1) {
     9043    UChar* listEnd = m_tokenStart;
     9044    while (listEnd > m_dataStart.get() + 1) {
    80819045        if (isHTMLSpace(*(listEnd - 1)))
    80829046            --listEnd;
     
    80849048            break;
    80859049    }
    8086     m_selectorListRange.end = listEnd - m_data.get();
     9050    m_selectorListRange.end = listEnd - m_dataStart.get();
    80879051}
    80889052
    80899053void CSSParser::markRuleBodyStart()
    80909054{
    8091     unsigned offset = yytext - m_data.get();
    8092     if (*yytext == '{')
     9055    unsigned offset = m_tokenStart - m_dataStart.get();
     9056    if (*m_tokenStart == '{')
    80939057        ++offset; // Skip the rule body opening brace.
    80949058    if (offset > m_ruleBodyRange.start)
     
    80999063void CSSParser::markRuleBodyEnd()
    81009064{
    8101     unsigned offset = yytext - m_data.get();
     9065    unsigned offset = m_tokenStart - m_dataStart.get();
    81029066    if (offset > m_ruleBodyRange.end)
    81039067        m_ruleBodyRange.end = offset;
     
    81089072    if (!m_inStyleRuleOrDeclaration)
    81099073        return;
    8110     m_propertyRange.start = yytext - m_data.get();
     9074    m_propertyRange.start = m_tokenStart - m_dataStart.get();
    81119075}
    81129076
     
    81159079    if (!m_inStyleRuleOrDeclaration)
    81169080        return;
    8117     unsigned offset = yytext - m_data.get();
    8118     if (*yytext == ';') // Include semicolon into the property text.
     9081    unsigned offset = m_tokenStart - m_dataStart.get();
     9082    if (*m_tokenStart == ';') // Include semicolon into the property text.
    81199083        ++offset;
    81209084    m_propertyRange.end = offset;
     
    81249088        const unsigned end = m_propertyRange.end;
    81259089        ASSERT(start < end);
    8126         String propertyString = String(m_data.get() + start, end - start).stripWhiteSpace();
     9090        String propertyString = String(m_dataStart.get() + start, end - start).stripWhiteSpace();
    81279091        if (propertyString.endsWith(";", true))
    81289092            propertyString = propertyString.left(propertyString.length() - 1);
     
    83439307}
    83449308
    8345 #define YY_DECL int CSSParser::lex()
    8346 #define yyconst const
    8347 typedef int yy_state_type;
    8348 typedef unsigned YY_CHAR;
    8349 // The following line makes sure we treat non-Latin-1 Unicode characters correctly.
    8350 #define YY_SC_TO_UI(c) (c > 0xff ? 0xff : c)
    8351 #define YY_DO_BEFORE_ACTION \
    8352         yytext = yy_bp; \
    8353         yyleng = (int) (yy_cp - yy_bp); \
    8354         yy_hold_char = *yy_cp; \
    8355         *yy_cp = 0; \
    8356         yy_c_buf_p = yy_cp;
    8357 #define YY_BREAK break;
    8358 #define ECHO
    8359 #define YY_RULE_SETUP
    8360 #define INITIAL 0
    8361 #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
    8362 #define yyterminate() yyTok = END_TOKEN; return yyTok
    8363 #define YY_FATAL_ERROR(a)
    8364 // The following line is needed to build the tokenizer with a condition stack.
    8365 // The macro is used in the tokenizer grammar with lines containing
    8366 // BEGIN(mediaqueries) and BEGIN(initial). yy_start acts as index to
    8367 // tokenizer transition table, and 'mediaqueries' and 'initial' are
    8368 // offset multipliers that specify which transitions are active
    8369 // in the tokenizer during in each condition (tokenizer state).
    8370 #define BEGIN yy_start = 1 + 2 *
    8371 
    8372 #include "tokenizer.cpp"
    8373 
    8374 }
     9309}
  • trunk/Source/WebCore/css/CSSParser.h

    r106166 r106217  
    320320    void resetPropertyMarks() { m_propertyRange.start = m_propertyRange.end = UINT_MAX; }
    321321    int lex(void* yylval);
    322     int token() { return yyTok; }
    323     UChar* text(int* length);
    324     void countLines();
    325     int lex();
     322    int token() { return m_token; }
    326323
    327324    PassRefPtr<CSSPrimitiveValue> createPrimitiveNumericValue(CSSParserValue*);
     
    329326       
    330327private:
     328    inline bool isIdentifierStart();
     329
     330    static inline UChar* checkAndSkipString(UChar*, UChar);
     331
     332    void parseEscape(UChar*&);
     333    inline void parseIdentifier(UChar*&, bool&);
     334    inline void parseString(UChar*&, UChar);
     335    inline void parseURI(UChar*&, UChar*&);
     336    inline bool parseUnicodeRange();
     337    bool parseNthChild();
     338    bool parseNthChildExtra();
     339    inline void detectFunctionTypeToken(int);
     340    inline void detectMediaQueryToken(int);
     341    inline void detectNumberToken(UChar*, int);
     342    inline void detectDashToken(int);
     343    inline void detectAtToken(int, bool);
     344
    331345    void setStyleSheet(CSSStyleSheet*);
    332346    void ensureCSSValuePool();
     
    364378    bool parseColor(const String&);
    365379
    366     OwnArrayPtr<UChar> m_data;
    367     UChar* yytext;
    368     UChar* yy_c_buf_p;
    369     UChar yy_hold_char;
    370     int yy_last_accepting_state;
    371     UChar* yy_last_accepting_cpos;
    372     int yyleng;
    373     int yyTok;
    374     int yy_start;
     380    enum ParsingMode {
     381        NormalMode,
     382        MediaQueryMode,
     383        NthChildMode
     384    };
     385
     386    ParsingMode m_parsingMode;
     387    OwnArrayPtr<UChar> m_dataStart;
     388    UChar* m_currentCharacter;
     389    UChar* m_tokenStart;
     390    int m_token;
    375391    int m_lineNumber;
    376392    int m_lastSelectorLineNumber;
  • trunk/wscript

    r106094 r106217  
    279279        excludes.append('DocTypeStrings.cpp')
    280280        excludes.append('HTMLEntityNames.cpp')
    281         excludes.append('tokenizer.cpp')
    282281
    283282        # Qt specific file in common sources
Note: See TracChangeset for help on using the changeset viewer.