Changeset 106217 in webkit
- Timestamp:
- Jan 29, 2012 11:30:14 PM (12 years ago)
- Location:
- trunk
- Files:
-
- 2 deleted
- 16 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/ChangeLog
r106146 r106217 1 2012-01-29 Zoltan Herczeg <zherczeg@webkit.org> 2 3 Custom written CSS lexer 4 https://bugs.webkit.org/show_bug.cgi?id=70107 5 6 Reviewed by Antti Koivisto and Oliver Hunt. 7 8 Remove tokenizer.cpp from intermediate sources. 9 10 * wscript: 11 1 12 2012-01-27 Fady Samuel <fsamuel@chromium.org> 2 13 -
trunk/Source/JavaScriptCore/ChangeLog
r106207 r106217 1 2012-01-29 Zoltan Herczeg <zherczeg@webkit.org> 2 3 Custom written CSS lexer 4 https://bugs.webkit.org/show_bug.cgi?id=70107 5 6 Reviewed by Antti Koivisto and Oliver Hunt. 7 8 Add new helper functions for the custom written CSS lexer. 9 10 * wtf/ASCIICType.h: 11 (WTF::toASCIILowerUnchecked): 12 (WTF): 13 (WTF::isASCIIAlphaCaselessEqual): 14 1 15 2012-01-29 Filip Pizlo <fpizlo@apple.com> 2 16 -
trunk/Source/JavaScriptCore/wtf/ASCIICType.h
r103202 r106217 112 112 } 113 113 114 template<typename CharType> inline CharType toASCIILowerUnchecked(CharType character) 115 { 116 // This function can be used for comparing any input character 117 // to a lowercase English character. The isASCIIAlphaCaselessEqual 118 // below should be used for regular comparison of ASCII alpha 119 // characters, but switch statements in CSS tokenizer require 120 // direct use of this function. 121 return character | 0x20; 122 } 123 114 124 template<typename CharType> inline CharType toASCIIUpper(CharType c) 115 125 { … … 141 151 } 142 152 153 template<typename CharType> inline bool isASCIIAlphaCaselessEqual(CharType cssCharacter, char character) 154 { 155 // This function compares a (preferably) constant ASCII 156 // lowercase letter to any input character. 157 ASSERT(character >= 'a' && character <= 'z'); 158 return LIKELY(toASCIILowerUnchecked(cssCharacter) == character); 159 } 160 143 161 } 144 162 … … 155 173 using WTF::toASCIIHexValue; 156 174 using WTF::toASCIILower; 175 using WTF::toASCIILowerUnchecked; 157 176 using WTF::toASCIIUpper; 158 177 using WTF::lowerNibbleToASCIIHexDigit; 159 178 using WTF::upperNibbleToASCIIHexDigit; 179 using WTF::isASCIIAlphaCaselessEqual; 160 180 161 181 #endif -
trunk/Source/WebCore/CMakeLists.txt
r106166 r106217 2329 2329 2330 2330 2331 # Generate tokenizer2332 FILE(TO_NATIVE_PATH ${PERL_EXECUTABLE} PERL_EXECUTABLE_NATIVE_PATH)2333 ADD_CUSTOM_COMMAND(2334 OUTPUT ${DERIVED_SOURCES_WEBCORE_DIR}/tokenizer.cpp2335 MAIN_DEPENDENCY ${WEBCORE_DIR}/css/maketokenizer2336 DEPENDS ${WEBCORE_DIR}/css/tokenizer.flex2337 COMMAND ${FLEX_EXECUTABLE} -t ${WEBCORE_DIR}/css/tokenizer.flex | ${PERL_EXECUTABLE_NATIVE_PATH} ${WEBCORE_DIR}/css/maketokenizer > ${DERIVED_SOURCES_WEBCORE_DIR}/tokenizer.cpp2338 VERBATIM)2339 ADD_SOURCE_WEBCORE_DERIVED_DEPENDENCIES(${WEBCORE_DIR}/css/CSSParser.cpp tokenizer.cpp)2340 2341 2342 2331 # Replace ";" with "space" in order to recognize feature definition in css files. 2343 2332 SET(FEATURE_DEFINES_WITH_SPACE_SEPARATOR "") -
trunk/Source/WebCore/ChangeLog
r106209 r106217 1 2012-01-29 Zoltan Herczeg <zherczeg@webkit.org> 2 3 Custom written CSS lexer 4 https://bugs.webkit.org/show_bug.cgi?id=70107 5 6 Reviewed by Antti Koivisto and Oliver Hunt. 7 8 This patch replaces the flex based CSS lexer with a 9 new, custom written one. The new code is more 10 than 2 times faster according to oprofile and CPU 11 cycle counters. 12 13 The code structure is quite straightforward: it chooses 14 the possible token group based on the first character 15 and employs utility functions to parse the tokens that are longer than 16 one character. Most of the utilities are inline 17 to make the lexer fast. 18 19 All build systems are updated, including the removal of flex support. 20 21 Existing tests cover this feature. 22 23 * CMakeLists.txt: 24 * DerivedSources.make: 25 * DerivedSources.pri: 26 * GNUmakefile.am: 27 * GNUmakefile.list.am: 28 * WebCore.gyp/WebCore.gyp: 29 * WebCore.gyp/scripts/action_maketokenizer.py: Removed. 30 * WebCore.gypi: 31 * WebCore.vcproj/WebCore.vcproj: 32 * WebCore.xcodeproj/project.pbxproj: 33 * css/CSSParser.cpp: 34 (WebCore::CSSParser::CSSParser): 35 (WebCore::CSSParser::setupParser): 36 (WebCore::parseSimpleLengthValue): 37 (WebCore::mightBeRGBA): 38 (WebCore::mightBeRGB): 39 (): 40 (WebCore::isCSSLetter): 41 (WebCore): 42 (WebCore::isCSSEscape): 43 (WebCore::isURILetter): 44 (WebCore::isIdentifierStartAfterDash): 45 (WebCore::isEqualToCSSIdentifier): 46 (WebCore::checkAndSkipEscape): 47 (WebCore::skipWhiteSpace): 48 (WebCore::CSSParser::isIdentifierStart): 49 (WebCore::CSSParser::checkAndSkipString): 50 (WebCore::CSSParser::parseEscape): 51 (WebCore::CSSParser::parseIdentifier): 52 (WebCore::CSSParser::parseString): 53 (WebCore::CSSParser::parseURI): 54 (WebCore::CSSParser::parseUnicodeRange): 55 (WebCore::CSSParser::parseNthChild): 56 (WebCore::CSSParser::parseNthChildExtra): 57 (WebCore::CSSParser::detectFunctionTypeToken): 58 (WebCore::CSSParser::detectMediaQueryToken): 59 (WebCore::CSSParser::detectNumberToken): 60
(WebCore::CSSParser::detectDashToken): 61 (WebCore::CSSParser::detectAtToken): 62 (WebCore::CSSParser::lex): 63 (WebCore::CSSParser::markSelectorListStart): 64 (WebCore::CSSParser::markSelectorListEnd): 65 (WebCore::CSSParser::markRuleBodyStart): 66 (WebCore::CSSParser::markRuleBodyEnd): 67 (WebCore::CSSParser::markPropertyStart): 68 (WebCore::CSSParser::markPropertyEnd): 69 * css/CSSParser.h: 70 (WebCore::CSSParser::token): 71 (CSSParser): 72 (): 73 * css/tokenizer.flex: Removed. 74 1 75 2012-01-29 Dale Curtis <dalecurtis@chromium.org> 2 76 -
trunk/Source/WebCore/DerivedSources.make
r105947 r106217 619 619 MathMLNames.cpp \ 620 620 XPathGrammar.cpp \ 621 tokenizer.cpp \622 621 # 623 622 … … 713 712 ColorData.cpp : platform/ColorData.gperf $(WebCore)/make-hash-tools.pl 714 713 perl $(WebCore)/make-hash-tools.pl . $(WebCore)/platform/ColorData.gperf 715 716 # --------717 718 # CSS tokenizer719 720 tokenizer.cpp : css/tokenizer.flex css/maketokenizer721 flex -t $< | perl $(WebCore)/css/maketokenizer > $@722 714 723 715 # -------- -
trunk/Source/WebCore/DerivedSources.pri
r105947 r106217 26 26 27 27 XLINK_NAMES = $$PWD/svg/xlinkattrs.in 28 29 TOKENIZER = $$PWD/css/tokenizer.flex30 28 31 29 CSSBISON = $$PWD/css/CSSGrammar.y … … 762 760 GENERATORS += arrayBufferViewCustomScript 763 761 764 # GENERATOR 3: tokenizer (flex)765 tokenizer.output = ${QMAKE_FILE_BASE}.cpp766 tokenizer.input = TOKENIZER767 tokenizer.script = $$PWD/css/maketokenizer768 tokenizer.commands = flex -t < ${QMAKE_FILE_NAME} | perl $$tokenizer.script > ${QMAKE_FILE_OUT}769 # tokenizer.cpp is included into CSSParser.cpp770 tokenizer.add_output_to_sources = false771 GENERATORS += tokenizer772 773 762 # GENERATOR 4: CSS grammar 774 763 cssbison.output = ${QMAKE_FILE_BASE}.cpp -
trunk/Source/WebCore/GNUmakefile.am
r105922 r106217 642 642 $(AM_V_GEN)$(PERL) $(WebCore)/make-hash-tools.pl $(GENSOURCES_WEBCORE) $(WebCore)/platform/ColorData.gperf 643 643 644 # CSS tokenizer645 DerivedSources/WebCore/tokenizer.cpp : $(WebCore)/css/tokenizer.flex $(WebCore)/css/maketokenizer646 $(AM_V_GEN)$(FLEX) -t $< | $(PERL) $(WebCore)/css/maketokenizer > $@647 648 644 # CSS grammar 649 645 … … 937 933 Source/WebCore/css/SVGCSSPropertyNames.in \ 938 934 Source/WebCore/css/SVGCSSValueKeywords.in \ 939 Source/WebCore/css/tokenizer.flex \940 935 Source/WebCore/css/view-source.css \ 941 936 Source/WebCore/css/WebKitFontFamilyNames.in \ -
trunk/Source/WebCore/GNUmakefile.list.am
r106166 r106217 1 webcore_built_nosources += \2 DerivedSources/WebCore/tokenizer.cpp3 4 1 webcore_built_sources += \ 5 2 DerivedSources/WebCore/CSSGrammar.cpp \ -
trunk/Source/WebCore/WebCore.gyp/WebCore.gyp
r106209 r106217 913 913 }, 914 914 { 915 'action_name': 'tokenizer',916 'inputs': [917 '../css/maketokenizer',918 '../css/tokenizer.flex',919 ],920 'outputs': [921 '<(SHARED_INTERMEDIATE_DIR)/webkit/tokenizer.cpp',922 ],923 'action': [924 'python',925 'scripts/action_maketokenizer.py',926 '<@(_outputs)',927 '--',928 '<@(_inputs)'929 ],930 },931 {932 915 'action_name': 'derived_sources_all_in_one', 933 916 'inputs': [ -
trunk/Source/WebCore/WebCore.gypi
r106166 r106217 7964 7964 '<(PRODUCT_DIR)/DerivedSources/WebCore/XPathGrammar.cpp', 7965 7965 '<(PRODUCT_DIR)/DerivedSources/WebCore/XPathGrammar.h', 7966 '<(PRODUCT_DIR)/DerivedSources/WebCore/tokenizer.cpp',7967 7966 ], 7968 7967 'export_file_generator_files': [ -
trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj
r106166 r106217 35511 35511 </File> 35512 35512 <File 35513 RelativePath="..\css\tokenizer.flex"35514 >35515 </File>35516 <File35517 35513 RelativePath="..\css\WebKitCSSFilterValue.cpp" 35518 35514 > -
trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj
r106166 r106217 8541 8541 6565814709D13043000E61D7 /* CSSValueKeywords.gperf */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = CSSValueKeywords.gperf; sourceTree = "<group>"; }; 8542 8542 6565814809D13043000E61D7 /* CSSValueKeywords.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = CSSValueKeywords.h; sourceTree = "<group>"; }; 8543 6565814C09D13043000E61D7 /* tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = tokenizer.cpp; sourceTree = "<group>"; };8544 8543 656581AC09D14EE6000E61D7 /* CharsetData.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = CharsetData.cpp; sourceTree = "<group>"; }; 8545 8544 656581AE09D14EE6000E61D7 /* UserAgentStyleSheets.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = UserAgentStyleSheets.h; sourceTree = "<group>"; }; … … 10059 10058 93CA4C9F09DF93FA00DF8677 /* quirks.css */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = quirks.css; sourceTree = "<group>"; }; 10060 10059 93CA4CA209DF93FA00DF8677 /* svg.css */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = svg.css; sourceTree = "<group>"; }; 10061 93CA4CA309DF93FA00DF8677 /* tokenizer.flex */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = tokenizer.flex; sourceTree = "<group>"; };10062 10060 93CCF0260AF6C52900018E89 /* NavigationAction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NavigationAction.h; sourceTree = "<group>"; }; 10063 10061 93CCF05F0AF6CA7600018E89 /* NavigationAction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = NavigationAction.cpp; sourceTree = "<group>"; }; … … 14839 14837 656581E809D1508D000E61D7 /* SVGNames.cpp */, 14840 14838 656581E909D1508D000E61D7 
/* SVGNames.h */, 14841 6565814C09D13043000E61D7 /* tokenizer.cpp */,14842 14839 656581AE09D14EE6000E61D7 /* UserAgentStyleSheets.h */, 14843 14840 656581AF09D14EE6000E61D7 /* UserAgentStyleSheetsData.cpp */, … … 20094 20091 B2227B030D00BFF10071B782 /* SVGCSSStyleSelector.cpp */, 20095 20092 B2227B040D00BFF10071B782 /* SVGCSSValueKeywords.in */, 20096 93CA4CA309DF93FA00DF8677 /* tokenizer.flex */,20097 20093 BC5EC1760A507E3E006007F5 /* view-source.css */, 20098 20094 3106036C14327D2E00ABF4BA /* WebKitCSSFilterValue.cpp */, -
trunk/Source/WebCore/css/CSSParser.cpp
r106166 r106217 202 202 , m_ruleRangeMap(0) 203 203 , m_currentRuleData(0) 204 , yy_start(1) 204 , m_parsingMode(NormalMode) 205 , m_currentCharacter(0) 206 , m_token(0) 205 207 , m_lineNumber(0) 206 208 , m_lastSelectorLineNumber(0) … … 244 246 void CSSParser::setupParser(const char* prefix, const String& string, const char* suffix) 245 247 { 246 int length = string.length() + strlen(prefix) + strlen(suffix) + 2;247 248 m_data = adoptArrayPtr(new UChar[length]);248 int length = string.length() + strlen(prefix) + strlen(suffix) + 1; 249 250 m_dataStart = adoptArrayPtr(new UChar[length]); 249 251 for (unsigned i = 0; i < strlen(prefix); i++) 250 m_data [i] = prefix[i];251 252 memcpy(m_data .get() + strlen(prefix), string.characters(), string.length() * sizeof(UChar));252 m_dataStart[i] = prefix[i]; 253 254 memcpy(m_dataStart.get() + strlen(prefix), string.characters(), string.length() * sizeof(UChar)); 253 255 254 256 unsigned start = strlen(prefix) + string.length(); 255 257 unsigned end = start + strlen(suffix); 256 258 for (unsigned i = start; i < end; i++) 257 m_data[i] = suffix[i - start]; 258 259 m_data[length - 1] = 0; 260 m_data[length - 2] = 0; 261 262 yy_hold_char = 0; 263 yyleng = 0; 264 yytext = m_data.get(); 265 yy_c_buf_p = yytext; 266 yy_hold_char = *yy_c_buf_p; 259 m_dataStart[i] = suffix[i - start]; 260 261 m_dataStart[length - 1] = 0; 262 263 m_currentCharacter = m_tokenStart = m_dataStart.get(); 267 264 resetRuleBodyMarks(); 268 265 } … … 424 421 425 422 CSSPrimitiveValue::UnitTypes unit = CSSPrimitiveValue::CSS_NUMBER; 426 if (length > 2 && (characters[length - 2] | 0x20) == 'p' && (characters[length - 1] | 0x20) == 'x') {423 if (length > 2 && isASCIIAlphaCaselessEqual(characters[length - 2], 'p') && isASCIIAlphaCaselessEqual(characters[length - 1], 'x')) { 427 424 length -= 2; 428 425 unit = CSSPrimitiveValue::CSS_PX; … … 4851 4848 return false; 4852 4849 return characters[4] == '(' 4853 && (characters[0] | 0x20) == 'r'4854 && (characters[1] | 
0x20) == 'g'4855 && (characters[2] | 0x20) == 'b'4856 && (characters[3] | 0x20) == 'a';4850 && isASCIIAlphaCaselessEqual(characters[0], 'r') 4851 && isASCIIAlphaCaselessEqual(characters[1], 'g') 4852 && isASCIIAlphaCaselessEqual(characters[2], 'b') 4853 && isASCIIAlphaCaselessEqual(characters[3], 'a'); 4857 4854 } 4858 4855 … … 4862 4859 return false; 4863 4860 return characters[3] == '(' 4864 && (characters[0] | 0x20) == 'r'4865 && (characters[1] | 0x20) == 'g'4866 && (characters[2] | 0x20) == 'b';4861 && isASCIIAlphaCaselessEqual(characters[0], 'r') 4862 && isASCIIAlphaCaselessEqual(characters[1], 'g') 4863 && isASCIIAlphaCaselessEqual(characters[2], 'b'); 4867 4864 } 4868 4865 … … 7434 7431 return true; 7435 7432 } 7436 7437 static inline int yyerror(const char*) { return 1; }7438 7433 7439 7434 #define END_TOKEN 0 … … 7441 7436 #include "CSSGrammar.h" 7442 7437 7438 enum CharacterType { 7439 // Types for the main switch. 7440 7441 // The first 4 types must be grouped together, as they 7442 // represent the allowed chars in an identifier. 7443 CharacterCaselessU, 7444 CharacterIdentifierStart, 7445 CharacterNumber, 7446 CharacterDash, 7447 7448 CharacterOther, 7449 CharacterWhiteSpace, 7450 CharacterEndMediaQuery, 7451 CharacterEndNthChild, 7452 CharacterQuote, 7453 CharacterExclamationMark, 7454 CharacterHashmark, 7455 CharacterDollar, 7456 CharacterAsterisk, 7457 CharacterPlus, 7458 CharacterDot, 7459 CharacterSlash, 7460 CharacterLess, 7461 CharacterAt, 7462 CharacterBackSlash, 7463 CharacterXor, 7464 CharacterVerticalBar, 7465 CharacterTilde, 7466 }; 7467 7468 // 128 ASCII codes 7469 static const CharacterType typesOfASCIICharacters[128] = { 7470 /* 0 - Null */ CharacterOther, 7471 /* 1 - Start of Heading */ CharacterOther, 7472 /* 2 - Start of Text */ CharacterOther, 7473 /* 3 - End of Text */ CharacterOther, 7474 /* 4 - End of Transm. 
*/ CharacterOther, 7475 /* 5 - Enquiry */ CharacterOther, 7476 /* 6 - Acknowledgment */ CharacterOther, 7477 /* 7 - Bell */ CharacterOther, 7478 /* 8 - Back Space */ CharacterOther, 7479 /* 9 - Horizontal Tab */ CharacterWhiteSpace, 7480 /* 10 - Line Feed */ CharacterWhiteSpace, 7481 /* 11 - Vertical Tab */ CharacterOther, 7482 /* 12 - Form Feed */ CharacterWhiteSpace, 7483 /* 13 - Carriage Return */ CharacterWhiteSpace, 7484 /* 14 - Shift Out */ CharacterOther, 7485 /* 15 - Shift In */ CharacterOther, 7486 /* 16 - Data Line Escape */ CharacterOther, 7487 /* 17 - Device Control 1 */ CharacterOther, 7488 /* 18 - Device Control 2 */ CharacterOther, 7489 /* 19 - Device Control 3 */ CharacterOther, 7490 /* 20 - Device Control 4 */ CharacterOther, 7491 /* 21 - Negative Ack. */ CharacterOther, 7492 /* 22 - Synchronous Idle */ CharacterOther, 7493 /* 23 - End of Transmit */ CharacterOther, 7494 /* 24 - Cancel */ CharacterOther, 7495 /* 25 - End of Medium */ CharacterOther, 7496 /* 26 - Substitute */ CharacterOther, 7497 /* 27 - Escape */ CharacterOther, 7498 /* 28 - File Separator */ CharacterOther, 7499 /* 29 - Group Separator */ CharacterOther, 7500 /* 30 - Record Separator */ CharacterOther, 7501 /* 31 - Unit Separator */ CharacterOther, 7502 /* 32 - Space */ CharacterWhiteSpace, 7503 /* 33 - ! */ CharacterExclamationMark, 7504 /* 34 - " */ CharacterQuote, 7505 /* 35 - # */ CharacterHashmark, 7506 /* 36 - $ */ CharacterDollar, 7507 /* 37 - % */ CharacterOther, 7508 /* 38 - & */ CharacterOther, 7509 /* 39 - ' */ CharacterQuote, 7510 /* 40 - ( */ CharacterOther, 7511 /* 41 - ) */ CharacterEndNthChild, 7512 /* 42 - * */ CharacterAsterisk, 7513 /* 43 - + */ CharacterPlus, 7514 /* 44 - , */ CharacterOther, 7515 /* 45 - - */ CharacterDash, 7516 /* 46 - . 
*/ CharacterDot, 7517 /* 47 - / */ CharacterSlash, 7518 /* 48 - 0 */ CharacterNumber, 7519 /* 49 - 1 */ CharacterNumber, 7520 /* 50 - 2 */ CharacterNumber, 7521 /* 51 - 3 */ CharacterNumber, 7522 /* 52 - 4 */ CharacterNumber, 7523 /* 53 - 5 */ CharacterNumber, 7524 /* 54 - 6 */ CharacterNumber, 7525 /* 55 - 7 */ CharacterNumber, 7526 /* 56 - 8 */ CharacterNumber, 7527 /* 57 - 9 */ CharacterNumber, 7528 /* 58 - : */ CharacterOther, 7529 /* 59 - ; */ CharacterEndMediaQuery, 7530 /* 60 - < */ CharacterLess, 7531 /* 61 - = */ CharacterOther, 7532 /* 62 - > */ CharacterOther, 7533 /* 63 - ? */ CharacterOther, 7534 /* 64 - @ */ CharacterAt, 7535 /* 65 - A */ CharacterIdentifierStart, 7536 /* 66 - B */ CharacterIdentifierStart, 7537 /* 67 - C */ CharacterIdentifierStart, 7538 /* 68 - D */ CharacterIdentifierStart, 7539 /* 69 - E */ CharacterIdentifierStart, 7540 /* 70 - F */ CharacterIdentifierStart, 7541 /* 71 - G */ CharacterIdentifierStart, 7542 /* 72 - H */ CharacterIdentifierStart, 7543 /* 73 - I */ CharacterIdentifierStart, 7544 /* 74 - J */ CharacterIdentifierStart, 7545 /* 75 - K */ CharacterIdentifierStart, 7546 /* 76 - L */ CharacterIdentifierStart, 7547 /* 77 - M */ CharacterIdentifierStart, 7548 /* 78 - N */ CharacterIdentifierStart, 7549 /* 79 - O */ CharacterIdentifierStart, 7550 /* 80 - P */ CharacterIdentifierStart, 7551 /* 81 - Q */ CharacterIdentifierStart, 7552 /* 82 - R */ CharacterIdentifierStart, 7553 /* 83 - S */ CharacterIdentifierStart, 7554 /* 84 - T */ CharacterIdentifierStart, 7555 /* 85 - U */ CharacterCaselessU, 7556 /* 86 - V */ CharacterIdentifierStart, 7557 /* 87 - W */ CharacterIdentifierStart, 7558 /* 88 - X */ CharacterIdentifierStart, 7559 /* 89 - Y */ CharacterIdentifierStart, 7560 /* 90 - Z */ CharacterIdentifierStart, 7561 /* 91 - [ */ CharacterOther, 7562 /* 92 - \ */ CharacterBackSlash, 7563 /* 93 - ] */ CharacterOther, 7564 /* 94 - ^ */ CharacterXor, 7565 /* 95 - _ */ CharacterIdentifierStart, 7566 /* 96 - ` */ CharacterOther, 
7567 /* 97 - a */ CharacterIdentifierStart, 7568 /* 98 - b */ CharacterIdentifierStart, 7569 /* 99 - c */ CharacterIdentifierStart, 7570 /* 100 - d */ CharacterIdentifierStart, 7571 /* 101 - e */ CharacterIdentifierStart, 7572 /* 102 - f */ CharacterIdentifierStart, 7573 /* 103 - g */ CharacterIdentifierStart, 7574 /* 104 - h */ CharacterIdentifierStart, 7575 /* 105 - i */ CharacterIdentifierStart, 7576 /* 106 - j */ CharacterIdentifierStart, 7577 /* 107 - k */ CharacterIdentifierStart, 7578 /* 108 - l */ CharacterIdentifierStart, 7579 /* 109 - m */ CharacterIdentifierStart, 7580 /* 110 - n */ CharacterIdentifierStart, 7581 /* 111 - o */ CharacterIdentifierStart, 7582 /* 112 - p */ CharacterIdentifierStart, 7583 /* 113 - q */ CharacterIdentifierStart, 7584 /* 114 - r */ CharacterIdentifierStart, 7585 /* 115 - s */ CharacterIdentifierStart, 7586 /* 116 - t */ CharacterIdentifierStart, 7587 /* 117 - u */ CharacterCaselessU, 7588 /* 118 - v */ CharacterIdentifierStart, 7589 /* 119 - w */ CharacterIdentifierStart, 7590 /* 120 - x */ CharacterIdentifierStart, 7591 /* 121 - y */ CharacterIdentifierStart, 7592 /* 122 - z */ CharacterIdentifierStart, 7593 /* 123 - { */ CharacterEndMediaQuery, 7594 /* 124 - | */ CharacterVerticalBar, 7595 /* 125 - } */ CharacterOther, 7596 /* 126 - ~ */ CharacterTilde, 7597 /* 127 - Delete */ CharacterOther, 7598 }; 7599 7600 // Utility functions for the CSS tokenizer. 
7601 7602 static inline bool isCSSLetter(UChar character) 7603 { 7604 return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash; 7605 } 7606 7607 static inline bool isCSSEscape(UChar character) 7608 { 7609 return character >= ' ' && character != 127; 7610 } 7611 7612 static inline bool isURILetter(UChar character) 7613 { 7614 return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!'; 7615 } 7616 7617 static inline bool isIdentifierStartAfterDash(UChar* currentCharacter) 7618 { 7619 return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128 7620 || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1])); 7621 } 7622 7623 static inline bool isEqualToCSSIdentifier(UChar* cssString, const char* constantString) 7624 { 7625 // Compare an UChar memory data with a zero terminated string. 7626 do { 7627 // The input must be part of an identifier if constantChar or constString 7628 // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'. 7629 ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-'); 7630 ASSERT(*constantString != '-' || isCSSLetter(*cssString)); 7631 if (toASCIILowerUnchecked(*cssString++) != (*constantString++)) 7632 return false; 7633 } while (*constantString); 7634 return true; 7635 } 7636 7637 static UChar* checkAndSkipEscape(UChar* currentCharacter) 7638 { 7639 // Returns with 0, if escape check is failed. Otherwise 7640 // it returns with the following character. 7641 ASSERT(*currentCharacter == '\\'); 7642 7643 ++currentCharacter; 7644 if (!isCSSEscape(*currentCharacter)) 7645 return 0; 7646 7647 if (isASCIIHexDigit(*currentCharacter)) { 7648 int length = 6; 7649 7650 do { 7651 ++currentCharacter; 7652 } while (isASCIIHexDigit(*currentCharacter) && --length); 7653 7654 // Optional space after the escape sequence. 
7655 if (isHTMLSpace(*currentCharacter)) 7656 ++currentCharacter; 7657 return currentCharacter; 7658 } 7659 return currentCharacter + 1; 7660 } 7661 7662 static inline UChar* skipWhiteSpace(UChar* currentCharacter) 7663 { 7664 while (isHTMLSpace(*currentCharacter)) 7665 ++currentCharacter; 7666 return currentCharacter; 7667 } 7668 7669 // Main CSS tokenizer functions. 7670 7671 inline bool CSSParser::isIdentifierStart() 7672 { 7673 // Check whether an identifier is started. 7674 return isIdentifierStartAfterDash((*m_currentCharacter != '-') ? m_currentCharacter : m_currentCharacter + 1); 7675 } 7676 7677 inline UChar* CSSParser::checkAndSkipString(UChar* currentCharacter, UChar quote) 7678 { 7679 // Returns with 0, if string check is failed. Otherwise 7680 // it returns with the following character. This is necessary 7681 // since we cannot revert escape sequences, thus strings 7682 // must be validated before parsing. 7683 while (true) { 7684 if (UNLIKELY(*currentCharacter == quote)) { 7685 // String parsing is successful. 7686 return currentCharacter + 1; 7687 } 7688 if (UNLIKELY(*currentCharacter <= '\r' && (!*currentCharacter || *currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) { 7689 // String parsing is failed for character '\0', '\n', '\f' or '\r'. 7690 return 0; 7691 } 7692 7693 if (LIKELY(currentCharacter[0] != '\\')) 7694 ++currentCharacter; 7695 else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') 7696 currentCharacter += 2; 7697 else if (currentCharacter[1] == '\r') 7698 currentCharacter += currentCharacter[2] == '\n' ? 
3 : 2; 7699 else { 7700 currentCharacter = checkAndSkipEscape(currentCharacter); 7701 if (!currentCharacter) 7702 return 0; 7703 } 7704 } 7705 } 7706 7707 void CSSParser::parseEscape(UChar*& result) 7708 { 7709 ASSERT(*m_currentCharacter == '\\' && isCSSEscape(m_currentCharacter[1])); 7710 7711 ++m_currentCharacter; 7712 if (isASCIIHexDigit(*m_currentCharacter)) { 7713 unsigned unicode = 0; 7714 int length = 6; 7715 7716 do { 7717 unicode = (unicode << 4) + toASCIIHexValue(*m_currentCharacter++); 7718 } while (--length && isASCIIHexDigit(*m_currentCharacter)); 7719 7720 // Characters above 0xffff are not handled. 7721 if (unicode > 0xffff) 7722 unicode = 0xfffd; 7723 7724 // Optional space after the escape sequence. 7725 if (isHTMLSpace(*m_currentCharacter)) 7726 ++m_currentCharacter; 7727 *result = unicode; 7728 } else 7729 *result = *m_currentCharacter++; 7730 ++result; 7731 } 7732 7733 inline void CSSParser::parseIdentifier(UChar*& result, bool& hasEscape) 7734 { 7735 // If a valid identifier start is found, we can safely 7736 // parse the identifier until the next invalid character. 7737 ASSERT(isIdentifierStart()); 7738 hasEscape = false; 7739 do { 7740 if (LIKELY(*m_currentCharacter != '\\')) 7741 *result++ = *m_currentCharacter++; 7742 else { 7743 hasEscape = true; 7744 parseEscape(result); 7745 } 7746 } while (isCSSLetter(m_currentCharacter[0]) || (m_currentCharacter[0] == '\\' && isCSSEscape(m_currentCharacter[1]))); 7747 } 7748 7749 inline void CSSParser::parseString(UChar*& result, UChar quote) 7750 { 7751 while (true) { 7752 if (UNLIKELY(*m_currentCharacter == quote)) { 7753 // String parsing is done. 
7754 ++m_currentCharacter; 7755 return; 7756 } 7757 ASSERT(*m_currentCharacter > '\r' || (*m_currentCharacter < '\n' && *m_currentCharacter) || *m_currentCharacter == '\v'); 7758 7759 if (LIKELY(m_currentCharacter[0] != '\\')) 7760 *result++ = *m_currentCharacter++; 7761 else if (m_currentCharacter[1] == '\n' || m_currentCharacter[1] == '\f') 7762 m_currentCharacter += 2; 7763 else if (m_currentCharacter[1] == '\r') 7764 m_currentCharacter += m_currentCharacter[2] == '\n' ? 3 : 2; 7765 else 7766 parseEscape(result); 7767 } 7768 } 7769 7770 inline void CSSParser::parseURI(UChar*& start, UChar*& result) 7771 { 7772 UChar* uriStart = skipWhiteSpace(m_currentCharacter); 7773 7774 if (*uriStart == '"' || *uriStart == '\'') { 7775 UChar quote = *uriStart; 7776 ++uriStart; 7777 7778 UChar* stringEnd = checkAndSkipString(uriStart, quote); 7779 if (!stringEnd) 7780 return; 7781 stringEnd = skipWhiteSpace(stringEnd); 7782 if (*stringEnd != ')') 7783 return; 7784 7785 start = result = m_currentCharacter = uriStart; 7786 parseString(result, quote); 7787 7788 m_currentCharacter = stringEnd + 1; 7789 m_token = URI; 7790 } else { 7791 UChar* stringEnd = uriStart; 7792 7793 while (isURILetter(*stringEnd)) { 7794 if (*stringEnd != '\\') 7795 ++stringEnd; 7796 else { 7797 stringEnd = checkAndSkipEscape(stringEnd); 7798 if (!stringEnd) 7799 return; 7800 } 7801 } 7802 7803 stringEnd = skipWhiteSpace(stringEnd); 7804 if (*stringEnd != ')') 7805 return; 7806 7807 start = result = m_currentCharacter = uriStart; 7808 while (isURILetter(*m_currentCharacter)) { 7809 if (LIKELY(*m_currentCharacter != '\\')) 7810 *result++ = *m_currentCharacter++; 7811 else 7812 parseEscape(result); 7813 } 7814 7815 m_currentCharacter = stringEnd + 1; 7816 m_token = URI; 7817 } 7818 } 7819 7820 inline bool CSSParser::parseUnicodeRange() 7821 { 7822 UChar* currentCharacter = m_currentCharacter + 1; 7823 int length = 6; 7824 ASSERT(*m_currentCharacter == '+'); 7825 7826 while (isASCIIHexDigit(*currentCharacter) 
&& length) { 7827 ++currentCharacter; 7828 --length; 7829 } 7830 7831 if (length && *currentCharacter == '?') { 7832 // At most 5 hex digit followed by a question mark. 7833 do { 7834 ++currentCharacter; 7835 --length; 7836 } while (*currentCharacter == '?' && length); 7837 m_currentCharacter = currentCharacter; 7838 return true; 7839 } 7840 7841 if (length < 6) { 7842 // At least one hex digit. 7843 if (currentCharacter[0] == '-' && isASCIIHexDigit(currentCharacter[1])) { 7844 // Followed by a dash and a hex digit. 7845 ++currentCharacter; 7846 length = 6; 7847 do { 7848 ++currentCharacter; 7849 } while (--length && isASCIIHexDigit(*currentCharacter)); 7850 } 7851 m_currentCharacter = currentCharacter; 7852 return true; 7853 } 7854 return false; 7855 } 7856 7857 bool CSSParser::parseNthChild() 7858 { 7859 UChar* currentCharacter = m_currentCharacter; 7860 7861 while (isASCIIDigit(*currentCharacter)) 7862 ++currentCharacter; 7863 if (isASCIIAlphaCaselessEqual(*currentCharacter, 'n')) { 7864 m_currentCharacter = currentCharacter + 1; 7865 return true; 7866 } 7867 return false; 7868 } 7869 7870 bool CSSParser::parseNthChildExtra() 7871 { 7872 UChar* currentCharacter = skipWhiteSpace(m_currentCharacter); 7873 if (*currentCharacter != '+' && *currentCharacter != '-') 7874 return false; 7875 7876 currentCharacter = skipWhiteSpace(currentCharacter + 1); 7877 if (!isASCIIDigit(*currentCharacter)) 7878 return false; 7879 7880 do { 7881 ++currentCharacter; 7882 } while (isASCIIDigit(*currentCharacter)); 7883 7884 m_currentCharacter = currentCharacter; 7885 return true; 7886 } 7887 7888 inline void CSSParser::detectFunctionTypeToken(int length) 7889 { 7890 ASSERT(length > 0); 7891 UChar* name = m_tokenStart; 7892 7893 switch (length) { 7894 case 3: 7895 if (isASCIIAlphaCaselessEqual(name[0], 'n') && isASCIIAlphaCaselessEqual(name[1], 'o') && isASCIIAlphaCaselessEqual(name[2], 't')) 7896 m_token = NOTFUNCTION; 7897 else if (isASCIIAlphaCaselessEqual(name[0], 'u') && 
isASCIIAlphaCaselessEqual(name[1], 'r') && isASCIIAlphaCaselessEqual(name[2], 'l')) 7898 m_token = URI; 7899 return; 7900 7901 case 9: 7902 if (isEqualToCSSIdentifier(name, "nth-child")) 7903 m_parsingMode = NthChildMode; 7904 return; 7905 7906 case 11: 7907 if (isEqualToCSSIdentifier(name, "nth-of-type")) 7908 m_parsingMode = NthChildMode; 7909 return; 7910 7911 case 14: 7912 if (isEqualToCSSIdentifier(name, "nth-last-child")) 7913 m_parsingMode = NthChildMode; 7914 return; 7915 7916 case 16: 7917 if (isEqualToCSSIdentifier(name, "nth-last-of-type")) 7918 m_parsingMode = NthChildMode; 7919 return; 7920 } 7921 } 7922 7923 inline void CSSParser::detectMediaQueryToken(int length) 7924 { 7925 ASSERT(m_parsingMode == MediaQueryMode); 7926 UChar* name = m_tokenStart; 7927 7928 if (length == 3) { 7929 if (isASCIIAlphaCaselessEqual(name[0], 'a') && isASCIIAlphaCaselessEqual(name[1], 'n') && isASCIIAlphaCaselessEqual(name[2], 'd')) 7930 m_token = MEDIA_AND; 7931 else if (isASCIIAlphaCaselessEqual(name[0], 'n') && isASCIIAlphaCaselessEqual(name[1], 'o') && isASCIIAlphaCaselessEqual(name[2], 't')) 7932 m_token = MEDIA_NOT; 7933 } else if (length == 4) { 7934 if (isASCIIAlphaCaselessEqual(name[0], 'o') && isASCIIAlphaCaselessEqual(name[1], 'n') 7935 && isASCIIAlphaCaselessEqual(name[2], 'l') && isASCIIAlphaCaselessEqual(name[3], 'y')) 7936 m_token = MEDIA_ONLY; 7937 } 7938 } 7939 7940 inline void CSSParser::detectNumberToken(UChar* type, int length) 7941 { 7942 ASSERT(length > 0); 7943 7944 switch (toASCIILowerUnchecked(type[0])) { 7945 case 'c': 7946 if (length == 2 && isASCIIAlphaCaselessEqual(type[1], 'm')) 7947 m_token = CMS; 7948 return; 7949 7950 case 'd': 7951 if (length == 3 && isASCIIAlphaCaselessEqual(type[1], 'e') && isASCIIAlphaCaselessEqual(type[2], 'g')) 7952 m_token = DEGS; 7953 return; 7954 7955 case 'e': 7956 if (length == 2) { 7957 if (isASCIIAlphaCaselessEqual(type[1], 'm')) 7958 m_token = EMS; 7959 else if (isASCIIAlphaCaselessEqual(type[1], 'x')) 7960 
m_token = EXS; 7961 } 7962 return; 7963 7964 case 'g': 7965 if (length == 4 && isASCIIAlphaCaselessEqual(type[1], 'r') 7966 && isASCIIAlphaCaselessEqual(type[2], 'a') && isASCIIAlphaCaselessEqual(type[3], 'd')) 7967 m_token = GRADS; 7968 return; 7969 7970 case 'h': 7971 if (length == 2 && isASCIIAlphaCaselessEqual(type[1], 'z')) 7972 m_token = HERTZ; 7973 return; 7974 7975 case 'i': 7976 if (length == 2 && isASCIIAlphaCaselessEqual(type[1], 'n')) 7977 m_token = INS; 7978 return; 7979 7980 case 'k': 7981 if (length == 3 && isASCIIAlphaCaselessEqual(type[1], 'h') && isASCIIAlphaCaselessEqual(type[2], 'z')) 7982 m_token = KHERTZ; 7983 return; 7984 7985 case 'm': 7986 if (length == 2) { 7987 if (isASCIIAlphaCaselessEqual(type[1], 'm')) 7988 m_token = MMS; 7989 else if (isASCIIAlphaCaselessEqual(type[1], 's')) 7990 m_token = MSECS; 7991 } 7992 return; 7993 7994 case 'p': 7995 if (length == 2) { 7996 if (isASCIIAlphaCaselessEqual(type[1], 'x')) 7997 m_token = PXS; 7998 else if (isASCIIAlphaCaselessEqual(type[1], 't')) 7999 m_token = PTS; 8000 else if (isASCIIAlphaCaselessEqual(type[1], 'c')) 8001 m_token = PCS; 8002 } 8003 return; 8004 8005 case 'r': 8006 if (length == 3) { 8007 if (isASCIIAlphaCaselessEqual(type[1], 'a') && isASCIIAlphaCaselessEqual(type[2], 'd')) 8008 m_token = RADS; 8009 else if (isASCIIAlphaCaselessEqual(type[1], 'e') && isASCIIAlphaCaselessEqual(type[2], 'm')) 8010 m_token = REMS; 8011 } 8012 return; 8013 8014 case 's': 8015 if (length == 1) 8016 m_token = SECS; 8017 return; 8018 8019 case 't': 8020 if (length == 4 && isASCIIAlphaCaselessEqual(type[1], 'u') 8021 && isASCIIAlphaCaselessEqual(type[2], 'r') && isASCIIAlphaCaselessEqual(type[3], 'n')) 8022 m_token = TURNS; 8023 return; 8024 8025 default: 8026 if (type[0] == '_' && length == 5 && type[1] == '_' && isASCIIAlphaCaselessEqual(type[2], 'q') 8027 && isASCIIAlphaCaselessEqual(type[3], 'e') && isASCIIAlphaCaselessEqual(type[4], 'm')) 8028 m_token = QEMS; 8029 return; 8030 } 8031 } 8032 8033 
inline void CSSParser::detectDashToken(int length) 8034 { 8035 UChar* name = m_tokenStart; 8036 8037 if (length == 11) { 8038 if (isASCIIAlphaCaselessEqual(name[10], 'y') && isEqualToCSSIdentifier(name + 1, "webkit-an")) 8039 m_token = ANYFUNCTION; 8040 else if (isASCIIAlphaCaselessEqual(name[10], 'n') && isEqualToCSSIdentifier(name + 1, "webkit-mi")) 8041 m_token = MINFUNCTION; 8042 else if (isASCIIAlphaCaselessEqual(name[10], 'x') && isEqualToCSSIdentifier(name + 1, "webkit-ma")) 8043 m_token = MAXFUNCTION; 8044 } else if (length == 12 && isEqualToCSSIdentifier(name + 1, "webkit-calc")) 8045 m_token = CALCFUNCTION; 8046 } 8047 8048 inline void CSSParser::detectAtToken(int length, bool hasEscape) 8049 { 8050 UChar* name = m_tokenStart; 8051 ASSERT(name[0] == '@' && length >= 2); 8052 8053 // charset, font-face, import, media, namespace, page, 8054 // -webkit-keyframes, and -webkit-mediaquery are not affected by hasEscape. 8055 switch (toASCIILowerUnchecked(name[1])) { 8056 case 'b': 8057 if (hasEscape) 8058 return; 8059 8060 switch (length) { 8061 case 12: 8062 if (isEqualToCSSIdentifier(name + 2, "ottom-left")) 8063 m_token = BOTTOMLEFT_SYM; 8064 return; 8065 8066 case 13: 8067 if (isEqualToCSSIdentifier(name + 2, "ottom-right")) 8068 m_token = BOTTOMRIGHT_SYM; 8069 return; 8070 8071 case 14: 8072 if (isEqualToCSSIdentifier(name + 2, "ottom-center")) 8073 m_token = BOTTOMCENTER_SYM; 8074 return; 8075 8076 case 19: 8077 if (isEqualToCSSIdentifier(name + 2, "ottom-left-corner")) 8078 m_token = BOTTOMLEFTCORNER_SYM; 8079 return; 8080 8081 case 20: 8082 if (isEqualToCSSIdentifier(name + 2, "ottom-right-corner")) 8083 m_token = BOTTOMRIGHTCORNER_SYM; 8084 return; 8085 } 8086 return; 8087 8088 case 'c': 8089 if (length == 8 && isEqualToCSSIdentifier(name + 2, "harset")) 8090 m_token = CHARSET_SYM; 8091 return; 8092 8093 case 'f': 8094 if (length == 10 && isEqualToCSSIdentifier(name + 2, "ont-face")) 8095 m_token = FONT_FACE_SYM; 8096 return; 8097 8098 case 'i': 8099 if 
(length == 7 && isEqualToCSSIdentifier(name + 2, "mport")) { 8100 m_parsingMode = MediaQueryMode; 8101 m_token = IMPORT_SYM; 8102 } 8103 return; 8104 8105 case 'l': 8106 if (hasEscape) 8107 return; 8108 8109 if (length == 9) { 8110 if (isEqualToCSSIdentifier(name + 2, "eft-top")) 8111 m_token = LEFTTOP_SYM; 8112 } else if (length == 12) { 8113 // Checking the last character first could further reduce the possibile cases. 8114 if (isASCIIAlphaCaselessEqual(name[11], 'e') && isEqualToCSSIdentifier(name + 2, "eft-middl")) 8115 m_token = LEFTMIDDLE_SYM; 8116 else if (isASCIIAlphaCaselessEqual(name[11], 'm') && isEqualToCSSIdentifier(name + 2, "eft-botto")) 8117 m_token = LEFTBOTTOM_SYM; 8118 } 8119 return; 8120 8121 case 'm': 8122 if (length == 6 && isEqualToCSSIdentifier(name + 2, "edia")) { 8123 m_parsingMode = MediaQueryMode; 8124 m_token = MEDIA_SYM; 8125 } 8126 return; 8127 8128 case 'n': 8129 if (length == 10 && isEqualToCSSIdentifier(name + 2, "amespace")) 8130 m_token = NAMESPACE_SYM; 8131 return; 8132 8133 case 'p': 8134 if (length == 5 && isEqualToCSSIdentifier(name + 2, "age")) 8135 m_token = PAGE_SYM; 8136 return; 8137 8138 case 'r': 8139 if (hasEscape) 8140 return; 8141 8142 if (length == 10) { 8143 if (isEqualToCSSIdentifier(name + 2, "ight-top")) 8144 m_token = RIGHTTOP_SYM; 8145 } else if (length == 13) { 8146 // Checking the last character first could further reduce the possibile cases. 
8147 if (isASCIIAlphaCaselessEqual(name[12], 'e') && isEqualToCSSIdentifier(name + 2, "ight-middl")) 8148 m_token = RIGHTMIDDLE_SYM; 8149 else if (isASCIIAlphaCaselessEqual(name[12], 'm') && isEqualToCSSIdentifier(name + 2, "ight-botto")) 8150 m_token = RIGHTBOTTOM_SYM; 8151 } 8152 return; 8153 8154 case 't': 8155 if (hasEscape) 8156 return; 8157 8158 switch (length) { 8159 case 9: 8160 if (isEqualToCSSIdentifier(name + 2, "op-left")) 8161 m_token = TOPLEFT_SYM; 8162 return; 8163 8164 case 10: 8165 if (isEqualToCSSIdentifier(name + 2, "op-right")) 8166 m_token = TOPRIGHT_SYM; 8167 return; 8168 8169 case 11: 8170 if (isEqualToCSSIdentifier(name + 2, "op-center")) 8171 m_token = TOPCENTER_SYM; 8172 return; 8173 8174 case 16: 8175 if (isEqualToCSSIdentifier(name + 2, "op-left-corner")) 8176 m_token = TOPLEFTCORNER_SYM; 8177 return; 8178 8179 case 17: 8180 if (isEqualToCSSIdentifier(name + 2, "op-right-corner")) 8181 m_token = TOPRIGHTCORNER_SYM; 8182 return; 8183 } 8184 return; 8185 8186 case '-': 8187 switch (length) { 8188 case 13: 8189 if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-rule")) 8190 m_token = WEBKIT_RULE_SYM; 8191 return; 8192 8193 case 14: 8194 if (hasEscape) 8195 return; 8196 8197 // Checking the last character first could further reduce the possibile cases. 
8198 if (isASCIIAlphaCaselessEqual(name[13], 's') && isEqualToCSSIdentifier(name + 2, "webkit-decl")) 8199 m_token = WEBKIT_DECLS_SYM; 8200 else if (isASCIIAlphaCaselessEqual(name[13], 'e') && isEqualToCSSIdentifier(name + 2, "webkit-valu")) 8201 m_token = WEBKIT_VALUE_SYM; 8202 return; 8203 8204 case 15: 8205 if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-region")) 8206 m_token = WEBKIT_REGION_RULE_SYM; 8207 return; 8208 8209 case 17: 8210 if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-selector")) 8211 m_token = WEBKIT_SELECTOR_SYM; 8212 return; 8213 8214 case 18: 8215 if (isEqualToCSSIdentifier(name + 2, "webkit-keyframes")) 8216 m_token = WEBKIT_KEYFRAMES_SYM; 8217 return; 8218 8219 case 19: 8220 if (isEqualToCSSIdentifier(name + 2, "webkit-mediaquery")) { 8221 m_parsingMode = MediaQueryMode; 8222 m_token = WEBKIT_MEDIAQUERY_SYM; 8223 } 8224 return; 8225 8226 case 22: 8227 if (!hasEscape && isEqualToCSSIdentifier(name + 2, "webkit-keyframe-rule")) 8228 m_token = WEBKIT_KEYFRAME_RULE_SYM; 8229 return; 8230 } 8231 } 8232 } 8233 7443 8234 int CSSParser::lex(void* yylvalWithoutType) 7444 8235 { 7445 8236 YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType); 7446 int length; 7447 7448 lex(); 7449 7450 UChar* t = text(&length); 7451 8237 // Write pointer for the next character. 8238 UChar* result; 8239 bool hasEscape; 8240 8241 // The input buffer is terminated by two \0, so 8242 // it is safe to read two characters ahead anytime. 8243 8244 #ifndef NDEBUG 8245 // In debug we check with an ASSERT that the length is > 0 for string types. 8246 yylval->string.characters = 0; 8247 yylval->string.length = 0; 8248 #endif 8249 8250 restartAfterComment: 8251 m_tokenStart = result = m_currentCharacter; 8252 m_token = *m_currentCharacter; 8253 ++m_currentCharacter; 8254 8255 switch ((m_token <= 127) ? 
typesOfASCIICharacters[m_token] : CharacterIdentifierStart) { 8256 case CharacterCaselessU: 8257 if (UNLIKELY(*m_currentCharacter == '+')) 8258 if (parseUnicodeRange()) { 8259 m_token = UNICODERANGE; 8260 yylval->string.characters = m_tokenStart; 8261 yylval->string.length = m_currentCharacter - m_tokenStart; 8262 break; 8263 } 8264 // Fall through to CharacterIdentifierStart. 8265 8266 case CharacterIdentifierStart: 8267 --m_currentCharacter; 8268 parseIdentifier(result, hasEscape); 8269 m_token = IDENT; 8270 8271 yylval->string.characters = m_tokenStart; 8272 yylval->string.length = result - m_tokenStart; 8273 8274 if (UNLIKELY(*m_currentCharacter == '(')) { 8275 m_token = FUNCTION; 8276 if (!hasEscape) 8277 detectFunctionTypeToken(result - m_tokenStart); 8278 ++m_currentCharacter; 8279 ++result; 8280 ++yylval->string.length; 8281 8282 if (token() == URI) { 8283 m_token = FUNCTION; 8284 // Check whether it is really an URI. 8285 parseURI(yylval->string.characters, result); 8286 yylval->string.length = result - yylval->string.characters; 8287 } 8288 } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) { 8289 if (m_parsingMode == MediaQueryMode) 8290 detectMediaQueryToken(result - m_tokenStart); 8291 else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(m_tokenStart[0], 'n')) { 8292 if (result - m_tokenStart == 1) { 8293 // String "n" is IDENT but "n+1" is NTH. 8294 if (parseNthChildExtra()) { 8295 m_token = NTH; 8296 yylval->string.length = m_currentCharacter - m_tokenStart; 8297 } 8298 } else if (result - m_tokenStart == 2 && m_tokenStart[1] == '-') { 8299 // String "n-" is IDENT but "n-1" is NTH. 8300 // Speculatively decrease m_currentCharacter to detect an nth-child token. 8301 m_currentCharacter--; 8302 if (parseNthChildExtra()) { 8303 m_token = NTH; 8304 yylval->string.length = m_currentCharacter - m_tokenStart; 8305 } else { 8306 // Revert the change to m_currentCharacter if unsuccessful. 
8307 m_currentCharacter++; 8308 } 8309 } 8310 } 8311 } 8312 break; 8313 8314 case CharacterDot: 8315 if (!isASCIIDigit(m_currentCharacter[0])) 8316 break; 8317 // Fall through to CharacterNumber. 8318 8319 case CharacterNumber: { 8320 bool dotSeen = (m_token == '.'); 8321 8322 while (true) { 8323 if (!isASCIIDigit(m_currentCharacter[0])) { 8324 // Only one dot is allowed for a number, 8325 // and it must be followed by a digit. 8326 if (m_currentCharacter[0] != '.' || dotSeen || !isASCIIDigit(m_currentCharacter[1])) 8327 break; 8328 dotSeen = true; 8329 } 8330 ++m_currentCharacter; 8331 } 8332 8333 if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*m_currentCharacter, 'n')) { 8334 // "[0-9]+n" is always an NthChild. 8335 ++m_currentCharacter; 8336 parseNthChildExtra(); 8337 m_token = NTH; 8338 yylval->string.characters = m_tokenStart; 8339 yylval->string.length = m_currentCharacter - m_tokenStart; 8340 break; 8341 } 8342 8343 yylval->number = charactersToDouble(m_tokenStart, m_currentCharacter - m_tokenStart); 8344 8345 // Type of the function. 8346 if (isIdentifierStart()) { 8347 UChar* type = m_currentCharacter; 8348 result = m_currentCharacter; 8349 8350 parseIdentifier(result, hasEscape); 8351 if (*m_currentCharacter == '+') { 8352 // Any identifier followed by a '+' sign is an invalid dimension. 8353 ++m_currentCharacter; 8354 m_token = INVALIDDIMEN; 8355 } else { 8356 m_token = DIMEN; 8357 if (!hasEscape) 8358 detectNumberToken(type, m_currentCharacter - type); 8359 8360 if (m_token == DIMEN) { 8361 // The decoded number is overwritten, but this is intentional. 8362 yylval->string.characters = m_tokenStart; 8363 yylval->string.length = m_currentCharacter - m_tokenStart; 8364 } 8365 } 8366 } else if (*m_currentCharacter == '%') { 8367 // Although the CSS grammar says {num}% we follow 8368 // webkit at the moment which uses {num}%+. 
8369 do { 8370 ++m_currentCharacter; 8371 } while (*m_currentCharacter == '%'); 8372 m_token = PERCENTAGE; 8373 } else 8374 m_token = dotSeen ? FLOATTOKEN : INTEGER; 8375 break; 8376 } 8377 8378 case CharacterDash: 8379 if (isIdentifierStartAfterDash(m_currentCharacter)) { 8380 --m_currentCharacter; 8381 parseIdentifier(result, hasEscape); 8382 m_token = IDENT; 8383 8384 if (*m_currentCharacter == '(') { 8385 m_token = FUNCTION; 8386 if (!hasEscape) 8387 detectDashToken(result - m_tokenStart); 8388 ++m_currentCharacter; 8389 ++result; 8390 } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(m_tokenStart[1], 'n')) { 8391 if (result - m_tokenStart == 2) { 8392 // String "-n" is IDENT but "-n+1" is NTH. 8393 if (parseNthChildExtra()) { 8394 m_token = NTH; 8395 result = m_currentCharacter; 8396 } 8397 } else if (result - m_tokenStart == 3 && m_tokenStart[2] == '-') { 8398 // String "-n-" is IDENT but "-n-1" is NTH. 8399 // Speculatively decrease m_currentCharacter to detect an nth-child token. 8400 m_currentCharacter--; 8401 if (parseNthChildExtra()) { 8402 m_token = NTH; 8403 yylval->string.length = m_currentCharacter - m_tokenStart; 8404 } else { 8405 // Revert the change to m_currentCharacter if unsuccessful. 8406 m_currentCharacter++; 8407 } 8408 } 8409 } 8410 yylval->string.characters = m_tokenStart; 8411 yylval->string.length = result - m_tokenStart; 8412 } else if (m_currentCharacter[0] == '-' && m_currentCharacter[1] == '>') { 8413 m_currentCharacter += 2; 8414 m_token = SGML_CD; 8415 } else if (UNLIKELY(m_parsingMode == NthChildMode)) { 8416 // "-[0-9]+n" is always an NthChild. 8417 if (parseNthChild()) { 8418 parseNthChildExtra(); 8419 m_token = NTH; 8420 yylval->string.characters = m_tokenStart; 8421 yylval->string.length = m_currentCharacter - m_tokenStart; 8422 } 8423 } 8424 break; 8425 8426 case CharacterOther: 8427 // m_token is simply the current character. 
8428 break; 8429 8430 case CharacterWhiteSpace: 8431 m_token = WHITESPACE; 8432 // Might start with a '\n'. 8433 --m_currentCharacter; 8434 do { 8435 if (*m_currentCharacter == '\n') 8436 ++m_lineNumber; 8437 ++m_currentCharacter; 8438 } while (*m_currentCharacter <= ' ' && (typesOfASCIICharacters[*m_currentCharacter] == CharacterWhiteSpace)); 8439 break; 8440 8441 case CharacterEndMediaQuery: 8442 if (m_parsingMode == MediaQueryMode) 8443 m_parsingMode = NormalMode; 8444 break; 8445 8446 case CharacterEndNthChild: 8447 if (m_parsingMode == NthChildMode) 8448 m_parsingMode = NormalMode; 8449 break; 8450 8451 case CharacterQuote: 8452 if (checkAndSkipString(m_currentCharacter, m_token)) { 8453 ++result; 8454 parseString(result, m_token); 8455 m_token = STRING; 8456 yylval->string.characters = m_tokenStart + 1; 8457 yylval->string.length = result - (m_tokenStart + 1); 8458 } 8459 break; 8460 8461 case CharacterExclamationMark: { 8462 UChar* start = skipWhiteSpace(m_currentCharacter); 8463 if (isEqualToCSSIdentifier(start, "important")) { 8464 m_token = IMPORTANT_SYM; 8465 m_currentCharacter = start + 9; 8466 } 8467 break; 8468 } 8469 8470 case CharacterHashmark: { 8471 UChar* start = m_currentCharacter; 8472 result = m_currentCharacter; 8473 8474 if (isASCIIDigit(*m_currentCharacter)) { 8475 // This must be a valid hex number token. 8476 do { 8477 ++m_currentCharacter; 8478 } while (isASCIIHexDigit(*m_currentCharacter)); 8479 m_token = HEX; 8480 yylval->string.characters = start; 8481 yylval->string.length = m_currentCharacter - start; 8482 } else if (isIdentifierStart()) { 8483 m_token = IDSEL; 8484 parseIdentifier(result, hasEscape); 8485 if (!hasEscape) { 8486 // Check whether the identifier is also a valid hex number. 
8487 UChar* current = start; 8488 m_token = HEX; 8489 do { 8490 if (!isASCIIHexDigit(*current)) { 8491 m_token = IDSEL; 8492 break; 8493 } 8494 ++current; 8495 } while (current < result); 8496 } 8497 yylval->string.characters = start; 8498 yylval->string.length = result - start; 8499 } 8500 break; 8501 } 8502 8503 case CharacterSlash: 8504 // Ignore comments. They are not even considered as white spaces. 8505 if (*m_currentCharacter == '*') { 8506 ++m_currentCharacter; 8507 while (m_currentCharacter[0] != '*' || m_currentCharacter[1] != '/') { 8508 if (m_currentCharacter[0] == '\n') 8509 ++m_lineNumber; 8510 if (m_currentCharacter[0] == '\0' && m_currentCharacter[1] == '\0') { 8511 // Unterminated comments are simply ignored. 8512 m_currentCharacter -= 2; 8513 break; 8514 } 8515 ++m_currentCharacter; 8516 } 8517 m_currentCharacter += 2; 8518 goto restartAfterComment; 8519 } 8520 break; 8521 8522 case CharacterDollar: 8523 if (*m_currentCharacter == '=') { 8524 ++m_currentCharacter; 8525 m_token = ENDSWITH; 8526 } 8527 break; 8528 8529 case CharacterAsterisk: 8530 if (*m_currentCharacter == '=') { 8531 ++m_currentCharacter; 8532 m_token = CONTAINS; 8533 } 8534 break; 8535 8536 case CharacterPlus: 8537 if (UNLIKELY(m_parsingMode == NthChildMode)) { 8538 // Simplest case. "+[0-9]*n" is always NthChild. 8539 if (parseNthChild()) { 8540 parseNthChildExtra(); 8541 m_token = NTH; 8542 yylval->string.characters = m_tokenStart; 8543 yylval->string.length = m_currentCharacter - m_tokenStart; 8544 } 8545 } 8546 break; 8547 8548 case CharacterLess: 8549 if (m_currentCharacter[0] == '!' 
&& m_currentCharacter[1] == '-' && m_currentCharacter[2] == '-') { 8550 m_currentCharacter += 3; 8551 m_token = SGML_CD; 8552 } 8553 break; 8554 8555 case CharacterAt: 8556 if (isIdentifierStart()) { 8557 m_token = ATKEYWORD; 8558 ++result; 8559 parseIdentifier(result, hasEscape); 8560 detectAtToken(result - m_tokenStart, hasEscape); 8561 } 8562 break; 8563 8564 case CharacterBackSlash: 8565 if (isCSSEscape(*m_currentCharacter)) { 8566 --m_currentCharacter; 8567 parseIdentifier(result, hasEscape); 8568 m_token = IDENT; 8569 yylval->string.characters = m_tokenStart; 8570 yylval->string.length = result - m_tokenStart; 8571 } 8572 break; 8573 8574 case CharacterXor: 8575 if (*m_currentCharacter == '=') { 8576 ++m_currentCharacter; 8577 m_token = BEGINSWITH; 8578 } 8579 break; 8580 8581 case CharacterVerticalBar: 8582 if (*m_currentCharacter == '=') { 8583 ++m_currentCharacter; 8584 m_token = DASHMATCH; 8585 } 8586 break; 8587 8588 case CharacterTilde: 8589 if (*m_currentCharacter == '=') { 8590 ++m_currentCharacter; 8591 m_token = INCLUDES; 8592 } 8593 break; 8594 8595 default: 8596 ASSERT_NOT_REACHED(); 8597 break; 8598 } 8599 8600 #ifndef NDEBUG 7452 8601 switch (token()) { 7453 case WHITESPACE:7454 case SGML_CD:7455 case INCLUDES:7456 case DASHMATCH:7457 break;7458 7459 case URI:7460 8602 case STRING: 8603 ASSERT(yylval->string.characters == m_tokenStart + 1); 8604 break; 8605 7461 8606 case IDENT: 7462 8607 case NTH: 7463 case HEX:7464 case IDSEL:7465 8608 case DIMEN: 7466 8609 case UNICODERANGE: … … 7471 8614 case MINFUNCTION: 7472 8615 case MAXFUNCTION: 7473 yylval->string.characters = t; 7474 yylval->string.length = length; 7475 break; 7476 7477 case IMPORT_SYM: 7478 case PAGE_SYM: 7479 case MEDIA_SYM: 7480 case FONT_FACE_SYM: 7481 case CHARSET_SYM: 7482 case NAMESPACE_SYM: 7483 case WEBKIT_KEYFRAMES_SYM: 7484 7485 case IMPORTANT_SYM: 7486 break; 7487 7488 case QEMS: 7489 length--; 7490 case GRADS: 7491 case TURNS: 7492 length--; 7493 case DEGS: 7494 case RADS: 
7495 case KHERTZ: 7496 case REMS: 7497 length--; 7498 case MSECS: 7499 case HERTZ: 7500 case EMS: 7501 case EXS: 7502 case PXS: 7503 case CMS: 7504 case MMS: 7505 case INS: 7506 case PTS: 7507 case PCS: 7508 length--; 7509 case SECS: 7510 case PERCENTAGE: 7511 length--; 7512 case FLOATTOKEN: 7513 case INTEGER: 7514 yylval->number = charactersToDouble(t, length); 7515 break; 7516 7517 default: 7518 break; 7519 } 7520 7521 return token(); 7522 } 7523 7524 void CSSParser::recheckAtKeyword(const UChar* str, int len) 7525 { 7526 String ruleName(str, len); 7527 if (equalIgnoringCase(ruleName, "@import")) 7528 yyTok = IMPORT_SYM; 7529 else if (equalIgnoringCase(ruleName, "@page")) 7530 yyTok = PAGE_SYM; 7531 else if (equalIgnoringCase(ruleName, "@media")) 7532 yyTok = MEDIA_SYM; 7533 else if (equalIgnoringCase(ruleName, "@font-face")) 7534 yyTok = FONT_FACE_SYM; 7535 else if (equalIgnoringCase(ruleName, "@charset")) 7536 yyTok = CHARSET_SYM; 7537 else if (equalIgnoringCase(ruleName, "@namespace")) 7538 yyTok = NAMESPACE_SYM; 7539 else if (equalIgnoringCase(ruleName, "@-webkit-keyframes")) 7540 yyTok = WEBKIT_KEYFRAMES_SYM; 7541 else if (equalIgnoringCase(ruleName, "@-webkit-mediaquery")) 7542 yyTok = WEBKIT_MEDIAQUERY_SYM; 7543 } 7544 7545 UChar* CSSParser::text(int *length) 7546 { 7547 UChar* start = yytext; 7548 int l = yyleng; 7549 switch (yyTok) { 7550 case STRING: 7551 l--; 7552 /* nobreak */ 8616 ASSERT(yylval->string.characters == m_tokenStart && yylval->string.length > 0); 8617 break; 8618 8619 case URI: 8620 ASSERT(yylval->string.characters && yylval->string.characters != m_tokenStart); 8621 break; 8622 7553 8623 case HEX: 7554 8624 case IDSEL: 7555 start++; 7556 l--; 7557 break; 7558 case URI: 7559 // "url("{w}{string}{w}")" 7560 // "url("{w}{url}{w}")" 7561 // strip "url(" and ")" 7562 start += 4; 7563 l -= 5; 7564 // strip {w} 7565 while (l && isHTMLSpace(*start)) { 7566 ++start; 7567 --l; 7568 } 7569 while (l && isHTMLSpace(start[l - 1])) 7570 --l; 7571 if (l 
&& (*start == '"' || *start == '\'')) { 7572 ASSERT(l >= 2 && start[l - 1] == *start); 7573 ++start; 7574 l -= 2; 7575 } 7576 break; 7577 default: 7578 break; 7579 } 7580 7581 // process escapes 7582 UChar* out = start; 7583 UChar* escape = 0; 7584 7585 bool sawEscape = false; 7586 7587 for (int i = 0; i < l; i++) { 7588 UChar* current = start + i; 7589 if (escape == current - 1) { 7590 if (isASCIIHexDigit(*current)) 7591 continue; 7592 if (yyTok == STRING && 7593 (*current == '\n' || *current == '\r' || *current == '\f')) { 7594 // ### handle \r\n case 7595 if (*current != '\r') 7596 escape = 0; 7597 continue; 7598 } 7599 // in all other cases copy the char to output 7600 // ### 7601 *out++ = *current; 7602 escape = 0; 7603 continue; 7604 } 7605 if (escape == current - 2 && yyTok == STRING && 7606 *(current-1) == '\r' && *current == '\n') { 7607 escape = 0; 7608 continue; 7609 } 7610 if (escape > current - 7 && isASCIIHexDigit(*current)) 7611 continue; 7612 if (escape) { 7613 // add escaped char 7614 unsigned uc = 0; 7615 escape++; 7616 while (escape < current) { 7617 uc *= 16; 7618 uc += toASCIIHexValue(*escape); 7619 escape++; 7620 } 7621 // can't handle chars outside ucs2 7622 if (uc > 0xffff) 7623 uc = 0xfffd; 7624 *out++ = uc; 7625 escape = 0; 7626 if (isHTMLSpace(*current)) 7627 continue; 7628 } 7629 if (!escape && *current == '\\') { 7630 escape = current; 7631 sawEscape = true; 7632 continue; 7633 } 7634 *out++ = *current; 7635 } 7636 if (escape) { 7637 // add escaped char 7638 unsigned uc = 0; 7639 escape++; 7640 while (escape < start+l) { 7641 uc *= 16; 7642 uc += toASCIIHexValue(*escape); 7643 escape++; 7644 } 7645 // can't handle chars outside ucs2 7646 if (uc > 0xffff) 7647 uc = 0xfffd; 7648 *out++ = uc; 7649 } 7650 7651 *length = out - start; 7652 7653 // If we have an unrecognized @-keyword, and if we handled any escapes at all, then 7654 // we should attempt to adjust yyTok to the correct type. 
7655 if (yyTok == ATKEYWORD && sawEscape) 7656 recheckAtKeyword(start, *length); 7657 7658 return start; 7659 } 7660 7661 void CSSParser::countLines() 7662 { 7663 for (UChar* current = yytext; current < yytext + yyleng; ++current) { 7664 if (*current == '\n') 7665 ++m_lineNumber; 7666 } 8625 ASSERT(yylval->string.characters == m_tokenStart + 1 && yylval->string.length > 0); 8626 break; 8627 } 8628 #endif 8629 8630 return token(); 7667 8631 } 7668 8632 … … 8070 9034 void CSSParser::markSelectorListStart() 8071 9035 { 8072 m_selectorListRange.start = yytext - m_data.get();9036 m_selectorListRange.start = m_tokenStart - m_dataStart.get(); 8073 9037 } 8074 9038 … … 8077 9041 if (!m_currentRuleData) 8078 9042 return; 8079 UChar* listEnd = yytext;8080 while (listEnd > m_data .get() + 1) {9043 UChar* listEnd = m_tokenStart; 9044 while (listEnd > m_dataStart.get() + 1) { 8081 9045 if (isHTMLSpace(*(listEnd - 1))) 8082 9046 --listEnd; … … 8084 9048 break; 8085 9049 } 8086 m_selectorListRange.end = listEnd - m_data .get();9050 m_selectorListRange.end = listEnd - m_dataStart.get(); 8087 9051 } 8088 9052 8089 9053 void CSSParser::markRuleBodyStart() 8090 9054 { 8091 unsigned offset = yytext - m_data.get();8092 if (* yytext == '{')9055 unsigned offset = m_tokenStart - m_dataStart.get(); 9056 if (*m_tokenStart == '{') 8093 9057 ++offset; // Skip the rule body opening brace. 
8094 9058 if (offset > m_ruleBodyRange.start) … … 8099 9063 void CSSParser::markRuleBodyEnd() 8100 9064 { 8101 unsigned offset = yytext - m_data.get();9065 unsigned offset = m_tokenStart - m_dataStart.get(); 8102 9066 if (offset > m_ruleBodyRange.end) 8103 9067 m_ruleBodyRange.end = offset; … … 8108 9072 if (!m_inStyleRuleOrDeclaration) 8109 9073 return; 8110 m_propertyRange.start = yytext - m_data.get();9074 m_propertyRange.start = m_tokenStart - m_dataStart.get(); 8111 9075 } 8112 9076 … … 8115 9079 if (!m_inStyleRuleOrDeclaration) 8116 9080 return; 8117 unsigned offset = yytext - m_data.get();8118 if (* yytext == ';') // Include semicolon into the property text.9081 unsigned offset = m_tokenStart - m_dataStart.get(); 9082 if (*m_tokenStart == ';') // Include semicolon into the property text. 8119 9083 ++offset; 8120 9084 m_propertyRange.end = offset; … … 8124 9088 const unsigned end = m_propertyRange.end; 8125 9089 ASSERT(start < end); 8126 String propertyString = String(m_data .get() + start, end - start).stripWhiteSpace();9090 String propertyString = String(m_dataStart.get() + start, end - start).stripWhiteSpace(); 8127 9091 if (propertyString.endsWith(";", true)) 8128 9092 propertyString = propertyString.left(propertyString.length() - 1); … … 8343 9307 } 8344 9308 8345 #define YY_DECL int CSSParser::lex() 8346 #define yyconst const 8347 typedef int yy_state_type; 8348 typedef unsigned YY_CHAR; 8349 // The following line makes sure we treat non-Latin-1 Unicode characters correctly. 8350 #define YY_SC_TO_UI(c) (c > 0xff ? 
0xff : c) 8351 #define YY_DO_BEFORE_ACTION \ 8352 yytext = yy_bp; \ 8353 yyleng = (int) (yy_cp - yy_bp); \ 8354 yy_hold_char = *yy_cp; \ 8355 *yy_cp = 0; \ 8356 yy_c_buf_p = yy_cp; 8357 #define YY_BREAK break; 8358 #define ECHO 8359 #define YY_RULE_SETUP 8360 #define INITIAL 0 8361 #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) 8362 #define yyterminate() yyTok = END_TOKEN; return yyTok 8363 #define YY_FATAL_ERROR(a) 8364 // The following line is needed to build the tokenizer with a condition stack. 8365 // The macro is used in the tokenizer grammar with lines containing 8366 // BEGIN(mediaqueries) and BEGIN(initial). yy_start acts as index to 8367 // tokenizer transition table, and 'mediaqueries' and 'initial' are 8368 // offset multipliers that specify which transitions are active 8369 // in the tokenizer during in each condition (tokenizer state). 8370 #define BEGIN yy_start = 1 + 2 * 8371 8372 #include "tokenizer.cpp" 8373 8374 } 9309 } -
trunk/Source/WebCore/css/CSSParser.h
r106166 r106217 320 320 void resetPropertyMarks() { m_propertyRange.start = m_propertyRange.end = UINT_MAX; } 321 321 int lex(void* yylval); 322 int token() { return yyTok; } 323 UChar* text(int* length); 324 void countLines(); 325 int lex(); 322 int token() { return m_token; } 326 323 327 324 PassRefPtr<CSSPrimitiveValue> createPrimitiveNumericValue(CSSParserValue*); … … 329 326 330 327 private: 328 inline bool isIdentifierStart(); 329 330 static inline UChar* checkAndSkipString(UChar*, UChar); 331 332 void parseEscape(UChar*&); 333 inline void parseIdentifier(UChar*&, bool&); 334 inline void parseString(UChar*&, UChar); 335 inline void parseURI(UChar*&, UChar*&); 336 inline bool parseUnicodeRange(); 337 bool parseNthChild(); 338 bool parseNthChildExtra(); 339 inline void detectFunctionTypeToken(int); 340 inline void detectMediaQueryToken(int); 341 inline void detectNumberToken(UChar*, int); 342 inline void detectDashToken(int); 343 inline void detectAtToken(int, bool); 344 331 345 void setStyleSheet(CSSStyleSheet*); 332 346 void ensureCSSValuePool(); … … 364 378 bool parseColor(const String&); 365 379 366 OwnArrayPtr<UChar> m_data; 367 UChar* yytext; 368 UChar* yy_c_buf_p; 369 UChar yy_hold_char; 370 int yy_last_accepting_state; 371 UChar* yy_last_accepting_cpos; 372 int yyleng; 373 int yyTok; 374 int yy_start; 380 enum ParsingMode { 381 NormalMode, 382 MediaQueryMode, 383 NthChildMode 384 }; 385 386 ParsingMode m_parsingMode; 387 OwnArrayPtr<UChar> m_dataStart; 388 UChar* m_currentCharacter; 389 UChar* m_tokenStart; 390 int m_token; 375 391 int m_lineNumber; 376 392 int m_lastSelectorLineNumber; -
trunk/wscript
r106094 r106217 279 279 excludes.append('DocTypeStrings.cpp') 280 280 excludes.append('HTMLEntityNames.cpp') 281 excludes.append('tokenizer.cpp')282 281 283 282 # Qt specific file in common sources
Note: See TracChangeset
for help on using the changeset viewer.