Changeset 65110 in webkit
- Timestamp:
- Aug 10, 2010 6:55:53 PM (14 years ago)
- Location:
- trunk/WebCore
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/WebCore/ChangeLog
r65108 r65110 1 2010-08-10 Adam Barth <abarth@webkit.org> 2 3 Reviewed by Eric Seidel. 4 5 Clients of HTMLTokenizer should be able to see where characters went in the token 6 https://bugs.webkit.org/show_bug.cgi?id=43766 7 8 When viewing the source of a document, we want to colorize different 9 parts of the input depending on how they were tokenized. In this 10 patch, we expose the internal segmentation of a token by recording the 11 start and end offsets for each attribute name and each attribute value. 12 13 * html/HTMLToken.h: 14 (WebCore::HTMLToken::addNewAttribute): 15 (WebCore::HTMLToken::beginAttributeName): 16 (WebCore::HTMLToken::endAttributeName): 17 (WebCore::HTMLToken::beginAttributeValue): 18 (WebCore::HTMLToken::endAttributeValue): 19 (WebCore::HTMLToken::appendToAttributeName): 20 (WebCore::HTMLToken::appendToAttributeValue): 21 (WebCore::AtomicHTMLToken::AtomicHTMLToken): 22 * html/HTMLTokenizer.cpp: 23 (WebCore::HTMLTokenizer::nextToken): 24 1 25 2010-08-10 Dumitru Daniliuc <dumi@chromium.org> 2 26 -
trunk/WebCore/html/HTMLToken.h
r64724 r65110 46 46 }; 47 47 48 class Range { 49 public: 50 int m_start; 51 int m_end; 52 }; 53 48 54 class Attribute { 49 55 public: 56 Range m_nameRange; 57 Range m_valueRange; 50 58 WTF::Vector<UChar, 32> m_name; 51 59 WTF::Vector<UChar, 32> m_value; … … 147 155 m_attributes.grow(m_attributes.size() + 1); 148 156 m_currentAttribute = &m_attributes.last(); 157 #ifndef NDEBUG 158 m_currentAttribute->m_nameRange.m_start = 0; 159 m_currentAttribute->m_nameRange.m_end = 0; 160 m_currentAttribute->m_valueRange.m_start = 0; 161 m_currentAttribute->m_valueRange.m_end = 0; 162 #endif 163 } 164 165 void beginAttributeName(int index) 166 { 167 m_currentAttribute->m_nameRange.m_start = index; 168 } 169 170 void endAttributeName(int index) 171 { 172 m_currentAttribute->m_nameRange.m_end = index; 173 m_currentAttribute->m_valueRange.m_start = index; 174 m_currentAttribute->m_valueRange.m_end = index; 175 } 176 177 void beginAttributeValue(int index) 178 { 179 m_currentAttribute->m_valueRange.m_start = index; 180 #ifndef NDEBUG 181 m_currentAttribute->m_valueRange.m_end = 0; 182 #endif 183 } 184 185 void endAttributeValue(int index) 186 { 187 m_currentAttribute->m_valueRange.m_end = index; 149 188 } 150 189 … … 153 192 ASSERT(character); 154 193 ASSERT(m_type == StartTag || m_type == EndTag); 194 ASSERT(m_currentAttribute->m_nameRange.m_start); 155 195 m_currentAttribute->m_name.append(character); 156 196 } … … 160 200 ASSERT(character); 161 201 ASSERT(m_type == StartTag || m_type == EndTag); 202 ASSERT(m_currentAttribute->m_valueRange.m_start); 162 203 m_currentAttribute->m_value.append(character); 163 204 } … … 324 365 String name(iter->m_name.data(), iter->m_name.size()); 325 366 String value(iter->m_value.data(), iter->m_value.size()); 367 ASSERT(iter->m_nameRange.m_start); 368 ASSERT(iter->m_nameRange.m_end); 369 ASSERT(iter->m_valueRange.m_start); 370 ASSERT(iter->m_valueRange.m_end); 326 371 RefPtr<Attribute> mappedAttribute = Attribute::createMapped(name, value); 327 
372 if (!m_attributes) { -
trunk/WebCore/html/HTMLTokenizer.cpp
r65077 r65110 865 865 else if (isASCIIUpper(cc)) { 866 866 m_token->addNewAttribute(); 867 m_token->beginAttributeName(source.numberOfCharactersConsumed()); 867 868 m_token->appendToAttributeName(toLowerCase(cc)); 868 869 ADVANCE_TO(AttributeNameState); … … 874 875 parseError(); 875 876 m_token->addNewAttribute(); 877 m_token->beginAttributeName(source.numberOfCharactersConsumed()); 876 878 m_token->appendToAttributeName(cc); 877 879 ADVANCE_TO(AttributeNameState); … … 881 883 882 884 BEGIN_STATE(AttributeNameState) { 883 if (isTokenizerWhitespace(cc)) 885 if (isTokenizerWhitespace(cc)) { 886 m_token->endAttributeName(source.numberOfCharactersConsumed()); 884 887 ADVANCE_TO(AfterAttributeNameState); 885 else if (cc == '/') 888 } else if (cc == '/') { 889 m_token->endAttributeName(source.numberOfCharactersConsumed()); 886 890 ADVANCE_TO(SelfClosingStartTagState); 887 else if (cc == '=') 891 } else if (cc == '=') { 892 m_token->endAttributeName(source.numberOfCharactersConsumed()); 888 893 ADVANCE_TO(BeforeAttributeValueState); 889 else if (cc == '>') 890 return emitAndResumeIn(source, DataState); 891 else if (isASCIIUpper(cc)) { 894 } else if (cc == '>') { 895 m_token->endAttributeName(source.numberOfCharactersConsumed()); 896 return emitAndResumeIn(source, DataState); 897 } else if (isASCIIUpper(cc)) { 892 898 m_token->appendToAttributeName(toLowerCase(cc)); 893 899 ADVANCE_TO(AttributeNameState); 894 900 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 895 901 parseError(); 902 m_token->endAttributeName(source.numberOfCharactersConsumed()); 896 903 RECONSUME_IN(DataState); 897 904 } else { … … 915 922 else if (isASCIIUpper(cc)) { 916 923 m_token->addNewAttribute(); 924 m_token->beginAttributeName(source.numberOfCharactersConsumed()); 917 925 m_token->appendToAttributeName(toLowerCase(cc)); 918 926 ADVANCE_TO(AttributeNameState); … … 924 932 parseError(); 925 933 m_token->addNewAttribute(); 934 
m_token->beginAttributeName(source.numberOfCharactersConsumed()); 926 935 m_token->appendToAttributeName(cc); 927 936 ADVANCE_TO(AttributeNameState); … … 933 942 if (isTokenizerWhitespace(cc)) 934 943 ADVANCE_TO(BeforeAttributeValueState); 935 else if (cc == '"') 944 else if (cc == '"') { 945 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); 936 946 ADVANCE_TO(AttributeValueDoubleQuotedState); 937 else if (cc == '&') 947 } else if (cc == '&') { 948 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); 938 949 RECONSUME_IN(AttributeValueUnquotedState); 939 else if (cc == '\'') 950 } else if (cc == '\'') { 951 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); 940 952 ADVANCE_TO(AttributeValueSingleQuotedState); 941 else if (cc == '>') { 953 } else if (cc == '>') { 942 954 parseError(); 943 955 return emitAndResumeIn(source, DataState); … … 948 960 if (cc == '<' || cc == '=' || cc == '`') 949 961 parseError(); 962 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); 950 963 m_token->appendToAttributeValue(cc); 951 964 ADVANCE_TO(AttributeValueUnquotedState); … … 955 968 956 969 BEGIN_STATE(AttributeValueDoubleQuotedState) { 957 if (cc == '"') 970 if (cc == '"') { 971 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 958 972 ADVANCE_TO(AfterAttributeValueQuotedState); 959 else if (cc == '&') { 973 } else if (cc == '&') { 960 974 m_additionalAllowedCharacter = '"'; 961 975 ADVANCE_TO(CharacterReferenceInAttributeValueState); 962 976 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 963 977 parseError(); 978 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 964 979 RECONSUME_IN(DataState); 965 980 } else { … … 971 986 972 987 BEGIN_STATE(AttributeValueSingleQuotedState) { 973 if (cc == '\'') 988 if (cc == '\'') { 989 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 974 990 ADVANCE_TO(AfterAttributeValueQuotedState); 975 else if (cc == '&') { 991 } 
else if (cc == '&') { 976 992 m_additionalAllowedCharacter = '\''; 977 993 ADVANCE_TO(CharacterReferenceInAttributeValueState); 978 994 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 979 995 parseError(); 996 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 980 997 RECONSUME_IN(DataState); 981 998 } else { … … 987 1004 988 1005 BEGIN_STATE(AttributeValueUnquotedState) { 989 if (isTokenizerWhitespace(cc)) 1006 if (isTokenizerWhitespace(cc)) { 1007 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 990 1008 ADVANCE_TO(BeforeAttributeNameState); 991 else if (cc == '&') { 1009 } else if (cc == '&') { 992 1010 m_additionalAllowedCharacter = '>'; 993 1011 ADVANCE_TO(CharacterReferenceInAttributeValueState); 994 } else if (cc == '>') 995 return emitAndResumeIn(source, DataState); 996 else if (cc == InputStreamPreprocessor::endOfFileMarker) { 997 parseError(); 1012 } else if (cc == '>') { 1013 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 1014 return emitAndResumeIn(source, DataState); 1015 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { 1016 parseError(); 1017 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 998 1018 RECONSUME_IN(DataState); 999 1019 } else {
Note: See TracChangeset
for help on using the changeset viewer.