Changeset 65110 in webkit


Ignore:
Timestamp:
Aug 10, 2010 6:55:53 PM (14 years ago)
Author:
abarth@webkit.org
Message:

2010-08-10 Adam Barth <abarth@webkit.org>

Reviewed by Eric Seidel.

Clients of HTMLTokenizer should be able to see where characters went in the token
https://bugs.webkit.org/show_bug.cgi?id=43766

When viewing the source of a document, we want to colorize different
parts of the input depending on how they were tokenized. In this
patch, we expose the internal segmentation of a token by recording the
start and end offsets for each attribute name and each attribute value.

  • html/HTMLToken.h: (WebCore::HTMLToken::addNewAttribute): (WebCore::HTMLToken::beginAttributeName): (WebCore::HTMLToken::endAttributeName): (WebCore::HTMLToken::beginAttributeValue): (WebCore::HTMLToken::endAttributeValue): (WebCore::HTMLToken::appendToAttributeName): (WebCore::HTMLToken::appendToAttributeValue): (WebCore::AtomicHTMLToken::AtomicHTMLToken):
  • html/HTMLTokenizer.cpp: (WebCore::HTMLTokenizer::nextToken):
Location:
trunk/WebCore
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/WebCore/ChangeLog

    r65108 r65110  
     1 2010-08-10  Adam Barth  <abarth@webkit.org>
     2
     3        Reviewed by Eric Seidel.
     4
     5        Clients of HTMLTokenizer should be able to see where characters went in the token
     6        https://bugs.webkit.org/show_bug.cgi?id=43766
     7
     8        When viewing the source of a document, we want to colorize different
     9        parts of the input depending on how they were tokenized.  In this
     10        patch, we expose the internal segmentation of a token by recording the
     11        start and end offsets for each attribute name and each attribute value.
     12
     13        * html/HTMLToken.h:
     14        (WebCore::HTMLToken::addNewAttribute):
     15        (WebCore::HTMLToken::beginAttributeName):
     16        (WebCore::HTMLToken::endAttributeName):
     17        (WebCore::HTMLToken::beginAttributeValue):
     18        (WebCore::HTMLToken::endAttributeValue):
     19        (WebCore::HTMLToken::appendToAttributeName):
     20        (WebCore::HTMLToken::appendToAttributeValue):
     21        (WebCore::AtomicHTMLToken::AtomicHTMLToken):
     22        * html/HTMLTokenizer.cpp:
     23        (WebCore::HTMLTokenizer::nextToken):
     24
    1 25 2010-08-10  Dumitru Daniliuc  <dumi@chromium.org>
    2 26
  • trunk/WebCore/html/HTMLToken.h

    r64724 r65110  
    46 46    };
    47 47
     48    class Range {
     49    public:
     50        int m_start;
     51        int m_end;
     52    };
     53
    48 54    class Attribute {
    49 55    public:
     56        Range m_nameRange;
     57        Range m_valueRange;
    50 58        WTF::Vector<UChar, 32> m_name;
    51 59        WTF::Vector<UChar, 32> m_value;
     
    147 155        m_attributes.grow(m_attributes.size() + 1);
    148 156        m_currentAttribute = &m_attributes.last();
     157#ifndef NDEBUG
     158        m_currentAttribute->m_nameRange.m_start = 0;
     159        m_currentAttribute->m_nameRange.m_end = 0;
     160        m_currentAttribute->m_valueRange.m_start = 0;
     161        m_currentAttribute->m_valueRange.m_end = 0;
     162#endif
     163    }
     164
     165    void beginAttributeName(int index)
     166    {
     167        m_currentAttribute->m_nameRange.m_start = index;
     168    }
     169
     170    void endAttributeName(int index)
     171    {
     172        m_currentAttribute->m_nameRange.m_end = index;
     173        m_currentAttribute->m_valueRange.m_start = index;
     174        m_currentAttribute->m_valueRange.m_end = index;
     175    }
     176
     177    void beginAttributeValue(int index)
     178    {
     179        m_currentAttribute->m_valueRange.m_start = index;
     180#ifndef NDEBUG
     181        m_currentAttribute->m_valueRange.m_end = 0;
     182#endif
     183    }
     184
     185    void endAttributeValue(int index)
     186    {
     187        m_currentAttribute->m_valueRange.m_end = index;
    149 188    }
    150 189
     
    153 192        ASSERT(character);
    154 193        ASSERT(m_type == StartTag || m_type == EndTag);
     194        ASSERT(m_currentAttribute->m_nameRange.m_start);
    155 195        m_currentAttribute->m_name.append(character);
    156 196    }
     
    160 200        ASSERT(character);
    161 201        ASSERT(m_type == StartTag || m_type == EndTag);
     202        ASSERT(m_currentAttribute->m_valueRange.m_start);
    162 203        m_currentAttribute->m_value.append(character);
    163 204    }
     
    324 365                    String name(iter->m_name.data(), iter->m_name.size());
    325 366                    String value(iter->m_value.data(), iter->m_value.size());
     367                    ASSERT(iter->m_nameRange.m_start);
     368                    ASSERT(iter->m_nameRange.m_end);
     369                    ASSERT(iter->m_valueRange.m_start);
     370                    ASSERT(iter->m_valueRange.m_end);
    326 371                    RefPtr<Attribute> mappedAttribute = Attribute::createMapped(name, value);
    327 372                    if (!m_attributes) {
  • trunk/WebCore/html/HTMLTokenizer.cpp

    r65077 r65110  
    865 865        else if (isASCIIUpper(cc)) {
    866 866            m_token->addNewAttribute();
     867            m_token->beginAttributeName(source.numberOfCharactersConsumed());
    867 868            m_token->appendToAttributeName(toLowerCase(cc));
    868 869            ADVANCE_TO(AttributeNameState);
     
    874 875                parseError();
    875 876            m_token->addNewAttribute();
     877            m_token->beginAttributeName(source.numberOfCharactersConsumed());
    876 878            m_token->appendToAttributeName(cc);
    877 879            ADVANCE_TO(AttributeNameState);
     
    881 883
    882 884    BEGIN_STATE(AttributeNameState) {
    883         if (isTokenizerWhitespace(cc))
     885        if (isTokenizerWhitespace(cc)) {
     886            m_token->endAttributeName(source.numberOfCharactersConsumed());
    884 887            ADVANCE_TO(AfterAttributeNameState);
    885         else if (cc == '/')
     888        } else if (cc == '/') {
     889            m_token->endAttributeName(source.numberOfCharactersConsumed());
    886 890            ADVANCE_TO(SelfClosingStartTagState);
    887         else if (cc == '=')
     891        } else if (cc == '=') {
     892            m_token->endAttributeName(source.numberOfCharactersConsumed());
    888 893            ADVANCE_TO(BeforeAttributeValueState);
    889         else if (cc == '>')
    890             return emitAndResumeIn(source, DataState);
    891         else if (isASCIIUpper(cc)) {
     894        } else if (cc == '>') {
     895            m_token->endAttributeName(source.numberOfCharactersConsumed());
     896            return emitAndResumeIn(source, DataState);
     897        } else if (isASCIIUpper(cc)) {
    892 898            m_token->appendToAttributeName(toLowerCase(cc));
    893 899            ADVANCE_TO(AttributeNameState);
    894 900        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
    895 901            parseError();
     902            m_token->endAttributeName(source.numberOfCharactersConsumed());
    896 903            RECONSUME_IN(DataState);
    897 904        } else {
     
    915 922        else if (isASCIIUpper(cc)) {
    916 923            m_token->addNewAttribute();
     924            m_token->beginAttributeName(source.numberOfCharactersConsumed());
    917 925            m_token->appendToAttributeName(toLowerCase(cc));
    918 926            ADVANCE_TO(AttributeNameState);
     
    924 932                parseError();
    925 933            m_token->addNewAttribute();
     934            m_token->beginAttributeName(source.numberOfCharactersConsumed());
    926 935            m_token->appendToAttributeName(cc);
    927 936            ADVANCE_TO(AttributeNameState);
     
    933 942        if (isTokenizerWhitespace(cc))
    934 943            ADVANCE_TO(BeforeAttributeValueState);
    935         else if (cc == '"')
     944        else if (cc == '"') {
     945            m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    936 946            ADVANCE_TO(AttributeValueDoubleQuotedState);
    937         else if (cc == '&')
     947        } else if (cc == '&') {
     948            m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    938 949            RECONSUME_IN(AttributeValueUnquotedState);
    939         else if (cc == '\'')
     950        } else if (cc == '\'') {
     951            m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
    940 952            ADVANCE_TO(AttributeValueSingleQuotedState);
    941         else if (cc == '>') {
     953        } else if (cc == '>') {
    942 954            parseError();
    943 955            return emitAndResumeIn(source, DataState);
     
    948 960            if (cc == '<' || cc == '=' || cc == '`')
    949 961                parseError();
     962            m_token->beginAttributeValue(source.numberOfCharactersConsumed());
    950 963            m_token->appendToAttributeValue(cc);
    951 964            ADVANCE_TO(AttributeValueUnquotedState);
     
    955 968
    956 969    BEGIN_STATE(AttributeValueDoubleQuotedState) {
    957         if (cc == '"')
     970        if (cc == '"') {
     971            m_token->endAttributeValue(source.numberOfCharactersConsumed());
    958 972            ADVANCE_TO(AfterAttributeValueQuotedState);
    959         else if (cc == '&') {
     973        } else if (cc == '&') {
    960 974            m_additionalAllowedCharacter = '"';
    961 975            ADVANCE_TO(CharacterReferenceInAttributeValueState);
    962 976        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
    963 977            parseError();
     978            m_token->endAttributeValue(source.numberOfCharactersConsumed());
    964 979            RECONSUME_IN(DataState);
    965 980        } else {
     
    971 986
    972 987    BEGIN_STATE(AttributeValueSingleQuotedState) {
    973         if (cc == '\'')
     988        if (cc == '\'') {
     989            m_token->endAttributeValue(source.numberOfCharactersConsumed());
    974 990            ADVANCE_TO(AfterAttributeValueQuotedState);
    975         else if (cc == '&') {
     991        } else if (cc == '&') {
    976 992            m_additionalAllowedCharacter = '\'';
    977 993            ADVANCE_TO(CharacterReferenceInAttributeValueState);
    978 994        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
    979 995            parseError();
     996            m_token->endAttributeValue(source.numberOfCharactersConsumed());
    980 997            RECONSUME_IN(DataState);
    981 998        } else {
     
    987 1004
    988 1005    BEGIN_STATE(AttributeValueUnquotedState) {
    989         if (isTokenizerWhitespace(cc))
     1006        if (isTokenizerWhitespace(cc)) {
     1007            m_token->endAttributeValue(source.numberOfCharactersConsumed());
    990 1008            ADVANCE_TO(BeforeAttributeNameState);
    991         else if (cc == '&') {
     1009        } else if (cc == '&') {
    992 1010            m_additionalAllowedCharacter = '>';
    993 1011            ADVANCE_TO(CharacterReferenceInAttributeValueState);
    994         } else if (cc == '>')
    995             return emitAndResumeIn(source, DataState);
    996         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
    997             parseError();
     1012        } else if (cc == '>') {
     1013            m_token->endAttributeValue(source.numberOfCharactersConsumed());
     1014            return emitAndResumeIn(source, DataState);
     1015        } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
     1016            parseError();
     1017            m_token->endAttributeValue(source.numberOfCharactersConsumed());
    998 1018            RECONSUME_IN(DataState);
    999 1019        } else {
Note: See TracChangeset for help on using the changeset viewer.