Context Navigation

← Previous Changeset
Next Changeset →

Changeset 87177 in webkit

Timestamp:

May 24, 2011 11:49:18 AM (13 years ago)

Author:

oliver@apple.com

Message:

2011-05-24 Oliver Hunt <oliver@apple.com>

Reviewed by Geoffrey Garen.

Avoid creating unnecessary identifiers and strings in the syntax checker
https://bugs.webkit.org/show_bug.cgi?id=61378

Selectively tell the lexer that there are some places it does not need to
do the real work of creating Identifiers for IDENT and STRING tokens.

Make parseString and parseIdentifier templatized on whether they should
do real work, or merely validate the tokens.

SunSpider --parse-only reports ~5-8% win depending on hardware.

parser/ASTBuilder.h: (JSC::ASTBuilder::createDotAccess):
parser/JSParser.cpp: (JSC::JSParser::next): (JSC::JSParser::consume): (JSC::JSParser::parseVarDeclarationList): (JSC::JSParser::parseConstDeclarationList): (JSC::JSParser::parseExpression): (JSC::JSParser::parseAssignmentExpression): (JSC::JSParser::parseConditionalExpression): (JSC::JSParser::parseBinaryExpression): (JSC::JSParser::parseProperty): (JSC::JSParser::parseObjectLiteral): (JSC::JSParser::parseArrayLiteral): (JSC::JSParser::parseArguments): (JSC::JSParser::parseMemberExpression):
parser/Lexer.cpp: (JSC::Lexer::parseIdentifier): (JSC::Lexer::parseString): (JSC::Lexer::lex):
parser/Lexer.h:
parser/SyntaxChecker.h: (JSC::SyntaxChecker::createDotAccess): (JSC::SyntaxChecker::createProperty):

Location:

trunk/Source/JavaScriptCore

Files:

6 edited

ChangeLog (modified) (1 diff)
parser/ASTBuilder.h (modified) (2 diffs)
parser/JSParser.cpp (modified) (18 diffs)
parser/Lexer.cpp (modified) (12 diffs)
parser/Lexer.h (modified) (2 diffs)
parser/SyntaxChecker.h (modified) (4 diffs)

Legend:

(space) : Unmodified
+ : Added
- : Removed

trunk/Source/JavaScriptCore/ChangeLog

-                      r87157
+                      r87177
+2011-05-24  Oliver Hunt  <oliver@apple.com>
+        Reviewed by Geoffrey Garen.
+        Avoid creating unnecessary identifiers and strings in the syntax checker
+        https://bugs.webkit.org/show_bug.cgi?id=61378
+        Selectively tell the lexer that there are some places it does not need to
+        do the real work of creating Identifiers for IDENT and STRING tokens.
+        Make parseString and parseIdentifier templatized on whether they should
+        do real work, or merely validate the tokens.
+        SunSpider --parse-only reports ~5-8% win depending on hardware.
+        * parser/ASTBuilder.h:
+        (JSC::ASTBuilder::createDotAccess):
+        * parser/JSParser.cpp:
+        (JSC::JSParser::next):
+        (JSC::JSParser::consume):
+        (JSC::JSParser::parseVarDeclarationList):
+        (JSC::JSParser::parseConstDeclarationList):
+        (JSC::JSParser::parseExpression):
+        (JSC::JSParser::parseAssignmentExpression):
+        (JSC::JSParser::parseConditionalExpression):
+        (JSC::JSParser::parseBinaryExpression):
+        (JSC::JSParser::parseProperty):
+        (JSC::JSParser::parseObjectLiteral):
+        (JSC::JSParser::parseArrayLiteral):
+        (JSC::JSParser::parseArguments):
+        (JSC::JSParser::parseMemberExpression):
+        * parser/Lexer.cpp:
+        (JSC::Lexer::parseIdentifier):
+        (JSC::Lexer::parseString):
+        (JSC::Lexer::lex):
+        * parser/Lexer.h:
+        * parser/SyntaxChecker.h:
+        (JSC::SyntaxChecker::createDotAccess):
+        (JSC::SyntaxChecker::createProperty):
 2011-05-23  Michael Saboff  <msaboff@apple.com>

trunk/Source/JavaScriptCore/parser/ASTBuilder.h

-                      r78727
+                      r87177
     static const bool NeedsFreeVariableInfo = true;
     static const bool CanUseFunctionCache = true;
+    static const int  DontBuildKeywords = 0;
+    static const int  DontBuildStrings = 0;
     ExpressionNode* makeBinaryNode(int token, std::pair<ExpressionNode*, BinaryOpInfo>, std::pair<ExpressionNode*, BinaryOpInfo>);
 …
     }
-    ExpressionNode* createDotAccess(ExpressionNode* base, const Identifier& property, int start, int divot, int end)
-    {
-        DotAccessorNode* node = new (m_globalData) DotAccessorNode(m_globalData, base, property);
+    ExpressionNode* createDotAccess(ExpressionNode* base, const Identifier* property, int start, int divot, int end)
+    {
+        DotAccessorNode* node = new (m_globalData) DotAccessorNode(m_globalData, base, *property);
         setExceptionLocation(node, start, divot, end);
         return node;

trunk/Source/JavaScriptCore/parser/JSParser.cpp

-                      r85456
+                      r87177
 #define failIfFalseIfStrict(cond) do { if ((!(cond)) && strictMode()) fail(); } while (0)
 #define consumeOrFail(tokenType) do { if (!consume(tokenType)) fail(); } while (0)
+#define consumeOrFailWithFlags(tokenType, flags) do { if (!consume(tokenType, flags)) fail(); } while (0)
 #define matchOrFail(tokenType) do { if (!match(tokenType)) fail(); } while (0)
 #define failIfStackOverflow() do { failIfFalse(canRecurse()); } while (0)
 …
     };
-    void next(Lexer::LexType lexType = Lexer::IdentifyReservedWords)
+    void next(unsigned lexType = 0)
     {
         m_lastLine = m_token.m_info.line;
 …
     }
-    bool consume(JSTokenType expected)
+    bool consume(JSTokenType expected, unsigned flags = 0)
     {
         bool result = m_token.m_type == expected;
         failIfFalse(result);
-        next();
+        next(flags);
         return result;
     }
 …
             int varDivot = tokenStart() + 1;
             initStart = tokenStart();
-            next(); // consume '='
+            next(TreeBuilder::DontBuildStrings); // consume '='
             int initialAssignments = m_assignmentCount;
             TreeExpression initializer = parseAssignmentExpression(context);
 …
         TreeExpression initializer = 0;
         if (hasInitializer) {
-            next(); // consume '='
+            next(TreeBuilder::DontBuildStrings); // consume '='
             initializer = parseAssignmentExpression(context);
         }
 …
     typename TreeBuilder::Comma commaNode = context.createCommaExpr(node, right);
     while (match(COMMA)) {
-        next();
+        next(TreeBuilder::DontBuildStrings);
         right = parseAssignmentExpression(context);
         failIfFalse(right);
 …
         start = tokenStart();
         m_assignmentCount++;
-        next();
+        next(TreeBuilder::DontBuildStrings);
         if (strictMode() && m_lastIdentifier && context.isResolve(lhs)) {
             failIfTrueIfStrict(m_globalData->propertyNames->eval == *m_lastIdentifier);
 …
     m_nonTrivialExpressionCount++;
     m_nonLHSCount++;
-    next();
+    next(TreeBuilder::DontBuildStrings);
     TreeExpression lhs = parseAssignmentExpression(context);
-    consumeOrFail(COLON);
+    consumeOrFailWithFlags(COLON, TreeBuilder::DontBuildStrings);
     TreeExpression rhs = parseAssignmentExpression(context);
 …
         m_nonLHSCount++;
         int operatorToken = m_token.m_type;
-        next();
+        next(TreeBuilder::DontBuildStrings);
         while (operatorStackDepth &&  context.operatorStackHasHigherPrecedence(operatorStackDepth, precedence)) {
 …
     case STRING: {
         const Identifier* ident = m_token.m_data.ident;
-        next(Lexer::IgnoreReservedWords);
+        if (complete || (wasIdent && (*ident == m_globalData->propertyNames->get || *ident == m_globalData->propertyNames->set)))
+            next(Lexer::IgnoreReservedWords);
+        else
+            next(Lexer::IgnoreReservedWords | TreeBuilder::DontBuildKeywords);
         if (match(COLON)) {
             next();
 …
 {
     int startOffset = m_token.m_data.intValue;
-    consumeOrFail(OPENBRACE);
+    consumeOrFailWithFlags(OPENBRACE, TreeBuilder::DontBuildStrings);
     if (match(CLOSEBRACE)) {
 …
     TreePropertyList tail = propertyList;
     while (match(COMMA)) {
-        next();
+        next(TreeBuilder::DontBuildStrings);
         // allow extra comma, see http://bugs.webkit.org/show_bug.cgi?id=5939
         if (match(CLOSEBRACE))
 …
 template <class TreeBuilder> TreeExpression JSParser::parseArrayLiteral(TreeBuilder& context)
 {
-    consumeOrFail(OPENBRACKET);
+    consumeOrFailWithFlags(OPENBRACKET, TreeBuilder::DontBuildStrings);
     int elisions = 0;
     while (match(COMMA)) {
-        next();
+        next(TreeBuilder::DontBuildStrings);
         elisions++;
     }
     if (match(CLOSEBRACKET)) {
-        next();
+        next(TreeBuilder::DontBuildStrings);
         return context.createArray(elisions);
     }
 …
     elisions = 0;
     while (match(COMMA)) {
-        next();
+        next(TreeBuilder::DontBuildStrings);
         elisions = 0;
 …
         if (match(CLOSEBRACKET)) {
-            next();
+            next(TreeBuilder::DontBuildStrings);
             return context.createArray(elisions, elementList);
         }
 …
 template <class TreeBuilder> TreeArguments JSParser::parseArguments(TreeBuilder& context)
 {
-    consumeOrFail(OPENPAREN);
+    consumeOrFailWithFlags(OPENPAREN, TreeBuilder::DontBuildStrings);
     if (match(CLOSEPAREN)) {
-        next();
+        next(TreeBuilder::DontBuildStrings);
         return context.createArguments();
     }
 …
     TreeArgumentsList tail = argList;
     while (match(COMMA)) {
-        next();
+        next(TreeBuilder::DontBuildStrings);
         TreeExpression arg = parseAssignmentExpression(context);
         failIfFalse(arg);
 …
             m_nonTrivialExpressionCount++;
             int expressionEnd = lastTokenEnd();
-            next(Lexer::IgnoreReservedWords);
+            next(Lexer::IgnoreReservedWords | TreeBuilder::DontBuildKeywords);
             matchOrFail(IDENT);
-            base = context.createDotAccess(base, *m_token.m_data.ident, expressionStart, expressionEnd, tokenEnd());
+            base = context.createDotAccess(base, m_token.m_data.ident, expressionStart, expressionEnd, tokenEnd());
             next();
             break;

trunk/Source/JavaScriptCore/parser/Lexer.cpp

-                      r75852
+                      r87177
 }
-ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, LexType lexType)
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, unsigned lexType)
 {
     bool bufferRequired = false;
 …
         if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character)))
             return ERRORTOK;
-        record16(character);
+        if  (shouldCreateIdentifier)
+            record16(character);
         identifierStart = currentCharacter();
     }
-    if (!bufferRequired)
-        identifierLength = currentCharacter() - identifierStart;
-    else {
-        if (identifierStart != currentCharacter())
-            m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
-        identifierStart = m_buffer16.data();
-        identifierLength = m_buffer16.size();
-    }
-    const Identifier* ident = makeIdentifier(identifierStart, identifierLength);
-    lvalp->ident = ident;
+    const Identifier* ident = 0;
+    if (shouldCreateIdentifier) {
+        if (!bufferRequired)
+            identifierLength = currentCharacter() - identifierStart;
+        else {
+            if (identifierStart != currentCharacter())
+                m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
+            identifierStart = m_buffer16.data();
+            identifierLength = m_buffer16.size();
+        }
+        ident = makeIdentifier(identifierStart, identifierLength);
+        lvalp->ident = ident;
+    } else
+        lvalp->ident = 0;
     m_delimited = false;
-    if (LIKELY(!bufferRequired && lexType == IdentifyReservedWords)) {
+    if (LIKELY(!bufferRequired && !(lexType & IgnoreReservedWords))) {
+        ASSERT(shouldCreateIdentifier);
         // Keywords must not be recognized if there was an \uXXXX in the identifier.
         const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
 …
 }
-ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
+template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
 {
     int stringQuoteCharacter = m_current;
 …
     while (m_current != stringQuoteCharacter) {
         if (UNLIKELY(m_current == '\\')) {
-            if (stringStart != currentCharacter())
+            if (stringStart != currentCharacter() && shouldBuildStrings)
                 m_buffer16.append(stringStart, currentCharacter() - stringStart);
             shift();
 …
             // Most common escape sequences first
             if (escape) {
-                record16(escape);
+                 if (shouldBuildStrings)
+                     record16(escape);
                 shift();
             } else if (UNLIKELY(isLineTerminator(m_current)))
 …
                     int prev = m_current;
                     shift();
-                    record16(convertHex(prev, m_current));
+                    if (shouldBuildStrings)
+                        record16(convertHex(prev, m_current));
                     shift();
-                } else
+                } else if (shouldBuildStrings)
                     record16('x');
             } else if (m_current == 'u') {
                 shift();
                 int character = getUnicodeCharacter();
-                if (character != -1)
-                    record16(character);
-                else if (m_current == stringQuoteCharacter)
-                    record16('u');
-                else // Only stringQuoteCharacter allowed after \u
+                if (character != -1) {
+                    if (shouldBuildStrings)
+                        record16(character);
+                } else if (m_current == stringQuoteCharacter) {
+                    if (shouldBuildStrings)
+                        record16('u');
+                } else // Only stringQuoteCharacter allowed after \u
                     return false;
             } else if (strictMode && isASCIIDigit(m_current)) {
 …
                 if (character1 != '0' || isASCIIDigit(m_current))
                     return false;
-                record16(0);
+                if (shouldBuildStrings)
+                    record16(0);
             } else if (!strictMode && isASCIIOctalDigit(m_current)) {
                 // Octal character sequences
 …
                     shift();
                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
-                        record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
+                        if (shouldBuildStrings)
+                            record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
                         shift();
-                    } else
-                        record16((character1 - '0') * 8 + character2 - '0');
-                } else
-                    record16(character1 - '0');
+                    } else {
+                        if (shouldBuildStrings)
+                            record16((character1 - '0') * 8 + character2 - '0');
+                    }
+                } else {
+                    if (shouldBuildStrings)
+                        record16(character1 - '0');
+                }
             } else if (m_current != -1) {
-                record16(m_current);
+                if (shouldBuildStrings)
+                    record16(m_current);
                 shift();
             } else
 …
     }
-    if (currentCharacter() != stringStart)
+    if (currentCharacter() != stringStart && shouldBuildStrings)
         m_buffer16.append(stringStart, currentCharacter() - stringStart);
-    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+    if (shouldBuildStrings)
+        lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+    else
+        lvalp->ident = 0;
     m_buffer16.resize(0);
     return true;
 …
 }
-JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType lexType, bool strictMode)
+JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, unsigned lexType, bool strictMode)
 {
     ASSERT(!m_error);
 …
         break;
     case CharacterQuote:
-        if (UNLIKELY(!parseString(lvalp, strictMode)))
-            goto returnError;
+        if (lexType & DontBuildStrings) {
+            if (UNLIKELY(!parseString<false>(lvalp, strictMode)))
+                goto returnError;
+        } else {
+            if (UNLIKELY(!parseString<true>(lvalp, strictMode)))
+                goto returnError;
+        }
         shift();
         m_delimited = false;
 …
         // Fall through into CharacterBackSlash.
     case CharacterBackSlash:
-        token = parseIdentifier(lvalp, lexType);
+        if (lexType & DontBuildKeywords)
+            token = parseIdentifier<false>(lvalp, lexType);
+        else
+            token = parseIdentifier<true>(lvalp, lexType);
         break;
     case CharacterLineTerminator:

trunk/Source/JavaScriptCore/parser/Lexer.h

-                      r82696
+                      r87177
         // Functions for the parser itself.
-        enum LexType { IdentifyReservedWords, IgnoreReservedWords };
-        JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType, bool strictMode);
+        enum LexType {
+            IgnoreReservedWords = 1,
+            DontBuildStrings = 2,
+            DontBuildKeywords = 4
+        };
+        JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, unsigned, bool strictMode);
         bool nextTokenIsColon();
         int lineNumber() const { return m_lineNumber; }
 …
         ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
-        ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, LexType);
-        ALWAYS_INLINE bool parseString(JSTokenData* lvalp, bool strictMode);
+        template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned);
+        template <bool shouldBuildStrings> ALWAYS_INLINE bool parseString(JSTokenData* lvalp, bool strictMode);
         ALWAYS_INLINE void parseHex(double& returnValue);
         ALWAYS_INLINE bool parseOctal(double& returnValue);

trunk/Source/JavaScriptCore/parser/SyntaxChecker.h

-                      r76177
+                      r87177
 #define SyntaxChecker_h
+#include "Lexer.h"
 #include <yarr/YarrSyntaxChecker.h>
 …
     static const bool NeedsFreeVariableInfo = false;
     static const bool CanUseFunctionCache = true;
+    static const unsigned DontBuildKeywords = Lexer::DontBuildKeywords;
+    static const unsigned DontBuildStrings = Lexer::DontBuildStrings;
     int createSourceElements() { return 1; }
 …
     ExpressionType createNull() { return NullExpr; }
     ExpressionType createBracketAccess(ExpressionType, ExpressionType, bool, int, int, int) { return BracketExpr; }
-    ExpressionType createDotAccess(ExpressionType, const Identifier&, int, int, int) { return DotExpr; }
+    ExpressionType createDotAccess(ExpressionType, const Identifier*, int, int, int) { return DotExpr; }
     ExpressionType createRegExp(const Identifier& pattern, const Identifier&, int) { return Yarr::checkSyntax(pattern.ustring()) ? 0 : RegExpExpr; }
     ExpressionType createNewExpr(ExpressionType, int, int, int, int) { return NewExpr; }
 …
     template <bool complete> Property createProperty(const Identifier* name, int, PropertyNode::Type type)
     {
-        ASSERT(name);
         if (!complete)
             return Property(type);
+        ASSERT(name);
         return Property(name, type);
     }

Note: See TracChangeset for help on using the changeset viewer.