Changeset 60267 in webkit


Ignore:
Timestamp:
May 26, 2010 8:44:28 PM (14 years ago)
Author:
barraclough@apple.com
Message:

Bug 39795 - Add support for YARR JIT generation of greedy quantified parens at the end of the main disjunction.

Reviewed by Oliver Hunt.

If the last item in a main disjunction is a quantified set of parentheses,
this is easier to code generate for than the general case for quantified
parentheses. This is because we never need to backtrack into the parentheses

  • the first match will be the final and accepted match.

This patch also somewhat reverts a recent change to when fallback to PCRE
occurs. At the minute the compiler is tracking on patterns which will
require JIT fallback. This is handy from a performance perspective (it saves
the failed attempt at JIT compilation), but it means introducing knowledge
of the JITs capabilities into the other layers of the regex compilers. For
the specific feature of back-references, add a flag tracking their presence
on the pattern, and make these expressions fallback without attempting to
JIT. For parentheses, return to detecting which cases are have or have not
been handled during JIT compilation.

18% progression on tagcloud, ~1.5% overall on sunspidey.

  • yarr/RegexCompiler.cpp:

(JSC::Yarr::RegexPatternConstructor::atomBackReference):
(JSC::Yarr::RegexPatternConstructor::quantifyAtom):

  • yarr/RegexJIT.cpp:

(JSC::Yarr::RegexGenerator::TermGenerationState::isLastTerm):
(JSC::Yarr::RegexGenerator::TermGenerationState::isMainDisjunction):
(JSC::Yarr::RegexGenerator::generateParenthesesGreedyNoBacktrack):
(JSC::Yarr::RegexGenerator::generateTerm):
(JSC::Yarr::RegexGenerator::RegexGenerator):
(JSC::Yarr::RegexGenerator::shouldFallBack):
(JSC::Yarr::jitCompileRegex):

  • yarr/RegexPattern.h:

(JSC::Yarr::RegexPattern::RegexPattern):
(JSC::Yarr::RegexPattern::reset):

Location:
trunk/JavaScriptCore
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/ChangeLog

    r60261 r60267  
     12010-05-26  Gavin Barraclough  <barraclough@apple.com>
     2
     3        Reviewed by Oliver Hunt.
     4
     5        Bug 39795 - Add support for YARR JIT generation of greedy quantified parens at the end of the main disjunction.
     6
     7        If the last item in a main disjunction is a quantified set of parentheses,
     8        this is easier to code generate for than the general case for quantified
     9        parentheses. This is because we never need to backtrack into the parentheses
     10        - the first match will be the final and accepted match.
     11
     12        This patch also somewhat reverts a recent change to when fallback to PCRE
     13        occurs. At the minute the compiler is tracking on patterns which will
     14        require JIT fallback. This is handy from a performance perspective (it saves
     15        the failed attempt at JIT compilation), but it means introducing knowledge
     16        of the JITs capabilities into the other layers of the regex compilers. For
     17        the specific feature of back-references, add a flag tracking their presence
     18        on the pattern, and make these expressions fallback without attempting to
     19        JIT. For parentheses, return to detecting which cases are have or have not
     20        been handled during JIT compilation.
     21
     22        18% progression on tagcloud, ~1.5% overall on sunspidey.
     23
     24        * yarr/RegexCompiler.cpp:
     25        (JSC::Yarr::RegexPatternConstructor::atomBackReference):
     26        (JSC::Yarr::RegexPatternConstructor::quantifyAtom):
     27        * yarr/RegexJIT.cpp:
     28        (JSC::Yarr::RegexGenerator::TermGenerationState::isLastTerm):
     29        (JSC::Yarr::RegexGenerator::TermGenerationState::isMainDisjunction):
     30        (JSC::Yarr::RegexGenerator::generateParenthesesGreedyNoBacktrack):
     31        (JSC::Yarr::RegexGenerator::generateTerm):
     32        (JSC::Yarr::RegexGenerator::RegexGenerator):
     33        (JSC::Yarr::RegexGenerator::shouldFallBack):
     34        (JSC::Yarr::jitCompileRegex):
     35        * yarr/RegexPattern.h:
     36        (JSC::Yarr::RegexPattern::RegexPattern):
     37        (JSC::Yarr::RegexPattern::reset):
     38
    1392010-05-26  Geoffrey Garen  <ggaren@apple.com>
    240
  • trunk/JavaScriptCore/yarr/RegexCompiler.cpp

    r57925 r60267  
    373373    {
    374374        ASSERT(subpatternId);
    375         m_pattern.m_shouldFallBack = true;
     375        m_pattern.m_containsBackreferences = true;
    376376        m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
    377377
     
    448448            return;
    449449        }
    450 
    451         if (max > 1 && term.type == PatternTerm::TypeParenthesesSubpattern)
    452             m_pattern.m_shouldFallBack = true;
    453450
    454451        if (min == 0)
  • trunk/JavaScriptCore/yarr/RegexJIT.cpp

    r59222 r60267  
    346346            return alternative()->m_terms[t];
    347347        }
     348        bool isLastTerm()
     349        {
     350            ASSERT(alternativeValid());
     351            return (t + 1) == alternative()->m_terms.size();
     352        }
     353        bool isMainDisjunction()
     354        {
     355            return !disjunction->m_parent;
     356        }
    348357
    349358        PatternTerm& lookaheadTerm()
     
    902911        PatternDisjunction* disjunction = term.parentheses.disjunction;
    903912        ASSERT(term.quantityCount == 1);
     913
     914        if (!term.parentheses.isCopy) {
     915            m_shouldFallBack = true;
     916            return;
     917        }
    904918
    905919        unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
     
    9881002            success.link(this);
    9891003        }
     1004    }
     1005
     1006    void generateParenthesesGreedyNoBacktrack(TermGenerationState& state)
     1007    {
     1008        PatternTerm& parenthesesTerm = state.term();
     1009        PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
     1010        ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
     1011        ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
     1012
     1013        // Capturing not yet implemented!
     1014        if (parenthesesTerm.invertOrCapture) {
     1015            m_shouldFallBack = true;
     1016            return;
     1017        }
     1018
     1019        // Quantification limit not yet implemented!
     1020        if (parenthesesTerm.quantityCount != 0xffffffff) {
     1021            m_shouldFallBack = true;
     1022            return;
     1023        }
     1024
     1025        TermGenerationState parenthesesState(disjunction, state.checkedTotal);
     1026
     1027        Label matchAgain(this);
     1028        for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) {
     1029
     1030            PatternAlternative* alternative = parenthesesState.alternative();
     1031            optimizeAlternative(alternative);
     1032
     1033            int countToCheck = alternative->m_minimumSize;
     1034            if (countToCheck) {
     1035                parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
     1036                parenthesesState.checkedTotal += countToCheck;
     1037            }
     1038
     1039            for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm())
     1040                generateTerm(parenthesesState);
     1041
     1042            // If we get here, we matched! Limit not yet supported, so just try to match more!
     1043            jump(matchAgain);
     1044           
     1045            parenthesesState.linkAlternativeBacktracks(this);
     1046            // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
     1047
     1048            if (countToCheck) {
     1049                sub32(Imm32(countToCheck), index);
     1050                parenthesesState.checkedTotal -= countToCheck;
     1051            }
     1052        }
     1053
     1054        // If the last alternative falls through to here, we have a failed match...
     1055        // Which means that we match whatever we have matched up to this point (even if nothing).
    9901056    }
    9911057
     
    11011167
    11021168        case PatternTerm::TypeBackReference:
    1103             ASSERT_NOT_REACHED();
     1169            m_shouldFallBack = true;
    11041170            break;
    11051171
     
    11081174
    11091175        case PatternTerm::TypeParenthesesSubpattern:
    1110             ASSERT((term.quantityCount == 1) && !term.parentheses.isCopy); // must fallback to pcre before this point
    1111             generateParenthesesSingle(state);
     1176            if (term.quantityCount == 1) {
     1177                generateParenthesesSingle(state);
     1178                break;
     1179            } else if (state.isLastTerm() && state.isMainDisjunction()) { // Is this is the last term of the main disjunction?
     1180                // If this has a greedy quantifier, then it will never need to backtrack!
     1181                if (term.quantityType == QuantifierGreedy) {
     1182                    generateParenthesesGreedyNoBacktrack(state);
     1183                    break;
     1184                }
     1185            }
     1186            m_shouldFallBack = true;
    11121187            break;
    11131188
     
    13621437    RegexGenerator(RegexPattern& pattern)
    13631438        : m_pattern(pattern)
     1439        , m_shouldFallBack(false)
    13641440    {
    13651441    }
     
    13911467    }
    13921468
     1469    bool shouldFallBack()
     1470    {
     1471        return m_shouldFallBack;
     1472    }
     1473
    13931474private:
    13941475    RegexPattern& m_pattern;
     1476    bool m_shouldFallBack;
    13951477    Vector<AlternativeBacktrackRecord> m_backtrackRecords;
    13961478};
     
    13991481{
    14001482    RegexPattern pattern(ignoreCase, multiline);
    1401 
    14021483    if ((error = compileRegex(patternString, pattern)))
    14031484        return;
    1404 
    14051485    numSubpatterns = pattern.m_numSubpatterns;
    14061486
    1407     if (pattern.m_shouldFallBack) {
    1408         JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
    1409         JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
    1410         jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
    1411     } else {
     1487    if (!pattern.m_containsBackreferences) {
    14121488        RegexGenerator generator(pattern);
    14131489        generator.compile(globalData, jitObject);
    1414     }
     1490        if (!generator.shouldFallBack())
     1491            return;
     1492    }
     1493
     1494    JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
     1495    JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
     1496    jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
    14151497}
    14161498
  • trunk/JavaScriptCore/yarr/RegexPattern.h

    r57925 r60267  
    272272        , m_numSubpatterns(0)
    273273        , m_maxBackReference(0)
    274         , m_shouldFallBack(false)
     274        , m_containsBackreferences(false)
    275275        , newlineCached(0)
    276276        , digitsCached(0)
     
    294294        m_maxBackReference = 0;
    295295
    296         m_shouldFallBack = false;
     296        m_containsBackreferences = false;
    297297
    298298        newlineCached = 0;
     
    362362    unsigned m_numSubpatterns;
    363363    unsigned m_maxBackReference;
    364     bool m_shouldFallBack;
     364    bool m_containsBackreferences;
    365365    PatternDisjunction* m_body;
    366366    Vector<PatternDisjunction*, 4> m_disjunctions;
Note: See TracChangeset for help on using the changeset viewer.