Changeset 73594 in webkit


Ignore:
Timestamp:
Dec 8, 2010 9:40:29 PM (13 years ago)
Author:
barraclough@apple.com
Message:

Permit Character Class Escape in CharacterRange in Character Class.
https://bugs.webkit.org/show_bug.cgi?id=50483
https://bugs.webkit.org/show_bug.cgi?id=50538
https://bugs.webkit.org/show_bug.cgi?id=50654
https://bugs.webkit.org/show_bug.cgi?id=50646

Reviewed by Sam Weinig.

We recently tightened our spec conformance by generating a syntax
error in these cases; however, testing in the wild has shown this
to be problematic. This change reverts that behavior, once again allowing
class escapes (e.g. \d) in ranges in character classes ([]), but
retains some closer conformance to the spec by only allowing
ranges that would be permitted per the grammar rules in the spec
(e.g. in /[\d-a-z]/ "a-z" cannot be considered as a range).

JavaScriptCore:

  • yarr/RegexParser.h:

(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomBuiltInCharacterClass):
(JSC::Yarr::Parser::parse):

LayoutTests:

  • fast/js/regexp-ranges-and-escaped-hyphens-expected.txt:
  • fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js:
  • fast/regex/invalid-range-in-class-expected.txt:
  • fast/regex/pcre-test-1-expected.txt:
  • fast/regex/script-tests/invalid-range-in-class.js:
  • fast/regex/script-tests/pcre-test-1.js:
Location:
trunk
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/ChangeLog

    r73592 r73594  
     12010-12-08  Gavin Barraclough  <barraclough@apple.com>
     2
     3        Reviewed by Sam Weinig.
     4
     5        Permit Character Class Escape in CharacterRange in Character Class.
     6        https://bugs.webkit.org/show_bug.cgi?id=50483
     7        https://bugs.webkit.org/show_bug.cgi?id=50538
     8        https://bugs.webkit.org/show_bug.cgi?id=50654
     9        https://bugs.webkit.org/show_bug.cgi?id=50646
     10
     11        We recently tightened our spec conformance by generating a syntax
     12        error in these cases; however, testing in the wild has shown this
     13        to be problematic. This change reverts that behavior, once again allowing
     14        class escapes (e.g. \d) in ranges in character classes ([]), but
     15        retains some closer conformance to the spec by only allowing
     16        ranges that would be permitted per the grammar rules in the spec
     17        (e.g. in /[\d-a-z]/ "a-z" cannot be considered as a range).
     18
     19        * yarr/RegexParser.h:
     20        (JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
     21        (JSC::Yarr::Parser::CharacterClassParserDelegate::atomBuiltInCharacterClass):
     22        (JSC::Yarr::Parser::parse):
     23
    1242010-12-08  Geoffrey Garen  <ggaren@apple.com>
    225
  • trunk/JavaScriptCore/yarr/RegexParser.h

    r72999 r73594  
    5959        ParenthesesTypeInvalid,
    6060        CharacterClassUnmatched,
    61         CharacterClassInvalidRange,
    6261        CharacterClassOutOfOrder,
    6362        EscapeUnterminated,
     
    143142                return;
    144143
     144                // See comment in atomBuiltInCharacterClass below.
     145                // This too is technically an error, per ECMA-262, and again
     146                // we chose to allow this.  Note a subtlety here that while we
     147                // diverge from the spec's definition of CharacterRange we do
     148                // remain in compliance with the grammar.  For example, consider
     149                // the expression /[\d-a-z]/.  We comply with the grammar in
     150                // this case by not allowing a-z to be matched as a range.
    145151            case AfterCharacterClassHyphen:
    146                 // Error! We have something like /[\d-x]/.
    147                 m_err = CharacterClassInvalidRange;
     152                m_delegate.atomCharacterClassAtom(ch);
     153                m_state = Empty;
    148154                return;
    149155            }
     
    168174                return;
    169175
     176                // If we hit either of these cases, we have an invalid range that
     177                // looks something like /[x-\d]/ or /[\d-\d]/.
     178                // According to ECMA-262 this should be a syntax error, but
     179                // empirical testing shows this to break the web.  Instead we
     180                // comply with the ECMA-262 grammar, and assume the grammar to
     181                // have matched the range correctly, but tweak our interpretation
     182                // of CharacterRange.  Effectively we implicitly handle the hyphen
     183                // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/.
    170184            case CachedCharacterHyphen:
     185                m_delegate.atomCharacterClassAtom(m_character);
     186                m_delegate.atomCharacterClassAtom('-');
     187                // fall through
    171188            case AfterCharacterClassHyphen:
    172                 // Error! If we hit either of these cases, we have an
    173                 // invalid range that looks something like /[x-\d]/
    174                 // or /[\d-\d]/.
    175                 m_err = CharacterClassInvalidRange;
     189                m_delegate.atomCharacterClassBuiltIn(classID, invert);
     190                m_state = Empty;
    176191                return;
    177192            }
     
    682697            "unrecognized character after (?",
    683698            "missing terminating ] for character class",
    684             "invalid range in character class",
    685699            "range out of order in character class",
    686700            "\\ at end of pattern"
  • trunk/LayoutTests/ChangeLog

    r73593 r73594  
     12010-12-08  Gavin Barraclough  <barraclough@apple.com>
     2
     3        Reviewed by Sam Weinig.
     4
     5        Permit Character Class Escape in CharacterRange in Character Class.
     6        https://bugs.webkit.org/show_bug.cgi?id=50483
     7        https://bugs.webkit.org/show_bug.cgi?id=50538
     8        https://bugs.webkit.org/show_bug.cgi?id=50654
     9        https://bugs.webkit.org/show_bug.cgi?id=50646
     10
     11        We recently tightened our spec conformance by generating a syntax
     12        error in these cases; however, testing in the wild has shown this
     13        to be problematic. This change reverts that behavior, once again allowing
     14        class escapes (e.g. \d) in ranges in character classes ([]), but
     15        retains some closer conformance to the spec by only allowing
     16        ranges that would be permitted per the grammar rules in the spec
     17        (e.g. in /[\d-a-z]/ "a-z" cannot be considered as a range).
     18
     19        * fast/js/regexp-ranges-and-escaped-hyphens-expected.txt:
     20        * fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js:
     21        * fast/regex/invalid-range-in-class-expected.txt:
     22        * fast/regex/pcre-test-1-expected.txt:
     23        * fast/regex/script-tests/invalid-range-in-class.js:
     24        * fast/regex/script-tests/pcre-test-1.js:
     25
    1262010-12-08  Yuta Kitamura  <yutak@chromium.org>
    227
  • trunk/LayoutTests/fast/js/regexp-ranges-and-escaped-hyphens-expected.txt

    r72813 r73594  
    66PASS regexp01.toString() is "1235"
    77PASS regexp01a.toString() is "123 5"
    8 PASS /[1\s-35]+/.exec("21-3 54"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
    9 PASS /[1-\s35]+/.exec("21-3 54"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
     8PASS regexp01b.toString() is "1-3 5"
     9PASS regexp01c.toString() is "1-3 5"
    1010PASS regexp01d.toString() is "123 5"
    1111PASS regexp01e.toString() is "123 5"
  • trunk/LayoutTests/fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js

    r72813 r73594  
    1111shouldBe('regexp01a.toString()', '"123 5"');
    1212
    13 // These are invalid ranges.
    14 shouldThrow('/[1\\s-35]+/.exec("21-3 54");');
    15 shouldThrow('/[1-\\s35]+/.exec("21-3 54");');
     13// These are invalid ranges, according to ECMA-262, but we allow them.
     14var regexp01b = /[1\s-35]+/.exec("21-3 54");
     15shouldBe('regexp01b.toString()', '"1-3 5"');
     16var regexp01c = /[1-\s35]+/.exec("21-3 54");
     17shouldBe('regexp01c.toString()', '"1-3 5"');
    1618
    1719var regexp01d = /[1-3\s5]+/.exec("-123 54");
  • trunk/LayoutTests/fast/regex/invalid-range-in-class-expected.txt

    r72813 r73594  
    77PASS /[a\-c]+/.exec("-acbd") is ["-ac"]
    88PASS /[c-a]+/.exec("-acbd"); threw exception SyntaxError: Invalid regular expression: range out of order in character class.
    9 PASS /[\d-x]+/.exec("1-3xy"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
    10 PASS /[x-\d]+/.exec("1-3xy"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
    11 PASS /[\d-\d]+/.exec("1-3xy"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
     9PASS /[\d-x]+/.exec("1-3xy"); is ["1-3x"]
     10PASS /[x-\d]+/.exec("1-3xy"); is ["1-3x"]
     11PASS /[\d-\d]+/.exec("1-3xy"); is ["1-3"]
     12PASS /[\d-a-z]+/.exec("az1-3y"); is ["az1-3"]
    1213PASS /[\d\-x]+/.exec("1-3xy"); is ["1-3x"]
    1314PASS /[x\-\d]+/.exec("1-3xy"); is ["1-3x"]
  • trunk/LayoutTests/fast/regex/pcre-test-1-expected.txt

    r73065 r73594  
    368368PASS regex93.exec(input0); is results
    369369PASS regex93.exec(input1); is results
    370 PASS eval(regex94); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
     370PASS regex94.exec(input0); is results
     371PASS regex94.exec(input1); is results
    371372PASS regex95.exec(input0); is results
    372373PASS regex96.exec(input0); is results
     
    12251226PASS regex604.exec(input0); is results
    12261227PASS regex605.exec(input0); is results
    1227 PASS eval(regex608); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
    1228 PASS eval(regex609); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
     1228PASS regex608.exec(input0); is results
     1229PASS regex608.exec(input1); is results
     1230PASS regex608.exec(input2); is results
     1231PASS regex608.exec(input3); is results
     1232PASS regex609.exec(input0); is results
     1233PASS regex609.exec(input1); is results
     1234PASS regex609.exec(input2); is results
     1235PASS regex609.exec(input3); is results
    12291236PASS regex610.exec(input0); is results
    12301237PASS regex611.exec(input0); is results
  • trunk/LayoutTests/fast/regex/script-tests/invalid-range-in-class.js

    r72813 r73594  
    1010shouldThrow('/[c-a]+/.exec("-acbd");');
    1111
    12 // A character-class in a range is invalid.
    13 shouldThrow('/[\\d-x]+/.exec("1-3xy");');
    14 shouldThrow('/[x-\\d]+/.exec("1-3xy");');
    15 shouldThrow('/[\\d-\\d]+/.exec("1-3xy");');
     12// A character-class in a range is invalid, according to ECMA-262, but we allow it.
     13shouldBe('/[\\d-x]+/.exec("1-3xy");', '["1-3x"]');
     14shouldBe('/[x-\\d]+/.exec("1-3xy");', '["1-3x"]');
     15shouldBe('/[\\d-\\d]+/.exec("1-3xy");', '["1-3"]');
     16
     17// Whilst we break with ECMA-262's definition of CharacterRange, we do comply with
     18// the grammar, and as such in the following regex a-z cannot be matched as a range.
     19shouldBe('/[\\d-a-z]+/.exec("az1-3y");', '["az1-3"]');
    1620
    1721// An escaped hyphen should not be confused for an invalid range.
  • trunk/LayoutTests/fast/regex/script-tests/pcre-test-1.js

    r73065 r73594  
    22"A chunk of our port of PCRE's test suite, adapted to be more applicable to JavaScript."
    33);
    4 
    5 function shouldNotCompile(patternName)
    6 {
    7     shouldThrow("eval(" + patternName + ");");
    8 }
    94
    105var regex0 = /the quick brown fox/;
     
    13311326shouldBe('regex93.exec(input1);', 'results');
    13321327
    1333 var regex94 = "/[\\d-z]+/";
    1334 shouldNotCompile("regex94");
     1328var regex94 = /[\d-z]+/;
     1329var input0 = "12-34z";
     1330var results = ["12-34z"];
     1331shouldBe('regex94.exec(input0);', 'results');
     1332// Failers
     1333var input1 = "aaa";
     1334var results = null;
     1335shouldBe('regex94.exec(input1);', 'results');
    13351336
    13361337var regex95 = /\x5c/;
     
    49244925shouldBe('regex605.exec(input0);', 'results');
    49254926
    4926 var regex608 = "/^[a-\\d]/";
    4927 shouldNotCompile("regex608");
    4928 
    4929 var regex609 = "/^[\\d-a]/";
    4930 shouldNotCompile("regex609");
     4927var regex608 = /^[a-\d]/;
     4928var input0 = "abcde";
     4929var results = ["a"];
     4930shouldBe('regex608.exec(input0);', 'results');
     4931var input1 = "-things";
     4932var results = ["-"];
     4933shouldBe('regex608.exec(input1);', 'results');
     4934var input2 = "0digit";
     4935var results = ["0"];
     4936shouldBe('regex608.exec(input2);', 'results');
     4937// Failers
     4938var input3 = "bcdef";
     4939var results = null;
     4940shouldBe('regex608.exec(input3);', 'results');
     4941
     4942var regex609 = /^[\d-a]/;
     4943var input0 = "abcde";
     4944var results = ["a"];
     4945shouldBe('regex609.exec(input0);', 'results');
     4946var input1 = "-things";
     4947var results = ["-"];
     4948shouldBe('regex609.exec(input1);', 'results');
     4949var input2 = "0digit";
     4950var results = ["0"];
     4951shouldBe('regex609.exec(input2);', 'results');
     4952// Failers
     4953var input3 = "bcdef";
     4954var results = null;
     4955shouldBe('regex609.exec(input3);', 'results');
    49314956
    49324957var regex610 = /[\s]+/;
Note: See TracChangeset for help on using the changeset viewer.