Changeset 10818 in webkit
- Timestamp:
- Oct 11, 2005, 1:43:49 PM (20 years ago)
- Location:
- trunk/JavaScriptCore
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/ChangeLog
r10802 r10818 1 2005-10-10 Geoffrey Garen <ggaren@apple.com> 2 3 - Implemented caching of match state inside the global RegExp object 4 (lastParen, leftContext, rightContext, lastMatch, input). 5 6 exec(), test(), match(), search(), and replace() now dispatch regular 7 expression matching through the RegExp object's performMatch function, 8 to facilitate caching. This replaces registerRegexp and 9 setSubPatterns. 10 11 - Implemented the special '$' aliases (e.g. RegExp.input aliases to 12 RegExp.$_). 13 14 - Moved support for backreferences into the new static hash table 15 used for other special RegExp properties. Truncated backreferences 16 at $9 to match IE, FF, and the "What's New in Netscape 1.2?" doc. 17 (String.replace still supports double-digit backreferences.) 18 19 - Tweaked RegExp.prototype.exec to handle ginormous values in lastIndex. 20 21 Fixes 11 -- count em, 11 -- JavaScriptCore tests. 22 23 Reviewed by NOBODY (OOPS!). 24 25 * JavaScriptCore.xcodeproj/project.pbxproj: Added regexp_object.lut.h 26 * kjs/create_hash_table: Tweaked to allow for more exotic characters. 27 We now rely on the compiler to catch illegal 28 identifiers. 29 * kjs/regexp.cpp: 30 (KJS::RegExp::RegExp): 31 * kjs/regexp_object.cpp: 32 (RegExpProtoFuncImp::callAsFunction): 33 (RegExpObjectImp::RegExpObjectImp): 34 (RegExpObjectImp::performMatch): 35 (RegExpObjectImp::arrayOfMatches): 36 (RegExpObjectImp::backrefGetter): 37 (RegExpObjectImp::getLastMatch): 38 (RegExpObjectImp::getLastParen): 39 (RegExpObjectImp::getLeftContext): 40 (RegExpObjectImp::getRightContext): 41 (RegExpObjectImp::getOwnPropertySlot): 42 (RegExpObjectImp::getValueProperty): 43 (RegExpObjectImp::put): 44 (RegExpObjectImp::putValueProperty): 45 * kjs/regexp_object.h: 46 (KJS::RegExpObjectImp::): 47 * kjs/string_object.cpp: 48 (substituteBackreferences): 49 (replace): 50 (StringProtoFuncImp::callAsFunction): 51 1 52 2005-10-09 Darin Adler <darin@apple.com> 2 53 -
trunk/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj
r10744 r10818 1130 1130 93F1983308245BA1001E9ABC /* math_object.lut.h */, 1131 1131 93F1983108245B9E001E9ABC /* number_object.lut.h */, 1132 14F6037308FB039300E9E573 /* regexp_object.lut.h */, 1132 1133 93F1983508245BA6001E9ABC /* string_object.lut.h */, 1133 1134 932F5B3F0822A1C700736975 /* Headers */, … … 1357 1358 1358 1359 /* Begin PBXShellScriptBuildPhase section */ 1360 14F6037308FB039300E9E573 /* regexp_object.lut.h */ = { 1361 isa = PBXShellScriptBuildPhase; 1362 buildActionMask = 2147483647; 1363 files = ( 1364 ); 1365 inputPaths = ( 1366 "kjs/create_hash_table\nkjs/create_hash_table", 1367 kjs/regexp_object.cpp, 1368 ); 1369 name = regexp_object.lut.h; 1370 outputPaths = ( 1371 "$(DERIVED_FILE_DIR)/regexp_object.lut.h", 1372 ); 1373 runOnlyForDeploymentPostprocessing = 0; 1374 shellPath = /bin/sh; 1375 shellScript = "kjs/create_hash_table kjs/regexp_object.cpp -i > \"$DERIVED_FILE_DIR/regexp_object.lut.h\""; 1376 }; 1359 1377 93396BB50824516200AB803D /* chartables.c */ = { 1360 1378 isa = PBXShellScriptBuildPhase; -
trunk/JavaScriptCore/kjs/create_hash_table
r9553 r10818 65 65 @params = (); 66 66 $inside = 0; 67 } elsif (/^([-:\@\w\[\=\]]+)\s*([\w\:-]+)\s*([\w\|]*)\s*(\w*)\s*$/ && $inside) {67 } elsif (/^(\S+)\s*(\S+)\s*([\w\|]*)\s*(\w*)\s*$/ && $inside) { 68 68 my $key = $1; 69 69 my $val = $2; … … 77 77 push(@params, length($param) > 0 ? $param : "0"); 78 78 } elsif ($inside) { 79 die "invalid data ";79 die "invalid data {" . $_ . "}"; 80 80 } 81 81 } -
trunk/JavaScriptCore/kjs/regexp.cpp
r10701 r10818 37 37 int options = PCRE_UTF8; 38 38 // Note: the Global flag is already handled by RegExpProtoFunc::execute. 39 // FIXME: That last comment is dubious. Not all RegExps get run through RegExpProtoFunc::execute. 39 40 if (flags & IgnoreCase) 40 41 options |= PCRE_CASELESS; -
trunk/JavaScriptCore/kjs/regexp_object.cpp
r10701 r10818 24 24 #include "regexp_object.h" 25 25 26 #include "regexp_object.lut.h" 27 26 28 #include <stdio.h> 27 29 #include "value.h" … … 33 35 #include "regexp.h" 34 36 #include "error_object.h" 37 #include "lookup.h" 35 38 36 39 using namespace KJS; … … 80 83 } 81 84 } 85 82 86 return throwError(exec, TypeError); 83 87 } 84 88 85 RegExpImp *reimp = static_cast<RegExpImp*>(thisObj);86 RegExp *re = reimp->regExp();87 UString s;88 UString str;89 89 switch (id) { 90 case Exec: // 15.10.6.291 case Test:90 case Test: // 15.10.6.2 91 case Exec: 92 92 { 93 s = args[0]->toString(exec); 94 int length = s.size(); 95 ValueImp *lastIndex = thisObj->get(exec,"lastIndex"); 96 int i = lastIndex->toInt32(exec); 97 bool globalFlag = thisObj->get(exec,"global")->toBoolean(exec); 93 RegExp *regExp = static_cast<RegExpImp*>(thisObj)->regExp(); 94 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp()); 95 96 UString input; 97 if (args.isEmpty()) 98 input = regExpObj->get(exec, "input")->toString(exec); 99 else 100 input = args[0]->toString(exec); 101 102 double lastIndex = thisObj->get(exec, "lastIndex")->toInteger(exec); 103 104 bool globalFlag = thisObj->get(exec, "global")->toBoolean(exec); 98 105 if (!globalFlag) 99 i = 0; 100 if (i < 0 || i > length) { 101 thisObj->put(exec,"lastIndex", Number(0), DontDelete | DontEnum); 102 if (id == Test) 103 return Boolean(false); 104 else 105 return Null(); 106 } 107 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp()); 108 int **ovector = regExpObj->registerRegexp( re, s ); 109 110 str = re->match(s, i, 0L, ovector); 111 regExpObj->setSubPatterns(re->subPatterns()); 112 106 lastIndex = 0; 107 if (lastIndex < 0 || lastIndex > input.size()) { 108 thisObj->put(exec, "lastIndex", jsZero(), DontDelete | DontEnum); 109 return Null(); 110 } 111 112 UString match = regExpObj->performMatch(regExp, input, static_cast<int>(lastIndex)); 113 bool 
didMatch = !match.isNull(); 114 115 // Test 113 116 if (id == Test) 114 return Boolean( !str.isNull());115 116 if (str.isNull()) // no match117 {117 return Boolean(didMatch); 118 119 // Exec 120 if (didMatch) { 118 121 if (globalFlag) 119 thisObj->put(exec,"lastIndex",Number(0), DontDelete | DontEnum); 122 thisObj->put(exec, "lastIndex", Number(lastIndex + match.size()), DontDelete | DontEnum); 123 return regExpObj->arrayOfMatches(exec, match); 124 } else { 125 if (globalFlag) 126 thisObj->put(exec, "lastIndex", jsZero(), DontDelete | DontEnum); 120 127 return Null(); 121 }122 else // success123 {124 if (globalFlag)125 thisObj->put(exec,"lastIndex",Number( (*ovector)[1] ), DontDelete | DontEnum);126 return regExpObj->arrayOfMatches(exec,str);127 128 } 128 129 } 129 130 break; 130 131 case ToString: 131 s = thisObj->get(exec,"source")->toString(exec); 132 str = "/"; 133 str += s; 134 str += "/"; 135 if (thisObj->get(exec,"global")->toBoolean(exec)) { 136 str += "g"; 137 } 138 if (thisObj->get(exec,"ignoreCase")->toBoolean(exec)) { 139 str += "i"; 140 } 141 if (thisObj->get(exec,"multiline")->toBoolean(exec)) { 142 str += "m"; 143 } 144 return String(str); 132 UString result = "/" + thisObj->get(exec, "source")->toString(exec) + "/"; 133 if (thisObj->get(exec, "global")->toBoolean(exec)) { 134 result += "g"; 135 } 136 if (thisObj->get(exec, "ignoreCase")->toBoolean(exec)) { 137 result += "i"; 138 } 139 if (thisObj->get(exec, "multiline")->toBoolean(exec)) { 140 result += "m"; 141 } 142 return String(result); 145 143 } 146 144 … … 163 161 164 162 // ------------------------------ RegExpObjectImp ------------------------------ 163 164 const ClassInfo RegExpObjectImp::info = {"RegExp", &InternalFunctionImp::info, &RegExpTable, 0}; 165 166 /* Source for regexp_object.lut.h 167 @begin RegExpTable 20 168 input RegExpObjectImp::Input None 169 $_ RegExpObjectImp::Input DontEnum 170 multiline RegExpObjectImp::Multiline None 171 $* RegExpObjectImp::Multiline DontEnum 172 
lastMatch RegExpObjectImp::LastMatch DontDelete|ReadOnly 173 $& RegExpObjectImp::LastMatch DontDelete|ReadOnly|DontEnum 174 lastParen RegExpObjectImp::LastParen DontDelete|ReadOnly 175 $+ RegExpObjectImp::LastParen DontDelete|ReadOnly|DontEnum 176 leftContext RegExpObjectImp::LeftContext DontDelete|ReadOnly 177 $` RegExpObjectImp::LeftContext DontDelete|ReadOnly|DontEnum 178 rightContext RegExpObjectImp::RightContext DontDelete|ReadOnly 179 $' RegExpObjectImp::RightContext DontDelete|ReadOnly|DontEnum 180 $1 RegExpObjectImp::Dollar1 DontDelete|ReadOnly 181 $2 RegExpObjectImp::Dollar2 DontDelete|ReadOnly 182 $3 RegExpObjectImp::Dollar3 DontDelete|ReadOnly 183 $4 RegExpObjectImp::Dollar4 DontDelete|ReadOnly 184 $5 RegExpObjectImp::Dollar5 DontDelete|ReadOnly 185 $6 RegExpObjectImp::Dollar6 DontDelete|ReadOnly 186 $7 RegExpObjectImp::Dollar7 DontDelete|ReadOnly 187 $8 RegExpObjectImp::Dollar8 DontDelete|ReadOnly 188 $9 RegExpObjectImp::Dollar9 DontDelete|ReadOnly 189 @end 190 */ 165 191 166 192 RegExpObjectImp::RegExpObjectImp(ExecState *exec, … … 168 194 RegExpPrototypeImp *regProto) 169 195 170 : InternalFunctionImp(funcProto), lastOvector(0L), lastNrSubPatterns(0)196 : InternalFunctionImp(funcProto), multiline(false), lastInput(""), lastOvector(0), lastNumSubPatterns(0) 171 197 { 172 198 // ECMA 15.10.5.1 RegExp.prototype … … 182 208 } 183 209 184 int **RegExpObjectImp::registerRegexp( const RegExp* re, const UString& s ) 185 { 186 lastString = s; 187 delete [] lastOvector; 188 lastOvector = 0; 189 lastNrSubPatterns = re->subPatterns(); 190 return &lastOvector; 210 /* 211 To facilitate result caching, exec(), test(), match(), search(), and replace() dispatch regular 212 expression matching through the performMatch function. We use cached results to calculate, 213 e.g., RegExp.lastMatch and RegExp.lastParen. 
214 */ 215 UString RegExpObjectImp::performMatch(RegExp* r, const UString& s, int startOffset, int *endOffset, int **ovector) 216 { 217 int tmpOffset; 218 int *tmpOvector; 219 UString match = r->match(s, startOffset, &tmpOffset, &tmpOvector); 220 221 if (endOffset) 222 *endOffset = tmpOffset; 223 if (ovector) 224 *ovector = tmpOvector; 225 226 if (!match.isNull()) { 227 assert(tmpOvector); 228 229 lastInput = s; 230 delete [] lastOvector; 231 lastOvector = tmpOvector; 232 lastNumSubPatterns = r->subPatterns(); 233 } 234 235 return match; 191 236 } 192 237 … … 197 242 list.append(String(result)); 198 243 if ( lastOvector ) 199 for ( unsigned i = 1 ; i < lastN rSubPatterns + 1 ; ++i )244 for ( unsigned i = 1 ; i < lastNumSubPatterns + 1 ; ++i ) 200 245 { 201 246 int start = lastOvector[2*i]; … … 203 248 list.append(jsUndefined()); 204 249 else { 205 UString substring = last String.substr( start, lastOvector[2*i+1] - start );250 UString substring = lastInput.substr( start, lastOvector[2*i+1] - start ); 206 251 list.append(String(substring)); 207 252 } … … 209 254 ObjectImp *arr = exec->lexicalInterpreter()->builtinArray()->construct(exec, list); 210 255 arr->put(exec, "index", Number(lastOvector[0])); 211 arr->put(exec, "input", String(last String));256 arr->put(exec, "input", String(lastInput)); 212 257 return arr; 213 258 } 214 259 215 ValueImp *RegExpObjectImp::backrefGetter(ExecState *exec, const Identifier& propertyName, const PropertySlot& slot) 216 { 217 RegExpObjectImp *thisObj = static_cast<RegExpObjectImp *>(slot.slotBase()); 218 unsigned i = slot.index(); 219 220 if (i < thisObj->lastNrSubPatterns + 1) { 221 int *lastOvector = thisObj->lastOvector; 222 UString substring = thisObj->lastString.substr(lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] ); 260 ValueImp *RegExpObjectImp::getBackref(unsigned i) const 261 { 262 if (lastOvector && i < lastNumSubPatterns + 1) { 263 UString substring = lastInput.substr(lastOvector[2*i], lastOvector[2*i+1] - 
lastOvector[2*i] ); 223 264 return String(substring); 224 265 } … … 227 268 } 228 269 270 ValueImp *RegExpObjectImp::getLastMatch() const 271 { 272 if (lastOvector) { 273 UString substring = lastInput.substr(lastOvector[0], lastOvector[1] - lastOvector[0]); 274 return String(substring); 275 } 276 277 return String(""); 278 } 279 280 ValueImp *RegExpObjectImp::getLastParen() const 281 { 282 int i = lastNumSubPatterns; 283 if (i > 0) { 284 assert(lastOvector); 285 UString substring = lastInput.substr(lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i]); 286 return String(substring); 287 } 288 289 return String(""); 290 } 291 292 ValueImp *RegExpObjectImp::getLeftContext() const 293 { 294 if (lastOvector) { 295 UString substring = lastInput.substr(0, lastOvector[0]); 296 return String(substring); 297 } 298 299 return String(""); 300 } 301 302 ValueImp *RegExpObjectImp::getRightContext() const 303 { 304 if (lastOvector) { 305 UString s = lastInput; 306 UString substring = s.substr(lastOvector[1], s.size() - lastOvector[1]); 307 return String(substring); 308 } 309 310 return String(""); 311 } 312 229 313 bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier& propertyName, PropertySlot& slot) 230 314 { 231 UString s = propertyName.ustring(); 232 if (s[0] == '$' && lastOvector) 233 { 234 bool ok; 235 unsigned i = s.substr(1).toUInt32(&ok); 236 if (ok) { 237 slot.setCustomIndex(this, i, backrefGetter); 238 return true; 239 } 240 } 241 242 return InternalFunctionImp::getOwnPropertySlot(exec, propertyName, slot); 243 } 244 315 return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpTable, this, propertyName, slot); 316 } 317 318 ValueImp *RegExpObjectImp::getValueProperty(ExecState *exec, int token) const 319 { 320 switch (token) { 321 case Dollar1: 322 return getBackref(1); 323 case Dollar2: 324 return getBackref(2); 325 case Dollar3: 326 return getBackref(3); 327 case Dollar4: 328 return getBackref(4); 329 case Dollar5: 330 
return getBackref(5); 331 case Dollar6: 332 return getBackref(6); 333 case Dollar7: 334 return getBackref(7); 335 case Dollar8: 336 return getBackref(8); 337 case Dollar9: 338 return getBackref(9); 339 case Input: 340 return jsString(lastInput); 341 case Multiline: 342 return jsBoolean(multiline); 343 case LastMatch: 344 return getLastMatch(); 345 case LastParen: 346 return getLastParen(); 347 case LeftContext: 348 return getLeftContext(); 349 case RightContext: 350 return getRightContext(); 351 default: 352 assert(0); 353 } 354 355 return String(""); 356 } 357 358 void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, ValueImp *value, int attr) 359 { 360 lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpTable, this); 361 } 362 363 void RegExpObjectImp::putValueProperty(ExecState *exec, int token, ValueImp *value, int attr) 364 { 365 switch (token) { 366 case Input: 367 lastInput = value->toString(exec); 368 break; 369 case Multiline: 370 multiline = value->toBoolean(exec); 371 break; 372 default: 373 assert(0); 374 } 375 } 376 245 377 bool RegExpObjectImp::implementsConstruct() const 246 378 { -
trunk/JavaScriptCore/kjs/regexp_object.h
r10456 r10818 66 66 class RegExpObjectImp : public InternalFunctionImp { 67 67 public: 68 enum { Dollar1, Dollar2, Dollar3, Dollar4, Dollar5, Dollar6, Dollar7, Dollar8, Dollar9, 69 Input, Multiline, LastMatch, LastParen, LeftContext, RightContext }; 70 68 71 RegExpObjectImp(ExecState *exec, 69 72 FunctionPrototypeImp *funcProto, … … 75 78 virtual ValueImp *callAsFunction(ExecState *exec, ObjectImp *thisObj, const List &args); 76 79 80 virtual void put(ExecState *, const Identifier &, ValueImp *, int attr = None); 81 void putValueProperty(ExecState *, int token, ValueImp *, int attr); 77 82 virtual bool getOwnPropertySlot(ExecState *, const Identifier&, PropertySlot&); 78 int ** registerRegexp( const RegExp* re, const UString& s );79 void setSubPatterns(int num) { lastNrSubPatterns = num; }83 ValueImp *getValueProperty(ExecState *, int token) const; 84 UString performMatch(RegExp *, const UString&, int startOffset = 0, int *endOffset = 0, int **ovector = 0); 80 85 ObjectImp *arrayOfMatches(ExecState *exec, const UString &result) const; 86 87 virtual const ClassInfo *classInfo() const { return &info; } 81 88 private: 82 static ValueImp *backrefGetter(ExecState *exec, const Identifier&, const PropertySlot& slot); 83 84 UString lastString; 89 ValueImp *getBackref(unsigned) const; 90 ValueImp *getLastMatch() const; 91 ValueImp *getLastParen() const; 92 ValueImp *getLeftContext() const; 93 ValueImp *getRightContext() const; 94 95 // Global search cache / settings 96 bool multiline; 97 UString lastInput; 85 98 int *lastOvector; 86 unsigned lastNrSubPatterns; 99 unsigned lastNumSubPatterns; 100 101 static const ClassInfo info; 87 102 }; 88 103 -
trunk/JavaScriptCore/kjs/string_object.cpp
r10757 r10818 233 233 } 234 234 235 static inline UString substituteBackreferences(const UString &replacement, const UString &source, int * *ovector, RegExp *reg)235 static inline UString substituteBackreferences(const UString &replacement, const UString &source, int *ovector, RegExp *reg) 236 236 { 237 237 UString substitutedReplacement = replacement; … … 247 247 unsigned backrefIndex = substitutedReplacement.substr(i+1,1).toUInt32(&converted, false /* tolerate empty string */); 248 248 if (converted && backrefIndex <= (unsigned)reg->subPatterns()) { 249 int backrefStart = (*ovector)[2*backrefIndex];250 int backrefLength = (*ovector)[2*backrefIndex+1] - backrefStart;249 int backrefStart = ovector[2*backrefIndex]; 250 int backrefLength = ovector[2*backrefIndex+1] - backrefStart; 251 251 substitutedReplacement = substitutedReplacement.substr(0,i) 252 252 + source.substr(backrefStart, backrefLength) … … 289 289 // This is either a loop (if global is set) or a one-way (if not). 290 290 do { 291 int **ovector = regExpObj->registerRegexp( reg, source ); 292 UString matchString = reg->match(source, startPosition, &matchIndex, ovector); 293 regExpObj->setSubPatterns(reg->subPatterns()); 291 int *ovector; 292 UString matchString = regExpObj->performMatch(reg, source, startPosition, &matchIndex, &ovector); 294 293 if (matchIndex == -1) 295 294 break; … … 299 298 300 299 if (replacementFunction) { 301 int completeMatchStart = (*ovector)[0];300 int completeMatchStart = ovector[0]; 302 301 List args; 303 302 … … 305 304 306 305 for (unsigned i = 0; i < reg->subPatterns(); i++) { 307 int matchStart = (*ovector)[(i + 1) * 2];308 int matchLen = (*ovector)[(i + 1) * 2 + 1] - matchStart;306 int matchStart = ovector[(i + 1) * 2]; 307 int matchLen = ovector[(i + 1) * 2 + 1] - matchStart; 309 308 310 309 args.append(jsString(source.substr(matchStart, matchLen))); … … 455 454 RegExp *reg, *tmpReg = 0; 456 455 RegExpImp *imp = 0; 457 if (a0->isObject() && 
a0->getObject()->inherits(&RegExpImp::info)) 458 { 456 if (a0->isObject() && a0->getObject()->inherits(&RegExpImp::info)) { 459 457 imp = static_cast<RegExpImp *>(a0); 460 458 reg = imp->regExp(); 461 } 462 else 463 { /* 459 } else { 460 /* 464 461 * ECMA 15.5.4.12 String.prototype.search (regexp) 465 462 * If regexp is not an object whose [[Class]] property is "RegExp", it is … … 469 466 } 470 467 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp()); 471 int **ovector = regExpObj->registerRegexp(reg, u); 472 UString mstr = reg->match(u, -1, &pos, ovector); 468 UString mstr = regExpObj->performMatch(reg, u, 0, &pos); 473 469 if (id == Search) { 474 470 result = Number(pos); … … 480 476 result = Null(); 481 477 } else { 482 regExpObj->setSubPatterns(reg->subPatterns());483 478 result = regExpObj->arrayOfMatches(exec,mstr); 484 479 } … … 494 489 lastIndex = pos; 495 490 pos += mstr.isEmpty() ? 1 : mstr.size(); 496 delete [] *ovector; 497 mstr = reg->match(u, pos, &pos, ovector); 491 mstr = regExpObj->performMatch(reg, u, pos, &pos); 498 492 } 499 493 if (imp) … … 515 509 result = replace(exec, s, a0, a1); 516 510 break; 517 case Slice: // http://developer.netscape.com/docs/manuals/js/client/jsref/string.htm#1194366511 case Slice: 518 512 { 519 513 // The arg processing is very much like ArrayProtoFunc::Slice
Note:
See TracChangeset
for help on using the changeset viewer.