Changeset 219611 in webkit
- Timestamp:
- Jul 18, 2017 9:25:42 AM (7 years ago)
- Location:
- trunk/Source/JavaScriptCore
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r219595 r219611 1 2017-07-18 Michael Saboff <msaboff@apple.com> 2 3 [JSC] There should be a debug option to dump a compiled RegExp Pattern 4 https://bugs.webkit.org/show_bug.cgi?id=174601 5 6 Reviewed by Alex Christensen. 7 8 Added the debug option dumpCompiledRegExpPatterns which will dump the YarrPattern and related 9 objects after a regular expression has been compiled. 10 11 * runtime/Options.h: 12 * yarr/YarrPattern.cpp: 13 (JSC::Yarr::YarrPattern::compile): 14 (JSC::Yarr::indentForNestingLevel): 15 (JSC::Yarr::dumpUChar32): 16 (JSC::Yarr::PatternAlternative::dump): 17 (JSC::Yarr::PatternTerm::dumpQuantifier): 18 (JSC::Yarr::PatternTerm::dump): 19 (JSC::Yarr::PatternDisjunction::dump): 20 (JSC::Yarr::YarrPattern::dumpPattern): 21 * yarr/YarrPattern.h: 22 (JSC::Yarr::YarrPattern::global): 23 1 24 2017-07-17 Darin Adler <darin@apple.com> 2 25 -
trunk/Source/JavaScriptCore/runtime/Options.h
r219187 r219611 433 433 v(unsigned, prototypeHitCountForLLIntCaching, 2, Normal, "Number of prototype property hits before caching a prototype in the LLInt. A count of 0 means never cache.") \ 434 434 \ 435 v(bool, dumpCompiledRegExpPatterns, false, Normal, nullptr) \ 436 \ 435 437 v(bool, dumpModuleRecord, false, Normal, nullptr) \ 436 438 v(bool, dumpModuleLoadingState, false, Normal, nullptr) \ -
trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp
r219427 r219611 28 28 #include "YarrPattern.h" 29 29 30 #include "Options.h" 30 31 #include "Yarr.h" 31 32 #include "YarrCanonicalize.h" 32 33 #include "YarrParser.h" 34 #include <wtf/DataLog.h> 33 35 #include <wtf/Vector.h> 34 36 #include <wtf/WTFThreadData.h> … … 959 961 return error; 960 962 963 if (Options::dumpCompiledRegExpPatterns()) 964 dumpPattern(patternString); 965 961 966 return nullptr; 962 967 } … … 984 989 } 985 990 991 static void indentForNestingLevel(PrintStream& out, unsigned nestingDepth) 992 { 993 out.print(" "); 994 for (; nestingDepth; --nestingDepth) 995 out.print(" "); 996 } 997 998 static void dumpUChar32(PrintStream& out, UChar32 c) 999 { 1000 if (c >= ' '&& c <= 0xff) 1001 out.printf("'%c'", static_cast<char>(c)); 1002 else 1003 out.printf("0x%04x", c); 1004 } 1005 1006 void PatternAlternative::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth) 1007 { 1008 out.print("minimum size: ", m_minimumSize); 1009 if (m_hasFixedSize) 1010 out.print(",fixed size"); 1011 if (m_onceThrough) 1012 out.print(",once through"); 1013 if (m_startsWithBOL) 1014 out.print(",starts with ^"); 1015 if (m_containsBOL) 1016 out.print(",contains ^"); 1017 out.print("\n"); 1018 1019 for (size_t i = 0; i < m_terms.size(); ++i) 1020 m_terms[i].dump(out, thisPattern, nestingDepth); 1021 } 1022 1023 void PatternTerm::dumpQuantifier(PrintStream& out) 1024 { 1025 if (quantityType == QuantifierFixedCount && quantityMinCount == 1 && quantityMaxCount == 1) 1026 return; 1027 out.print(" {", quantityMinCount.unsafeGet()); 1028 if (quantityMinCount != quantityMaxCount) { 1029 if (quantityMaxCount == UINT_MAX) 1030 out.print(",..."); 1031 else 1032 out.print(",", quantityMaxCount.unsafeGet()); 1033 } 1034 out.print("}"); 1035 if (quantityType == QuantifierGreedy) 1036 out.print(" greedy"); 1037 else if (quantityType == QuantifierNonGreedy) 1038 out.print(" non-greedy"); 1039 } 1040 1041 void PatternTerm::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth) 1042 { 1043 indentForNestingLevel(out, nestingDepth); 1044 1045 if (invert() && (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion)) 1046 out.print("not "); 1047 1048 switch (type) { 1049 case TypeAssertionBOL: 1050 out.println("BOL"); 1051 break; 1052 case TypeAssertionEOL: 1053 out.println("EOL"); 1054 break; 1055 case TypeAssertionWordBoundary: 1056 out.println("word boundary"); 1057 break; 1058 case TypePatternCharacter: 1059 out.printf("character "); 1060 if (thisPattern->ignoreCase() && isASCIIAlpha(patternCharacter)) { 1061 dumpUChar32(out, toASCIIUpper(patternCharacter)); 1062 out.print("/"); 1063 dumpUChar32(out, toASCIILower(patternCharacter)); 1064 } else 1065 dumpUChar32(out, patternCharacter); 1066 dumpQuantifier(out); 1067 if (quantityType != QuantifierFixedCount) 1068 out.print(",frame location ", frameLocation); 1069 out.println(); 1070 break; 1071 case TypeCharacterClass: 1072 out.print("character class "); 1073 if (characterClass == thisPattern->newlineCharacterClass()) 1074 out.print("<newline>"); 1075 else if (characterClass == thisPattern->digitsCharacterClass()) 1076 out.print("<digits>"); 1077 else if (characterClass == thisPattern->spacesCharacterClass()) 1078 out.print("<whitespace>"); 1079 else if (characterClass == thisPattern->wordcharCharacterClass()) 1080 out.print("<word>"); 1081 else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass()) 1082 out.print("<unicode ignore case>"); 1083 else if (characterClass == thisPattern->nondigitsCharacterClass()) 1084 out.print("<non-digits>"); 1085 else if (characterClass == thisPattern->nonspacesCharacterClass()) 1086 out.print("<non-whitespace>"); 1087 else if (characterClass == thisPattern->nonwordcharCharacterClass()) 1088 out.print("<non-word>"); 1089 else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass()) 1090 out.print("<unicode non-ignore case>"); 1091 else { 1092 bool needMatchesRangesSeperator = false; 1093 1094 auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) { 1095 size_t matchesSize = matches.size(); 1096 if (matchesSize) { 1097 if (needMatchesRangesSeperator) 1098 out.print(","); 1099 needMatchesRangesSeperator = true; 1100 1101 out.print(prefix, ":("); 1102 for (size_t i = 0; i < matchesSize; ++i) { 1103 if (i) 1104 out.print(","); 1105 dumpUChar32(out, matches[i]); 1106 } 1107 out.print(")"); 1108 } 1109 }; 1110 1111 auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) { 1112 size_t rangeSize = ranges.size(); 1113 if (rangeSize) { 1114 if (needMatchesRangesSeperator) 1115 out.print(","); 1116 needMatchesRangesSeperator = true; 1117 1118 out.print(prefix, "ranges:("); 1119 for (size_t i = 0; i < rangeSize; ++i) { 1120 if (i) 1121 out.print(","); 1122 CharacterRange range = ranges[i]; 1123 out.print("("); 1124 dumpUChar32(out, range.begin); 1125 out.print(".."); 1126 dumpUChar32(out, range.end); 1127 out.print(")"); 1128 } 1129 out.print(")"); 1130 } 1131 }; 1132 1133 out.print("["); 1134 dumpMatches("ASCII", characterClass->m_matches); 1135 dumpRanges("ASCII", characterClass->m_ranges); 1136 dumpMatches("Unicode", characterClass->m_matchesUnicode); 1137 dumpRanges("Unicode", characterClass->m_rangesUnicode); 1138 out.print("]"); 1139 } 1140 dumpQuantifier(out); 1141 if (quantityType != QuantifierFixedCount || thisPattern->unicode()) 1142 out.print(",frame location ", frameLocation); 1143 out.println(); 1144 break; 1145 case TypeBackReference: 1146 out.print("back reference to subpattern #", backReferenceSubpatternId); 1147 out.println(",frame location ", frameLocation); 1148 break; 1149 case TypeForwardReference: 1150 out.println("forward reference"); 1151 break; 1152 case TypeParenthesesSubpattern: 1153 if (m_capture) 1154 out.print("captured "); 1155 else 1156 out.print("non-captured "); 1157 1158 FALLTHROUGH; 1159 case TypeParentheticalAssertion: 1160 if (m_invert) 1161 out.print("inverted "); 1162 1163 if (type == TypeParenthesesSubpattern) 1164 out.print("subpattern"); 1165 else if (type == TypeParentheticalAssertion) 1166 out.print("assertion"); 1167 1168 if (m_capture) 1169 out.print(" #", parentheses.subpatternId); 1170 1171 dumpQuantifier(out); 1172 1173 if (parentheses.isCopy) 1174 out.print(",copy"); 1175 1176 if (parentheses.isTerminal) 1177 out.print(",terminal"); 1178 1179 if (quantityMaxCount != 1 || parentheses.isCopy || quantityType != QuantifierFixedCount) 1180 out.println(",frame location ", frameLocation); 1181 else 1182 out.println(); 1183 1184 if (parentheses.disjunction->m_alternatives.size() > 1) { 1185 indentForNestingLevel(out, nestingDepth + 1); 1186 unsigned alternativeFrameLocation = frameLocation; 1187 if (quantityType != QuantifierFixedCount) 1188 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 1189 out.println("alternative list,frame location ", alternativeFrameLocation); 1190 } 1191 1192 parentheses.disjunction->dump(out, thisPattern, nestingDepth + 1); 1193 break; 1194 case TypeDotStarEnclosure: 1195 out.println(".* enclosure,frame location ", thisPattern->m_initialStartValueFrameLocation); 1196 break; 1197 } 1198 } 1199 1200 void PatternDisjunction::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth = 0) 1201 { 1202 unsigned alternativeCount = m_alternatives.size(); 1203 for (unsigned i = 0; i < alternativeCount; ++i) { 1204 indentForNestingLevel(out, nestingDepth); 1205 if (alternativeCount > 1) 1206 out.print("alternative #", i, ": "); 1207 m_alternatives[i].get()->dump(out, thisPattern, nestingDepth + (alternativeCount > 1)); 1208 } 1209 } 1210 1211 void YarrPattern::dumpPattern(const String& patternString) 1212 { 1213 dumpPattern(WTF::dataFile(), patternString); 1214 } 1215 1216 void YarrPattern::dumpPattern(PrintStream& out, const String& patternString) 1217 { 1218 out.print("RegExp pattern for /"); 1219 out.print(patternString); 1220 out.print("/"); 1221 if (global()) 1222 out.print("g"); 1223 if (ignoreCase()) 1224 out.print("i"); 1225 if (multiline()) 1226 out.print("m"); 1227 if (unicode()) 1228 out.print("u"); 1229 if (sticky()) 1230 out.print("y"); 1231 if (m_flags != NoFlags) { 1232 bool printSeperator = false; 1233 out.print(" ("); 1234 if (global()) { 1235 out.print("global"); 1236 printSeperator = true; 1237 } 1238 if (ignoreCase()) { 1239 if (printSeperator) 1240 out.print("|"); 1241 out.print("ignore case"); 1242 printSeperator = true; 1243 } 1244 if (multiline()) { 1245 if (printSeperator) 1246 out.print("|"); 1247 out.print("multiline"); 1248 printSeperator = true; 1249 } 1250 if (unicode()) { 1251 if (printSeperator) 1252 out.print("|"); 1253 out.print("unicode"); 1254 printSeperator = true; 1255 } 1256 if (sticky()) { 1257 if (printSeperator) 1258 out.print("|"); 1259 out.print("sticky"); 1260 printSeperator = true; 1261 } 1262 out.print(")"); 1263 } 1264 out.print(":\n"); 1265 m_body->dump(out, this); 1266 } 1267 986 1268 } } -
trunk/Source/JavaScriptCore/yarr/YarrPattern.h
r219031 r219611 29 29 #include "RegExpKey.h" 30 30 #include <wtf/CheckedArithmetic.h> 31 #include <wtf/PrintStream.h> 31 32 #include <wtf/Vector.h> 32 33 #include <wtf/text/WTFString.h> … … 34 35 namespace JSC { namespace Yarr { 35 36 37 struct YarrPattern; 36 38 struct PatternDisjunction; 37 39 … … 223 225 quantityType = type; 224 226 } 227 228 void dumpQuantifier(PrintStream&); 229 void dump(PrintStream&, YarrPattern*, unsigned); 225 230 }; 226 231 … … 259 264 } 260 265 266 void dump(PrintStream&, YarrPattern*, unsigned); 267 261 268 Vector<PatternTerm> m_terms; 262 269 PatternDisjunction* m_parent; … … 282 289 return static_cast<PatternAlternative*>(m_alternatives.last().get()); 283 290 } 291 292 void dump(PrintStream&, YarrPattern*, unsigned); 284 293 285 294 Vector<std::unique_ptr<PatternAlternative>> m_alternatives; … … 449 458 } 450 459 460 void dumpPattern(const String& pattern); 461 void dumpPattern(PrintStream& out, const String& pattern); 462 463 bool global() const { return m_flags & FlagGlobal; } 451 464 bool ignoreCase() const { return m_flags & FlagIgnoreCase; } 452 465 bool multiline() const { return m_flags & FlagMultiline; }
Note: See TracChangeset
for help on using the changeset viewer.