Changeset 181663 in webkit
- Timestamp:
- Mar 17, 2015, 1:47:42 PM (10 years ago)
- Location:
- trunk
- Files:
-
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r181662 r181663 1 2015-03-17 Benjamin Poulain <bpoulain@apple.com> 2 3 Compile character ranges targeting the same state as range check in the bytecode 4 https://bugs.webkit.org/show_bug.cgi?id=142759 5 6 Reviewed by Alex Christensen. 7 8 Previously, character ranges would be compiled as many individual character checks. 9 For example, a transition on "[a-z]" would do 26 character checks + jump, which leads 10 to enormous matchines. 11 12 With this patch, we find the ranges at lowering time and generate a single instruction 13 for them: "CheckValueRange". This helps making the machine denser when the input 14 use character sets. 15 16 The second part of this patch goes further in the case where the transitions out of 17 a state cover the entire alphabet. In that case, we create a fallback transition 18 on the fly and remove all the ranges made useless. 19 That case is common when ranges are used with inverse character set (e.g. [^a]+a). 20 21 * contentextensions/DFABytecode.h: 22 (WebCore::ContentExtensions::instructionSizeWithArguments): 23 * contentextensions/DFABytecodeCompiler.cpp: 24 (WebCore::ContentExtensions::DFABytecodeCompiler::emitCheckValueRange): 25 (WebCore::ContentExtensions::DFABytecodeCompiler::compileNode): 26 (WebCore::ContentExtensions::DFABytecodeCompiler::compileNodeTransitions): 27 (WebCore::ContentExtensions::DFABytecodeCompiler::compileCheckForRange): 28 * contentextensions/DFABytecodeCompiler.h: 29 Extend the compiler to detect ranges and lower them as CheckValueRange. 30 31 * contentextensions/DFABytecodeInterpreter.cpp: 32 (WebCore::ContentExtensions::DFABytecodeInterpreter::interpret): 33 Range checks in the interpreter. 34 35 * contentextensions/NFA.cpp: 36 (WebCore::ContentExtensions::NFA::setFinal): 37 This assertion does not make sense with the current codebase. Actions are "compressed", 38 it is possible to have two patterns with the same action. 39 40 * contentextensions/NFAToDFA.cpp: 41 (WebCore::ContentExtensions::simplifyTransitions): 42 A very simple DFA optimization function: it only reduce the strength of ranges. 43 44 (WebCore::ContentExtensions::NFAToDFA::convert): 45 1 46 2015-03-17 Jer Noble <jer.noble@apple.com> 2 47 -
trunk/Source/WebCore/contentextensions/DFABytecode.h
r181421 r181663 42 42 CheckValue, 43 43 44 // Jump to an offset if the input value is within a certain range. 45 // The lower value (1 byte). 46 // The higher value (1 byte). 47 // The index to jump to if the value is in the range (4 bytes). 48 CheckValueRange, 49 44 50 // AppendAction has one argument: 45 51 // The action to append (4 bytes). … … 64 70 case DFABytecodeInstruction::CheckValue: 65 71 return sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + sizeof(unsigned); 72 case DFABytecodeInstruction::CheckValueRange: 73 return sizeof(DFABytecodeInstruction) + sizeof(uint8_t) + sizeof(uint8_t) + sizeof(unsigned); 66 74 case DFABytecodeInstruction::AppendAction: 67 75 return sizeof(DFABytecodeInstruction) + sizeof(unsigned); -
trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.cpp
r181421 r181663 31 31 #include "ContentExtensionRule.h" 32 32 #include "DFA.h" 33 #include "DFANode.h" 33 34 34 35 namespace WebCore { … … 77 78 } 78 79 80 void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, unsigned destinationNodeIndex) 81 { 82 ASSERT_WITH_MESSAGE(lowValue != highValue, "A single value check should be emitted for single values."); 83 ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is smaller than highValue."); 84 85 append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::CheckValueRange); 86 append<uint8_t>(m_bytecode, lowValue); 87 append<uint8_t>(m_bytecode, highValue); 88 m_linkRecords.append(std::make_pair(m_bytecode.size(), destinationNodeIndex)); 89 append<unsigned>(m_bytecode, 0); 90 } 91 79 92 void DFABytecodeCompiler::emitTerminate() 80 93 { … … 96 109 emitAppendAction(static_cast<unsigned>(action)); 97 110 } 98 99 for (const auto& transition : node.transitions) 100 emitCheckValue(transition.key, transition.value); 101 111 compileNodeTransitions(node); 112 } 113 114 void DFABytecodeCompiler::compileNodeTransitions(const DFANode& node) 115 { 116 bool hasRangeMin = false; 117 uint16_t rangeMin; 118 unsigned rangeDestination = 0; 119 120 for (unsigned char i = 0; i < 128; ++i) { 121 auto transitionIterator = node.transitions.find(i); 122 if (transitionIterator == node.transitions.end()) { 123 if (hasRangeMin) { 124 ASSERT_WITH_MESSAGE(!(node.hasFallbackTransition && node.fallbackTransition == rangeDestination), "Individual transitions to the fallback transitions should have been eliminated by the optimizer."); 125 126 unsigned char lastHighValue = i - 1; 127 compileCheckForRange(rangeMin, lastHighValue, rangeDestination); 128 hasRangeMin = false; 129 } 130 continue; 131 } 132 133 if (!hasRangeMin) { 134 hasRangeMin = true; 135 rangeMin = transitionIterator->key; 136 rangeDestination = transitionIterator->value; 137 } else { 138 if (transitionIterator->value == rangeDestination) 139 continue; 140 141 unsigned char lastHighValue = i - 1; 142 compileCheckForRange(rangeMin, lastHighValue, rangeDestination); 143 rangeMin = i; 144 rangeDestination = transitionIterator->value; 145 } 146 } 147 if (hasRangeMin) 148 compileCheckForRange(rangeMin, 127, rangeDestination); 149 102 150 if (node.hasFallbackTransition) 103 151 emitJump(node.fallbackTransition); … … 105 153 emitTerminate(); 106 154 } 107 155 156 void DFABytecodeCompiler::compileCheckForRange(uint16_t lowValue, uint16_t highValue, unsigned destinationNodeIndex) 157 { 158 ASSERT_WITH_MESSAGE(lowValue < 128, "The DFA engine only supports the ASCII alphabet."); 159 ASSERT_WITH_MESSAGE(highValue < 128, "The DFA engine only supports the ASCII alphabet."); 160 ASSERT(lowValue <= highValue); 161 162 if (lowValue == highValue) 163 emitCheckValue(lowValue, destinationNodeIndex); 164 else 165 emitCheckValueRange(lowValue, highValue, destinationNodeIndex); 166 } 167 108 168 void DFABytecodeCompiler::compile() 109 169 { -
trunk/Source/WebCore/contentextensions/DFABytecodeCompiler.h
r181421 r181663 30 30 31 31 #include "DFABytecode.h" 32 #include "DFANode.h"33 32 #include <wtf/Vector.h> 34 33 … … 38 37 39 38 class DFA; 39 class DFANode; 40 40 41 41 class DFABytecodeCompiler { … … 51 51 private: 52 52 void compileNode(unsigned); 53 void compileNodeTransitions(const DFANode&); 54 void compileCheckForRange(uint16_t lowValue, uint16_t highValue, unsigned destinationNodeIndex); 53 55 54 56 void emitAppendAction(unsigned); … … 56 58 void emitJump(unsigned destinationNodeIndex); 57 59 void emitCheckValue(uint8_t value, unsigned destinationNodeIndex); 60 void emitCheckValueRange(uint8_t lowValue, uint8_t highValue, unsigned destinationNodeIndex); 58 61 void emitTerminate(); 59 62 -
trunk/Source/WebCore/contentextensions/DFABytecodeInterpreter.cpp
r181421 r181663 74 74 break; 75 75 76 case DFABytecodeInstruction::CheckValueRange: { 77 if (urlIndexIsAfterEndOfString) 78 return actions; 79 80 char character = url[urlIndex]; 81 if (character >= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode)) 82 && character <= getBits<uint8_t>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t))) { 83 programCounter = getBits<unsigned>(m_bytecode, m_bytecodeLength, programCounter + sizeof(DFABytecode) + sizeof(uint8_t) + sizeof(uint8_t)); 84 if (!character) 85 urlIndexIsAfterEndOfString = true; 86 urlIndex++; // This represents an edge in the DFA. 87 } else 88 programCounter += instructionSizeWithArguments(DFABytecodeInstruction::CheckValueRange); 89 break; 90 } 91 76 92 case DFABytecodeInstruction::Jump: 77 93 if (!url[urlIndex] || urlIndexIsAfterEndOfString) -
trunk/Source/WebCore/contentextensions/NFA.cpp
r181405 r181663 84 84 void NFA::setFinal(unsigned node, uint64_t ruleId) 85 85 { 86 ASSERT(!m_nodes[node].finalRuleIds.contains(ruleId));87 m_nodes[node].finalRuleIds.append(ruleId);86 if (!m_nodes[node].finalRuleIds.contains(ruleId)) 87 m_nodes[node].finalRuleIds.append(ruleId); 88 88 } 89 89 -
trunk/Source/WebCore/contentextensions/NFAToDFA.cpp
r181405 r181663 339 339 } 340 340 341 static void simplifyTransitions(Vector<DFANode>& dfaGraph) 342 { 343 for (DFANode& dfaNode : dfaGraph) { 344 if (!dfaNode.hasFallbackTransition 345 && ((dfaNode.transitions.size() == 126 && !dfaNode.transitions.contains(0)) 346 || (dfaNode.transitions.size() == 127 && dfaNode.transitions.contains(0)))) { 347 unsigned bestTarget = std::numeric_limits<unsigned>::max(); 348 unsigned bestTargetScore = 0; 349 HashMap<unsigned, unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> targetHistogram; 350 for (const auto transition : dfaNode.transitions) { 351 if (!transition.key) 352 continue; 353 354 unsigned transitionTarget = transition.value; 355 auto addResult = targetHistogram.add(transitionTarget, 1); 356 if (!addResult.isNewEntry) 357 addResult.iterator->value++; 358 359 if (addResult.iterator->value > bestTargetScore) { 360 bestTargetScore = addResult.iterator->value; 361 bestTarget = transitionTarget; 362 } 363 } 364 ASSERT_WITH_MESSAGE(bestTargetScore, "There should be at least one valid target since having transitions is a precondition to enter this path."); 365 366 dfaNode.hasFallbackTransition = true; 367 dfaNode.fallbackTransition = bestTarget; 368 } 369 370 if (dfaNode.hasFallbackTransition) { 371 Vector<uint16_t, 128> keys; 372 DFANodeTransitions& transitions = dfaNode.transitions; 373 copyKeysToVector(transitions, keys); 374 375 for (uint16_t key : keys) { 376 auto transitionIterator = transitions.find(key); 377 if (transitionIterator->value == dfaNode.fallbackTransition) 378 transitions.remove(transitionIterator); 379 } 380 } 381 } 382 } 383 341 384 DFA NFAToDFA::convert(NFA& nfa) 342 385 { … … 388 431 } while (!unprocessedNodes.isEmpty()); 389 432 433 simplifyTransitions(dfaGraph); 390 434 return DFA(WTF::move(dfaGraph), 0); 391 435 } -
trunk/Tools/ChangeLog
r181661 r181663 1 2015-03-17 Benjamin Poulain <bpoulain@apple.com> 2 3 Compile character ranges targeting the same state as range check in the bytecode 4 https://bugs.webkit.org/show_bug.cgi?id=142759 5 6 Reviewed by Alex Christensen. 7 8 * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp: 9 (TestWebKitAPI::TEST_F): 10 1 11 2015-03-17 Youenn Fablet <youenn.fablet@crf.canon.fr> 2 12 -
trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp
r181421 r181663 123 123 } 124 124 125 TEST_F(ContentExtensionTest, RangeBasic) 126 { 127 const char* rangeBasicFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*w[0-9]c\", \"url-filter-is-case-sensitive\":true}},{\"action\":{\"type\":\"block-cookies\"},\"trigger\":{\"url-filter\":\".*[A-H][a-z]cko\", \"url-filter-is-case-sensitive\":true}}]"; 128 auto extensionData = ContentExtensions::compileRuleList(rangeBasicFilter); 129 auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData)); 130 131 ContentExtensions::ContentExtensionsBackend backend; 132 backend.addContentExtension("PatternNestedGroupsFilter", extension); 133 134 testRequest(backend, mainDocumentRequest("http://w3c.org"), { ContentExtensions::ActionType::BlockLoad }); 135 testRequest(backend, mainDocumentRequest("w2c://whatwg.org/"), { ContentExtensions::ActionType::BlockLoad }); 136 testRequest(backend, mainDocumentRequest("http://webkit.org/w0c"), { ContentExtensions::ActionType::BlockLoad }); 137 testRequest(backend, mainDocumentRequest("http://webkit.org/wac"), { }); 138 testRequest(backend, mainDocumentRequest("http://webkit.org/wAc"), { }); 139 140 // Note: URL parsing and canonicalization lowercase the scheme and hostname. 141 testRequest(backend, mainDocumentRequest("Aacko://webkit.org"), { }); 142 testRequest(backend, mainDocumentRequest("aacko://webkit.org"), { }); 143 testRequest(backend, mainDocumentRequest("http://gCcko.org/"), { }); 144 testRequest(backend, mainDocumentRequest("http://gccko.org/"), { }); 145 146 testRequest(backend, mainDocumentRequest("http://webkit.org/Gecko"), { ContentExtensions::ActionType::BlockCookies }); 147 testRequest(backend, mainDocumentRequest("http://webkit.org/gecko"), { }); 148 testRequest(backend, mainDocumentRequest("http://webkit.org/GEcko"), { }); 149 } 150 151 TEST_F(ContentExtensionTest, RangeExclusionGeneratingUniversalTransition) 152 { 153 // Transition of the type ([^X]X) effictively transition on every input. 154 const char* rangeExclusionGeneratingUniversalTransitionFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*[^a]+afoobar\"}}]"; 155 auto extensionData = ContentExtensions::compileRuleList(rangeExclusionGeneratingUniversalTransitionFilter); 156 auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData)); 157 158 ContentExtensions::ContentExtensionsBackend backend; 159 backend.addContentExtension("PatternNestedGroupsFilter", extension); 160 161 testRequest(backend, mainDocumentRequest("http://w3c.org"), { }); 162 163 testRequest(backend, mainDocumentRequest("http://w3c.org/foobafoobar"), { ContentExtensions::ActionType::BlockLoad }); 164 testRequest(backend, mainDocumentRequest("http://w3c.org/foobarfoobar"), { }); 165 testRequest(backend, mainDocumentRequest("http://w3c.org/FOOBAFOOBAR"), { ContentExtensions::ActionType::BlockLoad }); 166 testRequest(backend, mainDocumentRequest("http://w3c.org/FOOBARFOOBAR"), { }); 167 168 // The character before the "a" prefix cannot be another "a". 169 testRequest(backend, mainDocumentRequest("http://w3c.org/aafoobar"), { }); 170 testRequest(backend, mainDocumentRequest("http://w3c.org/Aafoobar"), { }); 171 testRequest(backend, mainDocumentRequest("http://w3c.org/aAfoobar"), { }); 172 testRequest(backend, mainDocumentRequest("http://w3c.org/AAfoobar"), { }); 173 } 174 125 175 const char* patternsStartingWithGroupFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"(http://whatwg\\\\.org/)?webkit\134\134.org\"}}]"; 126 176 … … 219 269 testRequest(backend, mainDocumentRequest("http://webkit.org/foobarfoo"), { }); 220 270 testRequest(backend, mainDocumentRequest("http://webkit.org/foobarf"), { }); 271 } 272 273 TEST_F(ContentExtensionTest, EndOfLineAssertionWithInvertedCharacterSet) 274 { 275 const char* endOfLineAssertionWithInvertedCharacterSetFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*[^y]$\"}}]"; 276 auto extensionData = ContentExtensions::compileRuleList(endOfLineAssertionWithInvertedCharacterSetFilter); 277 auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData)); 278 279 ContentExtensions::ContentExtensionsBackend backend; 280 backend.addContentExtension("EndOfLineAssertion", extension); 281 282 testRequest(backend, mainDocumentRequest("http://webkit.org/"), { ContentExtensions::ActionType::BlockLoad }); 283 testRequest(backend, mainDocumentRequest("http://webkit.org/a"), { ContentExtensions::ActionType::BlockLoad }); 284 testRequest(backend, mainDocumentRequest("http://webkit.org/foobar"), { ContentExtensions::ActionType::BlockLoad }); 285 testRequest(backend, mainDocumentRequest("http://webkit.org/Ya"), { ContentExtensions::ActionType::BlockLoad }); 286 testRequest(backend, mainDocumentRequest("http://webkit.org/yFoobar"), { ContentExtensions::ActionType::BlockLoad }); 287 testRequest(backend, mainDocumentRequest("http://webkit.org/y"), { }); 288 testRequest(backend, mainDocumentRequest("http://webkit.org/Y"), { }); 289 testRequest(backend, mainDocumentRequest("http://webkit.org/foobary"), { }); 290 testRequest(backend, mainDocumentRequest("http://webkit.org/foobarY"), { }); 221 291 } 222 292
Note:
See TracChangeset
for help on using the changeset viewer.