Changeset 195434 in webkit
- Timestamp:
- Jan 21, 2016, 7:31:32 PM (10 years ago)
- Location:
- trunk/Source/JavaScriptCore
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r195433 r195434 1 2016-01-21 Filip Pizlo <fpizlo@apple.com> 2 3 B3 CSE should be able to match a full redundancy even if none of the matches dominate the value in question 4 https://bugs.webkit.org/show_bug.cgi?id=153321 5 6 Reviewed by Benjamin Poulain. 7 8 I once learned that LLVM's GVN can manufacture Phi functions. I don't know the details 9 but I'm presuming that it involves: 10 11 if (p) 12 tmp1 = *ptr 13 else 14 tmp2 = *ptr 15 tmp3 = *ptr // Replace this with Phi(tmp1, tmp2). 16 17 This adds such an optimization to our CSE. The idea is that we search through basic 18 blocks until we find the value we want, a side effect, or the start of the procedure. If 19 we find a value that matches our search criteria, we record it and ignore the 20 predecessors. If we find a side effect or the start of the procedure, we give up the 21 whole search. This ensures that if we come out of the search without giving up, we'll 22 have a set of matches that are fully redundant. 23 24 CSE could then create a Phi graph by using SSACalculator. But the recent work on FixSSA 25 revealed a much more exciting option: create a stack slot! In case there is more than one 26 match, CSE now creates a stack slot that each match stores to, and replaces the redundant 27 instruction with a loadfrom the stack slot. The stack slot is anonymous, which ensures 28 that FixSSA will turn it into an optimal Phi graph or whatever. 29 30 This is a significant speed-up on Octane/richards. 31 32 * b3/B3DuplicateTails.cpp: 33 * b3/B3EliminateCommonSubexpressions.cpp: 34 * b3/B3FixSSA.cpp: 35 (JSC::B3::fixSSA): 36 * b3/B3Generate.cpp: 37 (JSC::B3::generateToAir): 38 * b3/B3Procedure.h: 39 (JSC::B3::Procedure::setFrontendData): 40 (JSC::B3::Procedure::frontendData): 41 * b3/testb3.cpp: 42 * ftl/FTLState.cpp: 43 (JSC::FTL::State::State): 44 1 45 2016-01-21 Filip Pizlo <fpizlo@apple.com> 2 46 -
trunk/Source/JavaScriptCore/b3/B3DuplicateTails.cpp
r195395 r195434 124 124 m_proc.resetReachability(); 125 125 m_proc.invalidateCFG(); 126 127 if (verbose) {128 dataLog("Procedure just before SSA conversion:\n");129 dataLog(m_proc);130 }131 fixSSA(m_proc);132 126 } 133 127 -
trunk/Source/JavaScriptCore/b3/B3EliminateCommonSubexpressions.cpp
r195417 r195434 38 38 #include "B3ProcedureInlines.h" 39 39 #include "B3PureCSE.h" 40 #include "B3StackSlotValue.h" 40 41 #include "B3ValueKey.h" 41 42 #include "B3ValueInlines.h" 43 #include "DFGGraph.h" 44 #include <wtf/CommaPrinter.h> 42 45 #include <wtf/HashMap.h> 46 #include <wtf/ListDump.h> 43 47 #include <wtf/RangeSet.h> 44 48 … … 61 65 62 66 struct ImpureBlockData { 67 void dump(PrintStream& out) const 68 { 69 out.print("{writes = ", writes, ", memoryValues = {"); 70 71 CommaPrinter comma; 72 for (auto& entry : memoryValues) 73 out.print(comma, pointerDump(entry.key), "=>", pointerListDump(entry.value)); 74 75 out.print("}}"); 76 } 77 63 78 RangeSet<HeapRange> writes; 64 79 65 80 // Maps an address base to all of the MemoryValues that do things to it. After we're done 66 // processing a map, this tells us the values at tail. 67 HashMap<Value*, MemoryMatches> memoryValues; 81 // processing a map, this tells us the values at tail. Note that we use a Matches array 82 // because those MemoryValues could be turned into Identity's, and we need to check for this 83 // as we go. 84 HashMap<Value*, Matches> memoryValues; 68 85 }; 69 86 … … 86 103 87 104 for (BasicBlock* block : m_proc) { 88 m_data = &m_impureBlockData[block]; 89 for (Value* value : *block) 90 m_data->writes.add(value->effects().writes); 91 } 92 93 for (BasicBlock* block : m_proc.blocksInPreOrder()) { 94 m_block = block; 95 m_data = &m_impureBlockData[block]; 96 m_writes.clear(); 97 for (m_index = 0; m_index < block->size(); ++m_index) { 98 m_value = block->at(m_index); 105 ImpureBlockData& data = m_impureBlockData[block]; 106 for (Value* value : *block) { 107 if (HeapRange writes = value->effects().writes) 108 clobber(data, writes); 109 110 if (MemoryValue* memory = value->as<MemoryValue>()) 111 addMemoryValue(data, memory); 112 } 113 114 if (verbose) 115 dataLog("Block ", *block, ": ", data, "\n"); 116 } 117 118 Vector<BasicBlock*> postOrder = m_proc.blocksInPostOrder(); 119 for (unsigned i = postOrder.size(); i--;) { 120 m_block = postOrder[i]; 121 if (verbose) 122 dataLog("Looking at ", *m_block, ":\n"); 123 124 m_data = ImpureBlockData(); 125 for (m_index = 0; m_index < m_block->size(); ++m_index) { 126 m_value = m_block->at(m_index); 99 127 process(); 100 128 } 129 m_insertionSet.execute(m_block); 130 m_impureBlockData[m_block] = m_data; 131 } 132 133 for (StackSlotValue* stack : m_stacks) 134 m_insertionSet.insertValue(0, stack); 135 for (BasicBlock* block : m_proc) { 136 for (unsigned valueIndex = 0; valueIndex < block->size(); ++valueIndex) { 137 auto iter = m_stores.find(block->at(valueIndex)); 138 if (iter == m_stores.end()) 139 continue; 140 141 for (Value* value : iter->value) 142 m_insertionSet.insertValue(valueIndex + 1, value); 143 } 101 144 m_insertionSet.execute(block); 102 145 } 146 147 if (verbose) 148 dataLog("B3 after CSE:\n", m_proc); 103 149 104 150 return m_changed; … … 117 163 118 164 if (HeapRange writes = m_value->effects().writes) 119 clobber( writes);165 clobber(m_data, writes); 120 166 121 167 if (MemoryValue* memory = m_value->as<MemoryValue>()) … … 123 169 } 124 170 125 void clobber( HeapRange writes)126 { 127 m_writes.add(writes);128 129 m_data->memoryValues.removeIf(130 [&] (HashMap<Value*, M emoryMatches>::KeyValuePairType& entry) -> bool {171 void clobber(ImpureBlockData& data, HeapRange writes) 172 { 173 data.writes.add(writes); 174 175 data.memoryValues.removeIf( 176 [&] (HashMap<Value*, Matches>::KeyValuePairType& entry) -> bool { 131 177 entry.value.removeAllMatching( 132 [&] (MemoryValue* memory) -> bool { 133 return memory->range().overlaps(writes); 178 [&] (Value* value) -> bool { 179 if (MemoryValue* memory = value->as<MemoryValue>()) 180 return memory->range().overlaps(writes); 181 return true; 134 182 }); 135 183 return entry.value.isEmpty(); … … 157 205 switch (memory->opcode()) { 158 206 case Load8Z: { 159 MemoryValue* match = findMemoryValue( 160 ptr, range, [&] (MemoryValue* candidate) -> bool { 207 handleMemoryValue( 208 ptr, range, 209 [&] (MemoryValue* candidate) -> bool { 161 210 return candidate->offset() == offset 162 211 && (candidate->opcode() == Load8Z || candidate->opcode() == Store8); 212 }, 213 [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* { 214 if (match->opcode() == Store8) { 215 Value* mask = m_proc.add<Const32Value>(m_value->origin(), 0xff); 216 fixups.append(mask); 217 Value* zext = m_proc.add<Value>( 218 BitAnd, m_value->origin(), match->child(0), mask); 219 fixups.append(sext); 220 return sext; 221 } 222 return nullptr; 163 223 }); 164 if (replace(match))165 break;166 addMemoryValue(memory);167 224 break; 168 225 } 169 226 170 227 case Load8S: { 171 MemoryValue* match = findMemoryValue( 172 ptr, range, [&] (MemoryValue* candidate) -> bool { 228 handleMemoryValue( 229 ptr, range, 230 [&] (MemoryValue* candidate) -> bool { 173 231 return candidate->offset() == offset 174 232 && (candidate->opcode() == Load8S || candidate->opcode() == Store8); 233 }, 234 [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* { 235 if (match->opcode() == Store8) { 236 Value* sext = m_proc.add<Value>( 237 SExt8, m_value->origin(), match->child(0)); 238 fixups.append(sext); 239 return sext; 240 } 241 return nullptr; 175 242 }); 176 if (match) {177 if (match->opcode() == Store8) {178 m_value->replaceWithIdentity(179 m_insertionSet.insert<Value>(180 m_index, SExt8, m_value->origin(), match->child(0)));181 m_changed = true;182 break;183 }184 replace(match);185 break;186 }187 addMemoryValue(memory);188 243 break; 189 244 } 190 245 191 246 case Load16Z: { 192 MemoryValue* match = findMemoryValue( 193 ptr, range, [&] (MemoryValue* candidate) -> bool { 247 handleMemoryValue( 248 ptr, range, 249 [&] (MemoryValue* candidate) -> bool { 194 250 return candidate->offset() == offset 195 251 && (candidate->opcode() == Load16Z || candidate->opcode() == Store16); 252 }, 253 [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* { 254 if (match->opcode() == Store16) { 255 Value* mask = m_proc.add<Const32Value>(m_value->origin(), 0xffff); 256 fixups.append(mask); 257 Value* zext = m_proc.add<Value>( 258 BitAnd, m_value->origin(), match->child(0), mask); 259 fixups.append(sext); 260 return sext; 261 } 262 return nullptr; 196 263 }); 197 if (replace(match))198 break;199 addMemoryValue(memory);200 264 break; 201 265 } 202 266 203 267 case Load16S: { 204 MemoryValue* match = findMemoryValue(268 handleMemoryValue( 205 269 ptr, range, [&] (MemoryValue* candidate) -> bool { 206 270 return candidate->offset() == offset 207 271 && (candidate->opcode() == Load16S || candidate->opcode() == Store16); 272 }, 273 [&] (MemoryValue* match, Vector<Value*>& fixups) -> Value* { 274 if (match->opcode() == Store16) { 275 Value* sext = m_proc.add<Value>( 276 SExt16, m_value->origin(), match->child(0)); 277 fixups.append(sext); 278 return sext; 279 } 280 return nullptr; 208 281 }); 209 if (match) {210 if (match->opcode() == Store16) {211 m_value->replaceWithIdentity(212 m_insertionSet.insert<Value>(213 m_index, SExt16, m_value->origin(), match->child(0)));214 m_changed = true;215 break;216 }217 replace(match);218 break;219 }220 addMemoryValue(memory);221 282 break; 222 283 } 223 284 224 285 case Load: { 225 MemoryValue* match = findMemoryValue( 226 ptr, range, [&] (MemoryValue* candidate) -> bool { 286 handleMemoryValue( 287 ptr, range, 288 [&] (MemoryValue* candidate) -> bool { 227 289 if (candidate->offset() != offset) 228 290 return false; … … 236 298 return false; 237 299 }); 238 if (replace(match))239 break;240 addMemoryValue(memory);241 300 break; 242 301 } … … 245 304 case Store16: 246 305 case Store: { 247 addMemoryValue(m emory);306 addMemoryValue(m_data, memory); 248 307 break; 249 308 } … … 256 315 } 257 316 258 bool replace(MemoryValue* match) 259 { 260 if (!match) 317 template<typename Filter> 318 void handleMemoryValue(Value* ptr, HeapRange range, const Filter& filter) 319 { 320 handleMemoryValue( 321 ptr, range, filter, 322 [] (MemoryValue*, Vector<Value*>&) -> Value* { 323 return nullptr; 324 }); 325 } 326 327 template<typename Filter, typename Replace> 328 void handleMemoryValue( 329 Value* ptr, HeapRange range, const Filter& filter, const Replace& replace) 330 { 331 MemoryMatches matches = findMemoryValue(ptr, range, filter); 332 if (replaceMemoryValue(matches, replace)) 333 return; 334 addMemoryValue(m_data, m_value->as<MemoryValue>()); 335 } 336 337 template<typename Replace> 338 bool replaceMemoryValue(const MemoryMatches& matches, const Replace& replace) 339 { 340 if (matches.isEmpty()) 261 341 return false; 262 342 263 343 if (verbose) 264 dataLog("Eliminating ", *m_value, " due to ", *match, "\n"); 265 266 if (match->isStore()) 267 m_value->replaceWithIdentity(match->child(0)); 268 else 269 m_value->replaceWithIdentity(match); 344 dataLog("Eliminating ", *m_value, " due to ", pointerListDump(matches), "\n"); 345 270 346 m_changed = true; 347 348 if (matches.size() == 1) { 349 MemoryValue* dominatingMatch = matches[0]; 350 RELEASE_ASSERT(m_dominators.dominates(dominatingMatch->owner, m_block)); 351 352 if (verbose) 353 dataLog(" Eliminating using ", *dominatingMatch, "\n"); 354 Vector<Value*> extraValues; 355 if (Value* value = replace(dominatingMatch, extraValues)) { 356 for (Value* extraValue : extraValues) 357 m_insertionSet.insertValue(m_index, extraValue); 358 m_value->replaceWithIdentity(value); 359 } else { 360 if (dominatingMatch->isStore()) 361 m_value->replaceWithIdentity(dominatingMatch->child(0)); 362 else 363 m_value->replaceWithIdentity(dominatingMatch); 364 } 365 return true; 366 } 367 368 // FIXME: It would be way better if this phase just did SSA calculation directly. 369 // Right now we're relying on the fact that CSE's position in the phase order is 370 // almost right before SSA fixup. 371 372 StackSlotValue* stack = m_proc.add<StackSlotValue>( 373 m_value->origin(), sizeofType(m_value->type()), StackSlotKind::Anonymous); 374 m_stacks.append(stack); 375 376 MemoryValue* load = m_insertionSet.insert<MemoryValue>( 377 m_index, Load, m_value->type(), m_value->origin(), stack); 378 if (verbose) 379 dataLog(" Inserting load of value: ", *load, "\n"); 380 m_value->replaceWithIdentity(load); 381 382 for (MemoryValue* match : matches) { 383 Vector<Value*>& stores = m_stores.add(match, Vector<Value*>()).iterator->value; 384 385 Value* value = replace(match, stores); 386 if (!value) { 387 if (match->isStore()) 388 value = match->child(0); 389 else 390 value = match; 391 } 392 393 Value* store = m_proc.add<MemoryValue>(Store, m_value->origin(), value, stack); 394 stores.append(store); 395 } 396 271 397 return true; 272 398 } 273 399 274 void addMemoryValue(MemoryValue* memory)275 {276 addMemoryValue(*m_data, memory);277 }278 279 400 void addMemoryValue(ImpureBlockData& data, MemoryValue* memory) 280 401 { 281 MemoryMatches& matches = 282 data.memoryValues.add(memory->lastChild(), MemoryMatches()).iterator->value; 402 if (verbose) 403 dataLog("Adding memory: ", *memory, "\n"); 404 405 Matches& matches = data.memoryValues.add(memory->lastChild(), Matches()).iterator->value; 283 406 284 407 if (matches.contains(memory)) … … 289 412 290 413 template<typename Filter> 291 MemoryValue* findMemoryValue(Value* ptr, HeapRange range, const Filter& filter) 292 { 414 MemoryMatches findMemoryValue(Value* ptr, HeapRange range, const Filter& filter) 415 { 416 if (verbose) 417 dataLog(*m_value, ": looking for ", *ptr, "...\n"); 418 293 419 auto find = [&] (ImpureBlockData& data) -> MemoryValue* { 294 420 auto iter = data.memoryValues.find(ptr); 295 421 if (iter != data.memoryValues.end()) { 296 for (MemoryValue* candidate : iter->value) { 297 if (filter(candidate)) 298 return candidate; 422 for (Value* candidateValue : iter->value) { 423 if (MemoryValue* candidateMemory = candidateValue->as<MemoryValue>()) { 424 if (filter(candidateMemory)) 425 return candidateMemory; 426 } 299 427 } 300 428 } … … 302 430 }; 303 431 304 if (MemoryValue* match = find(*m_data)) 305 return match; 306 307 if (m_writes.overlaps(range)) 308 return nullptr; 432 if (MemoryValue* match = find(m_data)) { 433 if (verbose) 434 dataLog(" Found ", *match, " locally.\n"); 435 return { match }; 436 } 437 438 if (m_data.writes.overlaps(range)) { 439 if (verbose) 440 dataLog(" Giving up because of writes.\n"); 441 return { }; 442 } 309 443 310 444 BlockWorklist worklist; … … 313 447 worklist.pushAll(m_block->predecessors()); 314 448 315 Memory Value* match = nullptr;449 MemoryMatches matches; 316 450 317 451 while (BasicBlock* block = worklist.pop()) { 452 if (verbose) 453 dataLog(" Looking at ", *block, "\n"); 318 454 seenList.append(block); 319 455 320 456 ImpureBlockData& data = m_impureBlockData[block]; 321 457 322 if (m_dominators.strictlyDominates(block, m_block)) { 323 match = find(data); 324 if (match) 325 continue; 326 } 327 328 if (data.writes.overlaps(range)) 329 return nullptr; 330 458 MemoryValue* match = find(data); 459 if (match && match != m_value) { 460 if (verbose) 461 dataLog(" Found match: ", *match, "\n"); 462 matches.append(match); 463 continue; 464 } 465 466 if (data.writes.overlaps(range)) { 467 if (verbose) 468 dataLog(" Giving up because of writes.\n"); 469 return { }; 470 } 471 472 if (!block->numPredecessors()) { 473 if (verbose) 474 dataLog(" Giving up because it's live at root.\n"); 475 // This essentially proves that this is live at the prologue. That means that we 476 // cannot reliably optimize this case. 477 return { }; 478 } 479 331 480 worklist.pushAll(block->predecessors()); 332 481 } 333 482 334 if (!match) 335 return nullptr; 336 337 for (BasicBlock* block : seenList) 338 addMemoryValue(m_impureBlockData[block], match); 339 addMemoryValue(match); 340 341 return match; 342 } 343 344 typedef Vector<Value*, 1> Matches; 483 if (verbose) 484 dataLog(" Got matches: ", pointerListDump(matches), "\n"); 485 return matches; 486 } 345 487 346 488 Procedure& m_proc; … … 351 493 IndexMap<BasicBlock, ImpureBlockData> m_impureBlockData; 352 494 353 ImpureBlockData* m_data; 354 RangeSet<HeapRange> m_writes; 495 ImpureBlockData m_data; 355 496 356 497 BasicBlock* m_block; … … 358 499 Value* m_value; 359 500 501 Vector<StackSlotValue*> m_stacks; 502 HashMap<Value*, Vector<Value*>> m_stores; 503 360 504 InsertionSet m_insertionSet; 361 505 -
trunk/Source/JavaScriptCore/b3/B3FixSSA.cpp
r195395 r195434 123 123 bool fixSSA(Procedure& proc) 124 124 { 125 PhaseScope phaseScope(proc, "fixSSA"); 126 125 127 // Collect the stack "variables". If there aren't any, then we don't have anything to do. 126 128 // That's a fairly common case. -
trunk/Source/JavaScriptCore/b3/B3Generate.cpp
r195422 r195434 35 35 #include "B3DuplicateTails.h" 36 36 #include "B3EliminateCommonSubexpressions.h" 37 #include "B3FixSSA.h" 37 38 #include "B3FoldPathConstants.h" 38 39 #include "B3LegalizeMemoryOffsets.h" … … 82 83 eliminateCommonSubexpressions(procedure); 83 84 duplicateTails(procedure); 85 fixSSA(procedure); 84 86 foldPathConstants(procedure); 85 87 -
trunk/Source/JavaScriptCore/b3/B3Procedure.h
r195395 r195434 72 72 void printOrigin(PrintStream& out, Origin origin) const; 73 73 74 // This is a debugging hack. Sometimes while debugging B3 you need to break the abstraction 75 // and get at the DFG Graph, or whatever data structure the frontend used to describe the 76 // program. The FTL passes the DFG Graph. 77 void setFrontendData(const void* value) { m_frontendData = value; } 78 const void* frontendData() const { return m_frontendData; } 79 74 80 JS_EXPORT_PRIVATE BasicBlock* addBlock(double frequency = 1); 75 81 … … 285 291 std::unique_ptr<Air::Code> m_code; 286 292 RefPtr<SharedTask<void(PrintStream&, Origin)>> m_originPrinter; 293 const void* m_frontendData; 287 294 }; 288 295 -
trunk/Source/JavaScriptCore/b3/testb3.cpp
r195422 r195434 9456 9456 9457 9457 CHECK(compileAndRun<bool>(proc, &left) == (left == right)); 9458 } 9459 9460 void testStore8Load8Z(int32_t value) 9461 { 9462 Procedure proc; 9463 BasicBlock* root = proc.addBlock(); 9464 9465 int8_t byte; 9466 Value* ptr = root->appendNew<ConstPtrValue>(proc, Origin(), &byte); 9467 9468 root->appendNew<MemoryValue>( 9469 proc, Store8, Origin(), 9470 root->appendNew<Value>( 9471 proc, Trunc, Origin(), 9472 root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0)), 9473 ptr); 9474 9475 root->appendNew<ControlValue>( 9476 proc, Return, Origin(), 9477 root->appendNew<MemoryValue>(proc, Load8Z, Origin(), ptr)); 9478 9479 CHECK(compileAndRun<int32_t>(proc, value) == static_cast<uint8_t>(value)); 9480 } 9481 9482 void testStore16Load16Z(int32_t value) 9483 { 9484 Procedure proc; 9485 BasicBlock* root = proc.addBlock(); 9486 9487 int16_t byte; 9488 Value* ptr = root->appendNew<ConstPtrValue>(proc, Origin(), &byte); 9489 9490 root->appendNew<MemoryValue>( 9491 proc, Store16, Origin(), 9492 root->appendNew<Value>( 9493 proc, Trunc, Origin(), 9494 root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0)), 9495 ptr); 9496 9497 root->appendNew<ControlValue>( 9498 proc, Return, Origin(), 9499 root->appendNew<MemoryValue>(proc, Load16Z, Origin(), ptr)); 9500 9501 CHECK(compileAndRun<int32_t>(proc, value) == static_cast<uint16_t>(value)); 9458 9502 } 9459 9503 … … 10731 10775 RUN(testBranch64EqualMemImm(-1, 1)); 10732 10776 10777 RUN(testStore8Load8Z(0)); 10778 RUN(testStore8Load8Z(123)); 10779 RUN(testStore8Load8Z(12345)); 10780 RUN(testStore8Load8Z(-123)); 10781 10782 RUN(testStore16Load16Z(0)); 10783 RUN(testStore16Load16Z(123)); 10784 RUN(testStore16Load16Z(12345)); 10785 RUN(testStore16Load16Z(12345678)); 10786 RUN(testStore16Load16Z(-123)); 10787 10733 10788 if (tasks.isEmpty()) 10734 10789 usage(); -
trunk/Source/JavaScriptCore/ftl/FTLState.cpp
r194716 r195434 79 79 out.print("DFG:", bitwise_cast<Node*>(origin.data())); 80 80 }); 81 82 proc->setFrontendData(&graph); 81 83 #endif // FTL_USES_B3 82 84 }
Note:
See TracChangeset
for help on using the changeset viewer.