Changeset 182357 in webkit
- Timestamp:
- Apr 5, 2015 8:18:47 AM (9 years ago)
- Location:
- trunk/Source/WebKit2
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebKit2/ChangeLog
r182356 r182357 1 2015-04-04 Antti Koivisto <antti@apple.com> 2 3 Network cache Bloom filter is too big 4 https://bugs.webkit.org/show_bug.cgi?id=143400 5 6 Reviewed by Chris Dumez. 7 8 It is currently 1MB. 9 10 This patch switches the cache from a counting filter (CountingBloomFilter) to a bit filter (BloomFilter). 11 12 It also reduces the filter size from 2^20 to 2^18 elements which is good for ~26000 cache entries while 13 still keeping false positive rate below 1%. The current cache capacity allows around 4000 entries 14 with typical web contents. 15 16 The new filter size is 32KB. 17 18 * NetworkProcess/cache/NetworkCacheStorage.cpp: 19 (WebKit::NetworkCache::Storage::Storage): 20 (WebKit::NetworkCache::Storage::synchronize): 21 22 Turn initialization function into general purpose synchronization function. 23 24 (WebKit::NetworkCache::Storage::addToContentsFilter): 25 26 Collect newly added hashes so we don't miss entries that were added during synchronization. 27 28 (WebKit::NetworkCache::Storage::mayContain): 29 (WebKit::NetworkCache::Storage::remove): 30 (WebKit::NetworkCache::Storage::retrieve): 31 (WebKit::NetworkCache::Storage::store): 32 (WebKit::NetworkCache::Storage::dispatchPendingWriteOperations): 33 (WebKit::NetworkCache::Storage::dispatchFullWriteOperation): 34 (WebKit::NetworkCache::Storage::dispatchHeaderWriteOperation): 35 (WebKit::NetworkCache::Storage::setMaximumSize): 36 (WebKit::NetworkCache::Storage::clear): 37 (WebKit::NetworkCache::Storage::shrinkIfNeeded): 38 (WebKit::NetworkCache::Storage::shrink): 39 40 Non-counting Bloom filter does not support removals so this requires a new strategy. 41 42 Shrink code now simply deletes entries. The filter is updated by calling synchronize() at the end. 43 While we could synchronize the filter during traversal it is better to just have one function for that. 44 45 (WebKit::NetworkCache::Storage::initialize): Deleted. 46 * NetworkProcess/cache/NetworkCacheStorage.h: 47 (WebKit::NetworkCache::Storage::mayContain): 48 (WebKit::NetworkCache::Storage::cacheMayContain): Deleted. 49 1 50 2015-04-04 Andy Estes <aestes@apple.com> 2 51 -
trunk/Source/WebKit2/NetworkProcess/cache/NetworkCacheStorage.cpp
r182271 r182357 71 71 { 72 72 deleteOldVersions(); 73 initialize(); 74 } 75 76 void Storage::initialize() 77 { 78 ASSERT(RunLoop::isMain()); 73 synchronize(); 74 } 75 76 void Storage::synchronize() 77 { 78 ASSERT(RunLoop::isMain()); 79 80 if (m_synchronizationInProgress || m_shrinkInProgress) 81 return; 82 m_synchronizationInProgress = true; 83 84 LOG(NetworkCacheStorage, "(NetworkProcess) synchronizing cache"); 79 85 80 86 StringCapture cachePathCapture(m_directoryPath); 81 82 87 backgroundIOQueue().dispatch([this, cachePathCapture] { 83 88 String cachePath = cachePathCapture.string(); 84 traverseCacheFiles(cachePath, [this](const String& fileName, const String& partitionPath) { 89 90 auto filter = std::make_unique<ContentsFilter>(); 91 size_t size = 0; 92 unsigned count = 0; 93 traverseCacheFiles(cachePath, [&filter, &size, &count](const String& fileName, const String& partitionPath) { 85 94 Key::HashType hash; 86 95 if (!Key::stringToHash(fileName, hash)) 87 96 return; 88 unsigned shortHash = Key::toShortHash(hash);89 RunLoop::main().dispatch([this, shortHash] {90 m_contentsFilter.add(shortHash);91 });92 97 auto filePath = WebCore::pathByAppendingComponent(partitionPath, fileName); 93 98 long long fileSize = 0; 94 99 WebCore::getFileSize(filePath, fileSize); 95 m_approximateSize += fileSize; 96 }); 97 m_hasPopulatedContentsFilter = true; 98 }); 100 if (!fileSize) 101 return; 102 filter->add(Key::toShortHash(hash)); 103 size += fileSize; 104 ++count; 105 }); 106 107 auto* filterPtr = filter.release(); 108 RunLoop::main().dispatch([this, filterPtr, size] { 109 auto filter = std::unique_ptr<ContentsFilter>(filterPtr); 110 111 for (auto hash : m_contentsFilterHashesAddedDuringSynchronization) 112 filter->add(hash); 113 m_contentsFilterHashesAddedDuringSynchronization.clear(); 114 115 m_contentsFilter = WTF::move(filter); 116 m_approximateSize = size; 117 m_synchronizationInProgress = false; 118 }); 119 120 LOG(NetworkCacheStorage, "(NetworkProcess) cache synchronization completed approximateSize=%zu count=%d", size, count); 121 }); 122 } 123 124 void Storage::addToContentsFilter(const Key& key) 125 { 126 ASSERT(RunLoop::isMain()); 127 128 if (m_contentsFilter) 129 m_contentsFilter->add(key.shortHash()); 130 131 // If we get new entries during filter synchronization take care to add them to the new filter as well. 132 if (m_synchronizationInProgress) 133 m_contentsFilterHashesAddedDuringSynchronization.append(key.shortHash()); 134 } 135 136 bool Storage::mayContain(const Key& key) const 137 { 138 ASSERT(RunLoop::isMain()); 139 return !m_contentsFilter || m_contentsFilter->mayContain(key.shortHash()); 99 140 } 100 141 … … 289 330 ASSERT(RunLoop::isMain()); 290 331 291 // For simplicity we don't reduce m_approximateSize on removals. 292 // The next cache shrink will update the size. 293 294 if (m_contentsFilter.mayContain(key.shortHash())) 295 m_contentsFilter.remove(key.shortHash()); 332 // We can't remove the key from the Bloom filter (but some false positives are expected anyway). 333 // For simplicity we also don't reduce m_approximateSize on removals. 334 // The next synchronization will update everything. 296 335 297 336 StringCapture filePathCapture(filePathForKey(key, m_directoryPath)); … … 386 425 } 387 426 388 if (! cacheMayContain(key.shortHash())) {427 if (!mayContain(key)) { 389 428 completionHandler(nullptr); 390 429 return; … … 413 452 414 453 // Add key to the filter already here as we do lookups from the pending operations too. 415 m_contentsFilter.add(record.key.shortHash());454 addToContentsFilter(record.key); 416 455 417 456 dispatchPendingWriteOperations(); … … 480 519 m_activeWriteOperations.add(WTF::move(writeOperation)); 481 520 482 if (write.existingRecord && cacheMayContain(write.record.key.shortHash())) {521 if (write.existingRecord && mayContain(write.record.key)) { 483 522 dispatchHeaderWriteOperation(write); 484 523 continue; … … 493 532 ASSERT(m_activeWriteOperations.contains(&write)); 494 533 495 if (!m_contentsFilter.mayContain(write.record.key.shortHash()))496 m_contentsFilter.add(write.record.key.shortHash());534 // This was added already when starting the store but filter might have been wiped. 535 addToContentsFilter(write.record.key); 497 536 498 537 StringCapture cachePathCapture(m_directoryPath); … … 506 545 507 546 channel->write(0, headerAndBodyData, [this, &write, bodyOffset, fd](int error) { 508 LOG(NetworkCacheStorage, "(NetworkProcess) write complete error=%d", error);509 if (error) {510 if (m_contentsFilter.mayContain(write.record.key.shortHash()))511 m_contentsFilter.remove(write.record.key.shortHash());512 }513 547 size_t bodySize = write.record.body.size(); 514 548 size_t totalSize = bodyOffset + bodySize; 515 549 550 // On error the entry still stays in the contents filter until next synchronization. 516 551 m_approximateSize += totalSize; 517 552 … … 524 559 m_activeWriteOperations.remove(&write); 525 560 dispatchPendingWriteOperations(); 561 562 LOG(NetworkCacheStorage, "(NetworkProcess) write complete error=%d", error); 526 563 }); 527 564 }); … … 535 572 ASSERT(write.existingRecord); 536 573 ASSERT(m_activeWriteOperations.contains(&write)); 537 ASSERT( cacheMayContain(write.record.key.shortHash()));574 ASSERT(mayContain(write.record.key)); 538 575 539 576 // Try to update the header of an existing entry. … … 571 608 { 572 609 ASSERT(RunLoop::isMain()); 610 611 #if !ASSERT_DISABLED 612 const size_t assumedAverageRecordSize = 50 << 20; 613 size_t maximumRecordCount = size / assumedAverageRecordSize; 614 // ~10 bits per element are required for <1% false positive rate. 615 size_t effectiveBloomFilterCapacity = ContentsFilter::tableSize / 10; 616 // If this gets hit it might be time to increase the filter size. 617 ASSERT(maximumRecordCount < effectiveBloomFilterCapacity); 618 #endif 619 573 620 m_maximumSize = size; 574 621 … … 581 628 LOG(NetworkCacheStorage, "(NetworkProcess) clearing cache"); 582 629 583 m_contentsFilter.clear(); 630 if (m_contentsFilter) 631 m_contentsFilter->clear(); 584 632 m_approximateSize = 0; 585 633 … … 630 678 ASSERT(RunLoop::isMain()); 631 679 632 if (m_approximateSize <= m_maximumSize) 633 return; 634 if (m_shrinkInProgress) 680 if (m_approximateSize > m_maximumSize) 681 shrink(); 682 } 683 684 void Storage::shrink() 685 { 686 ASSERT(RunLoop::isMain()); 687 688 if (m_shrinkInProgress || m_synchronizationInProgress) 635 689 return; 636 690 m_shrinkInProgress = true; 637 691 638 692 LOG(NetworkCacheStorage, "(NetworkProcess) shrinking cache approximateSize=%zu, m_maximumSize=%zu", static_cast<size_t>(m_approximateSize), m_maximumSize); 639 640 m_approximateSize = 0;641 693 642 694 StringCapture cachePathCapture(m_directoryPath); 643 695 backgroundIOQueue().dispatch([this, cachePathCapture] { 644 696 String cachePath = cachePathCapture.string(); 645 traverseCacheFiles(cachePath, [ this](const String& fileName, const String& partitionPath) {697 traverseCacheFiles(cachePath, [](const String& fileName, const String& partitionPath) { 646 698 auto filePath = WebCore::pathByAppendingComponent(partitionPath, fileName); 647 699 … … 652 704 LOG(NetworkCacheStorage, "Deletion probability=%f shouldDelete=%d", probability, shouldDelete); 653 705 654 if (!shouldDelete) { 655 long long fileSize = 0; 656 WebCore::getFileSize(filePath, fileSize); 657 m_approximateSize += fileSize; 658 return; 659 } 660 661 WebCore::deleteFile(filePath); 662 Key::HashType hash; 663 if (!Key::stringToHash(fileName, hash)) 664 return; 665 unsigned shortHash = Key::toShortHash(hash); 666 RunLoop::main().dispatch([this, shortHash] { 667 if (m_contentsFilter.mayContain(shortHash)) 668 m_contentsFilter.remove(shortHash); 669 }); 706 if (shouldDelete) 707 WebCore::deleteFile(filePath); 670 708 }); 671 709 … … 676 714 }); 677 715 678 m_shrinkInProgress = false; 679 680 LOG(NetworkCacheStorage, "(NetworkProcess) cache shrink completed approximateSize=%zu", static_cast<size_t>(m_approximateSize)); 716 RunLoop::main().dispatch([this] { 717 m_shrinkInProgress = false; 718 // We could synchronize during the shrink traversal. However this is fast and it is better to have just one code path. 719 synchronize(); 720 }); 721 722 LOG(NetworkCacheStorage, "(NetworkProcess) cache shrink completed"); 681 723 }); 682 724 } -
trunk/Source/WebKit2/NetworkProcess/cache/NetworkCacheStorage.h
r182321 r182357 86 86 Storage(const String& directoryPath); 87 87 88 void initialize();88 void synchronize(); 89 89 void deleteOldVersions(); 90 90 void shrinkIfNeeded(); 91 void shrink(); 91 92 92 93 struct ReadOperation { … … 112 113 WorkQueue& serialBackgroundIOQueue() { return m_serialBackgroundIOQueue.get(); } 113 114 114 bool cacheMayContain(unsigned shortHash) { return !m_hasPopulatedContentsFilter || m_contentsFilter.mayContain(shortHash); } 115 bool mayContain(const Key&) const; 116 117 // 2^18 bit filter can support up to 26000 entries with false positive rate < 1%. 118 using ContentsFilter = BloomFilter<18>; 119 void addToContentsFilter(const Key&); 115 120 116 121 const String m_baseDirectoryPath; … … 118 123 119 124 size_t m_maximumSize { std::numeric_limits<size_t>::max() }; 125 size_t m_approximateSize { 0 }; 120 126 121 CountingBloomFilter<20> m_contentsFilter; 122 std::atomic<bool> m_hasPopulatedContentsFilter { false }; 127 std::unique_ptr<ContentsFilter> m_contentsFilter; 123 128 124 std::atomic<size_t> m_approximateSize { 0 }; 125 std::atomic<bool> m_shrinkInProgress { false }; 129 bool m_synchronizationInProgress { false }; 130 bool m_shrinkInProgress { false }; 131 132 Vector<unsigned> m_contentsFilterHashesAddedDuringSynchronization; 126 133 127 134 static const int maximumRetrievePriority = 4;
Note: See TracChangeset
for help on using the changeset viewer.