Changeset 182321 in webkit
- Timestamp:
- Apr 3, 2015 10:23:39 AM (9 years ago)
- Location:
- trunk
- Files:
-
- 1 added
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WTF/ChangeLog
r182254 r182321 1 2015-04-03 Antti Koivisto <antti@apple.com> 2 3 Add non-counting Bloom filter implementation 4 https://bugs.webkit.org/show_bug.cgi?id=143366 5 6 Reviewed by Sam Weinig. 7 8 Add a traditional single-bit-per-bucket Bloom filter in addition to the existing counting implementation. 9 10 - rename BloomFilter -> CountingBloomFilter. 11 - add a new basic BloomFilter type. 12 - update some terminology to match http://en.wikipedia.org/wiki/Bloom_filter and modernize the code a bit. 13 - add API tests. 14 15 * wtf/BloomFilter.h: 16 (WTF::BloomFilter::BloomFilter): 17 (WTF::BloomFilter::add): 18 19 Also support merging. 20 21 (WTF::BloomFilter::mayContain): 22 (WTF::BloomFilter::arrayIndex): 23 (WTF::BloomFilter::bitMask): 24 (WTF::BloomFilter<keyBits>::mayContain): 25 (WTF::BloomFilter<keyBits>::add): 26 (WTF::BloomFilter<keyBits>::isBitSet): 27 (WTF::BloomFilter<keyBits>::setBit): 28 (WTF::BloomFilter<keyBits>::clear): 29 (WTF::CountingBloomFilter::CountingBloomFilter): 30 (WTF::CountingBloomFilter::mayContain): 31 (WTF::CountingBloomFilter::firstBucket): 32 (WTF::CountingBloomFilter::secondBucket): 33 (WTF::CountingBloomFilter<keyBits>::add): 34 (WTF::CountingBloomFilter<keyBits>::remove): 35 (WTF::CountingBloomFilter<keyBits>::clear): 36 (WTF::CountingBloomFilter<keyBits>::likelyEmpty): 37 (WTF::CountingBloomFilter<keyBits>::isClear): 38 (WTF::BloomFilter::maximumCount): Deleted. 39 (WTF::BloomFilter::firstSlot): Deleted. 40 (WTF::BloomFilter::secondSlot): Deleted. 41 (WTF::BloomFilter<keyBits>::remove): Deleted. 42 (WTF::BloomFilter<keyBits>::likelyEmpty): Deleted. 43 (WTF::BloomFilter<keyBits>::isClear): Deleted. 44 1 45 015-04-01 Antti Koivisto <antti@apple.com> 2 46 -
trunk/Source/WTF/wtf/BloomFilter.h
r175810 r182321 1 1 /* 2 * Copyright (C) 2011 Apple Inc. All rights reserved.2 * Copyright (C) 2011, 2015 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 27 27 #define BloomFilter_h 28 28 29 #include <array> 29 30 #include <wtf/text/AtomicString.h> 30 31 31 32 namespace WTF { 32 33 33 // Counting bloom filter with k=2 and 8 bit counters. Uses 2^keyBitsbytes of memory.34 // Bloom filter with k=2. Uses 2^keyBits/8 bytes of memory. 34 35 // False positive rate is approximately (1-e^(-2n/m))^2, where n is the number of unique 35 36 // keys and m is the table size (==2^keyBits). 37 // See http://en.wikipedia.org/wiki/Bloom_filter 36 38 template <unsigned keyBits> 37 39 class BloomFilter { … … 39 41 public: 40 42 static const size_t tableSize = 1 << keyBits; 43 44 BloomFilter(); 45 46 void add(unsigned hash); 47 void add(const BloomFilter&); 48 49 // The filter may give false positives (claim it may contain a key it doesn't) 50 // but never false negatives (claim it doesn't contain a key it does). 51 bool mayContain(unsigned hash) const; 52 53 void clear(); 54 55 void add(const AtomicString& string) { add(string.impl()->existingHash()); } 56 void add(const String& string) { add(string.impl()->hash()); } 57 bool mayContain(const AtomicString& string) const { return mayContain(string.impl()->existingHash()); } 58 bool mayContain(const String& string) const { return mayContain(string.impl()->hash()); } 59 60 private: 61 static const unsigned bitsPerPosition = 8 * sizeof(unsigned); 41 62 static const unsigned keyMask = (1 << keyBits) - 1; 42 static uint8_t maximumCount() { return std::numeric_limits<uint8_t>::max(); } 43 44 BloomFilter() { clear(); } 63 static unsigned arrayIndex(unsigned key) { return key / bitsPerPosition; } 64 static unsigned bitMask(unsigned key) { return 1 << (key % bitsPerPosition); } 65 66 bool isBitSet(unsigned key) const; 67 void setBit(unsigned key); 68 69 std::array<unsigned, tableSize / bitsPerPosition> m_bitArray; 70 }; 71 72 template <unsigned keyBits> 73 inline BloomFilter<keyBits>::BloomFilter() 74 : m_bitArray() 75 { 76 } 77 78 template <unsigned keyBits> 79 inline bool BloomFilter<keyBits>::mayContain(unsigned hash) const 80 { 81 // The top and bottom bits of the incoming hash are treated as independent bloom filter hash functions. 82 // This works well as long as the filter size is not much above 2^16. 83 return isBitSet(hash) && isBitSet(hash >> 16); 84 } 85 86 template <unsigned keyBits> 87 inline void BloomFilter<keyBits>::add(unsigned hash) 88 { 89 setBit(hash); 90 setBit(hash >> 16); 91 } 92 93 template <unsigned keyBits> 94 inline void BloomFilter<keyBits>::add(const BloomFilter& other) 95 { 96 for (size_t i = 0; i < m_bitArray.size(); ++i) 97 m_bitArray[i] |= other.m_bitArray[i]; 98 } 99 100 template <unsigned keyBits> 101 bool BloomFilter<keyBits>::isBitSet(unsigned key) const 102 { 103 unsigned maskedKey = key & keyMask; 104 ASSERT(arrayIndex(maskedKey) < m_bitArray.size()); 105 return m_bitArray[arrayIndex(maskedKey)] & bitMask(maskedKey); 106 } 107 108 template <unsigned keyBits> 109 void BloomFilter<keyBits>::setBit(unsigned key) 110 { 111 unsigned maskedKey = key & keyMask; 112 ASSERT(arrayIndex(maskedKey) < m_bitArray.size()); 113 m_bitArray[arrayIndex(maskedKey)] |= bitMask(maskedKey); 114 } 115 116 template <unsigned keyBits> 117 inline void BloomFilter<keyBits>::clear() 118 { 119 m_bitArray.fill(0); 120 } 121 122 // Counting bloom filter with 8 bit counters. Uses 2^keyBits bytes of memory. Error rates as above. 123 // See http://en.wikipedia.org/wiki/Bloom_filter#Counting_filters 124 template <unsigned keyBits> 125 class CountingBloomFilter { 126 WTF_MAKE_FAST_ALLOCATED; 127 public: 128 static const size_t tableSize = 1 << keyBits; 129 static unsigned maximumCount() { return std::numeric_limits<uint8_t>::max(); } 130 131 CountingBloomFilter(); 45 132 46 133 void add(unsigned hash); … … 49 136 // The filter may give false positives (claim it may contain a key it doesn't) 50 137 // but never false negatives (claim it doesn't contain a key it does). 51 bool mayContain(unsigned hash) const { return first Slot(hash) && secondSlot(hash); }138 bool mayContain(unsigned hash) const { return firstBucket(hash) && secondBucket(hash); } 52 139 53 140 // The filter must be cleared before reuse even if all keys are removed. … … 70 157 71 158 private: 72 uint8_t& firstSlot(unsigned hash) { return m_table[hash & keyMask]; } 73 uint8_t& secondSlot(unsigned hash) { return m_table[(hash >> 16) & keyMask]; } 74 const uint8_t& firstSlot(unsigned hash) const { return m_table[hash & keyMask]; } 75 const uint8_t& secondSlot(unsigned hash) const { return m_table[(hash >> 16) & keyMask]; } 76 77 uint8_t m_table[tableSize]; 159 static const unsigned keyMask = (1 << keyBits) - 1; 160 161 uint8_t& firstBucket(unsigned hash) { return m_buckets[hash & keyMask]; } 162 uint8_t& secondBucket(unsigned hash) { return m_buckets[(hash >> 16) & keyMask]; } 163 const uint8_t& firstBucket(unsigned hash) const { return m_buckets[hash & keyMask]; } 164 const uint8_t& secondBucket(unsigned hash) const { return m_buckets[(hash >> 16) & keyMask]; } 165 166 std::array<uint8_t, tableSize> m_buckets; 78 167 }; 79 80 template <unsigned keyBits> 81 inline void BloomFilter<keyBits>::add(unsigned hash) 82 { 83 uint8_t& first = firstSlot(hash); 84 uint8_t& second = secondSlot(hash); 168 169 template <unsigned keyBits> 170 inline CountingBloomFilter<keyBits>::CountingBloomFilter() 171 : m_buckets() 172 { 173 } 174 175 template <unsigned keyBits> 176 inline void CountingBloomFilter<keyBits>::add(unsigned hash) 177 { 178 auto& first = firstBucket(hash); 179 auto& second = secondBucket(hash); 85 180 if (LIKELY(first < maximumCount())) 86 181 ++first; … … 90 185 91 186 template <unsigned keyBits> 92 inline void BloomFilter<keyBits>::remove(unsigned hash)93 { 94 uint8_t& first = firstSlot(hash);95 uint8_t& second = secondSlot(hash);187 inline void CountingBloomFilter<keyBits>::remove(unsigned hash) 188 { 189 auto& first = firstBucket(hash); 190 auto& second = secondBucket(hash); 96 191 ASSERT(first); 97 192 ASSERT(second); 98 // In case of an overflow, the slot sticks in the table until clear().193 // In case of an overflow, the bucket sticks in the table until clear(). 99 194 if (LIKELY(first < maximumCount())) 100 195 --first; … … 104 199 105 200 template <unsigned keyBits> 106 inline void BloomFilter<keyBits>::clear()107 { 108 m emset(m_table, 0, tableSize);201 inline void CountingBloomFilter<keyBits>::clear() 202 { 203 m_buckets.fill(0); 109 204 } 110 205 111 206 #if !ASSERT_DISABLED 112 207 template <unsigned keyBits> 113 bool BloomFilter<keyBits>::likelyEmpty() const114 { 115 for ( size_t n = 0; n < tableSize; ++n) {116 if ( m_table[n] && m_table[n]!= maximumCount())208 bool CountingBloomFilter<keyBits>::likelyEmpty() const 209 { 210 for (auto& bucket : m_buckets) { 211 if (bucket && bucket != maximumCount()) 117 212 return false; 118 213 } … … 121 216 122 217 template <unsigned keyBits> 123 bool BloomFilter<keyBits>::isClear() const124 { 125 for ( size_t n = 0; n < tableSize; ++n) {126 if ( m_table[n])218 bool CountingBloomFilter<keyBits>::isClear() const 219 { 220 for (auto& bucket : m_buckets) { 221 if (bucket) 127 222 return false; 128 223 } … … 134 229 135 230 using WTF::BloomFilter; 231 using WTF::CountingBloomFilter; 136 232 137 233 #endif -
trunk/Source/WebCore/ChangeLog
r182320 r182321 1 2015-04-03 Antti Koivisto <antti@apple.com> 2 3 Add non-counting bloom filter class 4 https://bugs.webkit.org/show_bug.cgi?id=143366 5 6 Reviewed by Sam Weinig. 7 8 * css/SelectorFilter.cpp: 9 (WebCore::SelectorFilter::setupParentStack): 10 * css/SelectorFilter.h: 11 12 Update names. 13 1 14 2015-04-03 Alex Christensen <achristensen@webkit.org> 2 15 -
trunk/Source/WebCore/css/SelectorFilter.cpp
r179132 r182321 89 89 // Kill whatever we stored before. 90 90 m_parentStack.shrink(0); 91 m_ancestorIdentifierFilter = std::make_unique< BloomFilter<bloomFilterKeyBits>>();91 m_ancestorIdentifierFilter = std::make_unique<CountingBloomFilter<bloomFilterKeyBits>>(); 92 92 // Fast version if parent is a root element: 93 93 if (!parent->parentNode() && !parent->isShadowRoot()) { -
trunk/Source/WebCore/css/SelectorFilter.h
r162772 r182321 65 65 // With 100 unique strings in the filter, 2^12 slot table has false positive rate of ~0.2%. 66 66 static const unsigned bloomFilterKeyBits = 12; 67 std::unique_ptr< BloomFilter<bloomFilterKeyBits>> m_ancestorIdentifierFilter;67 std::unique_ptr<CountingBloomFilter<bloomFilterKeyBits>> m_ancestorIdentifierFilter; 68 68 }; 69 69 -
trunk/Source/WebKit2/ChangeLog
r182315 r182321 1 2015-04-03 Antti Koivisto <antti@apple.com> 2 3 Add non-counting Bloom filter implementation 4 https://bugs.webkit.org/show_bug.cgi?id=143366 5 6 Reviewed by Sam Weinig. 7 8 * NetworkProcess/cache/NetworkCacheStorage.h: 9 1 10 2015-04-03 Zan Dobersek <zdobersek@igalia.com> 2 11 -
trunk/Source/WebKit2/NetworkProcess/cache/NetworkCacheStorage.h
r182271 r182321 119 119 size_t m_maximumSize { std::numeric_limits<size_t>::max() }; 120 120 121 BloomFilter<20> m_contentsFilter;121 CountingBloomFilter<20> m_contentsFilter; 122 122 std::atomic<bool> m_hasPopulatedContentsFilter { false }; 123 123 -
trunk/Tools/ChangeLog
r182318 r182321 1 2015-04-03 Antti Koivisto <antti@apple.com> 2 3 Add non-counting bloom filter class 4 https://bugs.webkit.org/show_bug.cgi?id=143366 5 6 Reviewed by Sam Weinig. 7 8 * TestWebKitAPI/TestWebKitAPI.xcodeproj/project.pbxproj: 9 * TestWebKitAPI/Tests/WTF/BloomFilter.cpp: Added. 10 (TestWebKitAPI::generateRandomHashes): 11 (TestWebKitAPI::TEST): 12 1 13 2015-04-03 Csaba Osztrogonác <ossy@webkit.org> 2 14 -
trunk/Tools/TestWebKitAPI/TestWebKitAPI.xcodeproj/project.pbxproj
r182161 r182321 283 283 E1220DCA155B28AA0013E2FC /* MemoryCacheDisableWithinResourceLoadDelegate.html in Copy Resources */ = {isa = PBXBuildFile; fileRef = E1220DC9155B287D0013E2FC /* MemoryCacheDisableWithinResourceLoadDelegate.html */; }; 284 284 E194E1BD177E53C7009C4D4E /* StopLoadingFromDidReceiveResponse.html in Copy Resources */ = {isa = PBXBuildFile; fileRef = E194E1BC177E534A009C4D4E /* StopLoadingFromDidReceiveResponse.html */; }; 285 E40019331ACE9B88001B0A2A /* BloomFilter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E40019301ACE9B5C001B0A2A /* BloomFilter.cpp */; }; 285 286 F660AA1115A5F631003A1243 /* GetInjectedBundleInitializationUserDataCallback_Bundle.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F660AA0F15A5F624003A1243 /* GetInjectedBundleInitializationUserDataCallback_Bundle.cpp */; }; 286 287 F660AA1515A61ABF003A1243 /* InjectedBundleInitializationUserDataCallbackWins_Bundle.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F660AA1415A61ABF003A1243 /* InjectedBundleInitializationUserDataCallbackWins_Bundle.cpp */; }; … … 688 689 E194E1BA177E5145009C4D4E /* StopLoadingFromDidReceiveResponse.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = StopLoadingFromDidReceiveResponse.mm; sourceTree = "<group>"; }; 689 690 E194E1BC177E534A009C4D4E /* StopLoadingFromDidReceiveResponse.html */ = {isa = PBXFileReference; lastKnownFileType = text.html; path = StopLoadingFromDidReceiveResponse.html; sourceTree = "<group>"; }; 691 E40019301ACE9B5C001B0A2A /* BloomFilter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = BloomFilter.cpp; sourceTree = "<group>"; }; 690 692 E490296714E2E3A4002BEDD1 /* TypingStyleCrash.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = TypingStyleCrash.mm; sourceTree = "<group>"; }; 691 693 E4A757D3178AEA5B00B5D7A4 /* Deque.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Deque.cpp; sourceTree = "<group>"; }; … … 1026 1028 A7A966DA140ECCC8005EF9B4 /* CheckedArithmeticOperations.cpp */, 1027 1029 26A2C72E15E2E73C005B1A14 /* CString.cpp */, 1030 E40019301ACE9B5C001B0A2A /* BloomFilter.cpp */, 1028 1031 7AA021BA1AB09EA70052953F /* DateMath.cpp */, 1029 1032 E4A757D3178AEA5B00B5D7A4 /* Deque.cpp */, … … 1519 1522 7CCE7EA91A411A1D00447C4C /* TestBrowsingContextLoadDelegate.mm in Sources */, 1520 1523 7CCE7EAA1A411A2400447C4C /* TestProtocol.mm in Sources */, 1524 E40019331ACE9B88001B0A2A /* BloomFilter.cpp in Sources */, 1521 1525 7CCE7EAE1A411A3400447C4C /* TestsController.cpp in Sources */, 1522 1526 7CCE7EDD1A411A9200447C4C /* TimeRanges.cpp in Sources */,
Note: See TracChangeset
for help on using the changeset viewer.