Changeset 182363 in webkit


Ignore:
Timestamp:
Apr 5, 2015 11:45:38 AM (9 years ago)
Author:
Antti Koivisto
Message:

Bloom filter should support longer hashes
https://bugs.webkit.org/show_bug.cgi?id=143419

Reviewed by Dan Bernstein.

Source/WebKit2:

Use the hash digest directly in the contents filter instead of going via shortHash.

  • NetworkProcess/cache/NetworkCacheKey.h:

(WebKit::NetworkCache::Key::hash):
(WebKit::NetworkCache::Key::shortHash): Deleted.
(WebKit::NetworkCache::Key::toShortHash): Deleted.

No longer needed.

  • NetworkProcess/cache/NetworkCacheStorage.cpp:

(WebKit::NetworkCache::Storage::synchronize):
(WebKit::NetworkCache::Storage::addToContentsFilter):
(WebKit::NetworkCache::Storage::mayContain):

  • NetworkProcess/cache/NetworkCacheStorage.h:

Source/WTF:

It currently supports 'unsigned' hash only which is inconvenient and doesn't have enough independent bits for larger filters.

Support arbitrarily sized hashes of type std::array<uint8_t, hashSize> (like SHA1::Digest and MD5::Digest).

  • wtf/BloomFilter.h:

(WTF::BloomFilter<keyBits>::keysFromHash):
(WTF::BloomFilter<keyBits>::mayContain):
(WTF::BloomFilter<keyBits>::add):

Tools:

  • TestWebKitAPI/Tests/WTF/BloomFilter.cpp:

(TestWebKitAPI::generateRandomDigests):
(TestWebKitAPI::TEST):

Location:
trunk
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WTF/ChangeLog

    r182321 r182363  
     12015-04-05  Antti Koivisto  <antti@apple.com>
     2
     3        Bloom filter should support longer hashes
     4        https://bugs.webkit.org/show_bug.cgi?id=143419
     5
     6        Reviewed by Dan Bernstein.
     7
     8        It currently supports 'unsigned' hash only which is inconvenient and doesn't have enough independent bits for larger filters.
     9
     10        Support arbitrarily sized hashes of type std::array<uint8_t, hashSize> (like SHA1::Digest and MD5::Digest).
     11
     12        * wtf/BloomFilter.h:
     13        (WTF::BloomFilter<keyBits>::keysFromHash):
     14        (WTF::BloomFilter<keyBits>::mayContain):
     15        (WTF::BloomFilter<keyBits>::add):
     16
    1172015-04-03  Antti Koivisto  <antti@apple.com>
    218
  • trunk/Source/WTF/wtf/BloomFilter.h

    r182321 r182363  
    4545
    4646    void add(unsigned hash);
     47    // For example SHA1::Digest.
     48    template <size_t hashSize> void add(const std::array<uint8_t, hashSize>&);
     49
    4750    void add(const BloomFilter&);
    4851
     
    5053    // but never false negatives (claim it doesn't contain a key it does).
    5154    bool mayContain(unsigned hash) const;
     55    template <size_t hashSize> bool mayContain(const std::array<uint8_t, hashSize>&) const;
    5256   
    5357    void clear();
     
    6367    static unsigned arrayIndex(unsigned key) { return key / bitsPerPosition; }
    6468    static unsigned bitMask(unsigned key) { return 1 << (key % bitsPerPosition); }
     69    template <size_t hashSize> static std::pair<unsigned, unsigned> keysFromHash(const std::array<uint8_t, hashSize>&);
    6570
    6671    bool isBitSet(unsigned key) const;
     
    8994    setBit(hash);
    9095    setBit(hash >> 16);
     96}
     97
     98template <unsigned keyBits>
     99template <size_t hashSize>
     100inline std::pair<unsigned, unsigned> BloomFilter<keyBits>::keysFromHash(const std::array<uint8_t, hashSize>& hash)
     101{
     102    // We could use larger k value than 2 for long hashes.
     103    static_assert(hashSize >= 2 * sizeof(unsigned), "Hash array too short");
     104    return {
     105        *reinterpret_cast<const unsigned*>(hash.data()),
     106        *reinterpret_cast<const unsigned*>(hash.data() + sizeof(unsigned))
     107    };
     108}
     109
     110template <unsigned keyBits>
     111template <size_t hashSize>
     112inline bool BloomFilter<keyBits>::mayContain(const std::array<uint8_t, hashSize>& hash) const
     113{
     114    auto keys = keysFromHash(hash);
     115    return isBitSet(keys.first) && isBitSet(keys.second);
     116}
     117
     118template <unsigned keyBits>
     119template <size_t hashSize>
     120inline void BloomFilter<keyBits>::add(const std::array<uint8_t, hashSize>& hash)
     121{
     122    auto keys = keysFromHash(hash);
     123    setBit(keys.first);
     124    setBit(keys.second);
    91125}
    92126
  • trunk/Source/WebKit2/ChangeLog

    r182362 r182363  
     12015-04-05  Antti Koivisto  <antti@apple.com>
     2
     3        Bloom filter should support longer hashes
     4        https://bugs.webkit.org/show_bug.cgi?id=143419
     5
     6        Reviewed by Dan Bernstein.
     7
     8        Use the hash digest directly in the contents filter instead of going via shortHash.
     9
     10        * NetworkProcess/cache/NetworkCacheKey.h:
     11        (WebKit::NetworkCache::Key::hash):
     12        (WebKit::NetworkCache::Key::shortHash): Deleted.
     13        (WebKit::NetworkCache::Key::toShortHash): Deleted.
     14
     15            No longer needed.
     16
     17        * NetworkProcess/cache/NetworkCacheStorage.cpp:
     18        (WebKit::NetworkCache::Storage::synchronize):
     19        (WebKit::NetworkCache::Storage::addToContentsFilter):
     20        (WebKit::NetworkCache::Storage::mayContain):
     21        * NetworkProcess/cache/NetworkCacheStorage.h:
     22
    1232015-04-05  Antti Koivisto  <antti@apple.com>
    224
  • trunk/Source/WebKit2/NetworkProcess/cache/NetworkCacheKey.h

    r182296 r182363  
    5656
    5757    HashType hash() const { return m_hash; }
    58     unsigned shortHash() const  { return toShortHash(m_hash); }
    5958
    60     static unsigned toShortHash(const HashType& hash) { return *reinterpret_cast<const unsigned*>(hash.data()); }
    6159    static bool stringToHash(const String&, HashType&);
    6260
  • trunk/Source/WebKit2/NetworkProcess/cache/NetworkCacheStorage.cpp

    r182362 r182363  
    100100            if (!fileSize)
    101101                return;
    102             filter->add(Key::toShortHash(hash));
     102            filter->add(hash);
    103103            size += fileSize;
    104104            ++count;
     
    127127
    128128    if (m_contentsFilter)
    129         m_contentsFilter->add(key.shortHash());
     129        m_contentsFilter->add(key.hash());
    130130
    131131    // If we get new entries during filter synchronization take care to add them to the new filter as well.
    132132    if (m_synchronizationInProgress)
    133         m_contentsFilterHashesAddedDuringSynchronization.append(key.shortHash());
     133        m_contentsFilterHashesAddedDuringSynchronization.append(key.hash());
    134134}
    135135
     
    137137{
    138138    ASSERT(RunLoop::isMain());
    139     return !m_contentsFilter || m_contentsFilter->mayContain(key.shortHash());
     139    return !m_contentsFilter || m_contentsFilter->mayContain(key.hash());
    140140}
    141141
  • trunk/Source/WebKit2/NetworkProcess/cache/NetworkCacheStorage.h

    r182361 r182363  
    130130    bool m_shrinkInProgress { false };
    131131
    132     Vector<unsigned> m_contentsFilterHashesAddedDuringSynchronization;
     132    Vector<Key::HashType> m_contentsFilterHashesAddedDuringSynchronization;
    133133
    134134    static const int maximumRetrievePriority = 4;
  • trunk/Tools/ChangeLog

    r182332 r182363  
     12015-04-05  Antti Koivisto  <antti@apple.com>
     2
     3        Bloom filter should support longer hashes
     4        https://bugs.webkit.org/show_bug.cgi?id=143419
     5
     6        Reviewed by Dan Bernstein.
     7
     8        * TestWebKitAPI/Tests/WTF/BloomFilter.cpp:
     9        (TestWebKitAPI::generateRandomDigests):
     10        (TestWebKitAPI::TEST):
     11
    1122015-04-03  Csaba Osztrogonác  <ossy@webkit.org>
    213
  • trunk/Tools/TestWebKitAPI/Tests/WTF/BloomFilter.cpp

    r182321 r182363  
    2828#include <wtf/BloomFilter.h>
    2929#include <wtf/RandomNumber.h>
     30#include <wtf/SHA1.h>
    3031
    3132namespace TestWebKitAPI {
     
    3940}
    4041
     42static Vector<SHA1::Digest> generateRandomDigests(size_t hashCount)
     43{
     44    Vector<SHA1::Digest> hashes;
     45    SHA1 sha1;
     46    for (unsigned i = 0; i < hashCount; ++i) {
     47        double random = randomNumber();
     48        sha1.addBytes(reinterpret_cast<uint8_t*>(&random), sizeof(double));
     49        SHA1::Digest digest;
     50        sha1.computeHash(digest);
     51        hashes.append(digest);
     52    }
     53    return hashes;
     54}
     55
    4156TEST(WTF_BloomFilter, Basic)
    4257{
     
    5671        mayContainCount += filter.mayContain(hash) ? 1 : 0;
    5772    // False positive rate is ~0.09% so this should always be true.
     73    EXPECT_TRUE(mayContainCount < hashCount / 10);
     74
     75    for (auto hash : moreHashes)
     76        filter.add(hash);
     77
     78    for (auto hash : hashes)
     79        EXPECT_TRUE(filter.mayContain(hash));
     80    for (auto hash : moreHashes)
     81        EXPECT_TRUE(filter.mayContain(hash));
     82}
     83
     84TEST(WTF_BloomFilter, BasicDigest)
     85{
     86    const unsigned hashCount = 1000;
     87    auto hashes = generateRandomDigests(hashCount);
     88
     89    BloomFilter<20> filter;
     90    for (auto hash : hashes)
     91        filter.add(hash);
     92
     93    for (auto hash : hashes)
     94        EXPECT_TRUE(filter.mayContain(hash));
     95
     96    auto moreHashes = generateRandomDigests(hashCount);
     97    unsigned mayContainCount = 0;
     98    for (auto hash : moreHashes)
     99        mayContainCount += filter.mayContain(hash) ? 1 : 0;
     100    // False positive rate is ~0.000004% so this should always be true.
    58101    EXPECT_TRUE(mayContainCount < hashCount / 10);
    59102
Note: See TracChangeset for help on using the changeset viewer.