In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter) which are placed in sstables. These summaries are consulted by DB::Get() so we can avoid reading sstable blocks that are guaranteed to not contain the key we are looking for. This change provides one implementation of FilterPolicy based on bloom filters. Other changes: - Updated version number to 1.4. - Some build tweaks. - C binding for CompactRange. - A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom. - Minor .gitignore update.
| @ -1,5 +1,8 @@ | |||||
| build_config.mk | build_config.mk | ||||
| *.a | *.a | ||||
| *.o | *.o | ||||
| *.dylib* | |||||
| *.so | |||||
| *.so.* | |||||
| *_test | *_test | ||||
| db_bench | db_bench | ||||
| @ -0,0 +1,70 @@ | |||||
| // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |||||
| // Use of this source code is governed by a BSD-style license that can be | |||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||||
| // | |||||
| // A database can be configured with a custom FilterPolicy object. | |||||
| // This object is responsible for creating a small filter from a set | |||||
| // of keys. These filters are stored in leveldb and are consulted | |||||
| // automatically by leveldb to decide whether or not to read some | |||||
| // information from disk. In many cases, a filter can cut down the | |||||
| // number of disk seeks from a handful to a single disk seek per | |||||
| // DB::Get() call. | |||||
| // | |||||
| // Most people will want to use the builtin bloom filter support (see | |||||
| // NewBloomFilterPolicy() below). | |||||
| #ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ | |||||
| #define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ | |||||
| #include <string> | |||||
| namespace leveldb { | |||||
| class Slice; | |||||
| class FilterPolicy { | |||||
| public: | |||||
| virtual ~FilterPolicy(); | |||||
| // Return the name of this policy. Note that if the filter encoding | |||||
| // changes in an incompatible way, the name returned by this method | |||||
| // must be changed. Otherwise, old incompatible filters may be | |||||
| // passed to methods of this type. | |||||
| virtual const char* Name() const = 0; | |||||
| // keys[0,n-1] contains a list of keys (potentially with duplicates) | |||||
| // that are ordered according to the user supplied comparator. | |||||
| // Append a filter that summarizes keys[0,n-1] to *dst. | |||||
| // | |||||
| // Warning: do not change the initial contents of *dst. Instead, | |||||
| // append the newly constructed filter to *dst. | |||||
| virtual void CreateFilter(const Slice* keys, int n, std::string* dst) | |||||
| const = 0; | |||||
| // "filter" contains the data appended by a preceding call to | |||||
| // CreateFilter() on this class. This method must return true if | |||||
| // the key was in the list of keys passed to CreateFilter(). | |||||
| // This method may return true or false if the key was not on the | |||||
| // list, but it should aim to return false with a high probability. | |||||
| virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; | |||||
| }; | |||||
| // Return a new filter policy that uses a bloom filter with approximately | |||||
| // the specified number of bits per key. A good value for bits_per_key | |||||
| // is 10, which yields a filter with ~ 1% false positive rate. | |||||
| // | |||||
| // Callers must delete the result after any database that is using the | |||||
| // result has been closed. | |||||
| // | |||||
| // Note: if you are using a custom comparator that ignores some parts | |||||
| // of the keys being compared, you must not use NewBloomFilterPolicy() | |||||
| // and must provide your own FilterPolicy that also ignores the | |||||
| // corresponding parts of the keys. For example, if the comparator | |||||
| // ignores trailing spaces, it would be incorrect to use a | |||||
| // FilterPolicy (like NewBloomFilterPolicy) that does not ignore | |||||
| // trailing spaces in keys. | |||||
| extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key); | |||||
| } | |||||
| #endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ | |||||
| @ -0,0 +1,111 @@ | |||||
| // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |||||
| // Use of this source code is governed by a BSD-style license that can be | |||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||||
| #include "table/filter_block.h" | |||||
| #include "leveldb/filter_policy.h" | |||||
| #include "util/coding.h" | |||||
| namespace leveldb { | |||||
| // See doc/table_format.txt for an explanation of the filter block format. | |||||
| // Generate new filter every 2KB of data | |||||
| static const size_t kFilterBaseLg = 11; | |||||
| static const size_t kFilterBase = 1 << kFilterBaseLg; | |||||
| FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy) | |||||
| : policy_(policy) { | |||||
| } | |||||
| void FilterBlockBuilder::StartBlock(uint64_t block_offset) { | |||||
| uint64_t filter_index = (block_offset / kFilterBase); | |||||
| assert(filter_index >= filter_offsets_.size()); | |||||
| while (filter_index > filter_offsets_.size()) { | |||||
| GenerateFilter(); | |||||
| } | |||||
| } | |||||
| void FilterBlockBuilder::AddKey(const Slice& key) { | |||||
| Slice k = key; | |||||
| start_.push_back(keys_.size()); | |||||
| keys_.append(k.data(), k.size()); | |||||
| } | |||||
| Slice FilterBlockBuilder::Finish() { | |||||
| if (!start_.empty()) { | |||||
| GenerateFilter(); | |||||
| } | |||||
| // Append array of per-filter offsets | |||||
| const uint32_t array_offset = result_.size(); | |||||
| for (size_t i = 0; i < filter_offsets_.size(); i++) { | |||||
| PutFixed32(&result_, filter_offsets_[i]); | |||||
| } | |||||
| PutFixed32(&result_, array_offset); | |||||
| result_.push_back(kFilterBaseLg); // Save encoding parameter in result | |||||
| return Slice(result_); | |||||
| } | |||||
| void FilterBlockBuilder::GenerateFilter() { | |||||
| const size_t num_keys = start_.size(); | |||||
| if (num_keys == 0) { | |||||
| // Fast path if there are no keys for this filter | |||||
| filter_offsets_.push_back(result_.size()); | |||||
| return; | |||||
| } | |||||
| // Make list of keys from flattened key structure | |||||
| start_.push_back(keys_.size()); // Simplify length computation | |||||
| tmp_keys_.resize(num_keys); | |||||
| for (size_t i = 0; i < num_keys; i++) { | |||||
| const char* base = keys_.data() + start_[i]; | |||||
| size_t length = start_[i+1] - start_[i]; | |||||
| tmp_keys_[i] = Slice(base, length); | |||||
| } | |||||
| // Generate filter for current set of keys and append to result_. | |||||
| filter_offsets_.push_back(result_.size()); | |||||
| policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_); | |||||
| tmp_keys_.clear(); | |||||
| keys_.clear(); | |||||
| start_.clear(); | |||||
| } | |||||
| FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, | |||||
| const Slice& contents) | |||||
| : policy_(policy), | |||||
| data_(NULL), | |||||
| offset_(NULL), | |||||
| num_(0), | |||||
| base_lg_(0) { | |||||
| size_t n = contents.size(); | |||||
| if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array | |||||
| base_lg_ = contents[n-1]; | |||||
| uint32_t last_word = DecodeFixed32(contents.data() + n - 5); | |||||
| if (last_word > n - 5) return; | |||||
| data_ = contents.data(); | |||||
| offset_ = data_ + last_word; | |||||
| num_ = (n - 5 - last_word) / 4; | |||||
| } | |||||
| bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) { | |||||
| uint64_t index = block_offset >> base_lg_; | |||||
| if (index < num_) { | |||||
| uint32_t start = DecodeFixed32(offset_ + index*4); | |||||
| uint32_t limit = DecodeFixed32(offset_ + index*4 + 4); | |||||
| if (start <= limit && limit <= (offset_ - data_)) { | |||||
| Slice filter = Slice(data_ + start, limit - start); | |||||
| return policy_->KeyMayMatch(key, filter); | |||||
| } else if (start == limit) { | |||||
| // Empty filters do not match any keys | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; // Errors are treated as potential matches | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,68 @@ | |||||
| // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |||||
| // Use of this source code is governed by a BSD-style license that can be | |||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||||
| // | |||||
| // A filter block is stored near the end of a Table file. It contains | |||||
| // filters (e.g., bloom filters) for all data blocks in the table combined | |||||
| // into a single filter block. | |||||
| #ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ | |||||
| #define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ | |||||
| #include <stddef.h> | |||||
| #include <stdint.h> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "leveldb/slice.h" | |||||
| #include "util/hash.h" | |||||
| namespace leveldb { | |||||
| class FilterPolicy; | |||||
| // A FilterBlockBuilder is used to construct all of the filters for a | |||||
| // particular Table. It generates a single string which is stored as | |||||
| // a special block in the Table. | |||||
| // | |||||
| // The sequence of calls to FilterBlockBuilder must match the regexp: | |||||
| // (StartBlock AddKey*)* Finish | |||||
| class FilterBlockBuilder { | |||||
| public: | |||||
| explicit FilterBlockBuilder(const FilterPolicy*); | |||||
| void StartBlock(uint64_t block_offset); | |||||
| void AddKey(const Slice& key); | |||||
| Slice Finish(); | |||||
| private: | |||||
| void GenerateFilter(); | |||||
| const FilterPolicy* policy_; | |||||
| std::string keys_; // Flattened key contents | |||||
| std::vector<size_t> start_; // Starting index in keys_ of each key | |||||
| std::string result_; // Filter data computed so far | |||||
| std::vector<Slice> tmp_keys_; // policy_->CreateFilter() argument | |||||
| std::vector<uint32_t> filter_offsets_; | |||||
| // No copying allowed | |||||
| FilterBlockBuilder(const FilterBlockBuilder&); | |||||
| void operator=(const FilterBlockBuilder&); | |||||
| }; | |||||
| class FilterBlockReader { | |||||
| public: | |||||
| // REQUIRES: "contents" and *policy must stay live while *this is live. | |||||
| FilterBlockReader(const FilterPolicy* policy, const Slice& contents); | |||||
| bool KeyMayMatch(uint64_t block_offset, const Slice& key); | |||||
| private: | |||||
| const FilterPolicy* policy_; | |||||
| const char* data_; // Pointer to filter data (at block-start) | |||||
| const char* offset_; // Pointer to beginning of offset array (at block-end) | |||||
| size_t num_; // Number of entries in offset array | |||||
| size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file) | |||||
| }; | |||||
| } | |||||
| #endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ | |||||
| @ -0,0 +1,128 @@ | |||||
| // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |||||
| // Use of this source code is governed by a BSD-style license that can be | |||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||||
| #include "table/filter_block.h" | |||||
| #include "leveldb/filter_policy.h" | |||||
| #include "util/coding.h" | |||||
| #include "util/hash.h" | |||||
| #include "util/logging.h" | |||||
| #include "util/testharness.h" | |||||
| #include "util/testutil.h" | |||||
| namespace leveldb { | |||||
| // For testing: emit an array with one hash value per key | |||||
| class TestHashFilter : public FilterPolicy { | |||||
| public: | |||||
| virtual const char* Name() const { | |||||
| return "TestHashFilter"; | |||||
| } | |||||
| virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { | |||||
| for (int i = 0; i < n; i++) { | |||||
| uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); | |||||
| PutFixed32(dst, h); | |||||
| } | |||||
| } | |||||
| virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { | |||||
| uint32_t h = Hash(key.data(), key.size(), 1); | |||||
| for (int i = 0; i + 4 <= filter.size(); i += 4) { | |||||
| if (h == DecodeFixed32(filter.data() + i)) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| }; | |||||
| class FilterBlockTest { | |||||
| public: | |||||
| TestHashFilter policy_; | |||||
| }; | |||||
| TEST(FilterBlockTest, EmptyBuilder) { | |||||
| FilterBlockBuilder builder(&policy_); | |||||
| Slice block = builder.Finish(); | |||||
| ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block)); | |||||
| FilterBlockReader reader(&policy_, block); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(100000, "foo")); | |||||
| } | |||||
| TEST(FilterBlockTest, SingleChunk) { | |||||
| FilterBlockBuilder builder(&policy_); | |||||
| builder.StartBlock(100); | |||||
| builder.AddKey("foo"); | |||||
| builder.AddKey("bar"); | |||||
| builder.AddKey("box"); | |||||
| builder.StartBlock(200); | |||||
| builder.AddKey("box"); | |||||
| builder.StartBlock(300); | |||||
| builder.AddKey("hello"); | |||||
| Slice block = builder.Finish(); | |||||
| FilterBlockReader reader(&policy_, block); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(100, "bar")); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(100, "box")); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(100, "hello")); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(100, "missing")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(100, "other")); | |||||
| } | |||||
| TEST(FilterBlockTest, MultiChunk) { | |||||
| FilterBlockBuilder builder(&policy_); | |||||
| // First filter | |||||
| builder.StartBlock(0); | |||||
| builder.AddKey("foo"); | |||||
| builder.StartBlock(2000); | |||||
| builder.AddKey("bar"); | |||||
| // Second filter | |||||
| builder.StartBlock(3100); | |||||
| builder.AddKey("box"); | |||||
| // Third filter is empty | |||||
| // Last filter | |||||
| builder.StartBlock(9000); | |||||
| builder.AddKey("box"); | |||||
| builder.AddKey("hello"); | |||||
| Slice block = builder.Finish(); | |||||
| FilterBlockReader reader(&policy_, block); | |||||
| // Check first filter | |||||
| ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(2000, "bar")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(0, "box")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(0, "hello")); | |||||
| // Check second filter | |||||
| ASSERT_TRUE(reader.KeyMayMatch(3100, "box")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello")); | |||||
| // Check third filter (empty) | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(4100, "box")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello")); | |||||
| // Check last filter | |||||
| ASSERT_TRUE(reader.KeyMayMatch(9000, "box")); | |||||
| ASSERT_TRUE(reader.KeyMayMatch(9000, "hello")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo")); | |||||
| ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar")); | |||||
| } | |||||
| } // namespace leveldb | |||||
| int main(int argc, char** argv) { | |||||
| return leveldb::test::RunAllTests(); | |||||
| } | |||||
| @ -0,0 +1,95 @@ | |||||
| // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |||||
| // Use of this source code is governed by a BSD-style license that can be | |||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||||
| #include "leveldb/filter_policy.h" | |||||
| #include "leveldb/slice.h" | |||||
| #include "util/hash.h" | |||||
| namespace leveldb { | |||||
| namespace { | |||||
| static uint32_t BloomHash(const Slice& key) { | |||||
| return Hash(key.data(), key.size(), 0xbc9f1d34); | |||||
| } | |||||
| class BloomFilterPolicy : public FilterPolicy { | |||||
| private: | |||||
| size_t bits_per_key_; | |||||
| size_t k_; | |||||
| public: | |||||
| explicit BloomFilterPolicy(int bits_per_key) | |||||
| : bits_per_key_(bits_per_key) { | |||||
| // We intentionally round down to reduce probing cost a little bit | |||||
| k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2) | |||||
| if (k_ < 1) k_ = 1; | |||||
| if (k_ > 30) k_ = 30; | |||||
| } | |||||
| virtual const char* Name() const { | |||||
| return "leveldb.BuiltinBloomFilter"; | |||||
| } | |||||
| virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { | |||||
| // Compute bloom filter size (in both bits and bytes) | |||||
| size_t bits = n * bits_per_key_; | |||||
| // For small n, we can see a very high false positive rate. Fix it | |||||
| // by enforcing a minimum bloom filter length. | |||||
| if (bits < 64) bits = 64; | |||||
| size_t bytes = (bits + 7) / 8; | |||||
| bits = bytes * 8; | |||||
| const size_t init_size = dst->size(); | |||||
| dst->resize(init_size + bytes, 0); | |||||
| dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter | |||||
| char* array = &(*dst)[init_size]; | |||||
| for (size_t i = 0; i < n; i++) { | |||||
| // Use double-hashing to generate a sequence of hash values. | |||||
| // See analysis in [Kirsch,Mitzenmacher 2006]. | |||||
| uint32_t h = BloomHash(keys[i]); | |||||
| const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits | |||||
| for (size_t j = 0; j < k_; j++) { | |||||
| const uint32_t bitpos = h % bits; | |||||
| array[bitpos/8] |= (1 << (bitpos % 8)); | |||||
| h += delta; | |||||
| } | |||||
| } | |||||
| } | |||||
| virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const { | |||||
| const size_t len = bloom_filter.size(); | |||||
| if (len < 2) return false; | |||||
| const char* array = bloom_filter.data(); | |||||
| const size_t bits = (len - 1) * 8; | |||||
| // Use the encoded k so that we can read filters generated by | |||||
| // bloom filters created using different parameters. | |||||
| const size_t k = array[len-1]; | |||||
| if (k > 30) { | |||||
| // Reserved for potentially new encodings for short bloom filters. | |||||
| // Consider it a match. | |||||
| return true; | |||||
| } | |||||
| uint32_t h = BloomHash(key); | |||||
| const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits | |||||
| for (size_t j = 0; j < k; j++) { | |||||
| const uint32_t bitpos = h % bits; | |||||
| if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; | |||||
| h += delta; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| }; | |||||
| } | |||||
| const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { | |||||
| return new BloomFilterPolicy(bits_per_key); | |||||
| } | |||||
| } // namespace leveldb | |||||
| @ -0,0 +1,159 @@ | |||||
| // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |||||
| // Use of this source code is governed by a BSD-style license that can be | |||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||||
| #include "leveldb/filter_policy.h" | |||||
| #include "util/logging.h" | |||||
| #include "util/testharness.h" | |||||
| #include "util/testutil.h" | |||||
| namespace leveldb { | |||||
| static const int kVerbose = 1; | |||||
| static Slice Key(int i, char* buffer) { | |||||
| memcpy(buffer, &i, sizeof(i)); | |||||
| return Slice(buffer, sizeof(i)); | |||||
| } | |||||
| class BloomTest { | |||||
| private: | |||||
| const FilterPolicy* policy_; | |||||
| std::string filter_; | |||||
| std::vector<std::string> keys_; | |||||
| public: | |||||
| BloomTest() : policy_(NewBloomFilterPolicy(10)) { } | |||||
| ~BloomTest() { | |||||
| delete policy_; | |||||
| } | |||||
| void Reset() { | |||||
| keys_.clear(); | |||||
| filter_.clear(); | |||||
| } | |||||
| void Add(const Slice& s) { | |||||
| keys_.push_back(s.ToString()); | |||||
| } | |||||
| void Build() { | |||||
| std::vector<Slice> key_slices; | |||||
| for (size_t i = 0; i < keys_.size(); i++) { | |||||
| key_slices.push_back(Slice(keys_[i])); | |||||
| } | |||||
| filter_.clear(); | |||||
| policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_); | |||||
| keys_.clear(); | |||||
| if (kVerbose >= 2) DumpFilter(); | |||||
| } | |||||
| size_t FilterSize() const { | |||||
| return filter_.size(); | |||||
| } | |||||
| void DumpFilter() { | |||||
| fprintf(stderr, "F("); | |||||
| for (size_t i = 0; i+1 < filter_.size(); i++) { | |||||
| const unsigned int c = static_cast<unsigned int>(filter_[i]); | |||||
| for (int j = 0; j < 8; j++) { | |||||
| fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.'); | |||||
| } | |||||
| } | |||||
| fprintf(stderr, ")\n"); | |||||
| } | |||||
| bool Matches(const Slice& s) { | |||||
| if (!keys_.empty()) { | |||||
| Build(); | |||||
| } | |||||
| return policy_->KeyMayMatch(s, filter_); | |||||
| } | |||||
| double FalsePositiveRate() { | |||||
| char buffer[sizeof(int)]; | |||||
| int result = 0; | |||||
| for (int i = 0; i < 10000; i++) { | |||||
| if (Matches(Key(i + 1000000000, buffer))) { | |||||
| result++; | |||||
| } | |||||
| } | |||||
| return result / 10000.0; | |||||
| } | |||||
| }; | |||||
| TEST(BloomTest, EmptyFilter) { | |||||
| ASSERT_TRUE(! Matches("hello")); | |||||
| ASSERT_TRUE(! Matches("world")); | |||||
| } | |||||
| TEST(BloomTest, Small) { | |||||
| Add("hello"); | |||||
| Add("world"); | |||||
| ASSERT_TRUE(Matches("hello")); | |||||
| ASSERT_TRUE(Matches("world")); | |||||
| ASSERT_TRUE(! Matches("x")); | |||||
| ASSERT_TRUE(! Matches("foo")); | |||||
| } | |||||
// Advance the test length: step by 1 up to 10, by 10 up to 100, by 100
// up to 1000, then by 1000.
static int NextLength(int length) {
  if (length < 10) return length + 1;
  if (length < 100) return length + 10;
  if (length < 1000) return length + 100;
  return length + 1000;
}
| TEST(BloomTest, VaryingLengths) { | |||||
| char buffer[sizeof(int)]; | |||||
| // Count number of filters that significantly exceed the false positive rate | |||||
| int mediocre_filters = 0; | |||||
| int good_filters = 0; | |||||
| for (int length = 1; length <= 10000; length = NextLength(length)) { | |||||
| Reset(); | |||||
| for (int i = 0; i < length; i++) { | |||||
| Add(Key(i, buffer)); | |||||
| } | |||||
| Build(); | |||||
| ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length; | |||||
| // All added keys must match | |||||
| for (int i = 0; i < length; i++) { | |||||
| ASSERT_TRUE(Matches(Key(i, buffer))) | |||||
| << "Length " << length << "; key " << i; | |||||
| } | |||||
| // Check false positive rate | |||||
| double rate = FalsePositiveRate(); | |||||
| if (kVerbose >= 1) { | |||||
| fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", | |||||
| rate*100.0, length, static_cast<int>(FilterSize())); | |||||
| } | |||||
| ASSERT_LE(rate, 0.02); // Must not be over 2% | |||||
| if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often | |||||
| else good_filters++; | |||||
| } | |||||
| if (kVerbose >= 1) { | |||||
| fprintf(stderr, "Filters: %d good, %d mediocre\n", | |||||
| good_filters, mediocre_filters); | |||||
| } | |||||
| ASSERT_LE(mediocre_filters, good_filters/5); | |||||
| } | |||||
| // Different bits-per-byte | |||||
| } // namespace leveldb | |||||
| int main(int argc, char** argv) { | |||||
| return leveldb::test::RunAllTests(); | |||||
| } | |||||
| @ -0,0 +1,11 @@ | |||||
| // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |||||
| // Use of this source code is governed by a BSD-style license that can be | |||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||||
| #include "leveldb/filter_policy.h" | |||||
| namespace leveldb { | |||||
| FilterPolicy::~FilterPolicy() { } | |||||
| } // namespace leveldb | |||||