作者: 谢瑞阳 10225101483 徐翔宇 10225101535
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

92 rader
2.9 KiB

  1. // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/filter_policy.h"
  5. #include "leveldb/slice.h"
  6. #include "util/hash.h"
  7. namespace leveldb {
  8. namespace {
  9. static uint32_t BloomHash(const Slice& key) {
  10. return Hash(key.data(), key.size(), 0xbc9f1d34);
  11. }
  12. class BloomFilterPolicy : public FilterPolicy {
  13. private:
  14. size_t bits_per_key_;
  15. size_t k_;
  16. public:
  17. explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
  18. // We intentionally round down to reduce probing cost a little bit
  19. k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
  20. if (k_ < 1) k_ = 1;
  21. if (k_ > 30) k_ = 30;
  22. }
  23. virtual const char* Name() const { return "leveldb.BuiltinBloomFilter2"; }
  24. virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
  25. // Compute bloom filter size (in both bits and bytes)
  26. size_t bits = n * bits_per_key_;
  27. // For small n, we can see a very high false positive rate. Fix it
  28. // by enforcing a minimum bloom filter length.
  29. if (bits < 64) bits = 64;
  30. size_t bytes = (bits + 7) / 8;
  31. bits = bytes * 8;
  32. const size_t init_size = dst->size();
  33. dst->resize(init_size + bytes, 0);
  34. dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
  35. char* array = &(*dst)[init_size];
  36. for (int i = 0; i < n; i++) {
  37. // Use double-hashing to generate a sequence of hash values.
  38. // See analysis in [Kirsch,Mitzenmacher 2006].
  39. uint32_t h = BloomHash(keys[i]);
  40. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  41. for (size_t j = 0; j < k_; j++) {
  42. const uint32_t bitpos = h % bits;
  43. array[bitpos / 8] |= (1 << (bitpos % 8));
  44. h += delta;
  45. }
  46. }
  47. }
  48. virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
  49. const size_t len = bloom_filter.size();
  50. if (len < 2) return false;
  51. const char* array = bloom_filter.data();
  52. const size_t bits = (len - 1) * 8;
  53. // Use the encoded k so that we can read filters generated by
  54. // bloom filters created using different parameters.
  55. const size_t k = array[len - 1];
  56. if (k > 30) {
  57. // Reserved for potentially new encodings for short bloom filters.
  58. // Consider it a match.
  59. return true;
  60. }
  61. uint32_t h = BloomHash(key);
  62. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  63. for (size_t j = 0; j < k; j++) {
  64. const uint32_t bitpos = h % bits;
  65. if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
  66. h += delta;
  67. }
  68. return true;
  69. }
  70. };
  71. } // namespace
  72. const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  73. return new BloomFilterPolicy(bits_per_key);
  74. }
  75. } // namespace leveldb