作者: 谢瑞阳 10225101483 徐翔宇 10225101535
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

95 regels
2.8 KiB

  1. // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/filter_policy.h"
  5. #include "leveldb/slice.h"
  6. #include "util/hash.h"
  7. namespace leveldb {
  8. namespace {
  9. static uint32_t BloomHash(const Slice& key) {
  10. return Hash(key.data(), key.size(), 0xbc9f1d34);
  11. }
  12. class BloomFilterPolicy : public FilterPolicy {
  13. private:
  14. size_t bits_per_key_;
  15. size_t k_;
  16. public:
  17. explicit BloomFilterPolicy(int bits_per_key)
  18. : bits_per_key_(bits_per_key) {
  19. // We intentionally round down to reduce probing cost a little bit
  20. k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
  21. if (k_ < 1) k_ = 1;
  22. if (k_ > 30) k_ = 30;
  23. }
  24. virtual const char* Name() const {
  25. return "leveldb.BuiltinBloomFilter";
  26. }
  27. virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
  28. // Compute bloom filter size (in both bits and bytes)
  29. size_t bits = n * bits_per_key_;
  30. // For small n, we can see a very high false positive rate. Fix it
  31. // by enforcing a minimum bloom filter length.
  32. if (bits < 64) bits = 64;
  33. size_t bytes = (bits + 7) / 8;
  34. bits = bytes * 8;
  35. const size_t init_size = dst->size();
  36. dst->resize(init_size + bytes, 0);
  37. dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
  38. char* array = &(*dst)[init_size];
  39. for (size_t i = 0; i < n; i++) {
  40. // Use double-hashing to generate a sequence of hash values.
  41. // See analysis in [Kirsch,Mitzenmacher 2006].
  42. uint32_t h = BloomHash(keys[i]);
  43. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  44. for (size_t j = 0; j < k_; j++) {
  45. const uint32_t bitpos = h % bits;
  46. array[bitpos/8] |= (1 << (bitpos % 8));
  47. h += delta;
  48. }
  49. }
  50. }
  51. virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
  52. const size_t len = bloom_filter.size();
  53. if (len < 2) return false;
  54. const char* array = bloom_filter.data();
  55. const size_t bits = (len - 1) * 8;
  56. // Use the encoded k so that we can read filters generated by
  57. // bloom filters created using different parameters.
  58. const size_t k = array[len-1];
  59. if (k > 30) {
  60. // Reserved for potentially new encodings for short bloom filters.
  61. // Consider it a match.
  62. return true;
  63. }
  64. uint32_t h = BloomHash(key);
  65. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  66. for (size_t j = 0; j < k; j++) {
  67. const uint32_t bitpos = h % bits;
  68. if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
  69. h += delta;
  70. }
  71. return true;
  72. }
  73. };
  74. }
  75. const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  76. return new BloomFilterPolicy(bits_per_key);
  77. }
  78. } // namespace leveldb