10225501448 李度 10225101546 陈胤遒 10215501422 高宇菲
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

92 lignes
2.9 KiB

  1. // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/filter_policy.h"
  5. #include "leveldb/slice.h"
  6. #include "util/hash.h"
  7. namespace leveldb {
  8. namespace {
  9. static uint32_t BloomHash(const Slice& key) {
  10. return Hash(key.data(), key.size(), 0xbc9f1d34);
  11. }
  12. class BloomFilterPolicy : public FilterPolicy {
  13. public:
  14. explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
  15. // We intentionally round down to reduce probing cost a little bit
  16. k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
  17. if (k_ < 1) k_ = 1;
  18. if (k_ > 30) k_ = 30;
  19. }
  20. const char* Name() const override { return "leveldb.BuiltinBloomFilter2"; }
  21. void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
  22. // Compute bloom filter size (in both bits and bytes)
  23. size_t bits = n * bits_per_key_;
  24. // For small n, we can see a very high false positive rate. Fix it
  25. // by enforcing a minimum bloom filter length.
  26. if (bits < 64) bits = 64;
  27. size_t bytes = (bits + 7) / 8;
  28. bits = bytes * 8;
  29. const size_t init_size = dst->size();
  30. dst->resize(init_size + bytes, 0);
  31. dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
  32. char* array = &(*dst)[init_size];
  33. for (int i = 0; i < n; i++) {
  34. // Use double-hashing to generate a sequence of hash values.
  35. // See analysis in [Kirsch,Mitzenmacher 2006].
  36. uint32_t h = BloomHash(keys[i]);
  37. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  38. for (size_t j = 0; j < k_; j++) {
  39. const uint32_t bitpos = h % bits;
  40. array[bitpos / 8] |= (1 << (bitpos % 8));
  41. h += delta;
  42. }
  43. }
  44. }
  45. bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
  46. const size_t len = bloom_filter.size();
  47. if (len < 2) return false;
  48. const char* array = bloom_filter.data();
  49. const size_t bits = (len - 1) * 8;
  50. // Use the encoded k so that we can read filters generated by
  51. // bloom filters created using different parameters.
  52. const size_t k = array[len - 1];
  53. if (k > 30) {
  54. // Reserved for potentially new encodings for short bloom filters.
  55. // Consider it a match.
  56. return true;
  57. }
  58. uint32_t h = BloomHash(key);
  59. const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
  60. for (size_t j = 0; j < k; j++) {
  61. const uint32_t bitpos = h % bits;
  62. if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
  63. h += delta;
  64. }
  65. return true;
  66. }
  67. private:
  68. size_t bits_per_key_;
  69. size_t k_;
  70. };
  71. } // namespace
  72. const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  73. return new BloomFilterPolicy(bits_per_key);
  74. }
  75. } // namespace leveldb