building_data_management_systems.Xuanzhou.2024Fall.DaSE
/
LevelDB-course-Lab

// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "leveldb/filter_policy.h"

#include "leveldb/slice.h"
#include "util/hash.h"

namespace leveldb {
namespace {static uint32_t BloomHash(const Slice& key) {  return Hash(key.data(), key.size(), 0xbc9f1d34);}
class BloomFilterPolicy : public FilterPolicy { private:  size_t bits_per_key_;  size_t k_;
 public:  explicit BloomFilterPolicy(int bits_per_key)      : bits_per_key_(bits_per_key) {    // We intentionally round down to reduce probing cost a little bit
    k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
    if (k_ < 1) k_ = 1;    if (k_ > 30) k_ = 30;  }
  virtual const char* Name() const {    return "leveldb.BuiltinBloomFilter2";  }
  virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {    // Compute bloom filter size (in both bits and bytes)
    size_t bits = n * bits_per_key_;
    // For small n, we can see a very high false positive rate.  Fix it
    // by enforcing a minimum bloom filter length.
    if (bits < 64) bits = 64;
    size_t bytes = (bits + 7) / 8;    bits = bytes * 8;
    const size_t init_size = dst->size();    dst->resize(init_size + bytes, 0);    dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
    char* array = &(*dst)[init_size];    for (size_t i = 0; i < n; i++) {      // Use double-hashing to generate a sequence of hash values.
      // See analysis in [Kirsch,Mitzenmacher 2006].
      uint32_t h = BloomHash(keys[i]);      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
      for (size_t j = 0; j < k_; j++) {        const uint32_t bitpos = h % bits;        array[bitpos/8] |= (1 << (bitpos % 8));        h += delta;      }    }  }
  virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {    const size_t len = bloom_filter.size();    if (len < 2) return false;
    const char* array = bloom_filter.data();    const size_t bits = (len - 1) * 8;
    // Use the encoded k so that we can read filters generated by
    // bloom filters created using different parameters.
    const size_t k = array[len-1];    if (k > 30) {      // Reserved for potentially new encodings for short bloom filters.
      // Consider it a match.
      return true;    }
    uint32_t h = BloomHash(key);    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    for (size_t j = 0; j < k; j++) {      const uint32_t bitpos = h % bits;      if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;      h += delta;    }    return true;  }};}
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {  return new BloomFilterPolicy(bits_per_key);}
}  // namespace leveldb