You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

160 lines
3.7 KiB

  1. // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/filter_policy.h"
  5. #include "util/coding.h"
  6. #include "util/logging.h"
  7. #include "util/testharness.h"
  8. #include "util/testutil.h"
  9. namespace leveldb {
  // Controls how much this test writes to stderr. At >= 1 the VaryingLengths
  // test prints the false positive rate for each length plus a final summary;
  // at >= 2 BloomTest::Build() additionally dumps the bits of each filter.
  static const int kVerbose = 1;
  11. static Slice Key(int i, char* buffer) {
  12. EncodeFixed32(buffer, i);
  13. return Slice(buffer, sizeof(uint32_t));
  14. }
  15. class BloomTest {
  16. private:
  17. const FilterPolicy* policy_;
  18. std::string filter_;
  19. std::vector<std::string> keys_;
  20. public:
  21. BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
  22. ~BloomTest() {
  23. delete policy_;
  24. }
  25. void Reset() {
  26. keys_.clear();
  27. filter_.clear();
  28. }
  29. void Add(const Slice& s) {
  30. keys_.push_back(s.ToString());
  31. }
  32. void Build() {
  33. std::vector<Slice> key_slices;
  34. for (size_t i = 0; i < keys_.size(); i++) {
  35. key_slices.push_back(Slice(keys_[i]));
  36. }
  37. filter_.clear();
  38. policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
  39. keys_.clear();
  40. if (kVerbose >= 2) DumpFilter();
  41. }
  42. size_t FilterSize() const {
  43. return filter_.size();
  44. }
  45. void DumpFilter() {
  46. fprintf(stderr, "F(");
  47. for (size_t i = 0; i+1 < filter_.size(); i++) {
  48. const unsigned int c = static_cast<unsigned int>(filter_[i]);
  49. for (int j = 0; j < 8; j++) {
  50. fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
  51. }
  52. }
  53. fprintf(stderr, ")\n");
  54. }
  55. bool Matches(const Slice& s) {
  56. if (!keys_.empty()) {
  57. Build();
  58. }
  59. return policy_->KeyMayMatch(s, filter_);
  60. }
  61. double FalsePositiveRate() {
  62. char buffer[sizeof(int)];
  63. int result = 0;
  64. for (int i = 0; i < 10000; i++) {
  65. if (Matches(Key(i + 1000000000, buffer))) {
  66. result++;
  67. }
  68. }
  69. return result / 10000.0;
  70. }
  71. };
  72. TEST(BloomTest, EmptyFilter) {
  73. ASSERT_TRUE(! Matches("hello"));
  74. ASSERT_TRUE(! Matches("world"));
  75. }
  76. TEST(BloomTest, Small) {
  77. Add("hello");
  78. Add("world");
  79. ASSERT_TRUE(Matches("hello"));
  80. ASSERT_TRUE(Matches("world"));
  81. ASSERT_TRUE(! Matches("x"));
  82. ASSERT_TRUE(! Matches("foo"));
  83. }
  // Returns the next key count to test after `length`. The increment grows
  // with the magnitude of length: 1 below 10, 10 below 100, 100 below 1000,
  // and 1000 from there on.
  static int NextLength(int length) {
    int step = 1000;
    if (length < 10) {
      step = 1;
    } else if (length < 100) {
      step = 10;
    } else if (length < 1000) {
      step = 100;
    }
    return length + step;
  }
  96. TEST(BloomTest, VaryingLengths) {
  97. char buffer[sizeof(int)];
  98. // Count number of filters that significantly exceed the false positive rate
  99. int mediocre_filters = 0;
  100. int good_filters = 0;
  101. for (int length = 1; length <= 10000; length = NextLength(length)) {
  102. Reset();
  103. for (int i = 0; i < length; i++) {
  104. Add(Key(i, buffer));
  105. }
  106. Build();
  107. ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
  108. // All added keys must match
  109. for (int i = 0; i < length; i++) {
  110. ASSERT_TRUE(Matches(Key(i, buffer)))
  111. << "Length " << length << "; key " << i;
  112. }
  113. // Check false positive rate
  114. double rate = FalsePositiveRate();
  115. if (kVerbose >= 1) {
  116. fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
  117. rate*100.0, length, static_cast<int>(FilterSize()));
  118. }
  119. ASSERT_LE(rate, 0.02); // Must not be over 2%
  120. if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
  121. else good_filters++;
  122. }
  123. if (kVerbose >= 1) {
  124. fprintf(stderr, "Filters: %d good, %d mediocre\n",
  125. good_filters, mediocre_filters);
  126. }
  127. ASSERT_LE(mediocre_filters, good_filters/5);
  128. }
  // TODO: add tests that use different bits-per-key values (this file only uses 10).
  130. } // namespace leveldb
  131. int main(int argc, char** argv) {
  132. return leveldb::test::RunAllTests();
  133. }