You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

108 lines
3.5 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. //
  5. // BlockBuilder generates blocks where keys are prefix-compressed:
  6. //
  7. // When we store a key, we drop the prefix shared with the previous
  8. // string. This helps reduce the space requirement significantly.
  9. // Furthermore, once every K keys, we do not apply the prefix
  10. // compression and store the entire key. We call this a "restart
  11. // point". The tail end of the block stores the offsets of all of the
  12. // restart points, and can be used to do a binary search when looking
  13. // for a particular key. Values are stored as-is (without compression)
  14. // immediately following the corresponding key.
  15. //
  16. // An entry for a particular key-value pair has the form:
  17. // shared_bytes: varint32
  18. // unshared_bytes: varint32
  19. // value_length: varint32
  20. // key_delta: char[unshared_bytes]
  21. // value: char[value_length]
  22. // shared_bytes == 0 for restart points.
  23. //
  24. // The trailer of the block has the form:
  25. // restarts: uint32[num_restarts]
  26. // num_restarts: uint32
  27. // restarts[i] contains the offset within the block of the ith restart point.
  28. #include "table/block_builder.h"
  29. #include <assert.h>
  30. #include <algorithm>
  31. #include "leveldb/comparator.h"
  32. #include "leveldb/table_builder.h"
  33. #include "util/coding.h"
  34. namespace leveldb {
  35. BlockBuilder::BlockBuilder(const Options* options)
  36. : options_(options), restarts_(), counter_(0), finished_(false) {
  37. assert(options->block_restart_interval >= 1);
  38. restarts_.push_back(0); // First restart point is at offset 0
  39. }
  40. void BlockBuilder::Reset() {
  41. buffer_.clear();
  42. restarts_.clear();
  43. restarts_.push_back(0); // First restart point is at offset 0
  44. counter_ = 0;
  45. finished_ = false;
  46. last_key_.clear();
  47. }
  48. size_t BlockBuilder::CurrentSizeEstimate() const {
  49. return (buffer_.size() + // Raw data buffer
  50. restarts_.size() * sizeof(uint32_t) + // Restart array
  51. sizeof(uint32_t)); // Restart array length
  52. }
  53. Slice BlockBuilder::Finish() {
  54. // Append restart array
  55. for (size_t i = 0; i < restarts_.size(); i++) {
  56. PutFixed32(&buffer_, restarts_[i]);
  57. }
  58. PutFixed32(&buffer_, restarts_.size());
  59. finished_ = true;
  60. return Slice(buffer_);
  61. }
  62. void BlockBuilder::Add(const Slice& key, const Slice& value) {
  63. Slice last_key_piece(last_key_);
  64. assert(!finished_);
  65. assert(counter_ <= options_->block_restart_interval);
  66. assert(buffer_.empty() // No values yet?
  67. || options_->comparator->Compare(key, last_key_piece) > 0);
  68. size_t shared = 0;
  69. if (counter_ < options_->block_restart_interval) {
  70. // See how much sharing to do with previous string
  71. const size_t min_length = std::min(last_key_piece.size(), key.size());
  72. while ((shared < min_length) && (last_key_piece[shared] == key[shared])) {
  73. shared++;
  74. }
  75. } else {
  76. // Restart compression
  77. restarts_.push_back(buffer_.size());
  78. counter_ = 0;
  79. }
  80. const size_t non_shared = key.size() - shared;
  81. // Add "<shared><non_shared><value_size>" to buffer_
  82. PutVarint32(&buffer_, shared);
  83. PutVarint32(&buffer_, non_shared);
  84. PutVarint32(&buffer_, value.size());
  85. // Add string delta to buffer_ followed by value
  86. buffer_.append(key.data() + shared, non_shared);
  87. buffer_.append(value.data(), value.size());
  88. // Update state
  89. last_key_.resize(shared);
  90. last_key_.append(key.data() + shared, non_shared);
  91. assert(Slice(last_key_) == key);
  92. counter_++;
  93. }
  94. } // namespace leveldb