// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Decodes the blocks generated by block_builder.cc. #include "table/block.h" #include #include #include #include "leveldb/comparator.h" #include "table/format.h" #include "util/coding.h" #include "util/logging.h" namespace leveldb { inline uint32_t Block::NumRestarts() const { assert(size_ >= sizeof(uint32_t)); return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); } Block::Block(const BlockContents& contents) : data_(contents.data.data()), size_(contents.data.size()), owned_(contents.heap_allocated) { if (size_ < sizeof(uint32_t)) { size_ = 0; // Error marker } else { size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t); if (NumRestarts() > max_restarts_allowed) { // The size is too small for NumRestarts() size_ = 0; } else { restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); } } } Block::~Block() { if (owned_) { delete[] data_; } } // Helper routine: decode the next block entry starting at "p", // storing the number of shared key bytes, non_shared key bytes, // and the length of the value in "*shared", "*non_shared", and // "*value_length", respectively. Will not dereference past "limit". // // If any errors are detected, returns nullptr. Otherwise, returns a // pointer to the key delta (just past the three decoded values). static inline const char* DecodeEntry(const char* p, const char* limit, uint32_t* shared, uint32_t* non_shared, uint32_t* value_length) { if (limit - p < 3) return nullptr; *shared = reinterpret_cast(p)[0]; *non_shared = reinterpret_cast(p)[1]; *value_length = reinterpret_cast(p)[2]; if ((*shared | *non_shared | *value_length) < 128) { // Fast path: all three values are encoded in one byte each p += 3; } else { if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) return nullptr; } if (static_cast(limit - p) < (*non_shared + *value_length)) { return nullptr; } return p; } class Block::Iter : public Iterator { private: const Comparator* const comparator_; const char* const data_; // underlying block contents uint32_t const restarts_; // Offset of restart array (list of fixed32) uint32_t const num_restarts_; // Number of uint32_t entries in restart array // current_ is offset in data_ of current entry. >= restarts_ if !Valid uint32_t current_; uint32_t restart_index_; // Index of restart block in which current_ falls std::string key_; Slice value_; Status status_; inline int Compare(const Slice& a, const Slice& b) const { return comparator_->Compare(a, b); } // Return the offset in data_ just past the end of the current entry. inline uint32_t NextEntryOffset() const { return (value_.data() + value_.size()) - data_; } uint32_t GetRestartPoint(uint32_t index) { assert(index < num_restarts_); return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); } void SeekToRestartPoint(uint32_t index) { key_.clear(); restart_index_ = index; // current_ will be fixed by ParseNextKey(); // ParseNextKey() starts at the end of value_, so set value_ accordingly uint32_t offset = GetRestartPoint(index); value_ = Slice(data_ + offset, 0); } public: Iter(const Comparator* comparator, const char* data, uint32_t restarts, uint32_t num_restarts) : comparator_(comparator), data_(data), restarts_(restarts), num_restarts_(num_restarts), current_(restarts_), restart_index_(num_restarts_) { assert(num_restarts_ > 0); } bool Valid() const override { return current_ < restarts_; } Status status() const override { return status_; } Slice key() const override { assert(Valid()); return key_; } Slice value() const override { assert(Valid()); return value_; } void Next() override { assert(Valid()); ParseNextKey(); } void Prev() override { assert(Valid()); // Scan backwards to a restart point before current_ const uint32_t original = current_; while (GetRestartPoint(restart_index_) >= original) { if (restart_index_ == 0) { // No more entries current_ = restarts_; restart_index_ = num_restarts_; return; } restart_index_--; } SeekToRestartPoint(restart_index_); do { // Loop until end of current entry hits the start of original entry } while (ParseNextKey() && NextEntryOffset() < original); } void Seek(const Slice& target) override { // Binary search in restart array to find the last restart point // with a key < target uint32_t left = 0; uint32_t right = num_restarts_ - 1; int current_key_compare = 0; if (Valid()) { // If we're already scanning, use the current position as a starting // point. This is beneficial if the key we're seeking to is ahead of the // current position. current_key_compare = Compare(key_, target); if (current_key_compare < 0) { // key_ is smaller than target left = restart_index_; } else if (current_key_compare > 0) { right = restart_index_; } else { // We're seeking to the key we're already at. return;// 已找到,不需要进一步查找 } } while (left < right) { uint32_t mid = (left + right + 1) / 2; uint32_t region_offset = GetRestartPoint(mid); uint32_t shared, non_shared, value_length; const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_, &shared, &non_shared, &value_length); if (key_ptr == nullptr || (shared != 0)) { CorruptionError(); return; // 返回错误状态 } Slice mid_key(key_ptr, non_shared); if (Compare(mid_key, target) < 0) { // Key at "mid" is smaller than "target". Therefore all // blocks before "mid" are uninteresting. left = mid; } else { // Key at "mid" is >= "target". Therefore all blocks at or // after "mid" are uninteresting. right = mid - 1; } } // We might be able to use our current position within the restart block. // This is true if we determined the key we desire is in the current block // and is after than the current key. assert(current_key_compare == 0 || Valid()); bool skip_seek = left == restart_index_ && current_key_compare < 0; if (!skip_seek) { SeekToRestartPoint(left); } // Linear search (within restart block) for first key >= target while (true) { if (!ParseNextKey()) { return; } if (Compare(key_, target) >= 0) { return; } } } void SeekToFirst() override { SeekToRestartPoint(0); ParseNextKey(); } void SeekToLast() override { SeekToRestartPoint(num_restarts_ - 1); while (ParseNextKey() && NextEntryOffset() < restarts_) { // Keep skipping } } private: void CorruptionError() { current_ = restarts_; restart_index_ = num_restarts_; status_ = Status::Corruption("bad entry in block"); key_.clear(); value_.clear(); } bool ParseNextKey() { current_ = NextEntryOffset(); const char* p = data_ + current_; const char* limit = data_ + restarts_; // Restarts come right after data if (p >= limit) { // No more entries to return. Mark as invalid. current_ = restarts_; restart_index_ = num_restarts_; return false; } // Decode next entry uint32_t shared, non_shared, value_length; p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); if (p == nullptr || key_.size() < shared) { CorruptionError(); return false; } else { key_.resize(shared); key_.append(p, non_shared); value_ = Slice(p + non_shared, value_length); while (restart_index_ + 1 < num_restarts_ && GetRestartPoint(restart_index_ + 1) < current_) { ++restart_index_; } return true; } } }; Iterator* Block::NewIterator(const Comparator* comparator) { if (size_ < sizeof(uint32_t)) { return NewErrorIterator(Status::Corruption("bad block contents")); } const uint32_t num_restarts = NumRestarts(); if (num_restarts == 0) { return NewEmptyIterator(); } else { return new Iter(comparator, data_, restart_offset_, num_restarts); } } } // namespace leveldb