- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
-
- #include "leveldb/table.h"
-
- #include "leveldb/cache.h"
- #include "leveldb/comparator.h"
- #include "leveldb/env.h"
- #include "leveldb/filter_policy.h"
- #include "leveldb/options.h"
- #include "table/block.h"
- #include "table/filter_block.h"
- #include "table/format.h"
- #include "table/two_level_iterator.h"
- #include "util/coding.h"
-
- namespace leveldb {
-
- struct Table::Rep {
- ~Rep() {
- delete filter;
- delete [] filter_data;
- delete index_block;
- }
-
- Options options;
- Status status;
- RandomAccessFile* file;
- uint64_t cache_id;
- FilterBlockReader* filter;
- const char* filter_data;
-
- BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
- Block* index_block;
- };
-
- Status Table::Open(const Options& options,
- RandomAccessFile* file,
- uint64_t size,
- Table** table) {
- *table = NULL;
- if (size < Footer::kEncodedLength) {
- return Status::Corruption("file is too short to be an sstable");
- }
-
- char footer_space[Footer::kEncodedLength];
- Slice footer_input;
- Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
- &footer_input, footer_space);
- if (!s.ok()) return s;
-
- Footer footer;
- s = footer.DecodeFrom(&footer_input);
- if (!s.ok()) return s;
-
- // Read the index block
- BlockContents contents;
- Block* index_block = NULL;
- if (s.ok()) {
- ReadOptions opt;
- if (options.paranoid_checks) {
- opt.verify_checksums = true;
- }
- s = ReadBlock(file, opt, footer.index_handle(), &contents);
- if (s.ok()) {
- index_block = new Block(contents);
- }
- }
-
- if (s.ok()) {
- // We've successfully read the footer and the index block: we're
- // ready to serve requests.
- Rep* rep = new Table::Rep;
- rep->options = options;
- rep->file = file;
- rep->metaindex_handle = footer.metaindex_handle();
- rep->index_block = index_block;
- rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
- rep->filter_data = NULL;
- rep->filter = NULL;
- *table = new Table(rep);
- (*table)->ReadMeta(footer);
- } else {
- if (index_block) delete index_block;
- }
-
- return s;
- }
-
- void Table::ReadMeta(const Footer& footer) {
- if (rep_->options.filter_policy == NULL) {
- return; // Do not need any metadata
- }
-
- // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
- // it is an empty block.
- ReadOptions opt;
- if (rep_->options.paranoid_checks) {
- opt.verify_checksums = true;
- }
- BlockContents contents;
- if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
- // Do not propagate errors since meta info is not needed for operation
- return;
- }
- Block* meta = new Block(contents);
-
- Iterator* iter = meta->NewIterator(BytewiseComparator());
- std::string key = "filter.";
- key.append(rep_->options.filter_policy->Name());
- iter->Seek(key);
- if (iter->Valid() && iter->key() == Slice(key)) {
- ReadFilter(iter->value());
- }
- delete iter;
- delete meta;
- }
-
- void Table::ReadFilter(const Slice& filter_handle_value) {
- Slice v = filter_handle_value;
- BlockHandle filter_handle;
- if (!filter_handle.DecodeFrom(&v).ok()) {
- return;
- }
-
- // We might want to unify with ReadBlock() if we start
- // requiring checksum verification in Table::Open.
- ReadOptions opt;
- if (rep_->options.paranoid_checks) {
- opt.verify_checksums = true;
- }
- BlockContents block;
- if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
- return;
- }
- if (block.heap_allocated) {
- rep_->filter_data = block.data.data(); // Will need to delete later
- }
- rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
- }
-
- Table::~Table() {
- delete rep_;
- }
-
- static void DeleteBlock(void* arg, void* ignored) {
- delete reinterpret_cast<Block*>(arg);
- }
-
- static void DeleteCachedBlock(const Slice& key, void* value) {
- Block* block = reinterpret_cast<Block*>(value);
- delete block;
- }
-
- static void ReleaseBlock(void* arg, void* h) {
- Cache* cache = reinterpret_cast<Cache*>(arg);
- Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
- cache->Release(handle);
- }
-
- // Convert an index iterator value (i.e., an encoded BlockHandle)
- // into an iterator over the contents of the corresponding block.
- Iterator* Table::BlockReader(void* arg,
- const ReadOptions& options,
- const Slice& index_value) {
- Table* table = reinterpret_cast<Table*>(arg);
- Cache* block_cache = table->rep_->options.block_cache;
- Block* block = NULL;
- Cache::Handle* cache_handle = NULL;
-
- BlockHandle handle;
- Slice input = index_value;
- Status s = handle.DecodeFrom(&input);
- // We intentionally allow extra stuff in index_value so that we
- // can add more features in the future.
-
- if (s.ok()) {
- BlockContents contents;
- if (block_cache != NULL) {
- char cache_key_buffer[16];
- EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
- EncodeFixed64(cache_key_buffer+8, handle.offset());
- Slice key(cache_key_buffer, sizeof(cache_key_buffer));
- cache_handle = block_cache->Lookup(key);
- if (cache_handle != NULL) {
- block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
- } else {
- s = ReadBlock(table->rep_->file, options, handle, &contents);
- if (s.ok()) {
- block = new Block(contents);
- if (contents.cachable && options.fill_cache) {
- cache_handle = block_cache->Insert(
- key, block, block->size(), &DeleteCachedBlock);
- }
- }
- }
- } else {
- s = ReadBlock(table->rep_->file, options, handle, &contents);
- if (s.ok()) {
- block = new Block(contents);
- }
- }
- }
-
- Iterator* iter;
- if (block != NULL) {
- iter = block->NewIterator(table->rep_->options.comparator);
- if (cache_handle == NULL) {
- iter->RegisterCleanup(&DeleteBlock, block, NULL);
- } else {
- iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
- }
- } else {
- iter = NewErrorIterator(s);
- }
- return iter;
- }
-
- Iterator* Table::NewIterator(const ReadOptions& options) const {
- return NewTwoLevelIterator(
- rep_->index_block->NewIterator(rep_->options.comparator),
- &Table::BlockReader, const_cast<Table*>(this), options);
- }
-
- Status Table::InternalGet(const ReadOptions& options, const Slice& k,
- void* arg,
- void (*saver)(void*, const Slice&, const Slice&)) {
- Status s;
- Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
- iiter->Seek(k);
- if (iiter->Valid()) {
- Slice handle_value = iiter->value();
- FilterBlockReader* filter = rep_->filter;
- BlockHandle handle;
- if (filter != NULL &&
- handle.DecodeFrom(&handle_value).ok() &&
- !filter->KeyMayMatch(handle.offset(), k)) {
- // Not found
- } else {
- Iterator* block_iter = BlockReader(this, options, iiter->value());
- block_iter->Seek(k);
- if (block_iter->Valid()) {
- (*saver)(arg, block_iter->key(), block_iter->value());
- }
- s = block_iter->status();
- delete block_iter;
- }
- }
- if (s.ok()) {
- s = iiter->status();
- }
- delete iiter;
- return s;
- }
-
-
- uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
- Iterator* index_iter =
- rep_->index_block->NewIterator(rep_->options.comparator);
- index_iter->Seek(key);
- uint64_t result;
- if (index_iter->Valid()) {
- BlockHandle handle;
- Slice input = index_iter->value();
- Status s = handle.DecodeFrom(&input);
- if (s.ok()) {
- result = handle.offset();
- } else {
- // Strange: we can't decode the block handle in the index block.
- // We'll just return the offset of the metaindex block, which is
- // close to the whole file size for this case.
- result = rep_->metaindex_handle.offset();
- }
- } else {
- // key is past the last key in the file. Approximate the offset
- // by returning the offset of the metaindex block (which is
- // right near the end of the file).
- result = rep_->metaindex_handle.offset();
- }
- delete index_iter;
- return result;
- }
-
- } // namespace leveldb
|