// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "include/table.h" #include "include/cache.h" #include "include/env.h" #include "table/block.h" #include "table/format.h" #include "table/two_level_iterator.h" #include "util/coding.h" namespace leveldb { struct Table::Rep { ~Rep() { delete index_block; } Options options; Status status; RandomAccessFile* file; uint64_t cache_id; BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer Block* index_block; }; Status Table::Open(const Options& options, RandomAccessFile* file, uint64_t size, Table** table) { *table = NULL; if (size < Footer::kEncodedLength) { return Status::InvalidArgument("file is too short to be an sstable"); } char footer_space[Footer::kEncodedLength]; Slice footer_input; Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength, &footer_input, footer_space); if (!s.ok()) return s; Footer footer; s = footer.DecodeFrom(&footer_input); if (!s.ok()) return s; // Read the index block Block* index_block = NULL; if (s.ok()) { s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block); } if (s.ok()) { // We've successfully read the footer and the index block: we're // ready to serve requests. Rep* rep = new Table::Rep; rep->options = options; rep->file = file; rep->metaindex_handle = footer.metaindex_handle(); rep->index_block = index_block; rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0); *table = new Table(rep); } else { if (index_block) delete index_block; } return s; } Table::~Table() { delete rep_; } static void DeleteBlock(void* arg, void* ignored) { delete reinterpret_cast(arg); } static void DeleteCachedBlock(const Slice& key, void* value) { Block* block = reinterpret_cast(value); delete block; } static void ReleaseBlock(void* arg, void* h) { Cache* cache = reinterpret_cast(arg); Cache::Handle* handle = reinterpret_cast(h); cache->Release(handle); } // Convert an index iterator value (i.e., an encoded BlockHandle) // into an iterator over the contents of the corresponding block. Iterator* Table::BlockReader(void* arg, const ReadOptions& options, const Slice& index_value) { Table* table = reinterpret_cast(arg); Cache* block_cache = table->rep_->options.block_cache; Block* block = NULL; Cache::Handle* cache_handle = NULL; BlockHandle handle; Slice input = index_value; Status s = handle.DecodeFrom(&input); // We intentionally allow extra stuff in index_value so that we // can add more features in the future. if (s.ok()) { if (block_cache != NULL) { char cache_key_buffer[16]; EncodeFixed64(cache_key_buffer, table->rep_->cache_id); EncodeFixed64(cache_key_buffer+8, handle.offset()); Slice key(cache_key_buffer, sizeof(cache_key_buffer)); cache_handle = block_cache->Lookup(key); if (cache_handle != NULL) { block = reinterpret_cast(block_cache->Value(cache_handle)); } else { s = ReadBlock(table->rep_->file, options, handle, &block); if (s.ok() && options.fill_cache) { cache_handle = block_cache->Insert( key, block, block->size(), &DeleteCachedBlock); } } } else { s = ReadBlock(table->rep_->file, options, handle, &block); } } Iterator* iter; if (block != NULL) { iter = block->NewIterator(table->rep_->options.comparator); if (cache_handle == NULL) { iter->RegisterCleanup(&DeleteBlock, block, NULL); } else { iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle); } } else { iter = NewErrorIterator(s); } return iter; } Iterator* Table::NewIterator(const ReadOptions& options) const { return NewTwoLevelIterator( rep_->index_block->NewIterator(rep_->options.comparator), &Table::BlockReader, const_cast(this), options); } uint64_t Table::ApproximateOffsetOf(const Slice& key) const { Iterator* index_iter = rep_->index_block->NewIterator(rep_->options.comparator); index_iter->Seek(key); uint64_t result; if (index_iter->Valid()) { BlockHandle handle; Slice input = index_iter->value(); Status s = handle.DecodeFrom(&input); if (s.ok()) { result = handle.offset(); } else { // Strange: we can't decode the block handle in the index block. // We'll just return the offset of the metaindex block, which is // close to the whole file size for this case. result = rep_->metaindex_handle.offset(); } } else { // key is past the last key in the file. Approximate the offset // by returning the offset of the metaindex block (which is // right near the end of the file). result = rep_->metaindex_handle.offset(); } delete index_iter; return result; } }