小组成员:谢瑞阳、徐翔宇
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

175 lines
5.1 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "include/table.h"
  5. #include "include/cache.h"
  6. #include "include/env.h"
  7. #include "table/block.h"
  8. #include "table/format.h"
  9. #include "table/two_level_iterator.h"
  10. #include "util/coding.h"
  11. namespace leveldb {
  12. struct Table::Rep {
  13. ~Rep() {
  14. delete index_block;
  15. }
  16. Options options;
  17. Status status;
  18. RandomAccessFile* file;
  19. uint64_t cache_id;
  20. BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
  21. Block* index_block;
  22. };
  23. Status Table::Open(const Options& options,
  24. RandomAccessFile* file,
  25. Table** table) {
  26. *table = NULL;
  27. const uint64_t size = file->Size();
  28. if (size < Footer::kEncodedLength) {
  29. return Status::InvalidArgument("file is too short to be an sstable");
  30. }
  31. char footer_space[Footer::kEncodedLength];
  32. Slice footer_input;
  33. Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
  34. &footer_input, footer_space);
  35. if (!s.ok()) return s;
  36. Footer footer;
  37. s = footer.DecodeFrom(&footer_input);
  38. if (!s.ok()) return s;
  39. // Read the index block
  40. Block* index_block = NULL;
  41. if (s.ok()) {
  42. s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block);
  43. }
  44. if (s.ok()) {
  45. // We've successfully read the footer and the index block: we're
  46. // ready to serve requests.
  47. Rep* rep = new Table::Rep;
  48. rep->options = options;
  49. rep->file = file;
  50. rep->metaindex_handle = footer.metaindex_handle();
  51. rep->index_block = index_block;
  52. rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
  53. *table = new Table(rep);
  54. } else {
  55. if (index_block) delete index_block;
  56. }
  57. return s;
  58. }
  59. Table::~Table() {
  60. delete rep_;
  61. }
  62. static void DeleteBlock(void* arg, void* ignored) {
  63. delete reinterpret_cast<Block*>(arg);
  64. }
  65. static void DeleteCachedBlock(const Slice& key, void* value) {
  66. Block* block = reinterpret_cast<Block*>(value);
  67. delete block;
  68. }
  69. static void ReleaseBlock(void* arg, void* h) {
  70. Cache* cache = reinterpret_cast<Cache*>(arg);
  71. Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
  72. cache->Release(handle);
  73. }
  74. // Convert an index iterator value (i.e., an encoded BlockHandle)
  75. // into an iterator over the contents of the corresponding block.
  76. Iterator* Table::BlockReader(void* arg,
  77. const ReadOptions& options,
  78. const Slice& index_value) {
  79. Table* table = reinterpret_cast<Table*>(arg);
  80. Cache* block_cache = table->rep_->options.block_cache;
  81. Block* block = NULL;
  82. Cache::Handle* cache_handle = NULL;
  83. BlockHandle handle;
  84. Slice input = index_value;
  85. Status s = handle.DecodeFrom(&input);
  86. // We intentionally allow extra stuff in index_value so that we
  87. // can add more features in the future.
  88. if (s.ok()) {
  89. if (block_cache != NULL) {
  90. char cache_key_buffer[16];
  91. EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
  92. EncodeFixed64(cache_key_buffer+8, handle.offset());
  93. Slice key(cache_key_buffer, sizeof(cache_key_buffer));
  94. cache_handle = block_cache->Lookup(key);
  95. if (cache_handle != NULL) {
  96. block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
  97. } else {
  98. s = ReadBlock(table->rep_->file, options, handle, &block);
  99. if (s.ok() && options.fill_cache) {
  100. cache_handle = block_cache->Insert(
  101. key, block, block->size(), &DeleteCachedBlock);
  102. }
  103. }
  104. } else {
  105. s = ReadBlock(table->rep_->file, options, handle, &block);
  106. }
  107. }
  108. Iterator* iter;
  109. if (block != NULL) {
  110. iter = block->NewIterator(table->rep_->options.comparator);
  111. if (cache_handle == NULL) {
  112. iter->RegisterCleanup(&DeleteBlock, block, NULL);
  113. } else {
  114. iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
  115. }
  116. } else {
  117. iter = NewErrorIterator(s);
  118. }
  119. return iter;
  120. }
  121. Iterator* Table::NewIterator(const ReadOptions& options) const {
  122. return NewTwoLevelIterator(
  123. rep_->index_block->NewIterator(rep_->options.comparator),
  124. &Table::BlockReader, const_cast<Table*>(this), options);
  125. }
  126. uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
  127. Iterator* index_iter =
  128. rep_->index_block->NewIterator(rep_->options.comparator);
  129. index_iter->Seek(key);
  130. uint64_t result;
  131. if (index_iter->Valid()) {
  132. BlockHandle handle;
  133. Slice input = index_iter->value();
  134. Status s = handle.DecodeFrom(&input);
  135. if (s.ok()) {
  136. result = handle.offset();
  137. } else {
  138. // Strange: we can't decode the block handle in the index block.
  139. // We'll just return the offset of the metaindex block, which is
  140. // close to the whole file size for this case.
  141. result = rep_->metaindex_handle.offset();
  142. }
  143. } else {
  144. // key is past the last key in the file. Approximate the offset
  145. // by returning the offset of the metaindex block (which is
  146. // right near the end of the file).
  147. result = rep_->metaindex_handle.offset();
  148. }
  149. delete index_iter;
  150. return result;
  151. }
  152. }