You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

259 line
7.8 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "db/log_reader.h"
  5. #include <stdio.h>
  6. #include "leveldb/env.h"
  7. #include "util/coding.h"
  8. #include "util/crc32c.h"
  9. namespace leveldb {
  10. namespace log {
  11. Reader::Reporter::~Reporter() {
  12. }
  13. Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
  14. uint64_t initial_offset)
  15. : file_(file),
  16. reporter_(reporter),
  17. checksum_(checksum),
  18. backing_store_(new char[kBlockSize]),
  19. buffer_(),
  20. eof_(false),
  21. last_record_offset_(0),
  22. end_of_buffer_offset_(0),
  23. initial_offset_(initial_offset) {
  24. }
  25. Reader::~Reader() {
  26. delete[] backing_store_;
  27. }
  28. bool Reader::SkipToInitialBlock() {
  29. size_t offset_in_block = initial_offset_ % kBlockSize;
  30. uint64_t block_start_location = initial_offset_ - offset_in_block;
  31. // Don't search a block if we'd be in the trailer
  32. if (offset_in_block > kBlockSize - 6) {
  33. offset_in_block = 0;
  34. block_start_location += kBlockSize;
  35. }
  36. end_of_buffer_offset_ = block_start_location;
  37. // Skip to start of first block that can contain the initial record
  38. if (block_start_location > 0) {
  39. Status skip_status = file_->Skip(block_start_location);
  40. if (!skip_status.ok()) {
  41. ReportDrop(block_start_location, skip_status);
  42. return false;
  43. }
  44. }
  45. return true;
  46. }
  47. bool Reader::ReadRecord(Slice* record, std::string* scratch) {
  48. if (last_record_offset_ < initial_offset_) {
  49. if (!SkipToInitialBlock()) {
  50. return false;
  51. }
  52. }
  53. scratch->clear();
  54. record->clear();
  55. bool in_fragmented_record = false;
  56. // Record offset of the logical record that we're reading
  57. // 0 is a dummy value to make compilers happy
  58. uint64_t prospective_record_offset = 0;
  59. Slice fragment;
  60. while (true) {
  61. uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
  62. const unsigned int record_type = ReadPhysicalRecord(&fragment);
  63. switch (record_type) {
  64. case kFullType:
  65. if (in_fragmented_record) {
  66. // Handle bug in earlier versions of log::Writer where
  67. // it could emit an empty kFirstType record at the tail end
  68. // of a block followed by a kFullType or kFirstType record
  69. // at the beginning of the next block.
  70. if (scratch->empty()) {
  71. in_fragmented_record = false;
  72. } else {
  73. ReportCorruption(scratch->size(), "partial record without end(1)");
  74. }
  75. }
  76. prospective_record_offset = physical_record_offset;
  77. scratch->clear();
  78. *record = fragment;
  79. last_record_offset_ = prospective_record_offset;
  80. return true;
  81. case kFirstType:
  82. if (in_fragmented_record) {
  83. // Handle bug in earlier versions of log::Writer where
  84. // it could emit an empty kFirstType record at the tail end
  85. // of a block followed by a kFullType or kFirstType record
  86. // at the beginning of the next block.
  87. if (scratch->empty()) {
  88. in_fragmented_record = false;
  89. } else {
  90. ReportCorruption(scratch->size(), "partial record without end(2)");
  91. }
  92. }
  93. prospective_record_offset = physical_record_offset;
  94. scratch->assign(fragment.data(), fragment.size());
  95. in_fragmented_record = true;
  96. break;
  97. case kMiddleType:
  98. if (!in_fragmented_record) {
  99. ReportCorruption(fragment.size(),
  100. "missing start of fragmented record(1)");
  101. } else {
  102. scratch->append(fragment.data(), fragment.size());
  103. }
  104. break;
  105. case kLastType:
  106. if (!in_fragmented_record) {
  107. ReportCorruption(fragment.size(),
  108. "missing start of fragmented record(2)");
  109. } else {
  110. scratch->append(fragment.data(), fragment.size());
  111. *record = Slice(*scratch);
  112. last_record_offset_ = prospective_record_offset;
  113. return true;
  114. }
  115. break;
  116. case kEof:
  117. if (in_fragmented_record) {
  118. ReportCorruption(scratch->size(), "partial record without end(3)");
  119. scratch->clear();
  120. }
  121. return false;
  122. case kBadRecord:
  123. if (in_fragmented_record) {
  124. ReportCorruption(scratch->size(), "error in middle of record");
  125. in_fragmented_record = false;
  126. scratch->clear();
  127. }
  128. break;
  129. default: {
  130. char buf[40];
  131. snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
  132. ReportCorruption(
  133. (fragment.size() + (in_fragmented_record ? scratch->size() : 0)),
  134. buf);
  135. in_fragmented_record = false;
  136. scratch->clear();
  137. break;
  138. }
  139. }
  140. }
  141. return false;
  142. }
  143. uint64_t Reader::LastRecordOffset() {
  144. return last_record_offset_;
  145. }
  146. void Reader::ReportCorruption(size_t bytes, const char* reason) {
  147. ReportDrop(bytes, Status::Corruption(reason));
  148. }
  149. void Reader::ReportDrop(size_t bytes, const Status& reason) {
  150. if (reporter_ != NULL &&
  151. end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) {
  152. reporter_->Corruption(bytes, reason);
  153. }
  154. }
  155. unsigned int Reader::ReadPhysicalRecord(Slice* result) {
  156. while (true) {
  157. if (buffer_.size() < kHeaderSize) {
  158. if (!eof_) {
  159. // Last read was a full read, so this is a trailer to skip
  160. buffer_.clear();
  161. Status status = file_->Read(kBlockSize, &buffer_, backing_store_);
  162. end_of_buffer_offset_ += buffer_.size();
  163. if (!status.ok()) {
  164. buffer_.clear();
  165. ReportDrop(kBlockSize, status);
  166. eof_ = true;
  167. return kEof;
  168. } else if (buffer_.size() < kBlockSize) {
  169. eof_ = true;
  170. }
  171. continue;
  172. } else if (buffer_.size() == 0) {
  173. // End of file
  174. return kEof;
  175. } else {
  176. size_t drop_size = buffer_.size();
  177. buffer_.clear();
  178. ReportCorruption(drop_size, "truncated record at end of file");
  179. return kEof;
  180. }
  181. }
  182. // Parse the header
  183. const char* header = buffer_.data();
  184. const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff;
  185. const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff;
  186. const unsigned int type = header[6];
  187. const uint32_t length = a | (b << 8);
  188. if (kHeaderSize + length > buffer_.size()) {
  189. size_t drop_size = buffer_.size();
  190. buffer_.clear();
  191. ReportCorruption(drop_size, "bad record length");
  192. return kBadRecord;
  193. }
  194. if (type == kZeroType && length == 0) {
  195. // Skip zero length record without reporting any drops since
  196. // such records are produced by the mmap based writing code in
  197. // env_posix.cc that preallocates file regions.
  198. buffer_.clear();
  199. return kBadRecord;
  200. }
  201. // Check crc
  202. if (checksum_) {
  203. uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header));
  204. uint32_t actual_crc = crc32c::Value(header + 6, 1 + length);
  205. if (actual_crc != expected_crc) {
  206. // Drop the rest of the buffer since "length" itself may have
  207. // been corrupted and if we trust it, we could find some
  208. // fragment of a real log record that just happens to look
  209. // like a valid log record.
  210. size_t drop_size = buffer_.size();
  211. buffer_.clear();
  212. ReportCorruption(drop_size, "checksum mismatch");
  213. return kBadRecord;
  214. }
  215. }
  216. buffer_.remove_prefix(kHeaderSize + length);
  217. // Skip physical record that started before initial_offset_
  218. if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length <
  219. initial_offset_) {
  220. result->clear();
  221. return kBadRecord;
  222. }
  223. *result = Slice(header + kHeaderSize, length);
  224. return type;
  225. }
  226. }
  227. }
  228. }