// 10225101483 / XOY-Leveldb

// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/db_iter.h"

#include "db/filename.h"
#include "db/dbformat.h"
#include "include/env.h"
#include "include/iterator.h"
#include "port/port.h"
#include "util/logging.h"
#include "util/mutexlock.h"

namespace leveldb {
#if 0
// Debugging aid (compiled out).  Walks "iter" from its first entry to its
// last and prints each internal key to stderr.  Keys that fail to parse are
// printed in escaped form with a "Corrupt" prefix; all others are printed
// via ParsedInternalKey::DebugString().
static void DumpInternalIter(Iterator* iter) {
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey k;
    if (!ParseInternalKey(iter->key(), &k)) {
      fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
    } else {
      fprintf(stderr, "@ '%s'\n", k.DebugString().c_str());
    }
  }
}
// The directive must start its own line; otherwise the preprocessor does not
// recognise it and the "#if 0" region above is never closed.
#endif

namespace {
// Memtables and sstables that make the DB representation contain
// (userkey,seq,type) => uservalue entries.  DBIter
// combines multiple entries for the same userkey found in the DB
// representation into a single entry while accounting for sequence
// numbers, deletion markers, overwrites, etc.
class DBIter: public Iterator { public:  DBIter(const std::string* dbname, Env* env,         const Comparator* cmp, Iterator* iter, SequenceNumber s)      : dbname_(dbname),        env_(env),        user_comparator_(cmp),        iter_(iter),        sequence_(s),        large_(NULL),        valid_(false) {  }  virtual ~DBIter() {    delete iter_;    delete large_;  }  virtual bool Valid() const { return valid_; }  virtual Slice key() const {    assert(valid_);    return key_;  }  virtual Slice value() const {    assert(valid_);    if (large_ == NULL) {      return value_;    } else {      MutexLock l(&large_->mutex);      if (!large_->produced) {        ReadIndirectValue();      }      return large_->value;    }  }
  virtual void Next() {    assert(valid_);    // iter_ is already positioned past DBIter::key()
    FindNextUserEntry();  }
  virtual void Prev() {    assert(valid_);    bool ignored;    ScanUntilBeforeCurrentKey(&ignored);    FindPrevUserEntry();  }
  virtual void Seek(const Slice& target) {    ParsedInternalKey ikey(target, sequence_, kValueTypeForSeek);    std::string tmp;    AppendInternalKey(&tmp, ikey);    iter_->Seek(tmp);    FindNextUserEntry();  }  virtual void SeekToFirst() {    iter_->SeekToFirst();    FindNextUserEntry();  }
  virtual void SeekToLast();
  virtual Status status() const {    if (status_.ok()) {      if (large_ != NULL && !large_->status.ok()) return large_->status;      return iter_->status();    } else {      return status_;    }  }
 private:  void FindNextUserEntry();  void FindPrevUserEntry();  void SaveKey(const Slice& k) { key_.assign(k.data(), k.size()); }  void SaveValue(const Slice& v) {    if (value_.capacity() > v.size() + 1048576) {      std::string empty;      swap(empty, value_);    }    value_.assign(v.data(), v.size());  }  bool ParseKey(ParsedInternalKey* key);  void SkipPast(const Slice& k);  void ScanUntilBeforeCurrentKey(bool* found_live);
  void ReadIndirectValue() const;
  struct Large {    port::Mutex mutex;    std::string value;    bool produced;    Status status;  };
  const std::string* const dbname_;  Env* const env_;
  const Comparator* const user_comparator_;
  // iter_ is positioned just past current entry for DBIter if valid_
  Iterator* const iter_;
  SequenceNumber const sequence_;  Status status_;  std::string key_;                  // Always a user key
  std::string value_;  Large* large_;      // Non-NULL if value is an indirect reference
  bool valid_;
  // No copying allowed
  DBIter(const DBIter&);  void operator=(const DBIter&);};
// Parses the internal key at iter_'s current position into "*ikey".
// Returns true on success.  On failure records a corruption error in
// status_ and returns false.
inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
  if (ParseInternalKey(iter_->key(), ikey)) {
    return true;
  }
  status_ = Status::Corruption("corrupted internal key in DBIter");
  return false;
}
void DBIter::FindNextUserEntry() {  if (large_ != NULL) {    if (status_.ok() && !large_->status.ok()) {      status_ = large_->status;    }    delete large_;    large_ = NULL;  }  while (iter_->Valid()) {    ParsedInternalKey ikey;    if (!ParseKey(&ikey)) {      // Skip past corrupted entry
      iter_->Next();      continue;    }    if (ikey.sequence > sequence_) {      // Ignore entries newer than the snapshot
      iter_->Next();      continue;    }
    switch (ikey.type) {      case kTypeDeletion:        SaveKey(ikey.user_key);  // Make local copy for use by SkipPast()
        iter_->Next();        SkipPast(key_);        // Do not return deleted entries.  Instead keep looping.
        break;
      case kTypeValue:        SaveKey(ikey.user_key);        SaveValue(iter_->value());        iter_->Next();        SkipPast(key_);        // Yield the value we just found.
        valid_ = true;        return;
      case kTypeLargeValueRef:        SaveKey(ikey.user_key);        // Save the large value ref as value_, and read it lazily on a call
        // to value()
        SaveValue(iter_->value());        large_ = new Large;        large_->produced = false;        iter_->Next();        SkipPast(key_);        // Yield the value we just found.
        valid_ = true;        return;    }  }  valid_ = false;  key_.clear();  value_.clear();  assert(large_ == NULL);}
// Advances iter_ until it is invalid or positioned at a parseable entry
// whose user key differs from "k".
void DBIter::SkipPast(const Slice& k) {
  for (; iter_->Valid(); iter_->Next()) {
    ParsedInternalKey ikey;
    // An unparseable internal key does not stop the scan.  Given a run
    // such as
    //     <x,100,v> => value100
    //     <corrupted entry for user key x>
    //     <x,50,v> => value50
    // both the corrupted entry and value50 are skipped.
    if (ParseKey(&ikey) && user_comparator_->Compare(ikey.user_key, k) != 0) {
      return;
    }
  }
}
// Positions iter_ at the last entry whose internal key parses, remembers
// its user key, and then lets FindPrevUserEntry() do the heavy lifting of
// locating a user key that is live.
void DBIter::SeekToLast() {
  ParsedInternalKey current;
  for (iter_->SeekToLast();
       iter_->Valid() && !ParseKey(&current);
       iter_->Prev()) {
    // Step backwards over corrupted trailing entries.
  }
  if (iter_->Valid()) {
    SaveKey(current.user_key);
  }
  FindPrevUserEntry();
}
// Let X be the user key at which iter_ is currently positioned.
// Moves DBIter to the last entry with a key <= X that has a live value,
// or marks the iterator invalid if there is none.
void DBIter::FindPrevUserEntry() {
  // Worked example:
  //
  //     A@540
  //     A@400
  //
  //     B@300
  //     B@200
  //     B@100        <- iter_
  //
  //     C@301
  //     C@201
  //
  // Comments tagged "(first iteration)" describe where iter_ ends up for
  // this example during the first pass through the loop.  Additional
  // passes happen when B has no live value or when a corruption is hit.
  while (iter_->Valid()) {
    const std::string target(key_);
    bool has_live_value;
    ScanUntilBeforeCurrentKey(&has_live_value);
    // (first iteration) iter_ at A@400
    if (!has_live_value) {
      continue;
    }

    // Step forward into the range of entries with user key >= target.
    if (iter_->Valid()) {
      iter_->Next();
    } else {
      iter_->SeekToFirst();
    }
    // (first iteration) iter_ at B@300

    FindNextUserEntry();  // Sets key_ to the key of the next value it found
    if (valid_ && user_comparator_->Compare(key_, target) == 0) {
      // (first iteration) iter_ at C@301
      return;
    }

    // FindNextUserEntry() found nothing under user key "target".  Since
    // the backward scan did see a live value for it, this is probably a
    // corruption.  Skip backwards to an earlier key and ignore the
    // corrupted entries for "target".
    //
    // (first iteration) iter_ at C@301 and target == "B"
    key_ = target;
    bool unused_found_live;
    ScanUntilBeforeCurrentKey(&unused_found_live);
    // (first iteration) iter_ at A@400
  }

  valid_ = false;
  key_.clear();
  value_.clear();
}
// Moves iter_ backwards until it reaches a visible entry whose user key
// sorts before key_, then stores that user key in key_ and returns.  If
// iter_ runs off the front, key_ is left unchanged.  "*found_live" reflects
// the last visible entry examined before stopping: true if it was a value
// or large-value ref for key_, false if it was a deletion for key_ or had
// a user key greater than key_.
void DBIter::ScanUntilBeforeCurrentKey(bool* found_live) {
  *found_live = false;
  if (!iter_->Valid()) {
    iter_->SeekToLast();
  }

  for (; iter_->Valid(); iter_->Prev()) {
    ParsedInternalKey current;
    // Skip corrupted entries and entries serialized after this read.
    if (!ParseKey(&current) || current.sequence > sequence_) {
      continue;
    }

    const int cmp = user_comparator_->Compare(current.user_key, key_);
    if (cmp < 0) {
      SaveKey(current.user_key);
      return;
    }
    if (cmp > 0) {
      *found_live = false;
      continue;
    }

    // cmp == 0: the entry belongs to the current user key.
    switch (current.type) {
      case kTypeDeletion:
        *found_live = false;
        break;

      case kTypeValue:
      case kTypeLargeValueRef:
        *found_live = true;
        break;
    }
  }
}
void DBIter::ReadIndirectValue() const {  assert(!large_->produced);  large_->produced = true;  LargeValueRef large_ref;  if (value_.size() != LargeValueRef::ByteSize()) {    large_->status = Status::Corruption("malformed large value reference");    return;  }  memcpy(large_ref.data, value_.data(), LargeValueRef::ByteSize());  std::string fname = LargeValueFileName(*dbname_, large_ref);  RandomAccessFile* file;  Status s = env_->NewRandomAccessFile(fname, &file);  if (s.ok()) {    uint64_t file_size = file->Size();    uint64_t value_size = large_ref.ValueSize();    large_->value.resize(value_size);    Slice result;    s = file->Read(0, file_size, &result,                   const_cast<char*>(large_->value.data()));    if (s.ok()) {      if (result.size() == file_size) {        switch (large_ref.compression_type()) {          case kNoCompression: {            if (result.data() != large_->value.data()) {              large_->value.assign(result.data(), result.size());            }            break;          }          case kLightweightCompression: {            std::string uncompressed;            if (port::Lightweight_Uncompress(result.data(), result.size(),                                       &uncompressed) &&                uncompressed.size() == large_ref.ValueSize()) {              swap(uncompressed, large_->value);            } else {              s = Status::Corruption(                  "Unable to read entire compressed large value file");            }          }        }      } else {        s = Status::Corruption("Unable to read entire large value file");      }    }    delete file;        // Ignore errors on closing
  }  if (!s.ok()) {    large_->value.clear();    large_->status = s;  }}
}  // anonymous namespace

// Creates a DBIter over "internal_iter" that exposes user keys as of
// "sequence".  All arguments are forwarded unchanged to the DBIter
// constructor.
Iterator* NewDBIterator(
    const std::string* dbname,
    Env* env,
    const Comparator* user_key_comparator,
    Iterator* internal_iter,
    const SequenceNumber& sequence) {
  Iterator* const db_iter =
      new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
  return db_iter;
}
}