// Source: building_data_management_systems.Xuanzhou.2024Fall.DaSE / XOY-Leveldb


								// Copyright (c) 2011 The LevelDB Authors. All rights reserved.

								// Use of this source code is governed by a BSD-style license that can be

								// found in the LICENSE file. See the AUTHORS file for names of contributors.


								#include "db/db_iter.h"


								#include "db/filename.h"

								#include "db/dbformat.h"

								#include "include/env.h"

								#include "include/iterator.h"

								#include "port/port.h"

								#include "util/logging.h"

								#include "util/mutexlock.h"


								namespace leveldb {


								#if 0

								// Debugging aid: walks "iter" from its first entry to its last and
								// prints every internal key to stderr.  Keys that fail to parse are
								// printed in escaped form with a "Corrupt" prefix.
								static void DumpInternalIter(Iterator* iter) {
								  iter->SeekToFirst();
								  while (iter->Valid()) {
								    ParsedInternalKey parsed;
								    if (ParseInternalKey(iter->key(), &parsed)) {
								      fprintf(stderr, "@ '%s'\n", parsed.DebugString().c_str());
								    } else {
								      fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
								    }
								    iter->Next();
								  }
								}

								#endif


								namespace {


								// Memtables and sstables that make the DB representation contain
								// (userkey,seq,type) => uservalue entries.  DBIter combines multiple
								// entries for the same userkey found in the DB representation into a
								// single entry while accounting for sequence numbers, deletion
								// markers, overwrites, etc.
								//
								// Values stored as large value references are not loaded when the
								// iterator is positioned; they are read on the first call to value()
								// and cached in a per-entry Large record.  Because value() and
								// status() are const yet touch that record, both access it while
								// holding Large::mutex.
								class DBIter: public Iterator {
								 public:
								  // "iter" is the internal iterator to wrap; it is deleted by the
								  // destructor.  "dbname" and "env" are stored as given and used when
								  // a large value has to be read.  Entries with a sequence number
								  // greater than "s" are ignored.
								  DBIter(const std::string* dbname, Env* env,
								         const Comparator* cmp, Iterator* iter, SequenceNumber s)
								      : dbname_(dbname),
								        env_(env),
								        user_comparator_(cmp),
								        iter_(iter),
								        sequence_(s),
								        large_(NULL),
								        valid_(false) {
								  }

								  virtual ~DBIter() {
								    delete iter_;
								    delete large_;
								  }

								  virtual bool Valid() const { return valid_; }

								  // Returns the user key of the current entry.  REQUIRES: Valid()
								  virtual Slice key() const {
								    assert(valid_);
								    return key_;
								  }

								  // Returns the value of the current entry.  For a large value
								  // reference the referenced data is read on the first call and the
								  // cached copy is returned afterwards; a failed read yields an empty
								  // value and is reported through status().  REQUIRES: Valid()
								  virtual Slice value() const {
								    assert(valid_);
								    if (large_ == NULL) {
								      return value_;
								    } else {
								      MutexLock l(&large_->mutex);
								      if (!large_->produced) {
								        ReadIndirectValue();
								      }
								      return large_->value;
								    }
								  }

								  virtual void Next() {
								    assert(valid_);
								    // iter_ is already positioned past DBIter::key()
								    FindNextUserEntry();
								  }

								  virtual void Prev() {
								    assert(valid_);
								    // First move iter_ in front of every entry for the current key,
								    // then search backwards for a key that has a live value.
								    bool ignored;
								    ScanUntilBeforeCurrentKey(&ignored);
								    FindPrevUserEntry();
								  }

								  virtual void Seek(const Slice& target) {
								    // Build an internal key for "target" at the snapshot sequence
								    // number and seek the internal iterator to it.
								    ParsedInternalKey ikey(target, sequence_, kValueTypeForSeek);
								    std::string tmp;
								    AppendInternalKey(&tmp, ikey);
								    iter_->Seek(tmp);
								    FindNextUserEntry();
								  }

								  virtual void SeekToFirst() {
								    iter_->SeekToFirst();
								    FindNextUserEntry();
								  }

								  virtual void SeekToLast();

								  // Returns the first error found, checked in this order: an error
								  // recorded by DBIter itself, an error from loading the current large
								  // value, and finally the internal iterator's status.
								  virtual Status status() const {
								    if (!status_.ok()) {
								      return status_;
								    }
								    if (large_ != NULL) {
								      // value() fills in large_->status under this mutex, so take it
								      // here as well instead of reading the field unsynchronized.
								      MutexLock l(&large_->mutex);
								      if (!large_->status.ok()) return large_->status;
								    }
								    return iter_->status();
								  }

								 private:
								  void FindNextUserEntry();
								  void FindPrevUserEntry();

								  // Copies "k" into key_.
								  void SaveKey(const Slice& k) { key_.assign(k.data(), k.size()); }

								  // Copies "v" into value_.  If value_ currently holds more than 1MB
								  // (1048576 bytes) of capacity beyond what "v" needs, the old buffer
								  // is released first rather than kept around.
								  void SaveValue(const Slice& v) {
								    if (value_.capacity() > v.size() + 1048576) {
								      std::string empty;
								      swap(empty, value_);
								    }
								    value_.assign(v.data(), v.size());
								  }

								  bool ParseKey(ParsedInternalKey* key);
								  void SkipPast(const Slice& k);
								  void ScanUntilBeforeCurrentKey(bool* found_live);

								  void ReadIndirectValue() const;

								  // State for a lazily loaded large value.
								  struct Large {
								    port::Mutex mutex;   // Held by value() and status() around the fields below
								    std::string value;   // Loaded contents; empty if loading failed
								    bool produced;       // True once a load has been attempted
								    Status status;       // Outcome of the load
								  };

								  const std::string* const dbname_;
								  Env* const env_;

								  const Comparator* const user_comparator_;

								  // iter_ is positioned just past current entry for DBIter if valid_
								  Iterator* const iter_;

								  SequenceNumber const sequence_;
								  Status status_;
								  std::string key_;                  // Always a user key
								  std::string value_;
								  Large* large_;      // Non-NULL if value is an indirect reference
								  bool valid_;

								  // No copying allowed
								  DBIter(const DBIter&);
								  void operator=(const DBIter&);
								};


								// Decodes the internal key at iter_'s current position into *ikey.
								// Returns true on success.  On failure, records a Corruption error in
								// status_ and returns false.
								inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
								  const bool parsed = ParseInternalKey(iter_->key(), ikey);
								  if (!parsed) {
								    status_ = Status::Corruption("corrupted internal key in DBIter");
								  }
								  return parsed;
								}


								// Starting at iter_'s current position, advances forward to the first
								// user key whose newest entry visible at sequence_ is a value (plain or
								// large value reference).  On success key_/value_ hold that entry,
								// iter_ is left past all entries for the key, and valid_ is true.  If
								// no such entry exists, valid_ is false and key_/value_ are cleared.
								void DBIter::FindNextUserEntry() {
								  // Drop the lazily loaded value of the previous position, but keep
								  // any error it produced if no earlier error was recorded.
								  if (large_ != NULL) {
								    const bool adopt_error = status_.ok() && !large_->status.ok();
								    if (adopt_error) {
								      status_ = large_->status;
								    }
								    delete large_;
								    large_ = NULL;
								  }

								  while (iter_->Valid()) {
								    ParsedInternalKey ikey;
								    // Step over corrupted entries and entries newer than the snapshot.
								    if (!ParseKey(&ikey) || ikey.sequence > sequence_) {
								      iter_->Next();
								      continue;
								    }

								    if (ikey.type == kTypeDeletion) {
								      // Make a local copy of the key for use by SkipPast(), then hide
								      // every remaining entry for it.  Deleted entries are not
								      // returned; keep looping.
								      SaveKey(ikey.user_key);
								      iter_->Next();
								      SkipPast(key_);
								      continue;
								    }

								    const bool is_large_ref = (ikey.type == kTypeLargeValueRef);
								    if (ikey.type == kTypeValue || is_large_ref) {
								      SaveKey(ikey.user_key);
								      // For a large value this stores the reference itself in value_;
								      // the referenced data is read lazily on a call to value().
								      SaveValue(iter_->value());
								      if (is_large_ref) {
								        large_ = new Large;
								        large_->produced = false;
								      }
								      iter_->Next();
								      SkipPast(key_);
								      // Yield the value we just found.
								      valid_ = true;
								      return;
								    }
								  }

								  // Ran off the end without finding a live value.
								  valid_ = false;
								  key_.clear();
								  value_.clear();
								  assert(large_ == NULL);
								}


								void DBIter::SkipPast(const Slice& k) {

								  while (iter_->Valid()) {

								    ParsedInternalKey ikey;

								    // Note that if we cannot parse an internal key, we keep looping

								    // so that if we have a run like the following:

								    //     <x,100,v> => value100

								    //     <corrupted entry for user key x>

								    //     <x,50,v> => value50

								    // we will skip over the corrupted entry as well as value50.

								    if (ParseKey(&ikey) && user_comparator_->Compare(ikey.user_key, k) != 0) {

								      break;

								    }

								    iter_->Next();

								  }

								}


								void DBIter::SeekToLast() {

								  // Position iter_ at the last uncorrupted user key and then

								  // let FindPrevUserEntry() do the heavy lifting to find

								  // a user key that is live.

								  iter_->SeekToLast();

								  ParsedInternalKey current;

								  while (iter_->Valid() && !ParseKey(&current)) {

								    iter_->Prev();

								  }

								  if (iter_->Valid()) {

								    SaveKey(current.user_key);

								  }

								  FindPrevUserEntry();

								}


								// Let X be the user key at which iter_ is currently positioned.
								// Adjust DBIter to point at the last entry with a key <= X that
								// has a live value.  If there is none, DBIter becomes invalid and
								// key_/value_ are cleared.
								void DBIter::FindPrevUserEntry() {
								  // Worked example:
								  //
								  //     A@540
								  //     A@400
								  //
								  //     B@300
								  //     B@200
								  //     B@100        <- iter_
								  //
								  //     C@301
								  //     C@201
								  //
								  // Notes tagged "(pass 1)" describe the first trip through the loop
								  // for this example.  Further trips happen when B has no live value
								  // or when a corruption is encountered.
								  while (iter_->Valid()) {
								    const std::string target = key_;
								    bool has_live_value;
								    ScanUntilBeforeCurrentKey(&has_live_value);
								    // (pass 1) iter_ at A@400
								    if (!has_live_value) {
								      // Nothing live under "target"; retry with the earlier key.
								      continue;
								    }

								    // Step forward into the range of entries with user key >= target.
								    if (iter_->Valid()) {
								      iter_->Next();
								    } else {
								      iter_->SeekToFirst();
								    }
								    // (pass 1) iter_ at B@300

								    // Sets key_ to the key of the next value it finds.
								    FindNextUserEntry();
								    if (valid_ && user_comparator_->Compare(key_, target) == 0) {
								      // (pass 1) iter_ at C@301
								      return;
								    }

								    // FindNextUserEntry() produced nothing under "target" even though
								    // the backward scan saw a live value there, which is probably a
								    // corruption.  Restore the key and scan backwards again so the
								    // corrupted entries for "target" are ignored.
								    //
								    // (pass 1) iter_ at C@301 and target == "B"
								    key_ = target;
								    bool unused;
								    ScanUntilBeforeCurrentKey(&unused);
								    // (pass 1) iter_ at A@400
								  }

								  valid_ = false;
								  key_.clear();
								  value_.clear();
								}


								void DBIter::ScanUntilBeforeCurrentKey(bool* found_live) {

								  *found_live = false;

								  if (!iter_->Valid()) {

								    iter_->SeekToLast();

								  }


								  while (iter_->Valid()) {

								    ParsedInternalKey current;

								    if (!ParseKey(&current)) {

								      iter_->Prev();

								      continue;

								    }


								    if (current.sequence > sequence_) {

								      // Ignore entries that are serialized after this read

								      iter_->Prev();

								      continue;

								    }


								    const int cmp = user_comparator_->Compare(current.user_key, key_);

								    if (cmp < 0) {

								      SaveKey(current.user_key);

								      return;

								    } else if (cmp == 0) {

								      switch (current.type) {

								        case kTypeDeletion:

								          *found_live = false;

								          break;


								        case kTypeValue:

								        case kTypeLargeValueRef:

								          *found_live = true;

								          break;

								      }

								    } else {  // cmp > 0

								      *found_live = false;

								    }


								    iter_->Prev();

								  }

								}


								// Loads the data referenced by the large value reference held in
								// value_ into large_->value.  Must be called at most once per Large
								// record (asserted via large_->produced); value() calls it while
								// holding large_->mutex.
								//
								// On failure large_->value is left empty and the error is stored in
								// large_->status.
								void DBIter::ReadIndirectValue() const {
								  assert(!large_->produced);
								  large_->produced = true;
								  LargeValueRef large_ref;
								  if (value_.size() != LargeValueRef::ByteSize()) {
								    large_->status = Status::Corruption("malformed large value reference");
								    return;
								  }
								  memcpy(large_ref.data, value_.data(), LargeValueRef::ByteSize());
								  std::string fname = LargeValueFileName(*dbname_, large_ref);
								  RandomAccessFile* file;
								  Status s = env_->NewRandomAccessFile(fname, &file);
								  if (s.ok()) {
								    const uint64_t file_size = file->Size();
								    const uint64_t value_size = large_ref.ValueSize();
								    // Read() is asked to store file_size bytes into this buffer, so it
								    // must be at least that large.  Sizing it by value_size alone
								    // would overflow whenever the file is bigger than the reference
								    // claims (e.g. a corrupted reference or file).
								    large_->value.resize(file_size > value_size ? file_size : value_size);
								    Slice result;
								    s = file->Read(0, file_size, &result,
								                   const_cast<char*>(large_->value.data()));
								    if (s.ok()) {
								      if (result.size() == file_size) {
								        switch (large_ref.compression_type()) {
								          case kNoCompression: {
								            if (file_size != value_size) {
								              // An uncompressed file must hold exactly the value.
								              s = Status::Corruption(
								                  "large value file size does not match reference");
								            } else if (result.data() != large_->value.data()) {
								              // Read() returned data that lives outside our buffer.
								              large_->value.assign(result.data(), result.size());
								            }
								            break;
								          }
								          case kLightweightCompression: {
								            std::string uncompressed;
								            if (port::Lightweight_Uncompress(result.data(), result.size(),
								                                       &uncompressed) &&
								                uncompressed.size() == large_ref.ValueSize()) {
								              swap(uncompressed, large_->value);
								            } else {
								              s = Status::Corruption(
								                  "Unable to read entire compressed large value file");
								            }
								            break;
								          }
								          default: {
								            // Do not hand back raw file bytes for a type we cannot decode.
								            s = Status::Corruption(
								                "unknown compression type in large value reference");
								            break;
								          }
								        }
								      } else {
								        s = Status::Corruption("Unable to read entire large value file");
								      }
								    }
								    delete file;        // Ignore errors on closing
								  }
								  if (!s.ok()) {
								    large_->value.clear();
								    large_->status = s;
								  }
								}


								}  // anonymous namespace


								// Returns a newly allocated iterator that exposes the user-visible
								// contents of "internal_iter" as of "sequence".  The arguments are
								// forwarded unchanged to the DBIter constructor.
								Iterator* NewDBIterator(
								    const std::string* dbname,
								    Env* env,
								    const Comparator* user_key_comparator,
								    Iterator* internal_iter,
								    const SequenceNumber& sequence) {
								  DBIter* const db_iter =
								      new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
								  return db_iter;
								}


								}