|  | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | 
						
						
							|  | // Use of this source code is governed by a BSD-style license that can be | 
						
						
							|  | // found in the LICENSE file. See the AUTHORS file for names of contributors. | 
						
						
							|  | // | 
						
						
							|  | // The representation of a DBImpl consists of a set of Versions.  The | 
						
						
							|  | // newest version is called "current".  Older versions may be kept | 
						
						
							|  | // around to provide a consistent view to live iterators. | 
						
						
							|  | // | 
						
						
							|  | // Each Version keeps track of a set of Table files per level.  The | 
						
						
							|  | // entire set of versions is maintained in a VersionSet. | 
						
						
							|  | // | 
						
						
							|  | // Version,VersionSet are thread-compatible, but require external | 
						
						
							|  | // synchronization on all accesses. | 
						
						
							|  |  | 
						
						
							|  | #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ | 
						
						
							|  | #define STORAGE_LEVELDB_DB_VERSION_SET_H_ | 
						
						
							|  |  | 
						
						
							|  | #include <map> | 
						
						
							|  | #include <set> | 
						
						
							|  | #include <vector> | 
						
						
							|  | #include "db/dbformat.h" | 
						
						
							|  | #include "db/version_edit.h" | 
						
						
							|  | #include "port/port.h" | 
						
						
							|  | #include "port/thread_annotations.h" | 
						
						
							|  |  | 
						
						
							|  | namespace leveldb { | 
						
						
							|  | 
 | 
						
						
							|  | namespace log { class Writer; } | 
						
						
							|  | 
 | 
						
						
							|  | class Compaction; | 
						
						
							|  | class Iterator; | 
						
						
							|  | class MemTable; | 
						
						
							|  | class TableBuilder; | 
						
						
							|  | class TableCache; | 
						
						
							|  | class Version; | 
						
						
							|  | class VersionSet; | 
						
						
							|  | class WritableFile; | 
						
						
							|  | 
 | 
						
						
							|  | // Return the smallest index i such that files[i]->largest >= key. | 
						
						
							|  | // Return files.size() if there is no such file. | 
						
						
							|  | // REQUIRES: "files" contains a sorted list of non-overlapping files. | 
						
						
							|  | extern int FindFile(const InternalKeyComparator& icmp, | 
						
						
							|  |                     const std::vector<FileMetaData*>& files, | 
						
						
							|  |                     const Slice& key); | 
						
						
							|  | 
 | 
						
						
							|  | // Returns true iff some file in "files" overlaps the user key range | 
						
						
							|  | // [*smallest,*largest]. | 
						
						
							|  | // smallest==NULL represents a key smaller than all keys in the DB. | 
						
						
							|  | // largest==NULL represents a key largest than all keys in the DB. | 
						
						
							|  | // REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges | 
						
						
							|  | //           in sorted order. | 
						
						
							|  | extern bool SomeFileOverlapsRange( | 
						
						
							|  |     const InternalKeyComparator& icmp, | 
						
						
							|  |     bool disjoint_sorted_files, | 
						
						
							|  |     const std::vector<FileMetaData*>& files, | 
						
						
							|  |     const Slice* smallest_user_key, | 
						
						
							|  |     const Slice* largest_user_key); | 
						
						
							|  | 
 | 
						
						
							|  | class Version { | 
						
						
							|  |  public: | 
						
						
							|  |   // Append to *iters a sequence of iterators that will | 
						
						
							|  |   // yield the contents of this Version when merged together. | 
						
						
							|  |   // REQUIRES: This version has been saved (see VersionSet::SaveTo) | 
						
						
							|  |   void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters); | 
						
						
							|  | 
 | 
						
						
							|  |   // Lookup the value for key.  If found, store it in *val and | 
						
						
							|  |   // return OK.  Else return a non-OK status.  Fills *stats. | 
						
						
							|  |   // REQUIRES: lock is not held | 
						
						
							|  |   struct GetStats { | 
						
						
							|  |     FileMetaData* seek_file; | 
						
						
							|  |     int seek_file_level; | 
						
						
							|  |   }; | 
						
						
							|  |   Status Get(const ReadOptions&, const LookupKey& key, std::string* val, | 
						
						
							|  |              GetStats* stats); | 
						
						
							|  | 
 | 
						
						
							|  |   // Adds "stats" into the current state.  Returns true if a new | 
						
						
							|  |   // compaction may need to be triggered, false otherwise. | 
						
						
							|  |   // REQUIRES: lock is held | 
						
						
							|  |   bool UpdateStats(const GetStats& stats); | 
						
						
							|  | 
 | 
						
						
							|  |   // Record a sample of bytes read at the specified internal key. | 
						
						
							|  |   // Samples are taken approximately once every config::kReadBytesPeriod | 
						
						
							|  |   // bytes.  Returns true if a new compaction may need to be triggered. | 
						
						
							|  |   // REQUIRES: lock is held | 
						
						
							|  |   bool RecordReadSample(Slice key); | 
						
						
							|  | 
 | 
						
						
							|  |   // Reference count management (so Versions do not disappear out from | 
						
						
							|  |   // under live iterators) | 
						
						
							|  |   void Ref(); | 
						
						
							|  |   void Unref(); | 
						
						
							|  | 
 | 
						
						
							|  |   void GetOverlappingInputs( | 
						
						
							|  |       int level, | 
						
						
							|  |       const InternalKey* begin,         // NULL means before all keys | 
						
						
							|  |       const InternalKey* end,           // NULL means after all keys | 
						
						
							|  |       std::vector<FileMetaData*>* inputs); | 
						
						
							|  | 
 | 
						
						
							|  |   // Returns true iff some file in the specified level overlaps | 
						
						
							|  |   // some part of [*smallest_user_key,*largest_user_key]. | 
						
						
							|  |   // smallest_user_key==NULL represents a key smaller than all keys in the DB. | 
						
						
							|  |   // largest_user_key==NULL represents a key largest than all keys in the DB. | 
						
						
							|  |   bool OverlapInLevel(int level, | 
						
						
							|  |                       const Slice* smallest_user_key, | 
						
						
							|  |                       const Slice* largest_user_key); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the level at which we should place a new memtable compaction | 
						
						
							|  |   // result that covers the range [smallest_user_key,largest_user_key]. | 
						
						
							|  |   int PickLevelForMemTableOutput(const Slice& smallest_user_key, | 
						
						
							|  |                                  const Slice& largest_user_key); | 
						
						
							|  | 
 | 
						
						
							|  |   int NumFiles(int level) const { return files_[level].size(); } | 
						
						
							|  | 
 | 
						
						
							|  |   // Return a human readable string that describes this version's contents. | 
						
						
							|  |   std::string DebugString() const; | 
						
						
							|  | 
 | 
						
						
							|  |  private: | 
						
						
							|  |   friend class Compaction; | 
						
						
							|  |   friend class VersionSet; | 
						
						
							|  | 
 | 
						
						
							|  |   class LevelFileNumIterator; | 
						
						
							|  |   Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; | 
						
						
							|  | 
 | 
						
						
							|  |   // Call func(arg, level, f) for every file that overlaps user_key in | 
						
						
							|  |   // order from newest to oldest.  If an invocation of func returns | 
						
						
							|  |   // false, makes no more calls. | 
						
						
							|  |   // | 
						
						
							|  |   // REQUIRES: user portion of internal_key == user_key. | 
						
						
							|  |   void ForEachOverlapping(Slice user_key, Slice internal_key, | 
						
						
							|  |                           void* arg, | 
						
						
							|  |                           bool (*func)(void*, int, FileMetaData*)); | 
						
						
							|  | 
 | 
						
						
							|  |   VersionSet* vset_;            // VersionSet to which this Version belongs | 
						
						
							|  |   Version* next_;               // Next version in linked list | 
						
						
							|  |   Version* prev_;               // Previous version in linked list | 
						
						
							|  |   int refs_;                    // Number of live refs to this version | 
						
						
							|  |  | 
						
						
							|  |   // List of files per level | 
						
						
							|  |   std::vector<FileMetaData*> files_[config::kNumLevels]; | 
						
						
							|  | 
 | 
						
						
							|  |   // Next file to compact based on seek stats. | 
						
						
							|  |   FileMetaData* file_to_compact_; | 
						
						
							|  |   int file_to_compact_level_; | 
						
						
							|  | 
 | 
						
						
							|  |   // Level that should be compacted next and its compaction score. | 
						
						
							|  |   // Score < 1 means compaction is not strictly needed.  These fields | 
						
						
							|  |   // are initialized by Finalize(). | 
						
						
							|  |   double compaction_score_; | 
						
						
							|  |   int compaction_level_; | 
						
						
							|  | 
 | 
						
						
							|  |   explicit Version(VersionSet* vset) | 
						
						
							|  |       : vset_(vset), next_(this), prev_(this), refs_(0), | 
						
						
							|  |         file_to_compact_(NULL), | 
						
						
							|  |         file_to_compact_level_(-1), | 
						
						
							|  |         compaction_score_(-1), | 
						
						
							|  |         compaction_level_(-1) { | 
						
						
							|  |   } | 
						
						
							|  | 
 | 
						
						
							|  |   ~Version(); | 
						
						
							|  | 
 | 
						
						
							|  |   // No copying allowed | 
						
						
							|  |   Version(const Version&); | 
						
						
							|  |   void operator=(const Version&); | 
						
						
							|  | }; | 
						
						
							|  | 
 | 
						
						
							|  | class VersionSet { | 
						
						
							|  |  public: | 
						
						
							|  |   VersionSet(const std::string& dbname, | 
						
						
							|  |              const Options* options, | 
						
						
							|  |              TableCache* table_cache, | 
						
						
							|  |              const InternalKeyComparator*); | 
						
						
							|  |   ~VersionSet(); | 
						
						
							|  | 
 | 
						
						
							|  |   // Apply *edit to the current version to form a new descriptor that | 
						
						
							|  |   // is both saved to persistent state and installed as the new | 
						
						
							|  |   // current version.  Will release *mu while actually writing to the file. | 
						
						
							|  |   // REQUIRES: *mu is held on entry. | 
						
						
							|  |   // REQUIRES: no other thread concurrently calls LogAndApply() | 
						
						
							|  |   Status LogAndApply(VersionEdit* edit, port::Mutex* mu) | 
						
						
							|  |       EXCLUSIVE_LOCKS_REQUIRED(mu); | 
						
						
							|  | 
 | 
						
						
							|  |   // Recover the last saved descriptor from persistent storage. | 
						
						
							|  |   Status Recover(bool *save_manifest); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the current version. | 
						
						
							|  |   Version* current() const { return current_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the current manifest file number | 
						
						
							|  |   uint64_t ManifestFileNumber() const { return manifest_file_number_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Allocate and return a new file number | 
						
						
							|  |   uint64_t NewFileNumber() { return next_file_number_++; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Arrange to reuse "file_number" unless a newer file number has | 
						
						
							|  |   // already been allocated. | 
						
						
							|  |   // REQUIRES: "file_number" was returned by a call to NewFileNumber(). | 
						
						
							|  |   void ReuseFileNumber(uint64_t file_number) { | 
						
						
							|  |     if (next_file_number_ == file_number + 1) { | 
						
						
							|  |       next_file_number_ = file_number; | 
						
						
							|  |     } | 
						
						
							|  |   } | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the number of Table files at the specified level. | 
						
						
							|  |   int NumLevelFiles(int level) const; | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the combined file size of all files at the specified level. | 
						
						
							|  |   int64_t NumLevelBytes(int level) const; | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the last sequence number. | 
						
						
							|  |   uint64_t LastSequence() const { return last_sequence_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Set the last sequence number to s. | 
						
						
							|  |   void SetLastSequence(uint64_t s) { | 
						
						
							|  |     assert(s >= last_sequence_); | 
						
						
							|  |     last_sequence_ = s; | 
						
						
							|  |   } | 
						
						
							|  | 
 | 
						
						
							|  |   // Mark the specified file number as used. | 
						
						
							|  |   void MarkFileNumberUsed(uint64_t number); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the current log file number. | 
						
						
							|  |   uint64_t LogNumber() const { return log_number_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the log file number for the log file that is currently | 
						
						
							|  |   // being compacted, or zero if there is no such log file. | 
						
						
							|  |   uint64_t PrevLogNumber() const { return prev_log_number_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Pick level and inputs for a new compaction. | 
						
						
							|  |   // Returns NULL if there is no compaction to be done. | 
						
						
							|  |   // Otherwise returns a pointer to a heap-allocated object that | 
						
						
							|  |   // describes the compaction.  Caller should delete the result. | 
						
						
							|  |   Compaction* PickCompaction(); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return a compaction object for compacting the range [begin,end] in | 
						
						
							|  |   // the specified level.  Returns NULL if there is nothing in that | 
						
						
							|  |   // level that overlaps the specified range.  Caller should delete | 
						
						
							|  |   // the result. | 
						
						
							|  |   Compaction* CompactRange( | 
						
						
							|  |       int level, | 
						
						
							|  |       const InternalKey* begin, | 
						
						
							|  |       const InternalKey* end); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the maximum overlapping data (in bytes) at next level for any | 
						
						
							|  |   // file at a level >= 1. | 
						
						
							|  |   int64_t MaxNextLevelOverlappingBytes(); | 
						
						
							|  | 
 | 
						
						
							|  |   // Create an iterator that reads over the compaction inputs for "*c". | 
						
						
							|  |   // The caller should delete the iterator when no longer needed. | 
						
						
							|  |   Iterator* MakeInputIterator(Compaction* c); | 
						
						
							|  | 
 | 
						
						
							|  |   // Returns true iff some level needs a compaction. | 
						
						
							|  |   bool NeedsCompaction() const { | 
						
						
							|  |     Version* v = current_; | 
						
						
							|  |     return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL); | 
						
						
							|  |   } | 
						
						
							|  | 
 | 
						
						
							|  |   // Add all files listed in any live version to *live. | 
						
						
							|  |   // May also mutate some internal state. | 
						
						
							|  |   void AddLiveFiles(std::set<uint64_t>* live); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the approximate offset in the database of the data for | 
						
						
							|  |   // "key" as of version "v". | 
						
						
							|  |   uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return a human-readable short (single-line) summary of the number | 
						
						
							|  |   // of files per level.  Uses *scratch as backing store. | 
						
						
							|  |   struct LevelSummaryStorage { | 
						
						
							|  |     char buffer[100]; | 
						
						
							|  |   }; | 
						
						
							|  |   const char* LevelSummary(LevelSummaryStorage* scratch) const; | 
						
						
							|  | 
 | 
						
						
							|  |  private: | 
						
						
							|  |   class Builder; | 
						
						
							|  | 
 | 
						
						
							|  |   friend class Compaction; | 
						
						
							|  |   friend class Version; | 
						
						
							|  | 
 | 
						
						
							|  |   bool ReuseManifest(const std::string& dscname, const std::string& dscbase); | 
						
						
							|  | 
 | 
						
						
							|  |   void Finalize(Version* v); | 
						
						
							|  | 
 | 
						
						
							|  |   void GetRange(const std::vector<FileMetaData*>& inputs, | 
						
						
							|  |                 InternalKey* smallest, | 
						
						
							|  |                 InternalKey* largest); | 
						
						
							|  | 
 | 
						
						
							|  |   void GetRange2(const std::vector<FileMetaData*>& inputs1, | 
						
						
							|  |                  const std::vector<FileMetaData*>& inputs2, | 
						
						
							|  |                  InternalKey* smallest, | 
						
						
							|  |                  InternalKey* largest); | 
						
						
							|  | 
 | 
						
						
							|  |   void SetupOtherInputs(Compaction* c); | 
						
						
							|  | 
 | 
						
						
							|  |   // Save current contents to *log | 
						
						
							|  |   Status WriteSnapshot(log::Writer* log); | 
						
						
							|  | 
 | 
						
						
							|  |   void AppendVersion(Version* v); | 
						
						
							|  | 
 | 
						
						
							|  |   Env* const env_; | 
						
						
							|  |   const std::string dbname_; | 
						
						
							|  |   const Options* const options_; | 
						
						
							|  |   TableCache* const table_cache_; | 
						
						
							|  |   const InternalKeyComparator icmp_; | 
						
						
							|  |   uint64_t next_file_number_; | 
						
						
							|  |   uint64_t manifest_file_number_; | 
						
						
							|  |   uint64_t last_sequence_; | 
						
						
							|  |   uint64_t log_number_; | 
						
						
							|  |   uint64_t prev_log_number_;  // 0 or backing store for memtable being compacted | 
						
						
							|  |  | 
						
						
							|  |   // Opened lazily | 
						
						
							|  |   WritableFile* descriptor_file_; | 
						
						
							|  |   log::Writer* descriptor_log_; | 
						
						
							|  |   Version dummy_versions_;  // Head of circular doubly-linked list of versions. | 
						
						
							|  |   Version* current_;        // == dummy_versions_.prev_ | 
						
						
							|  |  | 
						
						
							|  |   // Per-level key at which the next compaction at that level should start. | 
						
						
							|  |   // Either an empty string, or a valid InternalKey. | 
						
						
							|  |   std::string compact_pointer_[config::kNumLevels]; | 
						
						
							|  | 
 | 
						
						
							|  |   // No copying allowed | 
						
						
							|  |   VersionSet(const VersionSet&); | 
						
						
							|  |   void operator=(const VersionSet&); | 
						
						
							|  | }; | 
						
						
							|  | 
 | 
						
						
							|  | // A Compaction encapsulates information about a compaction. | 
						
						
							|  | class Compaction { | 
						
						
							|  |  public: | 
						
						
							|  |   ~Compaction(); | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the level that is being compacted.  Inputs from "level" | 
						
						
							|  |   // and "level+1" will be merged to produce a set of "level+1" files. | 
						
						
							|  |   int level() const { return level_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the object that holds the edits to the descriptor done | 
						
						
							|  |   // by this compaction. | 
						
						
							|  |   VersionEdit* edit() { return &edit_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // "which" must be either 0 or 1 | 
						
						
							|  |   int num_input_files(int which) const { return inputs_[which].size(); } | 
						
						
							|  | 
 | 
						
						
							|  |   // Return the ith input file at "level()+which" ("which" must be 0 or 1). | 
						
						
							|  |   FileMetaData* input(int which, int i) const { return inputs_[which][i]; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Maximum size of files to build during this compaction. | 
						
						
							|  |   uint64_t MaxOutputFileSize() const { return max_output_file_size_; } | 
						
						
							|  | 
 | 
						
						
							|  |   // Is this a trivial compaction that can be implemented by just | 
						
						
							|  |   // moving a single input file to the next level (no merging or splitting) | 
						
						
							|  |   bool IsTrivialMove() const; | 
						
						
							|  | 
 | 
						
						
							|  |   // Add all inputs to this compaction as delete operations to *edit. | 
						
						
							|  |   void AddInputDeletions(VersionEdit* edit); | 
						
						
							|  | 
 | 
						
						
							|  |   // Returns true if the information we have available guarantees that | 
						
						
							|  |   // the compaction is producing data in "level+1" for which no data exists | 
						
						
							|  |   // in levels greater than "level+1". | 
						
						
							|  |   bool IsBaseLevelForKey(const Slice& user_key); | 
						
						
							|  | 
 | 
						
						
							|  |   // Returns true iff we should stop building the current output | 
						
						
							|  |   // before processing "internal_key". | 
						
						
							|  |   bool ShouldStopBefore(const Slice& internal_key); | 
						
						
							|  | 
 | 
						
						
							|  |   // Release the input version for the compaction, once the compaction | 
						
						
							|  |   // is successful. | 
						
						
							|  |   void ReleaseInputs(); | 
						
						
							|  | 
 | 
						
						
							|  |  private: | 
						
						
							|  |   friend class Version; | 
						
						
							|  |   friend class VersionSet; | 
						
						
							|  | 
 | 
						
						
							|  |   Compaction(const Options* options, int level); | 
						
						
							|  | 
 | 
						
						
							|  |   int level_; | 
						
						
							|  |   uint64_t max_output_file_size_; | 
						
						
							|  |   Version* input_version_; | 
						
						
							|  |   VersionEdit edit_; | 
						
						
							|  | 
 | 
						
						
							|  |   // Each compaction reads inputs from "level_" and "level_+1" | 
						
						
							|  |   std::vector<FileMetaData*> inputs_[2];      // The two sets of inputs | 
						
						
							|  |  | 
						
						
							|  |   // State used to check for number of of overlapping grandparent files | 
						
						
							|  |   // (parent == level_ + 1, grandparent == level_ + 2) | 
						
						
							|  |   std::vector<FileMetaData*> grandparents_; | 
						
						
							|  |   size_t grandparent_index_;  // Index in grandparent_starts_ | 
						
						
							|  |   bool seen_key_;             // Some output key has been seen | 
						
						
							|  |   int64_t overlapped_bytes_;  // Bytes of overlap between current output | 
						
						
							|  |                               // and grandparent files | 
						
						
							|  |  | 
						
						
							|  |   // State for implementing IsBaseLevelForKey | 
						
						
							|  |  | 
						
						
							|  |   // level_ptrs_ holds indices into input_version_->levels_: our state | 
						
						
							|  |   // is that we are positioned at one of the file ranges for each | 
						
						
							|  |   // higher level than the ones involved in this compaction (i.e. for | 
						
						
							|  |   // all L >= level_ + 2). | 
						
						
							|  |   size_t level_ptrs_[config::kNumLevels]; | 
						
						
							|  | }; | 
						
						
							|  | 
 | 
						
						
							|  | }  // namespace leveldb | 
						
						
							|  |  | 
						
						
							|  | #endif  // STORAGE_LEVELDB_DB_VERSION_SET_H_
 |