|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#ifndef STORAGE_LEVELDB_DB_DBFORMAT_H_
|
|
#define STORAGE_LEVELDB_DB_DBFORMAT_H_
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <string>
|
|
|
|
#include "leveldb/comparator.h"
|
|
#include "leveldb/db.h"
|
|
#include "leveldb/filter_policy.h"
|
|
#include "leveldb/slice.h"
|
|
#include "leveldb/table_builder.h"
|
|
#include "util/coding.h"
|
|
#include "util/logging.h"
|
|
#include "iostream"
|
|
|
|
namespace leveldb {
|
|
|
|
// Tuning constants, collected in one place. Some of these may eventually
// become settable through options.
namespace config {

// Number of levels.
static constexpr int kNumLevels = 7;

// Number of level-0 files at which a level-0 compaction is started.
static constexpr int kL0_CompactionTrigger = 4;

// Soft limit on the number of level-0 files: writes are slowed down once
// this many files exist.
static constexpr int kL0_SlowdownWritesTrigger = 8;

// Hard limit on the number of level-0 files: writes are stopped once this
// many files exist.
static constexpr int kL0_StopWritesTrigger = 12;

// Highest level to which a newly compacted memtable is pushed when doing so
// creates no overlap. Level 2 is targeted to avoid the relatively expensive
// level 0=>1 compactions and some expensive manifest file operations. The
// largest level is not used because that can waste a lot of disk space when
// the same key space is overwritten repeatedly.
static constexpr int kMaxMemCompactLevel = 2;

// Approximate gap, in bytes, between samples of data read during iteration.
static constexpr int kReadBytesPeriod = 1024 * 1024;

}  // namespace config
|
|
|
|
class InternalKey;
|
|
|
|
// Value types stored as the last component of an internal key.
// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
// data structures.
enum ValueType {
  kTypeDeletion = 0x0,
  kTypeValue = 0x1
};

// The ValueType to pass when building a ParsedInternalKey used to seek to a
// particular sequence number. Sequence numbers sort in decreasing order and
// the value type occupies the low 8 bits of the sequence number inside an
// internal key, so the highest-numbered ValueType must be used here, not the
// lowest.
static constexpr ValueType kValueTypeForSeek = kTypeValue;

using SequenceNumber = uint64_t;

// The bottom eight bits are left free so that a type and a sequence number
// fit together in 64 bits; the largest sequence number is therefore 2^56 - 1.
static constexpr SequenceNumber kMaxSequenceNumber =
    (static_cast<SequenceNumber>(1) << 56) - 1;
|
|
|
|
struct ParsedInternalKey {
|
|
Slice user_key;
|
|
SequenceNumber sequence;
|
|
uint64_t deadTime;
|
|
ValueType type;
|
|
|
|
ParsedInternalKey() {} // Intentionally left uninitialized (for speed)
|
|
ParsedInternalKey(const Slice& u, const SequenceNumber& seq,
|
|
ValueType t, uint64_t d = 0)
|
|
: user_key(u), sequence(seq), type(t), deadTime(d) {}
|
|
std::string DebugString() const;
|
|
};
|
|
|
|
// Return the length of the encoding of "key".
|
|
inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
|
|
return key.user_key.size() + 8 + (key.deadTime != 0) * 8;
|
|
}
|
|
|
|
// Append the serialization of "key" to *result.
|
|
// NOTE: only the declaration is visible in this header.
// NOTE(review): presumably writes the user key, then an 8-byte deadTime when
// it is nonzero, then the 8-byte tag, so that the appended size equals
// InternalKeyEncodingLength(key) -- confirm against the implementation.
void AppendInternalKey(std::string* result, const ParsedInternalKey& key);
|
|
|
|
// Attempt to parse an internal key from "internal_key". On success,
|
|
// stores the parsed data in "*result", and returns true.
|
|
//
|
|
// On error, returns false, leaves "*result" in an undefined state.
|
|
// NOTE: the inline definition of this function appears later in this header.
bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result);
|
|
|
|
// Returns the user key portion of an internal key.
|
|
inline Slice ExtractUserKey(const Slice& internal_key) {
|
|
if(internal_key.size() < 8) {
|
|
std::cout<<"wrong key:"<<internal_key.ToString()<<std::endl;
|
|
}
|
|
assert(internal_key.size() >= 8);
|
|
uint64_t num = DecodeFixed64(internal_key.data() + internal_key.size() - 8);
|
|
uint8_t havettl = (num & 0b10) >> 1;
|
|
uint8_t islookup = (num & 0b100) >> 2;
|
|
size_t klen = internal_key.size() - 8;
|
|
if(havettl || islookup) klen -= 8;
|
|
Slice user_key = Slice(internal_key.data(), klen);
|
|
return user_key;
|
|
}
|
|
|
|
// A comparator for internal keys that uses a specified comparator for
|
|
// the user key portion and breaks ties by decreasing sequence number.
|
|
class InternalKeyComparator : public Comparator {
|
|
private:
|
|
const Comparator* user_comparator_;
|
|
|
|
public:
|
|
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {}
|
|
const char* Name() const override;
|
|
int Compare(const Slice& a, const Slice& b) const override;
|
|
void FindShortestSeparator(std::string* start,
|
|
const Slice& limit) const override;
|
|
void FindShortSuccessor(std::string* key) const override;
|
|
|
|
const Comparator* user_comparator() const { return user_comparator_; }
|
|
|
|
int Compare(const InternalKey& a, const InternalKey& b) const;
|
|
};
|
|
|
|
// Filter policy wrapper that converts from internal keys to user keys
|
|
class InternalFilterPolicy : public FilterPolicy {
|
|
private:
|
|
const FilterPolicy* const user_policy_;
|
|
|
|
public:
|
|
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) {}
|
|
const char* Name() const override;
|
|
void CreateFilter(const Slice* keys, int n, std::string* dst) const override;
|
|
bool KeyMayMatch(const Slice& key, const Slice& filter) const override;
|
|
};
|
|
|
|
// Modules in this directory should keep internal keys wrapped inside
|
|
// the following class instead of plain strings so that we do not
|
|
// incorrectly use string comparisons instead of an InternalKeyComparator.
|
|
class InternalKey {
|
|
private:
|
|
std::string rep_;
|
|
|
|
public:
|
|
InternalKey() {} // Leave rep_ as empty to indicate it is invalid
|
|
InternalKey(const Slice& user_key, SequenceNumber s,
|
|
ValueType t, uint64_t deadTime = 0) {
|
|
AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t, deadTime));
|
|
}
|
|
|
|
bool DecodeFrom(const Slice& s) {
|
|
rep_.assign(s.data(), s.size());
|
|
return !rep_.empty();
|
|
}
|
|
|
|
Slice Encode() const {
|
|
assert(!rep_.empty());
|
|
return rep_;
|
|
}
|
|
|
|
Slice user_key() const { return ExtractUserKey(rep_); }
|
|
|
|
void SetFrom(const ParsedInternalKey& p) {
|
|
rep_.clear();
|
|
AppendInternalKey(&rep_, p);
|
|
}
|
|
|
|
void Clear() { rep_.clear(); }
|
|
|
|
std::string DebugString() const;
|
|
};
|
|
|
|
inline int InternalKeyComparator::Compare(const InternalKey& a,
|
|
const InternalKey& b) const {
|
|
return Compare(a.Encode(), b.Encode());
|
|
}
|
|
|
|
inline bool ParseInternalKey(const Slice& internal_key,
|
|
ParsedInternalKey* result) {
|
|
//不确定需不需要标识islookup,先没改
|
|
const size_t n = internal_key.size();
|
|
if (n < 8) return false;
|
|
uint64_t tag = DecodeFixed64(internal_key.data() + n - 8);
|
|
uint8_t c = tag & 0xff;
|
|
uint8_t havettl = (c & 0b10) >> 1;
|
|
result->sequence = tag >> 8;
|
|
result->type = static_cast<ValueType>(c & 0b1);
|
|
if(havettl){
|
|
result->deadTime = DecodeFixed64(internal_key.data() + n - 16);
|
|
result->user_key = Slice(internal_key.data(), n - 16);
|
|
} else {
|
|
result->deadTime = 0;
|
|
result->user_key = Slice(internal_key.data(), n - 8);
|
|
}
|
|
// return c <= 0b111;
|
|
return ((c & 0b1) <= static_cast<uint8_t>(kTypeValue));
|
|
}
|
|
|
|
// A helper class useful for DBImpl::Get()
|
|
class LookupKey {
|
|
public:
|
|
// Initialize *this for looking up user_key at a snapshot with
|
|
// the specified sequence number.
|
|
LookupKey(const Slice& user_key, SequenceNumber sequence, uint64_t nowTime);
|
|
|
|
LookupKey(const LookupKey&) = delete;
|
|
LookupKey& operator=(const LookupKey&) = delete;
|
|
|
|
~LookupKey();
|
|
|
|
// Return a key suitable for lookup in a MemTable.
|
|
Slice memtable_key() const { return Slice(start_, end_ - start_); }
|
|
|
|
// Return an internal key (suitable for passing to an internal iterator)
|
|
Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
|
|
|
|
// Return the user key
|
|
Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 16); }
|
|
|
|
private:
|
|
// We construct a char array of the form:
|
|
// klength varint32 <-- start_
|
|
// userkey char[klength] <-- kstart_
|
|
// nowTime uint64
|
|
// tag uint64 最后一个字节为0000 0101
|
|
// <-- end_
|
|
// 同userkey下,原本(insert时)的比较器规则为seq优先,不考虑时间
|
|
// 新增标识位(tag倒数第三位),使比较器考虑时间
|
|
// The array is a suitable MemTable key.
|
|
// The suffix starting with "userkey" can be used as an InternalKey.
|
|
const char* start_;
|
|
const char* kstart_;
|
|
const char* end_;
|
|
char space_[200]; // Avoid allocation for short keys
|
|
};
|
|
|
|
// Frees the key buffer unless it is the inline space_ array.
inline LookupKey::~LookupKey() {
  const bool uses_inline_space = (start_ == space_);
  if (!uses_inline_space) {
    delete[] start_;
  }
}
|
|
|
|
} // namespace leveldb
|
|
|
|
#endif // STORAGE_LEVELDB_DB_DBFORMAT_H_
|