12 Commits

23 changed files with 489 additions and 84 deletions
Split View
  1. +5
    -0
      CMakeLists.txt
  2. +20
    -0
      db/builder.cc
  3. +1
    -1
      db/c.cc
  4. +83
    -23
      db/db_impl.cc
  5. +1
    -2
      db/db_impl.h
  6. +6
    -6
      db/db_test.cc
  7. +46
    -9
      db/dbformat.cc
  8. +40
    -15
      db/dbformat.h
  9. +1
    -1
      db/dumpfile.cc
  10. +78
    -5
      db/memtable.cc
  11. +1
    -1
      db/memtable.h
  12. +1
    -1
      db/repair.cc
  13. +2
    -0
      db/skiplist.h
  14. +9
    -1
      db/version_edit.cc
  15. +9
    -2
      db/version_edit.h
  16. +5
    -1
      db/version_set.cc
  17. +2
    -0
      db/version_set.h
  18. +36
    -6
      db/write_batch.cc
  19. +3
    -3
      include/leveldb/db.h
  20. +3
    -2
      include/leveldb/write_batch.h
  21. +11
    -0
      table/block.cc
  22. +113
    -0
      test/ttl_mmtable_test.cc
  23. +13
    -5
      test/ttl_test.cc

+ 5
- 0
CMakeLists.txt View File

@ -107,6 +107,7 @@ configure_file(
include_directories(
"${PROJECT_BINARY_DIR}/include"
"."
"./third_party/googletest/googletest/include/"
)
if(BUILD_SHARED_LIBS)
@ -524,6 +525,10 @@ add_executable(db_test2
)
target_link_libraries(db_test2 PRIVATE leveldb)
add_executable(ttl_mmtable_test
"${PROJECT_SOURCE_DIR}/test/ttl_mmtable_test.cc"
)
target_link_libraries(ttl_mmtable_test PRIVATE leveldb)
add_executable(ttl_test
"${PROJECT_SOURCE_DIR}/test/ttl_test.cc"

+ 20
- 0
db/builder.cc View File

@ -28,12 +28,32 @@ Status BuildTable(const std::string& dbname, Env* env, const Options& options,
return s;
}
time_t nowTime;
time(&nowTime);
assert(nowTime > 0);
TableBuilder* builder = new TableBuilder(options, file);
meta->smallest.DecodeFrom(iter->key());
Slice key;
ParsedInternalKey parsed;
meta->smallest_deadtime = UINT64_MAX;
meta->largest_deadtime = 0;
for (; iter->Valid(); iter->Next()) {
key = iter->key();
builder->Add(key, iter->value());
//在构建sstable文件的时候,记录当前文件的生存期最大和最小值,
//这里要注意internalkey和metadata中对于没有生存期的表示的转换
ParseInternalKey(key,&parsed);
if(parsed.deadTime == 0) parsed.deadTime = UINT64_MAX;
if(parsed.deadTime < nowTime) {
static int count = 0;
if(count % 1000 == 0) {
std::cout<<"count "<<count++<<" drop dead in L0: "<<parsed.user_key.ToString()<<" "<<parsed.deadTime<<std::endl;
}
continue;
}
meta->smallest_deadtime = std::min(meta->smallest_deadtime,parsed.deadTime);
meta->largest_deadtime = std::max(meta->largest_deadtime,parsed.deadTime);
}
if (!key.empty()) {
meta->largest.DecodeFrom(key);

+ 1
- 1
db/c.cc View File

@ -349,7 +349,7 @@ void leveldb_writebatch_iterate(const leveldb_writebatch_t* b, void* state,
void* state_;
void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen);
void (*deleted_)(void*, const char* k, size_t klen);
void Put(const Slice& key, const Slice& value) override {
void Put(const Slice& key, const Slice& value, uint64_t deadTime) override {
(*put_)(state_, key.data(), key.size(), value.data(), value.size());
}
void Delete(const Slice& key) override {

+ 83
- 23
db/db_impl.cc View File

@ -52,11 +52,12 @@ struct DBImpl::Writer {
};
struct DBImpl::CompactionState {
// Files produced by compaction
// Files produced by compaction 这里的改动和filemetadata对应
struct Output {
uint64_t number;
uint64_t file_size;
InternalKey smallest, largest;
uint64_t smallest_deadtime,largest_deadtime;
};
Output* current_output() { return &outputs[outputs.size() - 1]; }
@ -536,7 +537,7 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
}
edit->AddFile(level, meta.number, meta.file_size, meta.smallest,
meta.largest);
meta.largest,meta.smallest_deadtime,meta.largest_deadtime);
}
CompactionStats stats;
@ -590,6 +591,7 @@ void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
}
}
}
// max_level_with_files = config::kNumLevels - 1; //TODO:强制合并所有level中的sst,但是这么做不是很优雅
TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap
for (int level = 0; level < max_level_with_files; level++) {
TEST_CompactRange(level, begin, end);
@ -704,6 +706,31 @@ void DBImpl::BackgroundCall() {
background_work_finished_signal_.SignalAll();
}
bool DBImpl::RemoveExpireTable() {
bool remove = false;
VersionEdit edit;
time_t nowTime;
time(&nowTime);
Version *base = versions_->current();
base->Ref();
for(int level = 0; level < config::kNumLevels; level ++) {
const std::vector<FileMetaData*> &files = versions_->current()->Files(level);
for(auto meta:files) {
if(meta->largest_deadtime < nowTime) {
remove = true;
edit.RemoveFile(level,meta->number);
std::cout<<"remove file : "<<meta->number<<" from level : "<<level<<" deadtime : "<<meta->largest_deadtime<<std::endl;
}
}
}
if(remove) {
versions_->LogAndApply(&edit,&mutex_);
// RemoveObsoleteFiles();
}
base->Unref();
return remove;
}
void DBImpl::BackgroundCompaction() {
mutex_.AssertHeld();
@ -712,6 +739,10 @@ void DBImpl::BackgroundCompaction() {
return;
}
if(RemoveExpireTable()) {
return;
}
Compaction* c;
bool is_manual = (manual_compaction_ != nullptr);
InternalKey manual_end;
@ -740,7 +771,7 @@ void DBImpl::BackgroundCompaction() {
FileMetaData* f = c->input(0, 0);
c->edit()->RemoveFile(c->level(), f->number);
c->edit()->AddFile(c->level() + 1, f->number, f->file_size, f->smallest,
f->largest);
f->largest,f->smallest_deadtime,f->largest_deadtime);
status = versions_->LogAndApply(c->edit(), &mutex_);
if (!status.ok()) {
RecordBackgroundError(status);
@ -814,6 +845,8 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
out.number = file_number;
out.smallest.Clear();
out.largest.Clear();
out.smallest_deadtime = UINT64_MAX;
out.largest_deadtime = 0;
compact->outputs.push_back(out);
mutex_.Unlock();
}
@ -889,7 +922,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
for (size_t i = 0; i < compact->outputs.size(); i++) {
const CompactionState::Output& out = compact->outputs[i];
compact->compaction->edit()->AddFile(level + 1, out.number, out.file_size,
out.smallest, out.largest);
out.smallest, out.largest,out.smallest_deadtime,out.largest_deadtime);
}
return versions_->LogAndApply(compact->compaction->edit(), &mutex_);
}
@ -948,11 +981,20 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
// Handle key/value, add to state, etc.
bool drop = false;
time_t nowTime;
time(&nowTime);
if (!ParseInternalKey(key, &ikey)) {
// Do not hide error keys
current_user_key.clear();
has_current_user_key = false;
last_sequence_for_key = kMaxSequenceNumber;
} else if(ikey.deadTime != 0 && ikey.deadTime < nowTime){
static int count = 0;
if(count % 1000 == 0) {
std::cout<<"count "<<count<<" drop dead in Compaction: "<<ikey.user_key.ToString()<<" "<<ikey.deadTime<<std::endl;
count ++;
}
drop = true;
} else {
if (!has_current_user_key ||
user_comparator()->Compare(ikey.user_key, Slice(current_user_key)) !=
@ -978,7 +1020,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
// Therefore this deletion marker is obsolete and can be dropped.
drop = true;
}
last_sequence_for_key = ikey.sequence;
}
#if 0
@ -990,7 +1031,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
compact->compaction->IsBaseLevelForKey(ikey.user_key),
(int)last_sequence_for_key, (int)compact->smallest_snapshot);
#endif
if (!drop) {
// Open output file if necessary
if (compact->builder == nullptr) {
@ -1003,6 +1043,17 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
compact->current_output()->smallest.DecodeFrom(key);
}
compact->current_output()->largest.DecodeFrom(key);
ParsedInternalKey parsed;
ParseInternalKey(key,&parsed);
uint64_t &smallest_deadtime = compact->current_output()->smallest_deadtime;
uint64_t &largest_deadtime = compact->current_output()->largest_deadtime;
if(parsed.deadTime == 0) {
smallest_deadtime = UINT64_MAX;
}
smallest_deadtime = std::min(smallest_deadtime,parsed.deadTime);
largest_deadtime = std::max(largest_deadtime,parsed.deadTime);
compact->builder->Add(key, input->value());
// Close output file if it is big enough
@ -1143,12 +1194,19 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& key,
{
mutex_.Unlock();
// First look in the memtable, then in the immutable memtable (if any).
LookupKey lkey(key, snapshot);
time_t nowTime;
time(&nowTime);
assert(nowTime > 0);
LookupKey lkey(key, snapshot, nowTime);
if (mem->Get(lkey, value, &s)) {
printf("in mem\n");
// Done
} else if (imm != nullptr && imm->Get(lkey, value, &s)) {
printf("in immem\n");
// Done
} else {
printf("in current\n");
s = current->Get(options, lkey, value, &stats);
have_stat_update = true;
}
@ -1194,14 +1252,15 @@ void DBImpl::ReleaseSnapshot(const Snapshot* snapshot) {
}
// Convenience methods
Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) {
return DB::Put(o, key, val);
Status DBImpl::Put(const WriteOptions& o, const Slice& key,
const Slice& val, uint64_t ttl) {
return DB::Put(o, key, val, ttl);
}
Status DBImpl::Put(const WriteOptions& options, const Slice& key,
const Slice& value, uint64_t ttl) {
return DB::Put(options,key,value,ttl);
}
// Status DBImpl::Put(const WriteOptions& options, const Slice& key,
// const Slice& value, uint64_t ttl) {
// return DB::Put(options,key,value,ttl);
// }
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
return DB::Delete(options, key);
@ -1237,7 +1296,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
// into mem_.
{
mutex_.Unlock();
status = log_->AddRecord(WriteBatchInternal::Contents(write_batch));
status = log_->AddRecord(WriteBatchInternal::Contents(write_batch)); //这里会有影响吗
bool sync_error = false;
if (status.ok() && options.sync) {
status = logfile_->Sync();
@ -1490,19 +1549,20 @@ void DBImpl::GetApproximateSizes(const Range* range, int n, uint64_t* sizes) {
// Default implementations of convenience methods that subclasses of DB
// can call if they wish
Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {
Status DB::Put(const WriteOptions& opt, const Slice& key,
const Slice& value, uint64_t ttl) {
WriteBatch batch;
batch.Put(key, value);
batch.Put(key, value, ttl);
return Write(opt, &batch);
}
//为了通过编译,忽略ttl
Status DB::Put(const WriteOptions& options, const Slice& key,
const Slice& value, uint64_t ttl) {
WriteBatch batch;
batch.Put(key, value);
return Write(options, &batch);
}
// //为了通过编译,忽略ttl
// Status DB::Put(const WriteOptions& options, const Slice& key,
// const Slice& value, uint64_t ttl) {
// WriteBatch batch;
// batch.Put(key, value);
// return Write(options, &batch);
// }
Status DB::Delete(const WriteOptions& opt, const Slice& key) {
WriteBatch batch;

+ 1
- 2
db/db_impl.h View File

@ -37,8 +37,6 @@ class DBImpl : public DB {
// Implementations of the DB interface
Status Put(const WriteOptions&, const Slice& key,
const Slice& value) override;
Status Put(const WriteOptions& options, const Slice& key,
const Slice& value, uint64_t ttl) override;
Status Delete(const WriteOptions&, const Slice& key) override;
Status Write(const WriteOptions& options, WriteBatch* updates) override;
@ -147,6 +145,7 @@ class DBImpl : public DB {
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status DoCompactionWork(CompactionState* compact)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
bool RemoveExpireTable(); //if remove some table thne return true otherwise return false
Status OpenCompactionOutputFile(CompactionState* compact);
Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);

+ 6
- 6
db/db_test.cc View File

@ -2114,12 +2114,12 @@ class ModelDB : public DB {
explicit ModelDB(const Options& options) : options_(options) {}
~ModelDB() override = default;
Status Put(const WriteOptions& o, const Slice& k, const Slice& v) override {
return DB::Put(o, k, v);
}
Status Put(const WriteOptions& o, const Slice& k,const Slice& v,uint64_t ttl) {
return DB::Put(o,k,v);
Status Put(const WriteOptions& o, const Slice& k, const Slice& v, uint64_t ttl = 0) override {
return DB::Put(o, k, v, ttl);
}
// Status Put(const WriteOptions& o, const Slice& k,const Slice& v,uint64_t ttl) {
// return DB::Put(o,k,v);
// }
Status Delete(const WriteOptions& o, const Slice& key) override {
return DB::Delete(o, key);
}
@ -2152,7 +2152,7 @@ class ModelDB : public DB {
class Handler : public WriteBatch::Handler {
public:
KVMap* map_;
void Put(const Slice& key, const Slice& value) override {
void Put(const Slice& key, const Slice& value, uint64_t deadTime = 0) override {
(*map_)[key.ToString()] = value.ToString();
}
void Delete(const Slice& key) override { map_->erase(key.ToString()); }

+ 46
- 9
db/dbformat.cc View File

@ -12,6 +12,14 @@
namespace leveldb {
static uint64_t PackSequenceAndTypeAndTtlAndLookup(
uint64_t seq, ValueType t, bool havettl, bool islookup) {
assert(seq <= kMaxSequenceNumber);
assert(t <= kValueTypeForSeek);
return (seq << 8) | (islookup << 2) | (havettl << 1) | t;
}
//下面有两个调这个函数的没改,也许也要修改标志位?
static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
assert(seq <= kMaxSequenceNumber);
assert(t <= kValueTypeForSeek);
@ -20,7 +28,10 @@ static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
result->append(key.user_key.data(), key.user_key.size());
PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
if(key.deadTime != 0)
PutFixed64(result, key.deadTime);
PutFixed64(result, PackSequenceAndTypeAndTtlAndLookup(
key.sequence, key.type, (key.deadTime != 0), false));
}
std::string ParsedInternalKey::DebugString() const {
@ -49,13 +60,35 @@ int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
// increasing user key (according to user-supplied comparator)
// decreasing sequence number
// decreasing type (though sequence# should be enough to disambiguate)
//目前看调用时都是a为node, b为key,万一有不是的,逻辑还得补充
//for debug
// std::string a = ExtractUserKey(akey).ToString();
// std::string b = ExtractUserKey(bkey).ToString();
int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
if (r == 0) {
const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
if (anum > bnum) {
const uint64_t atag = DecodeFixed64(akey.data() + akey.size() - 8);
const uint64_t btag = DecodeFixed64(bkey.data() + bkey.size() - 8);
const uint64_t aseq = atag >> 8;
const uint64_t bseq = btag >> 8;
if (aseq > bseq) {
r = -1;
} else if (anum < bnum) {
return r;
}
const uint64_t atime = DecodeFixed64(akey.data() + akey.size() - 16);
const uint64_t btime = DecodeFixed64(bkey.data() + bkey.size() - 16);
//原本应该找到了,新加判断
// if((btag & 0b100) && (atag & 0b10)){ //一个是查询键,另一个有ttl
// const uint64_t atime = DecodeFixed64(akey.data() + akey.size() - 16);
// const uint64_t btime = DecodeFixed64(bkey.data() + bkey.size() - 16);
// std::cout<<"atime:"<<atime<<" btime:"<<btime<<" "<<aseq<<" "<<bseq<<" "<<btag<<" "<<atag<<std::endl;
// if(atime <= btime){//过期了继续找
// r = -1;
// return r;
// }
// }
if (aseq < bseq) {
r = +1;
}
}
@ -114,9 +147,9 @@ bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const {
return user_policy_->KeyMayMatch(ExtractUserKey(key), f);
}
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s, uint64_t nowTime) {
size_t usize = user_key.size();
size_t needed = usize + 13; // A conservative estimate
size_t needed = usize + 21; // A conservative estimate
char* dst;
if (needed <= sizeof(space_)) {
dst = space_;
@ -124,13 +157,17 @@ LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
dst = new char[needed];
}
start_ = dst;
dst = EncodeVarint32(dst, usize + 8);
dst = EncodeVarint32(dst, usize + 16);
kstart_ = dst;
std::memcpy(dst, user_key.data(), usize);
dst += usize;
EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek));
EncodeFixed64(dst, nowTime);
dst += 8;
// EncodeFixed64(dst, PackSequenceAndTypeAndTtlAndLookup(s, kValueTypeForSeek, 0, true));
EncodeFixed64(dst, PackSequenceAndTypeAndTtlAndLookup(s, kValueTypeForSeek, 1, false));
dst += 8;
end_ = dst;
printf("lookupkey tag:%lx\n",PackSequenceAndTypeAndTtlAndLookup(s, kValueTypeForSeek, 1, false));
}
} // namespace leveldb

+ 40
- 15
db/dbformat.h View File

@ -16,6 +16,7 @@
#include "leveldb/table_builder.h"
#include "util/coding.h"
#include "util/logging.h"
#include "iostream"
namespace leveldb {
@ -69,17 +70,19 @@ static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
struct ParsedInternalKey {
Slice user_key;
SequenceNumber sequence;
uint64_t deadTime;
ValueType type;
ParsedInternalKey() {} // Intentionally left uninitialized (for speed)
ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
: user_key(u), sequence(seq), type(t) {}
ParsedInternalKey(const Slice& u, const SequenceNumber& seq,
ValueType t, uint64_t d = 0)
: user_key(u), sequence(seq), type(t), deadTime(d) {}
std::string DebugString() const;
};
// Return the length of the encoding of "key".
inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
return key.user_key.size() + 8;
return key.user_key.size() + 8 + (key.deadTime != 0) * 8;
}
// Append the serialization of "key" to *result.
@ -93,8 +96,17 @@ bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result);
// Returns the user key portion of an internal key.
inline Slice ExtractUserKey(const Slice& internal_key) {
if(internal_key.size() < 8) {
std::cout<<"wrong key:"<<internal_key.ToString()<<std::endl;
}
assert(internal_key.size() >= 8);
return Slice(internal_key.data(), internal_key.size() - 8);
uint64_t num = DecodeFixed64(internal_key.data() + internal_key.size() - 8);
uint8_t havettl = (num & 0b10) >> 1;
uint8_t islookup = (num & 0b100) >> 2;
size_t klen = internal_key.size() - 8;
if(havettl || islookup) klen -= 8;
Slice user_key = Slice(internal_key.data(), klen);
return user_key;
}
// A comparator for internal keys that uses a specified comparator for
@ -137,8 +149,9 @@ class InternalKey {
public:
InternalKey() {} // Leave rep_ as empty to indicate it is invalid
InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
InternalKey(const Slice& user_key, SequenceNumber s,
ValueType t, uint64_t deadTime = 0) {
AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t, deadTime));
}
bool DecodeFrom(const Slice& s) {
@ -170,14 +183,23 @@ inline int InternalKeyComparator::Compare(const InternalKey& a,
inline bool ParseInternalKey(const Slice& internal_key,
ParsedInternalKey* result) {
//islookup
const size_t n = internal_key.size();
if (n < 8) return false;
uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
uint8_t c = num & 0xff;
result->sequence = num >> 8;
result->type = static_cast<ValueType>(c);
result->user_key = Slice(internal_key.data(), n - 8);
return (c <= static_cast<uint8_t>(kTypeValue));
uint64_t tag = DecodeFixed64(internal_key.data() + n - 8);
uint8_t c = tag & 0xff;
uint8_t havettl = (c & 0b10) >> 1;
result->sequence = tag >> 8;
result->type = static_cast<ValueType>(c & 0b1);
if(havettl){
result->deadTime = DecodeFixed64(internal_key.data() + n - 16);
result->user_key = Slice(internal_key.data(), n - 16);
} else {
result->deadTime = 0;
result->user_key = Slice(internal_key.data(), n - 8);
}
// return c <= 0b111;
return ((c & 0b1) <= static_cast<uint8_t>(kTypeValue));
}
// A helper class useful for DBImpl::Get()
@ -185,7 +207,7 @@ class LookupKey {
public:
// Initialize *this for looking up user_key at a snapshot with
// the specified sequence number.
LookupKey(const Slice& user_key, SequenceNumber sequence);
LookupKey(const Slice& user_key, SequenceNumber sequence, uint64_t nowTime);
LookupKey(const LookupKey&) = delete;
LookupKey& operator=(const LookupKey&) = delete;
@ -199,14 +221,17 @@ class LookupKey {
Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
// Return the user key
Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); }
Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 16); }
private:
// We construct a char array of the form:
// klength varint32 <-- start_
// userkey char[klength] <-- kstart_
// tag uint64
// nowTime uint64
// tag uint64 0000 0101
// <-- end_
// userkey下insert时seq优先
// tag倒数第三位使
// The array is a suitable MemTable key.
// The suffix starting with "userkey" can be used as an InternalKey.
const char* start_;

+ 1
- 1
db/dumpfile.cc View File

@ -74,7 +74,7 @@ Status PrintLogContents(Env* env, const std::string& fname,
// Called on every item found in a WriteBatch.
class WriteBatchItemPrinter : public WriteBatch::Handler {
public:
void Put(const Slice& key, const Slice& value) override {
void Put(const Slice& key, const Slice& value, uint64_t deadTime) override {
std::string r = " put '";
AppendEscapedStringTo(&r, key);
r += "' '";

+ 78
- 5
db/memtable.cc View File

@ -8,6 +8,8 @@
#include "leveldb/env.h"
#include "leveldb/iterator.h"
#include "util/coding.h"
#include "ctime"
#include "iostream"
namespace leveldb {
@ -53,7 +55,23 @@ class MemTableIterator : public Iterator {
~MemTableIterator() override = default;
bool Valid() const override { return iter_.Valid(); }
void Seek(const Slice& k) override { iter_.Seek(EncodeKey(&tmp_, k)); }
void Seek(const Slice& k) override {
iter_.Seek(EncodeKey(&tmp_, k));
MemTable::KeyComparator comp_ = iter_.get_comparator();
while(Valid()) {
Slice now = key();
ParsedInternalKey parsed_k,parsed_now;
ParseInternalKey(k,&parsed_k);
ParseInternalKey(now,&parsed_now);
uint64_t deadtime_k = parsed_k.deadTime;
uint64_t deadtime_now = parsed_now.deadTime;
if(deadtime_k == 0) deadtime_k = UINT64_MAX;
if(deadtime_now == 0) deadtime_now = UINT64_MAX;
if(deadtime_k > deadtime_now) {Next();continue;};
if(comp_.comparator.Compare(k,now) <= 0) return;
Next();
}
}
void SeekToFirst() override { iter_.SeekToFirst(); }
void SeekToLast() override { iter_.SeekToLast(); }
void Next() override { iter_.Next(); }
@ -74,16 +92,24 @@ class MemTableIterator : public Iterator {
Iterator* MemTable::NewIterator() { return new MemTableIterator(&table_); }
void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
const Slice& value) {
const Slice& value, uint64_t deadTime) {
// Format of an entry is concatenation of:
// key_size : varint32 of internal_key.size()
// key bytes : char[internal_key.size()]
// tag : uint64((sequence << 8) | type)
// value_size : varint32 of value.size()
// value bytes : char[value.size()]
//变成
// key_size : varint32 of internal_key.size()
// key bytes : char[internal_key.size()]
// (deadTime) : uint64(可能有)
// tag : uint64((sequence << 8) | havettl << 1 | type)
// value_size : varint32 of value.size()
// value bytes : char[value.size()]
size_t key_size = key.size();
size_t val_size = value.size();
size_t internal_key_size = key_size + 8;
if(deadTime != 0) internal_key_size += 8;
const size_t encoded_len = VarintLength(internal_key_size) +
internal_key_size + VarintLength(val_size) +
val_size;
@ -91,22 +117,55 @@ void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
char* p = EncodeVarint32(buf, internal_key_size);
std::memcpy(p, key.data(), key_size);
p += key_size;
EncodeFixed64(p, (s << 8) | type);
if(deadTime == 0){
EncodeFixed64(p, (s << 8) | type);
} else {
EncodeFixed64(p, deadTime);
p += 8;
EncodeFixed64(p, (s << 8) | 0b10 | type);
}
p += 8;
p = EncodeVarint32(p, val_size);
std::memcpy(p, value.data(), val_size);
assert(p + val_size == buf + encoded_len);
table_.Insert(buf);
static int count = 0;
if(count++ % 1000 == 0)
std::cout<<"count: "<<count << " insert:" << key.ToString() <<" deadTime: " << deadTime << std::endl;
}
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
Slice memkey = key.memtable_key();
Table::Iterator iter(&table_);
iter.Seek(memkey.data());
while(iter.Valid()) {
Slice now = GetLengthPrefixedSlice(iter.key()); //迭代器所处的位置
MemTable::KeyComparator comp_ = iter.get_comparator();
ParsedInternalKey parsed_k,parsed_now;
ParseInternalKey(key.internal_key(),&parsed_k);
ParseInternalKey(now,&parsed_now);
uint64_t deadtime_k = parsed_k.deadTime;
uint64_t deadtime_now = parsed_now.deadTime;
if(deadtime_k == 0) deadtime_k = UINT64_MAX;
if(deadtime_now == 0) deadtime_now = UINT64_MAX;
std::cout<<"key :"<<parsed_k.user_key.ToString()<<" k time: "<<deadtime_k<<" now time: "<<deadtime_now<<std::endl;
std::cout<<"k&now"<<parsed_k.user_key.ToString()<<" "<<parsed_now.user_key.ToString()<<std::endl;
if(deadtime_k > deadtime_now) {
iter.Next();
continue;
}
std::cout<<"size : "<<key.internal_key().size()<<" "<<now.size()<<std::endl;
if(comp_.comparator.Compare(key.internal_key(),now) <= 0) break;
iter.Next();
}
std::cout << "search:" << key.user_key().ToString() << " valid?" << iter.Valid();
if (iter.Valid()) {
// entry format is:
// klength varint32
// userkey char[klength]
// (deadTime) uint64
// tag uint64
// vlength varint32
// value char[vlength]
@ -116,12 +175,26 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
const char* entry = iter.key();
uint32_t key_length;
const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length);
std::cout << " get:" << ExtractUserKey(Slice(key_ptr, key_length)).ToString() << std::endl;
if (comparator_.comparator.user_comparator()->Compare(
Slice(key_ptr, key_length - 8), key.user_key()) == 0) {
ExtractUserKey(Slice(key_ptr, key_length)), key.user_key()) == 0) {
// Correct user key
const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
switch (static_cast<ValueType>(tag & 0xff)) {
switch (static_cast<ValueType>(tag & 0x01)) {
case kTypeValue: {
// uint8_t havettl = (tag & 0xff) >> 1;
// if(havettl){
// time_t nowTime;
// time(&nowTime);
// assert(nowTime > 0);
// const uint64_t deadTime = DecodeFixed64(key_ptr + key_length - 16);
// if(static_cast<uint64_t>(nowTime) >= deadTime){ //过期了
// std::cout << nowTime << "dead:" << deadTime << std::endl;
// *s = Status::NotFound(Slice());
// return true; //todo:之前有没过期的key
// }
// }
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
value->assign(v.data(), v.size());
return true;

+ 1
- 1
db/memtable.h View File

@ -54,7 +54,7 @@ class MemTable {
// specified sequence number and with the specified type.
// Typically value will be empty if type==kTypeDeletion.
void Add(SequenceNumber seq, ValueType type, const Slice& key,
const Slice& value);
const Slice& value, uint64_t deadTime = 0);
// If memtable contains a value for key, store it in *value and return true.
// If memtable contains a deletion for key, store a NotFound() error

+ 1
- 1
db/repair.cc View File

@ -90,7 +90,7 @@ class Repairer {
}
private:
struct TableInfo {
struct TableInfo { //TODO : add the recovery of smallest and largest deadtime
FileMetaData meta;
SequenceNumber max_sequence;
};

+ 2
- 0
db/skiplist.h View File

@ -90,6 +90,8 @@ class SkipList {
// Final state of iterator is Valid() iff list is not empty.
void SeekToLast();
Comparator get_comparator() const {return list_->compare_;}
private:
const SkipList* list_;
Node* node_;

+ 9
- 1
db/version_edit.cc View File

@ -81,6 +81,8 @@ void VersionEdit::EncodeTo(std::string* dst) const {
PutVarint64(dst, f.file_size);
PutLengthPrefixedSlice(dst, f.smallest.Encode());
PutLengthPrefixedSlice(dst, f.largest.Encode());
PutVarint64(dst,f.smallest_deadtime);
PutVarint64(dst,f.largest_deadtime);
}
}
@ -179,7 +181,9 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) &&
GetVarint64(&input, &f.file_size) &&
GetInternalKey(&input, &f.smallest) &&
GetInternalKey(&input, &f.largest)) {
GetInternalKey(&input, &f.largest) &&
GetVarint64(&input,&f.smallest_deadtime) &&
GetVarint64(&input,&f.largest_deadtime)) {
new_files_.push_back(std::make_pair(level, f));
} else {
msg = "new-file entry";
@ -250,6 +254,10 @@ std::string VersionEdit::DebugString() const {
r.append(f.smallest.DebugString());
r.append(" .. ");
r.append(f.largest.DebugString());
r.append(" ");
AppendNumberTo(&r,f.smallest_deadtime);
r.append(" .. ");
AppendNumberTo(&r,f.largest_deadtime);
}
r.append("\n}\n");
return r;

+ 9
- 2
db/version_edit.h View File

@ -16,7 +16,8 @@ namespace leveldb {
class VersionSet;
struct FileMetaData {
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) {}
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0),
smallest_deadtime(UINT64_MAX),largest_deadtime(UINT64_MAX) {}
int refs;
int allowed_seeks; // Seeks allowed until compaction
@ -24,6 +25,9 @@ struct FileMetaData {
uint64_t file_size; // File size in bytes
InternalKey smallest; // Smallest internal key served by table
InternalKey largest; // Largest internal key served by table
//FileMetaData中使Uint64MAX作为没有生存期的标志
uint64_t smallest_deadtime; //smallest deadtime
uint64_t largest_deadtime; //largest deadtime
};
class VersionEdit {
@ -61,12 +65,15 @@ class VersionEdit {
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
// REQUIRES: "smallest" and "largest" are smallest and largest keys in file
void AddFile(int level, uint64_t file, uint64_t file_size,
const InternalKey& smallest, const InternalKey& largest) {
const InternalKey& smallest, const InternalKey& largest,
uint64_t smallest_deadtime = UINT64_MAX, uint64_t largest_deadtime = UINT64_MAX) {
FileMetaData f;
f.number = file;
f.file_size = file_size;
f.smallest = smallest;
f.largest = largest;
f.smallest_deadtime = smallest_deadtime;
f.largest_deadtime = largest_deadtime;
new_files_.push_back(std::make_pair(level, f));
}

+ 5
- 1
db/version_set.cc View File

@ -263,8 +263,10 @@ static void SaveValue(void* arg, const Slice& ikey, const Slice& v) {
Saver* s = reinterpret_cast<Saver*>(arg);
ParsedInternalKey parsed_key;
if (!ParseInternalKey(ikey, &parsed_key)) {
// std::cout<<"corrupt get"<<std::endl;
s->state = kCorrupt;
} else {
std::cout<<"tar&found"<<parsed_key.user_key.ToString()<<" "<<s->user_key.ToString()<<std::endl;
if (s->ucmp->Compare(parsed_key.user_key, s->user_key) == 0) {
s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
if (s->state == kFound) {
@ -272,6 +274,7 @@ static void SaveValue(void* arg, const Slice& ikey, const Slice& v) {
}
}
}
std::cout<<"state : "<<s->state<<std::endl;
}
static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
@ -1087,7 +1090,8 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
const std::vector<FileMetaData*>& files = current_->files_[level];
for (size_t i = 0; i < files.size(); i++) {
const FileMetaData* f = files[i];
edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest,
f->smallest_deadtime,f->largest_deadtime);
}
}

+ 2
- 0
db/version_set.h View File

@ -111,6 +111,8 @@ class Version {
int NumFiles(int level) const { return files_[level].size(); }
const std::vector<FileMetaData*>& Files(int level) const {return files_[level]; }
// Return a human readable string that describes this version's contents.
std::string DebugString() const;

+ 36
- 6
db/write_batch.cc View File

@ -6,13 +6,20 @@
// sequence: fixed64
// count: fixed32
// data: record[count]
// record :=
// 原来record :=
// kTypeValue varstring varstring |
// kTypeDeletion varstring
// varstring :=
// len: varint32
// data: uint8[len]
// 增加ttl后record :=
// kTypeValue havettl (deadtime) varstring varstring |
// kTypeDeletion varstring
// varstring :=
// len: varint32
// data: uint8[len]
#include "leveldb/write_batch.h"
#include "db/dbformat.h"
@ -20,6 +27,8 @@
#include "db/write_batch_internal.h"
#include "leveldb/db.h"
#include "util/coding.h"
#include "ctime"
#include <cstdint>
namespace leveldb {
@ -54,12 +63,22 @@ Status WriteBatch::Iterate(Handler* handler) const {
input.remove_prefix(1);
switch (tag) {
case kTypeValue:
{
char havettl = input[0];
input.remove_prefix(1);
uint64_t deadTime = 0;
if(havettl){
deadTime = DecodeFixed64(input.data());
input.remove_prefix(8);
}
if (GetLengthPrefixedSlice(&input, &key) &&
GetLengthPrefixedSlice(&input, &value)) {
handler->Put(key, value);
handler->Put(key, value, deadTime);
} else {
return Status::Corruption("bad WriteBatch Put");
}
}
break;
case kTypeDeletion:
if (GetLengthPrefixedSlice(&input, &key)) {
@ -95,9 +114,20 @@ void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
EncodeFixed64(&b->rep_[0], seq);
}
void WriteBatch::Put(const Slice& key, const Slice& value) {
void WriteBatch::Put(const Slice& key, const Slice& value, uint64_t ttl) {
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
rep_.push_back(static_cast<char>(kTypeValue));
rep_.push_back(static_cast<char>(ttl != 0)); //1:havettl
if(ttl != 0){
time_t nowTime;
time(&nowTime);
assert(nowTime > 0);
assert(ttl > 0);
uint64_t deadTime = static_cast<uint64_t>(nowTime) + ttl;
PutFixed64(&rep_, deadTime);
}
PutLengthPrefixedSlice(&rep_, key);
PutLengthPrefixedSlice(&rep_, value);
}
@ -118,12 +148,12 @@ class MemTableInserter : public WriteBatch::Handler {
SequenceNumber sequence_;
MemTable* mem_;
void Put(const Slice& key, const Slice& value) override {
mem_->Add(sequence_, kTypeValue, key, value);
void Put(const Slice& key, const Slice& value, uint64_t deadTime) override {
mem_->Add(sequence_, kTypeValue, key, value, deadTime);
sequence_++;
}
void Delete(const Slice& key) override {
mem_->Add(sequence_, kTypeDeletion, key, Slice());
mem_->Add(sequence_, kTypeDeletion, key, Slice(), 0);
sequence_++;
}
};

+ 3
- 3
include/leveldb/db.h View File

@ -63,8 +63,8 @@ class LEVELDB_EXPORT DB {
// Set the database entry for "key" to "value". Returns OK on success,
// and a non-OK status on error.
// Note: consider setting options.sync = true.
virtual Status Put(const WriteOptions& options, const Slice& key,
const Slice& value) = 0;
// virtual Status Put(const WriteOptions& options, const Slice& key,
// const Slice& value) = 0;
// Remove the database entry (if any) for "key". Returns OK on
// success, and a non-OK status on error. It is not an error if "key"
@ -149,7 +149,7 @@ class LEVELDB_EXPORT DB {
// ----------------------------For TTL-----------------------------
// key设置ttl
virtual Status Put(const WriteOptions& options, const Slice& key,
const Slice& value, uint64_t ttl) = 0;
const Slice& value, uint64_t ttl = 0) = 0;
};
// Destroy the contents of the specified database.

+ 3
- 2
include/leveldb/write_batch.h View File

@ -25,6 +25,7 @@
#include "leveldb/export.h"
#include "leveldb/status.h"
#include <cstdint>
namespace leveldb {
@ -35,7 +36,7 @@ class LEVELDB_EXPORT WriteBatch {
class LEVELDB_EXPORT Handler {
public:
virtual ~Handler();
virtual void Put(const Slice& key, const Slice& value) = 0;
virtual void Put(const Slice& key, const Slice& value, uint64_t deadTime = 0) = 0;
virtual void Delete(const Slice& key) = 0;
};
@ -48,7 +49,7 @@ class LEVELDB_EXPORT WriteBatch {
~WriteBatch();
// Store the mapping "key->value" in the database.
void Put(const Slice& key, const Slice& value);
void Put(const Slice& key, const Slice& value, uint64_t ttl = 0);
// If the database contains a mapping for "key", erase it. Else do nothing.
void Delete(const Slice& key);

+ 11
- 0
table/block.cc View File

@ -14,6 +14,7 @@
#include "table/format.h"
#include "util/coding.h"
#include "util/logging.h"
#include "db/dbformat.h"
namespace leveldb {
@ -216,10 +217,20 @@ class Block::Iter : public Iterator {
SeekToRestartPoint(left);
}
// Linear search (within restart block) for first key >= target
//处理deadtime:从当前位置向后找到最新的未死亡的key
while (true) {
if (!ParseNextKey()) {
return;
}
ParsedInternalKey parsed_target,parsed_key_;
ParseInternalKey(target,&parsed_target);
ParseInternalKey(key_,&parsed_key_);
uint64_t deadtime_tar = parsed_target.deadTime;
uint64_t deadtime_key_ = parsed_key_.deadTime;
if(deadtime_tar == 0) deadtime_tar = UINT64_MAX;
if(deadtime_key_ == 0) deadtime_key_ = UINT64_MAX;
std::cout<<"key :"<<parsed_target.user_key.ToString()<<" tar time: "<<deadtime_tar<<" key time: "<<deadtime_key_<<std::endl;
if(deadtime_tar > deadtime_key_) continue;
if (Compare(key_, target) >= 0) {
return;
}

+ 113
- 0
test/ttl_mmtable_test.cc View File

@ -0,0 +1,113 @@
#include "leveldb/env.h"
#include "leveldb/db.h"
#include "ctime"
#include <iostream>
#include <cstdlib>
#include "gtest/gtest.h"
using namespace leveldb;
constexpr int value_size = 2048;
constexpr int data_size = 2048 << 15;
Status OpenDB(std::string dbName, DB **db) {
Options options;
options.create_if_missing = true;
return DB::Open(options, dbName, db);
}
void InsertData(DB *db, uint64_t ttl/* second */, int vsize = 1/*插不同长度的value*/) {
printf("-----inserting-----\n");
Status status;
WriteOptions writeOptions;
int key_num = data_size / value_size;
srand(static_cast<unsigned int>(time(0)));
for (int i = 0; i < key_num; i++) {
//int key_ = rand() % key_num+1;
int key_ = i+1;
std::string key = std::to_string(key_);
std::string value(vsize, 'a');
status = db->Put(writeOptions, key, value, ttl);
assert(status.ok());
}
}
void GetData(DB *db, bool isTimeout) {
printf("-----seeking-----\n");
ReadOptions readOptions;
Status status;
int key_num = data_size / value_size;
srand(static_cast<unsigned int>(time(0)));
for (int i = 0; i < key_num; i++) {
//int key_ = rand() % key_num+1;
int key_ = i+1;
std::string key = std::to_string(key_);
std::string value;
status = db->Get(readOptions, key, &value);
if(isTimeout) assert(status.IsNotFound());
else{
assert(status.ok());
std::cout << value << std::endl;
}
}
}
void TimeOut() {
DB *db;
printf("-----opening-----\n");
if(OpenDB("testdb", &db).ok() == false) {
std::cerr << "open db failed" << std::endl;
abort();
}
uint64_t ttl = 3;
InsertData(db, ttl);
GetData(db, false);
Env::Default()->SleepForMicroseconds(ttl * 1000000);
GetData(db, true);
delete(db);
printf("-----closing-----\n");
// printf("-----recovery-----\n");
// if(OpenDB("testdb", &db).ok() == false) {
// std::cerr << "open db failed" << std::endl;
// abort();
// }
// GetData(db, true);
printf("success!\n");
}
void GetEarlierData() {
DB *db;
printf("-----opening-----\n");
if(OpenDB("testdb", &db).ok() == false) {
std::cerr << "open db failed" << std::endl;
abort();
}
uint64_t ttl1 = 3;
uint64_t ttl2 = 5;
// InsertData(db, ttl2);
InsertData(db, ttl1, 2);
//都没过期先找到后插的
Env::Default()->SleepForMicroseconds(1 * 1000000);
GetData(db, false);
//再找到前一次
Env::Default()->SleepForMicroseconds(3 * 1000000);
GetData(db, true);
DestroyDB("testdb",Options());
delete(db);
printf("-----closing-----\n");
printf("success!\n");
}
int main(int argc, char** argv) {
GetEarlierData();
}

+ 13
- 5
test/ttl_test.cc View File

@ -26,7 +26,8 @@ void InsertData(DB *db, uint64_t ttl/* second */) {
int key_ = rand() % key_num+1;
std::string key = std::to_string(key_);
std::string value(value_size, 'a');
db->Put(writeOptions, key, value, ttl);
db->Put(writeOptions, std::to_string(i+1), value, ttl);
// db->Put(writeOptions, key, value, ttl);
}
}
@ -45,13 +46,14 @@ void GetData(DB *db, int size = (1 << 30)) {
}
TEST(TestTTL, ReadTTL) {
DestroyDB("testdb",Options());
DB *db;
if(OpenDB("testdb", &db).ok() == false) {
std::cerr << "open db failed" << std::endl;
abort();
}
uint64_t ttl = 20;
uint64_t ttl = 15;
InsertData(db, ttl);
@ -64,21 +66,25 @@ TEST(TestTTL, ReadTTL) {
std::string key = std::to_string(key_);
std::string value;
status = db->Get(readOptions, key, &value);
std::cout<<key<<" "<<value[0]<<std::endl;
ASSERT_TRUE(status.ok());
}
Env::Default()->SleepForMicroseconds(ttl * 1000000);
Env::Default()->SleepForMicroseconds((ttl+1) * 1000000);
for (int i = 0; i < 100; i++) {
int key_ = rand() % key_num+1;
std::string key = std::to_string(key_);
std::string value;
status = db->Get(readOptions, key, &value);
std::cout<<key<<" "<<value<<std::endl;
ASSERT_FALSE(status.ok());
}
delete db;
}
TEST(TestTTL, CompactionTTL) {
DestroyDB("testdb",Options());
DB *db;
if(OpenDB("testdb", &db).ok() == false) {
@ -86,7 +92,7 @@ TEST(TestTTL, CompactionTTL) {
abort();
}
uint64_t ttl = 20;
uint64_t ttl = 10;
InsertData(db, ttl);
leveldb::Range ranges[1];
@ -95,7 +101,8 @@ TEST(TestTTL, CompactionTTL) {
db->GetApproximateSizes(ranges, 1, sizes);
ASSERT_GT(sizes[0], 0);
Env::Default()->SleepForMicroseconds(ttl * 1000000);
Env::Default()->SleepForMicroseconds((ttl+1) * 1000000);
// Env::Default()->SleepForMicroseconds(ttl * 1000000);
db->CompactRange(nullptr, nullptr);
@ -104,6 +111,7 @@ TEST(TestTTL, CompactionTTL) {
// uint64_t sizes[1];
db->GetApproximateSizes(ranges, 1, sizes);
ASSERT_EQ(sizes[0], 0);
delete db;
}

Loading…
Cancel
Save