|
|
@ -14,6 +14,15 @@ |
|
|
|
#include "db/table_cache.h"
|
|
|
|
#include "db/version_set.h"
|
|
|
|
#include "db/write_batch_internal.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <atomic>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <iomanip>
|
|
|
|
#include <iostream>
|
|
|
|
#include <set>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "leveldb/db.h"
|
|
|
|
#include "leveldb/env.h"
|
|
|
@ -42,6 +51,7 @@ struct DBImpl::Writer { |
|
|
|
WriteBatch* batch; |
|
|
|
bool sync; |
|
|
|
bool done; |
|
|
|
|
|
|
|
port::CondVar cv; |
|
|
|
}; |
|
|
|
|
|
|
@ -51,6 +61,7 @@ struct DBImpl::CompactionState { |
|
|
|
uint64_t number; |
|
|
|
uint64_t file_size; |
|
|
|
InternalKey smallest, largest; |
|
|
|
TIMESTAMP old_ts,new_ts; |
|
|
|
}; |
|
|
|
|
|
|
|
Output* current_output() { return &outputs[outputs.size() - 1]; } |
|
|
@ -526,11 +537,15 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, |
|
|
|
if (s.ok() && meta.file_size > 0) { |
|
|
|
const Slice min_user_key = meta.smallest.user_key(); |
|
|
|
const Slice max_user_key = meta.largest.user_key(); |
|
|
|
const TIMESTAMP new_ts = meta.newer_ts; |
|
|
|
const TIMESTAMP old_ts = meta.oldest_ts; |
|
|
|
if (base != nullptr) { |
|
|
|
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key); |
|
|
|
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);// TODO :基于timestamp和size和seek的新的选择规则
|
|
|
|
} |
|
|
|
// edit->AddFile(level, meta.number, meta.file_size, meta.smallest,
|
|
|
|
// meta.largest);
|
|
|
|
edit->AddFile(level, meta.number, meta.file_size, meta.smallest, |
|
|
|
meta.largest); |
|
|
|
meta.largest,old_ts,new_ts); |
|
|
|
} |
|
|
|
|
|
|
|
CompactionStats stats; |
|
|
@ -619,7 +634,7 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin, |
|
|
|
bg_error_.ok()) { |
|
|
|
if (manual_compaction_ == nullptr) { // Idle
|
|
|
|
manual_compaction_ = &manual; |
|
|
|
MaybeScheduleCompaction();//有可能寻址过多,导致allow_seek为0,触发合并。
|
|
|
|
MaybeScheduleCompaction(); |
|
|
|
} else { // Running either my compaction or another compaction.
|
|
|
|
background_work_finished_signal_.Wait(); |
|
|
|
} |
|
|
@ -734,8 +749,10 @@ void DBImpl::BackgroundCompaction() { |
|
|
|
assert(c->num_input_files(0) == 1); |
|
|
|
FileMetaData* f = c->input(0, 0); |
|
|
|
c->edit()->RemoveFile(c->level(), f->number); |
|
|
|
// c->edit()->AddFile(c->level() + 1, f->number, f->file_size, f->smallest,
|
|
|
|
// f->largest);
|
|
|
|
c->edit()->AddFile(c->level() + 1, f->number, f->file_size, f->smallest, |
|
|
|
f->largest); |
|
|
|
f->largest,f->oldest_ts,f->newer_ts); |
|
|
|
status = versions_->LogAndApply(c->edit(), &mutex_); |
|
|
|
if (!status.ok()) { |
|
|
|
RecordBackgroundError(status); |
|
|
@ -747,7 +764,7 @@ void DBImpl::BackgroundCompaction() { |
|
|
|
status.ToString().c_str(), versions_->LevelSummary(&tmp)); |
|
|
|
} else { |
|
|
|
CompactionState* compact = new CompactionState(c); |
|
|
|
status = DoCompactionWork(compact); |
|
|
|
status = DoCompactionWork(compact);//
|
|
|
|
if (!status.ok()) { |
|
|
|
RecordBackgroundError(status); |
|
|
|
} |
|
|
@ -809,6 +826,8 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { |
|
|
|
out.number = file_number; |
|
|
|
out.smallest.Clear(); |
|
|
|
out.largest.Clear(); |
|
|
|
out.old_ts = UINT64_MAX; |
|
|
|
out.new_ts = 0; |
|
|
|
compact->outputs.push_back(out); |
|
|
|
mutex_.Unlock(); |
|
|
|
} |
|
|
@ -883,8 +902,10 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) { |
|
|
|
const int level = compact->compaction->level(); |
|
|
|
for (size_t i = 0; i < compact->outputs.size(); i++) { |
|
|
|
const CompactionState::Output& out = compact->outputs[i]; |
|
|
|
// compact->compaction->edit()->AddFile(level + 1, out.number, out.file_size,
|
|
|
|
// out.smallest, out.largest);
|
|
|
|
compact->compaction->edit()->AddFile(level + 1, out.number, out.file_size, |
|
|
|
out.smallest, out.largest); |
|
|
|
out.smallest, out.largest,out.old_ts,out.new_ts); |
|
|
|
} |
|
|
|
return versions_->LogAndApply(compact->compaction->edit(), &mutex_); |
|
|
|
} |
|
|
@ -908,13 +929,13 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { |
|
|
|
} |
|
|
|
|
|
|
|
Iterator* input = versions_->MakeInputIterator(compact->compaction); |
|
|
|
|
|
|
|
// Release mutex while we're actually doing the compaction work
|
|
|
|
mutex_.Unlock(); |
|
|
|
|
|
|
|
input->SeekToFirst(); |
|
|
|
Status status; |
|
|
|
ParsedInternalKey ikey; |
|
|
|
TIMESTAMP ts = 0; |
|
|
|
std::string current_user_key; |
|
|
|
bool has_current_user_key = false; |
|
|
|
SequenceNumber last_sequence_for_key = kMaxSequenceNumber; |
|
|
@ -940,7 +961,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//auto a = DecodeFixed64(input->value().data() + input->value().size() - kTSLength);//debug
|
|
|
|
// Handle key/value, add to state, etc.
|
|
|
|
bool drop = false; |
|
|
|
if (!ParseInternalKey(key, &ikey)) { |
|
|
@ -972,19 +993,10 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { |
|
|
|
// few iterations of this loop (by rule (A) above).
|
|
|
|
// Therefore this deletion marker is obsolete and can be dropped.
|
|
|
|
drop = true; |
|
|
|
} |
|
|
|
|
|
|
|
Slice value = input->value(); |
|
|
|
if (value.size() >= sizeof(uint64_t)) { |
|
|
|
const char* ptr = value.data(); |
|
|
|
std::string temp_str(value.data(), value.size()); |
|
|
|
uint64_t expiration_time = DBImpl::GetTS(&temp_str); |
|
|
|
uint64_t current_time = env_->GetCurrentTime(); |
|
|
|
}else if((ts = DecodeFixed64(input->value().data() + input->value().size() - kTSLength)) < env_->NowMicros()){ |
|
|
|
|
|
|
|
if (current_time > expiration_time) { |
|
|
|
drop = true; |
|
|
|
}else {drop = false;} |
|
|
|
} |
|
|
|
drop = true; |
|
|
|
} |
|
|
|
|
|
|
|
last_sequence_for_key = ikey.sequence; |
|
|
|
} |
|
|
@ -1010,6 +1022,10 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { |
|
|
|
compact->current_output()->smallest.DecodeFrom(key); |
|
|
|
} |
|
|
|
compact->current_output()->largest.DecodeFrom(key); |
|
|
|
assert(ts != 0); |
|
|
|
//auto b = compact->current_output()->old_ts;
|
|
|
|
compact->current_output()->old_ts = std::min(compact->current_output()->old_ts,ts); |
|
|
|
compact->current_output()->new_ts = std::max(compact->current_output()->new_ts,ts); |
|
|
|
compact->builder->Add(key, input->value()); |
|
|
|
|
|
|
|
// Close output file if it is big enough
|
|
|
@ -1156,29 +1172,29 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& key, |
|
|
|
} else if (imm != nullptr && imm->Get(lkey, value, &s)) { |
|
|
|
// Done
|
|
|
|
} else { |
|
|
|
stats.now_ts = this->env_->NowMicros(); |
|
|
|
s = current->Get(options, lkey, value, &stats); |
|
|
|
have_stat_update = true; |
|
|
|
} |
|
|
|
mutex_.Lock(); |
|
|
|
} |
|
|
|
if (s.ok()) { |
|
|
|
// 直接在这里判断是否过期
|
|
|
|
auto t1 = env_->GetCurrentTime(); |
|
|
|
auto t2 = GetTS(value); |
|
|
|
if(t1 >= t2){ |
|
|
|
// 过期
|
|
|
|
s = Status::NotFound("NotFound",Slice()); |
|
|
|
} else { |
|
|
|
// 没过期
|
|
|
|
*value = value->substr(0, value->size() - sizeof(uint64_t)); |
|
|
|
} |
|
|
|
} |
|
|
|
if (have_stat_update && current->UpdateStats(stats)) { |
|
|
|
MaybeScheduleCompaction(); |
|
|
|
if(s.ok()){ |
|
|
|
s = CheckIsExpire(value); |
|
|
|
} |
|
|
|
if (have_stat_update && current->UpdateStats(stats,s.IsExpire())) { |
|
|
|
MaybeScheduleCompaction();//有可能寻址过多,导致allow_seek为0,触发合并。
|
|
|
|
} |
|
|
|
mem->Unref(); |
|
|
|
if (imm != nullptr) imm->Unref(); |
|
|
|
|
|
|
|
current->Unref(); |
|
|
|
// if(!s.ok()){
|
|
|
|
// return s;
|
|
|
|
// }
|
|
|
|
// auto s2 = CheckIsExpire(value);
|
|
|
|
// if(!s2.ok()){
|
|
|
|
// current->UpdateStats(stats);
|
|
|
|
// }
|
|
|
|
return s; |
|
|
|
} |
|
|
|
|
|
|
@ -1215,10 +1231,23 @@ void DBImpl::ReleaseSnapshot(const Snapshot* snapshot) { |
|
|
|
Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) { |
|
|
|
return DB::Put(o, key, val); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::Put(const WriteOptions& options, const Slice& key, |
|
|
|
const Slice& value, uint64_t ttl) { |
|
|
|
// 扔掉了很复杂的逻辑
|
|
|
|
return DB::Put(options, key, value, ttl); |
|
|
|
|
|
|
|
std::string val_with_ts; |
|
|
|
val_with_ts.reserve(value.size() + kTSLength); |
|
|
|
char ts_string[kTSLength]; |
|
|
|
TIMESTAMP expiration_time = this->env_->NowMicros() + ttl * 1000000; |
|
|
|
EncodeFixed64(ts_string,expiration_time); |
|
|
|
//assert(sizeof(expiration_time) == sizeof(TIMESTAMP ));
|
|
|
|
// 追加原始 value 到 val_with_ts
|
|
|
|
val_with_ts.append(value.data(), value.size()); |
|
|
|
|
|
|
|
// 将 expiration_time 追加到 val_with_ts
|
|
|
|
val_with_ts.append(ts_string,kTSLength); |
|
|
|
return DB::Put(options, key, Slice(val_with_ts)); |
|
|
|
} |
|
|
|
|
|
|
|
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) { |
|
|
@ -1503,6 +1532,14 @@ void DBImpl::GetApproximateSizes(const Range* range, int n, uint64_t* sizes) { |
|
|
|
} |
|
|
|
v->Unref(); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* |
|
|
|
* @param val |
|
|
|
* @param val_with_ts 在val后面连接上预计超时的timestamp |
|
|
|
* @param ttl 存活时间 |
|
|
|
*/ |
|
|
|
void DBImpl::AppendTS(const Slice& val, std::string* val_with_ts,uint64_t ttl) { |
|
|
|
val_with_ts->reserve(kTSLength + val.size()); |
|
|
|
char ts_string[kTSLength]; |
|
|
@ -1517,41 +1554,51 @@ void DBImpl::AppendTS(const Slice& val, std::string* val_with_ts,uint64_t ttl) { |
|
|
|
* @return timestamp in val,and remove timestamp from val |
|
|
|
*/ |
|
|
|
uint64_t DBImpl::GetTS(std::string* val) { |
|
|
|
// 不用auto再写一下 老逻辑:
|
|
|
|
// uint64_t expiration_time;
|
|
|
|
// memcpy(&expiration_time, val->data() + val->size() - sizeof(uint64_t), sizeof(uint64_t));
|
|
|
|
// return expiration_time;
|
|
|
|
// 新逻辑:
|
|
|
|
auto expiration_time = DecodeFixed64(val->data() + val->size() - kTSLength); |
|
|
|
val->resize(val->size() - kTSLength); |
|
|
|
return expiration_time; |
|
|
|
} |
|
|
|
|
|
|
|
// Default implementations of convenience methods that subclasses of DB
|
|
|
|
// can call if they wish
|
|
|
|
Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { |
|
|
|
WriteBatch batch; |
|
|
|
batch.Put(key, value); |
|
|
|
return Write(opt, &batch); |
|
|
|
auto expiration_time = DecodeFixed64(val->data() + val->size() - kTSLength); |
|
|
|
|
|
|
|
val->resize(val->size() - kTSLength); |
|
|
|
|
|
|
|
return expiration_time; |
|
|
|
|
|
|
|
} |
|
|
|
Status DBImpl::CheckIsExpire(std::string* value) { |
|
|
|
//debug 用
|
|
|
|
auto a = env_->NowMicros(); |
|
|
|
auto b = GetTS(value); |
|
|
|
|
|
|
|
if(a > b){ |
|
|
|
return Status::Expire("Expire",Slice()); |
|
|
|
} |
|
|
|
return Status(); |
|
|
|
|
|
|
|
Status DB::Put(const WriteOptions& options, const Slice& key, |
|
|
|
} |
|
|
|
|
|
|
|
/**
|
|
|
|
* |
|
|
|
* @param options |
|
|
|
* @param key |
|
|
|
* @param value |
|
|
|
* @param ttl |
|
|
|
* @return |
|
|
|
*/ |
|
|
|
Status DB::Put(const WriteOptions& options, const Slice& key, |
|
|
|
const Slice& value, uint64_t ttl) { |
|
|
|
|
|
|
|
// 将 value 和 expiration_time 合并到一起,形成带 TTL 的 value
|
|
|
|
std::string val_with_ts; |
|
|
|
val_with_ts.reserve(value.size() + kTSLength); |
|
|
|
|
|
|
|
val_with_ts.reserve(value.size() + sizeof(uint64_t)); |
|
|
|
|
|
|
|
uint64_t expiration_time = std::chrono::duration_cast<std::chrono::milliseconds>( |
|
|
|
std::chrono::system_clock::now().time_since_epoch()) |
|
|
|
.count() + ttl * 1000; |
|
|
|
|
|
|
|
char ts_string[kTSLength]; |
|
|
|
EncodeFixed64(ts_string, expiration_time); |
|
|
|
// 追加原始 value 到 val_with_ts
|
|
|
|
val_with_ts.append(value.data(), value.size()); |
|
|
|
|
|
|
|
// 将 expiration_time 追加到 val_with_ts
|
|
|
|
// val_with_ts.append(reinterpret_cast<const char*>(&expiration_time), sizeof(expiration_time));
|
|
|
|
val_with_ts.append(ts_string, kTSLength); |
|
|
|
val_with_ts.append(reinterpret_cast<const char*>(&expiration_time), sizeof(expiration_time)); |
|
|
|
// std::cout<<"PUT"<<std::endl;
|
|
|
|
// std::cout << "timestamp: " << expiration_time << std::endl;
|
|
|
|
//"a\323='\277\222\001\000"
|
|
|
@ -1560,6 +1607,14 @@ Status DB::Put(const WriteOptions& options, const Slice& key, |
|
|
|
return Write(options, &batch); |
|
|
|
} |
|
|
|
|
|
|
|
// Default implementations of convenience methods that subclasses of DB
|
|
|
|
// can call if they wish
|
|
|
|
Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { |
|
|
|
WriteBatch batch; |
|
|
|
batch.Put(key, value); |
|
|
|
return Write(opt, &batch); |
|
|
|
} |
|
|
|
|
|
|
|
Status DB::Delete(const WriteOptions& opt, const Slice& key) { |
|
|
|
WriteBatch batch; |
|
|
|
batch.Delete(key); |
|
|
|