From f464e0993328508c217ba20e3cb1db7d8c72c2c1 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 15 Dec 2024 15:43:13 +0800 Subject: [PATCH 01/32] =?UTF-8?q?=E5=B9=B6=E5=8F=91=E6=8E=A7=E5=88=B6?= =?UTF-8?q?=E7=9A=84=E5=9F=BA=E6=9C=AC=E6=A1=86=E6=9E=B6=E5=92=8C=E5=A4=A7?= =?UTF-8?q?=E9=83=A8=E5=88=86=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 3DB设计.md | 29 ++++++-- fielddb/field_db.cpp | 202 +++++++++++++++++++++++++++++++++++++++++---------- fielddb/field_db.h | 28 +++++-- fielddb/meta.cpp | 58 +++++++++++++++ fielddb/meta.h | 55 ++++++++++++++ fielddb/metakv.cpp | 20 ----- fielddb/metakv.h | 26 ------- fielddb/request.cpp | 134 ++++++++++++++++++++++++++++++++++ fielddb/request.h | 123 ++++++++++++++++++++++++++++--- 9 files changed, 572 insertions(+), 103 deletions(-) create mode 100644 fielddb/meta.cpp create mode 100644 fielddb/meta.h delete mode 100644 fielddb/metakv.cpp delete mode 100644 fielddb/metakv.h diff --git a/3DB设计.md b/3DB设计.md index b37079f..29b8710 100644 --- a/3DB设计.md +++ b/3DB设计.md @@ -16,14 +16,14 @@ 如果当前有正在创建(修改)的索引或者之前的对于同一个key的索引put,则判断本次put是否含有对应的索引,如果没有则按上面一段的操作进行。如果含有,则加入之前设计中的taskqueue,在索引创建完成后会进行处理。 -我觉得:索引put涉及到了indexDB和kvDB两者之间的原子性 +想法:索引put涉及到了indexDB和kvDB两者之间的原子性 ## 创建(删除)索引 在进行这个操作之前对metaDB写入一个标记`(field,creating/deleting)`,表示在field上创建(删除)索引操作的事务的开始。(注:这里的key是包含了时间戳或者seq的)。 之后扫描kvDB,构建相应请求,对indexDB进行写入,这里也是通过writebatch写入的。写入完成之后,将之前的标记清除,表示当前的事务结束了。之后对于taskqueue里面的请求进行处理,完成之后,唤醒taskqueue中的请求。 -我觉得:创建(删除)索引这个操作实际上只对indexDB进行了写入请求,并不涉及indexDB和kvDB两者之间的一致性 +想法:创建(删除)索引这个操作实际上只对indexDB进行了写入请求,并不涉及indexDB和kvDB两者之间的一致性 ## 索引get 如果没有索引,则返回;如果索引正在创建,则存到taskqueue中,在索引创建完成之后进行处理(这里或许也可以直接返回);如果存在索引则将请求发往indexDB。 @@ -63,10 +63,29 @@ indexDB的写入情况判断如下:扫描indexDB,如果是creating操作且 不用时间戳,全部写入metaDB作为log,然后再写入kvDB和indexDB # 整体架构 -采用多线程架构 -由于二级索引理论上是幂等的操作,所以或许不用taskqueue来阻塞创建之后的写入? 
-如果这么看的话,其实创建(删除)索引的操作也不需要 +基于request(类似于writer)来处理并发的请求。对于创建和删除索引操作,包含一个pending队列,来维护会受到影响的请求。 +# 有关实现的部分 + +1. 对于metaDB中存入数据的编码部分放在了metakv文件中 + +## TODO List +1. index和kv的写入应该放在两个线程中同时写入,这里为了实现的方便,暂时先后完成 + +2. 原版的env中的Schedule是使用单例模式,也就是所有的数据库都只有一个线程,我们这里 +需要所有的数据库都有属于自己的线程,且可能不止一个,因此需要实现类似于线程池的东西 + +## 一些想法 +1. 根据对于某一个Field搜索的频率和耗时,自动的创建索引,且这个索引会在长时间不用后被清除 + +# 有关KV分离的想法 +复用原有的log,将log信息加入到version信息中,需要更改version edit,version等内容 +log带上编号,从小到大的进行垃圾回收 +垃圾回收过程中形成的新的写入保留原有的seq放入L0 + +KV分离核心的困难之一在于垃圾回收的并发控制。我的核心想法是在回收log的时候,不进行合并操作,将 +回收得到的东西直接保留seqno放进L0。由于L0本身就是无序的,如果在垃圾回收的过程中产生了并发写入, +新的写入也只会写入到L0,这样只要等待下一次的合并就行了。 diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 637046e..93c433c 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -1,14 +1,19 @@ #include "fielddb/field_db.h" #include +#include #include +#include #include #include "leveldb/db.h" #include "leveldb/env.h" #include "leveldb/options.h" #include "leveldb/status.h" +#include "leveldb/write_batch.h" #include "db/write_batch_internal.h" +#include "util/mutexlock.h" #include "util/serialize_value.h" #include "fielddb/encode_index.h" +#include "fielddb/meta.h" namespace fielddb { using namespace leveldb; @@ -37,38 +42,144 @@ Status FieldDB::OpenFieldDB(const Options& options, (*dbptr)->dbname_ = name; status = (*dbptr)->Recover(); + + (*dbptr)->options_ = &options; + (*dbptr)->env_ = options.env; return status; } -// todo +// TODO:Recover Status FieldDB::Recover() { - // + //TODO: + //1. 遍历所有Index类型的meta,重建内存中的index_状态表 + //2. 寻找所有KV类型的meta,再次提交一遍请求 + //3. 
等待所有请求完成 return Status::OK(); } +Request *FieldDB::GetHandleInterval() { + mutex_.AssertHeld(); //保证队列是互斥访问的 + Request *tail = taskqueue_.front(); + for(auto *req_ptr : taskqueue_) { + if(req_ptr->isDeleteReq() || req_ptr->isiCreateReq()) { + return tail; + } + tail = req_ptr; + } + return tail; +} + +Status FieldDB::HandleRequest(Request &req) { + MutexLock L(&mutex_); + taskqueue_.push_back(&req); +Again: + while(!req.done && &req != taskqueue_.front()) { + req.cond_.Wait(); + } + if(req.done) { + return req.s; //在返回时自动释放锁L + } + Request *tail = GetHandleInterval(); + WriteBatch KVBatch,IndexBatch,MetaBatch; + Status status; + if(!tail->isiCreateReq() && !tail->isiDeleteReq()) { + //表明这一个区间并没有涉及index的创建删除 + { + //1. 构建各个Batch。构建的过程中要保证索引状态的一致性,需要上锁。 + MutexLock iL(&index_mu); + for(auto *req_ptr : taskqueue_) { + req_ptr->ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); + if(req_ptr == tail) break; + } + } + //2. 首先写入meta,再并发写入index和kv,完成之后清除meta数据 + //此处可以放锁是因为写入的有序性可以通过队列来保证 + mutex_.Unlock(); + WriteOptions op; + status = metaDB_->Write(op, &MetaBatch); + //TODO:index的写入需要在另外一个线程中同时完成 + status = indexDB_->Write(op, &IndexBatch); + status = kvDB_->Write(op, &KVBatch); + //3. 
将meta数据清除 + MetaCleaner cleaner; + cleaner.Collect(MetaBatch); + cleaner.CleanMetaBatch(metaDB_); + mutex_.Lock(); + } else { + //对于创建和删除索引的请求,通过prepare完成索引状态的更新 + MutexLock iL(&index_mu); + req.Prepare(this); + } + + while(true) { + Request *ready = taskqueue_.front(); + taskqueue_.pop_front(); + //当前ready不是队首,不是和index的创建有关 + if(ready != &req && !ready->isPending() && + !req.isiCreateReq() && !req.isiDeleteReq()) { + ready->s = status; + ready->done = true; + ready->cond_.Signal(); + } + if (ready == tail) break; + } + if(!taskqueue_.empty()) { + taskqueue_.front()->cond_.Signal(); + } + //如果done==true,那么就不会继续等待直接退出 + //如果处于某个请求的pending list里面,那么就会继续等待重新入队 + //这里用了万恶的goto,蛤蛤 + goto Again; + + // return status; +} + + +//这里把一个空串作为常规put的name Status FieldDB::Put(const WriteOptions &options, const Slice &key, const Slice &value) { - return kvDB_->Put(options, key, value); + FieldArray FA = {{"",value.ToString()}}; + return PutFields(options, key, FA); + // return kvDB_->Put(options, key, value); } // TODO:需要对是否进行index更新做处理 Status FieldDB::PutFields(const WriteOptions &Options, const Slice &key, const FieldArray &fields) { - // - return kvDB_->PutFields(Options, key, fields); + //这里是为了const和slice-string的转换被迫搞得 + std::string key_ = key.ToString(); + FieldArray fields_ = fields; + + FieldsReq req(&key_,&fields_,&mutex_); + + Status status = HandleRequest(req); + return status; + // return kvDB_->PutFields(Options, key, fields); } // todo: 删除有索引的key时indexdb也要同步 Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { // - return kvDB_->Delete(options, key); + std::string key_ = key.ToString(); + DeleteReq req(&key_,&mutex_); + Status status = HandleRequest(req); + return status; + // return kvDB_->Delete(options, key); } // TODO:根据updates里面的东西,要对是否需要更新index进行分别处理 Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { + //或许应该再做一个接口?或者基于现有的接口进行改造 return Status::OK(); } - +//由于常规put将空串作为name,这里也需要适当修改 Status FieldDB::Get(const ReadOptions 
&options, const Slice &key, std::string *value) { - return kvDB_->Get(options, key, value); + // return kvDB_->Get(options, key, value); + FieldArray fields; + Status s = GetFields(options, key, &fields); + if(!s.ok()) { + return s; + } + *value = fields[0].second; + return s; } Status FieldDB::GetFields(const ReadOptions &options, const Slice &key, FieldArray *fields) { @@ -99,45 +210,62 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name) { //taskQueue相关 //写锁 是不是只需要给putfields设置一把锁就行 - std::vector> keysAndVal = - FindKeysAndValByFieldName(field_name); - WriteBatch writeBatch; - Slice value = Slice(); - for (auto &kvPair : keysAndVal){ - std::string indexKey; - AppendIndexKey(&indexKey, - ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); - writeBatch.Put(indexKey, value); - } - Status s = indexDB_->Write(WriteOptions(), &writeBatch); - if (!s.ok()) return s; - - index_[field_name] = Exist; - //唤醒taskqueue + // std::vector> keysAndVal = + // FindKeysAndValByFieldName(field_name); + // WriteBatch writeBatch; + // Slice value = Slice(); + // for (auto &kvPair : keysAndVal){ + // std::string indexKey; + // AppendIndexKey(&indexKey, + // ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); + // writeBatch.Put(indexKey, value); + // } + // Status s = indexDB_->Write(WriteOptions(), &writeBatch); + // if (!s.ok()) return s; + // index_[field_name].first = Exist; + // //唤醒taskqueue + // return s; + std::string Field = field_name; + iCreateReq req(&Field,&mutex_); + HandleRequest(req); + WriteBatch KVBatch,IndexBatch,MetaBatch; + req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); + indexDB_->Write(WriteOptions(), &IndexBatch); + req.Finalize(this); + return req.s; } Status FieldDB::DeleteIndex(const std::string &field_name) { //taskQueue相关 //写锁 - std::vector> keysAndVal = - FindKeysAndValByFieldName(field_name); - WriteBatch writeBatch; - for (auto &kvPair : keysAndVal){ - std::string indexKey; - AppendIndexKey(&indexKey, 
- ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); - writeBatch.Delete(indexKey); - } - Status s = indexDB_->Write(WriteOptions(), &writeBatch); - if (!s.ok()) return s; + // std::vector> keysAndVal = + // FindKeysAndValByFieldName(field_name); + // WriteBatch writeBatch; + // for (auto &kvPair : keysAndVal){ + // std::string indexKey; + // AppendIndexKey(&indexKey, + // ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); + // writeBatch.Delete(indexKey); + // } + // Status s = indexDB_->Write(WriteOptions(), &writeBatch); + // if (!s.ok()) return s; - index_.erase(field_name); - //唤醒taskqueue + // index_.erase(field_name); + // //唤醒taskqueue + // return s; + std::string Field = field_name; + iDeleteReq req(&Field,&mutex_); + HandleRequest(req); + WriteBatch KVBatch,IndexBatch,MetaBatch; + req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); + indexDB_->Write(WriteOptions(), &IndexBatch); + req.Finalize(this); + return req.s; } std::vector FieldDB::QueryByIndex(const Field &field, Status *s) { - if (index_.count(field.first) == 0 || index_[field.first] != Exist){ + if (index_.count(field.first) == 0 || index_[field.first].first != Exist){ *s = Status::NotFound(Slice()); return std::vector(); } diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 5c7e8d5..c54b525 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -1,6 +1,3 @@ -# ifndef FIELD_DB_H -# define FIELD_DB_H - #include "port/port_stdcxx.h" #include "db/db_impl.h" #include @@ -8,16 +5,24 @@ #include #include #include "leveldb/db.h" +#include "leveldb/env.h" #include "leveldb/options.h" #include "leveldb/slice.h" #include "leveldb/status.h" - #include "fielddb/request.h" - +#include +# ifndef FIELD_DB_H +# define FIELD_DB_H namespace fielddb { using namespace leveldb; class FieldDB : DB { public: + friend class Request; + friend class FieldsReq; + friend class iCreateReq; + friend class iDeleteReq; + friend class DeleteReq; + //用的时候必须FieldDB *db = new 
FieldDB()再open,不能像之前一样DB *db FieldDB() : indexDB_(nullptr), kvDB_(nullptr), metaDB_(nullptr) {}; /*lab1的要求,作为db派生类要实现的虚函数*/ @@ -47,6 +52,8 @@ private: private: std::string dbname_; + const Options *options_; + Env *env_; leveldb::DB *metaDB_; leveldb::DB *indexDB_; @@ -57,12 +64,21 @@ private: Deleting, Exist }; - std::map index_; + using FieldName = std::string; + // 标记index的状态,如果是creating/deleting,则会附带相应的请求 + std::map> index_; + port::Mutex index_mu; + leveldb::port::Mutex mutex_; // mutex for taskqueue std::deque taskqueue_; std::vector> FindKeysAndValByFieldName ( const std::string &fieldName); + + /*For request handling*/ + Status HandleRequest(Request &req); //每个请求自行构造请求后交由这个函数处理 + Request *GetHandleInterval(); //获得任务队列中的待处理区间,区间划分规则和原因见文档 + }; } // end of namespace # endif \ No newline at end of file diff --git a/fielddb/meta.cpp b/fielddb/meta.cpp new file mode 100644 index 0000000..970d1fb --- /dev/null +++ b/fielddb/meta.cpp @@ -0,0 +1,58 @@ +#include "fielddb/meta.h" +#include "util/coding.h" +#include +#include "leveldb/options.h" +#include "leveldb/slice.h" +#include "leveldb/write_batch.h" + +namespace fielddb { +using namespace leveldb; + +// Slice MetaKV::metaKey() { +// std::string buf; +// PutLengthPrefixedSlice(&buf, Key); +// PutFixed64(&buf, meta_seq); +// PutFixed32(&buf, tag); +// return Slice(buf); +// } + +// Slice MetaKV::metaValue() { +// return Slice(SerializeValue(Fields)); +// } + + +//对于含有index field的put的meta编码为 (KV|Key,Value) +void MetaKV::Trans(Slice &MetaKey,Slice &MetaValue) { + MetaKey.clear(); + MetaValue.clear(); + std::string buf; + PutFixed32(&buf, KV_Creating); + PutLengthPrefixedSlice(&buf, Slice(*name)); + MetaKey = Slice(buf); + MetaValue = Slice(*value); +} + +class CleanerHandler : public WriteBatch::Handler { +public: + WriteBatch *NeedClean; + void Put(const Slice& key, const Slice& value) override { + //将所有之前put的meta数据进行delete + NeedClean->Delete(key); + } + void Delete(const Slice& key) override { + 
//所有的传入的MetaBatch都是Put的 + assert(0); + } +}; + +void MetaCleaner::Collect(WriteBatch &MetaBatch) { + CleanerHandler Handler; + Handler.NeedClean = &NeedClean; + MetaBatch.Iterate(&Handler); +} + +void MetaCleaner::CleanMetaBatch(DB *metaDB) { + if(NeedClean.ApproximateSize() == 0) return; + metaDB->Write(WriteOptions(), &NeedClean); +} +} \ No newline at end of file diff --git a/fielddb/meta.h b/fielddb/meta.h new file mode 100644 index 0000000..eb3a927 --- /dev/null +++ b/fielddb/meta.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include "leveldb/slice.h" +#include "leveldb/write_batch.h" +#include "util/serialize_value.h" +#include "fielddb/field_db.h" +namespace fielddb { +using namespace leveldb; +/*根据写入的流程可以推断,需要存在metaDB中的数据其实都是带索引的数据,也就是FieldArray*/ +// class MetaKV { +// MetaKV(Slice &Key,FieldArray Fields): +// Key(Key),Fields(Fields),tag(0),meta_seq(0) { } +// inline int get_seq() { return meta_seq; } +// inline void set_seq(int meta_seq) { this->meta_seq = meta_seq; } +// inline void setPut() { tag = PUT; } +// inline void setDelete() { tag = DELETE; } +// Slice metaKey(); +// Slice metaValue(); +// private: +// enum {PUT = 0x0,DELETE = 0x1}; +// uint64_t meta_seq; +// uint8_t tag; +// Slice &Key; +// FieldArray Fields; +// }; + +enum MetaType { + Index, //记录index状态的meta + KV_Creating, //记录含有index field的put的meta + KV_Deleting, +}; + +//将一对(field_name,field_value)转换到metaDB中的KV表示 +class MetaKV { +public: + MetaKV(std::string *field_name,std::string *field_value): + name(field_name),value(field_value) { } + void Trans(Slice &MetaKey,Slice &MetaValue); +private: + std::string *name; + std::string *value; +}; + +class MetaCleaner { +public: + MetaCleaner() = default; + void Collect(WriteBatch &MetaBatch); + void CleanMetaBatch(DB *metaDB); +private: + WriteBatch NeedClean; +}; + +} \ No newline at end of file diff --git a/fielddb/metakv.cpp b/fielddb/metakv.cpp deleted file mode 100644 index 819030f..0000000 --- a/fielddb/metakv.cpp +++ /dev/null @@ 
-1,20 +0,0 @@ -#include "fielddb/metakv.h" -#include "util/coding.h" -#include - -namespace fielddb { -using namespace leveldb; - -Slice MetaKV::metaKey() { - std::string buf; - PutLengthPrefixedSlice(&buf, Key); - PutFixed64(&buf, meta_seq); - PutFixed32(&buf, tag); - return Slice(buf); -} - -Slice MetaKV::metaValue() { - return Slice(SerializeValue(Fields)); -} - -} \ No newline at end of file diff --git a/fielddb/metakv.h b/fielddb/metakv.h deleted file mode 100644 index f976830..0000000 --- a/fielddb/metakv.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include -#include -#include "leveldb/slice.h" -#include "util/serialize_value.h" -namespace fielddb { -using namespace leveldb; -/*根据写入的流程可以推断,需要存在metaDB中的数据其实都是带索引的数据,也就是FieldArray*/ -class MetaKV { - MetaKV(Slice &Key,FieldArray Fields): - Key(Key),Fields(Fields),tag(0),meta_seq(0) { } - inline int get_seq() { return meta_seq; } - inline void set_seq(int meta_seq) { this->meta_seq = meta_seq; } - inline void setPut() { tag = PUT; } - inline void setDelete() { tag = DELETE; } - Slice metaKey(); - Slice metaValue(); -private: - enum {PUT = 0x0,DELETE = 0x1}; - uint64_t meta_seq; - uint8_t tag; - Slice &Key; - FieldArray Fields; -}; -} \ No newline at end of file diff --git a/fielddb/request.cpp b/fielddb/request.cpp index e69de29..36d82cf 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -0,0 +1,134 @@ +#include "fielddb/request.h" +#include +#include "leveldb/slice.h" +#include "leveldb/status.h" +#include "leveldb/write_batch.h" +#include "util/mutexlock.h" +#include "util/serialize_value.h" +#include "fielddb/encode_index.h" +#include "fielddb/field_db.h" +#include "fielddb/meta.h" +namespace fielddb { +using namespace leveldb; + +//为虚函数提供最基本的实现 +void Request::PendReq(Request *req) { + assert(0); +} + +//为虚函数提供最基本的实现 +void Request::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) +{ + assert(0); +} + +void Request::Prepare(FieldDB *DB) { 
+ assert(0); +} + +void Request::Finalize(FieldDB *DB) { + assert(0); +} + +//为虚函数提供最基本的实现 +bool Request::isPending() { + //pending中的请求的parent会指向所等待的请求(iCreate/iDelete) + return parent != this; +} + + +/*******FieldsReq*******/ +void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) +{ + KVBatch.Put(Slice(*Key), Slice(SerializeValue(*Fields))); + bool HasIndex = false; + { + // MutexLock L(&DB->index_mu); //互斥访问索引状态表 + DB->index_mu.AssertHeld(); + //1.将存在冲突的put pend到对应的请求 + for(auto [field_name,field_value] : *Fields) { + if(field_name == "") break; + if(DB->index_.count(field_name)) { + auto [index_status,parent_req] = DB->index_[field_name]; + if(index_status == FieldDB::Creating || index_status == FieldDB::Deleting) { + parent_req->PendReq(this); + return; + } else if(index_status == FieldDB::Exist) { + HasIndex = true; + } + assert(0); + } + } + //2.对于没有冲突但含有索引操作的put,构建metaKV,这里直接将KV对简单编码后写入metaDB + if(HasIndex) { + Slice MetaKey,MetaValue; + std::string serialized = SerializeValue(*Fields); + MetaKV MKV = MetaKV(Key,&serialized); + MKV.Trans(MetaKey, MetaValue); + MetaBatch.Put(MetaKey, MetaValue); + } + //3.对于含有索引的field建立索引 + for(auto [field_name,field_value] : *Fields) { + if(field_name == "") continue; + if(DB->index_.count(field_name)) { + std::string indexKey; + AppendIndexKey(&indexKey, ParsedInternalIndexKey( + *Key,field_name,field_value)); + IndexBatch.Put(indexKey, Slice()); + } + } + } +} + + +/*******DeleteReq*******/ +void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) +{ + //TODO: + //1. 
读取当前的最新的键值对,判断是否存在含有键值对的field + //2.1 如果无,则正常构造delete + //2.2 如果是有的field的索引状态都是exist,则在meta中写KV_Deleting类型的记录 + //在kvDB和metaDB中写入对应的delete + //2.3 如果存在field的索引状态是Creating或者Deleting,那么在那个队列上面进行等待 +} + +/*******iCreateReq*******/ +void iCreateReq::Prepare(FieldDB *DB) { + //在index_中完成索引状态更新,在这里可以避免重复创建 + DB->index_mu.AssertHeld(); + if(DB->index_.count(*Field)) { + auto [istatus,parent] = DB->index_[*Field]; + if(istatus == FieldDB::Exist) { + //如果已经完成建立索引,则返回成功 + done = true; + s = Status::OK(); + } else { + //如果正在创建或删除,那么进行等待 + parent->PendReq(this); + } + return; + } + //如果索引状态表中没有,则表示尚未创建,更新相应的状态 + //这里将done设置为true表示在taskqueue中需要完成的部分已经完成,不需要pend + DB->index_[*Field] = {FieldDB::Creating,this}; + done = true; +} + +void iCreateReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) +{ + //TODO:遍历数据库,构建二级索引到indexbatch,并且更新metaDB中的元数据为Index类型的(Field,Creating) + //这里或许不需要在metaDB中先写一遍? +} + +void iCreateReq::Finalize(FieldDB *DB) { + //TODO: + //1. 写入完成后,更新index状态表,并将metaDB的值改为Index类型的(Field,Existing) + //2. 
将所有的pendinglist重新入队 + +} + +} \ No newline at end of file diff --git a/fielddb/request.h b/fielddb/request.h index 3831fed..731158d 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -1,25 +1,130 @@ +#include #include +#include "leveldb/status.h" +#include "leveldb/write_batch.h" #include "port/port_stdcxx.h" #include "util/mutexlock.h" #include "util/serialize_value.h" +// #include "fielddb/field_db.h" + +#ifndef REQUEST_H +#define REQUEST_H namespace fielddb { using namespace leveldb; // 在taskqueue中的Request,由taskqueue最开始的线程处理一批Request // 这个思路与write写入的思路类似 +class FieldDB; class Request { public: - Request(std::string *Key,std::string *Value,port::Mutex *mu): - Key(Key),Value(Value),hasFields(false),_cond(mu) { } - Request(std::string *Key,FieldArray *Fields,port::Mutex *mu): - Key(Key),Fields(Fields),hasFields(false),_cond(mu) { } + friend class FieldDB; + enum RequestType { + FieldsReq_t, + ValueReq_t, + iCreateReq_t, + iDeleteReq_t, + DeleteReq_t, + }; + +public: + // Request(std::string *Key,std::string *Value,port::Mutex *mu): + // Key(Key),Value(Value),hasFields(false),cond_(mu) { } + // Request(std::string *Key,FieldArray *Fields,port::Mutex *mu): + // Key(Key),Fields(Fields),hasFields(true),cond_(mu) { } + Request(RequestType type,port::Mutex *mu): + type_(type),cond_(mu),done(false) { parent = this; }; + + virtual ~Request(); -private: + inline bool isFieldsReq() { return type_ == FieldsReq_t; } + // inline bool isValueReq() { return type_ == ValueReq_t; } + inline bool isiCreateReq() { return type_ == iCreateReq_t; } + inline bool isiDeleteReq() { return type_ == iDeleteReq_t; } + inline bool isDeleteReq() { return type_ == DeleteReq_t; } + + //用于含有Fields的 + virtual void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB); + //主要用于icreate和idelete在队列中的注册当前状态 + virtual void Prepare(FieldDB *DB); + virtual void Finalize(FieldDB *DB); + + virtual void PendReq(Request *req); + bool isPending(); +// 
protected: bool done; - port::CondVar _cond; + Status s; + port::CondVar cond_; + RequestType type_; + Request *parent; +}; + +//含有field的put +class FieldsReq : public Request { +public: + FieldsReq(std::string *Key,FieldArray *Fields,port::Mutex *mu): + Key(Key),Fields(Fields),Request(FieldsReq_t,mu) { }; + + void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; - bool hasFields; std::string *Key; - std::string *Value; FieldArray *Fields; }; -} \ No newline at end of file + +//不含有field的put,但是计划被弃用了 +// class ValueReq : public Request { +// public: +// ValueReq(std::string *Key,std::string *Value,port::Mutex *mu): +// Key(Key),Value(Value),Request(ValueReq_t,mu) { }; + +// std::string *Key; +// std::string *Value; +// }; + +//TODO:下面的Field什么的可能通过传引用的方式会更加好? + +//创建索引的request +class iCreateReq : public Request { +public: + iCreateReq(std::string *Field,port::Mutex *mu): + Field(Field),Request(iCreateReq_t, mu) { }; + + void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; + void Prepare(FieldDB *DB) override; + void Finalize(FieldDB *DB) override; + + std::string *Field; + std::deque pending_list; +}; + +//删除索引的request +class iDeleteReq : public Request { +public: + iDeleteReq(std::string *Field,port::Mutex *mu): + Field(Field),Request(iDeleteReq_t, mu) { }; + + void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; + void Prepare(FieldDB *DB) override; + void Finalize(FieldDB *DB) override; + + std::string *Field; + std::deque pending_list; +}; + +//删除key的request +class DeleteReq : public Request { +public: + DeleteReq(std::string *Key,port::Mutex *mu): + Key(Key),Request(DeleteReq_t,mu) { }; + + void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; + + std::string *Key; +}; + +} + +#endif \ No newline at 
end of file From f2f8f8200d529620bbbf442e889da01f44293a12 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 15 Dec 2024 16:01:54 +0800 Subject: [PATCH 02/32] =?UTF-8?q?=E8=A1=A5=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 8 ++++++++ fielddb/request.cpp | 22 ++++++++++++++++++++++ fielddb/request.h | 8 +++++--- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 93c433c..38e390a 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -229,6 +229,10 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name) { std::string Field = field_name; iCreateReq req(&Field,&mutex_); HandleRequest(req); + //如果已经存在索引,那么可以直接返回 + if(req.Existed) { + return req.s; + } WriteBatch KVBatch,IndexBatch,MetaBatch; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); indexDB_->Write(WriteOptions(), &IndexBatch); @@ -257,6 +261,10 @@ Status FieldDB::DeleteIndex(const std::string &field_name) { std::string Field = field_name; iDeleteReq req(&Field,&mutex_); HandleRequest(req); + //如果已经被删除或者不存在,那么可以直接返回 + if(req.Deleted) { + return req.s; + } WriteBatch KVBatch,IndexBatch,MetaBatch; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); indexDB_->Write(WriteOptions(), &IndexBatch); diff --git a/fielddb/request.cpp b/fielddb/request.cpp index 36d82cf..f601a28 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -104,6 +104,7 @@ void iCreateReq::Prepare(FieldDB *DB) { if(istatus == FieldDB::Exist) { //如果已经完成建立索引,则返回成功 done = true; + Existed = true; s = Status::OK(); } else { //如果正在创建或删除,那么进行等待 @@ -131,4 +132,25 @@ void iCreateReq::Finalize(FieldDB *DB) { } +/*******iDeleteReq*******/ +void iDeleteReq::Prepare(FieldDB *DB) { + DB->index_mu.AssertHeld(); + if(DB->index_.count(*Field) == 0) { + done = true; + Deleted = true; + s = Status::OK(); + return ; + } +} + +void 
iDeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB) +{ + +} + +void iDeleteReq::Finalize(FieldDB *DB) { + +} + } \ No newline at end of file diff --git a/fielddb/request.h b/fielddb/request.h index 731158d..3346c1f 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -87,13 +87,14 @@ public: class iCreateReq : public Request { public: iCreateReq(std::string *Field,port::Mutex *mu): - Field(Field),Request(iCreateReq_t, mu) { }; + Field(Field),Request(iCreateReq_t, mu),Existed(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; + bool Existed; std::string *Field; std::deque pending_list; }; @@ -102,13 +103,14 @@ public: class iDeleteReq : public Request { public: iDeleteReq(std::string *Field,port::Mutex *mu): - Field(Field),Request(iDeleteReq_t, mu) { }; + Field(Field),Request(iDeleteReq_t, mu),Deleted(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; - + + bool Deleted; std::string *Field; std::deque pending_list; }; From d30aaecf56743bb44e5261dd8bbf3129db4e3b43 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 15 Dec 2024 17:56:35 +0800 Subject: [PATCH 03/32] =?UTF-8?q?iDeleteReq=E7=9A=84prepare?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 2 +- fielddb/request.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 38e390a..ad9e72d 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -229,7 +229,7 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name) { std::string Field = field_name; iCreateReq 
req(&Field,&mutex_); HandleRequest(req); - //如果已经存在索引,那么可以直接返回 + //如果已经存在索引,那么直接返回 if(req.Existed) { return req.s; } diff --git a/fielddb/request.cpp b/fielddb/request.cpp index f601a28..c1e926e 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -141,6 +141,14 @@ void iDeleteReq::Prepare(FieldDB *DB) { s = Status::OK(); return ; } + auto [istatus,parent] = DB->index_[*Field]; + if(istatus == FieldDB::Exist) { + DB->index_[*Field] = {FieldDB::Creating,this}; + done = true; + } else { + //如果正在创建或者删除,那么pend到对应的请求上 + parent->PendReq(this); + } } void iDeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, From 32d8a459895de411e95766fe9356bcc3923f0f79 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Fri, 20 Dec 2024 13:10:21 +0800 Subject: [PATCH 04/32] =?UTF-8?q?=E6=B5=8B=E8=AF=95=E6=A1=86=E6=9E=B6?= =?UTF-8?q?=E5=92=8C=E5=B9=B6=E5=8F=91=E6=B5=8B=E8=AF=95=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E8=B7=91=E9=80=9A=EF=BC=88=E6=B2=A1=E6=9C=89=E5=AE=9E=E7=8E=B0?= =?UTF-8?q?delete=E5=92=8C=E6=81=A2=E5=A4=8D=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 13 +++- fielddb/field_db.cpp | 78 ++++++++----------- fielddb/field_db.h | 18 +++-- fielddb/request.cpp | 104 +++++++++++++++++++------ fielddb/request.h | 6 +- test/basic_function_test.cc | 113 +++------------------------ test/helper.cc | 182 ++++++++++++++++++++++++++++++++++++++++++++ test/parallel_test.cc | 163 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 496 insertions(+), 181 deletions(-) create mode 100644 test/helper.cc create mode 100644 test/parallel_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index b70e461..9529fb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,6 +194,10 @@ target_sources(leveldb "util/serialize_value.cc" "fielddb/field_db.cpp" "fielddb/field_db.h" + "fielddb/meta.cpp" + "fielddb/meta.h" + "fielddb/request.cpp" + "fielddb/request.h" # 
Only CMake 3.3+ supports PUBLIC sources in targets exported by "install". $<$:PUBLIC> @@ -522,7 +526,12 @@ if(LEVELDB_INSTALL) ) endif(LEVELDB_INSTALL) -add_executable(lab1_test +add_executable(basic_function_test "${PROJECT_SOURCE_DIR}/test/basic_function_test.cc" ) -target_link_libraries(lab1_test PRIVATE leveldb gtest) +target_link_libraries(basic_function_test PRIVATE leveldb gtest) + +add_executable(parallel_test + "${PROJECT_SOURCE_DIR}/test/parallel_test.cc" +) +target_link_libraries(parallel_test PRIVATE leveldb gtest) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index ad9e72d..2e00ca9 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -14,6 +14,7 @@ #include "util/serialize_value.h" #include "fielddb/encode_index.h" #include "fielddb/meta.h" +#include "field_db.h" namespace fielddb { using namespace leveldb; @@ -61,7 +62,7 @@ Request *FieldDB::GetHandleInterval() { mutex_.AssertHeld(); //保证队列是互斥访问的 Request *tail = taskqueue_.front(); for(auto *req_ptr : taskqueue_) { - if(req_ptr->isDeleteReq() || req_ptr->isiCreateReq()) { + if(req_ptr->isiDeleteReq() || req_ptr->isiCreateReq()) { return tail; } tail = req_ptr; @@ -83,6 +84,7 @@ Again: WriteBatch KVBatch,IndexBatch,MetaBatch; Status status; if(!tail->isiCreateReq() && !tail->isiDeleteReq()) { + // int debug = tail->type_; //表明这一个区间并没有涉及index的创建删除 { //1. 构建各个Batch。构建的过程中要保证索引状态的一致性,需要上锁。 @@ -97,9 +99,12 @@ Again: mutex_.Unlock(); WriteOptions op; status = metaDB_->Write(op, &MetaBatch); + assert(status.ok()); //TODO:index的写入需要在另外一个线程中同时完成 status = indexDB_->Write(op, &IndexBatch); + assert(status.ok()); status = kvDB_->Write(op, &KVBatch); + assert(status.ok()); //3. 
将meta数据清除 MetaCleaner cleaner; cleaner.Collect(MetaBatch); @@ -113,13 +118,13 @@ Again: while(true) { Request *ready = taskqueue_.front(); + // int debug = tail->type_; taskqueue_.pop_front(); //当前ready不是队首,不是和index的创建有关 - if(ready != &req && !ready->isPending() && - !req.isiCreateReq() && !req.isiDeleteReq()) { + if(!ready->isPending() && !req.isiCreateReq() && !req.isiDeleteReq()) { ready->s = status; ready->done = true; - ready->cond_.Signal(); + if (ready != &req) ready->cond_.Signal(); } if (ready == tail) break; } @@ -159,11 +164,11 @@ Status FieldDB::PutFields(const WriteOptions &Options, // todo: 删除有索引的key时indexdb也要同步 Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { // - std::string key_ = key.ToString(); - DeleteReq req(&key_,&mutex_); - Status status = HandleRequest(req); - return status; - // return kvDB_->Delete(options, key); + // std::string key_ = key.ToString(); + // DeleteReq req(&key_,&mutex_); + // Status status = HandleRequest(req); + // return status; + return kvDB_->Delete(options, key); } // TODO:根据updates里面的东西,要对是否需要更新index进行分别处理 Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { @@ -207,25 +212,6 @@ std::vector> FieldDB::FindKeysAndValByFieldN } Status FieldDB::CreateIndexOnField(const std::string& field_name) { - //taskQueue相关 - //写锁 是不是只需要给putfields设置一把锁就行 - - // std::vector> keysAndVal = - // FindKeysAndValByFieldName(field_name); - // WriteBatch writeBatch; - // Slice value = Slice(); - // for (auto &kvPair : keysAndVal){ - // std::string indexKey; - // AppendIndexKey(&indexKey, - // ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); - // writeBatch.Put(indexKey, value); - // } - // Status s = indexDB_->Write(WriteOptions(), &writeBatch); - // if (!s.ok()) return s; - - // index_[field_name].first = Exist; - // //唤醒taskqueue - // return s; std::string Field = field_name; iCreateReq req(&Field,&mutex_); HandleRequest(req); @@ -241,23 +227,6 @@ Status 
FieldDB::CreateIndexOnField(const std::string& field_name) { } Status FieldDB::DeleteIndex(const std::string &field_name) { - //taskQueue相关 - //写锁 - // std::vector> keysAndVal = - // FindKeysAndValByFieldName(field_name); - // WriteBatch writeBatch; - // for (auto &kvPair : keysAndVal){ - // std::string indexKey; - // AppendIndexKey(&indexKey, - // ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); - // writeBatch.Delete(indexKey); - // } - // Status s = indexDB_->Write(WriteOptions(), &writeBatch); - // if (!s.ok()) return s; - - // index_.erase(field_name); - // //唤醒taskqueue - // return s; std::string Field = field_name; iDeleteReq req(&Field,&mutex_); HandleRequest(req); @@ -299,6 +268,12 @@ std::vector FieldDB::QueryByIndex(const Field &field, Status *s) { return result; } +IndexStatus FieldDB::GetIndexStatus(const std::string &fieldName){ + if (index_.count(fieldName) == 0) return IndexStatus::NotExist; + IndexStatus idxs = index_[fieldName].first; + return idxs; +} + Iterator * FieldDB::NewIterator(const ReadOptions &options) { return kvDB_->NewIterator(options); } @@ -327,4 +302,15 @@ void FieldDB::CompactRange(const Slice *begin, const Slice *end) { kvDB_->CompactRange(begin, end); } -} // end of namespace \ No newline at end of file +Status DestroyDB(const std::string& name, const Options& options) { + Status s; + s = leveldb::DestroyDB(name+"_kvDB", options); + assert(s.ok()); + s = leveldb::DestroyDB(name+"_indexDB", options); + assert(s.ok()); + s = leveldb::DestroyDB(name+"_metaDB", options); + assert(s.ok()); + return s; +} + +} // namespace fielddb diff --git a/fielddb/field_db.h b/fielddb/field_db.h index c54b525..684a820 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -15,6 +15,14 @@ # define FIELD_DB_H namespace fielddb { using namespace leveldb; + +enum IndexStatus{ + Creating, + Deleting, + Exist, + NotExist + }; + class FieldDB : DB { public: friend class Request; @@ -43,6 +51,8 @@ public: Status 
CreateIndexOnField(const std::string& field_name); Status DeleteIndex(const std::string &field_name); std::vector QueryByIndex(const Field &field, Status *s); + //返回当前数据库中索引状态,用来测试,不过也可以作为一个功能? + IndexStatus GetIndexStatus(const std::string &fieldName); static Status OpenFieldDB(const Options& options,const std::string& name,FieldDB** dbptr); @@ -59,11 +69,6 @@ private: leveldb::DB *indexDB_; leveldb::DB *kvDB_; - enum IndexStatus{ - Creating, - Deleting, - Exist - }; using FieldName = std::string; // 标记index的状态,如果是creating/deleting,则会附带相应的请求 std::map> index_; @@ -80,5 +85,8 @@ private: Request *GetHandleInterval(); //获得任务队列中的待处理区间,区间划分规则和原因见文档 }; + +Status DestroyDB(const std::string& name, + const Options& options); } // end of namespace # endif \ No newline at end of file diff --git a/fielddb/request.cpp b/fielddb/request.cpp index c1e926e..6d36caf 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -8,6 +8,7 @@ #include "fielddb/encode_index.h" #include "fielddb/field_db.h" #include "fielddb/meta.h" +#include "request.h" namespace fielddb { using namespace leveldb; @@ -52,13 +53,13 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, if(field_name == "") break; if(DB->index_.count(field_name)) { auto [index_status,parent_req] = DB->index_[field_name]; - if(index_status == FieldDB::Creating || index_status == FieldDB::Deleting) { + if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { parent_req->PendReq(this); return; - } else if(index_status == FieldDB::Exist) { + } else if(index_status == IndexStatus::Exist) { HasIndex = true; } - assert(0); + //assert(0); } } //2.对于没有冲突但含有索引操作的put,构建metaKV,这里直接将KV对简单编码后写入metaDB @@ -68,15 +69,16 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, MetaKV MKV = MetaKV(Key,&serialized); MKV.Trans(MetaKey, MetaValue); MetaBatch.Put(MetaKey, MetaValue); - } + //第三点是不是应该在这一分支中 //3.对于含有索引的field建立索引 - for(auto [field_name,field_value] : 
*Fields) { - if(field_name == "") continue; - if(DB->index_.count(field_name)) { - std::string indexKey; - AppendIndexKey(&indexKey, ParsedInternalIndexKey( - *Key,field_name,field_value)); - IndexBatch.Put(indexKey, Slice()); + for(auto [field_name,field_value] : *Fields) { + if(field_name == "") continue; + if(DB->index_.count(field_name)) { + std::string indexKey; + AppendIndexKey(&indexKey, ParsedInternalIndexKey( + *Key,field_name,field_value)); + IndexBatch.Put(indexKey, Slice()); + } } } } @@ -101,7 +103,7 @@ void iCreateReq::Prepare(FieldDB *DB) { DB->index_mu.AssertHeld(); if(DB->index_.count(*Field)) { auto [istatus,parent] = DB->index_[*Field]; - if(istatus == FieldDB::Exist) { + if(istatus == IndexStatus::Exist) { //如果已经完成建立索引,则返回成功 done = true; Existed = true; @@ -114,22 +116,48 @@ void iCreateReq::Prepare(FieldDB *DB) { } //如果索引状态表中没有,则表示尚未创建,更新相应的状态 //这里将done设置为true表示在taskqueue中需要完成的部分已经完成,不需要pend - DB->index_[*Field] = {FieldDB::Creating,this}; + DB->index_[*Field] = {IndexStatus::Creating,this}; done = true; } +void iCreateReq::PendReq(Request *req) { + req->parent = this; + pending_list.push_back(req); +} + void iCreateReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB) { - //TODO:遍历数据库,构建二级索引到indexbatch,并且更新metaDB中的元数据为Index类型的(Field,Creating) - //这里或许不需要在metaDB中先写一遍? + //遍历数据库,构建二级索引到indexbatch,(更新metaDB中的元数据为Index类型的(Field,Creating)) + //一个indexwritebatch写入,那么索引创建删除应该和metadb没有交互 + std::vector> keysAndVal = + DB->FindKeysAndValByFieldName(*Field); + Slice value = Slice(); + for (auto &kvPair : keysAndVal){ + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(kvPair.first, *Field, kvPair.second)); + IndexBatch.Put(indexKey, value); + } } void iCreateReq::Finalize(FieldDB *DB) { - //TODO: - //1. 写入完成后,更新index状态表,并将metaDB的值改为Index类型的(Field,Existing) - //2. 将所有的pendinglist重新入队 - + //1. 
写入完成后,更新index状态表,(并将metaDB的值改为Index类型的(Field,Existing)) + MutexLock iL(&DB->index_mu); + DB->index_[*Field] = {IndexStatus::Exist, nullptr}; + DB->index_mu.Unlock(); + + if (pending_list.empty()) return; + //2. 将所有的pendinglist重新入队 + MutexLock L(&DB->mutex_); + for (auto req : pending_list){ + DB->taskqueue_.push_back(req); + req->parent = req; //解绑 + } + if (pending_list[0] == DB->taskqueue_.front()) { + pending_list[0]->cond_.Signal(); + } + this->s = Status::OK(); } /*******iDeleteReq*******/ @@ -142,8 +170,8 @@ void iDeleteReq::Prepare(FieldDB *DB) { return ; } auto [istatus,parent] = DB->index_[*Field]; - if(istatus == FieldDB::Exist) { - DB->index_[*Field] = {FieldDB::Creating,this}; + if(istatus == IndexStatus::Exist) { + DB->index_[*Field] = {IndexStatus::Deleting,this}; done = true; } else { //如果正在创建或者删除,那么pend到对应的请求上 @@ -151,14 +179,42 @@ void iDeleteReq::Prepare(FieldDB *DB) { } } +void iDeleteReq::PendReq(Request* req) { + req->parent = this; + pending_list.push_back(req); +} + void iDeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB) { - + std::vector> keysAndVal = + DB->FindKeysAndValByFieldName(*Field); + Slice value = Slice(); + for (auto &kvPair : keysAndVal){ + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(kvPair.first, *Field, kvPair.second)); + IndexBatch.Delete(indexKey); + } } void iDeleteReq::Finalize(FieldDB *DB) { - + MutexLock iL(&DB->index_mu); + DB->index_.erase(*Field); + DB->index_mu.Unlock(); + + if (pending_list.empty()) return; + //2. 
将所有的pendinglist重新入队 + MutexLock L(&DB->mutex_); + for (auto req : pending_list){ + DB->taskqueue_.push_back(req); + req->parent = req; //解绑 + } + if (pending_list[0] == DB->taskqueue_.front()) { + pending_list[0]->cond_.Signal(); + } + this->s = Status::OK(); } -} \ No newline at end of file + +} // namespace fielddb \ No newline at end of file diff --git a/fielddb/request.h b/fielddb/request.h index 3346c1f..e68c4f1 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -19,7 +19,7 @@ public: friend class FieldDB; enum RequestType { FieldsReq_t, - ValueReq_t, + //ValueReq_t, iCreateReq_t, iDeleteReq_t, DeleteReq_t, @@ -33,7 +33,7 @@ public: Request(RequestType type,port::Mutex *mu): type_(type),cond_(mu),done(false) { parent = this; }; - virtual ~Request(); + //virtual ~Request(); inline bool isFieldsReq() { return type_ == FieldsReq_t; } // inline bool isValueReq() { return type_ == ValueReq_t; } @@ -93,6 +93,7 @@ public: WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; + void PendReq(Request *req) override; bool Existed; std::string *Field; @@ -109,6 +110,7 @@ public: WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; + void PendReq(Request *req) override; bool Deleted; std::string *Field; diff --git a/test/basic_function_test.cc b/test/basic_function_test.cc index 4915fe4..28d9fc7 100644 --- a/test/basic_function_test.cc +++ b/test/basic_function_test.cc @@ -2,138 +2,47 @@ // #include "leveldb/env.h" // #include "leveldb/db.h" #include "fielddb/field_db.h" +#include "test/helper.cc" using namespace fielddb; -constexpr int value_size = 2048; -constexpr int data_size = 128 << 20; -std::vector cities = { - "Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou", - "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" - }; -std::vector shanghaiKeys; - -Status OpenDB(std::string dbName, FieldDB **db) { - 
Options options; - options.create_if_missing = true; - return FieldDB::OpenFieldDB(options, dbName, db); -} - -void ClearDB(FieldDB *db){ - //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染 - WriteOptions writeOptions; - int key_num = data_size / value_size; - for (int i = 0; i < key_num; i++) { - int key_ = i+1; - std::string key = std::to_string(key_); - Status s = db->Delete(WriteOptions(), key); - ASSERT_TRUE(s.ok()); - } -} - -void InsertFieldData(FieldDB *db) { - WriteOptions writeOptions; - int key_num = data_size / value_size; - srand(0); - - for (int i = 0; i < key_num; i++) { - int randThisTime = rand(); //确保读写一个循环只rand一次,否则随机序列会不一致 - int key_ = randThisTime % key_num+1; - std::string key = std::to_string(key_); - - std::string name = "customer#" + std::to_string(key_); - std::string address = cities[randThisTime % cities.size()]; - FieldArray fields = { - {"name", name}, - {"address", address} - }; - if (address == "Shanghai") { - shanghaiKeys.push_back(key); - } - Status s = db->PutFields(WriteOptions(), key, fields); - ASSERT_TRUE(s.ok()); - } -} - -void GetFieldData(FieldDB *db) { - ReadOptions readOptions; - int key_num = data_size / value_size; - - // 点查 - srand(0); - for (int i = 0; i < 100; i++) { - int randThisTime = rand(); - int key_ = randThisTime % key_num+1; - std::string key = std::to_string(key_); - FieldArray fields_ret; - Status s = db->GetFields(readOptions, key, &fields_ret); - ASSERT_TRUE(s.ok()); - for (const Field& pairs : fields_ret) { - if (pairs.first == "name"){ - - } else if (pairs.first == "address"){ - std::string city = pairs.second; - ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end()); - } else assert(false); - } - } -} - -void findKeysByCity(FieldDB *db) { - Field field = {"address", "Shanghai"}; - std::vector resKeys = db->FindKeysByField(field); - std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; - for (const std::string &key : resKeys){ - ASSERT_NE(std::find(shanghaiKeys.begin(), 
shanghaiKeys.end(), key), shanghaiKeys.end()); - } -} - -void findKeysByCityIndex(FieldDB *db, bool expect) { - Field field = {"address", "Shanghai"}; - Status s; - std::vector resKeys = db->QueryByIndex(field, &s); - if (expect) ASSERT_TRUE(s.ok()); - else { - ASSERT_TRUE(s.IsNotFound()); - return; - } - std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; - for (const std::string &key : resKeys){ - ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); - } -} TEST(TestLab1, Basic) { - // DestroyDB("testdb",Options()); + fielddb::DestroyDB("testdb1.1",Options()); //每个测试前,先把对应名称的之前的数据库删了 FieldDB *db = new FieldDB(); - if(OpenDB("testdb", &db).ok() == false) { + if(OpenDB("testdb1.1", &db).ok() == false) { std::cerr << "open db failed" << std::endl; abort(); } // ClearDB(db); InsertFieldData(db); - GetFieldData(db); + bool allowNotFound = false; + GetFieldData(db, allowNotFound); findKeysByCity(db); delete db; } TEST(TestLab2, Basic) { - //destroy + fielddb::DestroyDB("testdb1.2",Options()); FieldDB *db = new FieldDB(); - if(OpenDB("testdb2", &db).ok() == false) { + if(OpenDB("testdb1.2", &db).ok() == false) { std::cerr << "open db failed" << std::endl; abort(); } // ClearDB(db); shanghaiKeys.clear(); + age20Keys.clear(); InsertFieldData(db); // GetFieldData(db); // findKeysByCity(db); db->CreateIndexOnField("address"); + db->CreateIndexOnField("age"); findKeysByCityIndex(db, true); + findKeysByAgeIndex(db, true); db->DeleteIndex("address"); findKeysByCityIndex(db, false); + findKeysByAgeIndex(db, true); delete db; } diff --git a/test/helper.cc b/test/helper.cc new file mode 100644 index 0000000..9de92bc --- /dev/null +++ b/test/helper.cc @@ -0,0 +1,182 @@ +#include "gtest/gtest.h" +// #include "leveldb/env.h" +// #include "leveldb/db.h" +#include "fielddb/field_db.h" +#include +using namespace fielddb; + +constexpr int value_size = 2048; +constexpr int data_size = 128 << 20; +#define AGE_RANGE 100 +std::vector 
cities = { + "Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou", + "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" + }; +//检查insert和queryByIndex的数据是否对应 +std::set shanghaiKeys; +std::set age20Keys; +//复杂的测试要注意这两个全局变量,目前只有InsertFieldData和InsertOneField会往里加,并且没有清理 + +Status OpenDB(std::string dbName, FieldDB **db) { + Options options; + options.create_if_missing = true; + return FieldDB::OpenFieldDB(options, dbName, db); +} + +// void ClearDB(FieldDB *db){ +// //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染 +// WriteOptions writeOptions; +// int key_num = data_size / value_size; +// for (int i = 0; i < key_num; i++) { +// int key_ = i+1; +// std::string key = std::to_string(key_); +// Status s = db->Delete(WriteOptions(), key); +// ASSERT_TRUE(s.ok()); +// } +// } + +//只插一条特定数据的测试 +void InsertOneField(FieldDB *db, std::string key = "0") { + WriteOptions writeOptions; + FieldArray fields = { + {"name", "special#" + key}, + {"address", "Shanghai"}, + {"age", "20"} + }; + Status s = db->PutFields(WriteOptions(), key, fields); + ASSERT_TRUE(s.ok()); + shanghaiKeys.insert(key); + age20Keys.insert(key); +} + +//与上面对应 +void GetOneField(FieldDB *db, std::string key = "0") { + ReadOptions readOptions; + FieldArray fields_ret; + Status s = db->GetFields(readOptions, key, &fields_ret); + ASSERT_TRUE(s.ok()); + for (const Field& pairs : fields_ret) { + if (pairs.first == "name"){ + ASSERT_EQ(pairs.second, "special#" + key); + } else if (pairs.first == "address"){ + ASSERT_EQ(pairs.second, "Shanghai"); + } else if (pairs.first == "age"){ + ASSERT_EQ(pairs.second, "20"); + } else assert(false); + } +} + +void InsertFieldData(FieldDB *db, int seed = 0/*随机种子*/) { + std::cout << "-------inserting-------" << std::endl; + WriteOptions writeOptions; + int key_num = data_size / value_size; + // srand线程不安全,这种可以保证多线程时随机序列也一致 + std::mt19937 rng(seed); + + for (int i = 0; i < key_num; i++) { + int randThisTime = rng(); //确保读写一个循环只rand一次,否则随机序列会不一致 + //让批量写入的key>0, 单独写入的key<=0,方便测试观察 + 
int key_ = std::abs(randThisTime) % key_num + 1; + std::string key = std::to_string(key_); + + std::string name = "customer#" + std::to_string(key_); + std::string address = cities[randThisTime % cities.size()]; + std::string age = std::to_string(std::abs(randThisTime) % AGE_RANGE); + FieldArray fields = { + {"name", name}, + {"address", address}, + {"age", age} + }; + if (address == "Shanghai") { + shanghaiKeys.insert(key); + } + if (age == "20") { + age20Keys.insert(key); + } + Status s = db->PutFields(WriteOptions(), key, fields); + ASSERT_TRUE(s.ok()); + } +} + +//并发时不一定能读到,加个参数控制 +void GetFieldData(FieldDB *db, bool allowNotFound, int seed = 0) { + std::cout << "-------getting-------" << std::endl; + ReadOptions readOptions; + int key_num = data_size / value_size; + + // 点查 + std::mt19937 rng(seed); + for (int i = 0; i < 100; i++) { + int randThisTime = rng(); + int key_ = std::abs(randThisTime) % key_num + 1; + std::string key = std::to_string(key_); + FieldArray fields_ret; + Status s = db->GetFields(readOptions, key, &fields_ret); + if (!allowNotFound){ //必须读到 + // if (!s.ok()){ + // std::cout << key << std::endl; + // } + ASSERT_TRUE(s.ok()); + } else { //不必须读到,但只要读到address必须正确 + if(s.IsNotFound()) continue; + } + for (const Field& pairs : fields_ret) { + if (pairs.first == "name"){ + + } else if (pairs.first == "address"){ + std::string city = pairs.second; + ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end()); + + } else if (pairs.first == "age"){ + int age = std::stoi(pairs.second); + ASSERT_TRUE(age >= 0 && age < AGE_RANGE); + + } else assert(false); + } + } +} + +void findKeysByCity(FieldDB *db) { + std::cout << "-------getting field address-------" << std::endl; + Field field = {"address", "Shanghai"}; + std::vector resKeys = db->FindKeysByField(field); + //打印比较,因为shanghaikey可能被后写入的、其他address的key覆盖,打印出的后一个数应该小于前一个数 + //如果随机种子相同,每次打印出的两个数也应该相同 + std::cout << "address: " << shanghaiKeys.size() << " " << resKeys.size() << std::endl; + 
for (const std::string &key : resKeys){ + ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + } +} + +// haveIndex表明数据库有没有该索引(address) +void findKeysByCityIndex(FieldDB *db, bool haveIndex) { + std::cout << "-------getting field address by index-------" << std::endl; + Field field = {"address", "Shanghai"}; + Status s; + std::vector resKeys = db->QueryByIndex(field, &s); + if (haveIndex) ASSERT_TRUE(s.ok()); + else { + ASSERT_TRUE(s.IsNotFound()); + return; + } + std::cout << "address: " << shanghaiKeys.size() << " " << resKeys.size() << std::endl;//打印比较 + for (const std::string &key : resKeys){ + ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + } +} + +void findKeysByAgeIndex(FieldDB *db, bool haveIndex) { + std::cout << "-------getting field age by index-------" << std::endl; + Field field = {"age", "20"}; + Status s; + std::vector resKeys = db->QueryByIndex(field, &s); + if (haveIndex) ASSERT_TRUE(s.ok()); + else { + ASSERT_TRUE(s.IsNotFound()); + return; + } + std::cout << "age: " << age20Keys.size() << " " << resKeys.size() << std::endl; + for (const std::string &key : resKeys){ + ASSERT_NE(std::find(age20Keys.begin(), age20Keys.end(), key), age20Keys.end()); + } +} \ No newline at end of file diff --git a/test/parallel_test.cc b/test/parallel_test.cc new file mode 100644 index 0000000..98bf457 --- /dev/null +++ b/test/parallel_test.cc @@ -0,0 +1,163 @@ +#include "gtest/gtest.h" +#include +// #include "leveldb/env.h" +// #include "leveldb/db.h" +#include "fielddb/field_db.h" +#include "test/helper.cc" +using namespace fielddb; + +// 测试中read/write都表示带索引的读写 + +//读写有索引数据的并发 +TEST(TestReadWrite, Parallel) { + fielddb::DestroyDB("testdb2.1",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2.1", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + // ClearDB(db); + int thread_num_ = 5; + std::vector threads(thread_num_); + //二写三读 + for 
(size_t i = 0; i < thread_num_; i++) + { + if (i == 0) {//写随机序列0 + threads[i] = std::thread(InsertFieldData, db, 0); + } else if (i == 1) + {//写随机序列1 + threads[i] = std::thread(InsertFieldData, db, 1); + } else {//读 + bool allowNotFound = true; + threads[i] = std::thread(GetFieldData, db, allowNotFound, 0); + } + } + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + + // 此时写已完成,一定能读到两次写 + bool allowNotFound = false; + GetFieldData(db, allowNotFound); + GetFieldData(db, allowNotFound, 1); + findKeysByCity(db); + delete db; +} + +//创建索引与写有该索引数据的并发 +TEST(TestWriteCreatei, Parallel) { + fielddb::DestroyDB("testdb2.2",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2.2", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + + // ClearDB(db); + shanghaiKeys.clear(); + InsertFieldData(db); + int thread_num_ = 2; + std::vector threads(thread_num_); + for (size_t i = 0; i < thread_num_; i++) + { + if (i == 0) {//创建索引 + threads[i] = std::thread([db](){ + db->CreateIndexOnField("address"); + std::cout << "finish create index\n"; + }); + } else {//写 + threads[i] = std::thread([db](){ + while (db->GetIndexStatus("address") == NotExist){ + continue; //开始创建了再并发的写 + } + InsertOneField(db); //先插一条 + }); + } + } + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + //检查索引是否创建成功 + bool haveIndex = true; + findKeysByCityIndex(db, haveIndex); + //检查写入是否成功 + GetOneField(db); + + delete db; +} + +//创建删除不同索引的并发 +TEST(TestCreateiCreatei, Parallel) { + fielddb::DestroyDB("testdb2.3",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2.3", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + + // ClearDB(db); + shanghaiKeys.clear(); + age20Keys.clear(); + InsertFieldData(db); + int thread_num_ = 3; + std::vector threads(thread_num_); + for (size_t i = 0; i < thread_num_; i++) + { + //3线程并发创建索引address + threads[i] = std::thread([db](){ + 
db->CreateIndexOnField("address"); + std::cout << "finish create index address\n"; + }); + } + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + //检查索引是否创建成功 + bool haveIndex = true; + findKeysByCityIndex(db, haveIndex); + findKeysByAgeIndex(db, false); + + + for (size_t i = 0; i < thread_num_; i++) + { + if (i == 0 || i == 1) {//2线程删除索引address + threads[i] = std::thread([db](){ + db->DeleteIndex("address"); + std::cout << "finish delete index address\n"; + }); + } else {//1线程创建索引age + threads[i] = std::thread([db](){ + db->CreateIndexOnField("age"); + std::cout << "finish create index age\n"; + }); + } + } + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + //检查 + findKeysByCityIndex(db, false); + findKeysByAgeIndex(db, true); + + delete db; +} +int main(int argc, char** argv) { + // All tests currently run with the same read-only file limits. + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file From cefdd3db4fc5be41012286e3dcb01a33f3901f8f Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Fri, 20 Dec 2024 17:29:44 +0800 Subject: [PATCH 05/32] =?UTF-8?q?=E8=A1=A5=E5=85=85delete=EF=BC=8C?= =?UTF-8?q?=E4=B8=80=E5=B9=B6=E4=BF=AE=E6=94=B9=E4=BA=86put=E9=80=BB?= =?UTF-8?q?=E8=BE=91=EF=BC=8C=E6=94=AF=E6=8C=81put=E5=92=8Cdelete=E7=9A=84?= =?UTF-8?q?=E5=90=88=E5=B9=B6req?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 26 +++++---- fielddb/meta.cpp | 12 +++- fielddb/meta.h | 7 ++- fielddb/request.cpp | 131 ++++++++++++++++++++++++++++++++++++++------ fielddb/request.h | 11 ++-- test/basic_function_test.cc | 13 +++++ test/helper.cc | 41 +++++++++++++- 7 files changed, 201 insertions(+), 40 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 2e00ca9..c5c5ad4 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -82,15 +82,15 @@ Again: } Request 
*tail = GetHandleInterval(); WriteBatch KVBatch,IndexBatch,MetaBatch; + std::unordered_set batchKeySet; Status status; if(!tail->isiCreateReq() && !tail->isiDeleteReq()) { - // int debug = tail->type_; //表明这一个区间并没有涉及index的创建删除 { //1. 构建各个Batch。构建的过程中要保证索引状态的一致性,需要上锁。 MutexLock iL(&index_mu); for(auto *req_ptr : taskqueue_) { - req_ptr->ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); + req_ptr->ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, batchKeySet); if(req_ptr == tail) break; } } @@ -147,7 +147,7 @@ Status FieldDB::Put(const WriteOptions &options, const Slice &key, const Slice & // return kvDB_->Put(options, key, value); } -// TODO:需要对是否进行index更新做处理 +// 需要对是否进行index更新做处理 Status FieldDB::PutFields(const WriteOptions &Options, const Slice &key, const FieldArray &fields) { //这里是为了const和slice-string的转换被迫搞得 @@ -161,14 +161,14 @@ Status FieldDB::PutFields(const WriteOptions &Options, // return kvDB_->PutFields(Options, key, fields); } -// todo: 删除有索引的key时indexdb也要同步 +// 删除有索引的key时indexdb也要同步 Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { - // - // std::string key_ = key.ToString(); - // DeleteReq req(&key_,&mutex_); - // Status status = HandleRequest(req); - // return status; - return kvDB_->Delete(options, key); + + std::string key_ = key.ToString(); + DeleteReq req(&key_,&mutex_); + Status status = HandleRequest(req); + return status; + // return kvDB_->Delete(options, key); } // TODO:根据updates里面的东西,要对是否需要更新index进行分别处理 Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { @@ -220,7 +220,8 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name) { return req.s; } WriteBatch KVBatch,IndexBatch,MetaBatch; - req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); + std::unordered_set useless; + req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); indexDB_->Write(WriteOptions(), &IndexBatch); req.Finalize(this); return req.s; @@ -235,7 +236,8 @@ Status FieldDB::DeleteIndex(const 
std::string &field_name) { return req.s; } WriteBatch KVBatch,IndexBatch,MetaBatch; - req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this); + std::unordered_set useless; + req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); indexDB_->Write(WriteOptions(), &IndexBatch); req.Finalize(this); return req.s; diff --git a/fielddb/meta.cpp b/fielddb/meta.cpp index 970d1fb..a02f585 100644 --- a/fielddb/meta.cpp +++ b/fielddb/meta.cpp @@ -21,8 +21,8 @@ using namespace leveldb; // } -//对于含有index field的put的meta编码为 (KV|Key,Value) -void MetaKV::Trans(Slice &MetaKey,Slice &MetaValue) { +//对于含有index field的put/delete的meta编码为 (KV|Key,Value) +void MetaKV::TransPut(Slice &MetaKey,Slice &MetaValue) { MetaKey.clear(); MetaValue.clear(); std::string buf; @@ -32,6 +32,14 @@ void MetaKV::Trans(Slice &MetaKey,Slice &MetaValue) { MetaValue = Slice(*value); } +void MetaKV::TransDelete(Slice &MetaKey) { + MetaKey.clear(); + std::string buf; + PutFixed32(&buf, KV_Deleting); + PutLengthPrefixedSlice(&buf, Slice(*name)); + MetaKey = Slice(buf); +} + class CleanerHandler : public WriteBatch::Handler { public: WriteBatch *NeedClean; diff --git a/fielddb/meta.h b/fielddb/meta.h index eb3a927..3dba911 100644 --- a/fielddb/meta.h +++ b/fielddb/meta.h @@ -27,7 +27,7 @@ using namespace leveldb; // }; enum MetaType { - Index, //记录index状态的meta + //Index, //记录index状态的meta KV_Creating, //记录含有index field的put的meta KV_Deleting, }; @@ -35,9 +35,10 @@ enum MetaType { //将一对(field_name,field_value)转换到metaDB中的KV表示 class MetaKV { public: - MetaKV(std::string *field_name,std::string *field_value): + MetaKV(std::string *field_name,std::string *field_value = nullptr): name(field_name),value(field_value) { } - void Trans(Slice &MetaKey,Slice &MetaValue); + void TransPut(Slice &MetaKey,Slice &MetaValue); + void TransDelete(Slice &MetaKey); private: std::string *name; std::string *value; diff --git a/fielddb/request.cpp b/fielddb/request.cpp index 6d36caf..ce2f18e 100644 --- a/fielddb/request.cpp +++ 
b/fielddb/request.cpp @@ -19,7 +19,7 @@ void Request::PendReq(Request *req) { //为虚函数提供最基本的实现 void Request::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { assert(0); } @@ -41,10 +41,28 @@ bool Request::isPending() { /*******FieldsReq*******/ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { + if (batchKeySet.find(Key) != batchKeySet.end()){ + return;//并发的被合并的put/delete请求只处理一次 + } else { + batchKeySet.insert(Key); + } + std::string val_str; + Status s = DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + FieldArray *oldFields; + if (s.IsNotFound()){ + oldFields = nullptr; + } else if (s.ok()) { //得到数据库之前key的fields, 判断需不需要删除其中潜在的索引 + oldFields = ParseValue(val_str); + } else { + assert(0); + } + + KVBatch.Put(Slice(*Key), Slice(SerializeValue(*Fields))); bool HasIndex = false; + bool HasOldIndex = false; { // MutexLock L(&DB->index_mu); //互斥访问索引状态表 DB->index_mu.AssertHeld(); @@ -62,39 +80,118 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //assert(0); } } + //冲突也可能存在于,需要删除旧数据的索引,但该索引正在创删中 + if (oldFields != nullptr){ + for(auto [field_name,field_value] : *oldFields) { + if(field_name == "") break; + if(DB->index_.count(field_name)) { + auto [index_status,parent_req] = DB->index_[field_name]; + if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { + parent_req->PendReq(this); + return; + } else if(index_status == IndexStatus::Exist) { + HasOldIndex = true; + } + //assert(0); + } + } + } + //2.对于没有冲突但含有索引操作的put,构建metaKV,这里直接将KV对简单编码后写入metaDB - if(HasIndex) { + if(HasIndex || HasOldIndex) { Slice MetaKey,MetaValue; std::string serialized = SerializeValue(*Fields); MetaKV MKV = MetaKV(Key,&serialized); - MKV.Trans(MetaKey, 
MetaValue); + MKV.TransPut(MetaKey, MetaValue); MetaBatch.Put(MetaKey, MetaValue); - //第三点是不是应该在这一分支中 - //3.对于含有索引的field建立索引 - for(auto [field_name,field_value] : *Fields) { - if(field_name == "") continue; - if(DB->index_.count(field_name)) { - std::string indexKey; - AppendIndexKey(&indexKey, ParsedInternalIndexKey( - *Key,field_name,field_value)); - IndexBatch.Put(indexKey, Slice()); + + + //3.1对于含有索引的oldfield删除索引 + if (HasOldIndex) { + for(auto [field_name,field_value] : *oldFields) { + if(field_name == "") continue; + if(DB->index_.count(field_name)) { + std::string indexKey; + AppendIndexKey(&indexKey, ParsedInternalIndexKey( + *Key,field_name,field_value)); + IndexBatch.Delete(indexKey); + } } } + + //3.2对于含有索引的field建立索引 + if (HasIndex) { + for(auto [field_name,field_value] : *Fields) { + if(field_name == "") continue; + if(DB->index_.count(field_name)) { + std::string indexKey; + AppendIndexKey(&indexKey, ParsedInternalIndexKey( + *Key,field_name,field_value)); + IndexBatch.Put(indexKey, Slice()); + } + } + } + } + //优化:对于3.1,3.2中都有的索引只写一次 } } /*******DeleteReq*******/ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { - //TODO: + if (batchKeySet.find(Key) != batchKeySet.end()){ + return;//并发的被合并的put/delete请求只处理一次 + } else { + batchKeySet.insert(Key); + } //1. 
读取当前的最新的键值对,判断是否存在含有键值对的field //2.1 如果无,则正常构造delete //2.2 如果是有的field的索引状态都是exist,则在meta中写KV_Deleting类型的记录 //在kvDB和metaDB中写入对应的delete //2.3 如果存在field的索引状态是Creating或者Deleting,那么在那个队列上面进行等待 + std::string val_str; + DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + FieldArray *Fields = ParseValue(val_str); + KVBatch.Delete(Slice(*Key)); + bool HasIndex = false; + { + // MutexLock L(&DB->index_mu); //互斥访问索引状态表 + DB->index_mu.AssertHeld(); + //1.将存在冲突的delete pend到对应的请求 + for(auto [field_name,field_value] : *Fields) { + if(field_name == "") break; + if(DB->index_.count(field_name)) { + auto [index_status,parent_req] = DB->index_[field_name]; + if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { + parent_req->PendReq(this); + return; + } else if(index_status == IndexStatus::Exist) { + HasIndex = true; + } + //assert(0); + } + } + //2.对于没有冲突但含有索引操作的delete,构建metaKV,这里直接将KV对简单编码后写入metaDB + if(HasIndex) { + Slice MetaKey; + MetaKV MKV = MetaKV(Key); + MKV.TransDelete(MetaKey); //meta中写入一个delete不需要value + MetaBatch.Put(MetaKey, Slice()); + //3.对于含有索引的field删除索引 + for(auto [field_name,field_value] : *Fields) { + if(field_name == "") continue; + if(DB->index_.count(field_name)) { + std::string indexKey; + AppendIndexKey(&indexKey, ParsedInternalIndexKey( + *Key,field_name,field_value)); + IndexBatch.Delete(indexKey); + } + } + } + } } /*******iCreateReq*******/ @@ -126,7 +223,7 @@ void iCreateReq::PendReq(Request *req) { } void iCreateReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { //遍历数据库,构建二级索引到indexbatch,(更新metaDB中的元数据为Index类型的(Field,Creating)) //一个indexwritebatch写入,那么索引创建删除应该和metadb没有交互 @@ -185,7 +282,7 @@ void iDeleteReq::PendReq(Request* req) { } void iDeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) + WriteBatch 
&MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { std::vector> keysAndVal = DB->FindKeysAndValByFieldName(*Field); diff --git a/fielddb/request.h b/fielddb/request.h index e68c4f1..1ebebfd 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -5,6 +5,7 @@ #include "port/port_stdcxx.h" #include "util/mutexlock.h" #include "util/serialize_value.h" +#include // #include "fielddb/field_db.h" #ifndef REQUEST_H @@ -43,7 +44,7 @@ public: //用于含有Fields的 virtual void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB); + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet); //主要用于icreate和idelete在队列中的注册当前状态 virtual void Prepare(FieldDB *DB); virtual void Finalize(FieldDB *DB); @@ -65,7 +66,7 @@ public: Key(Key),Fields(Fields),Request(FieldsReq_t,mu) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; std::string *Key; FieldArray *Fields; @@ -90,7 +91,7 @@ public: Field(Field),Request(iCreateReq_t, mu),Existed(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; void PendReq(Request *req) override; @@ -107,7 +108,7 @@ public: Field(Field),Request(iDeleteReq_t, mu),Deleted(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; void PendReq(Request *req) override; @@ -124,7 +125,7 @@ public: Key(Key),Request(DeleteReq_t,mu) { }; void 
ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; std::string *Key; }; diff --git a/test/basic_function_test.cc b/test/basic_function_test.cc index 28d9fc7..2918b15 100644 --- a/test/basic_function_test.cc +++ b/test/basic_function_test.cc @@ -19,6 +19,10 @@ TEST(TestLab1, Basic) { bool allowNotFound = false; GetFieldData(db, allowNotFound); findKeysByCity(db); + + DeleteFieldData(db); + GetDeleteData(db); + delete db; } @@ -40,10 +44,19 @@ TEST(TestLab2, Basic) { db->CreateIndexOnField("age"); findKeysByCityIndex(db, true); findKeysByAgeIndex(db, true); + db->DeleteIndex("address"); findKeysByCityIndex(db, false); findKeysByAgeIndex(db, true); + DeleteFieldData(db); + // GetDeleteData(db); + //helper太长不再封装函数了,这里因为数据都被delete了,但索引还在,所以能QueryByIndex但返回key数量0 + Field field = {"age", "20"}; + Status s; + std::vector resKeys = db->QueryByIndex(field, &s); + ASSERT_EQ(resKeys.size(), 0); + delete db; } diff --git a/test/helper.cc b/test/helper.cc index 9de92bc..b053871 100644 --- a/test/helper.cc +++ b/test/helper.cc @@ -15,7 +15,9 @@ std::vector cities = { //检查insert和queryByIndex的数据是否对应 std::set shanghaiKeys; std::set age20Keys; -//复杂的测试要注意这两个全局变量,目前只有InsertFieldData和InsertOneField会往里加,并且没有清理 +//复杂的测试要注意这两个全局变量, +//目前只有InsertFieldData和InsertOneField会往里加,DeleteFieldData会全部清空, +//其他测试之间有必要手动clear Status OpenDB(std::string dbName, FieldDB **db) { Options options; @@ -98,6 +100,25 @@ void InsertFieldData(FieldDB *db, int seed = 0/*随机种子*/) { } } +void DeleteFieldData(FieldDB *db, int seed = 0/*随机种子*/) { + std::cout << "-------deleting-------" << std::endl; + WriteOptions writeOptions; + int key_num = data_size / value_size; + // srand线程不安全,这种可以保证多线程时随机序列也一致 + std::mt19937 rng(seed); + + shanghaiKeys.clear(); + age20Keys.clear(); + for (int i = 0; i < key_num; i++) { + int randThisTime = rng(); //确保读写一个循环只rand一次,否则随机序列会不一致 + 
int key_ = std::abs(randThisTime) % key_num + 1; + std::string key = std::to_string(key_); + + Status s = db->Delete(WriteOptions(), key); + ASSERT_TRUE(s.ok()); + } +} + //并发时不一定能读到,加个参数控制 void GetFieldData(FieldDB *db, bool allowNotFound, int seed = 0) { std::cout << "-------getting-------" << std::endl; @@ -136,6 +157,24 @@ void GetFieldData(FieldDB *db, bool allowNotFound, int seed = 0) { } } +//检查对应种子有没有删除干净 +//删除期间即使其他种子也不能并发写,因为即使种子不同,随机出的key可能相同 +void GetDeleteData(FieldDB *db, int seed = 0) { + std::cout << "-------getting-------" << std::endl; + ReadOptions readOptions; + int key_num = data_size / value_size; + + std::mt19937 rng(seed); + for (int i = 0; i < 100; i++) { + int randThisTime = rng(); + int key_ = std::abs(randThisTime) % key_num + 1; + std::string key = std::to_string(key_); + FieldArray fields_ret; + Status s = db->GetFields(readOptions, key, &fields_ret); + ASSERT_TRUE(s.IsNotFound()); + } +} + void findKeysByCity(FieldDB *db) { std::cout << "-------getting field address-------" << std::endl; Field field = {"address", "Shanghai"}; From 462019353ea5085f55a34bed27187a5291e2242f Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sat, 21 Dec 2024 12:14:46 +0800 Subject: [PATCH 06/32] =?UTF-8?q?=E5=8A=A0=E4=BA=86=E7=82=B9=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=EF=BC=8C=E4=BF=AE=E4=BA=86=E7=82=B9bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 6 ++-- fielddb/request.cpp | 32 ++++++++++--------- fielddb/request.h | 10 +++--- test/helper.cc | 35 +++++++++++++++++---- test/helper.h | 34 ++++++++++++++++++++ test/parallel_test.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 175 insertions(+), 28 deletions(-) create mode 100644 test/helper.h diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index c5c5ad4..4c1cf81 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -82,7 +82,7 @@ Again: } Request *tail 
= GetHandleInterval(); WriteBatch KVBatch,IndexBatch,MetaBatch; - std::unordered_set batchKeySet; + std::unordered_set batchKeySet; Status status; if(!tail->isiCreateReq() && !tail->isiDeleteReq()) { //表明这一个区间并没有涉及index的创建删除 @@ -220,7 +220,7 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name) { return req.s; } WriteBatch KVBatch,IndexBatch,MetaBatch; - std::unordered_set useless; + std::unordered_set useless; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); indexDB_->Write(WriteOptions(), &IndexBatch); req.Finalize(this); @@ -236,7 +236,7 @@ Status FieldDB::DeleteIndex(const std::string &field_name) { return req.s; } WriteBatch KVBatch,IndexBatch,MetaBatch; - std::unordered_set useless; + std::unordered_set useless; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); indexDB_->Write(WriteOptions(), &IndexBatch); req.Finalize(this); diff --git a/fielddb/request.cpp b/fielddb/request.cpp index ce2f18e..d29d668 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -19,7 +19,8 @@ void Request::PendReq(Request *req) { //为虚函数提供最基本的实现 void Request::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB, + std::unordered_set &batchKeySet) { assert(0); } @@ -41,12 +42,13 @@ bool Request::isPending() { /*******FieldsReq*******/ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB, + std::unordered_set &batchKeySet) { - if (batchKeySet.find(Key) != batchKeySet.end()){ + if (batchKeySet.find(*Key) != batchKeySet.end()){ return;//并发的被合并的put/delete请求只处理一次 } else { - batchKeySet.insert(Key); + batchKeySet.insert(*Key); } std::string val_str; Status s = DB->kvDB_->Get(ReadOptions(), *Key, &val_str); @@ -59,8 +61,6 @@ void 
FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, assert(0); } - - KVBatch.Put(Slice(*Key), Slice(SerializeValue(*Fields))); bool HasIndex = false; bool HasOldIndex = false; { @@ -96,7 +96,8 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, } } } - + + KVBatch.Put(Slice(*Key), Slice(SerializeValue(*Fields))); //2.对于没有冲突但含有索引操作的put,构建metaKV,这里直接将KV对简单编码后写入metaDB if(HasIndex || HasOldIndex) { Slice MetaKey,MetaValue; @@ -140,12 +141,13 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, /*******DeleteReq*******/ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB, + std::unordered_set &batchKeySet) { - if (batchKeySet.find(Key) != batchKeySet.end()){ + if (batchKeySet.find(*Key) != batchKeySet.end()){ return;//并发的被合并的put/delete请求只处理一次 } else { - batchKeySet.insert(Key); + batchKeySet.insert(*Key); } //1. 
读取当前的最新的键值对,判断是否存在含有键值对的field //2.1 如果无,则正常构造delete @@ -153,9 +155,9 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //在kvDB和metaDB中写入对应的delete //2.3 如果存在field的索引状态是Creating或者Deleting,那么在那个队列上面进行等待 std::string val_str; - DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + Status s = DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + if (s.IsNotFound()) return; FieldArray *Fields = ParseValue(val_str); - KVBatch.Delete(Slice(*Key)); bool HasIndex = false; { // MutexLock L(&DB->index_mu); //互斥访问索引状态表 @@ -174,6 +176,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //assert(0); } } + KVBatch.Delete(Slice(*Key)); //2.对于没有冲突但含有索引操作的delete,构建metaKV,这里直接将KV对简单编码后写入metaDB if(HasIndex) { Slice MetaKey; @@ -223,7 +226,8 @@ void iCreateReq::PendReq(Request *req) { } void iCreateReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB, + std::unordered_set &batchKeySet) { //遍历数据库,构建二级索引到indexbatch,(更新metaDB中的元数据为Index类型的(Field,Creating)) //一个indexwritebatch写入,那么索引创建删除应该和metadb没有交互 @@ -282,7 +286,7 @@ void iDeleteReq::PendReq(Request* req) { } void iDeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { std::vector> keysAndVal = DB->FindKeysAndValByFieldName(*Field); diff --git a/fielddb/request.h b/fielddb/request.h index 1ebebfd..e1ab1e8 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -44,7 +44,7 @@ public: //用于含有Fields的 virtual void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet); + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet); //主要用于icreate和idelete在队列中的注册当前状态 virtual void Prepare(FieldDB *DB); 
virtual void Finalize(FieldDB *DB); @@ -66,7 +66,7 @@ public: Key(Key),Fields(Fields),Request(FieldsReq_t,mu) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; std::string *Key; FieldArray *Fields; @@ -91,7 +91,7 @@ public: Field(Field),Request(iCreateReq_t, mu),Existed(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; void PendReq(Request *req) override; @@ -108,7 +108,7 @@ public: Field(Field),Request(iDeleteReq_t, mu),Deleted(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; void PendReq(Request *req) override; @@ -125,7 +125,7 @@ public: Key(Key),Request(DeleteReq_t,mu) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; std::string *Key; }; diff --git a/test/helper.cc b/test/helper.cc index b053871..105c05b 100644 --- a/test/helper.cc +++ b/test/helper.cc @@ -3,6 +3,7 @@ // #include "leveldb/db.h" #include "fielddb/field_db.h" #include +#include "helper.h" using namespace fielddb; constexpr int value_size = 2048; @@ -13,10 +14,12 @@ std::vector cities = { "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" }; //检查insert和queryByIndex的数据是否对应 
-std::set shanghaiKeys; -std::set age20Keys; +//封装了一个线程安全的全局set + +ThreadSafeSet shanghaiKeys; +ThreadSafeSet age20Keys; //复杂的测试要注意这两个全局变量, -//目前只有InsertFieldData和InsertOneField会往里加,DeleteFieldData会全部清空, +//目前只有InsertFieldData和InsertOneField会往里加,DeleteFieldData和InsertOneField会删除, //其他测试之间有必要手动clear Status OpenDB(std::string dbName, FieldDB **db) { @@ -51,6 +54,15 @@ void InsertOneField(FieldDB *db, std::string key = "0") { age20Keys.insert(key); } +//只删一条特定数据的测试 +void DeleteOneField(FieldDB *db, std::string key = "0") { + WriteOptions writeOptions; + Status s = db->Delete(WriteOptions(), key); + ASSERT_TRUE(s.ok()); + shanghaiKeys.erase(key); + age20Keys.erase(key); +} + //与上面对应 void GetOneField(FieldDB *db, std::string key = "0") { ReadOptions readOptions; @@ -183,7 +195,7 @@ void findKeysByCity(FieldDB *db) { //如果随机种子相同,每次打印出的两个数也应该相同 std::cout << "address: " << shanghaiKeys.size() << " " << resKeys.size() << std::endl; for (const std::string &key : resKeys){ - ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + ASSERT_TRUE(shanghaiKeys.haveKey(key)); } } @@ -200,7 +212,7 @@ void findKeysByCityIndex(FieldDB *db, bool haveIndex) { } std::cout << "address: " << shanghaiKeys.size() << " " << resKeys.size() << std::endl;//打印比较 for (const std::string &key : resKeys){ - ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + ASSERT_TRUE(shanghaiKeys.haveKey(key)); } } @@ -216,6 +228,17 @@ void findKeysByAgeIndex(FieldDB *db, bool haveIndex) { } std::cout << "age: " << age20Keys.size() << " " << resKeys.size() << std::endl; for (const std::string &key : resKeys){ - ASSERT_NE(std::find(age20Keys.begin(), age20Keys.end(), key), age20Keys.end()); + ASSERT_TRUE(age20Keys.haveKey(key)); } +} + +void checkDataInKVAndIndex(FieldDB *db) { + Field field = {"address", "Shanghai"}; + Status s; + std::vector resKeys1 = db->QueryByIndex(field, &s); //indexdb根据索引查到的数据 + std::vector resKeys2 = 
db->FindKeysByField(field); //kvdb强行遍历查到的数据 + std::sort(resKeys1.begin(), resKeys1.end()); + std::sort(resKeys2.begin(), resKeys2.end()); + std::cout << resKeys1.size() << " " << resKeys2.size() << std::endl; + ASSERT_EQ(resKeys1, resKeys2); } \ No newline at end of file diff --git a/test/helper.h b/test/helper.h new file mode 100644 index 0000000..76752cb --- /dev/null +++ b/test/helper.h @@ -0,0 +1,34 @@ +#include "fielddb/field_db.h" +using namespace fielddb; +class ThreadSafeSet +{ +private: + std::set keys; + std::mutex setMutex; +public: + ThreadSafeSet(){} + + void insert(std::string key){ + std::lock_guard lock(setMutex); + keys.insert(key); + } + + void erase(std::string key){ + std::lock_guard lock(setMutex); + keys.erase(key); + } + + void clear(){ + std::lock_guard lock(setMutex); + keys.clear(); + } + + size_t size(){ + std::lock_guard lock(setMutex); + return keys.size(); + } + + bool haveKey(std::string key){ + return std::find(keys.begin(), keys.end(), key) != keys.end(); + } +}; \ No newline at end of file diff --git a/test/parallel_test.cc b/test/parallel_test.cc index 98bf457..80c28ea 100644 --- a/test/parallel_test.cc +++ b/test/parallel_test.cc @@ -18,6 +18,8 @@ TEST(TestReadWrite, Parallel) { abort(); } // ClearDB(db); + db->CreateIndexOnField("address"); + db->CreateIndexOnField("age"); int thread_num_ = 5; std::vector threads(thread_num_); //二写三读 @@ -45,6 +47,7 @@ TEST(TestReadWrite, Parallel) { GetFieldData(db, allowNotFound); GetFieldData(db, allowNotFound, 1); findKeysByCity(db); + checkDataInKVAndIndex(db); delete db; } @@ -90,6 +93,7 @@ TEST(TestWriteCreatei, Parallel) { findKeysByCityIndex(db, haveIndex); //检查写入是否成功 GetOneField(db); + checkDataInKVAndIndex(db); delete db; } @@ -128,6 +132,7 @@ TEST(TestCreateiCreatei, Parallel) { bool haveIndex = true; findKeysByCityIndex(db, haveIndex); findKeysByAgeIndex(db, false); + checkDataInKVAndIndex(db); for (size_t i = 0; i < thread_num_; i++) @@ -156,6 +161,87 @@ TEST(TestCreateiCreatei, 
Parallel) { delete db; } + +//有索引时,大量并发put与delete相同key,确保kvdb和indexdb的一致性 +TEST(TestPutDeleteOne, Parallel) { + fielddb::DestroyDB("testdb2.4",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2.4", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + + // ClearDB(db); + shanghaiKeys.clear(); + age20Keys.clear(); + db->CreateIndexOnField("address"); + db->CreateIndexOnField("age"); + int thread_num_ = 20; + std::vector threads(thread_num_); + for (size_t i = 0; i < thread_num_; i++) + { + if (i % 2 == 0) {//2线程删除索引address + threads[i] = std::thread([db](){ + for (size_t j = 0; j < 100; j++) + { + InsertOneField(db, std::to_string(j)); + } + }); + } else {//1线程创建索引age + threads[i] = std::thread([db](){ + for (size_t j = 0; j < 100; j++) + { + DeleteOneField(db, std::to_string(j)); + } + }); + } + } + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + + //检查 + checkDataInKVAndIndex(db); + delete db; +} + +//有索引时,put与delete的并发,确保kvdb和indexdb的一致性 +TEST(TestPutDelete, Parallel) { + fielddb::DestroyDB("testdb2.5",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2.5", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + + // ClearDB(db); + shanghaiKeys.clear(); + age20Keys.clear(); + db->CreateIndexOnField("address"); + db->CreateIndexOnField("age"); + int thread_num_ = 4; + std::vector threads(thread_num_); + threads[0] = std::thread([db](){InsertFieldData(db);}); + threads[1] = std::thread([db](){InsertFieldData(db, 1);}); + threads[2] = std::thread([db](){DeleteFieldData(db);}); + threads[3] = std::thread([db](){DeleteFieldData(db, 1);}); + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + + //检查 + checkDataInKVAndIndex(db); + delete db; +} + int main(int argc, char** argv) { // All tests currently run with the same read-only file limits. 
testing::InitGoogleTest(&argc, argv); From 8657ec37484d53a58dd64534d9af070c5adf29d3 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sat, 21 Dec 2024 18:09:57 +0800 Subject: [PATCH 07/32] =?UTF-8?q?=E5=AE=8C=E6=88=90=E4=BA=86recover?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8D=E4=BA=86=E6=9C=89=E5=85=B3ParseValu?= =?UTF-8?q?ede=E7=9A=84=E5=86=85=E5=AD=98=E6=B3=84=E6=BC=8F=E9=97=AE?= =?UTF-8?q?=E9=A2=98=E5=92=8C=E6=9C=89=E5=85=B3Transput=E5=92=8CTransDelet?= =?UTF-8?q?e=E7=9A=84=E6=BD=9C=E5=9C=A8=E5=86=85=E5=AD=98=E8=AE=BF?= =?UTF-8?q?=E9=97=AE=E5=BC=82=E5=B8=B8=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/db_impl.cc | 2 +- fielddb/field_db.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ fielddb/meta.cpp | 16 +++++++++------- fielddb/meta.h | 4 ++-- fielddb/request.cpp | 17 ++++++++++++----- util/serialize_value.cc | 5 +++-- util/serialize_value.h | 2 +- 7 files changed, 71 insertions(+), 18 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 49db131..6879b82 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1169,7 +1169,7 @@ Status DBImpl::GetFields(const ReadOptions& options, const Slice& key, FieldArray* fields) { std::string value; Status s = DBImpl::Get(options, key, &value); - *fields = *ParseValue(value); + ParseValue(value,fields); return s; } diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index c5c5ad4..0eea72e 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -6,10 +6,13 @@ #include #include "leveldb/db.h" #include "leveldb/env.h" +#include "leveldb/iterator.h" #include "leveldb/options.h" +#include "leveldb/slice.h" #include "leveldb/status.h" #include "leveldb/write_batch.h" #include "db/write_batch_internal.h" +#include "util/coding.h" #include "util/mutexlock.h" #include "util/serialize_value.h" #include "fielddb/encode_index.h" @@ -53,7 +56,47 @@ Status FieldDB::OpenFieldDB(const Options& options, Status FieldDB::Recover() { //TODO: 
//1. 遍历所有Index类型的meta,重建内存中的index_状态表 + Iterator *Iter = indexDB_->NewIterator(ReadOptions()); + std::string IndexKey; + Iter->SeekToFirst(); + while(Iter->Valid()) { + IndexKey = Iter->value().ToString(); + ParsedInternalIndexKey ParsedIndex; + ParseInternalIndexKey(Slice(IndexKey),&ParsedIndex); + index_[ParsedIndex.name_.ToString()] = {Exist,nullptr}; + std::cout << "Existed Index : " << ParsedIndex.name_.ToString() << std::endl; + + //构建下一个搜索的对象,在原来的fieldname的基础上加一个最大的ascii字符(不可见字符) + //TODO:不知道这个做法有没有道理 + std::string Seek; + PutLengthPrefixedSlice(&Seek, ParsedIndex.name_); + Seek.push_back(0xff); + Iter->Seek(Slice(Seek)); + } + delete Iter; //2. 寻找所有KV类型的meta,再次提交一遍请求 + Iter = metaDB_->NewIterator(ReadOptions()); + Slice MetaValue; + Iter->SeekToFirst(); + while (Iter->Valid()) { + MetaValue = Iter->key(); + MetaType type = MetaType(DecodeFixed32(MetaValue.data())); + MetaValue.remove_prefix(4);//移除头上的metaType的部分 + if(type == KV_Creating) { + FieldArray fields; + ParseValue(Iter->value().ToString(), &fields); + PutFields(WriteOptions(), MetaValue, fields); + } else if(type == KV_Deleting) { + Delete(WriteOptions(), MetaValue); + } else { + assert(0 && "Invalid MetaType"); + } + } + delete Iter; + //在所有的请求完成后,会自动把metaDB的内容清空。 + Iter = metaDB_->NewIterator(ReadOptions()); + Iter->SeekToFirst(); + std::cout << "Iter Valid : " << Iter->Valid() << std::endl; //3. 
等待所有请求完成 return Status::OK(); } diff --git a/fielddb/meta.cpp b/fielddb/meta.cpp index a02f585..13ee09d 100644 --- a/fielddb/meta.cpp +++ b/fielddb/meta.cpp @@ -22,22 +22,24 @@ using namespace leveldb; //对于含有index field的put/delete的meta编码为 (KV|Key,Value) -void MetaKV::TransPut(Slice &MetaKey,Slice &MetaValue) { +void MetaKV::TransPut(std::string &MetaKey,std::string &MetaValue) { MetaKey.clear(); MetaValue.clear(); - std::string buf; + //这里的改动是为了防止潜在的段错误。原来的写法中,slice(buf)对应的buf是局部的,在函数返回后,buf被销毁 + //但是slice中的指针指向的是析构的string对象的部分内存 + std::string &buf = MetaKey; PutFixed32(&buf, KV_Creating); PutLengthPrefixedSlice(&buf, Slice(*name)); - MetaKey = Slice(buf); - MetaValue = Slice(*value); + // MetaKey = Slice(buf); + // MetaValue = Slice(*value); } -void MetaKV::TransDelete(Slice &MetaKey) { +void MetaKV::TransDelete(std::string &MetaKey) { MetaKey.clear(); - std::string buf; + std::string &buf = MetaKey; PutFixed32(&buf, KV_Deleting); PutLengthPrefixedSlice(&buf, Slice(*name)); - MetaKey = Slice(buf); + // MetaKey = Slice(buf); } class CleanerHandler : public WriteBatch::Handler { diff --git a/fielddb/meta.h b/fielddb/meta.h index 3dba911..2766e88 100644 --- a/fielddb/meta.h +++ b/fielddb/meta.h @@ -37,8 +37,8 @@ class MetaKV { public: MetaKV(std::string *field_name,std::string *field_value = nullptr): name(field_name),value(field_value) { } - void TransPut(Slice &MetaKey,Slice &MetaValue); - void TransDelete(Slice &MetaKey); + void TransPut(std::string &MetaKey,std::string &MetaValue); + void TransDelete(std::string &MetaKey); private: std::string *name; std::string *value; diff --git a/fielddb/request.cpp b/fielddb/request.cpp index ce2f18e..d2c2d8b 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -1,5 +1,6 @@ #include "fielddb/request.h" #include +#include #include "leveldb/slice.h" #include "leveldb/status.h" #include "leveldb/write_batch.h" @@ -54,7 +55,8 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, if 
(s.IsNotFound()){ oldFields = nullptr; } else if (s.ok()) { //得到数据库之前key的fields, 判断需不需要删除其中潜在的索引 - oldFields = ParseValue(val_str); + oldFields = new FieldArray; + oldFields = ParseValue(val_str,oldFields); } else { assert(0); } @@ -99,11 +101,11 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //2.对于没有冲突但含有索引操作的put,构建metaKV,这里直接将KV对简单编码后写入metaDB if(HasIndex || HasOldIndex) { - Slice MetaKey,MetaValue; + std::string MetaKey,MetaValue; std::string serialized = SerializeValue(*Fields); MetaKV MKV = MetaKV(Key,&serialized); MKV.TransPut(MetaKey, MetaValue); - MetaBatch.Put(MetaKey, MetaValue); + MetaBatch.Put(MetaKey, serialized); //3.1对于含有索引的oldfield删除索引 @@ -135,6 +137,8 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, } //优化:对于3.1,3.2中都有的索引只写一次 } + + if(oldFields) delete oldFields; } @@ -154,7 +158,9 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //2.3 如果存在field的索引状态是Creating或者Deleting,那么在那个队列上面进行等待 std::string val_str; DB->kvDB_->Get(ReadOptions(), *Key, &val_str); - FieldArray *Fields = ParseValue(val_str); + FieldArray *Fields = new FieldArray; + ParseValue(val_str,Fields); + KVBatch.Delete(Slice(*Key)); bool HasIndex = false; { @@ -176,7 +182,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, } //2.对于没有冲突但含有索引操作的delete,构建metaKV,这里直接将KV对简单编码后写入metaDB if(HasIndex) { - Slice MetaKey; + std::string MetaKey; MetaKV MKV = MetaKV(Key); MKV.TransDelete(MetaKey); //meta中写入一个delete不需要value MetaBatch.Put(MetaKey, Slice()); @@ -192,6 +198,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, } } } + delete Fields; } /*******iCreateReq*******/ diff --git a/util/serialize_value.cc b/util/serialize_value.cc index 562360b..88aa844 100644 --- a/util/serialize_value.cc +++ b/util/serialize_value.cc @@ -20,9 +20,10 @@ std::string SerializeValue(const FieldArray& fields){ return result; } -FieldArray *ParseValue(const std::string& 
value_str){ +FieldArray *ParseValue(const std::string& value_str,FieldArray *fields){ Slice valueSlice(value_str); - FieldArray *res = new FieldArray; + // FieldArray *res = new FieldArray; + FieldArray *res = fields; Slice nameSlice = Slice(); Slice valSlice = Slice(); std::string nameStr; diff --git a/util/serialize_value.h b/util/serialize_value.h index b769fb8..a337bc6 100644 --- a/util/serialize_value.h +++ b/util/serialize_value.h @@ -12,7 +12,7 @@ using Field = std::pair; // field_name:field_value using FieldArray = std::vector>; std::string SerializeValue(const FieldArray& fields); -FieldArray *ParseValue(const std::string& value_str); +FieldArray *ParseValue(const std::string& value_str, FieldArray *fields); class InternalFieldArray { public: From 0fed70d5c30a0de7177ba695b4c78db5cee4fa72 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sat, 21 Dec 2024 21:32:18 +0800 Subject: [PATCH 08/32] =?UTF-8?q?Batchreq=E5=92=8CWrite=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=E7=9A=84=E5=AE=9E=E7=8E=B0=EF=BC=8C=E4=BD=86=E6=9C=AA=E5=AE=8C?= =?UTF-8?q?=E6=88=90=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- fielddb/field_db.cpp | 8 +++++- fielddb/request.cpp | 73 +++++++++++++++++++++++++++++++++++++++++++++++++--- fielddb/request.h | 16 ++++++++++++ 4 files changed, 93 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9529fb0..fa7468f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ endif(NOT CMAKE_C_STANDARD) # C++ standard can be overridden when this is used as a sub-project. if(NOT CMAKE_CXX_STANDARD) # This project requires C++11. 
- set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) endif(NOT CMAKE_CXX_STANDARD) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 0eea72e..8a710de 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -97,6 +97,7 @@ Status FieldDB::Recover() { Iter = metaDB_->NewIterator(ReadOptions()); Iter->SeekToFirst(); std::cout << "Iter Valid : " << Iter->Valid() << std::endl; + delete Iter; //3. 等待所有请求完成 return Status::OK(); } @@ -216,6 +217,10 @@ Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { // TODO:根据updates里面的东西,要对是否需要更新index进行分别处理 Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { //或许应该再做一个接口?或者基于现有的接口进行改造 + BatchReq req(updates,&mutex_); + Status status = HandleRequest(req); + return status; + assert(0); return Status::OK(); } //由于常规put将空串作为name,这里也需要适当修改 @@ -251,6 +256,7 @@ std::vector> FieldDB::FindKeysAndValByFieldN result.push_back(std::make_pair(iter->key().ToString(), val)); } } + delete iter; return result; } @@ -308,7 +314,7 @@ std::vector FieldDB::QueryByIndex(const Field &field, Status *s) { } break; } - + delete indexIterator; *s = Status::OK(); return result; } diff --git a/fielddb/request.cpp b/fielddb/request.cpp index d2c2d8b..5df5310 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -1,9 +1,12 @@ #include "fielddb/request.h" #include +#include #include +#include #include "leveldb/slice.h" #include "leveldb/status.h" #include "leveldb/write_batch.h" +#include "port/port_stdcxx.h" #include "util/mutexlock.h" #include "util/serialize_value.h" #include "fielddb/encode_index.h" @@ -74,7 +77,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, if(DB->index_.count(field_name)) { auto [index_status,parent_req] = DB->index_[field_name]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { - parent_req->PendReq(this); + 
parent_req->PendReq(this->parent); return; } else if(index_status == IndexStatus::Exist) { HasIndex = true; @@ -89,7 +92,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, if(DB->index_.count(field_name)) { auto [index_status,parent_req] = DB->index_[field_name]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { - parent_req->PendReq(this); + parent_req->PendReq(this->parent); return; } else if(index_status == IndexStatus::Exist) { HasOldIndex = true; @@ -172,7 +175,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, if(DB->index_.count(field_name)) { auto [index_status,parent_req] = DB->index_[field_name]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { - parent_req->PendReq(this); + parent_req->PendReq(this->parent); return; } else if(index_status == IndexStatus::Exist) { HasIndex = true; @@ -214,7 +217,7 @@ void iCreateReq::Prepare(FieldDB *DB) { s = Status::OK(); } else { //如果正在创建或删除,那么进行等待 - parent->PendReq(this); + parent->PendReq(this->parent); } return; } @@ -320,5 +323,67 @@ void iDeleteReq::Finalize(FieldDB *DB) { this->s = Status::OK(); } +BatchReq::BatchReq(WriteBatch *Batch,port::Mutex *mu): + Batch(Batch),Request(BatchReq_t, mu) { + + struct BatchHandler : WriteBatch::Handler { + void Put(const Slice &key, const Slice &value) override { + //为key和value构造存储的地方,防止由于string的析构造成可能得内存访问错误 + str_buf->push_back(key.ToString()); + fa_buf->push_back({{"",value.ToString()}}); + sub_requests->emplace_back(new FieldsReq(&str_buf->back(),&fa_buf->back(),mu)); + sub_requests->back()->parent = req; + } + void Delete(const Slice &key) override { + str_buf->push_back(key.ToString()); + sub_requests->emplace_back(new DeleteReq(&str_buf->back(),mu)); + sub_requests->back()->parent = req; + } + + BatchReq *req; + port::Mutex *mu; + std::deque *str_buf; + std::deque *fa_buf; + std::deque *sub_requests; + }; + + BatchHandler Handler; + Handler.req 
= this; + Handler.mu = mu; + Handler.str_buf = &str_buf; + Handler.fa_buf = &fa_buf; + Handler.sub_requests = &sub_requests; + + Batch->Iterate(&Handler); +} + +BatchReq::~BatchReq() { + while(!sub_requests.empty()) { + Request *req = sub_requests.front(); + sub_requests.pop_front(); + delete req; + } +} + +void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) +{ + WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; + std::unordered_set Sub_batchKeySet; + //由于batch是有顺序的,根据我们现在的一个key只处理最开始的算法,这里需要反向迭代 + for(auto subreq = sub_requests.rbegin(); subreq != sub_requests.rend(); subreq++ ) { + (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); + //所有的对于pendreq的调用传入的参数被改成了this->parent,因此,对于subrequests来说, + //pendreq的传参为对应的Batchreq,因此,此处判断batchreq是否pending可以得到subreq是否有冲突 + if(isPending()) { + return; + } + } + KVBatch.Append(Sub_KVBatch); + IndexBatch.Append(Sub_IndexBatch); + MetaBatch.Append(Sub_MetaBatch); + batchKeySet.insert(batchKeySet.begin(),batchKeySet.end()); +} + } // namespace fielddb \ No newline at end of file diff --git a/fielddb/request.h b/fielddb/request.h index 1ebebfd..4813888 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -24,6 +24,7 @@ public: iCreateReq_t, iDeleteReq_t, DeleteReq_t, + BatchReq_t, }; public: @@ -41,6 +42,7 @@ public: inline bool isiCreateReq() { return type_ == iCreateReq_t; } inline bool isiDeleteReq() { return type_ == iDeleteReq_t; } inline bool isDeleteReq() { return type_ == DeleteReq_t; } + inline bool isBatchReq() { return type_ == BatchReq_t; } //用于含有Fields的 virtual void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, @@ -130,6 +132,20 @@ public: std::string *Key; }; +class BatchReq : public Request { +public: + BatchReq(WriteBatch *Batch,port::Mutex *mu); + ~BatchReq(); + + void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, + WriteBatch 
&MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + + WriteBatch *Batch; + std::deque sub_requests; + std::deque str_buf; + std::deque fa_buf; +}; + } #endif \ No newline at end of file From d5e46b56b375e6f974f6b817bfa193a3fea97869 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sun, 22 Dec 2024 15:51:27 +0800 Subject: [PATCH 09/32] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=B9=B6=E9=80=9A?= =?UTF-8?q?=E8=BF=87=E4=BA=86write=E7=9B=B8=E5=85=B3=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/request.cpp | 18 ++++++++++---- fielddb/request.h | 2 +- test/basic_function_test.cc | 3 +++ test/helper.cc | 45 ++++++++++++++++++++++++++++++++--- test/parallel_test.cc | 57 +++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 114 insertions(+), 11 deletions(-) diff --git a/fielddb/request.cpp b/fielddb/request.cpp index f08937b..35524ee 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -286,7 +286,7 @@ void iDeleteReq::Prepare(FieldDB *DB) { done = true; } else { //如果正在创建或者删除,那么pend到对应的请求上 - parent->PendReq(this); + parent->PendReq(this->parent); } } @@ -334,9 +334,17 @@ BatchReq::BatchReq(WriteBatch *Batch,port::Mutex *mu): void Put(const Slice &key, const Slice &value) override { //为key和value构造存储的地方,防止由于string的析构造成可能得内存访问错误 str_buf->push_back(key.ToString()); - fa_buf->push_back({{"",value.ToString()}}); + FieldArray *field = new FieldArray; + field = ParseValue(value.ToString(), field); + if (field == nullptr){ //batch中的value没有field + fa_buf->push_back({{"",value.ToString()}}); + } else { + fa_buf->push_back(*field); + } + sub_requests->emplace_back(new FieldsReq(&str_buf->back(),&fa_buf->back(),mu)); sub_requests->back()->parent = req; + delete field; } void Delete(const Slice &key) override { str_buf->push_back(key.ToString()); @@ -370,10 +378,10 @@ BatchReq::~BatchReq() { } void BatchReq::ConstructBatch(WriteBatch 
&KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; - std::unordered_set Sub_batchKeySet; + std::unordered_set Sub_batchKeySet; //由于batch是有顺序的,根据我们现在的一个key只处理最开始的算法,这里需要反向迭代 for(auto subreq = sub_requests.rbegin(); subreq != sub_requests.rend(); subreq++ ) { (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); @@ -386,7 +394,7 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, KVBatch.Append(Sub_KVBatch); IndexBatch.Append(Sub_IndexBatch); MetaBatch.Append(Sub_MetaBatch); - batchKeySet.insert(batchKeySet.begin(),batchKeySet.end()); + batchKeySet.insert(Sub_batchKeySet.begin(),Sub_batchKeySet.end()); } diff --git a/fielddb/request.h b/fielddb/request.h index de391e0..19faf49 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -138,7 +138,7 @@ public: ~BatchReq(); void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; WriteBatch *Batch; std::deque sub_requests; diff --git a/test/basic_function_test.cc b/test/basic_function_test.cc index 2918b15..729fb73 100644 --- a/test/basic_function_test.cc +++ b/test/basic_function_test.cc @@ -57,6 +57,9 @@ TEST(TestLab2, Basic) { std::vector resKeys = db->QueryByIndex(field, &s); ASSERT_EQ(resKeys.size(), 0); + WriteFieldData(db); + GetFieldData(db, false); + findKeysByAgeIndex(db, true); delete db; } diff --git a/test/helper.cc b/test/helper.cc index 105c05b..a0b9e79 100644 --- a/test/helper.cc +++ b/test/helper.cc @@ -19,7 +19,8 @@ std::vector cities = { ThreadSafeSet shanghaiKeys; ThreadSafeSet age20Keys; //复杂的测试要注意这两个全局变量, 
-//目前只有InsertFieldData和InsertOneField会往里加,DeleteFieldData和InsertOneField会删除, +//目前只有InsertFieldData和InsertOneField和writeFieldData会往里加, +//DeleteFieldData和InsertOneField会删除, //其他测试之间有必要手动clear Status OpenDB(std::string dbName, FieldDB **db) { @@ -131,6 +132,41 @@ void DeleteFieldData(FieldDB *db, int seed = 0/*随机种子*/) { } } +void WriteFieldData(FieldDB *db, int seed = 0/*随机种子*/) { + std::cout << "-------writing-------" << std::endl; + WriteOptions writeOptions; + int key_num = data_size / value_size; + // srand线程不安全,这种可以保证多线程时随机序列也一致 + std::mt19937 rng(seed); + + WriteBatch wb; + for (int i = 0; i < key_num; i++) { + int randThisTime = rng(); //确保读写一个循环只rand一次,否则随机序列会不一致 + //让批量写入的key>0, 单独写入的key<=0,方便测试观察 + int key_ = std::abs(randThisTime) % key_num + 1; + std::string key = std::to_string(key_); + + std::string name = "customer#" + std::to_string(key_); + std::string address = cities[randThisTime % cities.size()]; + std::string age = std::to_string(std::abs(randThisTime) % AGE_RANGE); + FieldArray fields = { + {"name", name}, + {"address", address}, + {"age", age} + }; + if (address == "Shanghai") { + shanghaiKeys.insert(key); + } + if (age == "20") { + age20Keys.insert(key); + } + wb.Put(key, SerializeValue(fields)); + + } + Status s = db->Write(writeOptions, &wb); + ASSERT_TRUE(s.ok()); +} + //并发时不一定能读到,加个参数控制 void GetFieldData(FieldDB *db, bool allowNotFound, int seed = 0) { std::cout << "-------getting-------" << std::endl; @@ -232,8 +268,11 @@ void findKeysByAgeIndex(FieldDB *db, bool haveIndex) { } } -void checkDataInKVAndIndex(FieldDB *db) { - Field field = {"address", "Shanghai"}; +void checkDataInKVAndIndex(FieldDB *db, std::string fieldName = "address") { + Field field; + if (fieldName == "address") field = {"address", "Shanghai"}; + else if (fieldName == "age") field = {"age", "20"}; + else assert(0);//只支持这两个字段检查 Status s; std::vector resKeys1 = db->QueryByIndex(field, &s); //indexdb根据索引查到的数据 std::vector resKeys2 = db->FindKeysByField(field); 
//kvdb强行遍历查到的数据 diff --git a/test/parallel_test.cc b/test/parallel_test.cc index 80c28ea..6925df7 100644 --- a/test/parallel_test.cc +++ b/test/parallel_test.cc @@ -9,7 +9,7 @@ using namespace fielddb; // 测试中read/write都表示带索引的读写 //读写有索引数据的并发 -TEST(TestReadWrite, Parallel) { +TEST(TestReadPut, Parallel) { fielddb::DestroyDB("testdb2.1",Options()); FieldDB *db = new FieldDB(); @@ -52,7 +52,7 @@ TEST(TestReadWrite, Parallel) { } //创建索引与写有该索引数据的并发 -TEST(TestWriteCreatei, Parallel) { +TEST(TestPutCreatei, Parallel) { fielddb::DestroyDB("testdb2.2",Options()); FieldDB *db = new FieldDB(); @@ -242,6 +242,59 @@ TEST(TestPutDelete, Parallel) { delete db; } +//write和其他功能的并发(大杂烩 +TEST(TestWrite, Parallel) { + fielddb::DestroyDB("testdb2.6",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2.6", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + + // ClearDB(db); + shanghaiKeys.clear(); + age20Keys.clear(); + db->CreateIndexOnField("address"); + InsertFieldData(db, 2); //先填点数据,让创建索引的时间久一点 + int thread_num_ = 5; + std::vector threads(thread_num_); + threads[0] = std::thread([db](){db->CreateIndexOnField("age");}); + threads[1] = std::thread([db](){ + while (db->GetIndexStatus("age") == NotExist){ + continue; //开始创建了再并发的写 + } + InsertFieldData(db);}); + threads[2] = std::thread([db](){ + while (db->GetIndexStatus("age") == NotExist){ + continue; + } + WriteFieldData(db, 1);}); + threads[3] = std::thread([db](){ + while (db->GetIndexStatus("age") == NotExist){ + continue; + } + DeleteFieldData(db, 0);}); + threads[4] = std::thread([db](){ + while (db->GetIndexStatus("age") == NotExist){ + continue; + } + db->DeleteIndex("age");}); + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + + //检查 + checkDataInKVAndIndex(db); + ASSERT_EQ(db->GetIndexStatus("age"), NotExist); //删除索引的请求应该被pend在创建之上 + //删掉最后一个线程,可以测试创建age索引时并发的写入能不能保持age的一致性 + //checkDataInKVAndIndex(db, "age"); + delete db; +} + int main(int argc, char** 
argv) { // All tests currently run with the same read-only file limits. testing::InitGoogleTest(&argc, argv); From 6c0b64cfa391ac5b17d2ea3ecd4d6a94c8947d02 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sun, 22 Dec 2024 20:13:50 +0800 Subject: [PATCH 10/32] =?UTF-8?q?=E6=AD=A3=E5=B8=B8=E9=80=80=E5=87=BA?= =?UTF-8?q?=E7=9A=84=E6=81=A2=E5=A4=8D=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 5 +++ fielddb/field_db.cpp | 13 +++++--- fielddb/field_db.h | 1 + test/recover_test.cc | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 4 deletions(-) create mode 100644 test/recover_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index fa7468f..aadc1cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -535,3 +535,8 @@ add_executable(parallel_test "${PROJECT_SOURCE_DIR}/test/parallel_test.cc" ) target_link_libraries(parallel_test PRIVATE leveldb gtest) + +add_executable(recover_test + "${PROJECT_SOURCE_DIR}/test/recover_test.cc" +) +target_link_libraries(recover_test PRIVATE leveldb gtest) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index c7e094e..a83960f 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -60,11 +60,11 @@ Status FieldDB::Recover() { std::string IndexKey; Iter->SeekToFirst(); while(Iter->Valid()) { - IndexKey = Iter->value().ToString(); + IndexKey = Iter->key().ToString(); ParsedInternalIndexKey ParsedIndex; ParseInternalIndexKey(Slice(IndexKey),&ParsedIndex); index_[ParsedIndex.name_.ToString()] = {Exist,nullptr}; - std::cout << "Existed Index : " << ParsedIndex.name_.ToString() << std::endl; + //std::cout << "Existed Index : " << ParsedIndex.name_.ToString() << std::endl; //构建下一个搜索的对象,在原来的fieldname的基础上加一个最大的ascii字符(不可见字符) //TODO:不知道这个做法有没有道理 @@ -183,8 +183,7 @@ Again: // return status; } - -//这里把一个空串作为常规put的name +// 这里把一个空串作为常规put的name Status FieldDB::Put(const 
WriteOptions &options, const Slice &key, const Slice &value) { FieldArray FA = {{"",value.ToString()}}; return PutFields(options, key, FA); @@ -364,4 +363,10 @@ Status DestroyDB(const std::string& name, const Options& options) { return s; } +FieldDB::~FieldDB() { + delete indexDB_; + delete kvDB_; + delete metaDB_; +} + } // namespace fielddb diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 684a820..f0fe5f2 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -33,6 +33,7 @@ public: //用的时候必须FieldDB *db = new FieldDB()再open,不能像之前一样DB *db FieldDB() : indexDB_(nullptr), kvDB_(nullptr), metaDB_(nullptr) {}; + ~FieldDB(); /*lab1的要求,作为db派生类要实现的虚函数*/ Status Put(const WriteOptions &options, const Slice &key, const Slice &value) override; Status PutFields(const WriteOptions &, const Slice &key, const FieldArray &fields) override; diff --git a/test/recover_test.cc b/test/recover_test.cc new file mode 100644 index 0000000..4b05c1d --- /dev/null +++ b/test/recover_test.cc @@ -0,0 +1,86 @@ +#include "gtest/gtest.h" +// #include "leveldb/env.h" +// #include "leveldb/db.h" +#include "fielddb/field_db.h" +#include "test/helper.cc" +#include +#include +#include +using namespace fielddb; + +// std::atomic thread_has_error(false); + +// void signalHandler(int signum) { +// // 捕捉段错误 +// } + +TEST(TestNormalRecover, Recover) { + fielddb::DestroyDB("testdb3.1",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb3.1", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + db->CreateIndexOnField("address"); + db->CreateIndexOnField("age"); + InsertFieldData(db); + bool allowNotFound = false; + GetFieldData(db, allowNotFound); + findKeysByCityIndex(db, true); + findKeysByAgeIndex(db, true); + + delete db; + db = new FieldDB(); + if(OpenDB("testdb3.1", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + //仍然能读到之前写入的数据和索引 + GetFieldData(db, allowNotFound); + findKeysByCityIndex(db, true); + 
findKeysByAgeIndex(db, true); +} + +// TEST(TestParalPutRecover, Recover) { +// signal(SIGSEGV, signalHandler); +// fielddb::DestroyDB("testdb3.2",Options()); +// FieldDB *db = new FieldDB(); + +// if(OpenDB("testdb3.2", &db).ok() == false) { +// std::cerr << "open db failed" << std::endl; +// abort(); +// } +// db->CreateIndexOnField("address"); +// db->CreateIndexOnField("age"); +// shanghaiKeys.clear(); +// age20Keys.clear(); +// int thread_num_ = 2; +// std::vector threads(thread_num_); +// threads[0] = std::thread([db](){ +// InsertFieldData(db); +// }); +// threads[1] = std::thread([db](){ +// InsertOneField(db); +// delete db; +// }); + + +// if (threads[1].joinable()) { +// threads[1].join(); +// } + +// db = new FieldDB(); +// if(OpenDB("testdb3.2", &db).ok() == false) { +// std::cerr << "open db failed" << std::endl; +// abort(); +// } +// GetOneField(db); +// checkDataInKVAndIndex(db); +// } + +int main(int argc, char** argv) { + // All tests currently run with the same read-only file limits. 
+ testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file From 23b603dda024e20c6e7238ef5f132be2e3ad59d8 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Mon, 23 Dec 2024 13:54:01 +0800 Subject: [PATCH 11/32] =?UTF-8?q?=E6=A8=A1=E6=8B=9F=E6=8F=92=E5=85=A5?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=97=B6=E5=B4=A9=E6=BA=83=E7=9A=84=E6=B5=8B?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 7 +++-- test/recover_test.cc | 79 ++++++++++++++++++++++++++-------------------------- 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index a83960f..9bd93f1 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -82,15 +82,18 @@ Status FieldDB::Recover() { MetaValue = Iter->key(); MetaType type = MetaType(DecodeFixed32(MetaValue.data())); MetaValue.remove_prefix(4);//移除头上的metaType的部分 + Slice extractKey; + GetLengthPrefixedSlice(&MetaValue, &extractKey); if(type == KV_Creating) { FieldArray fields; ParseValue(Iter->value().ToString(), &fields); - PutFields(WriteOptions(), MetaValue, fields); + PutFields(WriteOptions(), extractKey, fields); } else if(type == KV_Deleting) { - Delete(WriteOptions(), MetaValue); + Delete(WriteOptions(), extractKey); } else { assert(0 && "Invalid MetaType"); } + Iter->Next(); } delete Iter; //在所有的请求完成后,会自动把metaDB的内容清空。 diff --git a/test/recover_test.cc b/test/recover_test.cc index 4b05c1d..47cc731 100644 --- a/test/recover_test.cc +++ b/test/recover_test.cc @@ -8,12 +8,6 @@ #include using namespace fielddb; -// std::atomic thread_has_error(false); - -// void signalHandler(int signum) { -// // 捕捉段错误 -// } - TEST(TestNormalRecover, Recover) { fielddb::DestroyDB("testdb3.1",Options()); FieldDB *db = new FieldDB(); @@ -42,42 +36,47 @@ TEST(TestNormalRecover, Recover) { findKeysByAgeIndex(db, true); } -// TEST(TestParalPutRecover, 
Recover) { -// signal(SIGSEGV, signalHandler); -// fielddb::DestroyDB("testdb3.2",Options()); -// FieldDB *db = new FieldDB(); +TEST(TestParalPutRecover, Recover) { + //第一次运行 + // fielddb::DestroyDB("testdb3.2",Options()); + // FieldDB *db = new FieldDB(); -// if(OpenDB("testdb3.2", &db).ok() == false) { -// std::cerr << "open db failed" << std::endl; -// abort(); -// } -// db->CreateIndexOnField("address"); -// db->CreateIndexOnField("age"); -// shanghaiKeys.clear(); -// age20Keys.clear(); -// int thread_num_ = 2; -// std::vector threads(thread_num_); -// threads[0] = std::thread([db](){ -// InsertFieldData(db); -// }); -// threads[1] = std::thread([db](){ -// InsertOneField(db); -// delete db; -// }); + // if(OpenDB("testdb3.2", &db).ok() == false) { + // std::cerr << "open db failed" << std::endl; + // abort(); + // } + // db->CreateIndexOnField("address"); + // db->CreateIndexOnField("age"); + // shanghaiKeys.clear(); + // age20Keys.clear(); + // int thread_num_ = 2; + // std::vector threads(thread_num_); + // threads[0] = std::thread([db](){ + // InsertFieldData(db); + // }); + // threads[1] = std::thread([db](){ + // InsertOneField(db); + // delete db; + // }); + // for (auto& t : threads) { + // if (t.joinable()) { + // t.join(); + // } + // } + //线程1导致了线程0错误,测试会终止(模拟数据库崩溃) + //这会导致线程0在写入的各种奇怪的时间点崩溃 + //第二次运行注释掉上面的代码,运行下面的代码测试恢复 - -// if (threads[1].joinable()) { -// threads[1].join(); -// } - -// db = new FieldDB(); -// if(OpenDB("testdb3.2", &db).ok() == false) { -// std::cerr << "open db failed" << std::endl; -// abort(); -// } -// GetOneField(db); -// checkDataInKVAndIndex(db); -// } + + //第二次运行 + FieldDB *db = new FieldDB(); + if(OpenDB("testdb3.2", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + GetOneField(db); + checkDataInKVAndIndex(db); +} int main(int argc, char** argv) { // All tests currently run with the same read-only file limits. 
From 622ce1e987dea9167c6b2adbec51732be8354271 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sat, 28 Dec 2024 04:53:40 +0800 Subject: [PATCH 12/32] =?UTF-8?q?fielddb=E7=9A=84benchmark=E3=80=81?= =?UTF-8?q?=E6=80=A7=E8=83=BD=E6=8F=92=E6=A1=A9=E4=BB=A5=E5=8F=8A=E4=B8=80?= =?UTF-8?q?=E4=B8=AA=E5=AF=B9=E6=AF=94=E7=9A=84testdb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 +- CMakeLists.txt | 4 + benchmarks/db_bench.cc | 8 +- benchmarks/db_bench_FieldDB.cc | 1144 +++++++++++++++++++++++++++++++++++++++ benchmarks/db_bench_testDB.cc | 1145 ++++++++++++++++++++++++++++++++++++++++ fielddb/field_db.cpp | 83 ++- fielddb/field_db.h | 62 ++- fielddb/meta.cpp | 3 +- fielddb/request.cpp | 28 +- include/leveldb/env.h | 2 + testdb/testdb.cc | 111 ++++ testdb/testdb.h | 72 +++ util/env_posix.cc | 4 + util/serialize_value.cc | 2 +- util/serialize_value.h | 2 +- 15 files changed, 2645 insertions(+), 29 deletions(-) create mode 100644 benchmarks/db_bench_FieldDB.cc create mode 100644 benchmarks/db_bench_testDB.cc create mode 100644 testdb/testdb.cc create mode 100644 testdb/testdb.h diff --git a/.gitignore b/.gitignore index 9e34c6c..c600ed3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,6 @@ out/ # clangd .cache/ -compile_commands.json \ No newline at end of file +compile_commands.json + +benchmark-result/ diff --git a/CMakeLists.txt b/CMakeLists.txt index aadc1cf..d0ea7d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,6 +198,8 @@ target_sources(leveldb "fielddb/meta.h" "fielddb/request.cpp" "fielddb/request.h" + "testdb/testdb.cc" + "testdb/testdb.h" # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install". 
$<$:PUBLIC> @@ -447,6 +449,8 @@ if(LEVELDB_BUILD_BENCHMARKS) if(NOT BUILD_SHARED_LIBS) leveldb_benchmark("benchmarks/db_bench.cc") + leveldb_benchmark("benchmarks/db_bench_FieldDB.cc") + leveldb_benchmark("benchmarks/db_bench_testDB.cc") endif(NOT BUILD_SHARED_LIBS) check_library_exists(sqlite3 sqlite3_open "" HAVE_SQLITE3) diff --git a/benchmarks/db_bench.cc b/benchmarks/db_bench.cc index 8e3f4e7..72d962c 100644 --- a/benchmarks/db_bench.cc +++ b/benchmarks/db_bench.cc @@ -325,8 +325,8 @@ class Stats { // elapsed times. double elapsed = (finish_ - start_) * 1e-6; char rate[100]; - std::snprintf(rate, sizeof(rate), "%6.1f MB/s", - (bytes_ / 1048576.0) / elapsed); + std::snprintf(rate, sizeof(rate), "%6.1f MB/s Bytes:%6.1f elapsed(s):%6.1f seconds:%6.1f ", + (bytes_ / 1048576.0) / elapsed,(bytes_ / 1048576.0),elapsed,seconds_); extra = rate; } AppendWithSpace(&extra, message_); @@ -737,6 +737,10 @@ class Benchmark { } shared.mu.Unlock(); + // for(int i = 0; i < n; i++) { + // arg[i].thread->stats.Report(name.ToString() + "thread:" + std::to_string(i)); + // } + for (int i = 1; i < n; i++) { arg[0].thread->stats.Merge(arg[i].thread->stats); } diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc new file mode 100644 index 0000000..a0f9a21 --- /dev/null +++ b/benchmarks/db_bench_FieldDB.cc @@ -0,0 +1,1144 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include + +#include +#include +#include + +#include "leveldb/cache.h" +#include "leveldb/comparator.h" +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "leveldb/filter_policy.h" +#include "leveldb/status.h" +#include "leveldb/write_batch.h" +#include "port/port.h" +#include "util/crc32c.h" +#include "util/histogram.h" +#include "util/mutexlock.h" +#include "util/random.h" +#include "util/testutil.h" + +#include "fielddb/field_db.h" +using namespace fielddb; + +// Comma-separated list of operations to run in the specified order +// Actual benchmarks: +// fillseq -- write N values in sequential key order in async mode +// fillrandom -- write N values in random key order in async mode +// overwrite -- overwrite N values in random key order in async mode +// fillsync -- write N/100 values in random key order in sync mode +// fill100K -- write N/1000 100K values in random order in async mode +// deleteseq -- delete N keys in sequential order +// deleterandom -- delete N keys in random order +// readseq -- read N times sequentially +// readreverse -- read N times in reverse order +// readrandom -- read N times in random order +// readmissing -- read N missing keys in random order +// readhot -- read N times in random order from 1% section of DB +// seekrandom -- N random seeks +// seekordered -- N ordered seeks +// open -- cost of opening a DB +// crc32c -- repeated crc32c of 4K of data +// Meta operations: +// compact -- Compact the entire DB +// stats -- Print DB stats +// sstables -- Print sstable info +// heapprofile -- Dump a heap profile (if supported by this port) +static const char* FLAGS_benchmarks = + "fillseq," + "fillsync," + "fillrandom," + "overwrite," + "readrandom," + "readrandom," // Extra run to allow previous compactions to quiesce + "readseq," + "readreverse," + "compact," + "readrandom," + "readseq," + "readreverse," + "fill100K," + "crc32c," + "snappycomp," + "snappyuncomp," + "zstdcomp," + "zstduncomp,"; + +// Number of key/values 
to place in database +static int FLAGS_num = 1000000; + +// Number of read operations to do. If negative, do FLAGS_num reads. +static int FLAGS_reads = -1; + +// Number of concurrent threads to run. +static int FLAGS_threads = 1; + +// Size of each value +static int FLAGS_value_size = 100; + +// Arrange to generate values that shrink to this fraction of +// their original size after compression +static double FLAGS_compression_ratio = 0.5; + +// Print histogram of operation timings +static bool FLAGS_histogram = false; + +// Count the number of string comparisons performed +static bool FLAGS_comparisons = false; + +// Number of bytes to buffer in memtable before compacting +// (initialized to default value by "main") +static int FLAGS_write_buffer_size = 0; + +// Number of bytes written to each file. +// (initialized to default value by "main") +static int FLAGS_max_file_size = 0; + +// Approximate size of user data packed per block (before compression. +// (initialized to default value by "main") +static int FLAGS_block_size = 0; + +// Number of bytes to use as a cache of uncompressed data. +// Negative means use default settings. +static int FLAGS_cache_size = -1; + +// Maximum number of files to keep open at the same time (use default if == 0) +static int FLAGS_open_files = 0; + +// Bloom filter bits per key. +// Negative means use default settings. +static int FLAGS_bloom_bits = -1; + +// Common key prefix length. +static int FLAGS_key_prefix = 0; + +// If true, do not destroy the existing database. If you set this +// flag and also specify a benchmark that wants a fresh database, that +// benchmark will fail. +static bool FLAGS_use_existing_db = false; + +// If true, reuse existing log/MANIFEST files when re-opening a database. +static bool FLAGS_reuse_logs = false; + +// If true, use compression. +static bool FLAGS_compression = true; + +// Use the db with the following name. 
+static const char* FLAGS_db = nullptr; + +// ZSTD compression level to try out +static int FLAGS_zstd_compression_level = 1; + +namespace leveldb { + +namespace { +leveldb::Env* g_env = nullptr; + +class CountComparator : public Comparator { + public: + CountComparator(const Comparator* wrapped) : wrapped_(wrapped) {} + ~CountComparator() override {} + int Compare(const Slice& a, const Slice& b) const override { + count_.fetch_add(1, std::memory_order_relaxed); + return wrapped_->Compare(a, b); + } + const char* Name() const override { return wrapped_->Name(); } + void FindShortestSeparator(std::string* start, + const Slice& limit) const override { + wrapped_->FindShortestSeparator(start, limit); + } + + void FindShortSuccessor(std::string* key) const override { + return wrapped_->FindShortSuccessor(key); + } + + size_t comparisons() const { return count_.load(std::memory_order_relaxed); } + + void reset() { count_.store(0, std::memory_order_relaxed); } + + private: + mutable std::atomic count_{0}; + const Comparator* const wrapped_; +}; + +// Helper for quickly generating random data. +class RandomGenerator { + private: + std::string data_; + int pos_; + + public: + RandomGenerator() { + // We use a limited amount of data over and over again and ensure + // that it is larger than the compression window (32KB), and also + // large enough to serve all typical value sizes we want to write. + Random rnd(301); + std::string piece; + while (data_.size() < 1048576) { + // Add a short fragment that is as compressible as specified + // by FLAGS_compression_ratio. 
+ test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece); + data_.append(piece); + } + pos_ = 0; + } + + Slice Generate(size_t len) { + if (pos_ + len > data_.size()) { + pos_ = 0; + assert(len < data_.size()); + } + pos_ += len; + return Slice(data_.data() + pos_ - len, len); + } +}; + +class KeyBuffer { + public: + KeyBuffer() { + assert(FLAGS_key_prefix < sizeof(buffer_)); + memset(buffer_, 'a', FLAGS_key_prefix); + } + KeyBuffer& operator=(KeyBuffer& other) = delete; + KeyBuffer(KeyBuffer& other) = delete; + + void Set(int k) { + std::snprintf(buffer_ + FLAGS_key_prefix, + sizeof(buffer_) - FLAGS_key_prefix, "%016d", k); + } + + Slice slice() const { return Slice(buffer_, FLAGS_key_prefix + 16); } + + private: + char buffer_[1024]; +}; + +#if defined(__linux) +static Slice TrimSpace(Slice s) { + size_t start = 0; + while (start < s.size() && isspace(s[start])) { + start++; + } + size_t limit = s.size(); + while (limit > start && isspace(s[limit - 1])) { + limit--; + } + return Slice(s.data() + start, limit - start); +} +#endif + +static void AppendWithSpace(std::string* str, Slice msg) { + if (msg.empty()) return; + if (!str->empty()) { + str->push_back(' '); + } + str->append(msg.data(), msg.size()); +} + +class Stats { + private: + double start_; + double finish_; + double seconds_; + int done_; + int next_report_; + int64_t bytes_; + double last_op_finish_; + Histogram hist_; + std::string message_; + + public: + Stats() { Start(); } + + void Start() { + next_report_ = 100; + hist_.Clear(); + done_ = 0; + bytes_ = 0; + seconds_ = 0; + message_.clear(); + start_ = finish_ = last_op_finish_ = g_env->NowMicros(); + } + + void Merge(const Stats& other) { + hist_.Merge(other.hist_); + done_ += other.done_; + bytes_ += other.bytes_; + seconds_ += other.seconds_; + if (other.start_ < start_) start_ = other.start_; + if (other.finish_ > finish_) finish_ = other.finish_; + + // Just keep the messages from one thread + if (message_.empty()) message_ = 
other.message_; + } + + void Stop() { + finish_ = g_env->NowMicros(); + seconds_ = (finish_ - start_) * 1e-6; + } + + void AddMessage(Slice msg) { AppendWithSpace(&message_, msg); } + + void FinishedSingleOp() { + if (FLAGS_histogram) { + double now = g_env->NowMicros(); + double micros = now - last_op_finish_; + hist_.Add(micros); + if (micros > 20000) { + std::fprintf(stderr, "long op: %.1f micros%30s\r", micros, ""); + std::fflush(stderr); + } + last_op_finish_ = now; + } + + done_++; + if (done_ >= next_report_) { + if (next_report_ < 1000) + next_report_ += 100; + else if (next_report_ < 5000) + next_report_ += 500; + else if (next_report_ < 10000) + next_report_ += 1000; + else if (next_report_ < 50000) + next_report_ += 5000; + else if (next_report_ < 100000) + next_report_ += 10000; + else if (next_report_ < 500000) + next_report_ += 50000; + else + next_report_ += 100000; + std::fprintf(stderr, "... finished %d ops%30s\r", done_, ""); + std::fflush(stderr); + } + } + + void AddBytes(int64_t n) { bytes_ += n; } + + void Report(const Slice& name) { + // Pretend at least one op was done in case we are running a benchmark + // that does not call FinishedSingleOp(). + if (done_ < 1) done_ = 1; + + std::string extra; + if (bytes_ > 0) { + // Rate is computed on actual elapsed time, not the sum of per-thread + // elapsed times. + double elapsed = (finish_ - start_) * 1e-6; + char rate[100]; + std::snprintf(rate, sizeof(rate), "%6.1f MB/s", + (bytes_ / 1048576.0) / elapsed); + extra = rate; + } + AppendWithSpace(&extra, message_); + + std::fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", + name.ToString().c_str(), seconds_ * 1e6 / done_, + (extra.empty() ? "" : " "), extra.c_str()); + if (FLAGS_histogram) { + std::fprintf(stdout, "Microseconds per op:\n%s\n", + hist_.ToString().c_str()); + } + std::fflush(stdout); + } +}; + +// State shared by all concurrent executions of the same benchmark. 
+struct SharedState { + port::Mutex mu; + port::CondVar cv GUARDED_BY(mu); + int total GUARDED_BY(mu); + + // Each thread goes through the following states: + // (1) initializing + // (2) waiting for others to be initialized + // (3) running + // (4) done + + int num_initialized GUARDED_BY(mu); + int num_done GUARDED_BY(mu); + bool start GUARDED_BY(mu); + + SharedState(int total) + : cv(&mu), total(total), num_initialized(0), num_done(0), start(false) {} +}; + +// Per-thread state for concurrent executions of the same benchmark. +struct ThreadState { + int tid; // 0..n-1 when running in n threads + Random rand; // Has different seeds for different threads + Stats stats; + SharedState* shared; + + ThreadState(int index, int seed) : tid(index), rand(seed), shared(nullptr) {} +}; + +void Compress( + ThreadState* thread, std::string name, + std::function compress_func) { + RandomGenerator gen; + Slice input = gen.Generate(Options().block_size); + int64_t bytes = 0; + int64_t produced = 0; + bool ok = true; + std::string compressed; + while (ok && bytes < 1024 * 1048576) { // Compress 1G + ok = compress_func(input.data(), input.size(), &compressed); + produced += compressed.size(); + bytes += input.size(); + thread->stats.FinishedSingleOp(); + } + + if (!ok) { + thread->stats.AddMessage("(" + name + " failure)"); + } else { + char buf[100]; + std::snprintf(buf, sizeof(buf), "(output: %.1f%%)", + (produced * 100.0) / bytes); + thread->stats.AddMessage(buf); + thread->stats.AddBytes(bytes); + } +} + +void Uncompress( + ThreadState* thread, std::string name, + std::function compress_func, + std::function uncompress_func) { + RandomGenerator gen; + Slice input = gen.Generate(Options().block_size); + std::string compressed; + bool ok = compress_func(input.data(), input.size(), &compressed); + int64_t bytes = 0; + char* uncompressed = new char[input.size()]; + while (ok && bytes < 1024 * 1048576) { // Compress 1G + ok = uncompress_func(compressed.data(), compressed.size(), 
uncompressed); + bytes += input.size(); + thread->stats.FinishedSingleOp(); + } + delete[] uncompressed; + + if (!ok) { + thread->stats.AddMessage("(" + name + " failure)"); + } else { + thread->stats.AddBytes(bytes); + } +} + +} // namespace + +class Benchmark { + private: + Cache* cache_; + const FilterPolicy* filter_policy_; + fielddb::FieldDB* db_; + int num_; + int value_size_; + int entries_per_batch_; + WriteOptions write_options_; + int reads_; + int heap_counter_; + CountComparator count_comparator_; + int total_thread_count_; + + void PrintHeader() { + const int kKeySize = 16 + FLAGS_key_prefix; + PrintEnvironment(); + std::fprintf(stdout, "Keys: %d bytes each\n", kKeySize); + std::fprintf( + stdout, "Values: %d bytes each (%d bytes after compression)\n", + FLAGS_value_size, + static_cast(FLAGS_value_size * FLAGS_compression_ratio + 0.5)); + std::fprintf(stdout, "Entries: %d\n", num_); + std::fprintf(stdout, "RawSize: %.1f MB (estimated)\n", + ((static_cast(kKeySize + FLAGS_value_size) * num_) / + 1048576.0)); + std::fprintf( + stdout, "FileSize: %.1f MB (estimated)\n", + (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) / + 1048576.0)); + PrintWarnings(); + std::fprintf(stdout, "------------------------------------------------\n"); + } + + void PrintWarnings() { +#if defined(__GNUC__) && !defined(__OPTIMIZE__) + std::fprintf( + stdout, + "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); +#endif +#ifndef NDEBUG + std::fprintf( + stdout, + "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); +#endif + + // See if snappy is working by attempting to compress a compressible string + const char text[] = "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"; + std::string compressed; + if (!port::Snappy_Compress(text, sizeof(text), &compressed)) { + std::fprintf(stdout, "WARNING: Snappy compression is not enabled\n"); + } else if (compressed.size() >= sizeof(text)) { + std::fprintf(stdout, "WARNING: Snappy compression 
is not effective\n"); + } + } + + void PrintEnvironment() { + std::fprintf(stderr, "LevelDB: version %d.%d\n", kMajorVersion, + kMinorVersion); + +#if defined(__linux) + time_t now = time(nullptr); + std::fprintf(stderr, "Date: %s", + ctime(&now)); // ctime() adds newline + + FILE* cpuinfo = std::fopen("/proc/cpuinfo", "r"); + if (cpuinfo != nullptr) { + char line[1000]; + int num_cpus = 0; + std::string cpu_type; + std::string cache_size; + while (fgets(line, sizeof(line), cpuinfo) != nullptr) { + const char* sep = strchr(line, ':'); + if (sep == nullptr) { + continue; + } + Slice key = TrimSpace(Slice(line, sep - 1 - line)); + Slice val = TrimSpace(Slice(sep + 1)); + if (key == "model name") { + ++num_cpus; + cpu_type = val.ToString(); + } else if (key == "cache size") { + cache_size = val.ToString(); + } + } + std::fclose(cpuinfo); + std::fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str()); + std::fprintf(stderr, "CPUCache: %s\n", cache_size.c_str()); + } +#endif + } + + public: + Benchmark() + : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : nullptr), + filter_policy_(FLAGS_bloom_bits >= 0 + ? NewBloomFilterPolicy(FLAGS_bloom_bits) + : nullptr), + db_(nullptr), + num_(FLAGS_num), + value_size_(FLAGS_value_size), + entries_per_batch_(1), + reads_(FLAGS_reads < 0 ? 
FLAGS_num : FLAGS_reads), + heap_counter_(0), + count_comparator_(BytewiseComparator()), + total_thread_count_(0) { + std::vector files; + g_env->GetChildren(FLAGS_db, &files); + for (size_t i = 0; i < files.size(); i++) { + if (Slice(files[i]).starts_with("heap-")) { + g_env->RemoveFile(std::string(FLAGS_db) + "/" + files[i]); + } + } + if (!FLAGS_use_existing_db) { + DestroyDB(FLAGS_db, Options()); + } + } + + ~Benchmark() { + delete db_; + delete cache_; + delete filter_policy_; + } + + void Run() { + PrintHeader(); + Open(); + + const char* benchmarks = FLAGS_benchmarks; + while (benchmarks != nullptr) { + const char* sep = strchr(benchmarks, ','); + Slice name; + if (sep == nullptr) { + name = benchmarks; + benchmarks = nullptr; + } else { + name = Slice(benchmarks, sep - benchmarks); + benchmarks = sep + 1; + } + + // Reset parameters that may be overridden below + num_ = FLAGS_num; + reads_ = (FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads); + value_size_ = FLAGS_value_size; + entries_per_batch_ = 1; + write_options_ = WriteOptions(); + + void (Benchmark::*method)(ThreadState*) = nullptr; + bool fresh_db = false; + int num_threads = FLAGS_threads; + + if (name == Slice("open")) { + method = &Benchmark::OpenBench; + num_ /= 10000; + if (num_ < 1) num_ = 1; + } else if (name == Slice("fillseq")) { + fresh_db = true; + method = &Benchmark::WriteSeq; + } else if (name == Slice("fillbatch")) { + fresh_db = true; + entries_per_batch_ = 1000; + method = &Benchmark::WriteSeq; + } else if (name == Slice("fillrandom")) { + fresh_db = true; + method = &Benchmark::WriteRandom; + } else if (name == Slice("overwrite")) { + fresh_db = false; + method = &Benchmark::WriteRandom; + } else if (name == Slice("fillsync")) { + fresh_db = true; + num_ /= 1000; + write_options_.sync = true; + method = &Benchmark::WriteRandom; + } else if (name == Slice("fill100K")) { + fresh_db = true; + num_ /= 1000; + value_size_ = 100 * 1000; + method = &Benchmark::WriteRandom; + } else if (name == 
Slice("readseq")) { + method = &Benchmark::ReadSequential; + } else if (name == Slice("readreverse")) { + method = &Benchmark::ReadReverse; + } else if (name == Slice("readrandom")) { + method = &Benchmark::ReadRandom; + } else if (name == Slice("readmissing")) { + method = &Benchmark::ReadMissing; + } else if (name == Slice("seekrandom")) { + method = &Benchmark::SeekRandom; + } else if (name == Slice("seekordered")) { + method = &Benchmark::SeekOrdered; + } else if (name == Slice("readhot")) { + method = &Benchmark::ReadHot; + } else if (name == Slice("readrandomsmall")) { + reads_ /= 1000; + method = &Benchmark::ReadRandom; + } else if (name == Slice("deleteseq")) { + method = &Benchmark::DeleteSeq; + } else if (name == Slice("deleterandom")) { + method = &Benchmark::DeleteRandom; + } else if (name == Slice("readwhilewriting")) { + num_threads++; // Add extra thread for writing + method = &Benchmark::ReadWhileWriting; + } else if (name == Slice("compact")) { + method = &Benchmark::Compact; + } else if (name == Slice("crc32c")) { + method = &Benchmark::Crc32c; + } else if (name == Slice("snappycomp")) { + method = &Benchmark::SnappyCompress; + } else if (name == Slice("snappyuncomp")) { + method = &Benchmark::SnappyUncompress; + } else if (name == Slice("zstdcomp")) { + method = &Benchmark::ZstdCompress; + } else if (name == Slice("zstduncomp")) { + method = &Benchmark::ZstdUncompress; + } else if (name == Slice("heapprofile")) { + HeapProfile(); + } else if (name == Slice("stats")) { + PrintStats("leveldb.stats"); + } else if (name == Slice("sstables")) { + PrintStats("leveldb.sstables"); + } else { + if (!name.empty()) { // No error message for empty name + std::fprintf(stderr, "unknown benchmark '%s'\n", + name.ToString().c_str()); + } + } + + if (fresh_db) { + if (FLAGS_use_existing_db) { + std::fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n", + name.ToString().c_str()); + method = nullptr; + } else { + delete db_; + db_ = nullptr; + 
DestroyDB(FLAGS_db, Options()); + Open(); + } + } + + if (method != nullptr) { + RunBenchmark(num_threads, name, method); + } + } + } + + private: + struct ThreadArg { + Benchmark* bm; + SharedState* shared; + ThreadState* thread; + void (Benchmark::*method)(ThreadState*); + }; + + static void ThreadBody(void* v) { + ThreadArg* arg = reinterpret_cast(v); + SharedState* shared = arg->shared; + ThreadState* thread = arg->thread; + { + MutexLock l(&shared->mu); + shared->num_initialized++; + if (shared->num_initialized >= shared->total) { + shared->cv.SignalAll(); + } + while (!shared->start) { + shared->cv.Wait(); + } + } + + thread->stats.Start(); + (arg->bm->*(arg->method))(thread); + thread->stats.Stop(); + + { + MutexLock l(&shared->mu); + shared->num_done++; + if (shared->num_done >= shared->total) { + shared->cv.SignalAll(); + } + } + } + + void RunBenchmark(int n, Slice name, + void (Benchmark::*method)(ThreadState*)) { + SharedState shared(n); + + ThreadArg* arg = new ThreadArg[n]; + for (int i = 0; i < n; i++) { + arg[i].bm = this; + arg[i].method = method; + arg[i].shared = &shared; + ++total_thread_count_; + // Seed the thread's random state deterministically based upon thread + // creation across all benchmarks. This ensures that the seeds are unique + // but reproducible when rerunning the same set of benchmarks. 
+ arg[i].thread = new ThreadState(i, /*seed=*/1000 + total_thread_count_); + arg[i].thread->shared = &shared; + g_env->StartThread(ThreadBody, &arg[i]); + } + + shared.mu.Lock(); + while (shared.num_initialized < n) { + shared.cv.Wait(); + } + + shared.start = true; + shared.cv.SignalAll(); + while (shared.num_done < n) { + shared.cv.Wait(); + } + shared.mu.Unlock(); + + for (int i = 1; i < n; i++) { + arg[0].thread->stats.Merge(arg[i].thread->stats); + } + arg[0].thread->stats.Report(name); + if (FLAGS_comparisons) { + fprintf(stdout, "Comparisons: %zu\n", count_comparator_.comparisons()); + count_comparator_.reset(); + fflush(stdout); + } + + for (int i = 0; i < n; i++) { + delete arg[i].thread; + } + delete[] arg; + } + + void Crc32c(ThreadState* thread) { + // Checksum about 500MB of data total + const int size = 4096; + const char* label = "(4K per op)"; + std::string data(size, 'x'); + int64_t bytes = 0; + uint32_t crc = 0; + while (bytes < 500 * 1048576) { + crc = crc32c::Value(data.data(), size); + thread->stats.FinishedSingleOp(); + bytes += size; + } + // Print so result is not dead + std::fprintf(stderr, "... 
crc=0x%x\r", static_cast(crc)); + + thread->stats.AddBytes(bytes); + thread->stats.AddMessage(label); + } + + void SnappyCompress(ThreadState* thread) { + Compress(thread, "snappy", &port::Snappy_Compress); + } + + void SnappyUncompress(ThreadState* thread) { + Uncompress(thread, "snappy", &port::Snappy_Compress, + &port::Snappy_Uncompress); + } + + void ZstdCompress(ThreadState* thread) { + Compress(thread, "zstd", + [](const char* input, size_t length, std::string* output) { + return port::Zstd_Compress(FLAGS_zstd_compression_level, input, + length, output); + }); + } + + void ZstdUncompress(ThreadState* thread) { + Uncompress( + thread, "zstd", + [](const char* input, size_t length, std::string* output) { + return port::Zstd_Compress(FLAGS_zstd_compression_level, input, + length, output); + }, + &port::Zstd_Uncompress); + } + + void Open() { + assert(db_ == nullptr); + Options options; + options.env = g_env; + options.create_if_missing = !FLAGS_use_existing_db; + options.block_cache = cache_; + options.write_buffer_size = FLAGS_write_buffer_size; + options.max_file_size = FLAGS_max_file_size; + options.block_size = FLAGS_block_size; + if (FLAGS_comparisons) { + options.comparator = &count_comparator_; + } + options.max_open_files = FLAGS_open_files; + options.filter_policy = filter_policy_; + options.reuse_logs = FLAGS_reuse_logs; + options.compression = + FLAGS_compression ? 
kSnappyCompression : kNoCompression; + // Status s = DB::Open(options, FLAGS_db, &db_); + db_ = new FieldDB(); + Status s = FieldDB::OpenFieldDB(options, FLAGS_db, &db_); + if (!s.ok()) { + std::fprintf(stderr, "open error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + + void OpenBench(ThreadState* thread) { + for (int i = 0; i < num_; i++) { + delete db_; + Open(); + thread->stats.FinishedSingleOp(); + } + } + + void WriteSeq(ThreadState* thread) { DoWrite(thread, true); } + + void WriteRandom(ThreadState* thread) { DoWrite(thread, false); } + + void DoWrite(ThreadState* thread, bool seq) { + if (num_ != FLAGS_num) { + char msg[100]; + std::snprintf(msg, sizeof(msg), "(%d ops)", num_); + thread->stats.AddMessage(msg); + } + + RandomGenerator gen; + WriteBatch batch; + Status s; + int64_t bytes = 0; + KeyBuffer key; + for (int i = 0; i < num_; i += entries_per_batch_) { + batch.Clear(); + for (int j = 0; j < entries_per_batch_; j++) { + const int k = seq ? i + j : thread->rand.Uniform(FLAGS_num); + key.Set(k); + batch.Put(key.slice(), gen.Generate(value_size_)); + bytes += value_size_ + key.slice().size(); + thread->stats.FinishedSingleOp(); + } + s = db_->Write(write_options_, &batch); + if (!s.ok()) { + std::fprintf(stderr, "put error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + thread->stats.AddBytes(bytes); + } + + void ReadSequential(ThreadState* thread) { + Iterator* iter = db_->NewIterator(ReadOptions()); + int i = 0; + int64_t bytes = 0; + for (iter->SeekToFirst(); i < reads_ && iter->Valid(); iter->Next()) { + bytes += iter->key().size() + iter->value().size(); + thread->stats.FinishedSingleOp(); + ++i; + } + delete iter; + thread->stats.AddBytes(bytes); + } + + void ReadReverse(ThreadState* thread) { + Iterator* iter = db_->NewIterator(ReadOptions()); + int i = 0; + int64_t bytes = 0; + for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) { + bytes += iter->key().size() + iter->value().size(); + 
thread->stats.FinishedSingleOp(); + ++i; + } + delete iter; + thread->stats.AddBytes(bytes); + } + + void ReadRandom(ThreadState* thread) { + ReadOptions options; + std::string value; + int found = 0; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + if (db_->Get(options, key.slice(), &value).ok()) { + found++; + } + thread->stats.FinishedSingleOp(); + } + char msg[100]; + std::snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); + thread->stats.AddMessage(msg); + } + + void ReadMissing(ThreadState* thread) { + ReadOptions options; + std::string value; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + Slice s = Slice(key.slice().data(), key.slice().size() - 1); + db_->Get(options, s, &value); + thread->stats.FinishedSingleOp(); + } + } + + void ReadHot(ThreadState* thread) { + ReadOptions options; + std::string value; + const int range = (FLAGS_num + 99) / 100; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + const int k = thread->rand.Uniform(range); + key.Set(k); + db_->Get(options, key.slice(), &value); + thread->stats.FinishedSingleOp(); + } + } + + void SeekRandom(ThreadState* thread) { + ReadOptions options; + int found = 0; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + Iterator* iter = db_->NewIterator(options); + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + iter->Seek(key.slice()); + if (iter->Valid() && iter->key() == key.slice()) found++; + delete iter; + thread->stats.FinishedSingleOp(); + } + char msg[100]; + snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); + thread->stats.AddMessage(msg); + } + + void SeekOrdered(ThreadState* thread) { + ReadOptions options; + Iterator* iter = db_->NewIterator(options); + int found = 0; + int k = 0; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + k = (k + (thread->rand.Uniform(100))) % FLAGS_num; + key.Set(k); + 
iter->Seek(key.slice()); + if (iter->Valid() && iter->key() == key.slice()) found++; + thread->stats.FinishedSingleOp(); + } + delete iter; + char msg[100]; + std::snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); + thread->stats.AddMessage(msg); + } + + void DoDelete(ThreadState* thread, bool seq) { + RandomGenerator gen; + WriteBatch batch; + Status s; + KeyBuffer key; + for (int i = 0; i < num_; i += entries_per_batch_) { + batch.Clear(); + for (int j = 0; j < entries_per_batch_; j++) { + const int k = seq ? i + j : (thread->rand.Uniform(FLAGS_num)); + key.Set(k); + batch.Delete(key.slice()); + thread->stats.FinishedSingleOp(); + } + s = db_->Write(write_options_, &batch); + if (!s.ok()) { + std::fprintf(stderr, "del error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + } + + void DeleteSeq(ThreadState* thread) { DoDelete(thread, true); } + + void DeleteRandom(ThreadState* thread) { DoDelete(thread, false); } + + void ReadWhileWriting(ThreadState* thread) { + if (thread->tid > 0) { + ReadRandom(thread); + } else { + // Special thread that keeps writing until other threads are done. + RandomGenerator gen; + KeyBuffer key; + while (true) { + { + MutexLock l(&thread->shared->mu); + if (thread->shared->num_done + 1 >= thread->shared->num_initialized) { + // Other threads have finished + break; + } + } + + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + Status s = + db_->Put(write_options_, key.slice(), gen.Generate(value_size_)); + if (!s.ok()) { + std::fprintf(stderr, "put error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + + // Do not count any of the preceding work/delay in stats. 
+ thread->stats.Start(); + } + } + + void Compact(ThreadState* thread) { db_->CompactRange(nullptr, nullptr); } + + void PrintStats(const char* key) { + std::string stats; + if (!db_->GetProperty(key, &stats)) { + stats = "(failed)"; + } + std::fprintf(stdout, "\n%s\n", stats.c_str()); + } + + static void WriteToFile(void* arg, const char* buf, int n) { + reinterpret_cast(arg)->Append(Slice(buf, n)); + } + + void HeapProfile() { + char fname[100]; + std::snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, + ++heap_counter_); + WritableFile* file; + Status s = g_env->NewWritableFile(fname, &file); + if (!s.ok()) { + std::fprintf(stderr, "%s\n", s.ToString().c_str()); + return; + } + bool ok = port::GetHeapProfile(WriteToFile, file); + delete file; + if (!ok) { + std::fprintf(stderr, "heap profiling not supported\n"); + g_env->RemoveFile(fname); + } + } +}; + +} // namespace leveldb + +int main(int argc, char** argv) { + FLAGS_write_buffer_size = leveldb::Options().write_buffer_size; + FLAGS_max_file_size = leveldb::Options().max_file_size; + FLAGS_block_size = leveldb::Options().block_size; + FLAGS_open_files = leveldb::Options().max_open_files; + std::string default_db_path; + + for (int i = 1; i < argc; i++) { + double d; + int n; + char junk; + if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) { + FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); + } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { + FLAGS_compression_ratio = d; + } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_histogram = n; + } else if (sscanf(argv[i], "--comparisons=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_comparisons = n; + } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_use_existing_db = n; + } else if (sscanf(argv[i], "--reuse_logs=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_reuse_logs = n; + } else if 
(sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_compression = n; + } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { + FLAGS_num = n; + } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) { + FLAGS_reads = n; + } else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) { + FLAGS_threads = n; + } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) { + FLAGS_value_size = n; + } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) { + FLAGS_write_buffer_size = n; + } else if (sscanf(argv[i], "--max_file_size=%d%c", &n, &junk) == 1) { + FLAGS_max_file_size = n; + } else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) { + FLAGS_block_size = n; + } else if (sscanf(argv[i], "--key_prefix=%d%c", &n, &junk) == 1) { + FLAGS_key_prefix = n; + } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) { + FLAGS_cache_size = n; + } else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) { + FLAGS_bloom_bits = n; + } else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) { + FLAGS_open_files = n; + } else if (strncmp(argv[i], "--db=", 5) == 0) { + FLAGS_db = argv[i] + 5; + } else { + std::fprintf(stderr, "Invalid flag '%s'\n", argv[i]); + std::exit(1); + } + } + + leveldb::g_env = leveldb::Env::Default(); + + // Choose a location for the test database if none given with --db= + if (FLAGS_db == nullptr) { + leveldb::g_env->GetTestDirectory(&default_db_path); + default_db_path += "/dbbench"; + FLAGS_db = default_db_path.c_str(); + } + + leveldb::Benchmark benchmark; + benchmark.Run(); + return 0; +} diff --git a/benchmarks/db_bench_testDB.cc b/benchmarks/db_bench_testDB.cc new file mode 100644 index 0000000..6191132 --- /dev/null +++ b/benchmarks/db_bench_testDB.cc @@ -0,0 +1,1145 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
See the AUTHORS file for names of contributors. + +#include + +#include +#include +#include + +#include "leveldb/cache.h" +#include "leveldb/comparator.h" +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "leveldb/filter_policy.h" +#include "leveldb/status.h" +#include "leveldb/write_batch.h" +#include "port/port.h" +#include "util/crc32c.h" +#include "util/histogram.h" +#include "util/mutexlock.h" +#include "util/random.h" +#include "util/testutil.h" + +// #include "fielddb/field_db.h" +// using namespace fielddb; +#include "testdb/testdb.h" +using namespace testdb; +// Comma-separated list of operations to run in the specified order +// Actual benchmarks: +// fillseq -- write N values in sequential key order in async mode +// fillrandom -- write N values in random key order in async mode +// overwrite -- overwrite N values in random key order in async mode +// fillsync -- write N/1000 values in random key order in sync mode +// fill100K -- write N/1000 100K values in random order in async mode +// deleteseq -- delete N keys in sequential order +// deleterandom -- delete N keys in random order +// readseq -- read N times sequentially +// readreverse -- read N times in reverse order +// readrandom -- read N times in random order +// readmissing -- read N missing keys in random order +// readhot -- read N times in random order from 1% section of DB +// seekrandom -- N random seeks +// seekordered -- N ordered seeks +// open -- cost of opening a DB +// crc32c -- repeated crc32c of 4K of data +// Meta operations: +// compact -- Compact the entire DB +// stats -- Print DB stats +// sstables -- Print sstable info +// heapprofile -- Dump a heap profile (if supported by this port) +static const char* FLAGS_benchmarks = + "fillseq," + "fillsync," + "fillrandom," + "overwrite," + "readrandom," + "readrandom," // Extra run to allow previous compactions to quiesce + "readseq," + "readreverse," + "compact," + "readrandom," + "readseq," + "readreverse," + "fill100K," 
+ "crc32c," + "snappycomp," + "snappyuncomp," + "zstdcomp," + "zstduncomp,"; + +// Number of key/values to place in database +static int FLAGS_num = 1000000; + +// Number of read operations to do. If negative, do FLAGS_num reads. +static int FLAGS_reads = -1; + +// Number of concurrent threads to run. +static int FLAGS_threads = 1; + +// Size of each value +static int FLAGS_value_size = 100; + +// Arrange to generate values that shrink to this fraction of +// their original size after compression +static double FLAGS_compression_ratio = 0.5; + +// Print histogram of operation timings +static bool FLAGS_histogram = false; + +// Count the number of string comparisons performed +static bool FLAGS_comparisons = false; + +// Number of bytes to buffer in memtable before compacting +// (initialized to default value by "main") +static int FLAGS_write_buffer_size = 0; + +// Number of bytes written to each file. +// (initialized to default value by "main") +static int FLAGS_max_file_size = 0; + +// Approximate size of user data packed per block (before compression). +// (initialized to default value by "main") +static int FLAGS_block_size = 0; + +// Number of bytes to use as a cache of uncompressed data. +// Negative means use default settings. +static int FLAGS_cache_size = -1; + +// Maximum number of files to keep open at the same time (use default if == 0) +static int FLAGS_open_files = 0; + +// Bloom filter bits per key. +// Negative means use default settings. +static int FLAGS_bloom_bits = -1; + +// Common key prefix length. +static int FLAGS_key_prefix = 0; + +// If true, do not destroy the existing database. If you set this +// flag and also specify a benchmark that wants a fresh database, that +// benchmark will fail. +static bool FLAGS_use_existing_db = false; + +// If true, reuse existing log/MANIFEST files when re-opening a database. +static bool FLAGS_reuse_logs = false; + +// If true, use compression. 
+static bool FLAGS_compression = true; + +// Use the db with the following name. +static const char* FLAGS_db = nullptr; + +// ZSTD compression level to try out +static int FLAGS_zstd_compression_level = 1; + +namespace leveldb { + +namespace { +leveldb::Env* g_env = nullptr; + +class CountComparator : public Comparator { + public: + CountComparator(const Comparator* wrapped) : wrapped_(wrapped) {} + ~CountComparator() override {} + int Compare(const Slice& a, const Slice& b) const override { + count_.fetch_add(1, std::memory_order_relaxed); + return wrapped_->Compare(a, b); + } + const char* Name() const override { return wrapped_->Name(); } + void FindShortestSeparator(std::string* start, + const Slice& limit) const override { + wrapped_->FindShortestSeparator(start, limit); + } + + void FindShortSuccessor(std::string* key) const override { + return wrapped_->FindShortSuccessor(key); + } + + size_t comparisons() const { return count_.load(std::memory_order_relaxed); } + + void reset() { count_.store(0, std::memory_order_relaxed); } + + private: + mutable std::atomic count_{0}; + const Comparator* const wrapped_; +}; + +// Helper for quickly generating random data. +class RandomGenerator { + private: + std::string data_; + int pos_; + + public: + RandomGenerator() { + // We use a limited amount of data over and over again and ensure + // that it is larger than the compression window (32KB), and also + // large enough to serve all typical value sizes we want to write. + Random rnd(301); + std::string piece; + while (data_.size() < 1048576) { + // Add a short fragment that is as compressible as specified + // by FLAGS_compression_ratio. 
+ test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece); + data_.append(piece); + } + pos_ = 0; + } + + Slice Generate(size_t len) { + if (pos_ + len > data_.size()) { + pos_ = 0; + assert(len < data_.size()); + } + pos_ += len; + return Slice(data_.data() + pos_ - len, len); + } +}; + +class KeyBuffer { + public: + KeyBuffer() { + assert(FLAGS_key_prefix < sizeof(buffer_)); + memset(buffer_, 'a', FLAGS_key_prefix); + } + KeyBuffer& operator=(KeyBuffer& other) = delete; + KeyBuffer(KeyBuffer& other) = delete; + + void Set(int k) { + std::snprintf(buffer_ + FLAGS_key_prefix, + sizeof(buffer_) - FLAGS_key_prefix, "%016d", k); + } + + Slice slice() const { return Slice(buffer_, FLAGS_key_prefix + 16); } + + private: + char buffer_[1024]; +}; + +#if defined(__linux) +static Slice TrimSpace(Slice s) { + size_t start = 0; + while (start < s.size() && isspace(s[start])) { + start++; + } + size_t limit = s.size(); + while (limit > start && isspace(s[limit - 1])) { + limit--; + } + return Slice(s.data() + start, limit - start); +} +#endif + +static void AppendWithSpace(std::string* str, Slice msg) { + if (msg.empty()) return; + if (!str->empty()) { + str->push_back(' '); + } + str->append(msg.data(), msg.size()); +} + +class Stats { + private: + double start_; + double finish_; + double seconds_; + int done_; + int next_report_; + int64_t bytes_; + double last_op_finish_; + Histogram hist_; + std::string message_; + + public: + Stats() { Start(); } + + void Start() { + next_report_ = 100; + hist_.Clear(); + done_ = 0; + bytes_ = 0; + seconds_ = 0; + message_.clear(); + start_ = finish_ = last_op_finish_ = g_env->NowMicros(); + } + + void Merge(const Stats& other) { + hist_.Merge(other.hist_); + done_ += other.done_; + bytes_ += other.bytes_; + seconds_ += other.seconds_; + if (other.start_ < start_) start_ = other.start_; + if (other.finish_ > finish_) finish_ = other.finish_; + + // Just keep the messages from one thread + if (message_.empty()) message_ = 
other.message_; + } + + void Stop() { + finish_ = g_env->NowMicros(); + seconds_ = (finish_ - start_) * 1e-6; + } + + void AddMessage(Slice msg) { AppendWithSpace(&message_, msg); } + + void FinishedSingleOp() { + if (FLAGS_histogram) { + double now = g_env->NowMicros(); + double micros = now - last_op_finish_; + hist_.Add(micros); + if (micros > 20000) { + std::fprintf(stderr, "long op: %.1f micros%30s\r", micros, ""); + std::fflush(stderr); + } + last_op_finish_ = now; + } + + done_++; + if (done_ >= next_report_) { + if (next_report_ < 1000) + next_report_ += 100; + else if (next_report_ < 5000) + next_report_ += 500; + else if (next_report_ < 10000) + next_report_ += 1000; + else if (next_report_ < 50000) + next_report_ += 5000; + else if (next_report_ < 100000) + next_report_ += 10000; + else if (next_report_ < 500000) + next_report_ += 50000; + else + next_report_ += 100000; + std::fprintf(stderr, "... finished %d ops%30s\r", done_, ""); + std::fflush(stderr); + } + } + + void AddBytes(int64_t n) { bytes_ += n; } + + void Report(const Slice& name) { + // Pretend at least one op was done in case we are running a benchmark + // that does not call FinishedSingleOp(). + if (done_ < 1) done_ = 1; + + std::string extra; + if (bytes_ > 0) { + // Rate is computed on actual elapsed time, not the sum of per-thread + // elapsed times. + double elapsed = (finish_ - start_) * 1e-6; + char rate[100]; + std::snprintf(rate, sizeof(rate), "%6.1f MB/s", + (bytes_ / 1048576.0) / elapsed); + extra = rate; + } + AppendWithSpace(&extra, message_); + + std::fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", + name.ToString().c_str(), seconds_ * 1e6 / done_, + (extra.empty() ? "" : " "), extra.c_str()); + if (FLAGS_histogram) { + std::fprintf(stdout, "Microseconds per op:\n%s\n", + hist_.ToString().c_str()); + } + std::fflush(stdout); + } +}; + +// State shared by all concurrent executions of the same benchmark. 
+struct SharedState { + port::Mutex mu; + port::CondVar cv GUARDED_BY(mu); + int total GUARDED_BY(mu); + + // Each thread goes through the following states: + // (1) initializing + // (2) waiting for others to be initialized + // (3) running + // (4) done + + int num_initialized GUARDED_BY(mu); + int num_done GUARDED_BY(mu); + bool start GUARDED_BY(mu); + + SharedState(int total) + : cv(&mu), total(total), num_initialized(0), num_done(0), start(false) {} +}; + +// Per-thread state for concurrent executions of the same benchmark. +struct ThreadState { + int tid; // 0..n-1 when running in n threads + Random rand; // Has different seeds for different threads + Stats stats; + SharedState* shared; + + ThreadState(int index, int seed) : tid(index), rand(seed), shared(nullptr) {} +}; + +void Compress( + ThreadState* thread, std::string name, + std::function compress_func) { + RandomGenerator gen; + Slice input = gen.Generate(Options().block_size); + int64_t bytes = 0; + int64_t produced = 0; + bool ok = true; + std::string compressed; + while (ok && bytes < 1024 * 1048576) { // Compress 1G + ok = compress_func(input.data(), input.size(), &compressed); + produced += compressed.size(); + bytes += input.size(); + thread->stats.FinishedSingleOp(); + } + + if (!ok) { + thread->stats.AddMessage("(" + name + " failure)"); + } else { + char buf[100]; + std::snprintf(buf, sizeof(buf), "(output: %.1f%%)", + (produced * 100.0) / bytes); + thread->stats.AddMessage(buf); + thread->stats.AddBytes(bytes); + } +} + +void Uncompress( + ThreadState* thread, std::string name, + std::function compress_func, + std::function uncompress_func) { + RandomGenerator gen; + Slice input = gen.Generate(Options().block_size); + std::string compressed; + bool ok = compress_func(input.data(), input.size(), &compressed); + int64_t bytes = 0; + char* uncompressed = new char[input.size()]; + while (ok && bytes < 1024 * 1048576) { // Compress 1G + ok = uncompress_func(compressed.data(), compressed.size(), 
uncompressed); + bytes += input.size(); + thread->stats.FinishedSingleOp(); + } + delete[] uncompressed; + + if (!ok) { + thread->stats.AddMessage("(" + name + " failure)"); + } else { + thread->stats.AddBytes(bytes); + } +} + +} // namespace + +class Benchmark { + private: + Cache* cache_; + const FilterPolicy* filter_policy_; + testDB* db_; + int num_; + int value_size_; + int entries_per_batch_; + WriteOptions write_options_; + int reads_; + int heap_counter_; + CountComparator count_comparator_; + int total_thread_count_; + + void PrintHeader() { + const int kKeySize = 16 + FLAGS_key_prefix; + PrintEnvironment(); + std::fprintf(stdout, "Keys: %d bytes each\n", kKeySize); + std::fprintf( + stdout, "Values: %d bytes each (%d bytes after compression)\n", + FLAGS_value_size, + static_cast(FLAGS_value_size * FLAGS_compression_ratio + 0.5)); + std::fprintf(stdout, "Entries: %d\n", num_); + std::fprintf(stdout, "RawSize: %.1f MB (estimated)\n", + ((static_cast(kKeySize + FLAGS_value_size) * num_) / + 1048576.0)); + std::fprintf( + stdout, "FileSize: %.1f MB (estimated)\n", + (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) / + 1048576.0)); + PrintWarnings(); + std::fprintf(stdout, "------------------------------------------------\n"); + } + + void PrintWarnings() { +#if defined(__GNUC__) && !defined(__OPTIMIZE__) + std::fprintf( + stdout, + "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); +#endif +#ifndef NDEBUG + std::fprintf( + stdout, + "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); +#endif + + // See if snappy is working by attempting to compress a compressible string + const char text[] = "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"; + std::string compressed; + if (!port::Snappy_Compress(text, sizeof(text), &compressed)) { + std::fprintf(stdout, "WARNING: Snappy compression is not enabled\n"); + } else if (compressed.size() >= sizeof(text)) { + std::fprintf(stdout, "WARNING: Snappy compression is not 
effective\n"); + } + } + + void PrintEnvironment() { + std::fprintf(stderr, "LevelDB: version %d.%d\n", kMajorVersion, + kMinorVersion); + +#if defined(__linux) + time_t now = time(nullptr); + std::fprintf(stderr, "Date: %s", + ctime(&now)); // ctime() adds newline + + FILE* cpuinfo = std::fopen("/proc/cpuinfo", "r"); + if (cpuinfo != nullptr) { + char line[1000]; + int num_cpus = 0; + std::string cpu_type; + std::string cache_size; + while (fgets(line, sizeof(line), cpuinfo) != nullptr) { + const char* sep = strchr(line, ':'); + if (sep == nullptr) { + continue; + } + Slice key = TrimSpace(Slice(line, sep - 1 - line)); + Slice val = TrimSpace(Slice(sep + 1)); + if (key == "model name") { + ++num_cpus; + cpu_type = val.ToString(); + } else if (key == "cache size") { + cache_size = val.ToString(); + } + } + std::fclose(cpuinfo); + std::fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str()); + std::fprintf(stderr, "CPUCache: %s\n", cache_size.c_str()); + } +#endif + } + + public: + Benchmark() + : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : nullptr), + filter_policy_(FLAGS_bloom_bits >= 0 + ? NewBloomFilterPolicy(FLAGS_bloom_bits) + : nullptr), + db_(nullptr), + num_(FLAGS_num), + value_size_(FLAGS_value_size), + entries_per_batch_(1), + reads_(FLAGS_reads < 0 ? 
FLAGS_num : FLAGS_reads), + heap_counter_(0), + count_comparator_(BytewiseComparator()), + total_thread_count_(0) { + std::vector files; + g_env->GetChildren(FLAGS_db, &files); + for (size_t i = 0; i < files.size(); i++) { + if (Slice(files[i]).starts_with("heap-")) { + g_env->RemoveFile(std::string(FLAGS_db) + "/" + files[i]); + } + } + if (!FLAGS_use_existing_db) { + DestroyDB(FLAGS_db, Options()); + } + } + + ~Benchmark() { + delete db_; + delete cache_; + delete filter_policy_; + } + + void Run() { + PrintHeader(); + Open(); + + const char* benchmarks = FLAGS_benchmarks; + while (benchmarks != nullptr) { + const char* sep = strchr(benchmarks, ','); + Slice name; + if (sep == nullptr) { + name = benchmarks; + benchmarks = nullptr; + } else { + name = Slice(benchmarks, sep - benchmarks); + benchmarks = sep + 1; + } + + // Reset parameters that may be overridden below + num_ = FLAGS_num; + reads_ = (FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads); + value_size_ = FLAGS_value_size; + entries_per_batch_ = 1; + write_options_ = WriteOptions(); + + void (Benchmark::*method)(ThreadState*) = nullptr; + bool fresh_db = false; + int num_threads = FLAGS_threads; + + if (name == Slice("open")) { + method = &Benchmark::OpenBench; + num_ /= 10000; + if (num_ < 1) num_ = 1; + } else if (name == Slice("fillseq")) { + fresh_db = true; + method = &Benchmark::WriteSeq; + } else if (name == Slice("fillbatch")) { + fresh_db = true; + entries_per_batch_ = 1000; + method = &Benchmark::WriteSeq; + } else if (name == Slice("fillrandom")) { + fresh_db = true; + method = &Benchmark::WriteRandom; + } else if (name == Slice("overwrite")) { + fresh_db = false; + method = &Benchmark::WriteRandom; + } else if (name == Slice("fillsync")) { + fresh_db = true; + num_ /= 1000; + write_options_.sync = true; + method = &Benchmark::WriteRandom; + } else if (name == Slice("fill100K")) { + fresh_db = true; + num_ /= 1000; + value_size_ = 100 * 1000; + method = &Benchmark::WriteRandom; + } else if (name == 
Slice("readseq")) { + method = &Benchmark::ReadSequential; + } else if (name == Slice("readreverse")) { + method = &Benchmark::ReadReverse; + } else if (name == Slice("readrandom")) { + method = &Benchmark::ReadRandom; + } else if (name == Slice("readmissing")) { + method = &Benchmark::ReadMissing; + } else if (name == Slice("seekrandom")) { + method = &Benchmark::SeekRandom; + } else if (name == Slice("seekordered")) { + method = &Benchmark::SeekOrdered; + } else if (name == Slice("readhot")) { + method = &Benchmark::ReadHot; + } else if (name == Slice("readrandomsmall")) { + reads_ /= 1000; + method = &Benchmark::ReadRandom; + } else if (name == Slice("deleteseq")) { + method = &Benchmark::DeleteSeq; + } else if (name == Slice("deleterandom")) { + method = &Benchmark::DeleteRandom; + } else if (name == Slice("readwhilewriting")) { + num_threads++; // Add extra thread for writing + method = &Benchmark::ReadWhileWriting; + } else if (name == Slice("compact")) { + method = &Benchmark::Compact; + } else if (name == Slice("crc32c")) { + method = &Benchmark::Crc32c; + } else if (name == Slice("snappycomp")) { + method = &Benchmark::SnappyCompress; + } else if (name == Slice("snappyuncomp")) { + method = &Benchmark::SnappyUncompress; + } else if (name == Slice("zstdcomp")) { + method = &Benchmark::ZstdCompress; + } else if (name == Slice("zstduncomp")) { + method = &Benchmark::ZstdUncompress; + } else if (name == Slice("heapprofile")) { + HeapProfile(); + } else if (name == Slice("stats")) { + PrintStats("leveldb.stats"); + } else if (name == Slice("sstables")) { + PrintStats("leveldb.sstables"); + } else { + if (!name.empty()) { // No error message for empty name + std::fprintf(stderr, "unknown benchmark '%s'\n", + name.ToString().c_str()); + } + } + + if (fresh_db) { + if (FLAGS_use_existing_db) { + std::fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n", + name.ToString().c_str()); + method = nullptr; + } else { + delete db_; + db_ = nullptr; + 
DestroyDB(FLAGS_db, Options()); + Open(); + } + } + + if (method != nullptr) { + RunBenchmark(num_threads, name, method); + } + } + } + + private: + struct ThreadArg { + Benchmark* bm; + SharedState* shared; + ThreadState* thread; + void (Benchmark::*method)(ThreadState*); + }; + + static void ThreadBody(void* v) { + ThreadArg* arg = reinterpret_cast(v); + SharedState* shared = arg->shared; + ThreadState* thread = arg->thread; + { + MutexLock l(&shared->mu); + shared->num_initialized++; + if (shared->num_initialized >= shared->total) { + shared->cv.SignalAll(); + } + while (!shared->start) { + shared->cv.Wait(); + } + } + + thread->stats.Start(); + (arg->bm->*(arg->method))(thread); + thread->stats.Stop(); + + { + MutexLock l(&shared->mu); + shared->num_done++; + if (shared->num_done >= shared->total) { + shared->cv.SignalAll(); + } + } + } + + void RunBenchmark(int n, Slice name, + void (Benchmark::*method)(ThreadState*)) { + SharedState shared(n); + + ThreadArg* arg = new ThreadArg[n]; + for (int i = 0; i < n; i++) { + arg[i].bm = this; + arg[i].method = method; + arg[i].shared = &shared; + ++total_thread_count_; + // Seed the thread's random state deterministically based upon thread + // creation across all benchmarks. This ensures that the seeds are unique + // but reproducible when rerunning the same set of benchmarks. 
+ arg[i].thread = new ThreadState(i, /*seed=*/1000 + total_thread_count_); + arg[i].thread->shared = &shared; + g_env->StartThread(ThreadBody, &arg[i]); + } + + shared.mu.Lock(); + while (shared.num_initialized < n) { + shared.cv.Wait(); + } + + shared.start = true; + shared.cv.SignalAll(); + while (shared.num_done < n) { + shared.cv.Wait(); + } + shared.mu.Unlock(); + + for (int i = 1; i < n; i++) { + arg[0].thread->stats.Merge(arg[i].thread->stats); + } + arg[0].thread->stats.Report(name); + if (FLAGS_comparisons) { + fprintf(stdout, "Comparisons: %zu\n", count_comparator_.comparisons()); + count_comparator_.reset(); + fflush(stdout); + } + + for (int i = 0; i < n; i++) { + delete arg[i].thread; + } + delete[] arg; + } + + void Crc32c(ThreadState* thread) { + // Checksum about 500MB of data total + const int size = 4096; + const char* label = "(4K per op)"; + std::string data(size, 'x'); + int64_t bytes = 0; + uint32_t crc = 0; + while (bytes < 500 * 1048576) { + crc = crc32c::Value(data.data(), size); + thread->stats.FinishedSingleOp(); + bytes += size; + } + // Print so result is not dead + std::fprintf(stderr, "... 
crc=0x%x\r", static_cast(crc)); + + thread->stats.AddBytes(bytes); + thread->stats.AddMessage(label); + } + + void SnappyCompress(ThreadState* thread) { + Compress(thread, "snappy", &port::Snappy_Compress); + } + + void SnappyUncompress(ThreadState* thread) { + Uncompress(thread, "snappy", &port::Snappy_Compress, + &port::Snappy_Uncompress); + } + + void ZstdCompress(ThreadState* thread) { + Compress(thread, "zstd", + [](const char* input, size_t length, std::string* output) { + return port::Zstd_Compress(FLAGS_zstd_compression_level, input, + length, output); + }); + } + + void ZstdUncompress(ThreadState* thread) { + Uncompress( + thread, "zstd", + [](const char* input, size_t length, std::string* output) { + return port::Zstd_Compress(FLAGS_zstd_compression_level, input, + length, output); + }, + &port::Zstd_Uncompress); + } + + void Open() { + assert(db_ == nullptr); + Options options; + options.env = g_env; + options.create_if_missing = !FLAGS_use_existing_db; + options.block_cache = cache_; + options.write_buffer_size = FLAGS_write_buffer_size; + options.max_file_size = FLAGS_max_file_size; + options.block_size = FLAGS_block_size; + if (FLAGS_comparisons) { + options.comparator = &count_comparator_; + } + options.max_open_files = FLAGS_open_files; + options.filter_policy = filter_policy_; + options.reuse_logs = FLAGS_reuse_logs; + options.compression = + FLAGS_compression ? 
kSnappyCompression : kNoCompression; + // Status s = DB::Open(options, FLAGS_db, &db_); + db_ = new testDB(); + Status s = testDB::OpentestDB(options, FLAGS_db, &db_); + if (!s.ok()) { + std::fprintf(stderr, "open error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + + void OpenBench(ThreadState* thread) { + for (int i = 0; i < num_; i++) { + delete db_; + Open(); + thread->stats.FinishedSingleOp(); + } + } + + void WriteSeq(ThreadState* thread) { DoWrite(thread, true); } + + void WriteRandom(ThreadState* thread) { DoWrite(thread, false); } + + void DoWrite(ThreadState* thread, bool seq) { + if (num_ != FLAGS_num) { + char msg[100]; + std::snprintf(msg, sizeof(msg), "(%d ops)", num_); + thread->stats.AddMessage(msg); + } + + RandomGenerator gen; + WriteBatch batch; + Status s; + int64_t bytes = 0; + KeyBuffer key; + for (int i = 0; i < num_; i += entries_per_batch_) { + batch.Clear(); + for (int j = 0; j < entries_per_batch_; j++) { + const int k = seq ? i + j : thread->rand.Uniform(FLAGS_num); + key.Set(k); + batch.Put(key.slice(), gen.Generate(value_size_)); + bytes += value_size_ + key.slice().size(); + thread->stats.FinishedSingleOp(); + } + s = db_->Write(write_options_, &batch); + if (!s.ok()) { + std::fprintf(stderr, "put error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + thread->stats.AddBytes(bytes); + } + + void ReadSequential(ThreadState* thread) { + Iterator* iter = db_->NewIterator(ReadOptions()); + int i = 0; + int64_t bytes = 0; + for (iter->SeekToFirst(); i < reads_ && iter->Valid(); iter->Next()) { + bytes += iter->key().size() + iter->value().size(); + thread->stats.FinishedSingleOp(); + ++i; + } + delete iter; + thread->stats.AddBytes(bytes); + } + + void ReadReverse(ThreadState* thread) { + Iterator* iter = db_->NewIterator(ReadOptions()); + int i = 0; + int64_t bytes = 0; + for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) { + bytes += iter->key().size() + iter->value().size(); + 
thread->stats.FinishedSingleOp(); + ++i; + } + delete iter; + thread->stats.AddBytes(bytes); + } + + void ReadRandom(ThreadState* thread) { + ReadOptions options; + std::string value; + int found = 0; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + if (db_->Get(options, key.slice(), &value).ok()) { + found++; + } + thread->stats.FinishedSingleOp(); + } + char msg[100]; + std::snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); + thread->stats.AddMessage(msg); + } + + void ReadMissing(ThreadState* thread) { + ReadOptions options; + std::string value; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + Slice s = Slice(key.slice().data(), key.slice().size() - 1); + db_->Get(options, s, &value); + thread->stats.FinishedSingleOp(); + } + } + + void ReadHot(ThreadState* thread) { + ReadOptions options; + std::string value; + const int range = (FLAGS_num + 99) / 100; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + const int k = thread->rand.Uniform(range); + key.Set(k); + db_->Get(options, key.slice(), &value); + thread->stats.FinishedSingleOp(); + } + } + + void SeekRandom(ThreadState* thread) { + ReadOptions options; + int found = 0; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + Iterator* iter = db_->NewIterator(options); + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + iter->Seek(key.slice()); + if (iter->Valid() && iter->key() == key.slice()) found++; + delete iter; + thread->stats.FinishedSingleOp(); + } + char msg[100]; + snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); + thread->stats.AddMessage(msg); + } + + void SeekOrdered(ThreadState* thread) { + ReadOptions options; + Iterator* iter = db_->NewIterator(options); + int found = 0; + int k = 0; + KeyBuffer key; + for (int i = 0; i < reads_; i++) { + k = (k + (thread->rand.Uniform(100))) % FLAGS_num; + key.Set(k); + 
iter->Seek(key.slice()); + if (iter->Valid() && iter->key() == key.slice()) found++; + thread->stats.FinishedSingleOp(); + } + delete iter; + char msg[100]; + std::snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); + thread->stats.AddMessage(msg); + } + + void DoDelete(ThreadState* thread, bool seq) { + RandomGenerator gen; + WriteBatch batch; + Status s; + KeyBuffer key; + for (int i = 0; i < num_; i += entries_per_batch_) { + batch.Clear(); + for (int j = 0; j < entries_per_batch_; j++) { + const int k = seq ? i + j : (thread->rand.Uniform(FLAGS_num)); + key.Set(k); + batch.Delete(key.slice()); + thread->stats.FinishedSingleOp(); + } + s = db_->Write(write_options_, &batch); + if (!s.ok()) { + std::fprintf(stderr, "del error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + } + + void DeleteSeq(ThreadState* thread) { DoDelete(thread, true); } + + void DeleteRandom(ThreadState* thread) { DoDelete(thread, false); } + + void ReadWhileWriting(ThreadState* thread) { + if (thread->tid > 0) { + ReadRandom(thread); + } else { + // Special thread that keeps writing until other threads are done. + RandomGenerator gen; + KeyBuffer key; + while (true) { + { + MutexLock l(&thread->shared->mu); + if (thread->shared->num_done + 1 >= thread->shared->num_initialized) { + // Other threads have finished + break; + } + } + + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + Status s = + db_->Put(write_options_, key.slice(), gen.Generate(value_size_)); + if (!s.ok()) { + std::fprintf(stderr, "put error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + + // Do not count any of the preceding work/delay in stats. 
+ thread->stats.Start(); + } + } + + void Compact(ThreadState* thread) { db_->CompactRange(nullptr, nullptr); } + + void PrintStats(const char* key) { + std::string stats; + if (!db_->GetProperty(key, &stats)) { + stats = "(failed)"; + } + std::fprintf(stdout, "\n%s\n", stats.c_str()); + } + + static void WriteToFile(void* arg, const char* buf, int n) { + reinterpret_cast(arg)->Append(Slice(buf, n)); + } + + void HeapProfile() { + char fname[100]; + std::snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, + ++heap_counter_); + WritableFile* file; + Status s = g_env->NewWritableFile(fname, &file); + if (!s.ok()) { + std::fprintf(stderr, "%s\n", s.ToString().c_str()); + return; + } + bool ok = port::GetHeapProfile(WriteToFile, file); + delete file; + if (!ok) { + std::fprintf(stderr, "heap profiling not supported\n"); + g_env->RemoveFile(fname); + } + } +}; + +} // namespace leveldb + +int main(int argc, char** argv) { + FLAGS_write_buffer_size = leveldb::Options().write_buffer_size; + FLAGS_max_file_size = leveldb::Options().max_file_size; + FLAGS_block_size = leveldb::Options().block_size; + FLAGS_open_files = leveldb::Options().max_open_files; + std::string default_db_path; + + for (int i = 1; i < argc; i++) { + double d; + int n; + char junk; + if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) { + FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); + } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { + FLAGS_compression_ratio = d; + } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_histogram = n; + } else if (sscanf(argv[i], "--comparisons=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_comparisons = n; + } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_use_existing_db = n; + } else if (sscanf(argv[i], "--reuse_logs=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_reuse_logs = n; + } else if 
(sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 && + (n == 0 || n == 1)) { + FLAGS_compression = n; + } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { + FLAGS_num = n; + } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) { + FLAGS_reads = n; + } else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) { + FLAGS_threads = n; + } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) { + FLAGS_value_size = n; + } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) { + FLAGS_write_buffer_size = n; + } else if (sscanf(argv[i], "--max_file_size=%d%c", &n, &junk) == 1) { + FLAGS_max_file_size = n; + } else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) { + FLAGS_block_size = n; + } else if (sscanf(argv[i], "--key_prefix=%d%c", &n, &junk) == 1) { + FLAGS_key_prefix = n; + } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) { + FLAGS_cache_size = n; + } else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) { + FLAGS_bloom_bits = n; + } else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) { + FLAGS_open_files = n; + } else if (strncmp(argv[i], "--db=", 5) == 0) { + FLAGS_db = argv[i] + 5; + } else { + std::fprintf(stderr, "Invalid flag '%s'\n", argv[i]); + std::exit(1); + } + } + + leveldb::g_env = leveldb::Env::Default(); + + // Choose a location for the test database if none given with --db= + if (FLAGS_db == nullptr) { + leveldb::g_env->GetTestDirectory(&default_db_path); + default_db_path += "/dbbench"; + FLAGS_db = default_db_path.c_str(); + } + + leveldb::Benchmark benchmark; + benchmark.Run(); + return 0; +} diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 9bd93f1..c42a370 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -1,9 +1,14 @@ #include "fielddb/field_db.h" +#include #include #include +#include #include +#include #include #include +#include "leveldb/c.h" +#include "leveldb/cache.h" #include "leveldb/db.h" #include "leveldb/env.h" 
#include "leveldb/iterator.h" @@ -22,7 +27,7 @@ namespace fielddb { using namespace leveldb; //TODO:打开fieldDB -Status FieldDB::OpenFieldDB(const Options& options, +Status FieldDB::OpenFieldDB(Options& options, const std::string& name, FieldDB** dbptr) { // options.env->CreateDir("./abc") if(*dbptr == nullptr){ @@ -32,11 +37,18 @@ Status FieldDB::OpenFieldDB(const Options& options, // Status status; DB *indexdb, *kvdb, *metadb; + // options.block_cache = NewLRUCache(ULONG_MAX); + // options.max_open_files = 1000; + // options.write_buffer_size = 512 * 1024 * 1024; + // options.env = getPosixEnv(); status = Open(options, name+"_indexDB", &indexdb); if(!status.ok()) return status; - + + // options.env = getPosixEnv(); status = Open(options, name+"_kvDB", &kvdb); if(!status.ok()) return status; + + // options.env = getPosixEnv(); status = Open(options, name+"_metaDB", &metadb); if(!status.ok()) return status; @@ -45,7 +57,7 @@ Status FieldDB::OpenFieldDB(const Options& options, (*dbptr)->metaDB_ = metadb; (*dbptr)->dbname_ = name; - status = (*dbptr)->Recover(); + // status = (*dbptr)->Recover(); (*dbptr)->options_ = &options; (*dbptr)->env_ = options.env; @@ -118,6 +130,7 @@ Request *FieldDB::GetHandleInterval() { } Status FieldDB::HandleRequest(Request &req) { + uint64_t start_ = env_->NowMicros(); MutexLock L(&mutex_); taskqueue_.push_back(&req); Again: @@ -136,33 +149,61 @@ Again: { //1. 构建各个Batch。构建的过程中要保证索引状态的一致性,需要上锁。 MutexLock iL(&index_mu); + uint64_t start_construct = env_->NowMicros(); for(auto *req_ptr : taskqueue_) { req_ptr->ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, batchKeySet); if(req_ptr == tail) break; } + construct_elapsed += env_->NowMicros() - start_construct; } //2. 
首先写入meta,再并发写入index和kv,完成之后清除meta数据 //此处可以放锁是因为写入的有序性可以通过队列来保证 mutex_.Unlock(); + uint64_t start_write = env_->NowMicros(); WriteOptions op; - status = metaDB_->Write(op, &MetaBatch); - assert(status.ok()); + if(MetaBatch.ApproximateSize() > 12) { + uint64_t start_meta = env_->NowMicros(); + status = metaDB_->Write(op, &MetaBatch); + write_meta_elapsed += env_->NowMicros() - start_meta; + write_bytes += MetaBatch.ApproximateSize(); + assert(status.ok()); + } //TODO:index的写入需要在另外一个线程中同时完成 - status = indexDB_->Write(op, &IndexBatch); - assert(status.ok()); - status = kvDB_->Write(op, &KVBatch); - assert(status.ok()); + if(IndexBatch.ApproximateSize() > 12) { + uint64_t start_index = env_->NowMicros(); + status = indexDB_->Write(op, &IndexBatch); + write_index_elapsed += env_->NowMicros() - start_index; + write_bytes += IndexBatch.ApproximateSize(); + assert(status.ok()); + } + if(KVBatch.ApproximateSize() > 12) { + uint64_t start_kv = env_->NowMicros(); + status = kvDB_->Write(op, &KVBatch); + write_kv_elapsed += env_->NowMicros() - start_kv; + write_bytes += KVBatch.ApproximateSize(); + assert(status.ok()); + } //3. 
将meta数据清除 - MetaCleaner cleaner; - cleaner.Collect(MetaBatch); - cleaner.CleanMetaBatch(metaDB_); + if(MetaBatch.ApproximateSize() > 12) { + uint64_t start_clean = env_->NowMicros(); + MetaCleaner cleaner; + cleaner.Collect(MetaBatch); + cleaner.CleanMetaBatch(metaDB_); + write_clean_elapsed += env_->NowMicros() - start_clean; + } + write_elapsed += env_->NowMicros() - start_write; mutex_.Lock(); } else { //对于创建和删除索引的请求,通过prepare完成索引状态的更新 MutexLock iL(&index_mu); req.Prepare(this); } - + // { + // static int count = 0; + // if(count++ % 100000 == 0) { + // std::cout << "TaskQueue Size : " << taskqueue_.size() << std::endl; + // } + // } while(true) { Request *ready = taskqueue_.front(); // int debug = tail->type_; @@ -175,6 +216,11 @@ Again: } if (ready == tail) break; } + + elapsed += env_->NowMicros() - start_; + count ++; + dumpStatistics(); + if(!taskqueue_.empty()) { taskqueue_.front()->cond_.Signal(); } @@ -218,8 +264,19 @@ Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { } // TODO:根据updates里面的东西,要对是否需要更新index进行分别处理 Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { + { + uint64_t start_ = env_->NowMicros(); + Status status = kvDB_->Write(options, updates); + temp_elapsed += env_->NowMicros() - start_; + count ++; + dumpStatistics(); + return status; + } + //或许应该再做一个接口?或者基于现有的接口进行改造 + uint64_t start_ = env_->NowMicros(); BatchReq req(updates,&mutex_); + construct_BatchReq_init_elapsed += env_->NowMicros() - start_; Status status = HandleRequest(req); return status; assert(0); diff --git a/fielddb/field_db.h b/fielddb/field_db.h index f0fe5f2..27e8a86 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -1,5 +1,7 @@ #include "port/port_stdcxx.h" #include "db/db_impl.h" +#include +#include #include #include #include @@ -30,6 +32,7 @@ public: friend class iCreateReq; friend class iDeleteReq; friend class DeleteReq; + friend class BatchReq; //用的时候必须FieldDB *db = new FieldDB()再open,不能像之前一样DB *db FieldDB() : 
indexDB_(nullptr), kvDB_(nullptr), metaDB_(nullptr) {}; @@ -55,20 +58,21 @@ public: //返回当前数据库中索引状态,用来测试,不过也可以作为一个功能? IndexStatus GetIndexStatus(const std::string &fieldName); - static Status OpenFieldDB(const Options& options,const std::string& name,FieldDB** dbptr); + static Status OpenFieldDB(Options& options,const std::string& name,FieldDB** dbptr); private: //根据metaDB的内容进行恢复 Status Recover(); private: + leveldb::DB *kvDB_; + leveldb::DB *metaDB_; + leveldb::DB *indexDB_; + std::string dbname_; const Options *options_; Env *env_; - leveldb::DB *metaDB_; - leveldb::DB *indexDB_; - leveldb::DB *kvDB_; using FieldName = std::string; // 标记index的状态,如果是creating/deleting,则会附带相应的请求 @@ -85,6 +89,56 @@ private: Status HandleRequest(Request &req); //每个请求自行构造请求后交由这个函数处理 Request *GetHandleInterval(); //获得任务队列中的待处理区间,区间划分规则和原因见文档 +private: + int count = 0; + int count_Batch = 0; + int count_Batch_Sub = 0; + uint64_t elapsed = 0; + + uint64_t construct_elapsed = 0; + uint64_t construct_BatchReq_init_elapsed = 0; + uint64_t construct_BatchReq_elapsed = 0; + uint64_t construct_BatchReq_Sub_elapsed = 0; + uint64_t construct_BatchReq_perSub_elapsed = 0; + uint64_t construct_FieldsReq_Read_elapsed = 0; + + uint64_t write_elapsed = 0; + uint64_t write_meta_elapsed = 0; + uint64_t write_index_elapsed = 0; + uint64_t write_kv_elapsed = 0; + uint64_t write_clean_elapsed = 0; + + uint64_t write_bytes = 0; + uint64_t write_bytes_lim = 20 * 1024 * 1024; + + uint64_t temp_elapsed = 0; + + inline void dumpStatistics() { + if(count && count % 500000 == 0 || write_bytes && write_bytes > write_bytes_lim) { + std::cout << "=====================================================\n"; + std::cout << "Total Count : " << count; + std::cout << "\tTotal Write Bytes(MB) : " << write_bytes / 1048576.0 << std::endl; + std::cout << "Average Time(ms) : " << elapsed * 1.0 / count; + std::cout << "\tAverage Write rates(MB/s) : " << write_bytes / 1048576.0 / elapsed * 1000000 << std::endl; + std::cout << 
"Construct Time(ms) : " << construct_elapsed * 1.0 / count << std::endl; + std::cout << "\tConstruct BatchReq Init Time(ms) : " << construct_BatchReq_init_elapsed * 1.0 / count << std::endl; + std::cout << "\tConstruct BatchReq Time(ms) : " << construct_BatchReq_elapsed * 1.0 / count << std::endl; + std::cout << "\tConstruct BatchReq Sub Time(ms) : " << construct_BatchReq_Sub_elapsed * 1.0 / count << std::endl; + std::cout << "\tConstruct BatchReq perSub Time(ms) : " << construct_BatchReq_perSub_elapsed * 1.0 / count_Batch_Sub << std::endl; + std::cout << "\tConstruct FieldsReq Read Time(ms) : " << construct_FieldsReq_Read_elapsed * 1.0 / count << std::endl; + std::cout << "Write Time(ms) : " << write_elapsed * 1.0 / count << std::endl; + std::cout << "\tWrite Meta Time(ms) : " << write_meta_elapsed * 1.0 / count << std::endl; + std::cout << "\tWrite Index Time(ms) : " << write_index_elapsed * 1.0 / count << std::endl; + std::cout << "\tWrite KV Time(ms) : " << write_kv_elapsed * 1.0 / count << std::endl; + std::cout << "\tWrite Clean Time(ms) : " << write_clean_elapsed * 1.0 / count << std::endl; + std::cout << "TaskQueue Size : " << taskqueue_.size() << std::endl; + std::cout << "temp_elased : " << temp_elapsed * 1.0 / count<< std::endl; + // std::cout << MetaBatch.ApproximateSize() << " " << IndexBatch.ApproximateSize() << " " << KVBatch.ApproximateSize() << std::endl; + std::cout << "=====================================================\n"; + write_bytes_lim = write_bytes + 20 * 1024 * 1024; + std::fflush(stdout); + } + } }; Status DestroyDB(const std::string& name, diff --git a/fielddb/meta.cpp b/fielddb/meta.cpp index 13ee09d..11e1241 100644 --- a/fielddb/meta.cpp +++ b/fielddb/meta.cpp @@ -56,13 +56,14 @@ public: }; void MetaCleaner::Collect(WriteBatch &MetaBatch) { + if(MetaBatch.ApproximateSize() <= 12) return; CleanerHandler Handler; Handler.NeedClean = &NeedClean; MetaBatch.Iterate(&Handler); } void MetaCleaner::CleanMetaBatch(DB *metaDB) { - 
if(NeedClean.ApproximateSize() == 0) return; + if(NeedClean.ApproximateSize() <= 12) return; metaDB->Write(WriteOptions(), &NeedClean); } } \ No newline at end of file diff --git a/fielddb/request.cpp b/fielddb/request.cpp index 35524ee..d7757f9 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -1,5 +1,6 @@ #include "fielddb/request.h" #include +#include #include #include #include @@ -55,7 +56,10 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, batchKeySet.insert(*Key); } std::string val_str; - Status s = DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + Status s = Status::NotFound("test"); + uint64_t start_ = DB->env_->NowMicros(); + s = DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + DB->construct_FieldsReq_Read_elapsed += DB->env_->NowMicros() - start_; FieldArray *oldFields; if (s.IsNotFound()){ oldFields = nullptr; @@ -335,8 +339,8 @@ BatchReq::BatchReq(WriteBatch *Batch,port::Mutex *mu): //为key和value构造存储的地方,防止由于string的析构造成可能得内存访问错误 str_buf->push_back(key.ToString()); FieldArray *field = new FieldArray; - field = ParseValue(value.ToString(), field); - if (field == nullptr){ //batch中的value没有field + // field = ParseValue(value.ToString(), field); + if (field->empty()){ //batch中的value没有field fa_buf->push_back({{"",value.ToString()}}); } else { fa_buf->push_back(*field); @@ -383,18 +387,30 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; std::unordered_set Sub_batchKeySet; //由于batch是有顺序的,根据我们现在的一个key只处理最开始的算法,这里需要反向迭代 + uint64_t start_ = DB->env_->NowMicros(); for(auto subreq = sub_requests.rbegin(); subreq != sub_requests.rend(); subreq++ ) { + uint64_t start_sub = DB->env_->NowMicros(); (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); + DB->construct_BatchReq_perSub_elapsed += DB->env_->NowMicros() - start_sub; + DB->count_Batch_Sub ++; //所有的对于pendreq的调用传入的参数被改成了this->parent,因此,对于subrequests来说, 
//pendreq的传参为对应的Batchreq,因此,此处判断batchreq是否pending可以得到subreq是否有冲突 if(isPending()) { return; } } - KVBatch.Append(Sub_KVBatch); - IndexBatch.Append(Sub_IndexBatch); - MetaBatch.Append(Sub_MetaBatch); + DB->construct_BatchReq_Sub_elapsed += DB->env_->NowMicros() - start_; + if(Sub_KVBatch.ApproximateSize() > 12) { + KVBatch.Append(Sub_KVBatch); + } + if(Sub_IndexBatch.ApproximateSize() > 12) { + IndexBatch.Append(Sub_IndexBatch); + } + if(Sub_MetaBatch.ApproximateSize() > 12) { + MetaBatch.Append(Sub_MetaBatch); + } batchKeySet.insert(Sub_batchKeySet.begin(),Sub_batchKeySet.end()); + DB->construct_BatchReq_elapsed += DB->env_->NowMicros() - start_; } diff --git a/include/leveldb/env.h b/include/leveldb/env.h index e00895a..c165487 100644 --- a/include/leveldb/env.h +++ b/include/leveldb/env.h @@ -218,6 +218,8 @@ class LEVELDB_EXPORT Env { virtual void SleepForMicroseconds(int micros) = 0; }; +Env* getPosixEnv(); + // A file abstraction for reading sequentially through a file class LEVELDB_EXPORT SequentialFile { public: diff --git a/testdb/testdb.cc b/testdb/testdb.cc new file mode 100644 index 0000000..6baa209 --- /dev/null +++ b/testdb/testdb.cc @@ -0,0 +1,111 @@ +#include "testdb/testdb.h" +#include "db/db_impl.h" +#include +#include "leveldb/status.h" +using namespace testdb; + +Status testDB::OpentestDB(Options& options, + const std::string& name, testDB** dbptr) { + // options.env->CreateDir("./abc") + if(*dbptr == nullptr){ + return Status::NotSupported(name, "new a testDb first\n"); + } + + // + Status status; + DB *indexdb, *kvdb, *metadb; + // options.block_cache = NewLRUCache(ULONG_MAX); + // options.max_open_files = 1000; + // options.write_buffer_size = 512 * 1024 * 1024; + // options.env = getPosixEnv(); + // status = Open(options, name+"_indexDB", &indexdb); + // if(!status.ok()) return status; + // (*dbptr)->indexDB_ = indexdb; + + // options.env = getPosixEnv(); + status = DB::Open(options, name+"_kvDB", &kvdb); + if(!status.ok()) return status; + 
(*dbptr)->kvDB_ = kvdb; + + // options.env = getPosixEnv(); + // status = Open(options, name+"_metaDB", &metadb); + // if(!status.ok()) return status; + // (*dbptr)->metaDB_ = metadb; + + (*dbptr)->dbname_ = name; + + // status = (*dbptr)->Recover(); + + (*dbptr)->options_ = &options; + (*dbptr)->env_ = options.env; + return status; +} + +Status testDB::Put(const WriteOptions &options, const Slice &key, const Slice &value) { + return kvDB_->Put(options, key, value); +} + +Status testDB::PutFields(const WriteOptions &, const Slice &key, const FieldArray &tests) { + return Status::OK(); +} + +Status testDB::Delete(const WriteOptions &options, const Slice &key) { + return kvDB_->Delete(options, key); +} + +Status testDB::Write(const WriteOptions &options, WriteBatch *updates) { + return kvDB_->Write(options, updates); +} + +Status testDB::Get(const ReadOptions &options, const Slice &key, std::string *value) { + return kvDB_->Get(options, key, value); +} + +Status testDB::GetFields(const ReadOptions &options, const Slice &key, FieldArray *tests) { + return Status::OK(); +} + +std::vector testDB::FindKeysByField(Field &test) { + return std::vector(); +} + +Iterator * testDB::NewIterator(const ReadOptions &options) { + return kvDB_->NewIterator(options); +} + +const Snapshot * testDB::GetSnapshot() { + return kvDB_->GetSnapshot(); +} + +void testDB::ReleaseSnapshot(const Snapshot *snapshot) { + kvDB_->ReleaseSnapshot(snapshot); +} + +bool testDB::GetProperty(const Slice &property, std::string *value) { + return kvDB_->GetProperty(property, value); +} + +void testDB::GetApproximateSizes(const Range *range, int n, uint64_t *sizes) { + kvDB_->GetApproximateSizes(range, n, sizes); +} + +void testDB::CompactRange(const Slice *begin, const Slice *end) { + kvDB_->CompactRange(begin, end); +} + +Status DestroyDB(const std::string& name, const Options& options) { + Status s; + s = leveldb::DestroyDB(name+"_kvDB", options); + assert(s.ok()); +// s = 
leveldb::DestroyDB(name+"_indexDB", options); +// assert(s.ok()); +// s = leveldb::DestroyDB(name+"_metaDB", options); +// assert(s.ok()); + return s; +} + +testDB::~testDB() { + delete kvDB_; + // delete indexDB_; + // delete metaDB_; +} \ No newline at end of file diff --git a/testdb/testdb.h b/testdb/testdb.h new file mode 100644 index 0000000..d51598b --- /dev/null +++ b/testdb/testdb.h @@ -0,0 +1,72 @@ +#include "port/port_stdcxx.h" +#include "db/db_impl.h" +#include +#include +#include +#include +#include +#include +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "leveldb/options.h" +#include "leveldb/slice.h" +#include "leveldb/status.h" +#include +# ifndef test_DB_H +# define test_DB_H +namespace testdb { +using namespace leveldb; + +enum IndexStatus{ + Creating, + Deleting, + Exist, + NotExist + }; + +class testDB { +private: + leveldb::DB *kvDB_; + // leveldb::DB *metaDB_; + // leveldb::DB *indexDB_; + + std::string dbname_; + const Options *options_; + Env *env_; +public: + friend class Request; + friend class testsReq; + friend class iCreateReq; + friend class iDeleteReq; + friend class DeleteReq; + friend class BatchReq; + + //用的时候必须testDB *db = new testDB()再open,不能像之前一样DB *db + // testDB() : indexDB_(nullptr), kvDB_(nullptr), metaDB_(nullptr) {}; + testDB() : kvDB_(nullptr) { } + ~testDB(); +/*lab1的要求,作为db派生类要实现的虚函数*/ + Status Put(const WriteOptions &options, const Slice &key, const Slice &value) ; + Status PutFields(const WriteOptions &, const Slice &key, const FieldArray &tests) ; + Status Delete(const WriteOptions &options, const Slice &key) ; + Status Write(const WriteOptions &options, WriteBatch *updates) ; + Status Get(const ReadOptions &options, const Slice &key, std::string *value) ; + Status GetFields(const ReadOptions &options, const Slice &key, FieldArray *tests) ; + std::vector FindKeysByField(Field &test) ; + Iterator * NewIterator(const ReadOptions &options) ; + const Snapshot * GetSnapshot() ; + void ReleaseSnapshot(const 
Snapshot *snapshot) ; + bool GetProperty(const Slice &property, std::string *value) ; + void GetApproximateSizes(const Range *range, int n, uint64_t *sizes) ; + void CompactRange(const Slice *begin, const Slice *end) ; + + static Status OpentestDB(Options& options,const std::string& name,testDB** dbptr); + + + +}; + +Status DestroyDB(const std::string& name, + const Options& options); +} // end of namespace +# endif \ No newline at end of file diff --git a/util/env_posix.cc b/util/env_posix.cc index ffd06c4..ec18875 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -923,4 +923,8 @@ Env* Env::Default() { return env_container.env(); } +Env* getPosixEnv() { + return new PosixEnv; +} + } // namespace leveldb diff --git a/util/serialize_value.cc b/util/serialize_value.cc index 88aa844..73fb092 100644 --- a/util/serialize_value.cc +++ b/util/serialize_value.cc @@ -35,7 +35,7 @@ FieldArray *ParseValue(const std::string& value_str,FieldArray *fields){ valStr = valSlice.ToString(); res->emplace_back(nameStr, valStr); } else { - std::cout << "name and val not match!" << std::endl; + std::cout << "name and val not match! From ParseValue" << std::endl; } nameSlice.clear(); valSlice.clear(); diff --git a/util/serialize_value.h b/util/serialize_value.h index a337bc6..2405773 100644 --- a/util/serialize_value.h +++ b/util/serialize_value.h @@ -31,7 +31,7 @@ public: if(GetLengthPrefixedSlice(&valueSlice, &valSlice)) { map[nameSlice.ToString()] = valSlice.ToString(); } else { - std::cout << "name and val not match!" << std::endl; + std::cout << "name and val not match! 
From InternalFieldArray" << std::endl; } nameSlice.clear(); valSlice.clear(); From ea3d7c31f574e21cb6bf979d5db526bfa70d09f7 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sat, 28 Dec 2024 09:57:13 +0800 Subject: [PATCH 13/32] =?UTF-8?q?=E6=A8=A1=E6=8B=9F=E5=B4=A9=E6=BA=83?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E7=BB=BC=E5=90=88=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/recover_test.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/recover_test.cc b/test/recover_test.cc index 47cc731..36480b7 100644 --- a/test/recover_test.cc +++ b/test/recover_test.cc @@ -36,7 +36,7 @@ TEST(TestNormalRecover, Recover) { findKeysByAgeIndex(db, true); } -TEST(TestParalPutRecover, Recover) { +TEST(TestParalRecover, Recover) { //第一次运行 // fielddb::DestroyDB("testdb3.2",Options()); // FieldDB *db = new FieldDB(); @@ -47,24 +47,28 @@ TEST(TestParalPutRecover, Recover) { // } // db->CreateIndexOnField("address"); // db->CreateIndexOnField("age"); - // shanghaiKeys.clear(); - // age20Keys.clear(); - // int thread_num_ = 2; + // int thread_num_ = 4; // std::vector threads(thread_num_); // threads[0] = std::thread([db](){ // InsertFieldData(db); // }); // threads[1] = std::thread([db](){ + // WriteFieldData(db); + // }); + // threads[2] = std::thread([db](){ + // DeleteFieldData(db); + // }); + // threads[3] = std::thread([db](){ // InsertOneField(db); // delete db; // }); - // for (auto& t : threads) { + // for (auto& t : threads) { // if (t.joinable()) { // t.join(); // } // } - //线程1导致了线程0错误,测试会终止(模拟数据库崩溃) - //这会导致线程0在写入的各种奇怪的时间点崩溃 + //线程3导致了其他线程错误,测试会终止(模拟数据库崩溃) + //这会导致各线程在各种奇怪的时间点崩溃 //第二次运行注释掉上面的代码,运行下面的代码测试恢复 From 8dfdd9b9e92b8c26d4e4b37b24e3c90a47d49762 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sat, 28 Dec 2024 14:11:53 +0800 Subject: [PATCH 14/32] 
=?UTF-8?q?findkeysbyfield=20=E8=BF=AD=E4=BB=A3?= =?UTF-8?q?=E5=99=A8=E5=BC=95=E7=94=A8=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=EF=BC=8C=E6=B3=A8=E9=87=8A=E6=95=B4=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/db_impl.cc | 1 + fielddb/field_db.cpp | 210 ++++++++++++++++++++++++--------------------------- fielddb/request.cpp | 2 +- 3 files changed, 100 insertions(+), 113 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 6879b82..122760c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1183,6 +1183,7 @@ std::vector DBImpl::FindKeysByField(Field &field){ result.push_back(iter->key().ToString()); } } + delete iter; return result; } diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index c42a370..121f991 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -57,16 +57,14 @@ Status FieldDB::OpenFieldDB(Options& options, (*dbptr)->metaDB_ = metadb; (*dbptr)->dbname_ = name; - // status = (*dbptr)->Recover(); + status = (*dbptr)->Recover(); (*dbptr)->options_ = &options; (*dbptr)->env_ = options.env; return status; } -// TODO:Recover Status FieldDB::Recover() { - //TODO: //1. 
遍历所有Index类型的meta,重建内存中的index_状态表 Iterator *Iter = indexDB_->NewIterator(ReadOptions()); std::string IndexKey; @@ -76,10 +74,8 @@ Status FieldDB::Recover() { ParsedInternalIndexKey ParsedIndex; ParseInternalIndexKey(Slice(IndexKey),&ParsedIndex); index_[ParsedIndex.name_.ToString()] = {Exist,nullptr}; - //std::cout << "Existed Index : " << ParsedIndex.name_.ToString() << std::endl; //构建下一个搜索的对象,在原来的fieldname的基础上加一个最大的ascii字符(不可见字符) - //TODO:不知道这个做法有没有道理 std::string Seek; PutLengthPrefixedSlice(&Seek, ParsedIndex.name_); Seek.push_back(0xff); @@ -133,103 +129,100 @@ Status FieldDB::HandleRequest(Request &req) { uint64_t start_ = env_->NowMicros(); MutexLock L(&mutex_); taskqueue_.push_back(&req); -Again: - while(!req.done && &req != taskqueue_.front()) { - req.cond_.Wait(); - } - if(req.done) { - return req.s; //在返回时自动释放锁L - } - Request *tail = GetHandleInterval(); - WriteBatch KVBatch,IndexBatch,MetaBatch; - std::unordered_set batchKeySet; - Status status; - if(!tail->isiCreateReq() && !tail->isiDeleteReq()) { - //表明这一个区间并没有涉及index的创建删除 - { - //1. 构建各个Batch。构建的过程中要保证索引状态的一致性,需要上锁。 - MutexLock iL(&index_mu); - uint64_t start_construct = env_->NowMicros(); - for(auto *req_ptr : taskqueue_) { - req_ptr->ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, batchKeySet); - if(req_ptr == tail) break; - } - construct_elapsed += env_->NowMicros() - start_construct; - } - //2. 
首先写入meta,再并发写入index和kv,完成之后清除meta数据 - //此处可以放锁是因为写入的有序性可以通过队列来保证 - mutex_.Unlock(); - uint64_t start_write = env_->NowMicros(); - WriteOptions op; - if(MetaBatch.ApproximateSize() > 12) { - uint64_t start_meta = env_->NowMicros(); - status = metaDB_->Write(op, &MetaBatch); - write_meta_elapsed += env_->NowMicros() - start_meta; - write_bytes += MetaBatch.ApproximateSize(); - assert(status.ok()); - } - //TODO:index的写入需要在另外一个线程中同时完成 - if(IndexBatch.ApproximateSize() > 12) { - uint64_t start_index = env_->NowMicros(); - status = indexDB_->Write(op, &IndexBatch); - write_index_elapsed += env_->NowMicros() - start_index; - write_bytes += IndexBatch.ApproximateSize(); - assert(status.ok()); + while(true){ + while(!req.done && &req != taskqueue_.front()) { + req.cond_.Wait(); } - if(KVBatch.ApproximateSize() > 12) { - uint64_t start_kv = env_->NowMicros(); - status = kvDB_->Write(op, &KVBatch); - write_kv_elapsed += env_->NowMicros() - start_kv; - write_bytes += KVBatch.ApproximateSize(); - assert(status.ok()); + if(req.done) { + return req.s; //在返回时自动释放锁L } - //3. 将meta数据清除 - if(MetaBatch.ApproximateSize() > 12) { - uint64_t start_clean = env_->NowMicros(); - MetaCleaner cleaner; - cleaner.Collect(MetaBatch); - cleaner.CleanMetaBatch(metaDB_); - write_clean_elapsed += env_->NowMicros() - start_clean; + Request *tail = GetHandleInterval(); + WriteBatch KVBatch,IndexBatch,MetaBatch; + std::unordered_set batchKeySet; + Status status; + if(!tail->isiCreateReq() && !tail->isiDeleteReq()) { + //表明这一个区间并没有涉及index的创建删除 + { + //1. 构建各个Batch。构建的过程中要保证索引状态的一致性,需要上锁。 + MutexLock iL(&index_mu); + uint64_t start_construct = env_->NowMicros(); + for(auto *req_ptr : taskqueue_) { + req_ptr->ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, batchKeySet); + if(req_ptr == tail) break; + } + construct_elapsed += env_->NowMicros() - start_construct; + } + //2. 
首先写入meta,再并发写入index和kv,完成之后清除meta数据 + //此处可以放锁是因为写入的有序性可以通过队列来保证 + mutex_.Unlock(); + uint64_t start_write = env_->NowMicros(); + WriteOptions op; + if(MetaBatch.ApproximateSize() > 12) { + uint64_t start_meta = env_->NowMicros(); + status = metaDB_->Write(op, &MetaBatch); + write_meta_elapsed += env_->NowMicros() - start_meta; + write_bytes += MetaBatch.ApproximateSize(); + assert(status.ok()); + } + //TODO:index的写入需要在另外一个线程中同时完成 + if(IndexBatch.ApproximateSize() > 12) { + uint64_t start_index = env_->NowMicros(); + status = indexDB_->Write(op, &IndexBatch); + write_index_elapsed += env_->NowMicros() - start_index; + write_bytes += IndexBatch.ApproximateSize(); + assert(status.ok()); + } + if(KVBatch.ApproximateSize() > 12) { + uint64_t start_kv = env_->NowMicros(); + status = kvDB_->Write(op, &KVBatch); + write_kv_elapsed += env_->NowMicros() - start_kv; + write_bytes += KVBatch.ApproximateSize(); + assert(status.ok()); + } + //3. 将meta数据清除 + if(MetaBatch.ApproximateSize() > 12) { + uint64_t start_clean = env_->NowMicros(); + MetaCleaner cleaner; + cleaner.Collect(MetaBatch); + cleaner.CleanMetaBatch(metaDB_); + write_clean_elapsed += env_->NowMicros() - start_clean; + } + write_elapsed += env_->NowMicros() - start_write; + mutex_.Lock(); + } else { + //对于创建和删除索引的请求,通过prepare完成索引状态的更新 + MutexLock iL(&index_mu); + req.Prepare(this); } - write_elapsed += env_->NowMicros() - start_write; - mutex_.Lock(); - } else { - //对于创建和删除索引的请求,通过prepare完成索引状态的更新 - MutexLock iL(&index_mu); - req.Prepare(this); - } - // { - // static int count = 0; - // if(count++ % 100000 == 0) { - // std::cout << "TaskQueue Size : " << taskqueue_.size() << std::endl; - // } - // } - while(true) { - Request *ready = taskqueue_.front(); - // int debug = tail->type_; - taskqueue_.pop_front(); - //当前ready不是队首,不是和index的创建有关 - if(!ready->isPending() && !req.isiCreateReq() && !req.isiDeleteReq()) { - ready->s = status; - ready->done = true; - if (ready != &req) ready->cond_.Signal(); + // { + // 
static int count = 0; + // if(count++ % 100000 == 0) { + // std::cout << "TaskQueue Size : " << taskqueue_.size() << std::endl; + // } + // } + while(true) { + Request *ready = taskqueue_.front(); + // int debug = tail->type_; + taskqueue_.pop_front(); + //当前ready不是队首,不是和index的创建有关 + if(!ready->isPending() && !req.isiCreateReq() && !req.isiDeleteReq()) { + ready->s = status; + ready->done = true; + if (ready != &req) ready->cond_.Signal(); + } + if (ready == tail) break; } - if (ready == tail) break; - } - elapsed += env_->NowMicros() - start_; - count ++; - dumpStatistics(); + elapsed += env_->NowMicros() - start_; + count ++; + dumpStatistics(); - if(!taskqueue_.empty()) { - taskqueue_.front()->cond_.Signal(); + if(!taskqueue_.empty()) { + taskqueue_.front()->cond_.Signal(); + } + //如果done==true,那么就不会继续等待直接退出 + //如果处于某个请求的pending list里面,那么就会继续等待重新入队 } - //如果done==true,那么就不会继续等待直接退出 - //如果处于某个请求的pending list里面,那么就会继续等待重新入队 - //这里用了万恶的goto,蛤蛤 - goto Again; - - // return status; } // 这里把一个空串作为常规put的name @@ -242,7 +235,6 @@ Status FieldDB::Put(const WriteOptions &options, const Slice &key, const Slice & // 需要对是否进行index更新做处理 Status FieldDB::PutFields(const WriteOptions &Options, const Slice &key, const FieldArray &fields) { - //这里是为了const和slice-string的转换被迫搞得 std::string key_ = key.ToString(); FieldArray fields_ = fields; @@ -250,7 +242,6 @@ Status FieldDB::PutFields(const WriteOptions &Options, Status status = HandleRequest(req); return status; - // return kvDB_->PutFields(Options, key, fields); } // 删除有索引的key时indexdb也要同步 @@ -260,31 +251,26 @@ Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { DeleteReq req(&key_,&mutex_); Status status = HandleRequest(req); return status; - // return kvDB_->Delete(options, key); } -// TODO:根据updates里面的东西,要对是否需要更新index进行分别处理 -Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { - { - uint64_t start_ = env_->NowMicros(); - Status status = kvDB_->Write(options, updates); - temp_elapsed += 
env_->NowMicros() - start_; - count ++; - dumpStatistics(); - return status; - } - //或许应该再做一个接口?或者基于现有的接口进行改造 +// 根据updates里面的东西,要对是否需要更新index进行分别处理 +Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { + // { + // uint64_t start_ = env_->NowMicros(); + // Status status = kvDB_->Write(options, updates); + // temp_elapsed += env_->NowMicros() - start_; + // count ++; + // dumpStatistics(); + // return status; + // } uint64_t start_ = env_->NowMicros(); BatchReq req(updates,&mutex_); construct_BatchReq_init_elapsed += env_->NowMicros() - start_; Status status = HandleRequest(req); return status; - assert(0); - return Status::OK(); } //由于常规put将空串作为name,这里也需要适当修改 Status FieldDB::Get(const ReadOptions &options, const Slice &key, std::string *value) { - // return kvDB_->Get(options, key, value); FieldArray fields; Status s = GetFields(options, key, &fields); if(!s.ok()) { diff --git a/fielddb/request.cpp b/fielddb/request.cpp index d7757f9..b07ab2d 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -339,7 +339,7 @@ BatchReq::BatchReq(WriteBatch *Batch,port::Mutex *mu): //为key和value构造存储的地方,防止由于string的析构造成可能得内存访问错误 str_buf->push_back(key.ToString()); FieldArray *field = new FieldArray; - // field = ParseValue(value.ToString(), field); + field = ParseValue(value.ToString(), field); if (field->empty()){ //batch中的value没有field fa_buf->push_back({{"",value.ToString()}}); } else { From 0c1d366879fb4d50b4b5b2c3417f29a578821a02 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sat, 28 Dec 2024 17:36:09 +0800 Subject: [PATCH 15/32] fix bug --- benchmarks/db_bench.cc | 2 ++ fielddb/field_db.h | 4 ++-- fielddb/request.cpp | 30 ++++++++++++++++-------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/benchmarks/db_bench.cc b/benchmarks/db_bench.cc index 72d962c..4ff7f05 100644 --- a/benchmarks/db_bench.cc +++ b/benchmarks/db_bench.cc @@ -1127,6 +1127,8 @@ int main(int argc, char** argv) { } } + FLAGS_num /= FLAGS_threads; 
+ leveldb::g_env = leveldb::Env::Default(); // Choose a location for the test database if none given with --db= diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 27e8a86..c19e7da 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -109,7 +109,7 @@ private: uint64_t write_clean_elapsed = 0; uint64_t write_bytes = 0; - uint64_t write_bytes_lim = 20 * 1024 * 1024; + uint64_t write_bytes_lim = 50 * 1024 * 1024; uint64_t temp_elapsed = 0; @@ -135,7 +135,7 @@ private: std::cout << "temp_elased : " << temp_elapsed * 1.0 / count<< std::endl; // std::cout << MetaBatch.ApproximateSize() << " " << IndexBatch.ApproximateSize() << " " << KVBatch.ApproximateSize() << std::endl; std::cout << "=====================================================\n"; - write_bytes_lim = write_bytes + 20 * 1024 * 1024; + write_bytes_lim = write_bytes + 50 * 1024 * 1024; std::fflush(stdout); } } diff --git a/fielddb/request.cpp b/fielddb/request.cpp index b07ab2d..cefb159 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -17,6 +17,8 @@ namespace fielddb { using namespace leveldb; +const char EMPTY[1] = {0}; + //为虚函数提供最基本的实现 void Request::PendReq(Request *req) { assert(0); @@ -77,7 +79,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, DB->index_mu.AssertHeld(); //1.将存在冲突的put pend到对应的请求 for(auto [field_name,field_value] : *Fields) { - if(field_name == "") break; + if(field_name == EMPTY) break; if(DB->index_.count(field_name)) { auto [index_status,parent_req] = DB->index_[field_name]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { @@ -92,7 +94,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //冲突也可能存在于,需要删除旧数据的索引,但该索引正在创删中 if (oldFields != nullptr){ for(auto [field_name,field_value] : *oldFields) { - if(field_name == "") break; + if(field_name == EMPTY) break; if(DB->index_.count(field_name)) { auto [index_status,parent_req] = DB->index_[field_name]; if(index_status == 
IndexStatus::Creating || index_status == IndexStatus::Deleting) { @@ -119,7 +121,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //3.1对于含有索引的oldfield删除索引 if (HasOldIndex) { for(auto [field_name,field_value] : *oldFields) { - if(field_name == "") continue; + if(field_name == EMPTY) continue; if(DB->index_.count(field_name)) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( @@ -132,7 +134,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //3.2对于含有索引的field建立索引 if (HasIndex) { for(auto [field_name,field_value] : *Fields) { - if(field_name == "") continue; + if(field_name == EMPTY) continue; if(DB->index_.count(field_name)) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( @@ -177,7 +179,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, DB->index_mu.AssertHeld(); //1.将存在冲突的delete pend到对应的请求 for(auto [field_name,field_value] : *Fields) { - if(field_name == "") break; + if(field_name == EMPTY) break; if(DB->index_.count(field_name)) { auto [index_status,parent_req] = DB->index_[field_name]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { @@ -198,7 +200,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, MetaBatch.Put(MetaKey, Slice()); //3.对于含有索引的field删除索引 for(auto [field_name,field_value] : *Fields) { - if(field_name == "") continue; + if(field_name == EMPTY) continue; if(DB->index_.count(field_name)) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( @@ -338,17 +340,17 @@ BatchReq::BatchReq(WriteBatch *Batch,port::Mutex *mu): void Put(const Slice &key, const Slice &value) override { //为key和value构造存储的地方,防止由于string的析构造成可能得内存访问错误 str_buf->push_back(key.ToString()); - FieldArray *field = new FieldArray; - field = ParseValue(value.ToString(), field); - if (field->empty()){ //batch中的value没有field - fa_buf->push_back({{"",value.ToString()}}); - } else { 
- fa_buf->push_back(*field); - } + fa_buf->push_back({{EMPTY,value.ToString()}}); + // FieldArray *field = new FieldArray; + // field = ParseValue(value.ToString(), field); + // if (field->empty()){ //batch中的value没有field + // } else { + // fa_buf->push_back(*field); + // } sub_requests->emplace_back(new FieldsReq(&str_buf->back(),&fa_buf->back(),mu)); sub_requests->back()->parent = req; - delete field; + // delete field; } void Delete(const Slice &key) override { str_buf->push_back(key.ToString()); From 65bbece7d0ee8bed9d12e6f9a5cfc11cd1b6690c Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sat, 28 Dec 2024 17:38:33 +0800 Subject: [PATCH 16/32] =?UTF-8?q?=E5=AF=B9benchmark=E9=83=A8=E5=88=86?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench_FieldDB.cc | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index a0f9a21..3abf7c5 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -664,7 +664,7 @@ class Benchmark { } else { delete db_; db_ = nullptr; - DestroyDB(FLAGS_db, Options()); + //DestroyDB(FLAGS_db, Options()); Open(); } } @@ -821,6 +821,7 @@ class Benchmark { options.compression = FLAGS_compression ? kSnappyCompression : kNoCompression; // Status s = DB::Open(options, FLAGS_db, &db_); + fielddb::DestroyDB(FLAGS_db, options); db_ = new FieldDB(); Status s = FieldDB::OpenFieldDB(options, FLAGS_db, &db_); if (!s.ok()) { @@ -858,8 +859,20 @@ class Benchmark { for (int j = 0; j < entries_per_batch_; j++) { const int k = seq ? 
i + j : thread->rand.Uniform(FLAGS_num); key.Set(k); - batch.Put(key.slice(), gen.Generate(value_size_)); - bytes += value_size_ + key.slice().size(); + + std::string name = "customer#" + std::to_string(k); + //这个字段用来查找 + std::string age = std::to_string(thread->rand.Uniform(FLAGS_num) % 100); + //这个字段填充长度 + std::string tag = gen.Generate(value_size_).ToString(); + FieldArray fields = { + {"name", name}, + {"age", age}, + {"tag", tag} + }; + Slice value = SerializeValue(fields); + batch.Put(key.slice(), value); + bytes += value.size() + key.slice().size(); thread->stats.FinishedSingleOp(); } s = db_->Write(write_options_, &batch); @@ -899,13 +912,13 @@ class Benchmark { void ReadRandom(ThreadState* thread) { ReadOptions options; - std::string value; int found = 0; KeyBuffer key; for (int i = 0; i < reads_; i++) { const int k = thread->rand.Uniform(FLAGS_num); key.Set(k); - if (db_->Get(options, key.slice(), &value).ok()) { + FieldArray fields_ret; + if (db_->GetFields(options, key.slice(), &fields_ret).ok()) { found++; } thread->stats.FinishedSingleOp(); @@ -917,26 +930,26 @@ class Benchmark { void ReadMissing(ThreadState* thread) { ReadOptions options; - std::string value; + FieldArray fields_ret; KeyBuffer key; for (int i = 0; i < reads_; i++) { const int k = thread->rand.Uniform(FLAGS_num); key.Set(k); Slice s = Slice(key.slice().data(), key.slice().size() - 1); - db_->Get(options, s, &value); + db_->GetFields(options, s, &fields_ret); thread->stats.FinishedSingleOp(); } } void ReadHot(ThreadState* thread) { ReadOptions options; - std::string value; + FieldArray fields_ret; const int range = (FLAGS_num + 99) / 100; KeyBuffer key; for (int i = 0; i < reads_; i++) { const int k = thread->rand.Uniform(range); key.Set(k); - db_->Get(options, key.slice(), &value); + db_->GetFields(options, key.slice(), &fields_ret); thread->stats.FinishedSingleOp(); } } From 4c938eb956ae880d70d4a835f5d5f93db9231d9b Mon Sep 17 00:00:00 2001 From: augurier 
<14434658+augurier@user.noreply.gitee.com> Date: Sat, 28 Dec 2024 18:34:20 +0800 Subject: [PATCH 17/32] =?UTF-8?q?=E7=BB=9F=E4=B8=80=E4=BA=86writeoption?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 25 ++++++++++++------------- fielddb/field_db.h | 6 +++--- test/basic_function_test.cc | 7 +++---- test/helper.cc | 1 + test/parallel_test.cc | 26 +++++++++++++------------- test/recover_test.cc | 4 ++-- 6 files changed, 34 insertions(+), 35 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 121f991..db7cee1 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -107,7 +107,7 @@ Status FieldDB::Recover() { //在所有的请求完成后,会自动把metaDB的内容清空。 Iter = metaDB_->NewIterator(ReadOptions()); Iter->SeekToFirst(); - std::cout << "Iter Valid : " << Iter->Valid() << std::endl; + //std::cout << "Iter Valid : " << Iter->Valid() << std::endl; delete Iter; //3. 等待所有请求完成 return Status::OK(); @@ -125,7 +125,7 @@ Request *FieldDB::GetHandleInterval() { return tail; } -Status FieldDB::HandleRequest(Request &req) { +Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { uint64_t start_ = env_->NowMicros(); MutexLock L(&mutex_); taskqueue_.push_back(&req); @@ -156,7 +156,6 @@ Status FieldDB::HandleRequest(Request &req) { //此处可以放锁是因为写入的有序性可以通过队列来保证 mutex_.Unlock(); uint64_t start_write = env_->NowMicros(); - WriteOptions op; if(MetaBatch.ApproximateSize() > 12) { uint64_t start_meta = env_->NowMicros(); status = metaDB_->Write(op, &MetaBatch); @@ -215,7 +214,7 @@ Status FieldDB::HandleRequest(Request &req) { elapsed += env_->NowMicros() - start_; count ++; - dumpStatistics(); + //dumpStatistics(); if(!taskqueue_.empty()) { taskqueue_.front()->cond_.Signal(); @@ -240,7 +239,7 @@ Status FieldDB::PutFields(const WriteOptions &Options, FieldsReq req(&key_,&fields_,&mutex_); - Status status = HandleRequest(req); + Status status = HandleRequest(req, Options); return status; } @@ 
-249,7 +248,7 @@ Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { std::string key_ = key.ToString(); DeleteReq req(&key_,&mutex_); - Status status = HandleRequest(req); + Status status = HandleRequest(req, options); return status; } @@ -266,7 +265,7 @@ Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { uint64_t start_ = env_->NowMicros(); BatchReq req(updates,&mutex_); construct_BatchReq_init_elapsed += env_->NowMicros() - start_; - Status status = HandleRequest(req); + Status status = HandleRequest(req, options); return status; } //由于常规put将空串作为name,这里也需要适当修改 @@ -305,10 +304,10 @@ std::vector> FieldDB::FindKeysAndValByFieldN return result; } -Status FieldDB::CreateIndexOnField(const std::string& field_name) { +Status FieldDB::CreateIndexOnField(const std::string& field_name, const WriteOptions &op) { std::string Field = field_name; iCreateReq req(&Field,&mutex_); - HandleRequest(req); + HandleRequest(req, op); //如果已经存在索引,那么直接返回 if(req.Existed) { return req.s; @@ -316,15 +315,15 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name) { WriteBatch KVBatch,IndexBatch,MetaBatch; std::unordered_set useless; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); - indexDB_->Write(WriteOptions(), &IndexBatch); + indexDB_->Write(op, &IndexBatch); req.Finalize(this); return req.s; } -Status FieldDB::DeleteIndex(const std::string &field_name) { +Status FieldDB::DeleteIndex(const std::string &field_name, const WriteOptions &op) { std::string Field = field_name; iDeleteReq req(&Field,&mutex_); - HandleRequest(req); + HandleRequest(req, op); //如果已经被删除或者不存在,那么可以直接返回 if(req.Deleted) { return req.s; @@ -332,7 +331,7 @@ Status FieldDB::DeleteIndex(const std::string &field_name) { WriteBatch KVBatch,IndexBatch,MetaBatch; std::unordered_set useless; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); - indexDB_->Write(WriteOptions(), &IndexBatch); + indexDB_->Write(op, &IndexBatch); 
req.Finalize(this); return req.s; } diff --git a/fielddb/field_db.h b/fielddb/field_db.h index c19e7da..8775952 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -52,8 +52,8 @@ public: void GetApproximateSizes(const Range *range, int n, uint64_t *sizes) override; void CompactRange(const Slice *begin, const Slice *end) override; /*与索引相关*/ - Status CreateIndexOnField(const std::string& field_name); - Status DeleteIndex(const std::string &field_name); + Status CreateIndexOnField(const std::string& field_name, const WriteOptions &op); + Status DeleteIndex(const std::string &field_name, const WriteOptions &op); std::vector QueryByIndex(const Field &field, Status *s); //返回当前数据库中索引状态,用来测试,不过也可以作为一个功能? IndexStatus GetIndexStatus(const std::string &fieldName); @@ -86,7 +86,7 @@ private: const std::string &fieldName); /*For request handling*/ - Status HandleRequest(Request &req); //每个请求自行构造请求后交由这个函数处理 + Status HandleRequest(Request &req, const WriteOptions &op); //每个请求自行构造请求后交由这个函数处理 Request *GetHandleInterval(); //获得任务队列中的待处理区间,区间划分规则和原因见文档 private: diff --git a/test/basic_function_test.cc b/test/basic_function_test.cc index 729fb73..73ba4cc 100644 --- a/test/basic_function_test.cc +++ b/test/basic_function_test.cc @@ -34,18 +34,17 @@ TEST(TestLab2, Basic) { std::cerr << "open db failed" << std::endl; abort(); } - // ClearDB(db); shanghaiKeys.clear(); age20Keys.clear(); InsertFieldData(db); // GetFieldData(db); // findKeysByCity(db); - db->CreateIndexOnField("address"); - db->CreateIndexOnField("age"); + db->CreateIndexOnField("address", op); + db->CreateIndexOnField("age", op); findKeysByCityIndex(db, true); findKeysByAgeIndex(db, true); - db->DeleteIndex("address"); + db->DeleteIndex("address", op); findKeysByCityIndex(db, false); findKeysByAgeIndex(db, true); diff --git a/test/helper.cc b/test/helper.cc index a0b9e79..2ce5e2d 100644 --- a/test/helper.cc +++ b/test/helper.cc @@ -22,6 +22,7 @@ ThreadSafeSet age20Keys; 
//目前只有InsertFieldData和InsertOneField和writeFieldData会往里加, //DeleteFieldData和InsertOneField会删除, //其他测试之间有必要手动clear +const WriteOptions op; Status OpenDB(std::string dbName, FieldDB **db) { Options options; diff --git a/test/parallel_test.cc b/test/parallel_test.cc index 6925df7..f9cfcfc 100644 --- a/test/parallel_test.cc +++ b/test/parallel_test.cc @@ -18,8 +18,8 @@ TEST(TestReadPut, Parallel) { abort(); } // ClearDB(db); - db->CreateIndexOnField("address"); - db->CreateIndexOnField("age"); + db->CreateIndexOnField("address", op); + db->CreateIndexOnField("age", op); int thread_num_ = 5; std::vector threads(thread_num_); //二写三读 @@ -70,7 +70,7 @@ TEST(TestPutCreatei, Parallel) { { if (i == 0) {//创建索引 threads[i] = std::thread([db](){ - db->CreateIndexOnField("address"); + db->CreateIndexOnField("address", op); std::cout << "finish create index\n"; }); } else {//写 @@ -118,7 +118,7 @@ TEST(TestCreateiCreatei, Parallel) { { //3线程并发创建索引address threads[i] = std::thread([db](){ - db->CreateIndexOnField("address"); + db->CreateIndexOnField("address", op); std::cout << "finish create index address\n"; }); } @@ -139,12 +139,12 @@ TEST(TestCreateiCreatei, Parallel) { { if (i == 0 || i == 1) {//2线程删除索引address threads[i] = std::thread([db](){ - db->DeleteIndex("address"); + db->DeleteIndex("address", op); std::cout << "finish delete index address\n"; }); } else {//1线程创建索引age threads[i] = std::thread([db](){ - db->CreateIndexOnField("age"); + db->CreateIndexOnField("age", op); std::cout << "finish create index age\n"; }); } @@ -175,8 +175,8 @@ TEST(TestPutDeleteOne, Parallel) { // ClearDB(db); shanghaiKeys.clear(); age20Keys.clear(); - db->CreateIndexOnField("address"); - db->CreateIndexOnField("age"); + db->CreateIndexOnField("address", op); + db->CreateIndexOnField("age", op); int thread_num_ = 20; std::vector threads(thread_num_); for (size_t i = 0; i < thread_num_; i++) @@ -222,8 +222,8 @@ TEST(TestPutDelete, Parallel) { // ClearDB(db); shanghaiKeys.clear(); age20Keys.clear(); 
- db->CreateIndexOnField("address"); - db->CreateIndexOnField("age"); + db->CreateIndexOnField("address", op); + db->CreateIndexOnField("age", op); int thread_num_ = 4; std::vector threads(thread_num_); threads[0] = std::thread([db](){InsertFieldData(db);}); @@ -255,11 +255,11 @@ TEST(TestWrite, Parallel) { // ClearDB(db); shanghaiKeys.clear(); age20Keys.clear(); - db->CreateIndexOnField("address"); + db->CreateIndexOnField("address", op); InsertFieldData(db, 2); //先填点数据,让创建索引的时间久一点 int thread_num_ = 5; std::vector threads(thread_num_); - threads[0] = std::thread([db](){db->CreateIndexOnField("age");}); + threads[0] = std::thread([db](){db->CreateIndexOnField("age", op);}); threads[1] = std::thread([db](){ while (db->GetIndexStatus("age") == NotExist){ continue; //开始创建了再并发的写 @@ -279,7 +279,7 @@ TEST(TestWrite, Parallel) { while (db->GetIndexStatus("age") == NotExist){ continue; } - db->DeleteIndex("age");}); + db->DeleteIndex("age", op);}); for (auto& t : threads) { if (t.joinable()) { diff --git a/test/recover_test.cc b/test/recover_test.cc index 36480b7..de06aea 100644 --- a/test/recover_test.cc +++ b/test/recover_test.cc @@ -16,8 +16,8 @@ TEST(TestNormalRecover, Recover) { std::cerr << "open db failed" << std::endl; abort(); } - db->CreateIndexOnField("address"); - db->CreateIndexOnField("age"); + db->CreateIndexOnField("address", op); + db->CreateIndexOnField("age", op); InsertFieldData(db); bool allowNotFound = false; GetFieldData(db, allowNotFound); From 55c59fe0215eaee11db76b5162a72b6c35fb0f79 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sat, 28 Dec 2024 19:38:52 +0800 Subject: [PATCH 18/32] fix --- fielddb/field_db.cpp | 2 +- fielddb/field_db.h | 2 ++ fielddb/request.cpp | 16 +++++++--------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 121f991..a9ebc01 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -227,7 +227,7 @@ Status FieldDB::HandleRequest(Request 
&req) { // 这里把一个空串作为常规put的name Status FieldDB::Put(const WriteOptions &options, const Slice &key, const Slice &value) { - FieldArray FA = {{"",value.ToString()}}; + FieldArray FA = {{EMPTY,value.ToString()}}; return PutFields(options, key, FA); // return kvDB_->Put(options, key, value); } diff --git a/fielddb/field_db.h b/fielddb/field_db.h index c19e7da..c21680a 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -18,6 +18,8 @@ namespace fielddb { using namespace leveldb; +const char EMPTY[1] = {0}; + enum IndexStatus{ Creating, Deleting, diff --git a/fielddb/request.cpp b/fielddb/request.cpp index cefb159..c08c58a 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -17,8 +17,6 @@ namespace fielddb { using namespace leveldb; -const char EMPTY[1] = {0}; - //为虚函数提供最基本的实现 void Request::PendReq(Request *req) { assert(0); @@ -340,13 +338,13 @@ BatchReq::BatchReq(WriteBatch *Batch,port::Mutex *mu): void Put(const Slice &key, const Slice &value) override { //为key和value构造存储的地方,防止由于string的析构造成可能得内存访问错误 str_buf->push_back(key.ToString()); - fa_buf->push_back({{EMPTY,value.ToString()}}); - // FieldArray *field = new FieldArray; - // field = ParseValue(value.ToString(), field); - // if (field->empty()){ //batch中的value没有field - // } else { - // fa_buf->push_back(*field); - // } + FieldArray *field = new FieldArray; + field = ParseValue(value.ToString(), field); + if (field->empty()){ //batch中的value没有field + fa_buf->push_back({{EMPTY,value.ToString()}}); + } else { + fa_buf->push_back(*field); + } sub_requests->emplace_back(new FieldsReq(&str_buf->back(),&fa_buf->back(),mu)); sub_requests->back()->parent = req; From d33737f1a10b1511e53c81397ba4e90b34cd84bd Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sat, 28 Dec 2024 22:01:56 +0800 Subject: [PATCH 19/32] =?UTF-8?q?=E5=B0=86string=E6=9B=BF=E6=8D=A2?= =?UTF-8?q?=E4=B8=BASlice?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
benchmarks/db_bench_FieldDB.cc | 2 +- fielddb/field_db.cpp | 31 ++++++-------- fielddb/field_db.h | 2 +- fielddb/meta.cpp | 4 +- fielddb/meta.h | 6 +-- fielddb/request.cpp | 96 +++++++++++++++++++++--------------------- fielddb/request.h | 43 +++++++++++++------ util/serialize_value.cc | 24 +++++++++-- util/serialize_value.h | 9 ++-- 9 files changed, 126 insertions(+), 91 deletions(-) diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index 3abf7c5..0471ee5 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -870,7 +870,7 @@ class Benchmark { {"age", age}, {"tag", tag} }; - Slice value = SerializeValue(fields); + std::string value = SerializeValue(fields); batch.Put(key.slice(), value); bytes += value.size() + key.slice().size(); thread->stats.FinishedSingleOp(); diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 8a5abbd..a9ad365 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -2,13 +2,10 @@ #include #include #include -#include #include #include #include #include -#include "leveldb/c.h" -#include "leveldb/cache.h" #include "leveldb/db.h" #include "leveldb/env.h" #include "leveldb/iterator.h" @@ -16,7 +13,6 @@ #include "leveldb/slice.h" #include "leveldb/status.h" #include "leveldb/write_batch.h" -#include "db/write_batch_internal.h" #include "util/coding.h" #include "util/mutexlock.h" #include "util/serialize_value.h" @@ -234,10 +230,10 @@ Status FieldDB::Put(const WriteOptions &options, const Slice &key, const Slice & // 需要对是否进行index更新做处理 Status FieldDB::PutFields(const WriteOptions &Options, const Slice &key, const FieldArray &fields) { - std::string key_ = key.ToString(); - FieldArray fields_ = fields; + // std::string key_ = key.ToString(); + // FieldArray fields_ = fields; - FieldsReq req(&key_,&fields_,&mutex_); + FieldsReq req(key,fields,&mutex_); Status status = HandleRequest(req, Options); return status; @@ -246,8 +242,8 @@ Status FieldDB::PutFields(const WriteOptions 
&Options, // 删除有索引的key时indexdb也要同步 Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { - std::string key_ = key.ToString(); - DeleteReq req(&key_,&mutex_); + // std::string key_ = key.ToString(); + DeleteReq req(key,&mutex_); Status status = HandleRequest(req, options); return status; } @@ -289,15 +285,15 @@ std::vector FieldDB::FindKeysByField(Field &field) { } std::vector> FieldDB::FindKeysAndValByFieldName ( - const std::string &fieldName){ + const Slice fieldName){ std::vector> result; auto iter = kvDB_->NewIterator(ReadOptions()); - std::string val; + Slice val; for(iter->SeekToFirst();iter->Valid();iter->Next()) { InternalFieldArray fields(iter->value()); - val = fields.ValOfName(fieldName); + val = fields.ValOfName(fieldName.ToString()); if(!val.empty()) { - result.push_back(std::make_pair(iter->key().ToString(), val)); + result.push_back(std::make_pair(iter->key().ToString(), val.ToString())); } } delete iter; @@ -305,8 +301,9 @@ std::vector> FieldDB::FindKeysAndValByFieldN } Status FieldDB::CreateIndexOnField(const std::string& field_name, const WriteOptions &op) { - std::string Field = field_name; - iCreateReq req(&Field,&mutex_); + // std::string Field = field_name; + // iCreateReq req(&Field,&mutex_); + iCreateReq req(field_name,&mutex_); HandleRequest(req, op); //如果已经存在索引,那么直接返回 if(req.Existed) { @@ -321,8 +318,8 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name, const WriteOpt } Status FieldDB::DeleteIndex(const std::string &field_name, const WriteOptions &op) { - std::string Field = field_name; - iDeleteReq req(&Field,&mutex_); + // std::string Field = field_name; + iDeleteReq req(field_name,&mutex_); HandleRequest(req, op); //如果已经被删除或者不存在,那么可以直接返回 if(req.Deleted) { diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 1135635..5b7973d 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -85,7 +85,7 @@ private: std::deque taskqueue_; std::vector> FindKeysAndValByFieldName ( - const std::string 
&fieldName); + const Slice fieldName); /*For request handling*/ Status HandleRequest(Request &req, const WriteOptions &op); //每个请求自行构造请求后交由这个函数处理 diff --git a/fielddb/meta.cpp b/fielddb/meta.cpp index 11e1241..08e9ebb 100644 --- a/fielddb/meta.cpp +++ b/fielddb/meta.cpp @@ -29,7 +29,7 @@ void MetaKV::TransPut(std::string &MetaKey,std::string &MetaValue) { //但是slice中的指针指向的是析构的string对象的部分内存 std::string &buf = MetaKey; PutFixed32(&buf, KV_Creating); - PutLengthPrefixedSlice(&buf, Slice(*name)); + PutLengthPrefixedSlice(&buf, Slice(name)); // MetaKey = Slice(buf); // MetaValue = Slice(*value); } @@ -38,7 +38,7 @@ void MetaKV::TransDelete(std::string &MetaKey) { MetaKey.clear(); std::string &buf = MetaKey; PutFixed32(&buf, KV_Deleting); - PutLengthPrefixedSlice(&buf, Slice(*name)); + PutLengthPrefixedSlice(&buf, Slice(name)); // MetaKey = Slice(buf); } diff --git a/fielddb/meta.h b/fielddb/meta.h index 2766e88..c2f04cc 100644 --- a/fielddb/meta.h +++ b/fielddb/meta.h @@ -35,13 +35,13 @@ enum MetaType { //将一对(field_name,field_value)转换到metaDB中的KV表示 class MetaKV { public: - MetaKV(std::string *field_name,std::string *field_value = nullptr): + MetaKV(Slice field_name,Slice field_value = Slice()): name(field_name),value(field_value) { } void TransPut(std::string &MetaKey,std::string &MetaValue); void TransDelete(std::string &MetaKey); private: - std::string *name; - std::string *value; + Slice name; + Slice value; }; class MetaCleaner { diff --git a/fielddb/request.cpp b/fielddb/request.cpp index c08c58a..9c30f1d 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -50,15 +50,15 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB, std::unordered_set &batchKeySet) { - if (batchKeySet.find(*Key) != batchKeySet.end()){ + if (batchKeySet.find(Key.ToString()) != batchKeySet.end()){ return;//并发的被合并的put/delete请求只处理一次 } else { - batchKeySet.insert(*Key); + batchKeySet.insert(Key.ToString()); } std::string 
val_str; Status s = Status::NotFound("test"); uint64_t start_ = DB->env_->NowMicros(); - s = DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + s = DB->kvDB_->Get(ReadOptions(), Key.ToString(), &val_str); DB->construct_FieldsReq_Read_elapsed += DB->env_->NowMicros() - start_; FieldArray *oldFields; if (s.IsNotFound()){ @@ -76,10 +76,10 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, // MutexLock L(&DB->index_mu); //互斥访问索引状态表 DB->index_mu.AssertHeld(); //1.将存在冲突的put pend到对应的请求 - for(auto [field_name,field_value] : *Fields) { + for(auto [field_name,field_value] : SliceFields) { if(field_name == EMPTY) break; - if(DB->index_.count(field_name)) { - auto [index_status,parent_req] = DB->index_[field_name]; + if(DB->index_.count(field_name.ToString())) { + auto [index_status,parent_req] = DB->index_[field_name.ToString()]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { parent_req->PendReq(this->parent); return; @@ -105,13 +105,13 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, } } } - - KVBatch.Put(Slice(*Key), Slice(SerializeValue(*Fields))); + std::string scrach = SerializeValue(SliceFields); + KVBatch.Put(Slice(Key), Slice(scrach)); //2.对于没有冲突但含有索引操作的put,构建metaKV,这里直接将KV对简单编码后写入metaDB if(HasIndex || HasOldIndex) { std::string MetaKey,MetaValue; - std::string serialized = SerializeValue(*Fields); - MetaKV MKV = MetaKV(Key,&serialized); + std::string serialized = SerializeValue(SliceFields); + MetaKV MKV = MetaKV(Key,serialized); MKV.TransPut(MetaKey, MetaValue); MetaBatch.Put(MetaKey, serialized); @@ -123,7 +123,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, if(DB->index_.count(field_name)) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( - *Key,field_name,field_value)); + Key,field_name,field_value)); IndexBatch.Delete(indexKey); } } @@ -131,12 +131,12 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch 
&IndexBatch, //3.2对于含有索引的field建立索引 if (HasIndex) { - for(auto [field_name,field_value] : *Fields) { + for(auto [field_name,field_value] : SliceFields) { if(field_name == EMPTY) continue; - if(DB->index_.count(field_name)) { + if(DB->index_.count(field_name.ToString())) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( - *Key,field_name,field_value)); + Key,field_name,field_value)); IndexBatch.Put(indexKey, Slice()); } } @@ -155,10 +155,10 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB, std::unordered_set &batchKeySet) { - if (batchKeySet.find(*Key) != batchKeySet.end()){ + if (batchKeySet.find(Key.ToString()) != batchKeySet.end()){ return;//并发的被合并的put/delete请求只处理一次 } else { - batchKeySet.insert(*Key); + batchKeySet.insert(Key.ToString()); } //1. 读取当前的最新的键值对,判断是否存在含有键值对的field //2.1 如果无,则正常构造delete @@ -166,11 +166,11 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //在kvDB和metaDB中写入对应的delete //2.3 如果存在field的索引状态是Creating或者Deleting,那么在那个队列上面进行等待 std::string val_str; - Status s = DB->kvDB_->Get(ReadOptions(), *Key, &val_str); + Status s = DB->kvDB_->Get(ReadOptions(), Key, &val_str); if (s.IsNotFound()) return; FieldArray *Fields = new FieldArray; ParseValue(val_str,Fields); - KVBatch.Delete(Slice(*Key)); + KVBatch.Delete(Slice(Key)); bool HasIndex = false; { // MutexLock L(&DB->index_mu); //互斥访问索引状态表 @@ -189,7 +189,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //assert(0); } } - KVBatch.Delete(Slice(*Key)); + KVBatch.Delete(Slice(Key)); //2.对于没有冲突但含有索引操作的delete,构建metaKV,这里直接将KV对简单编码后写入metaDB if(HasIndex) { std::string MetaKey; @@ -202,7 +202,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, if(DB->index_.count(field_name)) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( - *Key,field_name,field_value)); + Key,field_name,field_value)); 
IndexBatch.Delete(indexKey); } } @@ -215,8 +215,8 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, void iCreateReq::Prepare(FieldDB *DB) { //在index_中完成索引状态更新,在这里可以避免重复创建 DB->index_mu.AssertHeld(); - if(DB->index_.count(*Field)) { - auto [istatus,parent] = DB->index_[*Field]; + if(DB->index_.count(Field.ToString())) { + auto [istatus,parent] = DB->index_[Field.ToString()]; if(istatus == IndexStatus::Exist) { //如果已经完成建立索引,则返回成功 done = true; @@ -230,7 +230,7 @@ void iCreateReq::Prepare(FieldDB *DB) { } //如果索引状态表中没有,则表示尚未创建,更新相应的状态 //这里将done设置为true表示在taskqueue中需要完成的部分已经完成,不需要pend - DB->index_[*Field] = {IndexStatus::Creating,this}; + DB->index_[Field.ToString()] = {IndexStatus::Creating,this}; done = true; } @@ -246,12 +246,12 @@ void iCreateReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //遍历数据库,构建二级索引到indexbatch,(更新metaDB中的元数据为Index类型的(Field,Creating)) //一个indexwritebatch写入,那么索引创建删除应该和metadb没有交互 std::vector> keysAndVal = - DB->FindKeysAndValByFieldName(*Field); + DB->FindKeysAndValByFieldName(Field.ToString()); Slice value = Slice(); for (auto &kvPair : keysAndVal){ std::string indexKey; AppendIndexKey(&indexKey, - ParsedInternalIndexKey(kvPair.first, *Field, kvPair.second)); + ParsedInternalIndexKey(kvPair.first, Field, kvPair.second)); IndexBatch.Put(indexKey, value); } } @@ -259,7 +259,7 @@ void iCreateReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, void iCreateReq::Finalize(FieldDB *DB) { //1. 
写入完成后,更新index状态表,(并将metaDB的值改为Index类型的(Field,Existing)) MutexLock iL(&DB->index_mu); - DB->index_[*Field] = {IndexStatus::Exist, nullptr}; + DB->index_[Field.ToString()] = {IndexStatus::Exist, nullptr}; DB->index_mu.Unlock(); if (pending_list.empty()) return; @@ -278,15 +278,15 @@ void iCreateReq::Finalize(FieldDB *DB) { /*******iDeleteReq*******/ void iDeleteReq::Prepare(FieldDB *DB) { DB->index_mu.AssertHeld(); - if(DB->index_.count(*Field) == 0) { + if(DB->index_.count(Field.ToString()) == 0) { done = true; Deleted = true; s = Status::OK(); return ; } - auto [istatus,parent] = DB->index_[*Field]; + auto [istatus,parent] = DB->index_[Field.ToString()]; if(istatus == IndexStatus::Exist) { - DB->index_[*Field] = {IndexStatus::Deleting,this}; + DB->index_[Field.ToString()] = {IndexStatus::Deleting,this}; done = true; } else { //如果正在创建或者删除,那么pend到对应的请求上 @@ -303,19 +303,19 @@ void iDeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { std::vector> keysAndVal = - DB->FindKeysAndValByFieldName(*Field); + DB->FindKeysAndValByFieldName(Field); Slice value = Slice(); for (auto &kvPair : keysAndVal){ std::string indexKey; AppendIndexKey(&indexKey, - ParsedInternalIndexKey(kvPair.first, *Field, kvPair.second)); + ParsedInternalIndexKey(kvPair.first, Field, kvPair.second)); IndexBatch.Delete(indexKey); } } void iDeleteReq::Finalize(FieldDB *DB) { MutexLock iL(&DB->index_mu); - DB->index_.erase(*Field); + DB->index_.erase(Field.ToString()); DB->index_mu.Unlock(); if (pending_list.empty()) return; @@ -337,37 +337,37 @@ BatchReq::BatchReq(WriteBatch *Batch,port::Mutex *mu): struct BatchHandler : WriteBatch::Handler { void Put(const Slice &key, const Slice &value) override { //为key和value构造存储的地方,防止由于string的析构造成可能得内存访问错误 - str_buf->push_back(key.ToString()); - FieldArray *field = new FieldArray; - field = ParseValue(value.ToString(), field); - if (field->empty()){ //batch中的value没有field - 
fa_buf->push_back({{EMPTY,value.ToString()}}); - } else { - fa_buf->push_back(*field); - } - - sub_requests->emplace_back(new FieldsReq(&str_buf->back(),&fa_buf->back(),mu)); + // str_buf->push_back(key.ToString()); + // FieldArray *field = new FieldArray; + // field = ParseValue(value.ToString(), field); + // if (field->empty()){ //batch中的value没有field + // fa_buf->push_back({{EMPTY,value.ToString()}}); + // } else { + // fa_buf->push_back(*field); + // } + //默认所有WriteBatch中的东西都是有Field的!!!!! + sub_requests->emplace_back(new FieldsReq(key,value,mu)); sub_requests->back()->parent = req; // delete field; } void Delete(const Slice &key) override { - str_buf->push_back(key.ToString()); - sub_requests->emplace_back(new DeleteReq(&str_buf->back(),mu)); + // str_buf->push_back(key.ToString()); + sub_requests->emplace_back(new DeleteReq(key,mu)); sub_requests->back()->parent = req; } BatchReq *req; port::Mutex *mu; - std::deque *str_buf; - std::deque *fa_buf; + // std::deque *str_buf; + // std::deque *fa_buf; std::deque *sub_requests; }; BatchHandler Handler; Handler.req = this; Handler.mu = mu; - Handler.str_buf = &str_buf; - Handler.fa_buf = &fa_buf; + // Handler.str_buf = &str_buf; + // Handler.fa_buf = &fa_buf; Handler.sub_requests = &sub_requests; Batch->Iterate(&Handler); diff --git a/fielddb/request.h b/fielddb/request.h index 19faf49..4e68596 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -1,8 +1,10 @@ #include #include +#include "leveldb/slice.h" #include "leveldb/status.h" #include "leveldb/write_batch.h" #include "port/port_stdcxx.h" +#include "util/coding.h" #include "util/mutexlock.h" #include "util/serialize_value.h" #include @@ -64,14 +66,31 @@ public: //含有field的put class FieldsReq : public Request { public: - FieldsReq(std::string *Key,FieldArray *Fields,port::Mutex *mu): - Key(Key),Fields(Fields),Request(FieldsReq_t,mu) { }; + FieldsReq(Slice Key,const FieldArray &Fields,port::Mutex *mu): + Key(Key),Request(FieldsReq_t,mu) { + for(auto 
&[name,value] : Fields) { + SliceFields.push_back({name,value}); + } + }; + + FieldsReq(Slice Key, Slice Value,port::Mutex *mu): + Key(Key),Request(FieldsReq_t,mu) { + Slice nameSlice, valSlice; + while(GetLengthPrefixedSlice(&Value, &nameSlice)) { + if(GetLengthPrefixedSlice(&Value, &valSlice)) { + SliceFields.push_back({nameSlice,valSlice}); + } else { + std::cout << "name and val not match! From FieldsReq Init" << std::endl; + } + nameSlice.clear(), valSlice.clear(); + } + } void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; - std::string *Key; - FieldArray *Fields; + Slice Key; + FieldSliceArray SliceFields; }; //不含有field的put,但是计划被弃用了 @@ -89,7 +108,7 @@ public: //创建索引的request class iCreateReq : public Request { public: - iCreateReq(std::string *Field,port::Mutex *mu): + iCreateReq(Slice Field,port::Mutex *mu): Field(Field),Request(iCreateReq_t, mu),Existed(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, @@ -99,14 +118,14 @@ public: void PendReq(Request *req) override; bool Existed; - std::string *Field; + Slice Field; std::deque pending_list; }; //删除索引的request class iDeleteReq : public Request { public: - iDeleteReq(std::string *Field,port::Mutex *mu): + iDeleteReq(Slice Field,port::Mutex *mu): Field(Field),Request(iDeleteReq_t, mu),Deleted(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, @@ -116,20 +135,20 @@ public: void PendReq(Request *req) override; bool Deleted; - std::string *Field; + Slice Field; std::deque pending_list; }; //删除key的request class DeleteReq : public Request { public: - DeleteReq(std::string *Key,port::Mutex *mu): + DeleteReq(Slice Key,port::Mutex *mu): Key(Key),Request(DeleteReq_t,mu) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; - std::string *Key; + Slice Key; }; class BatchReq 
: public Request { @@ -142,8 +161,8 @@ public: WriteBatch *Batch; std::deque sub_requests; - std::deque str_buf; - std::deque fa_buf; + // std::deque str_buf; + // std::deque fa_buf; }; } diff --git a/util/serialize_value.cc b/util/serialize_value.cc index 73fb092..f60ba30 100644 --- a/util/serialize_value.cc +++ b/util/serialize_value.cc @@ -3,6 +3,7 @@ #include #include "util/coding.h" #include +#include "leveldb/slice.h" namespace leveldb{ bool compareByFirst(const Field& a, const Field& b) { @@ -20,6 +21,21 @@ std::string SerializeValue(const FieldArray& fields){ return result; } +std::string SerializeValue(const FieldSliceArray& fields) { + using pss = std::pair; + FieldSliceArray sortFields = fields; + std::sort(sortFields.begin(), sortFields.end(), + [&](const pss &lhs, const pss &rhs) { + return lhs.first.compare(rhs.first); + }); + std::string result; + for (const pss& pairs : sortFields) { + PutLengthPrefixedSlice(&result, pairs.first); + PutLengthPrefixedSlice(&result, pairs.second); + } + return result; +} + FieldArray *ParseValue(const std::string& value_str,FieldArray *fields){ Slice valueSlice(value_str); // FieldArray *res = new FieldArray; @@ -74,22 +90,22 @@ bool InternalFieldArray::HasField(const Field& field) { return std::find(fields.begin(),fields.end(),field) != fields.end(); } -std::string InternalFieldArray::ValOfName(const std::string &name) { +Slice InternalFieldArray::ValOfName(const std::string &name) { if(isMapped) { if(map.count(name)) { return map[name]; } - return std::string(); + return Slice(); } for (auto iter = fields.begin(); iter != fields.end(); iter++){ if (iter->first == name) { return iter->second; } else if (iter->first > name) { - return std::string(); + return Slice(); } } - return std::string(); + return Slice(); } } \ No newline at end of file diff --git a/util/serialize_value.h b/util/serialize_value.h index 2405773..ba677c1 100644 --- a/util/serialize_value.h +++ b/util/serialize_value.h @@ -3,6 +3,7 @@ #include 
#include +#include #include #include #include "leveldb/slice.h" @@ -10,13 +11,15 @@ namespace leveldb{ using Field = std::pair; // field_name:field_value using FieldArray = std::vector>; +using FieldSliceArray = std::vector>; std::string SerializeValue(const FieldArray& fields); +std::string SerializeValue(const FieldSliceArray& fields); FieldArray *ParseValue(const std::string& value_str, FieldArray *fields); class InternalFieldArray { public: - using FieldMap = std::map; + using FieldMap = std::map; InternalFieldArray(const FieldArray &fields, bool to_map = false): fields(fields),isMapped(false) { @@ -29,7 +32,7 @@ public: Slice nameSlice, valSlice; while(GetLengthPrefixedSlice(&valueSlice, &nameSlice)) { if(GetLengthPrefixedSlice(&valueSlice, &valSlice)) { - map[nameSlice.ToString()] = valSlice.ToString(); + map[nameSlice.ToString()] = valSlice; } else { std::cout << "name and val not match! From InternalFieldArray" << std::endl; } @@ -48,7 +51,7 @@ public: std::string Serialize(); bool HasField(const Field& field); - std::string ValOfName(const std::string& name); + Slice ValOfName(const std::string& name); private: bool isMapped; From b7c1d557fa35e3795bfa219eabb1bbe94a54524d Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 29 Dec 2024 01:00:01 +0800 Subject: [PATCH 20/32] =?UTF-8?q?=E8=A1=A5=E5=85=85=E9=83=A8=E5=88=86?= =?UTF-8?q?=E8=BD=ACslice?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 8 ++-- fielddb/field_db.h | 5 ++- fielddb/request.cpp | 105 +++++++++++++++++++++++++++++++-------------------- 3 files changed, 73 insertions(+), 45 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index a9ad365..5685168 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -126,10 +126,15 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { MutexLock L(&mutex_); taskqueue_.push_back(&req); while(true){ + uint64_t start_waiting = 
env_->NowMicros(); while(!req.done && &req != taskqueue_.front()) { req.cond_.Wait(); } + waiting_elasped += env_->NowMicros() - start_waiting; if(req.done) { + elapsed += env_->NowMicros() - start_; + count ++; + dumpStatistics(); return req.s; //在返回时自动释放锁L } Request *tail = GetHandleInterval(); @@ -208,9 +213,6 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { if (ready == tail) break; } - elapsed += env_->NowMicros() - start_; - count ++; - //dumpStatistics(); if(!taskqueue_.empty()) { taskqueue_.front()->cond_.Signal(); diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 5b7973d..b920293 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -115,6 +115,8 @@ private: uint64_t temp_elapsed = 0; + uint64_t waiting_elasped = 0; + inline void dumpStatistics() { if(count && count % 500000 == 0 || write_bytes && write_bytes > write_bytes_lim) { std::cout << "=====================================================\n"; @@ -134,7 +136,8 @@ private: std::cout << "\tWrite KV Time(ms) : " << write_kv_elapsed * 1.0 / count << std::endl; std::cout << "\tWrite Clean Time(ms) : " << write_clean_elapsed * 1.0 / count << std::endl; std::cout << "TaskQueue Size : " << taskqueue_.size() << std::endl; - std::cout << "temp_elased : " << temp_elapsed * 1.0 / count<< std::endl; + std::cout << "temp_elased : " << temp_elapsed * 1.0 / count << std::endl; + std::cout << "waiting elapsed : " << waiting_elasped * 1.0 / count << std::endl; // std::cout << MetaBatch.ApproximateSize() << " " << IndexBatch.ApproximateSize() << " " << KVBatch.ApproximateSize() << std::endl; std::cout << "=====================================================\n"; write_bytes_lim = write_bytes + 50 * 1024 * 1024; diff --git a/fielddb/request.cpp b/fielddb/request.cpp index 9c30f1d..ca83f70 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -58,14 +58,25 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, std::string val_str; Status s = 
Status::NotFound("test"); uint64_t start_ = DB->env_->NowMicros(); - s = DB->kvDB_->Get(ReadOptions(), Key.ToString(), &val_str); + s = DB->kvDB_->Get(ReadOptions(), Key, &val_str); DB->construct_FieldsReq_Read_elapsed += DB->env_->NowMicros() - start_; - FieldArray *oldFields; + // FieldArray *oldFields; + FieldSliceArray oldFields; if (s.IsNotFound()){ - oldFields = nullptr; + // oldFields = nullptr; } else if (s.ok()) { //得到数据库之前key的fields, 判断需不需要删除其中潜在的索引 - oldFields = new FieldArray; - oldFields = ParseValue(val_str,oldFields); + // oldFields = new FieldArray; + // oldFields = ParseValue(val_str,oldFields); + Slice nameSlice, valSlice; + Slice Value(val_str); + while(GetLengthPrefixedSlice(&Value, &nameSlice)) { + if(GetLengthPrefixedSlice(&Value, &valSlice)) { + oldFields.push_back({nameSlice,valSlice}); + } else { + std::cout << "name and val not match! From FieldsReq Init" << std::endl; + } + nameSlice.clear(), valSlice.clear(); + } } else { assert(0); } @@ -76,8 +87,8 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, // MutexLock L(&DB->index_mu); //互斥访问索引状态表 DB->index_mu.AssertHeld(); //1.将存在冲突的put pend到对应的请求 - for(auto [field_name,field_value] : SliceFields) { - if(field_name == EMPTY) break; + for(auto &[field_name,field_value] : SliceFields) { + if(field_name.data() == EMPTY) break; if(DB->index_.count(field_name.ToString())) { auto [index_status,parent_req] = DB->index_[field_name.ToString()]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { @@ -90,11 +101,11 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, } } //冲突也可能存在于,需要删除旧数据的索引,但该索引正在创删中 - if (oldFields != nullptr){ - for(auto [field_name,field_value] : *oldFields) { - if(field_name == EMPTY) break; - if(DB->index_.count(field_name)) { - auto [index_status,parent_req] = DB->index_[field_name]; + if (!oldFields.empty()){ + for(auto &[field_name,field_value] : oldFields) { + if(field_name.data() == EMPTY) 
break; + if(DB->index_.count(field_name.ToString())) { + auto [index_status,parent_req] = DB->index_[field_name.ToString()]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { parent_req->PendReq(this->parent); return; @@ -118,9 +129,9 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //3.1对于含有索引的oldfield删除索引 if (HasOldIndex) { - for(auto [field_name,field_value] : *oldFields) { - if(field_name == EMPTY) continue; - if(DB->index_.count(field_name)) { + for(auto &[field_name,field_value] : oldFields) { + if(field_name.data() == EMPTY) continue; + if(DB->index_.count(field_name.ToString())) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( Key,field_name,field_value)); @@ -131,8 +142,8 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //3.2对于含有索引的field建立索引 if (HasIndex) { - for(auto [field_name,field_value] : SliceFields) { - if(field_name == EMPTY) continue; + for(auto &[field_name,field_value] : SliceFields) { + if(field_name.data() == EMPTY) continue; if(DB->index_.count(field_name.ToString())) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( @@ -146,7 +157,7 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, //优化:对于3.1,3.2中都有的索引只写一次 } - if(oldFields) delete oldFields; + // if(oldFields) delete oldFields; } @@ -168,18 +179,29 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, std::string val_str; Status s = DB->kvDB_->Get(ReadOptions(), Key, &val_str); if (s.IsNotFound()) return; - FieldArray *Fields = new FieldArray; - ParseValue(val_str,Fields); + // FieldArray *Fields = new FieldArray; + // ParseValue(val_str,Fields); + FieldSliceArray Fields; + Slice nameSlice, valSlice; + Slice Value(val_str); + while(GetLengthPrefixedSlice(&Value, &nameSlice)) { + if(GetLengthPrefixedSlice(&Value, &valSlice)) { + Fields.push_back({nameSlice,valSlice}); + } else { + std::cout << "name 
and val not match! From FieldsReq Init" << std::endl; + } + nameSlice.clear(), valSlice.clear(); + } KVBatch.Delete(Slice(Key)); bool HasIndex = false; { // MutexLock L(&DB->index_mu); //互斥访问索引状态表 DB->index_mu.AssertHeld(); //1.将存在冲突的delete pend到对应的请求 - for(auto [field_name,field_value] : *Fields) { - if(field_name == EMPTY) break; - if(DB->index_.count(field_name)) { - auto [index_status,parent_req] = DB->index_[field_name]; + for(auto &[field_name,field_value] : Fields) { + if(field_name.data() == EMPTY) break; + if(DB->index_.count(field_name.ToString())) { + auto [index_status,parent_req] = DB->index_[field_name.ToString()]; if(index_status == IndexStatus::Creating || index_status == IndexStatus::Deleting) { parent_req->PendReq(this->parent); return; @@ -197,9 +219,9 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, MKV.TransDelete(MetaKey); //meta中写入一个delete不需要value MetaBatch.Put(MetaKey, Slice()); //3.对于含有索引的field删除索引 - for(auto [field_name,field_value] : *Fields) { - if(field_name == EMPTY) continue; - if(DB->index_.count(field_name)) { + for(auto &[field_name,field_value] : Fields) { + if(field_name.data() == EMPTY) continue; + if(DB->index_.count(field_name.ToString())) { std::string indexKey; AppendIndexKey(&indexKey, ParsedInternalIndexKey( Key,field_name,field_value)); @@ -208,7 +230,7 @@ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, } } } - delete Fields; + // delete Fields; } /*******iCreateReq*******/ @@ -384,13 +406,14 @@ BatchReq::~BatchReq() { void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { - WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; + // WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; std::unordered_set Sub_batchKeySet; //由于batch是有顺序的,根据我们现在的一个key只处理最开始的算法,这里需要反向迭代 uint64_t start_ = DB->env_->NowMicros(); for(auto subreq = sub_requests.rbegin(); subreq != 
sub_requests.rend(); subreq++ ) { uint64_t start_sub = DB->env_->NowMicros(); - (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); + // (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); + (*subreq)->ConstructBatch(KVBatch, IndexBatch, MetaBatch, DB, batchKeySet); DB->construct_BatchReq_perSub_elapsed += DB->env_->NowMicros() - start_sub; DB->count_Batch_Sub ++; //所有的对于pendreq的调用传入的参数被改成了this->parent,因此,对于subrequests来说, @@ -399,17 +422,17 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, return; } } - DB->construct_BatchReq_Sub_elapsed += DB->env_->NowMicros() - start_; - if(Sub_KVBatch.ApproximateSize() > 12) { - KVBatch.Append(Sub_KVBatch); - } - if(Sub_IndexBatch.ApproximateSize() > 12) { - IndexBatch.Append(Sub_IndexBatch); - } - if(Sub_MetaBatch.ApproximateSize() > 12) { - MetaBatch.Append(Sub_MetaBatch); - } - batchKeySet.insert(Sub_batchKeySet.begin(),Sub_batchKeySet.end()); + // DB->construct_BatchReq_Sub_elapsed += DB->env_->NowMicros() - start_; + // if(Sub_KVBatch.ApproximateSize() > 12) { + // KVBatch.Append(Sub_KVBatch); + // } + // if(Sub_IndexBatch.ApproximateSize() > 12) { + // IndexBatch.Append(Sub_IndexBatch); + // } + // if(Sub_MetaBatch.ApproximateSize() > 12) { + // MetaBatch.Append(Sub_MetaBatch); + // } + // batchKeySet.insert(Sub_batchKeySet.begin(),Sub_batchKeySet.end()); DB->construct_BatchReq_elapsed += DB->env_->NowMicros() - start_; } From d32cb6b3a7580c1a78a3d9f026a0064fd90bb4b3 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 29 Dec 2024 01:00:20 +0800 Subject: [PATCH 21/32] =?UTF-8?q?=E5=AF=B9=E9=BD=90bench=E7=9A=84=E5=86=99?= =?UTF-8?q?=E5=85=A5=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench.cc | 16 ++++++++++++++-- benchmarks/db_bench_FieldDB.cc | 4 ++-- benchmarks/db_bench_testDB.cc | 16 ++++++++++++++-- 3 files changed, 
30 insertions(+), 6 deletions(-) diff --git a/benchmarks/db_bench.cc b/benchmarks/db_bench.cc index 4ff7f05..554a6c5 100644 --- a/benchmarks/db_bench.cc +++ b/benchmarks/db_bench.cc @@ -856,8 +856,20 @@ class Benchmark { for (int j = 0; j < entries_per_batch_; j++) { const int k = seq ? i + j : thread->rand.Uniform(FLAGS_num); key.Set(k); - batch.Put(key.slice(), gen.Generate(value_size_)); - bytes += value_size_ + key.slice().size(); + + std::string name = "customer#" + std::to_string(k); + //这个字段用来查找 + std::string age = std::to_string(thread->rand.Uniform(FLAGS_num) % 100); + //这个字段填充长度 + std::string tag = gen.Generate(value_size_).ToString(); + FieldArray fields = { + {"name", name}, + {"age", age}, + {"tag", tag} + }; + std::string value = SerializeValue(fields); + batch.Put(key.slice(), value); + bytes += value.size() + key.slice().size(); thread->stats.FinishedSingleOp(); } s = db_->Write(write_options_, &batch); diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index 0471ee5..2514328 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -329,8 +329,8 @@ class Stats { // elapsed times. double elapsed = (finish_ - start_) * 1e-6; char rate[100]; - std::snprintf(rate, sizeof(rate), "%6.1f MB/s", - (bytes_ / 1048576.0) / elapsed); + std::snprintf(rate, sizeof(rate), "%6.1f MB/s Bytes:%6.1f elapsed(s):%6.1f seconds:%6.1f ", + (bytes_ / 1048576.0) / elapsed,(bytes_ / 1048576.0),elapsed,seconds_); extra = rate; } AppendWithSpace(&extra, message_); diff --git a/benchmarks/db_bench_testDB.cc b/benchmarks/db_bench_testDB.cc index 6191132..c738c09 100644 --- a/benchmarks/db_bench_testDB.cc +++ b/benchmarks/db_bench_testDB.cc @@ -859,8 +859,20 @@ class Benchmark { for (int j = 0; j < entries_per_batch_; j++) { const int k = seq ? 
i + j : thread->rand.Uniform(FLAGS_num); key.Set(k); - batch.Put(key.slice(), gen.Generate(value_size_)); - bytes += value_size_ + key.slice().size(); + + std::string name = "customer#" + std::to_string(k); + //这个字段用来查找 + std::string age = std::to_string(thread->rand.Uniform(FLAGS_num) % 100); + //这个字段填充长度 + std::string tag = gen.Generate(value_size_).ToString(); + FieldArray fields = { + {"name", name}, + {"age", age}, + {"tag", tag} + }; + std::string value = SerializeValue(fields); + batch.Put(key.slice(), value); + bytes += value.size() + key.slice().size(); thread->stats.FinishedSingleOp(); } s = db_->Write(write_options_, &batch); From 51129477cec2b741301ce76c36f061242d4e1544 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sun, 29 Dec 2024 09:05:23 +0800 Subject: [PATCH 22/32] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E7=AE=80=E5=8D=95?= =?UTF-8?q?=E7=9A=84=E7=B4=A2=E5=BC=95=E6=80=A7=E8=83=BD=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench_FieldDB.cc | 39 ++++++++++++++++++++++++++++++++++++--- util/serialize_value.cc | 1 + 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index 3abf7c5..dfb8f61 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -63,6 +63,10 @@ static const char* FLAGS_benchmarks = "readreverse," "fill100K," "crc32c," + "CreateIndex," + "FindKeysByField," + "QueryByIndex," + "DeleteIndex," "snappycomp," "snappyuncomp," "zstdcomp," @@ -635,6 +639,14 @@ class Benchmark { method = &Benchmark::Compact; } else if (name == Slice("crc32c")) { method = &Benchmark::Crc32c; + } else if (name == Slice("CreateIndex")) { + method = &Benchmark::CreateIndex; + } else if (name == Slice("FindKeysByField")) { + method = &Benchmark::FindKeysByField; + } else if (name == Slice("QueryByIndex")) { + method = &Benchmark::QueryByIndex; + } else 
if (name == Slice("DeleteIndex")) { + method = &Benchmark::DeleteIndex; } else if (name == Slice("snappycomp")) { method = &Benchmark::SnappyCompress; } else if (name == Slice("snappyuncomp")) { @@ -664,7 +676,7 @@ class Benchmark { } else { delete db_; db_ = nullptr; - //DestroyDB(FLAGS_db, Options()); + DestroyDB(FLAGS_db, Options()); Open(); } } @@ -821,7 +833,7 @@ class Benchmark { options.compression = FLAGS_compression ? kSnappyCompression : kNoCompression; // Status s = DB::Open(options, FLAGS_db, &db_); - fielddb::DestroyDB(FLAGS_db, options); + //fielddb::DestroyDB(FLAGS_db, options); db_ = new FieldDB(); Status s = FieldDB::OpenFieldDB(options, FLAGS_db, &db_); if (!s.ok()) { @@ -870,7 +882,7 @@ class Benchmark { {"age", age}, {"tag", tag} }; - Slice value = SerializeValue(fields); + std::string value = SerializeValue(fields); batch.Put(key.slice(), value); bytes += value.size() + key.slice().size(); thread->stats.FinishedSingleOp(); @@ -1078,6 +1090,27 @@ class Benchmark { g_env->RemoveFile(fname); } } + + void CreateIndex(ThreadState* thread) { + db_->CreateIndexOnField("age", write_options_); + } + + void FindKeysByField(ThreadState* thread) { + Field f = {"age", "20"}; + std::vector res; + res = db_->FindKeysByField(f); + } + + void QueryByIndex(ThreadState* thread) { + Field f = {"age", "20"}; + Status s; + db_->QueryByIndex(f, &s); + } + + void DeleteIndex(ThreadState* thread) { + db_->DeleteIndex("age", write_options_); + } + }; } // namespace leveldb diff --git a/util/serialize_value.cc b/util/serialize_value.cc index 73fb092..a66aef7 100644 --- a/util/serialize_value.cc +++ b/util/serialize_value.cc @@ -36,6 +36,7 @@ FieldArray *ParseValue(const std::string& value_str,FieldArray *fields){ res->emplace_back(nameStr, valStr); } else { std::cout << "name and val not match! 
From ParseValue" << std::endl; + assert(0); } nameSlice.clear(); valSlice.clear(); From b3de788556382c690af8c8db37294e9ea28c2b25 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sun, 29 Dec 2024 09:47:59 +0800 Subject: [PATCH 23/32] =?UTF-8?q?=E6=9F=A5=E5=87=BA=E4=BA=86=E6=80=A7?= =?UTF-8?q?=E8=83=BD=E6=B5=8B=E8=AF=95destroy=E9=94=99=E8=AF=AF=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench_FieldDB.cc | 5 ++--- benchmarks/db_bench_testDB.cc | 4 ++-- testdb/testdb.cc | 5 +++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index dfb8f61..a07795a 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -547,7 +547,7 @@ class Benchmark { } } if (!FLAGS_use_existing_db) { - DestroyDB(FLAGS_db, Options()); + fielddb::DestroyDB(FLAGS_db, Options()); } } @@ -676,7 +676,7 @@ class Benchmark { } else { delete db_; db_ = nullptr; - DestroyDB(FLAGS_db, Options()); + fielddb::DestroyDB(FLAGS_db, Options()); Open(); } } @@ -833,7 +833,6 @@ class Benchmark { options.compression = FLAGS_compression ? 
kSnappyCompression : kNoCompression; // Status s = DB::Open(options, FLAGS_db, &db_); - //fielddb::DestroyDB(FLAGS_db, options); db_ = new FieldDB(); Status s = FieldDB::OpenFieldDB(options, FLAGS_db, &db_); if (!s.ok()) { diff --git a/benchmarks/db_bench_testDB.cc b/benchmarks/db_bench_testDB.cc index 6191132..5a4acb9 100644 --- a/benchmarks/db_bench_testDB.cc +++ b/benchmarks/db_bench_testDB.cc @@ -544,7 +544,7 @@ class Benchmark { } } if (!FLAGS_use_existing_db) { - DestroyDB(FLAGS_db, Options()); + testdb::DestroyDB(FLAGS_db, Options()); } } @@ -665,7 +665,7 @@ class Benchmark { } else { delete db_; db_ = nullptr; - DestroyDB(FLAGS_db, Options()); + testdb::DestroyDB(FLAGS_db, Options()); Open(); } } diff --git a/testdb/testdb.cc b/testdb/testdb.cc index 6baa209..d932f97 100644 --- a/testdb/testdb.cc +++ b/testdb/testdb.cc @@ -2,6 +2,7 @@ #include "db/db_impl.h" #include #include "leveldb/status.h" +#include "testdb.h" using namespace testdb; Status testDB::OpentestDB(Options& options, @@ -93,7 +94,7 @@ void testDB::CompactRange(const Slice *begin, const Slice *end) { kvDB_->CompactRange(begin, end); } -Status DestroyDB(const std::string& name, const Options& options) { +Status testdb::DestroyDB(const std::string& name, const Options& options) { Status s; s = leveldb::DestroyDB(name+"_kvDB", options); assert(s.ok()); @@ -108,4 +109,4 @@ testDB::~testDB() { delete kvDB_; // delete indexDB_; // delete metaDB_; -} \ No newline at end of file +} From 75426ffb064de60dc9ecf06ca882334abae8738e Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Sun, 29 Dec 2024 11:33:12 +0800 Subject: [PATCH 24/32] =?UTF-8?q?=E5=88=9B=E5=88=A0=E7=B4=A2=E5=BC=95?= =?UTF-8?q?=E5=AF=B9=E8=AF=BB=E5=86=99=E6=80=A7=E8=83=BD=E5=BD=B1=E5=93=8D?= =?UTF-8?q?=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench_FieldDB.cc | 219 +++++++++++++++++++++++++++++++++++++++++ 1 file 
changed, 219 insertions(+) diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index a07795a..4238ef6 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -63,10 +63,22 @@ static const char* FLAGS_benchmarks = "readreverse," "fill100K," "crc32c," + "readwhilewriting," "CreateIndex," "FindKeysByField," "QueryByIndex," "DeleteIndex," + "compact," + "WriteSeqWhileCreating," + "WriteSeqWhileDeleting," + "compact," + "WriteRandomWhileCreating," + "WriteRandomWhileDeleting," + "compact," + "ReadSeqWhileCreating," + "ReadSeqWhileDeleting," + "ReadRandomWhileCreating," + "ReadRandomWhileDeleting," "snappycomp," "snappyuncomp," "zstdcomp," @@ -647,6 +659,30 @@ class Benchmark { method = &Benchmark::QueryByIndex; } else if (name == Slice("DeleteIndex")) { method = &Benchmark::DeleteIndex; + } else if (name == Slice("WriteSeqWhileCreating")) { + num_threads++; + method = &Benchmark::WriteSeqWhileCreating; + } else if (name == Slice("WriteSeqWhileDeleting")) { + num_threads++; + method = &Benchmark::WriteSeqWhileDeleting; + } else if (name == Slice("WriteRandomWhileCreating")) { + num_threads++; + method = &Benchmark::WriteRandomWhileCreating; + } else if (name == Slice("WriteRandomWhileDeleting")) { + num_threads++; + method = &Benchmark::WriteRandomWhileDeleting; + } else if (name == Slice("ReadSeqWhileCreating")) { + num_threads++; + method = &Benchmark::ReadSeqWhileCreating; + } else if (name == Slice("ReadSeqWhileDeleting")) { + num_threads++; + method = &Benchmark::ReadSeqWhileDeleting; + } else if (name == Slice("ReadRandomWhileCreating")) { + num_threads++; + method = &Benchmark::ReadRandomWhileCreating; + } else if (name == Slice("ReadRandomWhileDeleting")) { + num_threads++; + method = &Benchmark::ReadRandomWhileDeleting; } else if (name == Slice("snappycomp")) { method = &Benchmark::SnappyCompress; } else if (name == Slice("snappyuncomp")) { @@ -1110,6 +1146,189 @@ class Benchmark { db_->DeleteIndex("age", 
write_options_); } + void WriteSeqWhileCreating(ThreadState* thread) { + if (thread->tid > 0) { + WriteSeq(thread); + } else { + // Special thread that keeps creating index until other threads are done. + if (db_->GetIndexStatus("age") != IndexStatus::NotExist) { + std::fprintf(stderr, "index status error in WriteWhileCreating\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::Exist) { + break; + } + + db_->CreateIndexOnField("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } + + void WriteSeqWhileDeleting(ThreadState* thread) { + if (thread->tid > 0) { + WriteSeq(thread); + } else { + // Special thread that keeps creating index until other threads are done. + if (db_->GetIndexStatus("age") != IndexStatus::Exist) { + std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { + break; + } + + db_->DeleteIndex("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } + + void WriteRandomWhileCreating(ThreadState* thread) { + if (thread->tid > 0) { + WriteRandom(thread); + } else { + // Special thread that keeps creating index until other threads are done. + if (db_->GetIndexStatus("age") != IndexStatus::NotExist) { + std::fprintf(stderr, "index status error in WriteWhileCreating\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::Exist) { + break; + } + + db_->CreateIndexOnField("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } + + void WriteRandomWhileDeleting(ThreadState* thread) { + if (thread->tid > 0) { + WriteRandom(thread); + } else { + // Special thread that keeps creating index until other threads are done. 
+ if (db_->GetIndexStatus("age") != IndexStatus::Exist) { + std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { + break; + } + + db_->DeleteIndex("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } + + void ReadSeqWhileCreating(ThreadState* thread) { + if (thread->tid > 0) { + ReadSequential(thread); + } else { + // Special thread that keeps creating index until other threads are done. + if (db_->GetIndexStatus("age") != IndexStatus::NotExist) { + std::fprintf(stderr, "index status error in WriteWhileCreating\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::Exist) { + break; + } + + db_->CreateIndexOnField("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } + + void ReadSeqWhileDeleting(ThreadState* thread) { + if (thread->tid > 0) { + ReadSequential(thread); + } else { + // Special thread that keeps creating index until other threads are done. + if (db_->GetIndexStatus("age") != IndexStatus::Exist) { + std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { + break; + } + + db_->DeleteIndex("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } + + void ReadRandomWhileCreating(ThreadState* thread) { + if (thread->tid > 0) { + ReadRandom(thread); + } else { + // Special thread that keeps creating index until other threads are done. 
+ if (db_->GetIndexStatus("age") != IndexStatus::NotExist) { + std::fprintf(stderr, "index status error in WriteWhileCreating\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::Exist) { + break; + } + + db_->CreateIndexOnField("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } + + void ReadRandomWhileDeleting(ThreadState* thread) { + if (thread->tid > 0) { + ReadRandom(thread); + } else { + // Special thread that keeps creating index until other threads are done. + if (db_->GetIndexStatus("age") != IndexStatus::Exist) { + std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); + std::exit(1); + } + + while (true) { + if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { + break; + } + + db_->DeleteIndex("age", write_options_); + } + + // Do not count any of the preceding work/delay in stats. + thread->stats.Start(); + } + } }; } // namespace leveldb From 37721f31feca0fa3d588cce435dbf56674d95a8a Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 29 Dec 2024 13:17:36 +0800 Subject: [PATCH 25/32] =?UTF-8?q?=E4=B9=8B=E5=89=8D=E6=94=B9=E9=94=99?= =?UTF-8?q?=E4=BA=86=E4=B8=80=E9=83=A8=E5=88=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/request.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fielddb/request.cpp b/fielddb/request.cpp index ca83f70..b90470a 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -406,14 +406,14 @@ BatchReq::~BatchReq() { void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) { - // WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; + WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; std::unordered_set Sub_batchKeySet; //由于batch是有顺序的,根据我们现在的一个key只处理最开始的算法,这里需要反向迭代 uint64_t start_ = 
DB->env_->NowMicros(); for(auto subreq = sub_requests.rbegin(); subreq != sub_requests.rend(); subreq++ ) { uint64_t start_sub = DB->env_->NowMicros(); - // (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); - (*subreq)->ConstructBatch(KVBatch, IndexBatch, MetaBatch, DB, batchKeySet); + (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); + // (*subreq)->ConstructBatch(KVBatch, IndexBatch, MetaBatch, DB, batchKeySet); DB->construct_BatchReq_perSub_elapsed += DB->env_->NowMicros() - start_sub; DB->count_Batch_Sub ++; //所有的对于pendreq的调用传入的参数被改成了this->parent,因此,对于subrequests来说, @@ -422,17 +422,17 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, return; } } - // DB->construct_BatchReq_Sub_elapsed += DB->env_->NowMicros() - start_; - // if(Sub_KVBatch.ApproximateSize() > 12) { - // KVBatch.Append(Sub_KVBatch); - // } - // if(Sub_IndexBatch.ApproximateSize() > 12) { - // IndexBatch.Append(Sub_IndexBatch); - // } - // if(Sub_MetaBatch.ApproximateSize() > 12) { - // MetaBatch.Append(Sub_MetaBatch); - // } - // batchKeySet.insert(Sub_batchKeySet.begin(),Sub_batchKeySet.end()); + DB->construct_BatchReq_Sub_elapsed += DB->env_->NowMicros() - start_; + if(Sub_KVBatch.ApproximateSize() > 12) { + KVBatch.Append(Sub_KVBatch); + } + if(Sub_IndexBatch.ApproximateSize() > 12) { + IndexBatch.Append(Sub_IndexBatch); + } + if(Sub_MetaBatch.ApproximateSize() > 12) { + MetaBatch.Append(Sub_MetaBatch); + } + batchKeySet.insert(Sub_batchKeySet.begin(),Sub_batchKeySet.end()); DB->construct_BatchReq_elapsed += DB->env_->NowMicros() - start_; } From fd14a089d59fb075a0117f41eb88e5b33034c22d Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 29 Dec 2024 16:30:42 +0800 Subject: [PATCH 26/32] =?UTF-8?q?=E5=AF=B9=E5=8E=9F=E7=89=88leveldb?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E6=8F=92=E6=A1=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
--- db/db_impl.cc | 14 +++++++++++++- db/db_impl.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 122760c..78e2382 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1235,12 +1235,18 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) { w.sync = options.sync; w.done = false; + uint64_t start_ = env_->NowMicros(); MutexLock l(&mutex_); + count ++; writers_.push_back(&w); while (!w.done && &w != writers_.front()) { w.cv.Wait(); } if (w.done) { + Waiting_elapsed += env_->NowMicros() - start_; + waited_count ++; + Total_elapsed += env_->NowMicros() - start_; + // dumpStatistics(); return w.status; } @@ -1259,6 +1265,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) { // into mem_. { mutex_.Unlock(); + uint64_t start_write = env_->NowMicros(); status = log_->AddRecord(WriteBatchInternal::Contents(write_batch)); bool sync_error = false; if (status.ok() && options.sync) { @@ -1270,6 +1277,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) { if (status.ok()) { status = WriteBatchInternal::InsertInto(write_batch, mem_); } + BatchSize += write_batch->ApproximateSize(); + write_elapsed += env_->NowMicros() - start_write; mutex_.Lock(); if (sync_error) { // The state of the log file is indeterminate: the log record we @@ -1298,7 +1307,10 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) { if (!writers_.empty()) { writers_.front()->cv.Signal(); } - + Total_elapsed += env_->NowMicros() - start_; + NoWaiting_elapsed += env_->NowMicros() - start_; + Nowaited_count ++; + dumpStatistics(); return status; } diff --git a/db/db_impl.h b/db/db_impl.h index 6848077..0d77e55 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -6,7 +6,10 @@ #define STORAGE_LEVELDB_DB_DB_IMPL_H_ #include +#include +#include #include +#include #include #include @@ -210,6 +213,33 @@ class DBImpl : public DB { Status bg_error_ 
GUARDED_BY(mutex_); CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_); + + int count = 0; + int waited_count = 0; + int Nowaited_count = 0; + uint64_t Total_elapsed = 0; + uint64_t Waiting_elapsed = 0; + uint64_t NoWaiting_elapsed = 0; + uint64_t write_elapsed = 0; + uint64_t BatchSize = 0; + const double MB = 1024 * 1024; + const double KB = 1024; + inline void dumpStatistics() { + if(count && count % 500000 == 0) { + printf("==================================\n"); + printf("Count: Total:%d Waited:%d Nowaited:%d\n",count,waited_count,Nowaited_count); + printf("%ld %ld %ld\n",Total_elapsed,Waiting_elapsed,NoWaiting_elapsed); + printf("Average Total elapsed: %lf ms\n",Total_elapsed * 1.0 / count); + printf("Average Waiting elapsed: %lf ms\n",Waiting_elapsed * 1.0 / count); + printf("For waiting request: %lf ms\n",Waiting_elapsed * 1.0 / waited_count); + printf("For Nowait request: %lf ms\n",NoWaiting_elapsed * 1.0 / Nowaited_count); + printf("Write elapsed: %lf ms\n",write_elapsed * 1.0 / Nowaited_count); + printf("Average BatchSize: %lfKB\n",BatchSize / KB / count); + printf("Average BatchSize per write:%lfKB\n",BatchSize / KB / Nowaited_count); + printf("==================================\n"); + std::fflush(stdout); + } + } }; // Sanitize db options. 
The caller should delete result.info_log if From 9e6d5eb832be2096e7f6984ba1a53ecb88355049 Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 29 Dec 2024 22:03:11 +0800 Subject: [PATCH 27/32] =?UTF-8?q?=E4=BD=BF=E7=94=A8SliceHashTable=E4=BB=A3?= =?UTF-8?q?=E6=9B=BFunordered=5Fset,=E5=B9=B6=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E4=BA=86=E4=B8=80=E4=BA=9Bbenchmark?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/db_impl.cc | 2 +- fielddb/SliceHashSet.h | 24 ++++++++++++++++++++++++ fielddb/field_db.cpp | 17 ++++++++++------- fielddb/field_db.h | 5 +++-- fielddb/request.cpp | 22 +++++++++++----------- fielddb/request.h | 13 +++++++------ 6 files changed, 56 insertions(+), 27 deletions(-) create mode 100644 fielddb/SliceHashSet.h diff --git a/db/db_impl.cc b/db/db_impl.cc index 78e2382..36afa1c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1310,7 +1310,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) { Total_elapsed += env_->NowMicros() - start_; NoWaiting_elapsed += env_->NowMicros() - start_; Nowaited_count ++; - dumpStatistics(); + // dumpStatistics(); return status; } diff --git a/fielddb/SliceHashSet.h b/fielddb/SliceHashSet.h new file mode 100644 index 0000000..1b728e2 --- /dev/null +++ b/fielddb/SliceHashSet.h @@ -0,0 +1,24 @@ +# ifndef SLICE_HASH_SET_H +# define SLICE_HASH_SET_H + +#include "leveldb/slice.h" +#include "util/hash.h" +#include +using namespace leveldb; +class SliceHash { +public: + uint32_t operator()(const Slice &lhs) const { + return Hash(lhs.data(),lhs.size(),0x1234); + } +}; + +class SliceEq { +public: + bool operator()(const Slice &lhs, const Slice &rhs) const { + return lhs == rhs; + } +}; + +using SliceHashSet = std::unordered_set; + +#endif \ No newline at end of file diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 5685168..e757b66 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -19,6 +19,7 @@ #include 
"fielddb/encode_index.h" #include "fielddb/meta.h" #include "field_db.h" +#include "fielddb/SliceHashSet.h" namespace fielddb { using namespace leveldb; @@ -36,15 +37,17 @@ Status FieldDB::OpenFieldDB(Options& options, // options.block_cache = NewLRUCache(ULONG_MAX); // options.max_open_files = 1000; // options.write_buffer_size = 512 * 1024 * 1024; - // options.env = getPosixEnv(); + + //这里是为了让3个数据库有独立的的Background thread + options.env = getPosixEnv(); status = Open(options, name+"_indexDB", &indexdb); if(!status.ok()) return status; - // options.env = getPosixEnv(); + options.env = getPosixEnv(); status = Open(options, name+"_kvDB", &kvdb); if(!status.ok()) return status; - // options.env = getPosixEnv(); + options.env = getPosixEnv(); status = Open(options, name+"_metaDB", &metadb); if(!status.ok()) return status; @@ -127,7 +130,7 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { taskqueue_.push_back(&req); while(true){ uint64_t start_waiting = env_->NowMicros(); - while(!req.done && &req != taskqueue_.front()) { + while(req.isPending() || !req.done && &req != taskqueue_.front()) { req.cond_.Wait(); } waiting_elasped += env_->NowMicros() - start_waiting; @@ -139,7 +142,7 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { } Request *tail = GetHandleInterval(); WriteBatch KVBatch,IndexBatch,MetaBatch; - std::unordered_set batchKeySet; + SliceHashSet batchKeySet; Status status; if(!tail->isiCreateReq() && !tail->isiDeleteReq()) { //表明这一个区间并没有涉及index的创建删除 @@ -312,7 +315,7 @@ Status FieldDB::CreateIndexOnField(const std::string& field_name, const WriteOpt return req.s; } WriteBatch KVBatch,IndexBatch,MetaBatch; - std::unordered_set useless; + SliceHashSet useless; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); indexDB_->Write(op, &IndexBatch); req.Finalize(this); @@ -328,7 +331,7 @@ Status FieldDB::DeleteIndex(const std::string &field_name, const WriteOptions &o return req.s; } WriteBatch 
KVBatch,IndexBatch,MetaBatch; - std::unordered_set useless; + SliceHashSet useless; req.ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, useless); indexDB_->Write(op, &IndexBatch); req.Finalize(this); diff --git a/fielddb/field_db.h b/fielddb/field_db.h index b920293..2cd458f 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -111,7 +111,8 @@ private: uint64_t write_clean_elapsed = 0; uint64_t write_bytes = 0; - uint64_t write_bytes_lim = 50 * 1024 * 1024; + uint64_t write_step = 500 * 1024 * 1024; + uint64_t write_bytes_lim = write_step; uint64_t temp_elapsed = 0; @@ -140,7 +141,7 @@ private: std::cout << "waiting elapsed : " << waiting_elasped * 1.0 / count << std::endl; // std::cout << MetaBatch.ApproximateSize() << " " << IndexBatch.ApproximateSize() << " " << KVBatch.ApproximateSize() << std::endl; std::cout << "=====================================================\n"; - write_bytes_lim = write_bytes + 50 * 1024 * 1024; + write_bytes_lim = write_bytes + write_step; std::fflush(stdout); } } diff --git a/fielddb/request.cpp b/fielddb/request.cpp index b90470a..9a53a6a 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -25,7 +25,7 @@ void Request::PendReq(Request *req) { //为虚函数提供最基本的实现 void Request::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB, - std::unordered_set &batchKeySet) + SliceHashSet &batchKeySet) { assert(0); } @@ -48,12 +48,12 @@ bool Request::isPending() { /*******FieldsReq*******/ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB, - std::unordered_set &batchKeySet) + SliceHashSet &batchKeySet) { - if (batchKeySet.find(Key.ToString()) != batchKeySet.end()){ + if (batchKeySet.find(Key) != batchKeySet.end()){ return;//并发的被合并的put/delete请求只处理一次 } else { - batchKeySet.insert(Key.ToString()); + batchKeySet.insert(Key); } std::string val_str; Status s = Status::NotFound("test"); @@ -164,12 +164,12 @@ void 
FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, /*******DeleteReq*******/ void DeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB, - std::unordered_set &batchKeySet) + SliceHashSet &batchKeySet) { - if (batchKeySet.find(Key.ToString()) != batchKeySet.end()){ + if (batchKeySet.find(Key) != batchKeySet.end()){ return;//并发的被合并的put/delete请求只处理一次 } else { - batchKeySet.insert(Key.ToString()); + batchKeySet.insert(Key); } //1. 读取当前的最新的键值对,判断是否存在含有键值对的field //2.1 如果无,则正常构造delete @@ -263,7 +263,7 @@ void iCreateReq::PendReq(Request *req) { void iCreateReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch &MetaBatch,fielddb::FieldDB *DB, - std::unordered_set &batchKeySet) + SliceHashSet &batchKeySet) { //遍历数据库,构建二级索引到indexbatch,(更新metaDB中的元数据为Index类型的(Field,Creating)) //一个indexwritebatch写入,那么索引创建删除应该和metadb没有交互 @@ -322,7 +322,7 @@ void iDeleteReq::PendReq(Request* req) { } void iDeleteReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet) { std::vector> keysAndVal = DB->FindKeysAndValByFieldName(Field); @@ -404,10 +404,10 @@ BatchReq::~BatchReq() { } void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet) { WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; - std::unordered_set Sub_batchKeySet; + SliceHashSet Sub_batchKeySet; //由于batch是有顺序的,根据我们现在的一个key只处理最开始的算法,这里需要反向迭代 uint64_t start_ = DB->env_->NowMicros(); for(auto subreq = sub_requests.rbegin(); subreq != sub_requests.rend(); subreq++ ) { diff --git a/fielddb/request.h b/fielddb/request.h index 4e68596..b4847fe 100644 --- a/fielddb/request.h +++ b/fielddb/request.h @@ -9,6 +9,7 @@ #include 
"util/serialize_value.h" #include // #include "fielddb/field_db.h" +#include "fielddb/SliceHashSet.h" #ifndef REQUEST_H #define REQUEST_H @@ -48,7 +49,7 @@ public: //用于含有Fields的 virtual void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet); + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet); //主要用于icreate和idelete在队列中的注册当前状态 virtual void Prepare(FieldDB *DB); virtual void Finalize(FieldDB *DB); @@ -87,7 +88,7 @@ public: } void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet) override; Slice Key; FieldSliceArray SliceFields; @@ -112,7 +113,7 @@ public: Field(Field),Request(iCreateReq_t, mu),Existed(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; void PendReq(Request *req) override; @@ -129,7 +130,7 @@ public: Field(Field),Request(iDeleteReq_t, mu),Deleted(false) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet) override; void Prepare(FieldDB *DB) override; void Finalize(FieldDB *DB) override; void PendReq(Request *req) override; @@ -146,7 +147,7 @@ public: Key(Key),Request(DeleteReq_t,mu) { }; void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet) override; Slice Key; }; @@ -157,7 
+158,7 @@ public: ~BatchReq(); void ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, - WriteBatch &MetaBatch,fielddb::FieldDB *DB,std::unordered_set &batchKeySet) override; + WriteBatch &MetaBatch,fielddb::FieldDB *DB,SliceHashSet &batchKeySet) override; WriteBatch *Batch; std::deque sub_requests; From e2ddb615d276df01709d212aa0b3ca1ff3c01abb Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 29 Dec 2024 22:14:05 +0800 Subject: [PATCH 28/32] =?UTF-8?q?=E6=8E=A5=E4=B8=8A=E4=B8=80=E4=B8=AAcommi?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench_FieldDB.cc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index fc5e6f4..9b032a7 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -79,6 +79,8 @@ static const char* FLAGS_benchmarks = "ReadSeqWhileDeleting," "ReadRandomWhileCreating," "ReadRandomWhileDeleting," + "WriteRandomWithIndex," + "WriteSeqWithIndex," "snappycomp," "snappyuncomp," "zstdcomp," @@ -683,6 +685,12 @@ class Benchmark { } else if (name == Slice("ReadRandomWhileDeleting")) { num_threads++; method = &Benchmark::ReadRandomWhileDeleting; + } else if (name == Slice("WriteRandomWithIndex")) { + fresh_db = true; + method = &Benchmark::WriteRandomWithIndex; + } else if (name == Slice("WriteSeqWithIndex")) { + fresh_db = true; + method = &Benchmark::WriteSeqWithIndex; } else if (name == Slice("snappycomp")) { method = &Benchmark::SnappyCompress; } else if (name == Slice("snappyuncomp")) { @@ -1146,6 +1154,18 @@ class Benchmark { db_->DeleteIndex("age", write_options_); } + void WriteSeqWithIndex(ThreadState* thread) { + CreateIndex(thread); + thread->stats.Start(); + WriteSeq(thread); + } + + void WriteRandomWithIndex(ThreadState* thread) { + CreateIndex(thread); + thread->stats.Start(); + WriteRandom(thread); + } + void 
WriteSeqWhileCreating(ThreadState* thread) { if (thread->tid > 0) { WriteSeq(thread); From 7bdcf46bd73a79a0bbce4e95d4283bb9aa88c2be Mon Sep 17 00:00:00 2001 From: cyq <1056374449@qq.com> Date: Sun, 29 Dec 2024 22:21:13 +0800 Subject: [PATCH 29/32] =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index e757b66..346c8c9 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -137,7 +137,7 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { if(req.done) { elapsed += env_->NowMicros() - start_; count ++; - dumpStatistics(); + // dumpStatistics(); return req.s; //在返回时自动释放锁L } Request *tail = GetHandleInterval(); From 7f6ccefce0fad8aae3101bc5b7f752872643918a Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Mon, 30 Dec 2024 10:32:31 +0800 Subject: [PATCH 30/32] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86=E5=88=9B?= =?UTF-8?q?=E5=88=A0=E7=B4=A2=E5=BC=95=E6=97=B6=E9=97=B4=E7=AE=97=E5=9C=A8?= =?UTF-8?q?=E5=B9=B6=E5=8F=91=E8=AF=BB=E4=B8=8A=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench_FieldDB.cc | 181 +++++++++++++++++++---------------------- test/recover_test.cc | 4 +- 2 files changed, 85 insertions(+), 100 deletions(-) diff --git a/benchmarks/db_bench_FieldDB.cc b/benchmarks/db_bench_FieldDB.cc index 4238ef6..c7029aa 100644 --- a/benchmarks/db_bench_FieldDB.cc +++ b/benchmarks/db_bench_FieldDB.cc @@ -63,7 +63,6 @@ static const char* FLAGS_benchmarks = "readreverse," "fill100K," "crc32c," - "readwhilewriting," "CreateIndex," "FindKeysByField," "QueryByIndex," @@ -78,11 +77,7 @@ static const char* FLAGS_benchmarks = "ReadSeqWhileCreating," "ReadSeqWhileDeleting," "ReadRandomWhileCreating," - 
"ReadRandomWhileDeleting," - "snappycomp," - "snappyuncomp," - "zstdcomp," - "zstduncomp,"; + "ReadRandomWhileDeleting,"; // Number of key/values to place in database static int FLAGS_num = 1000000; @@ -1155,17 +1150,7 @@ class Benchmark { std::fprintf(stderr, "index status error in WriteWhileCreating\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::Exist) { - break; - } - - db_->CreateIndexOnField("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); + db_->CreateIndexOnField("age", write_options_); } } @@ -1178,17 +1163,7 @@ class Benchmark { std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { - break; - } - - db_->DeleteIndex("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); + db_->DeleteIndex("age", write_options_); } } @@ -1201,17 +1176,7 @@ class Benchmark { std::fprintf(stderr, "index status error in WriteWhileCreating\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::Exist) { - break; - } - - db_->CreateIndexOnField("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); + db_->CreateIndexOnField("age", write_options_); } } @@ -1224,109 +1189,129 @@ class Benchmark { std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { - break; - } - - db_->DeleteIndex("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. 
- thread->stats.Start(); + db_->DeleteIndex("age", write_options_); } } void ReadSeqWhileCreating(ThreadState* thread) { if (thread->tid > 0) { - ReadSequential(thread); + Iterator* iter = db_->NewIterator(ReadOptions()); + iter->SeekToFirst(); + int64_t bytes = 0; + while (true) { + { + MutexLock l(&thread->shared->mu); + if (thread->shared->num_done == 1) { + // 创删索引完成 + delete iter; + thread->stats.AddBytes(bytes); + break; + } + } + bytes += iter->key().size() + iter->value().size(); + thread->stats.FinishedSingleOp(); + iter->Next(); + if (!iter->Valid()) iter->SeekToFirst(); + } } else { // Special thread that keeps creating index until other threads are done. if (db_->GetIndexStatus("age") != IndexStatus::NotExist) { std::fprintf(stderr, "index status error in WriteWhileCreating\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::Exist) { - break; - } - - db_->CreateIndexOnField("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); + db_->CreateIndexOnField("age", write_options_); } } void ReadSeqWhileDeleting(ThreadState* thread) { if (thread->tid > 0) { - ReadSequential(thread); + Iterator* iter = db_->NewIterator(ReadOptions()); + iter->SeekToFirst(); + int64_t bytes = 0; + while (true) { + { + MutexLock l(&thread->shared->mu); + if (thread->shared->num_done == 1) { + // 创删索引完成 + delete iter; + thread->stats.AddBytes(bytes); + break; + } + } + bytes += iter->key().size() + iter->value().size(); + thread->stats.FinishedSingleOp(); + iter->Next(); + if (!iter->Valid()) iter->SeekToFirst(); + } } else { // Special thread that keeps creating index until other threads are done. 
if (db_->GetIndexStatus("age") != IndexStatus::Exist) { std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { - break; - } - - db_->DeleteIndex("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); + db_->DeleteIndex("age", write_options_); } } void ReadRandomWhileCreating(ThreadState* thread) { if (thread->tid > 0) { - ReadRandom(thread); + ReadOptions options; + int found = 0; + KeyBuffer key; + while (true) { + { + MutexLock l(&thread->shared->mu); + if (thread->shared->num_done == 1) { + // 创删索引完成 + break; + } + } + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + FieldArray fields_ret; + if (db_->GetFields(options, key.slice(), &fields_ret).ok()) { + found++; + } + thread->stats.FinishedSingleOp(); + } } else { // Special thread that keeps creating index until other threads are done. if (db_->GetIndexStatus("age") != IndexStatus::NotExist) { std::fprintf(stderr, "index status error in WriteWhileCreating\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::Exist) { - break; - } - - db_->CreateIndexOnField("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); + db_->CreateIndexOnField("age", write_options_); } } void ReadRandomWhileDeleting(ThreadState* thread) { if (thread->tid > 0) { - ReadRandom(thread); + ReadOptions options; + int found = 0; + KeyBuffer key; + while (true) { + { + MutexLock l(&thread->shared->mu); + if (thread->shared->num_done == 1) { + // 创删索引完成 + break; + } + } + const int k = thread->rand.Uniform(FLAGS_num); + key.Set(k); + FieldArray fields_ret; + if (db_->GetFields(options, key.slice(), &fields_ret).ok()) { + found++; + } + thread->stats.FinishedSingleOp(); + } } else { // Special thread that keeps creating index until other threads are done. 
if (db_->GetIndexStatus("age") != IndexStatus::Exist) { std::fprintf(stderr, "index status error in WriteWhileDeleting\n"); std::exit(1); } - - while (true) { - if (db_->GetIndexStatus("age") == IndexStatus::NotExist) { - break; - } - - db_->DeleteIndex("age", write_options_); - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); + db_->DeleteIndex("age", write_options_); } } }; diff --git a/test/recover_test.cc b/test/recover_test.cc index de06aea..3d42f80 100644 --- a/test/recover_test.cc +++ b/test/recover_test.cc @@ -45,8 +45,8 @@ TEST(TestParalRecover, Recover) { // std::cerr << "open db failed" << std::endl; // abort(); // } - // db->CreateIndexOnField("address"); - // db->CreateIndexOnField("age"); + // db->CreateIndexOnField("address", op); + // db->CreateIndexOnField("age", op); // int thread_num_ = 4; // std::vector threads(thread_num_); // threads[0] = std::thread([db](){ From 75ff6b6c12be69785c67e007adf57d6301f6ca9f Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Mon, 30 Dec 2024 10:50:22 +0800 Subject: [PATCH 31/32] =?UTF-8?q?=E6=8F=92=E6=A1=A9=E4=BC=9A=E5=BD=B1?= =?UTF-8?q?=E5=93=8D=E6=81=A2=E5=A4=8D=E6=B5=8B=E8=AF=95=EF=BC=88env?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=89=E5=85=88=E6=B3=A8=E9=87=8A?= =?UTF-8?q?=E4=BA=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 36 ++++++++++++++++++------------------ fielddb/request.cpp | 15 +++++++-------- test/recover_test.cc | 1 + 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 346c8c9..63c9f2a 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -125,17 +125,17 @@ Request *FieldDB::GetHandleInterval() { } Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { - uint64_t start_ = env_->NowMicros(); + //uint64_t start_ = env_->NowMicros(); MutexLock L(&mutex_); 
taskqueue_.push_back(&req); while(true){ - uint64_t start_waiting = env_->NowMicros(); + //uint64_t start_waiting = env_->NowMicros(); while(req.isPending() || !req.done && &req != taskqueue_.front()) { req.cond_.Wait(); } - waiting_elasped += env_->NowMicros() - start_waiting; + //waiting_elasped += env_->NowMicros() - start_waiting; if(req.done) { - elapsed += env_->NowMicros() - start_; + //elapsed += env_->NowMicros() - start_; count ++; // dumpStatistics(); return req.s; //在返回时自动释放锁L @@ -149,48 +149,48 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { { //1. 构建各个Batch。构建的过程中要保证索引状态的一致性,需要上锁。 MutexLock iL(&index_mu); - uint64_t start_construct = env_->NowMicros(); + //uint64_t start_construct = env_->NowMicros(); for(auto *req_ptr : taskqueue_) { req_ptr->ConstructBatch(KVBatch, IndexBatch, MetaBatch, this, batchKeySet); if(req_ptr == tail) break; } - construct_elapsed += env_->NowMicros() - start_construct; + //construct_elapsed += env_->NowMicros() - start_construct; } //2. 
首先写入meta,再并发写入index和kv,完成之后清除meta数据 //此处可以放锁是因为写入的有序性可以通过队列来保证 mutex_.Unlock(); - uint64_t start_write = env_->NowMicros(); + //uint64_t start_write = env_->NowMicros(); if(MetaBatch.ApproximateSize() > 12) { - uint64_t start_meta = env_->NowMicros(); + //uint64_t start_meta = env_->NowMicros(); status = metaDB_->Write(op, &MetaBatch); - write_meta_elapsed += env_->NowMicros() - start_meta; + //write_meta_elapsed += env_->NowMicros() - start_meta; write_bytes += MetaBatch.ApproximateSize(); assert(status.ok()); } //TODO:index的写入需要在另外一个线程中同时完成 if(IndexBatch.ApproximateSize() > 12) { - uint64_t start_index = env_->NowMicros(); + //uint64_t start_index = env_->NowMicros(); status = indexDB_->Write(op, &IndexBatch); - write_index_elapsed += env_->NowMicros() - start_index; + //write_index_elapsed += env_->NowMicros() - start_index; write_bytes += IndexBatch.ApproximateSize(); assert(status.ok()); } if(KVBatch.ApproximateSize() > 12) { - uint64_t start_kv = env_->NowMicros(); + //uint64_t start_kv = env_->NowMicros(); status = kvDB_->Write(op, &KVBatch); - write_kv_elapsed += env_->NowMicros() - start_kv; + //write_kv_elapsed += env_->NowMicros() - start_kv; write_bytes += KVBatch.ApproximateSize(); assert(status.ok()); } //3. 
将meta数据清除 if(MetaBatch.ApproximateSize() > 12) { - uint64_t start_clean = env_->NowMicros(); + //uint64_t start_clean = env_->NowMicros(); MetaCleaner cleaner; cleaner.Collect(MetaBatch); cleaner.CleanMetaBatch(metaDB_); - write_clean_elapsed += env_->NowMicros() - start_clean; + //write_clean_elapsed += env_->NowMicros() - start_clean; } - write_elapsed += env_->NowMicros() - start_write; + //write_elapsed += env_->NowMicros() - start_write; mutex_.Lock(); } else { //对于创建和删除索引的请求,通过prepare完成索引状态的更新 @@ -263,9 +263,9 @@ Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { // dumpStatistics(); // return status; // } - uint64_t start_ = env_->NowMicros(); + //uint64_t start_ = env_->NowMicros(); BatchReq req(updates,&mutex_); - construct_BatchReq_init_elapsed += env_->NowMicros() - start_; + //construct_BatchReq_init_elapsed += env_->NowMicros() - start_; Status status = HandleRequest(req, options); return status; } diff --git a/fielddb/request.cpp b/fielddb/request.cpp index 9a53a6a..2fe585b 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -56,10 +56,9 @@ void FieldsReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, batchKeySet.insert(Key); } std::string val_str; - Status s = Status::NotFound("test"); - uint64_t start_ = DB->env_->NowMicros(); + //uint64_t start_ = DB->env_->NowMicros(); s = DB->kvDB_->Get(ReadOptions(), Key, &val_str); - DB->construct_FieldsReq_Read_elapsed += DB->env_->NowMicros() - start_; + //DB->construct_FieldsReq_Read_elapsed += DB->env_->NowMicros() - start_; // FieldArray *oldFields; FieldSliceArray oldFields; if (s.IsNotFound()){ @@ -409,12 +408,12 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, WriteBatch Sub_KVBatch,Sub_IndexBatch,Sub_MetaBatch; SliceHashSet Sub_batchKeySet; //由于batch是有顺序的,根据我们现在的一个key只处理最开始的算法,这里需要反向迭代 - uint64_t start_ = DB->env_->NowMicros(); + //uint64_t start_ = DB->env_->NowMicros(); for(auto subreq = sub_requests.rbegin(); subreq != 
sub_requests.rend(); subreq++ ) { - uint64_t start_sub = DB->env_->NowMicros(); + //uint64_t start_sub = DB->env_->NowMicros(); (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); // (*subreq)->ConstructBatch(KVBatch, IndexBatch, MetaBatch, DB, batchKeySet); - DB->construct_BatchReq_perSub_elapsed += DB->env_->NowMicros() - start_sub; + //DB->construct_BatchReq_perSub_elapsed += DB->env_->NowMicros() - start_sub; DB->count_Batch_Sub ++; //所有的对于pendreq的调用传入的参数被改成了this->parent,因此,对于subrequests来说, //pendreq的传参为对应的Batchreq,因此,此处判断batchreq是否pending可以得到subreq是否有冲突 @@ -422,7 +421,7 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, return; } } - DB->construct_BatchReq_Sub_elapsed += DB->env_->NowMicros() - start_; + //DB->construct_BatchReq_Sub_elapsed += DB->env_->NowMicros() - start_; if(Sub_KVBatch.ApproximateSize() > 12) { KVBatch.Append(Sub_KVBatch); } @@ -433,7 +432,7 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, MetaBatch.Append(Sub_MetaBatch); } batchKeySet.insert(Sub_batchKeySet.begin(),Sub_batchKeySet.end()); - DB->construct_BatchReq_elapsed += DB->env_->NowMicros() - start_; + //DB->construct_BatchReq_elapsed += DB->env_->NowMicros() - start_; } diff --git a/test/recover_test.cc b/test/recover_test.cc index 3d42f80..d2b104d 100644 --- a/test/recover_test.cc +++ b/test/recover_test.cc @@ -80,6 +80,7 @@ TEST(TestParalRecover, Recover) { } GetOneField(db); checkDataInKVAndIndex(db); + //这里会出现两个数字,如果>1说明除了线程3插入的一条数据,其他线程也有数据在崩溃前被正确恢复了 } int main(int argc, char** argv) { From 90a15c06fec8b640ca2e278c2a4d64a855d1f62c Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Mon, 30 Dec 2024 10:53:39 +0800 Subject: [PATCH 32/32] =?UTF-8?q?=E4=B8=8A=E6=9D=A1=E6=B2=A1=E6=B3=A8?= =?UTF-8?q?=E9=87=8A=E5=AE=8C=E5=85=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fielddb/field_db.cpp | 8 ++-- 
fielddb/field_db.h | 108 +++++++++++++++++++++++++-------------------------- fielddb/request.cpp | 2 +- 3 files changed, 59 insertions(+), 59 deletions(-) diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 63c9f2a..1e8cafe 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -136,7 +136,7 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { //waiting_elasped += env_->NowMicros() - start_waiting; if(req.done) { //elapsed += env_->NowMicros() - start_; - count ++; + //count ++; // dumpStatistics(); return req.s; //在返回时自动释放锁L } @@ -164,7 +164,7 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { //uint64_t start_meta = env_->NowMicros(); status = metaDB_->Write(op, &MetaBatch); //write_meta_elapsed += env_->NowMicros() - start_meta; - write_bytes += MetaBatch.ApproximateSize(); + //write_bytes += MetaBatch.ApproximateSize(); assert(status.ok()); } //TODO:index的写入需要在另外一个线程中同时完成 @@ -172,14 +172,14 @@ Status FieldDB::HandleRequest(Request &req, const WriteOptions &op) { //uint64_t start_index = env_->NowMicros(); status = indexDB_->Write(op, &IndexBatch); //write_index_elapsed += env_->NowMicros() - start_index; - write_bytes += IndexBatch.ApproximateSize(); + //write_bytes += IndexBatch.ApproximateSize(); assert(status.ok()); } if(KVBatch.ApproximateSize() > 12) { //uint64_t start_kv = env_->NowMicros(); status = kvDB_->Write(op, &KVBatch); //write_kv_elapsed += env_->NowMicros() - start_kv; - write_bytes += KVBatch.ApproximateSize(); + //write_bytes += KVBatch.ApproximateSize(); assert(status.ok()); } //3. 
将meta数据清除 diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 2cd458f..ed9b87f 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -91,60 +91,60 @@ private: Status HandleRequest(Request &req, const WriteOptions &op); //每个请求自行构造请求后交由这个函数处理 Request *GetHandleInterval(); //获得任务队列中的待处理区间,区间划分规则和原因见文档 -private: - int count = 0; - int count_Batch = 0; - int count_Batch_Sub = 0; - uint64_t elapsed = 0; - - uint64_t construct_elapsed = 0; - uint64_t construct_BatchReq_init_elapsed = 0; - uint64_t construct_BatchReq_elapsed = 0; - uint64_t construct_BatchReq_Sub_elapsed = 0; - uint64_t construct_BatchReq_perSub_elapsed = 0; - uint64_t construct_FieldsReq_Read_elapsed = 0; - - uint64_t write_elapsed = 0; - uint64_t write_meta_elapsed = 0; - uint64_t write_index_elapsed = 0; - uint64_t write_kv_elapsed = 0; - uint64_t write_clean_elapsed = 0; - - uint64_t write_bytes = 0; - uint64_t write_step = 500 * 1024 * 1024; - uint64_t write_bytes_lim = write_step; - - uint64_t temp_elapsed = 0; - - uint64_t waiting_elasped = 0; - - inline void dumpStatistics() { - if(count && count % 500000 == 0 || write_bytes && write_bytes > write_bytes_lim) { - std::cout << "=====================================================\n"; - std::cout << "Total Count : " << count; - std::cout << "\tTotal Write Bytes(MB) : " << write_bytes / 1048576.0 << std::endl; - std::cout << "Average Time(ms) : " << elapsed * 1.0 / count; - std::cout << "\tAverage Write rates(MB/s) : " << write_bytes / 1048576.0 / elapsed * 1000000 << std::endl; - std::cout << "Construct Time(ms) : " << construct_elapsed * 1.0 / count << std::endl; - std::cout << "\tConstruct BatchReq Init Time(ms) : " << construct_BatchReq_init_elapsed * 1.0 / count << std::endl; - std::cout << "\tConstruct BatchReq Time(ms) : " << construct_BatchReq_elapsed * 1.0 / count << std::endl; - std::cout << "\tConstruct BatchReq Sub Time(ms) : " << construct_BatchReq_Sub_elapsed * 1.0 / count << std::endl; - std::cout << "\tConstruct BatchReq 
perSub Time(ms) : " << construct_BatchReq_perSub_elapsed * 1.0 / count_Batch_Sub << std::endl; - std::cout << "\tConstruct FieldsReq Read Time(ms) : " << construct_FieldsReq_Read_elapsed * 1.0 / count << std::endl; - std::cout << "Write Time(ms) : " << write_elapsed * 1.0 / count << std::endl; - std::cout << "\tWrite Meta Time(ms) : " << write_meta_elapsed * 1.0 / count << std::endl; - std::cout << "\tWrite Index Time(ms) : " << write_index_elapsed * 1.0 / count << std::endl; - std::cout << "\tWrite KV Time(ms) : " << write_kv_elapsed * 1.0 / count << std::endl; - std::cout << "\tWrite Clean Time(ms) : " << write_clean_elapsed * 1.0 / count << std::endl; - std::cout << "TaskQueue Size : " << taskqueue_.size() << std::endl; - std::cout << "temp_elased : " << temp_elapsed * 1.0 / count << std::endl; - std::cout << "waiting elapsed : " << waiting_elasped * 1.0 / count << std::endl; - // std::cout << MetaBatch.ApproximateSize() << " " << IndexBatch.ApproximateSize() << " " << KVBatch.ApproximateSize() << std::endl; - std::cout << "=====================================================\n"; - write_bytes_lim = write_bytes + write_step; - std::fflush(stdout); - } - } +// private: +// int count = 0; +// int count_Batch = 0; +// int count_Batch_Sub = 0; +// uint64_t elapsed = 0; + +// uint64_t construct_elapsed = 0; +// uint64_t construct_BatchReq_init_elapsed = 0; +// uint64_t construct_BatchReq_elapsed = 0; +// uint64_t construct_BatchReq_Sub_elapsed = 0; +// uint64_t construct_BatchReq_perSub_elapsed = 0; +// uint64_t construct_FieldsReq_Read_elapsed = 0; + +// uint64_t write_elapsed = 0; +// uint64_t write_meta_elapsed = 0; +// uint64_t write_index_elapsed = 0; +// uint64_t write_kv_elapsed = 0; +// uint64_t write_clean_elapsed = 0; + +// uint64_t write_bytes = 0; +// uint64_t write_step = 500 * 1024 * 1024; +// uint64_t write_bytes_lim = write_step; + +// uint64_t temp_elapsed = 0; + +// uint64_t waiting_elasped = 0; + +// inline void dumpStatistics() { +// if(count && 
count % 500000 == 0 || write_bytes && write_bytes > write_bytes_lim) { +// std::cout << "=====================================================\n"; +// std::cout << "Total Count : " << count; +// std::cout << "\tTotal Write Bytes(MB) : " << write_bytes / 1048576.0 << std::endl; +// std::cout << "Average Time(ms) : " << elapsed * 1.0 / count; +// std::cout << "\tAverage Write rates(MB/s) : " << write_bytes / 1048576.0 / elapsed * 1000000 << std::endl; +// std::cout << "Construct Time(ms) : " << construct_elapsed * 1.0 / count << std::endl; +// std::cout << "\tConstruct BatchReq Init Time(ms) : " << construct_BatchReq_init_elapsed * 1.0 / count << std::endl; +// std::cout << "\tConstruct BatchReq Time(ms) : " << construct_BatchReq_elapsed * 1.0 / count << std::endl; +// std::cout << "\tConstruct BatchReq Sub Time(ms) : " << construct_BatchReq_Sub_elapsed * 1.0 / count << std::endl; +// std::cout << "\tConstruct BatchReq perSub Time(ms) : " << construct_BatchReq_perSub_elapsed * 1.0 / count_Batch_Sub << std::endl; +// std::cout << "\tConstruct FieldsReq Read Time(ms) : " << construct_FieldsReq_Read_elapsed * 1.0 / count << std::endl; +// std::cout << "Write Time(ms) : " << write_elapsed * 1.0 / count << std::endl; +// std::cout << "\tWrite Meta Time(ms) : " << write_meta_elapsed * 1.0 / count << std::endl; +// std::cout << "\tWrite Index Time(ms) : " << write_index_elapsed * 1.0 / count << std::endl; +// std::cout << "\tWrite KV Time(ms) : " << write_kv_elapsed * 1.0 / count << std::endl; +// std::cout << "\tWrite Clean Time(ms) : " << write_clean_elapsed * 1.0 / count << std::endl; +// std::cout << "TaskQueue Size : " << taskqueue_.size() << std::endl; +// std::cout << "temp_elased : " << temp_elapsed * 1.0 / count << std::endl; +// std::cout << "waiting elapsed : " << waiting_elasped * 1.0 / count << std::endl; +// // std::cout << MetaBatch.ApproximateSize() << " " << IndexBatch.ApproximateSize() << " " << KVBatch.ApproximateSize() << std::endl; +// std::cout << 
"=====================================================\n"; +// write_bytes_lim = write_bytes + write_step; +// std::fflush(stdout); +// } +// } }; Status DestroyDB(const std::string& name, diff --git a/fielddb/request.cpp b/fielddb/request.cpp index 2fe585b..a614f7b 100644 --- a/fielddb/request.cpp +++ b/fielddb/request.cpp @@ -414,7 +414,7 @@ void BatchReq::ConstructBatch(WriteBatch &KVBatch,WriteBatch &IndexBatch, (*subreq)->ConstructBatch(Sub_KVBatch, Sub_IndexBatch, Sub_MetaBatch, DB, Sub_batchKeySet); // (*subreq)->ConstructBatch(KVBatch, IndexBatch, MetaBatch, DB, batchKeySet); //DB->construct_BatchReq_perSub_elapsed += DB->env_->NowMicros() - start_sub; - DB->count_Batch_Sub ++; + //DB->count_Batch_Sub ++; //所有的对于pendreq的调用传入的参数被改成了this->parent,因此,对于subrequests来说, //pendreq的传参为对应的Batchreq,因此,此处判断batchreq是否pending可以得到subreq是否有冲突 if(isPending()) {