From abe34b2c6bd94d462def7d45aec003fb2c0d50ae Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Tue, 26 Nov 2024 11:03:27 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=88=86=E5=B7=A5=E4=B8=8E=E8=A1=A5?= =?UTF-8?q?=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 设计文档.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/设计文档.md b/设计文档.md index 12ad731..babc37e 100644 --- a/设计文档.md +++ b/设计文档.md @@ -8,7 +8,19 @@ leveldb中的存储原本只支持简单的字节序列,在这个项目中我 设计目标:对value存储读取时进行序列化编码,使其支持字段。 实现思路:设计之初有考虑增加一些元数据(例如过滤器、字段偏移支持二分)来加速查询。但考虑到在数据库中kv的数量是十分庞大的,新加数据结构会带来巨大的空间开销。因此我们决定在这里牺牲时间换取空间,而将时间的加速放在索引中。 -在这一基础上,我们对序列化进行了简单的优化:将字段名排序后,一一调用leveldb中原本的编码方法`PutLengthPrefixedSlice`存入value。这样不会有额外的空间开销,而好处在于遍历一个value的字段时,如果得到的字段名比目标大,就可以提前结束遍历。 +在这一基础上,我们对序列化进行了简单的优化:将字段名排序后,一一调用leveldb中原本的编码方法`PutLengthPrefixedSlice`存入value。这样不会有额外的空间开销,而好处在于遍历一个value的字段时,如果得到的字段名比目标大,就可以提前结束遍历。 +``` +std::string SerializeValue(const FieldArray& fields){ + FieldArray sortFields = fields; + std::sort(sortFields.begin(), sortFields.end(), compareByFirst); + std::string result; + for (const Field& pairs : sortFields) { + PutLengthPrefixedSlice(&result, pairs.first); + PutLengthPrefixedSlice(&result, pairs.second); + } + return result; +} +``` 最终db类提供了新接口`putFields`, `getFields`,分别对传入的字段序列化后调用原来的`put`, `get`接口。 `FindKeysByField`调用`NewIterator`遍历所有数据,field名和值符合则加入返回的key中。 @@ -49,14 +61,14 @@ class FieldDb { 5. fieldDb也提供了`put`, `get`,`getFields`, `putFields`,`delete`接口。对前三者,简单调用`kvDb`中的对应接口(不涉及索引)。 对`putFields`,先判断是否有`fieldWithIndex`中有的字段,如果有,并对`kvDb`调用一个(多个)`put`,**但在写日志时一并加上索引日志写入**。 `delete`逻辑一致。 -6. 针对索引的日志:为了保证两个数据库间的一致性,由`kvDb`的日志模块统一管理。这其中包含了两种chunk(kv写入和索引写入),在恢复时需要分别解析,决定往哪一个数据库中写入。索引写入的时机在4、5中的**加粗**部分,如何编码还有待设计。也就是说,`indexDb`本身的索引模块不再起到作用,项目后期可以修改关闭这一部分。 +6. 针对索引的日志:为了保证两个数据库间的一致性,由`kvDb`的日志模块统一管理。这其中包含了两种chunk(kv写入和索引写入),在恢复时需要分别解析,决定往哪一个数据库中写入。索引写入的时机在4、5中的**加粗**部分,如何编码还有待设计。也就是说,`indexDb`本身的日志模块不再起到作用,项目后期可以修改关闭这一部分。 7. 对两个数据库的其他部分,理论上每个数据库内部的其他模块不会互相影响。 # 3. 数据结构设计 具体设计模块化,实现时再具体考虑。 `indexDb`的kv编码:**暂时考虑助教文档那种** -区分日志中kv部分和index部分:**有待完成** +区分日志中kv部分和index部分:思路是在writebatch中某个地方加个标识区分,每一类的编码与各自的key编码类似**细节有待完成** # 4. 接口/函数设计 `FieldDb`的对外接口函数之前已展示,这里补充一些子数据库需提供给`FieldDb`的抽象功能(暂时想到的): @@ -84,3 +96,9 @@ mark一些原代码(db中)的修改点:recover时日志解析修改,write时 功能 | 完成日期 | 分工 :------|:---------|:------ value序列化|11.19 | 李度 +fieldDb接口|11.25|陈胤遒 +lab1整体+测试|11.30|高宇菲 +fieldDb功能实现|12.10|李度 +kvdb功能实现与原代码修改|12.10|陈胤遒 +整体系统整合+测试|12.20|李度、陈胤遒、高宇菲 +性能测试|12.30|高宇菲 From d6995373eadc459a536b8aaea0d7ca6f0e371c32 Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Thu, 5 Dec 2024 14:19:56 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E5=AE=8Cbug=EF=BC=8C=E8=A1=A5?= =?UTF-8?q?=E5=85=85lab1=E6=B5=8B=E8=AF=95=EF=BC=8C=E8=B7=91=E9=80=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 + db/db_impl.cc | 15 +++++-- fielddb/field_db.cpp | 89 +++++++++++++++++++------------------ fielddb/field_db.h | 39 +++++++++------- test/lab1_test.cc | 116 ++++++++++++++++++++++++++++++++++++------------ util/serialize_value.cc | 4 +- util/serialize_value.h | 49 ++++++++++---------- 7 files changed, 197 insertions(+), 117 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 981cda2..31536ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,6 +192,8 @@ target_sources(leveldb "util/status.cc" "util/serialize_value.h" "util/serialize_value.cc" + "fielddb/field_db.cpp" + "fielddb/field_db.h" # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install". $<$:PUBLIC> diff --git a/db/db_impl.cc b/db/db_impl.cc index aaff6fb..4a7d25c 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1173,8 +1173,17 @@ Status DBImpl::GetFields(const ReadOptions& options, const Slice& key, return s; } -std::vector DBImpl::FindKeysByField(Field &field){//todo - return std::vector(); +std::vector DBImpl::FindKeysByField(Field &field){ + std::vector result; + auto iter = NewIterator(ReadOptions()); + for(iter->SeekToFirst();iter->Valid();iter->Next()) { + std::string k = iter->key().ToString(); + InternalFieldArray fields(iter->value()); + if(fields.HasField(field)) { + result.push_back(iter->key().ToString()); + } + } + return result; } Iterator* DBImpl::NewIterator(const ReadOptions& options) { @@ -1510,7 +1519,7 @@ Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { Status DB::PutFields(const WriteOptions& opt, const Slice& key, const FieldArray& fields) { std::string value = SerializeValue(fields); - DB::Put(opt, key, value); + return DB::Put(opt, key, value); } Status DB::Delete(const WriteOptions& opt, const Slice& key) { diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index 5d28813..d867411 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -11,40 +11,53 @@ namespace fielddb { using namespace leveldb; //TODO:打开fieldDB -Status FieldDB::OpenFieldDB(const Options& options,const std::string& name,DB** dbptr) { +Status FieldDB::OpenFieldDB(const Options& options, + const std::string& name, FieldDB** dbptr) { // options.env->CreateDir("./abc") - *dbptr = new FieldDB(options,name); - return Status::OK(); -} - -Status FieldDB::Recover() { - -} + if(*dbptr == nullptr){ + return Status::NotSupported(name, "new a fieldDb first\n"); + } -FieldDB::FieldDB(const Options& options,const std::string& name) { Status status; - status = Open(options, name+"_indexDB", &indexDB); - if(!status.ok()) return; - status = Open(options, name+"_kvDB", &kvDB); - if(!status.ok()) return; - status = Open(options, name+"_metaDB", &metaDB); - if(!status.ok()) return; - - Recover(); + DB *indexdb, *kvdb, *metadb; + status = Open(options, name+"_indexDB", &indexdb); + if(!status.ok()) return status; + + status = Open(options, name+"_kvDB", &kvdb); + if(!status.ok()) return status; + status = Open(options, name+"_metaDB", &metadb); + if(!status.ok()) return status; + + (*dbptr)->indexDB_ = indexdb; + (*dbptr)->kvDB_ = kvdb; + (*dbptr)->metaDB_ = metadb; + (*dbptr)->dbname_ = name; + + status = (*dbptr)->Recover(); + return status; } +// todo +Status FieldDB::Recover() { + // + return Status::OK(); +} Status FieldDB::Put(const WriteOptions &options, const Slice &key, const Slice &value) { - return kvDB->Put(options, key, value); + return kvDB_->Put(options, key, value); } // TODO:需要对是否进行index更新做处理 -Status FieldDB::PutFields(const WriteOptions &, const Slice &key, const FieldArray &fields) { - return Status::OK(); +Status FieldDB::PutFields(const WriteOptions &Options, + const Slice &key, const FieldArray &fields) { + // + return kvDB_->PutFields(Options, key, fields); } +// todo: 删除有索引的key时indexdb也要同步 Status FieldDB::Delete(const WriteOptions &options, const Slice &key) { - return kvDB->Delete(options, key); + // + return kvDB_->Delete(options, key); } // TODO:根据updates里面的东西,要对是否需要更新index进行分别处理 Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { @@ -52,56 +65,44 @@ Status FieldDB::Write(const WriteOptions &options, WriteBatch *updates) { } Status FieldDB::Get(const ReadOptions &options, const Slice &key, std::string *value) { - return kvDB->Get(options, key, value); + return kvDB_->Get(options, key, value); } Status FieldDB::GetFields(const ReadOptions &options, const Slice &key, FieldArray *fields) { - std::string value; - Status status; - status = kvDB->Get(options, key, &value); - if(status.ok() == false) return status; - fields = ParseValue(value); - return status; + return kvDB_->GetFields(options, key, fields); + } std::vector FieldDB::FindKeysByField(Field &field) { - std::vector result; - auto iter = kvDB->NewIterator(ReadOptions()); - for(iter->SeekToFirst();iter->Valid();iter->Next()) { - InternalFieldArray fields(iter->value()); - if(fields.HasField(field)) { - result.push_back(iter->key().ToString()); - } - } - return result; + return kvDB_->FindKeysByField(field); } Iterator * FieldDB::NewIterator(const ReadOptions &options) { - return kvDB->NewIterator(options); + return kvDB_->NewIterator(options); } // TODO:使用统一seq进行snapshot管理 const Snapshot * FieldDB::GetSnapshot() { - return kvDB->GetSnapshot(); + return kvDB_->GetSnapshot(); } // TODO:同上 void FieldDB::ReleaseSnapshot(const Snapshot *snapshot) { - kvDB->ReleaseSnapshot(snapshot); + kvDB_->ReleaseSnapshot(snapshot); } bool FieldDB::GetProperty(const Slice &property, std::string *value) { - return kvDB->GetProperty(property, value) | indexDB->GetProperty(property, value); + return kvDB_->GetProperty(property, value) | indexDB_->GetProperty(property, value); } void FieldDB::GetApproximateSizes(const Range *range, int n, uint64_t *sizes) { uint64_t temp = 0; - kvDB->GetApproximateSizes(range, n, sizes); - indexDB->GetApproximateSizes(range, n, &temp); + kvDB_->GetApproximateSizes(range, n, sizes); + indexDB_->GetApproximateSizes(range, n, &temp); *sizes += temp; } void FieldDB::CompactRange(const Slice *begin, const Slice *end) { - kvDB->CompactRange(begin, end); + kvDB_->CompactRange(begin, end); } } // end of namespace \ No newline at end of file diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 411f564..6a3c845 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -1,3 +1,7 @@ +# ifndef FIELD_DB_H +# define FIELD_DB_H + +#include "port/port_stdcxx.h" #include "db/db_impl.h" #include #include @@ -7,16 +11,16 @@ #include "leveldb/options.h" #include "leveldb/slice.h" #include "leveldb/status.h" -#include "port/port_stdcxx.h" + #include "fielddb/request.h" namespace fielddb { using namespace leveldb; -class FieldDB : leveldb::DB { +class FieldDB : DB { public: - FieldDB() = default; - FieldDB(const Options& options,const std::string& name); -/*lab1的要求*/ + //用的时候必须FieldDB *db = new FieldDB()再open,不能像之前一样DB *db + FieldDB() : indexDB_(nullptr), kvDB_(nullptr), metaDB_(nullptr) {}; +/*lab1的要求,作为db派生类要实现的虚函数*/ Status Put(const WriteOptions &options, const Slice &key, const Slice &value) override; Status PutFields(const WriteOptions &, const Slice &key, const FieldArray &fields) override; Status Delete(const WriteOptions &options, const Slice &key) override; @@ -31,29 +35,32 @@ public: void GetApproximateSizes(const Range *range, int n, uint64_t *sizes) override; void CompactRange(const Slice *begin, const Slice *end) override; /*与索引相关*/ - bool CreateIndexOnField(const std::string& field_name); - bool DeleteIndex(std::string &field_name); - std::vector QueryByIndex(Field &field); + Status CreateIndexOnField(const std::string& field_name); + Status DeleteIndex(std::string &field_name); + std::vector QueryByIndex(Field &field, Status *s); - static Status OpenFieldDB(const Options& options,const std::string& name,DB** dbptr); + static Status OpenFieldDB(const Options& options,const std::string& name,FieldDB** dbptr); private: //根据metaDB的内容进行恢复 Status Recover(); private: - leveldb::DB *metaDB; - leveldb::DB *indexDB; - leveldb::DB *kvDB; + std::string dbname_; + + leveldb::DB *metaDB_; + leveldb::DB *indexDB_; + leveldb::DB *kvDB_; enum IndexStatus{ Creating, Deleting, Exist }; - std::map index; - port::Mutex _mutex; // mutex for taskqueue - std::deque taskqueue; + std::map index_; + leveldb::port::Mutex mutex_; // mutex for taskqueue + std::deque taskqueue_; }; -} // end of namespace \ No newline at end of file +} // end of namespace +# endif \ No newline at end of file diff --git a/test/lab1_test.cc b/test/lab1_test.cc index 94b0ee4..7c9a50d 100644 --- a/test/lab1_test.cc +++ b/test/lab1_test.cc @@ -1,46 +1,104 @@ #include "gtest/gtest.h" -#include "leveldb/env.h" -#include "leveldb/db.h" -using namespace leveldb; -using Field = std::pair; // field_name:field_value -using FieldArray = std::vector>; +// #include "leveldb/env.h" +// #include "leveldb/db.h" +#include "fielddb/field_db.h" +using namespace fielddb; -Status OpenDB(std::string dbName, DB **db) { +constexpr int value_size = 2048; +constexpr int data_size = 128 << 20; +std::vector cities = { + "Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou", + "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" + }; +std::vector shanghaiKeys; + +Status OpenDB(std::string dbName, FieldDB **db) { Options options; options.create_if_missing = true; - return DB::Open(options, dbName, db); + return FieldDB::OpenFieldDB(options, dbName, db); } -TEST(TestLab1, Basic) { - DestroyDB("testdb",Options()); - DB *db; +void ClearDB(FieldDB *db){ + //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染 + WriteOptions writeOptions; + int key_num = data_size / value_size; + for (int i = 0; i < key_num; i++) { + int key_ = i+1; + std::string key = std::to_string(key_); + Status s = db->Delete(WriteOptions(), key); + ASSERT_TRUE(s.ok()); + } +} - if(OpenDB("testdb", &db).ok() == false) { - std::cerr << "open db failed" << std::endl; - abort(); - } - std::string key = "k_1"; - +void InsertFieldData(FieldDB *db) { + WriteOptions writeOptions; + int key_num = data_size / value_size; + srand(0); + + for (int i = 0; i < key_num; i++) { + int randThisTime = rand(); //确保读写一个循环只rand一次,否则随机序列会不一致 + int key_ = randThisTime % key_num+1; + std::string key = std::to_string(key_); + + std::string name = "customer#" + std::to_string(key_); + std::string address = cities[randThisTime % cities.size()]; FieldArray fields = { - {"name", "Customer#000000001"}, - {"address", "IVhzIApeRb"}, - {"phone", "25-989-741-2988"} + {"name", name}, + {"address", address} }; + if (address == "Shanghai") { + shanghaiKeys.push_back(key); + } + Status s = db->PutFields(WriteOptions(), key, fields); + ASSERT_TRUE(s.ok()); + } +} - // 序列化并插入 - db->PutFields(WriteOptions(), key, fields); - - // 读取并反序列化 +void GetFieldData(FieldDB *db) { + ReadOptions readOptions; + int key_num = data_size / value_size; + + // 点查 + srand(0); + for (int i = 0; i < 100; i++) { + int randThisTime = rand(); + int key_ = randThisTime % key_num+1; + std::string key = std::to_string(key_); FieldArray fields_ret; - db->GetFields(ReadOptions(), key, &fields_ret); - // ASSERT_EQ(fields, fields_ret); 顺序不一样 + Status s = db->GetFields(readOptions, key, &fields_ret); + ASSERT_TRUE(s.ok()); for (const Field& pairs : fields_ret) { - ASSERT_NE(std::find(fields.begin(), fields.end(), pairs), fields.end()); + if (pairs.first == "name"){ + + } else if (pairs.first == "address"){ + std::string city = pairs.second; + ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end()); + } else assert(false); } + } +} - //todo - Field field = {"name", "Customer#000000001"}; - std::vector resKeys = db->FindKeysByField(field); +void findKeysByCity(FieldDB *db) { + Field field = {"address", "Shanghai"}; + std::vector resKeys = db->FindKeysByField(field); + // std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; + for (const std::string &key : resKeys){ + ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + } +} + +TEST(TestLab1, Basic) { + // DestroyDB("testdb",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + // ClearDB(db); + InsertFieldData(db); + GetFieldData(db); + findKeysByCity(db); } diff --git a/util/serialize_value.cc b/util/serialize_value.cc index a1bdb08..b93d8b7 100644 --- a/util/serialize_value.cc +++ b/util/serialize_value.cc @@ -36,8 +36,8 @@ FieldArray *ParseValue(const std::string& value_str){ } else { std::cout << "name and val not match!" << std::endl; } - nameSlice = Slice(); - valSlice = Slice(); + nameSlice.clear(); + valSlice.clear(); } return res; } diff --git a/util/serialize_value.h b/util/serialize_value.h index ff6e68c..a1ca30a 100644 --- a/util/serialize_value.h +++ b/util/serialize_value.h @@ -16,41 +16,44 @@ FieldArray *ParseValue(const std::string& value_str); class InternalFieldArray { public: -using FieldMap = std::map; + using FieldMap = std::map; -InternalFieldArray(const FieldArray &fields, bool to_map = false):fields(fields),isMapped(false) { - if(to_map) Map(); -} + InternalFieldArray(const FieldArray &fields, bool to_map = false): + fields(fields),isMapped(false) { + if(to_map) Map(); + } -InternalFieldArray(const std::string& value_str) { - Slice valueSlice(value_str); - Slice nameSlice,valSlice; - while(GetLengthPrefixedSlice(&valueSlice, &nameSlice)) { - if(GetLengthPrefixedSlice(&valueSlice, &valueSlice)) { - map[nameSlice.ToString()] = valueSlice.ToString(); - } else { - std::cout << "name and val not match!" << std::endl; + InternalFieldArray(const Slice value_slice) { + Slice valueSlice = value_slice; + Slice nameSlice, valSlice; + while(GetLengthPrefixedSlice(&valueSlice, &nameSlice)) { + if(GetLengthPrefixedSlice(&valueSlice, &valSlice)) { + map[nameSlice.ToString()] = valSlice.ToString(); + } else { + std::cout << "name and val not match!" << std::endl; + } + nameSlice.clear(); + valSlice.clear(); } - nameSlice.clear(); - valSlice.clear(); + isMapped = true; } -} -InternalFieldArray(const Slice& slice):leveldb::InternalFieldArray(slice.ToString()) {} + InternalFieldArray(const std::string& value_str) + :leveldb::InternalFieldArray(Slice(value_str)) {} -//将vector变为用map存 -void Map(); + //将vector变为用map存 + void Map(); -std::string Serialize(); + std::string Serialize(); -bool HasField(const Field& field); + bool HasField(const Field& field); private: -bool isMapped; -const FieldArray fields; -FieldMap map; + bool isMapped; + const FieldArray fields; + FieldMap map; }; } #endif \ No newline at end of file From 099d8183448d6fe2d38f21bfe768b1f5651324ae Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Fri, 6 Dec 2024 16:57:00 +0800 Subject: [PATCH 3/3] =?UTF-8?q?lab2=E9=83=A8=E5=88=86=E5=9F=BA=E7=A1=80?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=EF=BC=88=E6=B2=A1=E6=9C=89=E5=B9=B6=E5=8F=91?= =?UTF-8?q?=E5=92=8Cmetadb)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- db/db_impl.cc | 2 +- fielddb/encode_index.h | 37 +++++++++++ fielddb/field_db.cpp | 86 ++++++++++++++++++++++++++ fielddb/field_db.h | 8 ++- test/basic_function_test.cc | 146 ++++++++++++++++++++++++++++++++++++++++++++ test/lab1_test.cc | 109 --------------------------------- util/serialize_value.cc | 18 +++++- util/serialize_value.h | 2 +- 9 files changed, 294 insertions(+), 116 deletions(-) create mode 100644 fielddb/encode_index.h create mode 100644 test/basic_function_test.cc delete mode 100644 test/lab1_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 31536ca..b70e461 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -523,6 +523,6 @@ if(LEVELDB_INSTALL) endif(LEVELDB_INSTALL) add_executable(lab1_test - "${PROJECT_SOURCE_DIR}/test/lab1_test.cc" + "${PROJECT_SOURCE_DIR}/test/basic_function_test.cc" ) target_link_libraries(lab1_test PRIVATE leveldb gtest) diff --git a/db/db_impl.cc b/db/db_impl.cc index 4a7d25c..49db131 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1177,7 +1177,7 @@ std::vector DBImpl::FindKeysByField(Field &field){ std::vector result; auto iter = NewIterator(ReadOptions()); for(iter->SeekToFirst();iter->Valid();iter->Next()) { - std::string k = iter->key().ToString(); + // std::string k = iter->key().ToString(); InternalFieldArray fields(iter->value()); if(fields.HasField(field)) { result.push_back(iter->key().ToString()); diff --git a/fielddb/encode_index.h b/fielddb/encode_index.h new file mode 100644 index 0000000..eceeab4 --- /dev/null +++ b/fielddb/encode_index.h @@ -0,0 +1,37 @@ +#ifndef ENCODE_INDEX_H +#define ENCODE_INDEX_H + +#include "leveldb/slice.h" +#include "util/coding.h" +namespace fielddb{ +using namespace leveldb; + + +struct ParsedInternalIndexKey { //key : {name : val} + Slice user_key_; + Slice name_; + Slice val_; + + ParsedInternalIndexKey() {} // Intentionally left uninitialized (for speed) + ParsedInternalIndexKey(const Slice& user_key, const Slice& name, const Slice& val) + : user_key_(user_key), name_(name), val_(val) {} +}; + +bool ParseInternalIndexKey(Slice input, ParsedInternalIndexKey* result); +void AppendIndexKey(std::string* result, const ParsedInternalIndexKey& key); + + +inline bool ParseInternalIndexKey(Slice input, ParsedInternalIndexKey* result){ + return GetLengthPrefixedSlice(&input, &result->name_) && + GetLengthPrefixedSlice(&input, &result->val_) && + GetLengthPrefixedSlice(&input, &result->user_key_); +} + +inline void AppendIndexKey(std::string* result, const ParsedInternalIndexKey& key){ + PutLengthPrefixedSlice(result, key.name_); + PutLengthPrefixedSlice(result, key.val_); + PutLengthPrefixedSlice(result, key.user_key_); +} + +} +#endif \ No newline at end of file diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index d867411..637046e 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -6,7 +6,9 @@ #include "leveldb/env.h" #include "leveldb/options.h" #include "leveldb/status.h" +#include "db/write_batch_internal.h" #include "util/serialize_value.h" +#include "fielddb/encode_index.h" namespace fielddb { using namespace leveldb; @@ -18,6 +20,7 @@ Status FieldDB::OpenFieldDB(const Options& options, return Status::NotSupported(name, "new a fieldDb first\n"); } + // Status status; DB *indexdb, *kvdb, *metadb; status = Open(options, name+"_indexDB", &indexdb); @@ -77,6 +80,89 @@ std::vector FieldDB::FindKeysByField(Field &field) { return kvDB_->FindKeysByField(field); } +std::vector> FieldDB::FindKeysAndValByFieldName ( + const std::string &fieldName){ + std::vector> result; + auto iter = kvDB_->NewIterator(ReadOptions()); + std::string val; + for(iter->SeekToFirst();iter->Valid();iter->Next()) { + InternalFieldArray fields(iter->value()); + val = fields.ValOfName(fieldName); + if(!val.empty()) { + result.push_back(std::make_pair(iter->key().ToString(), val)); + } + } + return result; +} + +Status FieldDB::CreateIndexOnField(const std::string& field_name) { + //taskQueue相关 + //写锁 是不是只需要给putfields设置一把锁就行 + + std::vector> keysAndVal = + FindKeysAndValByFieldName(field_name); + WriteBatch writeBatch; + Slice value = Slice(); + for (auto &kvPair : keysAndVal){ + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); + writeBatch.Put(indexKey, value); + } + Status s = indexDB_->Write(WriteOptions(), &writeBatch); + if (!s.ok()) return s; + + index_[field_name] = Exist; + //唤醒taskqueue + +} + +Status FieldDB::DeleteIndex(const std::string &field_name) { + //taskQueue相关 + //写锁 + std::vector> keysAndVal = + FindKeysAndValByFieldName(field_name); + WriteBatch writeBatch; + for (auto &kvPair : keysAndVal){ + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); + writeBatch.Delete(indexKey); + } + Status s = indexDB_->Write(WriteOptions(), &writeBatch); + if (!s.ok()) return s; + + index_.erase(field_name); + //唤醒taskqueue +} + +std::vector FieldDB::QueryByIndex(const Field &field, Status *s) { + if (index_.count(field.first) == 0 || index_[field.first] != Exist){ + *s = Status::NotFound(Slice()); + return std::vector(); + } + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(Slice(), field.first, field.second)); + Iterator *indexIterator = indexDB_->NewIterator(ReadOptions()); + indexIterator->Seek(indexKey); + + std::vector result; + for (; indexIterator->Valid(); indexIterator->Next()) { + ParsedInternalIndexKey iterKey; + if (ParseInternalIndexKey(indexIterator->key(), &iterKey)){ + if (iterKey.name_ == field.first && iterKey.val_ == field.second){ + result.push_back(iterKey.user_key_.ToString()); + continue; //查到说明在范围里,否则break + } + } + break; + } + + *s = Status::OK(); + return result; +} + Iterator * FieldDB::NewIterator(const ReadOptions &options) { return kvDB_->NewIterator(options); } diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 6a3c845..5c7e8d5 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -36,8 +36,8 @@ public: void CompactRange(const Slice *begin, const Slice *end) override; /*与索引相关*/ Status CreateIndexOnField(const std::string& field_name); - Status DeleteIndex(std::string &field_name); - std::vector QueryByIndex(Field &field, Status *s); + Status DeleteIndex(const std::string &field_name); + std::vector QueryByIndex(const Field &field, Status *s); static Status OpenFieldDB(const Options& options,const std::string& name,FieldDB** dbptr); @@ -57,10 +57,12 @@ private: Deleting, Exist }; - std::map index_; + std::map index_; leveldb::port::Mutex mutex_; // mutex for taskqueue std::deque taskqueue_; + std::vector> FindKeysAndValByFieldName ( + const std::string &fieldName); }; } // end of namespace # endif \ No newline at end of file diff --git a/test/basic_function_test.cc b/test/basic_function_test.cc new file mode 100644 index 0000000..4915fe4 --- /dev/null +++ b/test/basic_function_test.cc @@ -0,0 +1,146 @@ +#include "gtest/gtest.h" +// #include "leveldb/env.h" +// #include "leveldb/db.h" +#include "fielddb/field_db.h" +using namespace fielddb; + +constexpr int value_size = 2048; +constexpr int data_size = 128 << 20; +std::vector cities = { + "Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou", + "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" + }; +std::vector shanghaiKeys; + +Status OpenDB(std::string dbName, FieldDB **db) { + Options options; + options.create_if_missing = true; + return FieldDB::OpenFieldDB(options, dbName, db); +} + +void ClearDB(FieldDB *db){ + //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染 + WriteOptions writeOptions; + int key_num = data_size / value_size; + for (int i = 0; i < key_num; i++) { + int key_ = i+1; + std::string key = std::to_string(key_); + Status s = db->Delete(WriteOptions(), key); + ASSERT_TRUE(s.ok()); + } +} + +void InsertFieldData(FieldDB *db) { + WriteOptions writeOptions; + int key_num = data_size / value_size; + srand(0); + + for (int i = 0; i < key_num; i++) { + int randThisTime = rand(); //确保读写一个循环只rand一次,否则随机序列会不一致 + int key_ = randThisTime % key_num+1; + std::string key = std::to_string(key_); + + std::string name = "customer#" + std::to_string(key_); + std::string address = cities[randThisTime % cities.size()]; + FieldArray fields = { + {"name", name}, + {"address", address} + }; + if (address == "Shanghai") { + shanghaiKeys.push_back(key); + } + Status s = db->PutFields(WriteOptions(), key, fields); + ASSERT_TRUE(s.ok()); + } +} + +void GetFieldData(FieldDB *db) { + ReadOptions readOptions; + int key_num = data_size / value_size; + + // 点查 + srand(0); + for (int i = 0; i < 100; i++) { + int randThisTime = rand(); + int key_ = randThisTime % key_num+1; + std::string key = std::to_string(key_); + FieldArray fields_ret; + Status s = db->GetFields(readOptions, key, &fields_ret); + ASSERT_TRUE(s.ok()); + for (const Field& pairs : fields_ret) { + if (pairs.first == "name"){ + + } else if (pairs.first == "address"){ + std::string city = pairs.second; + ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end()); + } else assert(false); + } + } +} + +void findKeysByCity(FieldDB *db) { + Field field = {"address", "Shanghai"}; + std::vector resKeys = db->FindKeysByField(field); + std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; + for (const std::string &key : resKeys){ + ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + } +} + +void findKeysByCityIndex(FieldDB *db, bool expect) { + Field field = {"address", "Shanghai"}; + Status s; + std::vector resKeys = db->QueryByIndex(field, &s); + if (expect) ASSERT_TRUE(s.ok()); + else { + ASSERT_TRUE(s.IsNotFound()); + return; + } + std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; + for (const std::string &key : resKeys){ + ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + } +} + +TEST(TestLab1, Basic) { + // DestroyDB("testdb",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + // ClearDB(db); + InsertFieldData(db); + GetFieldData(db); + findKeysByCity(db); + delete db; +} + +TEST(TestLab2, Basic) { + //destroy + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + // ClearDB(db); + shanghaiKeys.clear(); + InsertFieldData(db); + // GetFieldData(db); + // findKeysByCity(db); + db->CreateIndexOnField("address"); + findKeysByCityIndex(db, true); + db->DeleteIndex("address"); + findKeysByCityIndex(db, false); + + delete db; +} + + +int main(int argc, char** argv) { + // All tests currently run with the same read-only file limits. + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/test/lab1_test.cc b/test/lab1_test.cc deleted file mode 100644 index 7c9a50d..0000000 --- a/test/lab1_test.cc +++ /dev/null @@ -1,109 +0,0 @@ -#include "gtest/gtest.h" -// #include "leveldb/env.h" -// #include "leveldb/db.h" -#include "fielddb/field_db.h" -using namespace fielddb; - -constexpr int value_size = 2048; -constexpr int data_size = 128 << 20; -std::vector cities = { - "Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou", - "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" - }; -std::vector shanghaiKeys; - -Status OpenDB(std::string dbName, FieldDB **db) { - Options options; - options.create_if_missing = true; - return FieldDB::OpenFieldDB(options, dbName, db); -} - -void ClearDB(FieldDB *db){ - //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染 - WriteOptions writeOptions; - int key_num = data_size / value_size; - for (int i = 0; i < key_num; i++) { - int key_ = i+1; - std::string key = std::to_string(key_); - Status s = db->Delete(WriteOptions(), key); - ASSERT_TRUE(s.ok()); - } -} - -void InsertFieldData(FieldDB *db) { - WriteOptions writeOptions; - int key_num = data_size / value_size; - srand(0); - - for (int i = 0; i < key_num; i++) { - int randThisTime = rand(); //确保读写一个循环只rand一次,否则随机序列会不一致 - int key_ = randThisTime % key_num+1; - std::string key = std::to_string(key_); - - std::string name = "customer#" + std::to_string(key_); - std::string address = cities[randThisTime % cities.size()]; - FieldArray fields = { - {"name", name}, - {"address", address} - }; - if (address == "Shanghai") { - shanghaiKeys.push_back(key); - } - Status s = db->PutFields(WriteOptions(), key, fields); - ASSERT_TRUE(s.ok()); - } -} - -void GetFieldData(FieldDB *db) { - ReadOptions readOptions; - int key_num = data_size / value_size; - - // 点查 - srand(0); - for (int i = 0; i < 100; i++) { - int randThisTime = rand(); - int key_ = randThisTime % key_num+1; - std::string key = std::to_string(key_); - FieldArray fields_ret; - Status s = db->GetFields(readOptions, key, &fields_ret); - ASSERT_TRUE(s.ok()); - for (const Field& pairs : fields_ret) { - if (pairs.first == "name"){ - - } else if (pairs.first == "address"){ - std::string city = pairs.second; - ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end()); - } else assert(false); - } - } -} - -void findKeysByCity(FieldDB *db) { - Field field = {"address", "Shanghai"}; - std::vector resKeys = db->FindKeysByField(field); - // std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; - for (const std::string &key : resKeys){ - ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); - } -} - -TEST(TestLab1, Basic) { - // DestroyDB("testdb",Options()); - FieldDB *db = new FieldDB(); - - if(OpenDB("testdb", &db).ok() == false) { - std::cerr << "open db failed" << std::endl; - abort(); - } - // ClearDB(db); - InsertFieldData(db); - GetFieldData(db); - findKeysByCity(db); -} - - -int main(int argc, char** argv) { - // All tests currently run with the same read-only file limits. - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} \ No newline at end of file diff --git a/util/serialize_value.cc b/util/serialize_value.cc index b93d8b7..562360b 100644 --- a/util/serialize_value.cc +++ b/util/serialize_value.cc @@ -73,6 +73,22 @@ bool InternalFieldArray::HasField(const Field& field) { return std::find(fields.begin(),fields.end(),field) != fields.end(); } - +std::string InternalFieldArray::ValOfName(const std::string &name) { + if(isMapped) { + if(map.count(name)) { + return map[name]; + } + return std::string(); + } + + for (auto iter = fields.begin(); iter != fields.end(); iter++){ + if (iter->first == name) { + return iter->second; + } else if (iter->first > name) { + return std::string(); + } + } + return std::string(); +} } \ No newline at end of file diff --git a/util/serialize_value.h b/util/serialize_value.h index a1ca30a..b769fb8 100644 --- a/util/serialize_value.h +++ b/util/serialize_value.h @@ -48,7 +48,7 @@ public: std::string Serialize(); bool HasField(const Field& field); - + std::string ValOfName(const std::string& name); private: bool isMapped;