From 099d8183448d6fe2d38f21bfe768b1f5651324ae Mon Sep 17 00:00:00 2001 From: augurier <14434658+augurier@user.noreply.gitee.com> Date: Fri, 6 Dec 2024 16:57:00 +0800 Subject: [PATCH] =?UTF-8?q?lab2=E9=83=A8=E5=88=86=E5=9F=BA=E7=A1=80?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=EF=BC=88=E6=B2=A1=E6=9C=89=E5=B9=B6=E5=8F=91?= =?UTF-8?q?=E5=92=8Cmetadb)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- db/db_impl.cc | 2 +- fielddb/encode_index.h | 37 +++++++++++ fielddb/field_db.cpp | 86 ++++++++++++++++++++++++++ fielddb/field_db.h | 8 ++- test/basic_function_test.cc | 146 ++++++++++++++++++++++++++++++++++++++++++++ test/lab1_test.cc | 109 --------------------------------- util/serialize_value.cc | 18 +++++- util/serialize_value.h | 2 +- 9 files changed, 294 insertions(+), 116 deletions(-) create mode 100644 fielddb/encode_index.h create mode 100644 test/basic_function_test.cc delete mode 100644 test/lab1_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 31536ca..b70e461 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -523,6 +523,6 @@ if(LEVELDB_INSTALL) endif(LEVELDB_INSTALL) add_executable(lab1_test - "${PROJECT_SOURCE_DIR}/test/lab1_test.cc" + "${PROJECT_SOURCE_DIR}/test/basic_function_test.cc" ) target_link_libraries(lab1_test PRIVATE leveldb gtest) diff --git a/db/db_impl.cc b/db/db_impl.cc index 4a7d25c..49db131 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1177,7 +1177,7 @@ std::vector DBImpl::FindKeysByField(Field &field){ std::vector result; auto iter = NewIterator(ReadOptions()); for(iter->SeekToFirst();iter->Valid();iter->Next()) { - std::string k = iter->key().ToString(); + // std::string k = iter->key().ToString(); InternalFieldArray fields(iter->value()); if(fields.HasField(field)) { result.push_back(iter->key().ToString()); diff --git a/fielddb/encode_index.h b/fielddb/encode_index.h new file mode 100644 index 0000000..eceeab4 --- /dev/null +++ b/fielddb/encode_index.h @@ -0,0 +1,37 @@ +#ifndef ENCODE_INDEX_H +#define ENCODE_INDEX_H + +#include "leveldb/slice.h" +#include "util/coding.h" +namespace fielddb{ +using namespace leveldb; + + +struct ParsedInternalIndexKey { //key : {name : val} + Slice user_key_; + Slice name_; + Slice val_; + + ParsedInternalIndexKey() {} // Intentionally left uninitialized (for speed) + ParsedInternalIndexKey(const Slice& user_key, const Slice& name, const Slice& val) + : user_key_(user_key), name_(name), val_(val) {} +}; + +bool ParseInternalIndexKey(Slice input, ParsedInternalIndexKey* result); +void AppendIndexKey(std::string* result, const ParsedInternalIndexKey& key); + + +inline bool ParseInternalIndexKey(Slice input, ParsedInternalIndexKey* result){ + return GetLengthPrefixedSlice(&input, &result->name_) && + GetLengthPrefixedSlice(&input, &result->val_) && + GetLengthPrefixedSlice(&input, &result->user_key_); +} + +inline void AppendIndexKey(std::string* result, const ParsedInternalIndexKey& key){ + PutLengthPrefixedSlice(result, key.name_); + PutLengthPrefixedSlice(result, key.val_); + PutLengthPrefixedSlice(result, key.user_key_); +} + +} +#endif \ No newline at end of file diff --git a/fielddb/field_db.cpp b/fielddb/field_db.cpp index d867411..637046e 100644 --- a/fielddb/field_db.cpp +++ b/fielddb/field_db.cpp @@ -6,7 +6,9 @@ #include "leveldb/env.h" #include "leveldb/options.h" #include "leveldb/status.h" +#include "db/write_batch_internal.h" #include "util/serialize_value.h" +#include "fielddb/encode_index.h" namespace fielddb { using namespace leveldb; @@ -18,6 +20,7 @@ Status FieldDB::OpenFieldDB(const Options& options, return Status::NotSupported(name, "new a fieldDb first\n"); } + // Status status; DB *indexdb, *kvdb, *metadb; status = Open(options, name+"_indexDB", &indexdb); @@ -77,6 +80,89 @@ std::vector FieldDB::FindKeysByField(Field &field) { return kvDB_->FindKeysByField(field); } +std::vector> FieldDB::FindKeysAndValByFieldName ( + const std::string &fieldName){ + std::vector> result; + auto iter = kvDB_->NewIterator(ReadOptions()); + std::string val; + for(iter->SeekToFirst();iter->Valid();iter->Next()) { + InternalFieldArray fields(iter->value()); + val = fields.ValOfName(fieldName); + if(!val.empty()) { + result.push_back(std::make_pair(iter->key().ToString(), val)); + } + } + return result; +} + +Status FieldDB::CreateIndexOnField(const std::string& field_name) { + //taskQueue相关 + //写锁 是不是只需要给putfields设置一把锁就行 + + std::vector> keysAndVal = + FindKeysAndValByFieldName(field_name); + WriteBatch writeBatch; + Slice value = Slice(); + for (auto &kvPair : keysAndVal){ + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); + writeBatch.Put(indexKey, value); + } + Status s = indexDB_->Write(WriteOptions(), &writeBatch); + if (!s.ok()) return s; + + index_[field_name] = Exist; + //唤醒taskqueue + +} + +Status FieldDB::DeleteIndex(const std::string &field_name) { + //taskQueue相关 + //写锁 + std::vector> keysAndVal = + FindKeysAndValByFieldName(field_name); + WriteBatch writeBatch; + for (auto &kvPair : keysAndVal){ + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(kvPair.first, field_name, kvPair.second)); + writeBatch.Delete(indexKey); + } + Status s = indexDB_->Write(WriteOptions(), &writeBatch); + if (!s.ok()) return s; + + index_.erase(field_name); + //唤醒taskqueue +} + +std::vector FieldDB::QueryByIndex(const Field &field, Status *s) { + if (index_.count(field.first) == 0 || index_[field.first] != Exist){ + *s = Status::NotFound(Slice()); + return std::vector(); + } + std::string indexKey; + AppendIndexKey(&indexKey, + ParsedInternalIndexKey(Slice(), field.first, field.second)); + Iterator *indexIterator = indexDB_->NewIterator(ReadOptions()); + indexIterator->Seek(indexKey); + + std::vector result; + for (; indexIterator->Valid(); indexIterator->Next()) { + ParsedInternalIndexKey iterKey; + if (ParseInternalIndexKey(indexIterator->key(), &iterKey)){ + if (iterKey.name_ == field.first && iterKey.val_ == field.second){ + result.push_back(iterKey.user_key_.ToString()); + continue; //查到说明在范围里,否则break + } + } + break; + } + + *s = Status::OK(); + return result; +} + Iterator * FieldDB::NewIterator(const ReadOptions &options) { return kvDB_->NewIterator(options); } diff --git a/fielddb/field_db.h b/fielddb/field_db.h index 6a3c845..5c7e8d5 100644 --- a/fielddb/field_db.h +++ b/fielddb/field_db.h @@ -36,8 +36,8 @@ public: void CompactRange(const Slice *begin, const Slice *end) override; /*与索引相关*/ Status CreateIndexOnField(const std::string& field_name); - Status DeleteIndex(std::string &field_name); - std::vector QueryByIndex(Field &field, Status *s); + Status DeleteIndex(const std::string &field_name); + std::vector QueryByIndex(const Field &field, Status *s); static Status OpenFieldDB(const Options& options,const std::string& name,FieldDB** dbptr); @@ -57,10 +57,12 @@ private: Deleting, Exist }; - std::map index_; + std::map index_; leveldb::port::Mutex mutex_; // mutex for taskqueue std::deque taskqueue_; + std::vector> FindKeysAndValByFieldName ( + const std::string &fieldName); }; } // end of namespace # endif \ No newline at end of file diff --git a/test/basic_function_test.cc b/test/basic_function_test.cc new file mode 100644 index 0000000..4915fe4 --- /dev/null +++ b/test/basic_function_test.cc @@ -0,0 +1,146 @@ +#include "gtest/gtest.h" +// #include "leveldb/env.h" +// #include "leveldb/db.h" +#include "fielddb/field_db.h" +using namespace fielddb; + +constexpr int value_size = 2048; +constexpr int data_size = 128 << 20; +std::vector cities = { + "Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou", + "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" + }; +std::vector shanghaiKeys; + +Status OpenDB(std::string dbName, FieldDB **db) { + Options options; + options.create_if_missing = true; + return FieldDB::OpenFieldDB(options, dbName, db); +} + +void ClearDB(FieldDB *db){ + //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染 + WriteOptions writeOptions; + int key_num = data_size / value_size; + for (int i = 0; i < key_num; i++) { + int key_ = i+1; + std::string key = std::to_string(key_); + Status s = db->Delete(WriteOptions(), key); + ASSERT_TRUE(s.ok()); + } +} + +void InsertFieldData(FieldDB *db) { + WriteOptions writeOptions; + int key_num = data_size / value_size; + srand(0); + + for (int i = 0; i < key_num; i++) { + int randThisTime = rand(); //确保读写一个循环只rand一次,否则随机序列会不一致 + int key_ = randThisTime % key_num+1; + std::string key = std::to_string(key_); + + std::string name = "customer#" + std::to_string(key_); + std::string address = cities[randThisTime % cities.size()]; + FieldArray fields = { + {"name", name}, + {"address", address} + }; + if (address == "Shanghai") { + shanghaiKeys.push_back(key); + } + Status s = db->PutFields(WriteOptions(), key, fields); + ASSERT_TRUE(s.ok()); + } +} + +void GetFieldData(FieldDB *db) { + ReadOptions readOptions; + int key_num = data_size / value_size; + + // 点查 + srand(0); + for (int i = 0; i < 100; i++) { + int randThisTime = rand(); + int key_ = randThisTime % key_num+1; + std::string key = std::to_string(key_); + FieldArray fields_ret; + Status s = db->GetFields(readOptions, key, &fields_ret); + ASSERT_TRUE(s.ok()); + for (const Field& pairs : fields_ret) { + if (pairs.first == "name"){ + + } else if (pairs.first == "address"){ + std::string city = pairs.second; + ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end()); + } else assert(false); + } + } +} + +void findKeysByCity(FieldDB *db) { + Field field = {"address", "Shanghai"}; + std::vector resKeys = db->FindKeysByField(field); + std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; + for (const std::string &key : resKeys){ + ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + } +} + +void findKeysByCityIndex(FieldDB *db, bool expect) { + Field field = {"address", "Shanghai"}; + Status s; + std::vector resKeys = db->QueryByIndex(field, &s); + if (expect) ASSERT_TRUE(s.ok()); + else { + ASSERT_TRUE(s.IsNotFound()); + return; + } + std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; + for (const std::string &key : resKeys){ + ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); + } +} + +TEST(TestLab1, Basic) { + // DestroyDB("testdb",Options()); + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + // ClearDB(db); + InsertFieldData(db); + GetFieldData(db); + findKeysByCity(db); + delete db; +} + +TEST(TestLab2, Basic) { + //destroy + FieldDB *db = new FieldDB(); + + if(OpenDB("testdb2", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + // ClearDB(db); + shanghaiKeys.clear(); + InsertFieldData(db); + // GetFieldData(db); + // findKeysByCity(db); + db->CreateIndexOnField("address"); + findKeysByCityIndex(db, true); + db->DeleteIndex("address"); + findKeysByCityIndex(db, false); + + delete db; +} + + +int main(int argc, char** argv) { + // All tests currently run with the same read-only file limits. + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/test/lab1_test.cc b/test/lab1_test.cc deleted file mode 100644 index 7c9a50d..0000000 --- a/test/lab1_test.cc +++ /dev/null @@ -1,109 +0,0 @@ -#include "gtest/gtest.h" -// #include "leveldb/env.h" -// #include "leveldb/db.h" -#include "fielddb/field_db.h" -using namespace fielddb; - -constexpr int value_size = 2048; -constexpr int data_size = 128 << 20; -std::vector cities = { - "Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou", - "Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin" - }; -std::vector shanghaiKeys; - -Status OpenDB(std::string dbName, FieldDB **db) { - Options options; - options.create_if_missing = true; - return FieldDB::OpenFieldDB(options, dbName, db); -} - -void ClearDB(FieldDB *db){ - //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染 - WriteOptions writeOptions; - int key_num = data_size / value_size; - for (int i = 0; i < key_num; i++) { - int key_ = i+1; - std::string key = std::to_string(key_); - Status s = db->Delete(WriteOptions(), key); - ASSERT_TRUE(s.ok()); - } -} - -void InsertFieldData(FieldDB *db) { - WriteOptions writeOptions; - int key_num = data_size / value_size; - srand(0); - - for (int i = 0; i < key_num; i++) { - int randThisTime = rand(); //确保读写一个循环只rand一次,否则随机序列会不一致 - int key_ = randThisTime % key_num+1; - std::string key = std::to_string(key_); - - std::string name = "customer#" + std::to_string(key_); - std::string address = cities[randThisTime % cities.size()]; - FieldArray fields = { - {"name", name}, - {"address", address} - }; - if (address == "Shanghai") { - shanghaiKeys.push_back(key); - } - Status s = db->PutFields(WriteOptions(), key, fields); - ASSERT_TRUE(s.ok()); - } -} - -void GetFieldData(FieldDB *db) { - ReadOptions readOptions; - int key_num = data_size / value_size; - - // 点查 - srand(0); - for (int i = 0; i < 100; i++) { - int randThisTime = rand(); - int key_ = randThisTime % key_num+1; - std::string key = std::to_string(key_); - FieldArray fields_ret; - Status s = db->GetFields(readOptions, key, &fields_ret); - ASSERT_TRUE(s.ok()); - for (const Field& pairs : fields_ret) { - if (pairs.first == "name"){ - - } else if (pairs.first == "address"){ - std::string city = pairs.second; - ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end()); - } else assert(false); - } - } -} - -void findKeysByCity(FieldDB *db) { - Field field = {"address", "Shanghai"}; - std::vector resKeys = db->FindKeysByField(field); - // std::cout << shanghaiKeys.size() << " " << resKeys.size() << std::endl; - for (const std::string &key : resKeys){ - ASSERT_NE(std::find(shanghaiKeys.begin(), shanghaiKeys.end(), key), shanghaiKeys.end()); - } -} - -TEST(TestLab1, Basic) { - // DestroyDB("testdb",Options()); - FieldDB *db = new FieldDB(); - - if(OpenDB("testdb", &db).ok() == false) { - std::cerr << "open db failed" << std::endl; - abort(); - } - // ClearDB(db); - InsertFieldData(db); - GetFieldData(db); - findKeysByCity(db); -} - - -int main(int argc, char** argv) { - // All tests currently run with the same read-only file limits. - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} \ No newline at end of file diff --git a/util/serialize_value.cc b/util/serialize_value.cc index b93d8b7..562360b 100644 --- a/util/serialize_value.cc +++ b/util/serialize_value.cc @@ -73,6 +73,22 @@ bool InternalFieldArray::HasField(const Field& field) { return std::find(fields.begin(),fields.end(),field) != fields.end(); } - +std::string InternalFieldArray::ValOfName(const std::string &name) { + if(isMapped) { + if(map.count(name)) { + return map[name]; + } + return std::string(); + } + + for (auto iter = fields.begin(); iter != fields.end(); iter++){ + if (iter->first == name) { + return iter->second; + } else if (iter->first > name) { + return std::string(); + } + } + return std::string(); +} } \ No newline at end of file diff --git a/util/serialize_value.h b/util/serialize_value.h index a1ca30a..b769fb8 100644 --- a/util/serialize_value.h +++ b/util/serialize_value.h @@ -48,7 +48,7 @@ public: std::string Serialize(); bool HasField(const Field& field); - + std::string ValOfName(const std::string& name); private: bool isMapped;