From 547e33d3622b90f4266054c3bef445a325b7e4b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E4=BA=BA=E9=B1=BC?= <1823748191@qq.com> Date: Sun, 8 Dec 2024 23:15:26 +0800 Subject: [PATCH 1/3] remove filesystem and fix bug for version_3 --- CMakeLists.txt | 4 ++-- benchmarks/db_bench.cc | 2 +- db/db_impl.cc | 27 +++++++++++++-------------- util/coding.cc | 48 +++++++++++++++++++++++++++++------------------- 4 files changed, 45 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 122df31..7307c47 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ project(leveldb VERSION 1.23.0 LANGUAGES C CXX) if(NOT CMAKE_C_STANDARD) # This project can use C11, but will gracefully decay down to C89. # 我改到17了 - set(CMAKE_C_STANDARD 17) + set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED OFF) set(CMAKE_C_EXTENSIONS OFF) endif(NOT CMAKE_C_STANDARD) @@ -18,7 +18,7 @@ endif(NOT CMAKE_C_STANDARD) # C++ standard can be overridden when this is used as a sub-project. if(NOT CMAKE_CXX_STANDARD) # This project requires C++17. - set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) endif(NOT CMAKE_CXX_STANDARD) diff --git a/benchmarks/db_bench.cc b/benchmarks/db_bench.cc index 717fd77..3aa5a57 100644 --- a/benchmarks/db_bench.cc +++ b/benchmarks/db_bench.cc @@ -74,7 +74,7 @@ static int FLAGS_reads = -1; static int FLAGS_threads = 1; // Size of each value -static int FLAGS_value_size = 1000; +static int FLAGS_value_size = 5000; // Arrange to generate values that shrink to this fraction of // their original size after compression diff --git a/db/db_impl.cc b/db/db_impl.cc index d1572d8..81962ca 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -39,7 +38,6 @@ #include "util/coding.h" #include "util/logging.h" #include "util/mutexlock.h" -namespace fs = std::filesystem; namespace leveldb { @@ -1740,21 +1738,16 @@ void DBImpl::GarbageCollect() { gc_mutex_.AssertHeld(); // 遍历数据库目录,找到所有 valuelog 文件 Log(options_.info_log, "start gc "); - auto files_set = fs::directory_iterator(dbname_); + std::vector filenames; + Status s = env_->GetChildren(dbname_, &filenames); + assert(s.ok()); std::set valuelog_set; - // std::string cur_valuelog_name = - // ValueLogFileName(dbname_, valuelogfile_number_); - for (const auto& cur_log_file : files_set) { - if (fs::exists(cur_log_file) && - fs::is_regular_file(fs::status(cur_log_file)) && - IsValueLogFile(cur_log_file.path().filename().string())) { - // if (cur_valuelog_name == cur_log_file.path().filename().string()) - // continue; - valuelog_set.emplace(cur_log_file.path().filename().string()); + for (const auto& filename:filenames) { + if (IsValueLogFile(filename)) { + valuelog_set.emplace(filename); } } for (std::string valuelog_name : valuelog_set) { - // std::cout << valuelog_name << std::endl; uint64_t cur_log_number = GetValueLogID(valuelog_name); valuelog_name = ValueLogFileName(dbname_, cur_log_number); if (cur_log_number == valuelogfile_number_) { @@ -1887,6 +1880,10 @@ void DBImpl::GarbageCollect() { // Key 不存在,忽略此记录 continue; } + else if(stored_value.data()[0]==(char)(0x00)){ + //value is too small + continue; + } if (!status.ok()) { std::cerr << "Error accessing sstable: " << status.ToString() @@ -1923,7 +1920,9 @@ void DBImpl::GarbageCollect() { // 清理旧文件(如果需要) cur_valuelog.close(); - fs::remove(valuelog_name.c_str()); // 删除旧的 ValueLog 文件 + env_->RemoveFile(valuelog_name); + + Log(options_.info_log, "remove file during gc %s", valuelog_name.c_str()); } } diff --git a/util/coding.cc b/util/coding.cc index 65527d3..7e98eb9 100644 --- a/util/coding.cc +++ b/util/coding.cc @@ -3,8 +3,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "util/coding.h" -#include - +#include namespace leveldb { @@ -174,27 +173,38 @@ void ParseStoredValue(const std::string& stored_value, uint64_t& valuelog_id, } // 示例:获取 ValueLog 文件 ID +// 示例:获取 ValueLog 文件 ID uint64_t GetValueLogID(const std::string& valuelog_name) { - // 使用 std::filesystem::path 解析文件名 - std::filesystem::path file_path(valuelog_name); - std::string filename = file_path.filename().string(); // 获取文件名部分 - - // 查找文件名中的 '.' 位置,提取数字部分 - auto pos = filename.find('.'); - if (pos == std::string::npos) { - assert(0); - } - // 提取数字部分 - std::string id_str = filename.substr(0, pos); - // 检查提取的部分是否为有效数字 - for (char c : id_str) { - if (!isdigit(c)) { - assert(0); + // 获取文件名部分(假设文件名格式为 "number.extension") + size_t pos = valuelog_name.find_last_of('/'); + std::string filename; + if (pos != std::string::npos) { + filename = valuelog_name.substr(pos + 1); + } else { + filename = valuelog_name; + } + + // 查找文件名中的 '.' 位置,提取数字部分 + pos = filename.find('.'); + assert(pos != std::string::npos); + + // 提取数字部分 + std::string id_str = filename.substr(0, pos); + + // 检查文件扩展名是否为 .valuelog + if (filename.substr(pos + 1) != "valuelog") { + assert(0); + } + + // 转换为 uint64_t + uint64_t id; + std::istringstream iss(id_str); + if (!(iss >> id)) { + assert(0); } - } - return std::stoull(id_str); // 转换为 uint64_t + return id; } // Helper function to split the set of files into chunks From dbe115b782eb89a9cae646eee5dc25b6cd9eeeef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E4=BA=BA=E9=B1=BC?= <1823748191@qq.com> Date: Mon, 9 Dec 2024 00:56:10 +0800 Subject: [PATCH 2/3] update benchmark --- benchmarks/db_bench.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/db_bench.cc b/benchmarks/db_bench.cc index 3aa5a57..584b253 100644 --- a/benchmarks/db_bench.cc +++ b/benchmarks/db_bench.cc @@ -65,7 +65,7 @@ static const char* FLAGS_benchmarks = "zstduncomp,"; // Number of key/values to place in database -static int FLAGS_num = 1000000; +static int FLAGS_num = 100000; // Number of read operations to do. If negative, do FLAGS_num reads. static int FLAGS_reads = -1; From f9e1cd5d24670923351925cf0dc52a19f0208a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E7=91=9E=E9=98=B3?= <10225101483@stu.ecnu.edu.cn> Date: Mon, 9 Dec 2024 01:32:20 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20'benchmarks/db=5Fbench?= =?UTF-8?q?.cc'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/db_bench.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/db_bench.cc b/benchmarks/db_bench.cc index 584b253..a2e9d1d 100644 --- a/benchmarks/db_bench.cc +++ b/benchmarks/db_bench.cc @@ -65,7 +65,7 @@ static const char* FLAGS_benchmarks = "zstduncomp,"; // Number of key/values to place in database -static int FLAGS_num = 100000; +static int FLAGS_num = 1000000; // Number of read operations to do. If negative, do FLAGS_num reads. static int FLAGS_reads = -1; @@ -1127,8 +1127,8 @@ int main(int argc, char** argv) { // Choose a location for the test database if none given with --db= if (FLAGS_db == nullptr) { - leveldb::g_env->GetTestDirectory(&default_db_path); - default_db_path += "/dbbench"; + //leveldb::g_env->GetTestDirectory(&default_db_path); + default_db_path = "dbbench"; FLAGS_db = default_db_path.c_str(); }