From 19a9a1204a5a91a5e4d74dbd3f5c677053bf1161 Mon Sep 17 00:00:00 2001 From: xxy <3237539022@qq.com> Date: Thu, 12 Dec 2024 22:23:18 +0800 Subject: [PATCH] add fize data size sign --- db/db_impl.cc | 93 ++++++++++++++++++++++++++++++++++++++++++----------------- test/test.cpp | 8 ++--- 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 45c7a4b..16687f8 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1249,6 +1249,7 @@ Status DBImpl::Get(const ReadOptions& options, const Slice& key, if (!res) return Status::Corruption("can't decode file id"); res = GetVarint64(&value_log_slice, &valuelog_offset); if (!res) return Status::Corruption("can't decode valuelog offset"); + // std::cout<<"file_id: "<> DBImpl::WriteValueLog( std::vector> kv) { std::string file_name_ = ValueLogFileName(dbname_, valuelogfile_number_); - std::ofstream valueFile(file_name_, std::ios::app | std::ios::binary); + std::fstream valueFile(file_name_, std::ios::in | std::ios::out | std::ios::binary); if (!valueFile.is_open()) { assert(0); } + valueFile.seekg(0, std::ios::end); // 移动到文件末尾 + uint64_t offset = valueFile.tellg(); - uint64_t offset = valueFile.tellp(); - + // 如果超出fixed_size if(offset>=config::value_log_size){ addNewValueLog(); valueFile.close(); file_name_ = ValueLogFileName(dbname_, valuelogfile_number_); - valueFile =std::ofstream(file_name_, std::ios::app | std::ios::binary); + valueFile =std::fstream(file_name_, std::ios::in | std::ios::out | std::ios::binary); if (!valueFile.is_open()) { assert(0); } - offset = valueFile.tellp(); - + valueFile.seekg(0, std::ios::end); // 移动到文件末尾 + offset = valueFile.tellg(); } + + uint64_t file_data_size = 0; // 文件数据大小标志位 + valueFile.seekg(0, std::ios::beg); + valueFile.read(reinterpret_cast(&file_data_size), sizeof(uint64_t)); + valueFile.clear(); // 清除错误状态 + valueFile.seekp(0, std::ios::end); // 返回文件末尾准备写入 + // std::cout<<"file_data_size: "<> res; for (const auto& [key_slice, value_slice] : kv) { @@ -1640,15 +1650,30 @@ std::vector> DBImpl::WriteValueLog( assert(0); } + // 更新文件数据大小 + file_data_size += sizeof(uint64_t) + key_len + sizeof(uint64_t) + value_len; // 记录 file_id 和 offset res.push_back({valuelogfile_number_, offset}); - // 更新偏移量 offset += sizeof(uint64_t) + key_len + sizeof(uint64_t) + value_len; } - // 解锁资源或进行其他清理操作 + // 在所有数据写入后,将更新的数据大小写回文件开头 + if (!res.empty()) { + valueFile.seekp(0, std::ios::beg); // 移动到文件开头 + valueFile.write(reinterpret_cast(&file_data_size), sizeof(uint64_t)); + if (!valueFile.good()) { + valueFile.close(); + assert(0); + } + } + else{ + valueFile.close(); + assert(0); + } + // 解锁资源或进行其他清理操作 + valueFile.flush(); // 确保所有缓冲区的数据都被写入文件 valueFile.close(); return res; } @@ -1656,6 +1681,28 @@ std::vector> DBImpl::WriteValueLog( void DBImpl::addNewValueLog() { valuelogfile_number_ = versions_->NewFileNumber(); + + std::string file_name_ = ValueLogFileName(dbname_, valuelogfile_number_); + std::fstream valueFile(file_name_, std::ios::app | std::ios::binary); + if (!valueFile.is_open()) { + assert(0); + } + uint64_t file_data_size = 0; // 新增的文件数据大小标志位 + if (valueFile.tellp() != 0) { + assert(0); + } + else{ + valueFile.write(reinterpret_cast(&file_data_size), sizeof(uint64_t)); + if (!valueFile.good()) { + valueFile.close(); + assert(0); + } + else{ + // 正常关闭文件 + valueFile.flush(); // 确保所有缓冲区的数据都被写入文件 + valueFile.close(); + } + } } Status DBImpl::ReadValueLog(uint64_t file_id, uint64_t offset, Slice* key, @@ -1769,8 +1816,9 @@ void DBImpl::GarbageCollect() { continue; } - uint64_t current_offset = 0; - uint64_t tmp_offset = 0; + // 初始化offset为占用大小 + uint64_t current_offset = sizeof(uint64_t); + uint64_t tmp_offset = current_offset; int cnt = 0; @@ -1810,11 +1858,12 @@ void DBImpl::GarbageCollect() { if (!cur_valuelog.good()) { delete[] key_buf_len; cur_valuelog.close(); - std::cerr << "Failed to read file: " << valuelog_name << std::endl; + std::cerr << "1Failed to read file: " << valuelog_name << std::endl; break; } // 更新当前偏移 current_offset += sizeof(uint64_t); + // std::cout << cnt <<" "< values; - for(int i=0;i<500000;i++){ + for(int i=0;i<50000;i++){ std::string key=std::to_string(i); std::string value; for(int j=0;j<5000;j++){ @@ -106,7 +106,7 @@ TEST(Test, LARGE_DATA_COMPACT_TEST) { values.push_back(value); db->Put(writeOptions,key,value); } - for(int i=0;i<500000;i++){ + for(int i=0;i<50000;i++){ std::string key=std::to_string(i); std::string value; Status s=db->Get(readOptions,key,&value);