优化 MyLevelDB::PutWithFields 方法，增加批量写入和错误处理机制，修改报告

9 months ago · cd0d9fd851
--- a/myLevelDB/my_leveldb.cc
+++ b/myLevelDB/my_leveldb.cc
@ -48,32 +48,65 @@ void MyLevelDB::SerializeValue(const FieldArray& fields,
  }
 }
 // Stores `fields` under `key` in _fields_db and records one index entry for
 // every field whose name appears in index_list_.
 //
 // NOTE(review): this span appears to interleave the removed and the added
 // lines of a diff whose +/- markers were lost: the signature, the
 // SerializeValue call, the `Status s` declaration and `match[i] = idx` each
 // occur twice, and there is a `return s;` in the middle of the body. It is
 // not compilable as shown; the comments below describe the statements as
 // they appear here.
 //
 // Steps visible in this block:
 //   1. Serialize `fields` into `value` and Put (key, value) into _fields_db;
 //      return the status immediately if that Put fails.
 //   2. While holding mutex_, map each field position to the position of the
 //      same-named entry in index_list_.
 //   3. For each match, build "<field value>:<key>" and queue it with an empty
 //      value in `batch`.
 //   4. Write `batch` to _fields_db; on failure, Delete every key recorded in
 //      `changes` from _fields_db and return the failing status.
 //
 // Returns the failing Status of the Put or of the batch Write, otherwise
 // Status::OK().
 Status MyLevelDB::PutWithFields(const WriteOptions& options,const std::string& key,const FieldArray& fields) {
 Status MyLevelDB::PutWithFields(const WriteOptions& options,
                                 const std::string& key,
                                 const FieldArray& fields) {
  std::string value;
  SerializeValue(fields, value);
  SerializeValue(fields, value);  // Serialize fields into a string
  // NOTE(review): both slices are built from c_str(); if Slice(const char*)
  // measures its length with strlen (as LevelDB's does — confirm for this
  // project), a key or serialized value containing '\0' would be truncated.
  auto slice_key = Slice(key.c_str());
  auto slice_value = Slice(value.c_str());
  Status s = _fields_db->Put(options, slice_key, slice_value);
  WriteBatch batch;
  Status s = _fields_db->Put(options, slice_key, slice_value);  // Write the record
  if (!s.ok()) {
    return s;  // The write failed: return the error directly
  }
  // Record every operation so it can be undone on failure
  std::vector<std::pair<std::string, std::string>> changes;
  changes.push_back({key, value});  // Record the field-data change
  // Update the indexes
  // `match` maps a position in `fields` to a position in index_list_.
  std::unordered_map<int, int> match;
  // Held until the function returns, so it also covers the batch write below.
  std::unique_lock<std::mutex> l(mutex_);
  for (int i = 0; i < fields.size(); i++) {
    for (size_t idx = 0; idx < index_list_.size(); idx++) {
      const auto& i_name = index_list_[idx];
      if (fields[i].first == i_name) {
        match[i] = idx;
        match[i] = idx;  // Found the matching indexed field
        break;
      }
    }
  }
  // Build the index key/value pairs
  for (auto item : match) {
    std::string composed_key;
    composed_key += fields[item.first].second + ":" + key;
    // Writes the entry straight into the per-field index database.
    s = index_db[item.second]->Put(options, composed_key, Slice());
    // NOTE(review): `batch` is later written to _fields_db (see the Write call
    // below), so entries queued here land in the main database rather than in
    // index_db[item.second] — confirm this is intended.
    batch.Put(Slice(composed_key.c_str()), Slice());  // Queue the index entry in the batch
    changes.push_back({composed_key, ""});            // Record the index change
  }
  // NOTE(review): if this return is kept, everything after it is unreachable.
  return s;
  // Commit the batch
  s = _fields_db->Write(options, &batch);
  if (!s.ok()) {
    // The batch write failed: undo the earlier operations
    std::cerr << "Failed to commit batch, rolling back." << std::endl;
    // Rollback: delete every key recorded in `changes` from _fields_db.
    // NOTE(review): this deletes `key` even if it held a value before this
    // call (the earlier value is not restored), and the Status returned by
    // each Delete is ignored.
    for (const auto& change : changes) {
      _fields_db->Delete(WriteOptions(),
                         Slice(change.first));  // Delete the modified key
    }
    return s;  // Return the error to signal that the operation failed
  }
  return Status::OK();  // Everything succeeded
 }
 Status MyLevelDB::FindKeysByField(const ReadOptions& options, const Field field,
                                  std::vector<std::string>* keys) {
  auto it = _fields_db->NewIterator(options);
@ -98,35 +131,94 @@ Status MyLevelDB::FindKeysByField(const ReadOptions& options, const Field field,
 }
 // Registers an index for `field_name`: appends the name to index_list_, opens
 // a database named _db_name + "_index_" + field_name with index_mode set, and
 // writes an "index_<field_name>" key with an empty value into _fields_db.
 //
 // Returns Status::InvalidArgument if the name is already in index_list_, the
 // failing Status of the batch Write if that write fails, otherwise
 // Status::OK().
 //
 // NOTE(review): this span appears to interleave the removed and the added
 // lines of a diff whose +/- markers were lost (index_db.push_back occurs
 // twice and a `return status;` sits in the middle of the body); it is not
 // coherent as shown.
 // NOTE(review): no lock is taken in this function, whereas PutWithFields
 // holds mutex_ while reading index_list_ — confirm whether callers serialize
 // access.
 Status MyLevelDB::CreateIndexOnField(const std::string& field_name) {
  // Check whether the index already exists
  for (const auto& field : this->index_list_) {
    if (field == field_name) {
      return Status::InvalidArgument(field_name,
                                     "Index already exists for this field");
    }
  }
  // Add the new indexed field to the index list
  // NOTE(review): this happens before DB::Open; nothing in this block removes
  // the name again if a later step fails.
  index_list_.push_back(field_name);
  Options op = _op;
  DB* field_db;
  op.index_mode = true;
  WriteBatch batch;  // Create the write batch
  Status status = DB::Open(op, _db_name + "_index_" + field_name, &field_db);
  // NOTE(review): pushes field_db before `status` is checked.
  index_db.push_back(field_db);
  if (!status.ok()) {
    std::cerr << "Failed to open index DB: " << status.ToString() << std::endl;
    abort();  
    // NOTE(review): abort() precedes this return, so the return is not reached.
    return status;  // Opening the database failed: return the error
  }
  // NOTE(review): if this return is kept, everything after it is unreachable.
  return status;
  index_db.push_back(field_db);  // Add the newly created index database to the list
  // Record all changes
  std::vector<std::string> changes;
  changes.push_back("index_" + field_name);  // Record the index-field change
  // Add the index record to the batch
  batch.Put(Slice(("index_" + field_name).c_str()),
            Slice());  // Queue the index record in the batch
  // Commit the batch
  status = _fields_db->Write(WriteOptions(), &batch);
  if (!status.ok()) {
    // The write failed: roll back
    std::cerr << "Failed to commit index: " << status.ToString() << std::endl;
    // Rollback: delete the keys recorded in `changes` from _fields_db.
    // NOTE(review): index_list_ and index_db are left as modified above, and
    // the Status returned by Delete is ignored.
    for (const auto& change : changes) {
      _fields_db->Delete(WriteOptions(),
                         Slice(change.c_str()));  // Delete the created index record
    }
    return status;  // Return the failure status
  }
  return Status::OK();  // Success
 }
 // Removes `field_name` from index_list_ and deletes the "index_<field_name>"
 // key from _fields_db through a write batch.
 //
 // Returns Status::NotFound if the name is not in index_list_, the failing
 // Status of the batch Write if that write fails, otherwise Status::OK().
 //
 // NOTE(review): this span appears to interleave the removed and the added
 // lines of a diff whose +/- markers were lost (a `return Status::OK();` sits
 // in the middle of the body); it is not coherent as shown.
 // NOTE(review): only index_list_ is modified here; the corresponding entry of
 // index_db is neither erased nor closed, so positions in index_list_ and
 // index_db no longer line up after an erase — confirm against the code that
 // indexes index_db by index_list_ position.
 Status MyLevelDB::DeleteIndex(std::string& field_name) {
  // Look up the indexed field
  auto it = std::find(index_list_.begin(), index_list_.end(), field_name);
  if (it == index_list_.end()) {
    return Status::NotFound("Index not found for this field");
  }
  // Remove the field from the list
  WriteBatch batch;  // Create the write batch
  // Erase the indexed field
  index_list_.erase(it);
  // NOTE(review): if this return is kept, everything after it is unreachable.
  return Status::OK();
  batch.Delete(Slice(("index_" + field_name).c_str()));  // Delete the index-field record
  // Record the deleted index
  std::vector<std::string> changes;
  changes.push_back("index_" + field_name);
  // Commit the batch
  Status s = _fields_db->Write(WriteOptions(), &batch);
  if (!s.ok()) {
    // The delete failed: roll back
    std::cerr << "Failed to delete index: " << s.ToString() << std::endl;
    // Rollback: re-add the index record.
    // NOTE(review): `batch` is not cleared, so it still holds the Delete
    // queued above followed by this Put; index_list_ is not restored, and the
    // Status of the second Write is ignored.
    for (const auto& change : changes) {
      batch.Put(Slice(change.c_str()), Slice());  // Restore the index record
    }
    _fields_db->Write(WriteOptions(), &batch);  // Submit the restoring batch
    return s;  // Return the failure
  }
  return Status::OK();  // Deletion succeeded
 }
 void MyLevelDB::QueryByIndex(const ReadOptions& options, Field& field,
                             std::vector<std::string>& keys) {
  int i = 0;
@ -136,7 +228,7 @@ void MyLevelDB::QueryByIndex(const ReadOptions& options, Field& field,
    }
  }
  assert(i != index_list_.size());
  auto it = index_db[i]->NewIterator(options);
  it->SeekToFirst();
  while (it->Valid()) {
@ -157,6 +249,7 @@ void MyLevelDB::QueryByIndex(const ReadOptions& options, Field& field,
  delete it;
 }
 Status MyLevelDB::Put(const WriteOptions& options, const Slice& key,
                    const Slice& value) {
  return _fields_db->Put(options, key, value);
--- a/构建数据库项目报告.md
+++ b/构建数据库项目报告.md
@ -137,35 +137,44 @@ Status MyLevelDB::FindKeysByField(const ReadOptions& options, const Field field,
 ### 字段插入并创建索引
 **思路：**
 为了支持快速查询，需要在插入数据时为部分字段创建索引。索引创建的核心步骤包括：
 为了支持快速查询，需要在插入数据时为部分字段创建索引。索引创建的核心步骤包括：
 1. 对字段数组进行序列化存储。
 2. 遍历字段数组，检查是否需要为字段创建索引。
 3. 对需要创建索引的字段，将其值与主键 `key` 组合存入索引数据库。
 1. 对字段数组进行序列化，先通过`Put`写入`_fields_db`；索引条目随后加入WriteBatch统一提交。
 2. 遍历字段数组，检查是否需要为字段创建索引，使用互斥锁保护并发访问。
 3. 对需要创建索引的字段，将其值与主键`key`组合存入批处理中，并跟踪所有变更以支持回滚。
 **代码解释：**
 - 调用 `SerializeValue` 将字段数组序列化后存入 `_fields_db` 数据库。
 - 遍历字段数组，通过与 `index_list_` 中的字段名匹配，确定需要创建索引的字段。
 - 为每个需要创建索引的字段，构造索引键 `field_value:key`，并存入对应的 `index_db` 数据库。
 这一设计实现了字段的高效存储与索引管理，通过索引提升了字段查询的性能。
 - 调用`SerializeValue`将字段数组序列化，通过WriteBatch原子写入`_fields_db`数据库。
 - 使用互斥锁保护索引列表，遍历字段数组与`index_list_`匹配需要创建索引的字段。
 - 为匹配的字段构造索引键`field_value:key`，加入批处理并记录变更，失败时可回滚所有操作。
 ```c++
 Status MyLevelDB::PutWithFields(const WriteOptions& options,const std::string& key,const FieldArray& fields) {
 Status MyLevelDB::PutWithFields(const WriteOptions& options,
                                const std::string& key,
                                const FieldArray& fields) {
  std::string value;
  SerializeValue(fields, value);
  SerializeValue(fields, value);  
  auto slice_key = Slice(key.c_str());
  auto slice_value = Slice(value.c_str());
  Status s = _fields_db->Put(options, slice_key, slice_value);
  WriteBatch batch;
  Status s = _fields_db->Put(options, slice_key, slice_value); 
  if (!s.ok()) {
    return s;  
  }
  std::vector<std::pair<std::string, std::string>> changes;
  changes.push_back({key, value});  
  // 更新索引
  std::unordered_map<int, int> match;
  std::unique_lock<std::mutex> l(mutex_);
  for (int i = 0; i < fields.size(); i++) {
    for (size_t idx = 0; idx < index_list_.size(); idx++) {
      const auto& i_name = index_list_[idx];
      if (fields[i].first == i_name) {
        match[i] = idx;
        match[i] = idx;  // 找到匹配的字段
        break;
      }
    }
@ -174,10 +183,27 @@ Status MyLevelDB::PutWithFields(const WriteOptions& options,const std::string& k
  for (auto item : match) {
    std::string composed_key;
    composed_key += fields[item.first].second + ":" + key;
    s = index_db[item.second]->Put(options, composed_key, Slice());
    batch.Put(Slice(composed_key.c_str()), Slice()); 
    changes.push_back({composed_key, ""});           
  }
  // 提交批处理
  s = _fields_db->Write(options, &batch);
  if (!s.ok()) {
    // 如果批处理写入失败，回滚之前的操作
    std::cerr << "Failed to commit batch, rolling back." << std::endl;
    for (const auto& change : changes) {
      _fields_db->Delete(WriteOptions(),
                         Slice(change.first));  
    }
    return s;  
  }
  return s;
  return Status::OK(); 
 }
 ```
 # 实验二：二级索引
@ -185,81 +211,141 @@ Status MyLevelDB::PutWithFields(const WriteOptions& options,const std::string& k
 ### 创建索引
 **思路：**
 `CreateIndexOnField` 函数用于为指定字段 `field_name` 创建索引。索引的核心在于维护一个字段值到主键的映射关系，索引字段的存储格式为：`value:key:null`，其中 `value` 是字段值，`key` 是主键。
 在实现中，`index_list_` 用于记录已创建索引的字段名，而 `index_db` 是索引数据库的集合，专门存储各字段的索引数据。
 `CreateIndexOnField`函数用于为指定字段`field_name`创建索引。索引的核心在于维护一个字段值到主键的映射关系：每条索引记录的键为`value:key`（其中`value`是字段值，`key`是主键），对应的值为空。
 在实现中，`index_list_`用于记录已创建索引的字段名，而`index_db`是索引数据库的集合，专门存储各字段的索引数据。
 **实现步骤：**
 1. **检查索引是否已存在：**
    遍历 `index_list_`，判断指定字段是否已存在索引。如果存在，则返回错误状态，避免重复创建。
    遍历`index_list_`，判断指定字段是否已存在索引。如果存在，则返回错误状态，避免重复创建。
 2. **创建索引数据库：**
    如果索引不存在，则将字段名加入 `index_list_` 并为其创建新的数据库实例。索引数据库以 `_db_name + "_index_" + field_name` 命名，便于区分和管理。
 3. **索引存储：**
    新创建的索引数据库存入 `index_db`，供后续使用。
 4. **异常处理：**
    如果索引数据库创建失败，系统会打印错误信息并终止程序，以避免数据库状态不一致。
    - 将字段名加入`index_list_`
    - 通过`DB::Open`创建新的索引数据库实例，并使用批处理向`_fields_db`写入`index_<field_name>`标记记录
    - 索引数据库以`_db_name + "_index_" + field_name`命名
 3. **提交与错误处理：**
    - 使用WriteBatch确保原子操作
    - 记录所有变更用于可能的回滚
    - 如果提交失败，执行完整的回滚操作
 **代码逻辑解释：**
 - `index_list_` 和 `index_db` 实现了索引的动态管理。
 - 使用 `DB::Open` 函数为字段名创建独立的索引数据库，确保索引和数据的分离。
 - 返回状态表示操作结果，便于调用方处理。
 这种设计将字段索引的创建与基础数据存储解耦，确保了功能模块的独立性。
 - 使用`index_list_`和`index_db`实现索引的动态管理。
 - 采用WriteBatch批处理确保索引创建的原子性。
 - 通过变更跟踪（changes vector）支持失败时的回滚操作。
 ```c++
 Status MyLevelDB::CreateIndexOnField(const std::string& field_name) {
  // 检查索引是否已经存在
  for (const auto& field : this->index_list_) {
    if (field == field_name) {
      return Status::InvalidArgument(field_name,
                                     "Index already exists for this field");
    }
  }
  // 将新的索引字段添加到索引列表
  index_list_.push_back(field_name);
  Options op = _op;
  DB* field_db;
  op.index_mode = true;
  WriteBatch batch;  // 创建批处理操作
  Status status = DB::Open(op, _db_name + "_index_" + field_name, &field_db);
  index_db.push_back(field_db);
  if (!status.ok()) {
    std::cerr << "Failed to open index DB: " << status.ToString() << std::endl;
    abort();  
    return status;  
  }
  return status;
 }
  index_db.push_back(field_db);  // 将新创建的索引数据库添加到列表
  // 记录所有变更
  std::vector<std::string> changes;
  changes.push_back("index_" + field_name);  // 记录索引字段的变更
  // 在批处理中添加索引数据
  batch.Put(Slice(("index_" + field_name).c_str()),
            Slice());  // 添加索引记录到批处理中
  // 提交批处理
  status = _fields_db->Write(WriteOptions(), &batch);
  if (!status.ok()) {
    std::cerr << "Failed to commit index: " << status.ToString() << std::endl;
    // 执行回滚
    for (const auto& change : changes) {
      _fields_db->Delete(WriteOptions(),
                         Slice(change.c_str()));  
    }
    return status;  
  }
  return Status::OK();  
 ```
 ### 删除索引
 **思路：**
 `DeleteIndex` 函数用于从系统中删除指定字段的索引。删除操作的核心是从 `index_list_` 和 `index_db` 中移除相关信息，释放存储资源。
 `DeleteIndex`函数用于从系统中删除指定字段的索引。删除操作的核心是从`index_list_`和`index_db`中移除相关信息，同时通过批处理保证操作的原子性和可回滚性。
 **实现步骤：**
 1. **检查索引是否存在：**
    遍历 `index_list_`，查找指定字段。如果未找到，返回错误状态，提示索引不存在。
 2. **移除索引信息：**
    如果索引存在，则从 `index_list_` 中移除字段名，并根据需要释放 `index_db` 中的数据库资源。
 3. **清理状态：**
    删除索引后，确保系统中的索引列表与实际存储状态一致，避免残留数据影响后续操作。
    使用`std::find`在`index_list_`中查找指定字段。如果未找到，返回`NotFound`错误状态。
 2. **删除索引信息：**
    - 从`index_list_`中移除字段名
    - 使用WriteBatch创建原子删除操作
    - 记录删除操作用于可能的回滚
 3. **提交与回滚处理：**
    - 提交批处理操作
    - 如果提交失败，执行回滚操作恢复索引
    - 确保操作的原子性和数据一致性
 **代码逻辑解释：**
 - 使用 `std::find` 查找字段名是否存在，避免重复删除。
 - 删除操作仅更新索引管理结构，未涉及底层数据库文件的清理。在实际应用中，可以扩展为支持物理文件的删除。
 该功能确保了索引的动态管理，便于根据业务需求调整索引配置。
 - 使用`std::find`高效查找索引字段位置。
 - 采用WriteBatch确保删除操作的原子性。
 - 通过changes vector跟踪删除操作，支持失败时的完整回滚。
 - 完整的错误处理确保即使在失败情况下也能维持系统一致性。
 ```c++
 Status MyLevelDB::DeleteIndex(std::string& field_name) {
  // 查找索引字段
  auto it = std::find(index_list_.begin(), index_list_.end(), field_name);
  if (it == index_list_.end()) {
    return Status::NotFound("Index not found for this field");
  }
  // 从列表中移除该字段
  WriteBatch batch; 
  // 删除索引字段
  index_list_.erase(it);
  return Status::OK();
  batch.Delete(Slice(("index_" + field_name).c_str()));  // 删除索引字段记录
  // 记录已删除的索引
  std::vector<std::string> changes;
  changes.push_back("index_" + field_name);
  // 提交批处理
  Status s = _fields_db->Write(WriteOptions(), &batch);
  if (!s.ok()) {
    std::cerr << "Failed to delete index: " << s.ToString() << std::endl;
    // 执行回滚
    for (const auto& change : changes) {
      batch.Put(Slice(change.c_str()), Slice());  // 恢复索引记录
    }
    _fields_db->Write(WriteOptions(), &batch);  // 再次提交恢复的批处理
    return s;  // 返回失败，确保不会提交任何变化
  }
  return Status::OK();  
 }
 ```
 ### 基于二级索引的查找