From ffa94bde76f8ade0b3e18673fe36683f84cc019a Mon Sep 17 00:00:00 2001 From: wangxuefei <10225501435@stu.ecnu.edu.cn> Date: Sun, 8 Dec 2024 18:29:06 +0800 Subject: [PATCH] complete db_test3.cc --- CMakeLists.txt | 433 ++++++++++++++++++++++++++++--------------------------- report.md | 170 ++++++++++++++++++---- test/db_test3.cc | 120 +++++++++++++++ 3 files changed, 482 insertions(+), 241 deletions(-) create mode 100644 test/db_test3.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 70bbd53..cd5ac28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,7 @@ check_cxx_compiler_flag(-Wthread-safety HAVE_CLANG_THREAD_SAFETY) # Used by googletest. check_cxx_compiler_flag(-Wno-missing-field-initializers - LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS) + LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS) include(CheckCXXSourceCompiles) @@ -100,13 +100,13 @@ set(LEVELDB_PUBLIC_INCLUDE_DIR "include/leveldb") set(LEVELDB_PORT_CONFIG_DIR "include/port") configure_file( - "port/port_config.h.in" - "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" + "port/port_config.h.in" + "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" ) include_directories( - "${PROJECT_BINARY_DIR}/include" - "." + "${PROJECT_BINARY_DIR}/include" + "." 
) if(BUILD_SHARED_LIBS) @@ -119,153 +119,153 @@ include(GNUInstallDirs) add_library(leveldb "") target_sources(leveldb - PRIVATE - "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" - "db/builder.cc" - "db/builder.h" - "db/c.cc" - "db/db_impl.cc" - "db/db_impl.h" - "db/db_iter.cc" - "db/db_iter.h" - "db/dbformat.cc" - "db/dbformat.h" - "db/dumpfile.cc" - "db/filename.cc" - "db/filename.h" - "db/log_format.h" - "db/log_reader.cc" - "db/log_reader.h" - "db/log_writer.cc" - "db/log_writer.h" - "db/memtable.cc" - "db/memtable.h" - "db/repair.cc" - "db/skiplist.h" - "db/snapshot.h" - "db/table_cache.cc" - "db/table_cache.h" - "db/version_edit.cc" - "db/version_edit.h" - "db/version_set.cc" - "db/version_set.h" - "db/write_batch_internal.h" - "db/write_batch.cc" - "port/port_stdcxx.h" - "port/port.h" - "port/thread_annotations.h" - "table/block_builder.cc" - "table/block_builder.h" - "table/block.cc" - "table/block.h" - "table/filter_block.cc" - "table/filter_block.h" - "table/format.cc" - "table/format.h" - "table/iterator_wrapper.h" - "table/iterator.cc" - "table/merger.cc" - "table/merger.h" - "table/table_builder.cc" - "table/table.cc" - "table/two_level_iterator.cc" - "table/two_level_iterator.h" - "util/arena.cc" - "util/arena.h" - "util/bloom.cc" - "util/cache.cc" - "util/coding.cc" - "util/coding.h" - "util/comparator.cc" - "util/crc32c.cc" - "util/crc32c.h" - "util/env.cc" - "util/filter_policy.cc" - "util/hash.cc" - "util/hash.h" - "util/logging.cc" - "util/logging.h" - "util/mutexlock.h" - "util/no_destructor.h" - "util/options.cc" - "util/random.h" - "util/status.cc" - - # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install". 
- $<$:PUBLIC> - "${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h" + PRIVATE + "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" + "db/builder.cc" + "db/builder.h" + "db/c.cc" + "db/db_impl.cc" + "db/db_impl.h" + "db/db_iter.cc" + "db/db_iter.h" + "db/dbformat.cc" + "db/dbformat.h" + "db/dumpfile.cc" + "db/filename.cc" + "db/filename.h" + "db/log_format.h" + "db/log_reader.cc" + "db/log_reader.h" + "db/log_writer.cc" + "db/log_writer.h" + "db/memtable.cc" + "db/memtable.h" + "db/repair.cc" + "db/skiplist.h" + "db/snapshot.h" + "db/table_cache.cc" + "db/table_cache.h" + "db/version_edit.cc" + "db/version_edit.h" + "db/version_set.cc" + "db/version_set.h" + "db/write_batch_internal.h" + "db/write_batch.cc" + "port/port_stdcxx.h" + "port/port.h" + "port/thread_annotations.h" + "table/block_builder.cc" + "table/block_builder.h" + "table/block.cc" + "table/block.h" + "table/filter_block.cc" + "table/filter_block.h" + "table/format.cc" + "table/format.h" + "table/iterator_wrapper.h" + "table/iterator.cc" + "table/merger.cc" + "table/merger.h" + "table/table_builder.cc" + "table/table.cc" + "table/two_level_iterator.cc" + "table/two_level_iterator.h" + "util/arena.cc" + "util/arena.h" + "util/bloom.cc" + "util/cache.cc" + "util/coding.cc" + "util/coding.h" + "util/comparator.cc" + "util/crc32c.cc" + "util/crc32c.h" + "util/env.cc" + "util/filter_policy.cc" + 
"util/hash.cc" + "util/hash.h" + "util/logging.cc" + "util/logging.h" + "util/mutexlock.h" + "util/no_destructor.h" + "util/options.cc" + "util/random.h" + "util/status.cc" + + # Only CMake 3.3+ supports PUBLIC sources in targets exported by "install". + $<$:PUBLIC> + "${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h" ) if (WIN32) target_sources(leveldb - PRIVATE - "util/env_windows.cc" - "util/windows_logger.h" + PRIVATE + "util/env_windows.cc" + "util/windows_logger.h" ) else (WIN32) target_sources(leveldb - PRIVATE - "util/env_posix.cc" - "util/posix_logger.h" + PRIVATE + "util/env_posix.cc" + "util/posix_logger.h" ) endif (WIN32) # MemEnv is not part of the interface and could be pulled to a separate library. target_sources(leveldb - PRIVATE - "helpers/memenv/memenv.cc" - "helpers/memenv/memenv.h" + PRIVATE + "helpers/memenv/memenv.cc" + "helpers/memenv/memenv.h" ) target_include_directories(leveldb - PUBLIC - $ - $ + PUBLIC + $ + $ ) set_target_properties(leveldb - PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) + PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) target_compile_definitions(leveldb - PRIVATE - # Used by include/export.h when building shared libraries. - LEVELDB_COMPILE_LIBRARY - # Used by port/port.h. - ${LEVELDB_PLATFORM_NAME}=1 + PRIVATE + # Used by include/export.h when building shared libraries. 
+ LEVELDB_COMPILE_LIBRARY + # Used by port/port.h. + ${LEVELDB_PLATFORM_NAME}=1 ) if (NOT HAVE_CXX17_HAS_INCLUDE) target_compile_definitions(leveldb - PRIVATE - LEVELDB_HAS_PORT_CONFIG_H=1 + PRIVATE + LEVELDB_HAS_PORT_CONFIG_H=1 ) endif(NOT HAVE_CXX17_HAS_INCLUDE) if(BUILD_SHARED_LIBS) target_compile_definitions(leveldb - PUBLIC - # Used by include/export.h. - LEVELDB_SHARED_LIBRARY + PUBLIC + # Used by include/export.h. + LEVELDB_SHARED_LIBRARY ) endif(BUILD_SHARED_LIBS) if(HAVE_CLANG_THREAD_SAFETY) target_compile_options(leveldb - PUBLIC - -Werror -Wthread-safety) + PUBLIC + -Werror -Wthread-safety) endif(HAVE_CLANG_THREAD_SAFETY) if(HAVE_CRC32C) @@ -286,7 +286,7 @@ find_package(Threads REQUIRED) target_link_libraries(leveldb Threads::Threads) add_executable(leveldbutil - "db/leveldbutil.cc" + "db/leveldbutil.cc" ) target_link_libraries(leveldbutil leveldb) @@ -305,60 +305,60 @@ if(LEVELDB_BUILD_TESTS) # GoogleTest triggers a missing field initializers warning. if(LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS) set_property(TARGET gtest - APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) + APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) set_property(TARGET gmock - APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) + APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers) endif(LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS) add_executable(leveldb_tests "") target_sources(leveldb_tests - PRIVATE - # "db/fault_injection_test.cc" - # "issues/issue178_test.cc" - # "issues/issue200_test.cc" - # "issues/issue320_test.cc" - "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" - # "util/env_test.cc" - "util/status_test.cc" - "util/no_destructor_test.cc" - "util/testutil.cc" - "util/testutil.h" + PRIVATE + # "db/fault_injection_test.cc" + # "issues/issue178_test.cc" + # "issues/issue200_test.cc" + # "issues/issue320_test.cc" + "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" + # "util/env_test.cc" 
+ "util/status_test.cc" + "util/no_destructor_test.cc" + "util/testutil.cc" + "util/testutil.h" ) if(NOT BUILD_SHARED_LIBS) target_sources(leveldb_tests - PRIVATE - "db/autocompact_test.cc" - "db/corruption_test.cc" - "db/db_test.cc" - "db/dbformat_test.cc" - "db/filename_test.cc" - "db/log_test.cc" - "db/recovery_test.cc" - "db/skiplist_test.cc" - "db/version_edit_test.cc" - "db/version_set_test.cc" - "db/write_batch_test.cc" - "helpers/memenv/memenv_test.cc" - "table/filter_block_test.cc" - "table/table_test.cc" - "util/arena_test.cc" - "util/bloom_test.cc" - "util/cache_test.cc" - "util/coding_test.cc" - "util/crc32c_test.cc" - "util/hash_test.cc" - "util/logging_test.cc" + PRIVATE + "db/autocompact_test.cc" + "db/corruption_test.cc" + "db/db_test.cc" + "db/dbformat_test.cc" + "db/filename_test.cc" + "db/log_test.cc" + "db/recovery_test.cc" + "db/skiplist_test.cc" + "db/version_edit_test.cc" + "db/version_set_test.cc" + "db/write_batch_test.cc" + "helpers/memenv/memenv_test.cc" + "table/filter_block_test.cc" + "table/table_test.cc" + "util/arena_test.cc" + "util/bloom_test.cc" + "util/cache_test.cc" + "util/coding_test.cc" + "util/crc32c_test.cc" + "util/hash_test.cc" + "util/logging_test.cc" ) endif(NOT BUILD_SHARED_LIBS) target_link_libraries(leveldb_tests leveldb gmock gtest gtest_main) target_compile_definitions(leveldb_tests - PRIVATE - ${LEVELDB_PLATFORM_NAME}=1 + PRIVATE + ${LEVELDB_PLATFORM_NAME}=1 ) if (NOT HAVE_CXX17_HAS_INCLUDE) target_compile_definitions(leveldb_tests - PRIVATE - LEVELDB_HAS_PORT_CONFIG_H=1 + PRIVATE + LEVELDB_HAS_PORT_CONFIG_H=1 ) endif(NOT HAVE_CXX17_HAS_INCLUDE) @@ -369,22 +369,22 @@ if(LEVELDB_BUILD_TESTS) add_executable("${test_target_name}" "") target_sources("${test_target_name}" - PRIVATE - "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" - "util/testutil.cc" - "util/testutil.h" + PRIVATE + "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" + "util/testutil.cc" + "util/testutil.h" - 
"${test_file}" + "${test_file}" ) target_link_libraries("${test_target_name}" leveldb gmock gtest) target_compile_definitions("${test_target_name}" - PRIVATE - ${LEVELDB_PLATFORM_NAME}=1 + PRIVATE + ${LEVELDB_PLATFORM_NAME}=1 ) if (NOT HAVE_CXX17_HAS_INCLUDE) target_compile_definitions("${test_target_name}" - PRIVATE - LEVELDB_HAS_PORT_CONFIG_H=1 + PRIVATE + LEVELDB_HAS_PORT_CONFIG_H=1 ) endif(NOT HAVE_CXX17_HAS_INCLUDE) @@ -415,24 +415,24 @@ if(LEVELDB_BUILD_BENCHMARKS) add_executable("${bench_target_name}" "") target_sources("${bench_target_name}" - PRIVATE - "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" - "util/histogram.cc" - "util/histogram.h" - "util/testutil.cc" - "util/testutil.h" - - "${bench_file}" + PRIVATE + "${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h" + "util/histogram.cc" + "util/histogram.h" + "util/testutil.cc" + "util/testutil.h" + + "${bench_file}" ) target_link_libraries("${bench_target_name}" leveldb gmock gtest benchmark) target_compile_definitions("${bench_target_name}" - PRIVATE - ${LEVELDB_PLATFORM_NAME}=1 + PRIVATE + ${LEVELDB_PLATFORM_NAME}=1 ) if (NOT HAVE_CXX17_HAS_INCLUDE) target_compile_definitions("${bench_target_name}" - PRIVATE - LEVELDB_HAS_PORT_CONFIG_H=1 + PRIVATE + LEVELDB_HAS_PORT_CONFIG_H=1 ) endif(NOT HAVE_CXX17_HAS_INCLUDE) endfunction(leveldb_benchmark) @@ -470,51 +470,51 @@ endif(LEVELDB_BUILD_BENCHMARKS) if(LEVELDB_INSTALL) install(TARGETS leveldb - EXPORT leveldbTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + EXPORT leveldbTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) install( - FILES - "${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h" - 
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h" - "${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/leveldb" + FILES + "${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h" + "${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/leveldb" ) include(CMakePackageConfigHelpers) configure_package_config_file( - "cmake/${PROJECT_NAME}Config.cmake.in" - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" - INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + "cmake/${PROJECT_NAME}Config.cmake.in" + "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" ) write_basic_package_version_file( - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" - COMPATIBILITY SameMajorVersion + "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" + COMPATIBILITY SameMajorVersion ) install( - EXPORT leveldbTargets - NAMESPACE leveldb:: - DESTINATION 
"${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + EXPORT leveldbTargets + NAMESPACE leveldb:: + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" ) install( - FILES - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" - "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + FILES + "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake" + "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" ) endif(LEVELDB_INSTALL) @@ -527,4 +527,9 @@ target_link_libraries(db_test1 leveldb) add_executable(db_test2 "${PROJECT_SOURCE_DIR}/test/db_test2.cc" ) -target_link_libraries(db_test2 PRIVATE leveldb) \ No newline at end of file +target_link_libraries(db_test2 PRIVATE leveldb) + +add_executable(db_test3 + "${PROJECT_SOURCE_DIR}/test/db_test3.cc" +) +target_link_libraries(db_test3 PRIVATE leveldb gtest) \ No newline at end of file diff --git a/report.md b/report.md index b9cd6a1..d6e9e8f 100644 --- a/report.md +++ b/report.md @@ -1,8 +1,8 @@ -#
设计文档
+#
LevelDB设计文档
王雪飞,马也驰
-### 1.项目概述 - +## 1.项目概述及目标 +### 1.1 项目概述 本项目的背景是提升 LevelDB 在高写入负载场景下的性能。LevelDB 是一种轻量级的键值存储引擎,但在数据频繁更新或大值(Large Values)存储场景下,由于数据写入和合并(Compaction)过程的设计,其性能可能受到显著影响。为解决这一问题,项目目标是实现 KV(Key-Value)分离机制,以降低写放大现象并提高存储效率。 具体实现内容包括在 LevelDB 内部引入 KV 分离功能,即将键(Key)与值(Value)存储到不同的存储介质中。通过修改 SSTable 的结构设计,将键与指向值的指针存储在原有的文件中,而将实际值存储到单独的文件或存储介质中,从而减少 Compaction 操作对大值的处理负担。此外,项目还优化了数据访问逻辑,实现了值文件的高效读写支持。 @@ -12,43 +12,95 @@ 1. 适用于大值写入频繁的场景,如日志存储、视频元数据管理等。 2. 提升 SSD 等固态存储设备的寿命,减少写入放大带来的磨损。 3. 在混合存储架构中,提高冷热数据分离的效率。 - - -### 2. 功能设计 -#### 2.1 字段设计 -**设计目标:** -能够准确描述kv的属性数量,以及每一个属性的名称和字节数量。 - -**设计思路:** -`key的格式:| key | vlog_fileno | value_offset | ` -`单个value的格式:| {attr1名称长度(定长), attr1名称(变长), attr1的偏移量(定长)}, ...{attr1长度(定长), attr1内容(变长)}, ... | ` +### 1.2 项目目标 + +本项目涵盖下面三个方面: +1. 实验一:在 LevelDB 的 value 中实现字段功能。 +2. 实验二:实现 KV 分离。 +3. 实验三:实现 Benchmark,测试并分析性能。 + +## 2. 实验内容 +### 2.1 在 LevelDB 的 value 中实现字段功能 +具体指:基于 LevelDB 扩展 value 的结构,使其可以包含多个字段,并通过这些字段实现类似数据库列查询的功能。 + +#### 2.1.1 实验要求: +字段存储: +1. 将 LevelDB 中的 value 组织成字段数组,每个数组元素对应一个字段(字段名:字段值)。 +2. 字段会被序列化为字符串,然后插入LevelDB。 +3. 这些字段可以通过解析字符串得到,字段名与字段值都是字符串类型。 +4. 允许任意调整字段。 +查询功能: +实现通过字段值查询对应的 key。 -#### 2.1 KV分离 +#### 2.1.2 实验内容 +1. 数据存储与解析: 每个 value 存储为一个字符串数组,数组中的每个元素代表一个字段。 +2. 通过字段查询 Key: 实现函数FindKeysByField,传入字段名和字段的值就可以找到对应的key + +**设计思路:** +1. 使用 Field 存储属性和值,使用 FieldArray 存储多个 Field; +2. 函数 SerializeValue 把字段数组序列化为字符串; +3. 函数 ParseValue 把字符串反序列化为字段数组; +4. 函数 FindKeysByField 根据传入的字段名和字段的值找到对应的key。 +#### 2.1.3 实验进度以及实验结果 +##### 实验进度 +已初步实现上述数据结构(Field、FieldArray)和三个函数,查询函数 FindKeysByField 后续会进行优化和完善。 +##### 实验结果 +通过测试 + +### 2.2 KV分离 **设计目标:** 将value的存储和key在lsm tree中的存储分离,降低lsm tree的GC开销 **设计思路:** 1. value的分离式存储 -我们使用若干个vlog文件,为每一个vlog文件设置容量上限(比如16MiB),并在内存中为每一个vlog维护一个discard计数器,表示这个vlog中当前有多少value已经在lsm tree中被标记为删除。 + 我们使用若干个vlog文件,为每一个vlog文件设置容量上限(比如16MiB),并在内存中为每一个vlog维护一个discard计数器,表示这个vlog中当前有多少value已经在lsm tree中被标记为删除。 2. 
存储value所在vlog和偏移量的元数据 -我们在key和vlog中添加一个vlog_page的中间层,这一层存储每一个key对应的value所在的vlog文件和文件内偏移,而lsm tree中的key包含的实际上是这个中间层的slot下标,而每一个slot中存储的是key所对应的vlog文件号以及value在vlog中的偏移。这样,我们就可以在不修改lsm tree的基础上,完成对vlog的compaction,并将vlog的gc结果只反映在这个中间层vlog_page中。这个vlog_page实际上也是一个线性增长的log文件,作用类似于os中的页表,负责维护lsm tree中存储的slot下标到vlog和vlog内偏移量的一个映射。这样,通过vlog_page我们就可以找到具体的vlog文件和其文件内偏移量。对于vlog的GC过程,我们不需要修改lsm tree中的内容,我们只需要修改vlog_page中的映射即可。 + 我们在key和vlog中添加一个vlog_page的中间层,这一层存储每一个key对应的value所在的vlog文件和文件内偏移,而lsm tree中的key包含的实际上是这个中间层的slot下标,而每一个slot中存储的是key所对应的vlog文件号以及value在vlog中的偏移。这样,我们就可以在不修改lsm tree的基础上,完成对vlog的compaction,并将vlog的gc结果只反映在这个中间层vlog_page中。这个vlog_page实际上也是一个线性增长的log文件,作用类似于os中的页表,负责维护lsm tree中存储的slot下标到vlog和vlog内偏移量的一个映射。这样,通过vlog_page我们就可以找到具体的vlog文件和其文件内偏移量。对于vlog的GC过程,我们不需要修改lsm tree中的内容,我们只需要修改vlog_page中的映射即可。 3. vlog_page文件和vlog文件的GC -对于vlog文件,我们在内存中维护一个bitmap,用来表示每一个slot的使用情况,并在插入和GC删除kv时进行动态的分配和释放。对于vlog文件的GC,我们用一个后台线程来扫描所有vlog的discard计数器。当某些vlog的discard计数器超过某个阈值(比如1024),我们就对这些vlog文件进行GC过程,当GC完成之后将vlog_page中的slot元数据进行更新,再将原来的vlog文件进行删除,GC过程就完成了。 - + 对于vlog文件,我们在内存中维护一个bitmap,用来表示每一个slot的使用情况,并在插入和GC删除kv时进行动态的分配和释放。对于vlog文件的GC,我们用一个后台线程来扫描所有vlog的discard计数器。当某些vlog的discard计数器超过某个阈值(比如1024),我们就对这些vlog文件进行GC过程,当GC完成之后将vlog_page中的slot元数据进行更新,再将原来的vlog文件进行删除,GC过程就完成了。 + + - ### 3. 数据结构设计 `key的格式:| key | vlog_page_slot | ` `vlog_page: | slot0:{vlog_no, offset}, slot1:{vlog_no, offset}, ... | ` - + 对于每一次读取,用户线程先读取lsm tree中key的slot下标,然后到vlog_page中读取对应的slot内容(**每一个slot都是定长的**),之后再在这个slot中读取value所在的vlog文件号和偏移量offset,之后到对应的vlog文件中读取value。 - + 但是这又带来了一个问题,我们该如何管理vlog_page这个文件?当插入新的kv时,我们需要在这个vlog_page中分配新的slot,在GC删除某个kv时,我们需要将对应的slot进行释放。这里我们选择在内存中维护一个可线性扩展的bitmap。这个bitmap中每一个bit标识了当前vlog_page文件中对应slot是否被使用,是为1,不是为0。这样一来,在插入新kv时,我们可以用bitmap来分配一个新的slot(将bitmap中第一个为0的bit设置为1),将内容进行写入;在GC删除某个kv时,我们将这个slot对应的bitmap中的bit重置为0即可。 ### 4. 接口设计 +#### 4.1 在 LevelDB 的 value 中实现字段功能 +1. 
std::string SerializeValue(const FieldArray& fields) + +**功能:** 将字段数组序列化为字符串 + +**输入:** 字段名和字段的值组成的字段数组 + +**输出:** 序列化后的字符串 + +2. FieldArray ParseValue(const std::string& value_str) + +**功能:** 将字符串反序列化为字段数组 + +**输入:** 字符串 + +**输出:** 反序列化的字段数组 + +3. std::vector< std::string >FindKeysByField(leveldb::DB* db, Field &field) + +**功能:** 根据字段名和字段的值找到对应的key + +**输入:** 数据库指针(leveldb::DB*),以及由字段名和字段值组成的 Field + +**输出:** value 中包含该字段(字段名与字段值均匹配)的所有 key;由于可能不止一个,所以返回值为 vector + +4. Put_Fields +5. Get_Fields +#### 4.2 实现KV分离 这里只展示和vlog以及GC无关的接口,vlog的创建,管理以及后台线程的GC设计到vlog等新数据结构的实现,较为复杂和庞大,这里不做展示。我们只列出与kv的插入有关的新接口: 1. 搜索vlog_page文件: Status find_slot(const Slice& key, Slot *slot); 2. 搜索vlog文件: Status find_value(Slot *slot); @@ -58,6 +110,69 @@ ### 5. 功能测试 +### 5.1 在 LevelDB 的 value 中实现字段功能 +```` +Status OpenDB(std::string dbName, DB **db) { + Options options; + options.create_if_missing = true; + return DB::Open(options, dbName, db); +} + +TEST(TestSchema, Basic) { + DB *db; + WriteOptions writeOptions; + ReadOptions readOptions; + if(OpenDB("testdb", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + std::string key1 = "k_1"; + std::string key2 = "k_2"; + FieldArray fields1 = { + {"name", "Customer#000000001"}, + {"address", "IVhzIApeRb"}, + {"phone", "25-989-741-2988"} + }; + + FieldArray fields2 = { + {"name", "Customer#000000001"}, + {"address", "ecnu"}, + {"phone", "123456789"} + }; + // 序列化并插入 + std::string value1 = SerializeValue(fields1); + std::string value2 = SerializeValue(fields2); + db->Put(leveldb::WriteOptions(), key1, value1); + db->Put(leveldb::WriteOptions(), key2, value2); + + // 读取并反序列化 + std::string value_ret; + db->Get(leveldb::ReadOptions(), key1, &value_ret); + auto fields_ret = ParseValue(value_ret); + + // 检查反序列化结果 + ASSERT_EQ(fields_ret.size(), fields1.size()); + for (size_t i = 0; i < fields_ret.size(); ++i) { + ASSERT_EQ(fields_ret[i].first, fields1[i].first); + ASSERT_EQ(fields_ret[i].second, fields1[i].second); + } + + // 测试查找功能 + Field query_field = 
{"name", "Customer#000000001"}; + std::vector found_keys = FindKeysByField(db, query_field); + std::cout << "找到的key有:" << found_keys.size() << "个" << std::endl; + ASSERT_EQ(found_keys[0], key1); + + // 关闭数据库 + delete db; +} + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} +```` +### 5.2 单元测试: 1. 测试插入后,是否能读取成功。 2. 测试插入超过初始vlog_page等slot数量之后,是否还能正常插入,检查vlog_page文件等线性可扩展性 @@ -74,13 +189,14 @@ #### 7. 分工和进度安排 -| 功能 | 完成日期 | 分工 | -|--------|--------|--------| -| vlog中value的存储格式 | 12.8 | 王雪飞 | +| 功能 | 完成日期 | 分工 | 是否完成 | +|--------------|--------|--------|------| +| 实现字段设计并通过测试 | 12.8 | 王雪飞 | 已完成 | +| vlog中value的存储格式 | 12.8 | 王雪飞 | | vlog_page实现 | 12.8 | 马也驰 | -| vlog的GC实现 | 12.29 | 马也驰 | -| 性能测试 | 1.5 | 王雪飞 | -| 功能测试 | 1.5 | 马也驰 | +| vlog的GC实现 | 12.29 | 马也驰 | +| 性能测试 | 1.5 | 王雪飞 | +| 功能测试 | 1.5 | 马也驰 | diff --git a/test/db_test3.cc b/test/db_test3.cc new file mode 100644 index 0000000..d51d2b8 --- /dev/null +++ b/test/db_test3.cc @@ -0,0 +1,120 @@ +#include +#include "leveldb/env.h" +#include +#include +#include +#include +#include +#include "gtest/gtest.h" + +using namespace leveldb; +using Field = std::pair; // field_name:field_value +using FieldArray = std::vector>; + +// 序列化为字符串 +std::string SerializeValue(const FieldArray& fields) { + std::ostringstream oss; + for (const auto& field : fields) { + oss << field.first << ":" << field.second << ";"; + } + return oss.str(); +} + +// 反序列化为字段数组 +FieldArray ParseValue(const std::string& value_str) { + FieldArray fields; + std::istringstream iss(value_str); + std::string field_str; + while (std::getline(iss, field_str, ';')) { + size_t delimiter_pos = field_str.find(':'); + if (delimiter_pos != std::string::npos) { + std::string field_name = field_str.substr(0, delimiter_pos); + std::string field_value = field_str.substr(delimiter_pos + 1); + fields.emplace_back(field_name, field_value); + } + } + return fields; +} + +// 根据字段值查找所有包含该字段的 key +std::vector 
FindKeysByField(leveldb::DB* db, Field &field) { + std::vector keys; + leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions()); + + for (it->SeekToFirst(); it->Valid(); it->Next()) { + std::string key = it->key().ToString(); + std::string value; + db->Get(leveldb::ReadOptions(), key, &value); + + FieldArray fields = ParseValue(value); + for (const auto& f : fields) { + if (f.first == field.first && f.second == field.second) { + keys.push_back(key); + break; // 假设每个key中每个字段值唯一,如果允许重复,可以移除这行 + } + } + } + + delete it; + return keys; +} + +Status OpenDB(std::string dbName, DB **db) { + Options options; + options.create_if_missing = true; + return DB::Open(options, dbName, db); +} + +TEST(TestSchema, Basic) { + DB *db; + WriteOptions writeOptions; + ReadOptions readOptions; + if(OpenDB("testdb", &db).ok() == false) { + std::cerr << "open db failed" << std::endl; + abort(); + } + std::string key1 = "k_1"; + std::string key2 = "k_2"; + FieldArray fields1 = { + {"name", "Customer#000000001"}, + {"address", "IVhzIApeRb"}, + {"phone", "25-989-741-2988"} + }; + + FieldArray fields2 = { + {"name", "Customer#000000001"}, + {"address", "ecnu"}, + {"phone", "123456789"} + }; + // 序列化并插入 + std::string value1 = SerializeValue(fields1); + std::string value2 = SerializeValue(fields2); + db->Put(leveldb::WriteOptions(), key1, value1); + db->Put(leveldb::WriteOptions(), key2, value2); + + // 读取并反序列化 + std::string value_ret; + db->Get(leveldb::ReadOptions(), key1, &value_ret); + auto fields_ret = ParseValue(value_ret); + + // 检查反序列化结果 + ASSERT_EQ(fields_ret.size(), fields1.size()); + for (size_t i = 0; i < fields_ret.size(); ++i) { + ASSERT_EQ(fields_ret[i].first, fields1[i].first); + ASSERT_EQ(fields_ret[i].second, fields1[i].second); + } + + // 测试查找功能 + Field query_field = {"name", "Customer#000000001"}; + std::vector found_keys = FindKeysByField(db, query_field); + std::cout << "找到的key有:" << found_keys.size() << "个" << std::endl; + ASSERT_EQ(found_keys[0], key1); + + // 
关闭数据库 + delete db; +} + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file