Преглед на файлове

complete db_test3.cc

pull/1/head
王雪飞 преди 2 седмици
родител
ревизия
ffa94bde76
променени са 3 файла, в които са добавени 482 реда и са изтрити 241 реда
  1. +219
    -214
      CMakeLists.txt
  2. +143
    -27
      report.md
  3. +120
    -0
      test/db_test3.cc

+ 219
- 214
CMakeLists.txt Целия файл

@ -84,7 +84,7 @@ check_cxx_compiler_flag(-Wthread-safety HAVE_CLANG_THREAD_SAFETY)
# Used by googletest.
check_cxx_compiler_flag(-Wno-missing-field-initializers
LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS)
LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS)
include(CheckCXXSourceCompiles)
@ -100,13 +100,13 @@ set(LEVELDB_PUBLIC_INCLUDE_DIR "include/leveldb")
set(LEVELDB_PORT_CONFIG_DIR "include/port")
configure_file(
"port/port_config.h.in"
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"port/port_config.h.in"
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
)
include_directories(
"${PROJECT_BINARY_DIR}/include"
"."
"${PROJECT_BINARY_DIR}/include"
"."
)
if(BUILD_SHARED_LIBS)
@ -119,153 +119,153 @@ include(GNUInstallDirs)
add_library(leveldb "")
target_sources(leveldb
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"db/builder.cc"
"db/builder.h"
"db/c.cc"
"db/db_impl.cc"
"db/db_impl.h"
"db/db_iter.cc"
"db/db_iter.h"
"db/dbformat.cc"
"db/dbformat.h"
"db/dumpfile.cc"
"db/filename.cc"
"db/filename.h"
"db/log_format.h"
"db/log_reader.cc"
"db/log_reader.h"
"db/log_writer.cc"
"db/log_writer.h"
"db/memtable.cc"
"db/memtable.h"
"db/repair.cc"
"db/skiplist.h"
"db/snapshot.h"
"db/table_cache.cc"
"db/table_cache.h"
"db/version_edit.cc"
"db/version_edit.h"
"db/version_set.cc"
"db/version_set.h"
"db/write_batch_internal.h"
"db/write_batch.cc"
"port/port_stdcxx.h"
"port/port.h"
"port/thread_annotations.h"
"table/block_builder.cc"
"table/block_builder.h"
"table/block.cc"
"table/block.h"
"table/filter_block.cc"
"table/filter_block.h"
"table/format.cc"
"table/format.h"
"table/iterator_wrapper.h"
"table/iterator.cc"
"table/merger.cc"
"table/merger.h"
"table/table_builder.cc"
"table/table.cc"
"table/two_level_iterator.cc"
"table/two_level_iterator.h"
"util/arena.cc"
"util/arena.h"
"util/bloom.cc"
"util/cache.cc"
"util/coding.cc"
"util/coding.h"
"util/comparator.cc"
"util/crc32c.cc"
"util/crc32c.h"
"util/env.cc"
"util/filter_policy.cc"
"util/hash.cc"
"util/hash.h"
"util/logging.cc"
"util/logging.h"
"util/mutexlock.h"
"util/no_destructor.h"
"util/options.cc"
"util/random.h"
"util/status.cc"
# Only CMake 3.3+ supports PUBLIC sources in targets exported by "install".
$<$<VERSION_GREATER:CMAKE_VERSION,3.2>:PUBLIC>
"${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"db/builder.cc"
"db/builder.h"
"db/c.cc"
"db/db_impl.cc"
"db/db_impl.h"
"db/db_iter.cc"
"db/db_iter.h"
"db/dbformat.cc"
"db/dbformat.h"
"db/dumpfile.cc"
"db/filename.cc"
"db/filename.h"
"db/log_format.h"
"db/log_reader.cc"
"db/log_reader.h"
"db/log_writer.cc"
"db/log_writer.h"
"db/memtable.cc"
"db/memtable.h"
"db/repair.cc"
"db/skiplist.h"
"db/snapshot.h"
"db/table_cache.cc"
"db/table_cache.h"
"db/version_edit.cc"
"db/version_edit.h"
"db/version_set.cc"
"db/version_set.h"
"db/write_batch_internal.h"
"db/write_batch.cc"
"port/port_stdcxx.h"
"port/port.h"
"port/thread_annotations.h"
"table/block_builder.cc"
"table/block_builder.h"
"table/block.cc"
"table/block.h"
"table/filter_block.cc"
"table/filter_block.h"
"table/format.cc"
"table/format.h"
"table/iterator_wrapper.h"
"table/iterator.cc"
"table/merger.cc"
"table/merger.h"
"table/table_builder.cc"
"table/table.cc"
"table/two_level_iterator.cc"
"table/two_level_iterator.h"
"util/arena.cc"
"util/arena.h"
"util/bloom.cc"
"util/cache.cc"
"util/coding.cc"
"util/coding.h"
"util/comparator.cc"
"util/crc32c.cc"
"util/crc32c.h"
"util/env.cc"
"util/filter_policy.cc"
"util/hash.cc"
"util/hash.h"
"util/logging.cc"
"util/logging.h"
"util/mutexlock.h"
"util/no_destructor.h"
"util/options.cc"
"util/random.h"
"util/status.cc"
# Only CMake 3.3+ supports PUBLIC sources in targets exported by "install".
$<$<VERSION_GREATER:CMAKE_VERSION,3.2>:PUBLIC>
"${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
)
if (WIN32)
target_sources(leveldb
PRIVATE
"util/env_windows.cc"
"util/windows_logger.h"
PRIVATE
"util/env_windows.cc"
"util/windows_logger.h"
)
else (WIN32)
target_sources(leveldb
PRIVATE
"util/env_posix.cc"
"util/posix_logger.h"
PRIVATE
"util/env_posix.cc"
"util/posix_logger.h"
)
endif (WIN32)
# MemEnv is not part of the interface and could be pulled to a separate library.
target_sources(leveldb
PRIVATE
"helpers/memenv/memenv.cc"
"helpers/memenv/memenv.h"
PRIVATE
"helpers/memenv/memenv.cc"
"helpers/memenv/memenv.h"
)
target_include_directories(leveldb
PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
set_target_properties(leveldb
PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
target_compile_definitions(leveldb
PRIVATE
# Used by include/export.h when building shared libraries.
LEVELDB_COMPILE_LIBRARY
# Used by port/port.h.
${LEVELDB_PLATFORM_NAME}=1
PRIVATE
# Used by include/export.h when building shared libraries.
LEVELDB_COMPILE_LIBRARY
# Used by port/port.h.
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions(leveldb
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
if(BUILD_SHARED_LIBS)
target_compile_definitions(leveldb
PUBLIC
# Used by include/export.h.
LEVELDB_SHARED_LIBRARY
PUBLIC
# Used by include/export.h.
LEVELDB_SHARED_LIBRARY
)
endif(BUILD_SHARED_LIBS)
if(HAVE_CLANG_THREAD_SAFETY)
target_compile_options(leveldb
PUBLIC
-Werror -Wthread-safety)
PUBLIC
-Werror -Wthread-safety)
endif(HAVE_CLANG_THREAD_SAFETY)
if(HAVE_CRC32C)
@ -286,7 +286,7 @@ find_package(Threads REQUIRED)
target_link_libraries(leveldb Threads::Threads)
add_executable(leveldbutil
"db/leveldbutil.cc"
"db/leveldbutil.cc"
)
target_link_libraries(leveldbutil leveldb)
@ -305,60 +305,60 @@ if(LEVELDB_BUILD_TESTS)
# GoogleTest triggers a missing field initializers warning.
if(LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS)
set_property(TARGET gtest
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
set_property(TARGET gmock
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
endif(LEVELDB_HAVE_NO_MISSING_FIELD_INITIALIZERS)
add_executable(leveldb_tests "")
target_sources(leveldb_tests
PRIVATE
# "db/fault_injection_test.cc"
# "issues/issue178_test.cc"
# "issues/issue200_test.cc"
# "issues/issue320_test.cc"
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
# "util/env_test.cc"
"util/status_test.cc"
"util/no_destructor_test.cc"
"util/testutil.cc"
"util/testutil.h"
PRIVATE
# "db/fault_injection_test.cc"
# "issues/issue178_test.cc"
# "issues/issue200_test.cc"
# "issues/issue320_test.cc"
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
# "util/env_test.cc"
"util/status_test.cc"
"util/no_destructor_test.cc"
"util/testutil.cc"
"util/testutil.h"
)
if(NOT BUILD_SHARED_LIBS)
target_sources(leveldb_tests
PRIVATE
"db/autocompact_test.cc"
"db/corruption_test.cc"
"db/db_test.cc"
"db/dbformat_test.cc"
"db/filename_test.cc"
"db/log_test.cc"
"db/recovery_test.cc"
"db/skiplist_test.cc"
"db/version_edit_test.cc"
"db/version_set_test.cc"
"db/write_batch_test.cc"
"helpers/memenv/memenv_test.cc"
"table/filter_block_test.cc"
"table/table_test.cc"
"util/arena_test.cc"
"util/bloom_test.cc"
"util/cache_test.cc"
"util/coding_test.cc"
"util/crc32c_test.cc"
"util/hash_test.cc"
"util/logging_test.cc"
PRIVATE
"db/autocompact_test.cc"
"db/corruption_test.cc"
"db/db_test.cc"
"db/dbformat_test.cc"
"db/filename_test.cc"
"db/log_test.cc"
"db/recovery_test.cc"
"db/skiplist_test.cc"
"db/version_edit_test.cc"
"db/version_set_test.cc"
"db/write_batch_test.cc"
"helpers/memenv/memenv_test.cc"
"table/filter_block_test.cc"
"table/table_test.cc"
"util/arena_test.cc"
"util/bloom_test.cc"
"util/cache_test.cc"
"util/coding_test.cc"
"util/crc32c_test.cc"
"util/hash_test.cc"
"util/logging_test.cc"
)
endif(NOT BUILD_SHARED_LIBS)
target_link_libraries(leveldb_tests leveldb gmock gtest gtest_main)
target_compile_definitions(leveldb_tests
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions(leveldb_tests
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
@ -369,22 +369,22 @@ if(LEVELDB_BUILD_TESTS)
add_executable("${test_target_name}" "")
target_sources("${test_target_name}"
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"util/testutil.cc"
"util/testutil.h"
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"util/testutil.cc"
"util/testutil.h"
"${test_file}"
"${test_file}"
)
target_link_libraries("${test_target_name}" leveldb gmock gtest)
target_compile_definitions("${test_target_name}"
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions("${test_target_name}"
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
@ -415,24 +415,24 @@ if(LEVELDB_BUILD_BENCHMARKS)
add_executable("${bench_target_name}" "")
target_sources("${bench_target_name}"
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"util/histogram.cc"
"util/histogram.h"
"util/testutil.cc"
"util/testutil.h"
"${bench_file}"
PRIVATE
"${PROJECT_BINARY_DIR}/${LEVELDB_PORT_CONFIG_DIR}/port_config.h"
"util/histogram.cc"
"util/histogram.h"
"util/testutil.cc"
"util/testutil.h"
"${bench_file}"
)
target_link_libraries("${bench_target_name}" leveldb gmock gtest benchmark)
target_compile_definitions("${bench_target_name}"
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
PRIVATE
${LEVELDB_PLATFORM_NAME}=1
)
if (NOT HAVE_CXX17_HAS_INCLUDE)
target_compile_definitions("${bench_target_name}"
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
PRIVATE
LEVELDB_HAS_PORT_CONFIG_H=1
)
endif(NOT HAVE_CXX17_HAS_INCLUDE)
endfunction(leveldb_benchmark)
@ -470,51 +470,51 @@ endif(LEVELDB_BUILD_BENCHMARKS)
if(LEVELDB_INSTALL)
install(TARGETS leveldb
EXPORT leveldbTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
EXPORT leveldbTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(
FILES
"${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/leveldb"
FILES
"${LEVELDB_PUBLIC_INCLUDE_DIR}/c.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/cache.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/comparator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/db.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/dumpfile.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/env.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/export.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/filter_policy.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/iterator.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/options.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/slice.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/status.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table_builder.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/table.h"
"${LEVELDB_PUBLIC_INCLUDE_DIR}/write_batch.h"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/leveldb"
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
"cmake/${PROJECT_NAME}Config.cmake.in"
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
"cmake/${PROJECT_NAME}Config.cmake.in"
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
)
write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
COMPATIBILITY SameMajorVersion
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
COMPATIBILITY SameMajorVersion
)
install(
EXPORT leveldbTargets
NAMESPACE leveldb::
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
EXPORT leveldbTargets
NAMESPACE leveldb::
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
)
install(
FILES
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
FILES
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
"${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
)
endif(LEVELDB_INSTALL)
@ -527,4 +527,9 @@ target_link_libraries(db_test1 leveldb)
add_executable(db_test2
"${PROJECT_SOURCE_DIR}/test/db_test2.cc"
)
target_link_libraries(db_test2 PRIVATE leveldb)
target_link_libraries(db_test2 PRIVATE leveldb)
# db_test3 is a GoogleTest-based test. Only define it when the gtest target
# exists in this build; otherwise "gtest" would be passed to the linker as a
# plain library name and the GoogleTest headers would not be on the include
# path, breaking configurations that do not build the tests.
if(TARGET gtest)
  add_executable(db_test3
    "${PROJECT_SOURCE_DIR}/test/db_test3.cc"
  )
  target_link_libraries(db_test3 PRIVATE leveldb gtest)
endif(TARGET gtest)

+ 143
- 27
report.md Целия файл

@ -1,8 +1,8 @@
# <center>设计文档 </center>
# <center>LevelDB设计文档 </center>
<center>王雪飞,马也驰 </center>
### 1.项目概述
## 1.项目概述及目标
### 1.1 项目概述
本项目的背景是提升 LevelDB 在高写入负载场景下的性能。LevelDB 是一种轻量级的键值存储引擎,但在数据频繁更新或大值(Large Values)存储场景下,由于数据写入和合并(Compaction)过程的设计,其性能可能受到显著影响。为解决这一问题,项目目标是实现 KV(Key-Value)分离机制,以降低写放大现象并提高存储效率。
具体实现内容包括在 LevelDB 内部引入 KV 分离功能,即将键(Key)与值(Value)存储到不同的存储介质中。通过修改 SSTable 的结构设计,将键与指向值的指针存储在原有的文件中,而将实际值存储到单独的文件或存储介质中,从而减少 Compaction 操作对大值的处理负担。此外,项目还优化了数据访问逻辑,实现了值文件的高效读写支持。
@ -12,43 +12,95 @@
1. 适用于大值写入频繁的场景,如日志存储、视频元数据管理等。
2. 提升 SSD 等固态存储设备的寿命,减少写入放大带来的磨损。
3. 在混合存储架构中,提高冷热数据分离的效率。
### 2. 功能设计
#### 2.1 字段设计
**设计目标:**
能够准确描述kv的属性数量,以及每一个属性的名称和字节数量。
**设计思路:**
`key的格式:| key | vlog_fileno | value_offset | `
`单个value的格式:| {attr1名称长度(定长), attr1名称(变长), attr1的偏移量(定长)}, ...{attr1长度(定长), attr1内容(变长)}, ... | `
### 1.2 项目目标
本项目涵盖下面三个方面:
1. 实验一:在 LevelDB 的 value 中实现字段功能。
2. 实验二:实现 KV 分离。
3. 实验三:实现 Benchmark,测试并分析性能。
## 2. 实验内容
### 2.1 在 LevelDB 的 value 中实现字段功能
具体指:基于 levelDB扩展 value 的结构,使其可以包含多个字段,并通过这些字段实现类似数据库列查询的功能。
#### 2.1.1 实验要求:
字段存储:
1. 将 LevelDB 中的 value 组织成字段数组,每个数组元素对应一个字段(字段名:字段值)。
2. 字段会被序列化为字符串,然后插入LevelDB。
3. 这些字段可以通过解析字符串得到,字段名与字段值都是字符串类型。
4. 允许任意调整字段。
查询功能:
实现通过字段值查询对应的 key。
#### 2.1 KV分离
#### 2.1.2 实验内容
1. 数据存储与解析: 每个 value 存储为一个字符串数组,数组中的每个元素代表一个字段。
2. 通过字段查询 Key: 实现函数FindKeysByField,传入字段名和字段的值就可以找到对应的key
**设计思路:**
1. 使用 Field 存储属性和值,使用 FieldArray 存储多个 Field;
2. 函数 SerializeValue 把字段数组序列化为字符串;
3. 函数 ParseValue 把字符串反序列化为字段数组;
4. 函数 FindKeysByField 根据传入的字段名和字段的值找到对应的key。
#### 2.1.3 实验进度以及实验结果
#### 实验进度
已初步实现上述四个函数,查询函数 FindKeysByField 后续会进行优化和完善。
#### 实验结果
通过测试
### 2.2 KV分离
**设计目标:**
将value的存储和key在lsm tree中的存储分离,降低lsm tree的GC开销
**设计思路:**
1. value的分离式存储
我们使用若干个vlog文件,为每一个vlog文件设置容量上限(比如16MiB),并在内存中为每一个vlog维护一个discard计数器,表示这个vlog中当前有多少value已经在lsm tree中被标记为删除。
我们使用若干个vlog文件,为每一个vlog文件设置容量上限(比如16MiB),并在内存中为每一个vlog维护一个discard计数器,表示这个vlog中当前有多少value已经在lsm tree中被标记为删除。
2. 存储value所在vlog和偏移量的元数据
我们在key和vlog中添加一个vlog_page的中间层,这一层存储每一个key对应的value所在的vlog文件和文件内偏移,而lsm tree中的key包含的实际上是这个中间层的slot下标,而每一个slot中存储的是key所对应的vlog文件号以及value在vlog中的偏移。这样,我们就可以在不修改lsm tree的基础上,完成对vlog的compaction,并将vlog的gc结果只反映在这个中间层vlog_page中。这个vlog_page实际上也是一个线性增长的log文件,作用类似于os中的页表,负责维护lsm tree中存储的slot下标到vlog和vlog内偏移量的一个映射。这样,通过vlog_page我们就可以找到具体的vlog文件和其文件内偏移量。对于vlog的GC过程,我们不需要修改lsm tree中的内容,我们只需要修改vlog_page中的映射即可。
我们在key和vlog中添加一个vlog_page的中间层,这一层存储每一个key对应的value所在的vlog文件和文件内偏移,而lsm tree中的key包含的实际上是这个中间层的slot下标,而每一个slot中存储的是key所对应的vlog文件号以及value在vlog中的偏移。这样,我们就可以在不修改lsm tree的基础上,完成对vlog的compaction,并将vlog的gc结果只反映在这个中间层vlog_page中。这个vlog_page实际上也是一个线性增长的log文件,作用类似于os中的页表,负责维护lsm tree中存储的slot下标到vlog和vlog内偏移量的一个映射。这样,通过vlog_page我们就可以找到具体的vlog文件和其文件内偏移量。对于vlog的GC过程,我们不需要修改lsm tree中的内容,我们只需要修改vlog_page中的映射即可。
3. vlog_page文件和vlog文件的GC
对于vlog文件,我们在内存中维护一个bitmap,用来表示每一个slot的使用情况,并在插入和GC删除kv时进行动态的分配和释放。对于vlog文件的GC,我们用一个后台线程来扫描所有vlog的discard计数器。当某些vlog的discard计数器超过某个阈值(比如1024),我们就对这些vlog文件进行GC过程,当GC完成之后将vlog_page中的slot元数据进行更新,再将原来的vlog文件进行删除,GC过程就完成了。
对于vlog文件,我们在内存中维护一个bitmap,用来表示每一个slot的使用情况,并在插入和GC删除kv时进行动态的分配和释放。对于vlog文件的GC,我们用一个后台线程来扫描所有vlog的discard计数器。当某些vlog的discard计数器超过某个阈值(比如1024),我们就对这些vlog文件进行GC过程,当GC完成之后将vlog_page中的slot元数据进行更新,再将原来的vlog文件进行删除,GC过程就完成了。
### 3. 数据结构设计
`key的格式:| key | vlog_page_slot | `
`vlog_page: | slot0:{vlog_no, offset}, slot1:{vlog_no, offset}, ... | `
对于每一次读取,用户线程先读取lsm tree中key的slot下标,然后到vlog_page中读取对应的slot内容(**每一个slot都是定长的**),之后再在这个slot中读取value所在的vlog文件号和偏移量offset,之后到对应的vlog文件中读取value。
但是这又带来了一个问题,我们该如何管理vlog_page这个文件?当插入新的kv时,我们需要在这个vlog_page中分配新的slot,在GC删除某个kv时,我们需要将对应的slot进行释放。这里我们选择在内存中维护一个可线性扩展的bitmap。这个bitmap中每一个bit标识了当前vlog_page文件中对应slot是否被使用,是为1,不是为0。这样一来,在插入新kv时,我们可以用bitmap来分配一个新的slot(将bitmap中第一个为0的bit设置为1),将内容进行写入;在GC删除某个kv时,我们将这个slot对应的bitmap中的bit重置为0即可。
### 4. 接口设计
#### 4.1 在 LevelDB 的 value 中实现字段功能
1. std::string SerializeValue(const FieldArray& fields)
**功能:** 将字段数组序列化为字符串
**输入:** 字段名和字段的值组成的字段数组
**输出:** 序列化后的字符串
2. FieldArray ParseValue(const std::string& value_str)
**功能:** 将字符串反序列化为字段数组
**输入:** 字符串
**输出:** 反序列化的字段数组
3. std::vector< std::string >FindKeysByField(leveldb::DB* db, Field &field)
**功能:** 根据字段名和字段的值找到对应的key
**输入:** 数据库指针(leveldb::DB*),以及由字段名和字段值组成的 Field
**输出:** value 中包含该字段(字段名与字段值均匹配)的所有 key;由于匹配的 key 可能不止一个,所以返回值为 vector
4. Put_Fields
5. Get_Fields
#### 4.2 实现KV分离
这里只展示和vlog以及GC无关的接口,vlog的创建、管理以及后台线程的GC涉及到vlog等新数据结构的实现,较为复杂和庞大,这里不做展示。我们只列出与kv的插入有关的新接口:
1. 搜索vlog_page文件: Status find_slot(const Slice& key, Slot *slot);
2. 搜索vlog文件: Status find_value(Slot *slot);
@ -58,6 +110,69 @@
### 5. 功能测试
### 5.1 在 LevelDB 的 value 中实现字段功能
````
// Opens the database identified by dbName and stores the handle in *db.
// The database is created if it does not exist yet. Returns the Status
// produced by DB::Open.
Status OpenDB(std::string dbName, DB **db) {
  Options open_options;
  open_options.create_if_missing = true;
  Status open_status = DB::Open(open_options, dbName, db);
  return open_status;
}
// End-to-end check of the field helpers: serialize two records, store them,
// read one back and compare it field by field, then look keys up by field.
TEST(TestSchema, Basic) {
  DB *raw_db = nullptr;
  const Status open_status = OpenDB("testdb", &raw_db);
  // Owning the handle here closes the database on every exit path, including
  // the early returns performed by failing ASSERT_* macros.
  std::unique_ptr<DB> db(raw_db);
  ASSERT_TRUE(open_status.ok()) << "open db failed: " << open_status.ToString();

  const std::string key1 = "k_1";
  const std::string key2 = "k_2";
  const FieldArray fields1 = {
    {"name", "Customer#000000001"},
    {"address", "IVhzIApeRb"},
    {"phone", "25-989-741-2988"}
  };
  const FieldArray fields2 = {
    {"name", "Customer#000000001"},
    {"address", "ecnu"},
    {"phone", "123456789"}
  };

  // Serialize and insert; a failed write would make every later check
  // meaningless, so stop immediately.
  ASSERT_TRUE(db->Put(WriteOptions(), key1, SerializeValue(fields1)).ok());
  ASSERT_TRUE(db->Put(WriteOptions(), key2, SerializeValue(fields2)).ok());

  // Read back and deserialize.
  std::string value_ret;
  ASSERT_TRUE(db->Get(ReadOptions(), key1, &value_ret).ok());
  const FieldArray fields_ret = ParseValue(value_ret);

  // Verify the round trip field by field.
  ASSERT_EQ(fields_ret.size(), fields1.size());
  for (size_t i = 0; i < fields_ret.size(); ++i) {
    ASSERT_EQ(fields_ret[i].first, fields1[i].first);
    ASSERT_EQ(fields_ret[i].second, fields1[i].second);
  }

  // Lookup by field: both records written above carry this name.
  Field query_field = {"name", "Customer#000000001"};
  const std::vector<std::string> found_keys =
      FindKeysByField(db.get(), query_field);
  std::cout << "找到的key有:" << found_keys.size() << std::endl;
  // Guard the index below; an empty result must fail the test, not crash it.
  ASSERT_GE(found_keys.size(), 2u);
  ASSERT_EQ(found_keys[0], key1);
}
// Test entry point: hands the command line to GoogleTest and reports the
// result of running every registered test as the process exit code.
int main(int argc, char **argv) {
  testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}
````
### 5.2
单元测试:
1. 测试插入后,是否能读取成功。
2. 测试插入超过初始vlog_page的slot数量之后,是否还能正常插入,检查vlog_page文件的线性可扩展性
@ -74,13 +189,14 @@
#### 7. 分工和进度安排
| 功能 | 完成日期 | 分工 |
|--------|--------|--------|
| vlog中value的存储格式 | 12.8 | 王雪飞 |
| 功能 | 完成日期 | 分工 | 是否完成 |
|--------------|--------|--------|------|
| 实现字段设计并通过测试 | 12.8 | 王雪飞 | 已完成 |
| vlog中value的存储格式 | 12.8 | 王雪飞 |
| vlog_page实现 | 12.8 | 马也驰 |
| vlog的GC实现 | 12.29 | 马也驰 |
| 性能测试 | 1.5 | 王雪飞 |
| 功能测试 | 1.5 | 马也驰 |
| vlog的GC实现 | 12.29 | 马也驰 |
| 性能测试 | 1.5 | 王雪飞 |
| 功能测试 | 1.5 | 马也驰 |

+ 120
- 0
test/db_test3.cc Целия файл

@ -0,0 +1,120 @@
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <leveldb/db.h>
#include <leveldb/options.h>
#include "leveldb/env.h"
#include "gtest/gtest.h"
using namespace leveldb;
// A single field: (field_name, field_value).
using Field = std::pair<std::string, std::string>;
// Ordered list of fields that together make up one stored value.
using FieldArray = std::vector<Field>;

// Serialized layout: name ':' value ';' repeated once per field.
// The three characters below have structural meaning; when they occur inside
// a name or a value they are written with a leading backslash so that the
// text survives a SerializeValue -> ParseValue round trip.
static bool IsFieldSyntaxChar(char c) {
  return c == ':' || c == ';' || c == '\\';
}

// Appends `text` to `*out`, prefixing every structural character with '\'.
static void AppendEscapedField(const std::string& text, std::string* out) {
  for (const char c : text) {
    if (IsFieldSyntaxChar(c)) {
      out->push_back('\\');
    }
    out->push_back(c);
  }
}

// Serializes a field array into a single string.
//
// Each field is emitted as "name:value;". Names and values that contain
// ':', ';' or '\' are escaped; fields without those characters produce
// exactly "name:value;", so the output is unchanged for such data.
std::string SerializeValue(const FieldArray& fields) {
  std::string out;
  for (const auto& field : fields) {
    AppendEscapedField(field.first, &out);
    out.push_back(':');
    AppendEscapedField(field.second, &out);
    out.push_back(';');
  }
  return out;
}

// Parses a string produced by SerializeValue back into a field array.
//
// Rules:
//  * A backslash followed by ':', ';' or '\' yields that character literally.
//    A backslash followed by anything else (or at the end of the input) is
//    kept as an ordinary character.
//  * The first unescaped ':' of a segment separates name from value; later
//    colons belong to the value.
//  * An unescaped ';' ends the segment. Segments without a ':' are ignored.
//  * A final segment that is not terminated by ';' is still accepted.
FieldArray ParseValue(const std::string& value_str) {
  FieldArray fields;
  std::string name;
  std::string value;
  bool in_value = false;  // true once the segment's name/value ':' was seen

  const std::string::size_type length = value_str.size();
  for (std::string::size_type i = 0; i < length; ++i) {
    const char c = value_str[i];
    std::string& target = in_value ? value : name;

    if (c == '\\' && i + 1 < length && IsFieldSyntaxChar(value_str[i + 1])) {
      // Escaped structural character: consume both, store the second.
      target.push_back(value_str[++i]);
    } else if (c == ';') {
      if (in_value) {
        fields.emplace_back(name, value);
      }
      name.clear();
      value.clear();
      in_value = false;
    } else if (c == ':' && !in_value) {
      in_value = true;
    } else {
      target.push_back(c);
    }
  }

  // Unterminated trailing segment.
  if (in_value) {
    fields.emplace_back(name, value);
  }
  return fields;
}
// Returns every key in `db` whose stored value contains a field equal to
// `field` (both the field name and the field value must match). Keys are
// returned in the order the database iterator visits them; each key appears
// at most once.
//
// This is a full scan: every entry is visited and decoded with ParseValue().
std::vector<std::string> FindKeysByField(leveldb::DB* db, Field &field) {
  std::vector<std::string> keys;
  // unique_ptr releases the iterator even if decoding or push_back throws.
  std::unique_ptr<leveldb::Iterator> it(
      db->NewIterator(leveldb::ReadOptions()));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // The iterator already exposes the value of the current entry, so there
    // is no need for a second point lookup per key.
    const FieldArray fields = ParseValue(it->value().ToString());
    for (const auto& candidate : fields) {
      if (candidate.first == field.first && candidate.second == field.second) {
        keys.push_back(it->key().ToString());
        break;  // one matching field is enough; record the key only once
      }
    }
  }
  return keys;
}
// Opens the database identified by dbName and stores the handle in *db.
// The database is created if it does not exist yet. Returns the Status
// produced by DB::Open.
Status OpenDB(std::string dbName, DB **db) {
  Options open_options;
  open_options.create_if_missing = true;
  Status open_status = DB::Open(open_options, dbName, db);
  return open_status;
}
// End-to-end check of the field helpers: serialize two records, store them,
// read one back and compare it field by field, then look keys up by field.
TEST(TestSchema, Basic) {
  DB *raw_db = nullptr;
  const Status open_status = OpenDB("testdb", &raw_db);
  // Owning the handle here closes the database on every exit path, including
  // the early returns performed by failing ASSERT_* macros.
  std::unique_ptr<DB> db(raw_db);
  ASSERT_TRUE(open_status.ok()) << "open db failed: " << open_status.ToString();

  const std::string key1 = "k_1";
  const std::string key2 = "k_2";
  const FieldArray fields1 = {
    {"name", "Customer#000000001"},
    {"address", "IVhzIApeRb"},
    {"phone", "25-989-741-2988"}
  };
  const FieldArray fields2 = {
    {"name", "Customer#000000001"},
    {"address", "ecnu"},
    {"phone", "123456789"}
  };

  // Serialize and insert; a failed write would make every later check
  // meaningless, so stop immediately.
  ASSERT_TRUE(db->Put(WriteOptions(), key1, SerializeValue(fields1)).ok());
  ASSERT_TRUE(db->Put(WriteOptions(), key2, SerializeValue(fields2)).ok());

  // Read back and deserialize.
  std::string value_ret;
  ASSERT_TRUE(db->Get(ReadOptions(), key1, &value_ret).ok());
  const FieldArray fields_ret = ParseValue(value_ret);

  // Verify the round trip field by field.
  ASSERT_EQ(fields_ret.size(), fields1.size());
  for (size_t i = 0; i < fields_ret.size(); ++i) {
    ASSERT_EQ(fields_ret[i].first, fields1[i].first);
    ASSERT_EQ(fields_ret[i].second, fields1[i].second);
  }

  // Lookup by field: both records written above carry this name.
  Field query_field = {"name", "Customer#000000001"};
  const std::vector<std::string> found_keys =
      FindKeysByField(db.get(), query_field);
  std::cout << "找到的key有:" << found_keys.size() << std::endl;
  // Guard the index below; an empty result must fail the test, not crash it.
  ASSERT_GE(found_keys.size(), 2u);
  ASSERT_EQ(found_keys[0], key1);
}
// Test entry point: hands the command line to GoogleTest and reports the
// result of running every registered test as the process exit code.
int main(int argc, char **argv) {
  testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}

Зареждане…
Отказ
Запис