作者: 韩晨旭@ArcueidType(Arcueid) 10225101440 李畅@wesley 10225102463 设计文档为PLAN.md,md版本报告为README.md,pdf版本报告为Report.pdf
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

190 lines
7.3 KiB

Add support for Zstd-based compression in LevelDB. This change implements support for Zstd-based compression in LevelDB. Building up from the Snappy compression (which has been supported since inception), this change adds Zstd as an alternate compression algorithm. We are implementing this to provide alternative options for users who might have different performance and efficiency requirements. For instance, the Zstandard website (https://facebook.github.io/zstd/) claims that the Zstd algorithm can achieve around 30% higher compression ratios than Snappy, with relatively smaller (~10%) slowdowns in de/compression speeds. Benchmarking results: $ blaze-bin/third_party/leveldb/db_bench LevelDB: version 1.23 Date: Thu Feb 2 18:50:06 2023 CPU: 56 * Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz CPUCache: 35840 KB Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1000000 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) ------------------------------------------------ fillseq : 2.613 micros/op; 42.3 MB/s fillsync : 3924.432 micros/op; 0.0 MB/s (1000 ops) fillrandom : 3.609 micros/op; 30.7 MB/s overwrite : 4.508 micros/op; 24.5 MB/s readrandom : 6.136 micros/op; (864322 of 1000000 found) readrandom : 5.446 micros/op; (864083 of 1000000 found) readseq : 0.180 micros/op; 613.3 MB/s readreverse : 0.321 micros/op; 344.7 MB/s compact : 827043.000 micros/op; readrandom : 4.603 micros/op; (864105 of 1000000 found) readseq : 0.169 micros/op; 656.3 MB/s readreverse : 0.315 micros/op; 350.8 MB/s fill100K : 854.009 micros/op; 111.7 MB/s (1000 ops) crc32c : 1.227 micros/op; 3184.0 MB/s (4K per op) snappycomp : 3.610 micros/op; 1081.9 MB/s (output: 55.2%) snappyuncomp : 0.691 micros/op; 5656.3 MB/s zstdcomp : 15.731 micros/op; 248.3 MB/s (output: 44.1%) zstduncomp : 4.218 micros/op; 926.2 MB/s PiperOrigin-RevId: 509957778
1 year ago
  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
  5. #define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
  6. #include <cstddef>
  7. #include "leveldb/export.h"
  8. namespace leveldb {
  9. class Cache;
  10. class Comparator;
  11. class Env;
  12. class FilterPolicy;
  13. class Logger;
  14. class Snapshot;
  // DB contents are stored in a set of blocks, each of which holds a
  // sequence of key,value pairs.  Each block may be compressed before
  // being stored in a file.  The following enum describes which
  // compression method (if any) is used to compress a block.
  enum CompressionType {
    // NOTE: do not change the values of existing entries, as these are
    // part of the persistent format on disk.
    kNoCompression = 0x0,      // Blocks are stored uncompressed.
    kSnappyCompression = 0x1,  // Blocks are compressed with Snappy.
    kZstdCompression = 0x2,    // Blocks are compressed with Zstd.
  };
  26. // Options to control the behavior of a database (passed to DB::Open)
  27. struct LEVELDB_EXPORT Options {
  28. // Create an Options object with default values for all fields.
  29. Options();
  30. // -------------------
  31. // Parameters that affect behavior
  32. // Comparator used to define the order of keys in the table.
  33. // Default: a comparator that uses lexicographic byte-wise ordering
  34. //
  35. // REQUIRES: The client must ensure that the comparator supplied
  36. // here has the same name and orders keys *exactly* the same as the
  37. // comparator provided to previous open calls on the same DB.
  38. const Comparator* comparator;
  39. // If true, the database will be created if it is missing.
  40. bool create_if_missing = false;
  41. // If true, an error is raised if the database already exists.
  42. bool error_if_exists = false;
  43. // If true, the implementation will do aggressive checking of the
  44. // data it is processing and will stop early if it detects any
  45. // errors. This may have unforeseen ramifications: for example, a
  46. // corruption of one DB entry may cause a large number of entries to
  47. // become unreadable or for the entire DB to become unopenable.
  48. bool paranoid_checks = false;
  49. // Use the specified object to interact with the environment,
  50. // e.g. to read/write files, schedule background work, etc.
  51. // Default: Env::Default()
  52. Env* env;
  53. // Any internal progress/error information generated by the db will
  54. // be written to info_log if it is non-null, or to a file stored
  55. // in the same directory as the DB contents if info_log is null.
  56. Logger* info_log = nullptr;
  57. // -------------------
  58. // Parameters that affect performance
  59. // Amount of data to build up in memory (backed by an unsorted log
  60. // on disk) before converting to a sorted on-disk file.
  61. //
  62. // Larger values increase performance, especially during bulk loads.
  63. // Up to two write buffers may be held in memory at the same time,
  64. // so you may wish to adjust this parameter to control memory usage.
  65. // Also, a larger write buffer will result in a longer recovery time
  66. // the next time the database is opened.
  67. size_t write_buffer_size = 4 * 1024 * 1024;
  68. // Number of open files that can be used by the DB. You may need to
  69. // increase this if your database has a large working set (budget
  70. // one open file per 2MB of working set).
  71. int max_open_files = 1000;
  72. // Control over blocks (user data is stored in a set of blocks, and
  73. // a block is the unit of reading from disk).
  74. // If non-null, use the specified cache for blocks.
  75. // If null, leveldb will automatically create and use an 8MB internal cache.
  76. Cache* block_cache = nullptr;
  77. // Approximate size of user data packed per block. Note that the
  78. // block size specified here corresponds to uncompressed data. The
  79. // actual size of the unit read from disk may be smaller if
  80. // compression is enabled. This parameter can be changed dynamically.
  81. size_t block_size = 4 * 1024;
  82. // Number of keys between restart points for delta encoding of keys.
  83. // This parameter can be changed dynamically. Most clients should
  84. // leave this parameter alone.
  85. int block_restart_interval = 16;
  86. // Leveldb will write up to this amount of bytes to a file before
  87. // switching to a new one.
  88. // Most clients should leave this parameter alone. However if your
  89. // filesystem is more efficient with larger files, you could
  90. // consider increasing the value. The downside will be longer
  91. // compactions and hence longer latency/performance hiccups.
  92. // Another reason to increase this parameter might be when you are
  93. // initially populating a large database.
  94. size_t max_file_size = 2 * 1024 * 1024;
  95. // Compress blocks using the specified compression algorithm. This
  96. // parameter can be changed dynamically.
  97. //
  98. // Default: kSnappyCompression, which gives lightweight but fast
  99. // compression.
  100. //
  101. // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz:
  102. // ~200-500MB/s compression
  103. // ~400-800MB/s decompression
  104. // Note that these speeds are significantly faster than most
  105. // persistent storage speeds, and therefore it is typically never
  106. // worth switching to kNoCompression. Even if the input data is
  107. // incompressible, the kSnappyCompression implementation will
  108. // efficiently detect that and will switch to uncompressed mode.
  109. CompressionType compression = kSnappyCompression;
  110. // Compression level for zstd.
  111. // Currently only the range [-5,22] is supported. Default is 1.
  112. int zstd_compression_level = 1;
  113. // EXPERIMENTAL: If true, append to existing MANIFEST and log files
  114. // when a database is opened. This can significantly speed up open.
  115. //
  116. // Default: currently false, but may become true later.
  117. bool reuse_logs = false;
  118. // If non-null, use the specified filter policy to reduce disk reads.
  119. // Many applications will benefit from passing the result of
  120. // NewBloomFilterPolicy() here.
  121. const FilterPolicy* filter_policy = nullptr;
  122. };
  123. // Options that control read operations
  124. struct LEVELDB_EXPORT ReadOptions {
  125. // If true, all data read from underlying storage will be
  126. // verified against corresponding checksums.
  127. bool verify_checksums = false;
  128. // Should the data read for this iteration be cached in memory?
  129. // Callers may wish to set this field to false for bulk scans.
  130. bool fill_cache = true;
  131. // If "snapshot" is non-null, read as of the supplied snapshot
  132. // (which must belong to the DB that is being read and which must
  133. // not have been released). If "snapshot" is null, use an implicit
  134. // snapshot of the state at the beginning of this read operation.
  135. const Snapshot* snapshot = nullptr;
  136. };
  137. // Options that control write operations
  138. struct LEVELDB_EXPORT WriteOptions {
  139. WriteOptions() = default;
  140. // If true, the write will be flushed from the operating system
  141. // buffer cache (by calling WritableFile::Sync()) before the write
  142. // is considered complete. If this flag is true, writes will be
  143. // slower.
  144. //
  145. // If this flag is false, and the machine crashes, some recent
  146. // writes may be lost. Note that if it is just the process that
  147. // crashes (i.e., the machine does not reboot), no writes will be
  148. // lost even if sync==false.
  149. //
  150. // In other words, a DB write with sync==false has similar
  151. // crash semantics as the "write()" system call. A DB write
  152. // with sync==true has similar crash semantics to a "write()"
  153. // system call followed by "fsync()".
  154. bool sync = false;
  155. };
  156. } // namespace leveldb
  157. #endif // STORAGE_LEVELDB_INCLUDE_OPTIONS_H_