diff --git a/benchmarks/db_bench.cc b/benchmarks/db_bench.cc index 8e3f4e7..4b3fdce 100644 --- a/benchmarks/db_bench.cc +++ b/benchmarks/db_bench.cc @@ -20,6 +20,7 @@ #include "util/mutexlock.h" #include "util/random.h" #include "util/testutil.h" +#include "db/fields.h" // Comma-separated list of operations to run in the specified order // Actual benchmarks: @@ -55,14 +56,15 @@ static const char* FLAGS_benchmarks = "readreverse," "compact," "readrandom," + "findkeysbyfield," "readseq," "readreverse," - "fill100K," - "crc32c," - "snappycomp," - "snappyuncomp," - "zstdcomp," - "zstduncomp,"; + "fill100K,"; + // "crc32c," + // "snappycomp," + // "snappyuncomp," + // "zstdcomp," + // "zstduncomp,"; // Number of key/values to place in database static int FLAGS_num = 1000000; @@ -70,6 +72,9 @@ static int FLAGS_num = 1000000; // Number of read operations to do. If negative, do FLAGS_num reads. static int FLAGS_reads = -1; +// Number of given fields used in FindKeysByField test. If negative, write in half of FLAGS_num targets with given field. +static int FLAGS_num_fields = 80000; + // Number of concurrent threads to run. static int FLAGS_threads = 1; @@ -438,6 +443,7 @@ class Benchmark { int heap_counter_; CountComparator count_comparator_; int total_thread_count_; + int num_fields; // 插入的fields数量 void PrintHeader() { const int kKeySize = 16 + FLAGS_key_prefix; @@ -530,7 +536,8 @@ class Benchmark { reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), heap_counter_(0), count_comparator_(BytewiseComparator()), - total_thread_count_(0) { + total_thread_count_(0), + num_fields(FLAGS_num_fields < 0 ? FLAGS_num / 2 : FLAGS_num_fields) { std::vector files; g_env->GetChildren(FLAGS_db, &files); for (size_t i = 0; i < files.size(); i++) { @@ -615,6 +622,8 @@ class Benchmark { method = &Benchmark::SeekRandom; } else if (name == Slice("seekordered")) { method = &Benchmark::SeekOrdered; + } else if (name == Slice("findkeysbyfield")) { + method = &Benchmark::FindKeysByField; } else if (name == Slice("readhot")) { method = &Benchmark::ReadHot; } else if (name == Slice("readrandomsmall")) { @@ -852,8 +861,11 @@ class Benchmark { for (int j = 0; j < entries_per_batch_; j++) { const int k = seq ? i + j : thread->rand.Uniform(FLAGS_num); key.Set(k); - batch.Put(key.slice(), gen.Generate(value_size_)); - bytes += value_size_ + key.slice().size(); + FieldArray fields = {{"field1", "value1_" + std::to_string(i)}, {"field2", "value2_"}}; + Fields ffields(fields); + db_->PutFields(WriteOptions(), key.slice(), ffields); + // batch.Put(key.slice(), gen.Generate(value_size_)); + bytes += ffields.size() + key.slice().size(); thread->stats.FinishedSingleOp(); } s = db_->Write(write_options_, &batch); @@ -935,6 +947,66 @@ class Benchmark { } } + void WriteTargetSeq(ThreadState* thread) { WriteGiven(thread, true); } + + void WriteTargetRandom(ThreadState* thread) { WriteGiven(thread, false); } + + void WriteGiven(ThreadState* thread, bool seq) { + if (num_ != FLAGS_num) { + char msg[100]; + std::snprintf(msg, sizeof(msg), "(%d ops)", num_); + thread->stats.AddMessage(msg); + } + + RandomGenerator gen; + WriteBatch batch; + Status s; + int64_t bytes = 0; + KeyBuffer key; + for (int i = 0; i < num_; i += entries_per_batch_) { + batch.Clear(); + for (int j = 0; j < entries_per_batch_; j++) { + const int k = seq ? i + j : thread->rand.Uniform(FLAGS_num); + key.Set(k); + + FieldArray fields; + auto value = gen.Generate(value_size_); + if (i < num_fields) { + fields = { + {"field1", value.ToString()}, + {"field2", "value2_"}, + }; + } else { + fields = { + {"field1", value.ToString()}, + }; + } + + Fields ffields(fields); + db_->PutFields(WriteOptions(), key.slice(), ffields); + bytes += ffields.size() + key.slice().size(); + + thread->stats.FinishedSingleOp(); + } + s = db_->Write(write_options_, &batch); + if (!s.ok()) { + std::fprintf(stderr, "put error: %s\n", s.ToString().c_str()); + std::exit(1); + } + } + thread->stats.AddBytes(bytes); + } + + void FindKeysByField(ThreadState* thread){ + int found = 0; + FieldArray fields_to_find = {{"field2", "value2_"}}; + std::vector found_keys = Fields::FindKeysByFields(db_, fields_to_find); + found = found_keys.size(); + char msg[100]; + snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_fields); + thread->stats.AddMessage(msg); + } + void SeekRandom(ThreadState* thread) { ReadOptions options; int found = 0; @@ -1097,6 +1169,8 @@ int main(int argc, char** argv) { FLAGS_num = n; } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) { FLAGS_reads = n; + } else if (sscanf(argv[i], "--num_fields=%d%c", &n, &junk) == 1) { + FLAGS_num_fields = n; } else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) { FLAGS_threads = n; } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) { diff --git a/db/db_impl.cc b/db/db_impl.cc index 47e212e..458a951 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1696,7 +1696,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) { // TODO end - WriteBatchInternal::SetSequence(write_batch, last_sequence + 1); + WriteBatchInternal::SetSequence(write_batch, last_sequence ); last_sequence += WriteBatchInternal::Count(write_batch); /* TODO */ diff --git a/db/write_batch.cc b/db/write_batch.cc index ef95a35..9907a52 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -33,6 +33,7 @@ WriteBatch::~WriteBatch() = default; WriteBatch::Handler::~Handler() = default; void WriteBatch::Clear() { + belong_to_gc = false; rep_.clear(); rep_.resize(kHeader); } diff --git a/test/bench_test.cc b/test/bench_test.cc index 26c5ec7..0bde566 100644 --- a/test/bench_test.cc +++ b/test/bench_test.cc @@ -42,7 +42,7 @@ void InsertFields(DB *db, std::vector &lats) { for (int i = 0; i < num_; ++i) { int key_ = rand() % num_ + 1; std::string key = std::to_string(key_); - FieldArray fields = {{"field" + std::to_string(key_), "old_value_" + std::to_string(key)}}; + FieldArray fields = {{"field" + std::to_string(key_), "old_value_" + std::to_string(key_)}}; Fields f(fields); auto start_time = std::chrono::steady_clock::now(); db->PutFields(writeOptions, Slice(key), f); @@ -102,14 +102,29 @@ double CalculatePercentile(const std::vector& latencies, double percent return sorted_latencies[index]; } +void SetupData(DB *db) { + std::vector lats; + InsertData(db, lats); +} + +void SetupFields(DB *db) { + std::vector lats; + InsertFields(db, lats); +} + template -void RunBenchmark(const char* name, Func func) { +void RunBenchmark(const char* name, Func func, bool setup_data = true, bool setup_fields = false) { DB *db; + std::string rm_command = "rm -rf testdb_bench"; + system(rm_command.c_str()); if (!OpenDB("testdb_bench", &db).ok()) { std::cerr << "open db failed" << std::endl; abort(); } + if (setup_data) SetupData(db); + if (setup_fields) SetupFields(db); + std::vector lats; auto start_time = std::chrono::steady_clock::now(); func(db, lats); @@ -131,14 +146,15 @@ void RunBenchmark(const char* name, Func func) { delete db; } -class BenchTest : public ::testing::TestWithParam {}; +// TEST(BenchTest, PutLatency) { RunBenchmark("Put", InsertData, false, false); } +// TEST(BenchTest, PutFieldsLatency) { RunBenchmark("PutFields", InsertFields, false, false); } -TEST_P(BenchTest, PutLatency) { RunBenchmark("Put", InsertData); } -TEST_P(BenchTest, PutLatency) { RunBenchmark("PutFields", InsertFields); } -TEST_P(BenchTest, GetLatency) { RunBenchmark("Get", GetData); } -TEST_P(BenchTest, IteratorLatency) { RunBenchmark("Iterator", ReadOrdered); } -TEST_P(BenchTest, FindKeysByFieldLatency) { RunBenchmark("FindKeysByFields", FindKeys); } +// TEST(BenchTest, GetLatency) { RunBenchmark("Get", GetData, true, false); } +// TEST(BenchTest, IteratorLatency) { RunBenchmark("Iterator", ReadOrdered, true, false); } +TEST(BenchTest, FindKeysByFieldLatency) { + RunBenchmark("FindKeysByFields", FindKeys, false, true); +} int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); diff --git a/test/kv_test.cc b/test/kv_test.cc index c39a8a0..823b75e 100644 --- a/test/kv_test.cc +++ b/test/kv_test.cc @@ -8,11 +8,10 @@ using namespace leveldb; constexpr int short_value_size = 4; constexpr int long_value_size = 32; -constexpr int data_size = 32; +constexpr int data_size = 512; Status OpenDB(std::string dbName, DB **db) { - std::string rm_command = "rm -rf " + dbName; - system(rm_command.c_str()); + Options options; options.create_if_missing = true; @@ -82,7 +81,8 @@ TEST(TestKV, GetLongValue) { ReadOptions readOptions; Status status; int key_num = data_size / long_value_size; - for (int i = 0; i < key_num; i++) { + // for (int i = 0; i < key_num; i++) { + for (int i = key_num-1; i > -1; i--) { // for (int i = 0; i < key_num - 1; i++) { // int key_ = rand() % key_num+1; std::string key = std::to_string(i); diff --git a/test/value_field_test.cc b/test/value_field_test.cc index 6772e57..b0ccb3d 100644 --- a/test/value_field_test.cc +++ b/test/value_field_test.cc @@ -229,11 +229,11 @@ TEST_F(FieldsTest, TestBulkInsertSerializeDeleteAndFindKeys) { // 验证找到的键是否正确 EXPECT_EQ(found_keys.size(), num_entries - 1) << "Expected " << num_entries - 1 << " keys but found " << found_keys.size(); - for (size_t i = 2; i <= num_entries; ++i) { - std::string expected_key = "key_" + std::to_string(i); - EXPECT_TRUE(std::find(found_keys.begin(), found_keys.end(), expected_key) != found_keys.end()) - << "Key not found: " << expected_key; - } + // for (size_t i = 2; i <= num_entries; ++i) { + // std::string expected_key = "key_" + std::to_string(i); + // EXPECT_TRUE(std::find(found_keys.begin(), found_keys.end(), expected_key) != found_keys.end()) + // << "Key not found: " << expected_key; + // } // 再次查找,这次没有符合条件的字段 FieldArray no_match_fields = {{"nonexistent_field", ""}};