LevelDB二级索引实现 姚凯文(kevinyao0901) 姜嘉祺
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

180 lines
6.0 KiB

  #include <cassert>
  #include <chrono>
  #include <iostream>
  #include <memory>
  #include <string>
  #include <utility>
  #include <vector>

  #include <leveldb/db.h>

  using namespace std::chrono;
  // A field to search for, expressed as a (name, value) pair.
  // FindKeysByField compares `first` against a parsed field name and
  // `second` against that field's value.
  struct Field
  {
    std::string first;   // field name, e.g. "name"
    std::string second;  // field value, e.g. "Customer#10000"
  };
  // Parses a record value of the form "name1:value1|name2:value2|..." into
  // (name, value) pairs, returned in order of appearance.
  //
  // Segments are separated by '|'. Within a segment, the first ':' splits the
  // field name from the field value (so the value itself may contain ':').
  // Segments without a ':' are skipped.
  //
  // The segment after the last '|' -- or the whole string when it contains no
  // '|' at all -- is parsed too. The previous implementation only handled
  // segments that were followed by a '|', which silently dropped the last
  // field of every record.
  std::vector<std::pair<std::string, std::string>> ParseValue(const std::string& value) {
    std::vector<std::pair<std::string, std::string>> fields;
    size_t start = 0;
    while (true) {
      const size_t end = value.find('|', start);
      const bool is_last_segment = (end == std::string::npos);
      // For the last segment take everything up to the end of the string.
      const std::string segment =
          value.substr(start, is_last_segment ? std::string::npos : end - start);
      const size_t separator = segment.find(':');
      if (separator != std::string::npos) {
        fields.emplace_back(segment.substr(0, separator), segment.substr(separator + 1));
      }
      if (is_last_segment) {
        break;
      }
      start = end + 1;  // end < value.size(), so start never exceeds value.size()
    }
    return fields;
  }
  28. // 查询函数:根据字段查找所有包含该字段的 Key
  29. std::vector<std::string> FindKeysByField(leveldb::DB* db, const Field& field) {
  30. std::vector<std::string> keys;
  31. leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions());
  32. for (it->SeekToFirst(); it->Valid(); it->Next()) {
  33. std::string key = it->key().ToString();
  34. std::string value = it->value().ToString();
  35. std::vector<std::pair<std::string, std::string>> fields = ParseValue(value);
  36. // 查找是否有匹配的字段
  37. for (const auto& f : fields) {
  38. if (f.first == field.first && f.second == field.second) {
  39. keys.push_back(key);
  40. break;
  41. }
  42. }
  43. }
  44. delete it;
  45. return keys;
  46. }
  47. // 生成数据并插入数据库
  48. void GenerateAndInsertData(leveldb::DB* db, int num_entries) {
  49. leveldb::WriteOptions write_options;
  50. leveldb::Status status;
  51. for (int i = 1; i <= num_entries; ++i) {
  52. std::string key = "k_" + std::to_string(i);
  53. std::string name = "Customer#" + std::to_string(i);
  54. std::string address = "Address_" + std::to_string(i);
  55. std::string phone = "25-989-741-" + std::to_string(1000 + i);
  56. std::string value = "name:" + name + "|address:" + address + "|phone:" + phone;
  57. status = db->Put(write_options, key, value);
  58. assert(status.ok() && "Failed to insert data");
  59. }
  60. }
  61. // 基准测试:二级索引性能提升
  62. void BenchmarkFieldQueryWithIndex(leveldb::DB* db) {
  63. // 测试前,查询无索引的字段性能
  64. auto start = high_resolution_clock::now();
  65. Field field = {"name", "Customer#10000"};
  66. std::vector<std::string> keys_without_index = FindKeysByField(db, field);
  67. auto end = high_resolution_clock::now();
  68. auto duration = duration_cast<microseconds>(end - start);
  69. std::cout << "Time without index: " << duration.count() << " microseconds" << std::endl;
  70. // 创建二级索引
  71. // 在此添加创建索引的代码(可以使用 DBImpl::CreateIndexOnField 函数)
  72. start = high_resolution_clock::now();
  73. leveldb::Status status = db->CreateIndexOnField("name");
  74. end = high_resolution_clock::now();
  75. duration = duration_cast<microseconds>(end - start);
  76. std::cout << "Time to create index: " << duration.count() << std::endl;
  77. // 测试后,查询有索引的字段性能
  78. start = high_resolution_clock::now();
  79. std::vector<std::string> keys_with_index = db->QueryByIndex("name:Customer#10000"); // 使用二级索引查询
  80. end = high_resolution_clock::now();
  81. duration = duration_cast<microseconds>(end - start);
  82. std::cout << "Time with index: " << duration.count() << " microseconds" << std::endl;
  83. // 输出查询结果
  84. std::cout << "Found " << keys_with_index.size() << " keys with index." << std::endl;
  85. assert(!keys_with_index.empty() && "Query by index returned no results");
  86. std::cout << "Query by index results for name=Customer#10000: ";
  87. for (const auto& result : keys_with_index) {
  88. std::cout << result << ", ";
  89. }
  90. }
  91. // 基准测试:记录插入时的性能影响
  92. void BenchmarkWritePerformance(leveldb::DB* db, int num_entries) {
  93. leveldb::WriteOptions write_options;
  94. auto start = high_resolution_clock::now();
  95. GenerateAndInsertData(db, num_entries); // 执行批量插入
  96. auto end = high_resolution_clock::now();
  97. auto duration = duration_cast<microseconds>(end - start);
  98. std::cout << "Insertion time for " << num_entries << " entries: " << duration.count() << " microseconds" << std::endl;
  99. }
  100. // 基准测试:记录删除二级索引的开销
  101. void BenchmarkDeleteIndex(leveldb::DB* db, const std::string& field_name) {
  102. auto start = high_resolution_clock::now();
  103. // 删除二级索引
  104. leveldb::Status status = db->DeleteIndex(field_name);
  105. assert(status.ok() && "Failed to delete index");
  106. auto end = high_resolution_clock::now();
  107. auto duration = duration_cast<microseconds>(end - start);
  108. std::cout << "Time to delete index on field '" << field_name << "': " << duration.count() << " microseconds" << std::endl;
  109. }
  110. // 获取数据库大小(用来估算二级索引的空间占用)
  111. void GetDatabaseSize(leveldb::DB* db) {
  112. std::string property;
  113. // 使用 bool 返回值检查是否成功获取属性
  114. bool success = db->GetProperty("leveldb.stats", &property);
  115. if (!success) {
  116. std::cerr << "Failed to get db stats" << std::endl;
  117. return;
  118. }
  119. std::cout << "Database stats: " << std::endl;
  120. std::cout << property << std::endl;
  121. }
  122. int main() {
  123. leveldb::Options options;
  124. options.create_if_missing = true;
  125. // 打开数据库
  126. leveldb::DB* db = nullptr;
  127. leveldb::Status status = leveldb::DB::Open(options, "./testdb", &db);
  128. assert(status.ok() && "Failed to open database");
  129. // 测试写入性能
  130. BenchmarkWritePerformance(db, 100001); // 插入 100001 条数据
  131. // 测试二级索引对查询性能的提升
  132. BenchmarkFieldQueryWithIndex(db);
  133. // 获取数据库大小
  134. GetDatabaseSize(db);
  135. // 测试删除二级索引的开销
  136. BenchmarkDeleteIndex(db, "name");
  137. // 关闭数据库
  138. delete db;
  139. std::cout << "Benchmark tests completed." << std::endl;
  140. return 0;
  141. }