LevelDB二级索引实现 姚凯文(kevinyao0901) 姜嘉祺
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

180 lines
6.0 KiB

#include <cassert>
#include <chrono>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <leveldb/db.h>
using namespace std::chrono;
// A (name, value) pair describing one field to search for.
// FindKeysByField compares `first` against a parsed field name and `second`
// against the parsed field value, e.g. {"name", "Customer#10000"}.
struct Field {
// Field name (the text before ':' in a stored value segment).
std::string first;
// Field value (the text after ':' in a stored value segment).
std::string second;
};
// Splits a stored record of the form "name1:value1|name2:value2|..." into
// (name, value) pairs.
//
// Segments are separated by '|'. Each segment is split at its FIRST ':' so a
// value may itself contain ':' characters. Segments without any ':' (including
// empty segments produced by a leading, trailing or doubled '|') are skipped.
//
// Unlike a loop that only reacts to '|' terminators, the final segment is
// parsed too, so "a:1|b:2" yields both pairs and "a:1" yields one pair.
//
// @param value  The encoded record.
// @return       The parsed pairs, in the order they appear in `value`.
std::vector<std::pair<std::string, std::string>> ParseValue(const std::string& value) {
  std::vector<std::pair<std::string, std::string>> fields;

  std::string::size_type start = 0;
  while (start <= value.size()) {
    // Treat end-of-string as the terminator of the last segment.
    std::string::size_type end = value.find('|', start);
    if (end == std::string::npos) {
      end = value.size();
    }

    const std::string segment = value.substr(start, end - start);
    const std::string::size_type separator = segment.find(':');
    if (separator != std::string::npos) {
      fields.emplace_back(segment.substr(0, separator), segment.substr(separator + 1));
    }

    start = end + 1;  // Becomes size() + 1 after the last segment, ending the loop.
  }

  return fields;
}
// Index-free lookup: scans every record in the database and returns the keys
// whose value contains the requested field.
//
// Each value is decoded with ParseValue; a record matches when one of the
// decoded (name, value) pairs equals (field.first, field.second). A key is
// reported at most once even if several of its pairs match.
//
// @param db     Open database to scan; must not be null (it is dereferenced).
// @param field  The field name/value pair to look for.
// @return       Matching keys in iteration order. If the iterator reports an
//               error, a message is written to std::cerr and the keys
//               collected up to that point are returned.
std::vector<std::string> FindKeysByField(leveldb::DB* db, const Field& field) {
  std::vector<std::string> keys;

  // Owning the iterator through unique_ptr releases it even if parsing or
  // push_back throws.
  const std::unique_ptr<leveldb::Iterator> it(db->NewIterator(leveldb::ReadOptions()));

  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    const std::vector<std::pair<std::string, std::string>> fields =
        ParseValue(it->value().ToString());

    for (const auto& f : fields) {
      if (f.first == field.first && f.second == field.second) {
        // Copy the key only for records that actually match.
        keys.push_back(it->key().ToString());
        break;
      }
    }
  }

  // A scan can stop early because of an I/O or corruption error; surface it
  // instead of letting it look like "no more matches".
  if (!it->status().ok()) {
    std::cerr << "Iterator error during field scan: " << it->status().ToString() << std::endl;
  }

  return keys;
}
// Populates the database with synthetic customer records.
//
// For every i in [1, num_entries] one record is written with an individual
// Put call:
//   key   = "k_<i>"
//   value = "name:Customer#<i>|address:Address_<i>|phone:25-989-741-<1000+i>"
// A failed Put trips an assert (which is compiled out under NDEBUG).
//
// @param db           Open database to write to; must not be null.
// @param num_entries  Number of records to generate; values < 1 write nothing.
void GenerateAndInsertData(leveldb::DB* db, int num_entries) {
  const leveldb::WriteOptions put_options;

  for (int n = 1; n <= num_entries; ++n) {
    const std::string id = std::to_string(n);

    // Assemble the '|'-separated record piece by piece.
    std::string record = "name:Customer#";
    record += id;
    record += "|address:Address_";
    record += id;
    record += "|phone:25-989-741-";
    record += std::to_string(1000 + n);

    const leveldb::Status put_status = db->Put(put_options, "k_" + id, record);
    assert(put_status.ok() && "Failed to insert data");
  }
}
// Benchmark: compares looking up name=Customer#10000 with a full scan against
// the same lookup through a secondary index on "name".
//
// Steps, each timed separately and reported on std::cout in microseconds:
//   1. Full scan via FindKeysByField (no index).
//   2. Index creation via db->CreateIndexOnField("name").
//   3. Indexed lookup via db->QueryByIndex("name:Customer#10000").
// Finally the keys returned by the indexed lookup are printed.
//
// NOTE(review): CreateIndexOnField and QueryByIndex are not part of stock
// LevelDB; presumably they come from this project's extended DB interface.
//
// @param db  Open database that already contains the generated records;
//            must not be null.
void BenchmarkFieldQueryWithIndex(leveldb::DB* db) {
  // Built outside the timed region so only the scan itself is measured.
  const Field field = {"name", "Customer#10000"};

  // 1. Baseline: query without an index.
  auto start = std::chrono::high_resolution_clock::now();
  const std::vector<std::string> keys_without_index = FindKeysByField(db, field);
  auto end = std::chrono::high_resolution_clock::now();
  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  std::cout << "Time without index: " << duration.count() << " microseconds" << std::endl;
  // Report the baseline result so it can be compared with the indexed lookup.
  std::cout << "Found " << keys_without_index.size() << " keys without index." << std::endl;

  // 2. Build the secondary index.
  start = std::chrono::high_resolution_clock::now();
  const leveldb::Status status = db->CreateIndexOnField("name");
  end = std::chrono::high_resolution_clock::now();
  // Checked the same way BenchmarkDeleteIndex checks DeleteIndex, so a failed
  // build is not mistaken for a slow or empty index.
  assert(status.ok() && "Failed to create index");
  duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  std::cout << "Time to create index: " << duration.count() << " microseconds" << std::endl;

  // 3. Query through the secondary index.
  start = std::chrono::high_resolution_clock::now();
  const std::vector<std::string> keys_with_index = db->QueryByIndex("name:Customer#10000");
  end = std::chrono::high_resolution_clock::now();
  duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  std::cout << "Time with index: " << duration.count() << " microseconds" << std::endl;

  // Print the indexed query results.
  std::cout << "Found " << keys_with_index.size() << " keys with index." << std::endl;
  assert(!keys_with_index.empty() && "Query by index returned no results");
  std::cout << "Query by index results for name=Customer#10000: ";
  for (const auto& result : keys_with_index) {
    std::cout << result << ", ";
  }
  // Terminate the result line so later output starts on a fresh line.
  std::cout << std::endl;
}
// Benchmark: measures how long it takes to insert `num_entries` generated
// records and prints the elapsed time in microseconds to std::cout.
//
// The records are written by GenerateAndInsertData, i.e. one Put per entry
// (not a single write batch).
//
// @param db           Open database to write to; must not be null.
// @param num_entries  Number of records to insert.
void BenchmarkWritePerformance(leveldb::DB* db, int num_entries) {
  const auto start = std::chrono::high_resolution_clock::now();
  GenerateAndInsertData(db, num_entries);
  const auto end = std::chrono::high_resolution_clock::now();

  const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
  std::cout << "Insertion time for " << num_entries << " entries: " << duration.count() << " microseconds" << std::endl;
}
// Benchmark: times the removal of the secondary index on `field_name` and
// prints the elapsed time in microseconds to std::cout.
//
// The call goes through db->DeleteIndex(field_name); a non-ok status trips an
// assert (compiled out under NDEBUG).
//
// @param db          Open database; must not be null.
// @param field_name  Name of the indexed field whose index is removed.
void BenchmarkDeleteIndex(leveldb::DB* db, const std::string& field_name) {
  const auto t_begin = std::chrono::high_resolution_clock::now();

  // Drop the secondary index.
  const leveldb::Status drop_status = db->DeleteIndex(field_name);
  assert(drop_status.ok() && "Failed to delete index");

  const auto t_end = std::chrono::high_resolution_clock::now();
  const auto elapsed_us =
      std::chrono::duration_cast<std::chrono::microseconds>(t_end - t_begin).count();

  std::cout << "Time to delete index on field '" << field_name << "': " << elapsed_us << " microseconds" << std::endl;
}
// Prints the database's "leveldb.stats" property to std::cout; the caller
// uses this output to estimate how much space the secondary index occupies.
//
// If the property cannot be read, an error is written to std::cerr and
// nothing is printed to std::cout.
//
// @param db  Open database; must not be null.
void GetDatabaseSize(leveldb::DB* db) {
  std::string stats;

  // GetProperty reports success through its bool return value.
  if (db->GetProperty("leveldb.stats", &stats)) {
    std::cout << "Database stats: " << std::endl << stats << std::endl;
    return;
  }

  std::cerr << "Failed to get db stats" << std::endl;
}
int main() {
leveldb::Options options;
options.create_if_missing = true;
// 打开数据库
leveldb::DB* db = nullptr;
leveldb::Status status = leveldb::DB::Open(options, "./testdb", &db);
assert(status.ok() && "Failed to open database");
// 测试写入性能
BenchmarkWritePerformance(db, 100001); // 插入 100001 条数据
// 测试二级索引对查询性能的提升
BenchmarkFieldQueryWithIndex(db);
// 获取数据库大小
GetDatabaseSize(db);
// 测试删除二级索引的开销
BenchmarkDeleteIndex(db, "name");
// 关闭数据库
delete db;
std::cout << "Benchmark tests completed." << std::endl;
return 0;
}