10225501448 李度 10225101546 陈胤遒 10215501422 高宇菲
Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 

288 Zeilen
9.8 KiB

#include "gtest/gtest.h"
// #include "leveldb/env.h"
// #include "leveldb/db.h"
#include "fielddb/field_db.h"
#include <random>
#include "helper.h"
using namespace fielddb;
// The bulk helpers in this file derive their key count as
// data_size / value_size (65536 with the values below).
constexpr int value_size = 2048;
constexpr int data_size = 128 << 20;
// The bulk helpers generate the "age" field as a random value modulo AGE_RANGE.
#define AGE_RANGE 100
// Candidate values for the "address" field of generated records.
std::vector<std::string> cities = {
"Beijing", "Shanghai", "Guangzhou", "Shenzhen", "Hangzhou",
"Chengdu", "Chongqing", "Wuhan", "Suzhou", "Tianjin"
};
// Used to check that inserted data and the results of index queries correspond.
// Each one is a global set wrapped to be thread-safe.
ThreadSafeSet shanghaiKeys;
ThreadSafeSet age20Keys;
// Complex tests must keep these two globals in mind:
// currently only InsertFieldData, InsertOneField and WriteFieldData add to them,
// and DeleteFieldData and DeleteOneField remove from them;
// other tests need to clear them manually in between.
const WriteOptions op;
// Opens the FieldDB named `dbName`, asking for it to be created when it does
// not exist yet. The opened handle is stored through `db`; the status reported
// by FieldDB::OpenFieldDB is returned unchanged.
Status OpenDB(std::string dbName, FieldDB **db) {
  Options openOptions;
  openOptions.create_if_missing = true;
  Status openStatus = FieldDB::OpenFieldDB(openOptions, dbName, db);
  return openStatus;
}
// void ClearDB(FieldDB *db){
// //destroy和恢复没做前先用这个清理数据库,否则跑不同的数据多做几次测试会污染
// WriteOptions writeOptions;
// int key_num = data_size / value_size;
// for (int i = 0; i < key_num; i++) {
// int key_ = i+1;
// std::string key = std::to_string(key_);
// Status s = db->Delete(WriteOptions(), key);
// ASSERT_TRUE(s.ok());
// }
// }
// Inserts one fixed record under `key` (default "0"):
//   name = "special#" + key, address = "Shanghai", age = "20".
// After a successful write the key is recorded in shanghaiKeys and age20Keys.
// A failed write raises a fatal gtest failure and returns before the sets are
// touched.
void InsertOneField(FieldDB *db, std::string key = "0") {
  WriteOptions writeOptions;
  FieldArray fields = {
      {"name", "special#" + key},
      {"address", "Shanghai"},
      {"age", "20"}
  };
  // Use the options declared above instead of building a second temporary.
  Status s = db->PutFields(writeOptions, key, fields);
  ASSERT_TRUE(s.ok());
  shanghaiKeys.insert(key);
  age20Keys.insert(key);
}
// Deletes the single record stored under `key` (default "0").
// After a successful delete the key is removed from shanghaiKeys and
// age20Keys. A failed delete raises a fatal gtest failure and returns before
// the sets are touched.
void DeleteOneField(FieldDB *db, std::string key = "0") {
  WriteOptions writeOptions;
  // Use the options declared above instead of building a second temporary.
  Status s = db->Delete(writeOptions, key);
  ASSERT_TRUE(s.ok());
  shanghaiKeys.erase(key);
  age20Keys.erase(key);
}
// Counterpart of InsertOneField: reads the record stored under `key`
// (default "0") and checks that every returned field carries the value
// InsertOneField writes (name "special#<key>", address "Shanghai", age "20").
// A failed read, a wrong value or an unknown field name is a fatal gtest
// failure.
void GetOneField(FieldDB *db, std::string key = "0") {
  ReadOptions readOptions;
  FieldArray fields_ret;
  Status s = db->GetFields(readOptions, key, &fields_ret);
  ASSERT_TRUE(s.ok());
  for (const Field& field : fields_ret) {
    if (field.first == "name") {
      ASSERT_EQ(field.second, "special#" + key);
    } else if (field.first == "address") {
      ASSERT_EQ(field.second, "Shanghai");
    } else if (field.first == "age") {
      ASSERT_EQ(field.second, "20");
    } else {
      // Report through gtest rather than assert(): assert() is compiled out
      // under NDEBUG (the check would silently pass) and otherwise aborts the
      // whole test binary instead of failing this one test.
      ASSERT_TRUE(false) << "unexpected field name: " << field.first;
    }
  }
}
// Inserts data_size / value_size pseudo-random records, one PutFields call
// per record.
//
// `seed` seeds a function-local std::mt19937, so the same seed always yields
// the same sequence of keys and field values (srand is not thread-safe; a
// per-call engine keeps the sequence reproducible when several threads run
// this helper).
//
// Keys whose generated address is "Shanghai" are added to shanghaiKeys and
// keys whose generated age is "20" are added to age20Keys. The first failed
// write raises a fatal gtest failure and stops the insertion.
void InsertFieldData(FieldDB *db, int seed = 0/*random seed*/) {
  std::cout << "-------inserting-------" << std::endl;
  // One set of default write options for every write, instead of a fresh
  // temporary per iteration.
  WriteOptions writeOptions;
  int key_num = data_size / value_size;
  std::mt19937 rng(seed);
  for (int i = 0; i < key_num; i++) {
    // Draw exactly once per iteration; the helpers that replay this sequence
    // do the same, otherwise the sequences would diverge.
    // NOTE(review): std::abs is undefined for INT_MIN, which this conversion
    // can in principle produce; left unchanged so the key derivation stays
    // identical to the other helpers in this file.
    int randThisTime = rng();
    // Bulk-written keys are > 0 and individually written keys are <= 0, which
    // makes the two easy to tell apart when inspecting a test.
    int key_ = std::abs(randThisTime) % key_num + 1;
    std::string key = std::to_string(key_);
    std::string name = "customer#" + std::to_string(key_);
    // The int operand is converted to the unsigned size type before the
    // modulo, so a negative draw still yields an in-range index.
    std::string address = cities[randThisTime % cities.size()];
    std::string age = std::to_string(std::abs(randThisTime) % AGE_RANGE);
    FieldArray fields = {
        {"name", name},
        {"address", address},
        {"age", age}
    };
    if (address == "Shanghai") {
      shanghaiKeys.insert(key);
    }
    if (age == "20") {
      age20Keys.insert(key);
    }
    Status s = db->PutFields(writeOptions, key, fields);
    ASSERT_TRUE(s.ok());
  }
}
// Deletes the keys that the bulk writers generate for `seed`.
//
// The key sequence is replayed from a function-local std::mt19937 seeded with
// `seed` (one draw per iteration, exactly as the writers do), and each key is
// deleted individually. The first failed delete raises a fatal gtest failure
// and stops the deletion.
//
// Every successfully deleted key is also removed from shanghaiKeys and
// age20Keys. The sets are not wiped wholesale: keys that this call does not
// delete (for example "0" from InsertOneField, or keys written with another
// seed) are still in the database, so they must stay tracked for the later
// haveKey checks to hold.
void DeleteFieldData(FieldDB *db, int seed = 0/*random seed*/) {
  std::cout << "-------deleting-------" << std::endl;
  // One set of default write options for every delete, instead of a fresh
  // temporary per iteration.
  WriteOptions writeOptions;
  int key_num = data_size / value_size;
  std::mt19937 rng(seed);
  for (int i = 0; i < key_num; i++) {
    // Draw exactly once per iteration so the sequence matches the writers.
    int randThisTime = rng();
    int key_ = std::abs(randThisTime) % key_num + 1;
    std::string key = std::to_string(key_);
    Status s = db->Delete(writeOptions, key);
    ASSERT_TRUE(s.ok());
    shanghaiKeys.erase(key);
    age20Keys.erase(key);
  }
}
// Batch variant of the bulk insert: generates data_size / value_size
// pseudo-random records from a std::mt19937 seeded with `seed`, stages all of
// them in one WriteBatch (values serialized with SerializeValue) and submits
// the batch with a single Write call.
//
// Keys whose generated address is "Shanghai" are added to shanghaiKeys and
// keys whose generated age is "20" are added to age20Keys while the batch is
// being built. A failed Write raises a fatal gtest failure.
void WriteFieldData(FieldDB *db, int seed = 0/*random seed*/) {
  std::cout << "-------writing-------" << std::endl;
  WriteOptions writeOptions;
  const int totalKeys = data_size / value_size;
  // srand is not thread-safe; a per-call engine keeps the random sequence
  // reproducible when several threads run this helper.
  std::mt19937 generator(seed);
  WriteBatch batch;
  for (int n = 0; n != totalKeys; ++n) {
    // Exactly one draw per record, otherwise readers replaying the sequence
    // would see different values.
    const int draw = generator();
    const int magnitude = std::abs(draw);
    // Bulk-written keys are > 0, individually written keys are <= 0.
    std::string key = std::to_string(magnitude % totalKeys + 1);
    const std::string &city = cities[draw % cities.size()];
    const std::string ageText = std::to_string(magnitude % AGE_RANGE);
    FieldArray record = {
        {"name", "customer#" + key},
        {"address", city},
        {"age", ageText}
    };
    if (city == "Shanghai") shanghaiKeys.insert(key);
    if (ageText == "20") age20Keys.insert(key);
    batch.Put(key, SerializeValue(record));
  }
  Status writeStatus = db->Write(writeOptions, &batch);
  ASSERT_TRUE(writeStatus.ok());
}
// Point-reads the first 100 keys of the sequence generated for `seed` and
// sanity-checks the returned fields.
//
// `allowNotFound` exists because a concurrent run may not have written a key
// yet: when false every read must succeed; when true a NotFound result is
// skipped, but whatever is read must still be valid.
//
// Checks per field: "address" must be one of `cities`, "age" must parse to an
// integer in [0, AGE_RANGE), "name" is not inspected, and any other field name
// is a fatal gtest failure.
void GetFieldData(FieldDB *db, bool allowNotFound, int seed = 0) {
  std::cout << "-------getting-------" << std::endl;
  ReadOptions readOptions;
  int key_num = data_size / value_size;
  // Point lookups.
  std::mt19937 rng(seed);
  for (int i = 0; i < 100; i++) {
    int randThisTime = rng();
    int key_ = std::abs(randThisTime) % key_num + 1;
    std::string key = std::to_string(key_);
    FieldArray fields_ret;
    Status s = db->GetFields(readOptions, key, &fields_ret);
    if (!allowNotFound) {
      // The key must be readable.
      ASSERT_TRUE(s.ok()) << "key: " << key;
    } else if (s.IsNotFound()) {
      // A missing key is tolerated; only data that was read is validated.
      continue;
    }
    for (const Field& field : fields_ret) {
      if (field.first == "name") {
        // Nothing to validate for the name.
      } else if (field.first == "address") {
        std::string city = field.second;
        ASSERT_NE(std::find(cities.begin(), cities.end(), city), cities.end());
      } else if (field.first == "age") {
        int age = std::stoi(field.second);
        ASSERT_TRUE(age >= 0 && age < AGE_RANGE);
      } else {
        // Report through gtest rather than assert(): assert() is compiled out
        // under NDEBUG (the check would silently pass) and otherwise aborts
        // the whole test binary instead of failing this one test.
        ASSERT_TRUE(false) << "unexpected field name: " << field.first;
      }
    }
  }
}
// Verifies that the data of `seed` has been deleted: the first 100 keys of
// the sequence generated for that seed must all read back as NotFound.
// Nothing may write concurrently while the deletion is being checked, not
// even with a different seed, because different seeds can still produce the
// same key.
void GetDeleteData(FieldDB *db, int seed = 0) {
  std::cout << "-------getting-------" << std::endl;
  ReadOptions readOptions;
  const int totalKeys = data_size / value_size;
  std::mt19937 generator(seed);
  int remaining = 100;
  while (remaining-- > 0) {
    const int draw = generator();
    std::string key = std::to_string(std::abs(draw) % totalKeys + 1);
    FieldArray discarded;
    Status lookup = db->GetFields(readOptions, key, &discarded);
    ASSERT_TRUE(lookup.IsNotFound());
  }
}
// Looks up all keys whose address is "Shanghai" via FindKeysByField and
// checks that each of them is tracked in shanghaiKeys.
//
// The two sizes are printed for comparison. A tracked key may later have been
// overwritten with a different address, so the second number is expected to
// be smaller than the first; with the same random seed both numbers should be
// the same on every run.
void findKeysByCity(FieldDB *db) {
  std::cout << "-------getting field address-------" << std::endl;
  Field wanted = {"address", "Shanghai"};
  std::vector<std::string> matches = db->FindKeysByField(wanted);
  std::cout << "address: " << shanghaiKeys.size() << " " << matches.size() << std::endl;
  for (auto it = matches.begin(); it != matches.end(); ++it) {
    ASSERT_TRUE(shanghaiKeys.haveKey(*it));
  }
}
// Queries the keys with address "Shanghai" through QueryByIndex.
// `haveIndex` states whether the database is expected to have the address
// index: without it the query must report NotFound and nothing else is
// checked; with it the query must succeed and every returned key must be
// tracked in shanghaiKeys. Both sizes are printed for comparison.
void findKeysByCityIndex(FieldDB *db, bool haveIndex) {
  std::cout << "-------getting field address by index-------" << std::endl;
  Field wanted = {"address", "Shanghai"};
  Status queryStatus;
  std::vector<std::string> matches = db->QueryByIndex(wanted, &queryStatus);
  if (!haveIndex) {
    ASSERT_TRUE(queryStatus.IsNotFound());
    return;
  }
  ASSERT_TRUE(queryStatus.ok());
  std::cout << "address: " << shanghaiKeys.size() << " " << matches.size() << std::endl;
  for (auto it = matches.begin(); it != matches.end(); ++it) {
    ASSERT_TRUE(shanghaiKeys.haveKey(*it));
  }
}
// Queries the keys with age "20" through QueryByIndex.
// `haveIndex` states whether the database is expected to have the age index:
// without it the query must report NotFound and nothing else is checked; with
// it the query must succeed and every returned key must be tracked in
// age20Keys. Both sizes are printed for comparison.
void findKeysByAgeIndex(FieldDB *db, bool haveIndex) {
  std::cout << "-------getting field age by index-------" << std::endl;
  Field wanted = {"age", "20"};
  Status queryStatus;
  std::vector<std::string> matches = db->QueryByIndex(wanted, &queryStatus);
  if (!haveIndex) {
    ASSERT_TRUE(queryStatus.IsNotFound());
    return;
  }
  ASSERT_TRUE(queryStatus.ok());
  std::cout << "age: " << age20Keys.size() << " " << matches.size() << std::endl;
  for (auto it = matches.begin(); it != matches.end(); ++it) {
    ASSERT_TRUE(age20Keys.haveKey(*it));
  }
}
// Cross-checks the index against the key-value data for one field value:
// the keys returned by QueryByIndex and the keys returned by FindKeysByField
// must be identical once both lists are sorted.
//
// `fieldName` selects the probe: "address" checks address == "Shanghai" and
// "age" checks age == "20". Any other name is a fatal gtest failure. Both
// result sizes are printed before the comparison.
void checkDataInKVAndIndex(FieldDB *db, std::string fieldName = "address") {
  Field field;
  if (fieldName == "address") {
    field = {"address", "Shanghai"};
  } else {
    // Only these two fields are supported. Report anything else through gtest
    // rather than assert(0): assert() is compiled out under NDEBUG, which
    // would let the check run with an empty field.
    ASSERT_EQ(fieldName, "age") << "only \"address\" and \"age\" are supported";
    field = {"age", "20"};
  }
  Status s;
  // Keys found through the index.
  std::vector<std::string> resKeys1 = db->QueryByIndex(field, &s);
  // Keys found by scanning the key-value data.
  std::vector<std::string> resKeys2 = db->FindKeysByField(field);
  std::sort(resKeys1.begin(), resKeys1.end());
  std::sort(resKeys2.begin(), resKeys2.end());
  std::cout << resKeys1.size() << " " << resKeys2.size() << std::endl;
  ASSERT_EQ(resKeys1, resKeys2);
}