小组成员:谢瑞阳、徐翔宇
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

506 lines
15 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6. #include <kcpolydb.h>
  7. #include "util/histogram.h"
  8. #include "util/random.h"
  9. #include "util/testutil.h"
  10. // Comma-separated list of operations to run in the specified order
  11. // Actual benchmarks:
  12. //
  13. // fillseq -- write N values in sequential key order in async mode
  14. // fillrandom -- write N values in random key order in async mode
  15. // overwrite -- overwrite N values in random key order in async mode
  16. // fillseqsync -- write N/100 values in sequential key order in sync mode
  17. // fillrandsync -- write N/100 values in random key order in sync mode
  18. // fillrand100K -- write N/1000 100K values in random order in async mode
  19. // fillseq100K -- write N/1000 100K values in seq order in async mode
  20. // readseq -- read N times sequentially
  21. // readseq100K -- read N/1000 100K values in sequential order in async mode
  22. // readrand100K -- read N/1000 100K values in random order in async mode
  23. // readrandom -- read N times in random order
  24. static const char* FLAGS_benchmarks =
  25. "fillseq,"
  26. "fillseqsync,"
  27. "fillrandsync,"
  28. "fillrandom,"
  29. "overwrite,"
  30. "readrandom,"
  31. "readseq,"
  32. "fillrand100K,"
  33. "fillseq100K,"
  34. "readseq100K,"
  35. "readrand100K,"
  36. ;
  37. // Number of key/values to place in database
  38. static int FLAGS_num = 1000000;
  39. // Number of read operations to do. If negative, do FLAGS_num reads.
  40. static int FLAGS_reads = -1;
  41. // Size of each value
  42. static int FLAGS_value_size = 100;
  43. // Arrange to generate values that shrink to this fraction of
  44. // their original size after compression
  45. static double FLAGS_compression_ratio = 0.5;
  46. // Print histogram of operation timings
  47. static bool FLAGS_histogram = false;
  48. // Cache size. Default 4 MB
  49. static int FLAGS_cache_size = 4194304;
  50. // Page size. Default 1 KB
  51. static int FLAGS_page_size = 1024;
  52. // If true, do not destroy the existing database. If you set this
  53. // flag and also specify a benchmark that wants a fresh database, that
  54. // benchmark will fail.
  55. static bool FLAGS_use_existing_db = false;
  56. // Compression flag. If true, compression is on. If false, compression
  57. // is off.
  58. static bool FLAGS_compression = true;
  59. inline
  60. static void DBSynchronize(kyotocabinet::TreeDB* db_)
  61. {
  62. // Synchronize will flush writes to disk
  63. if (!db_->synchronize()) {
  64. fprintf(stderr, "synchronize error: %s\n", db_->error().name());
  65. }
  66. }
  67. namespace leveldb {
  68. // Helper for quickly generating random data.
  69. namespace {
  70. class RandomGenerator {
  71. private:
  72. std::string data_;
  73. int pos_;
  74. public:
  75. RandomGenerator() {
  76. // We use a limited amount of data over and over again and ensure
  77. // that it is larger than the compression window (32KB), and also
  78. // large enough to serve all typical value sizes we want to write.
  79. Random rnd(301);
  80. std::string piece;
  81. while (data_.size() < 1048576) {
  82. // Add a short fragment that is as compressible as specified
  83. // by FLAGS_compression_ratio.
  84. test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
  85. data_.append(piece);
  86. }
  87. pos_ = 0;
  88. }
  89. Slice Generate(int len) {
  90. if (pos_ + len > data_.size()) {
  91. pos_ = 0;
  92. assert(len < data_.size());
  93. }
  94. pos_ += len;
  95. return Slice(data_.data() + pos_ - len, len);
  96. }
  97. };
  98. static Slice TrimSpace(Slice s) {
  99. int start = 0;
  100. while (start < s.size() && isspace(s[start])) {
  101. start++;
  102. }
  103. int limit = s.size();
  104. while (limit > start && isspace(s[limit-1])) {
  105. limit--;
  106. }
  107. return Slice(s.data() + start, limit - start);
  108. }
  109. }
  110. class Benchmark {
  111. private:
  112. kyotocabinet::TreeDB* db_;
  113. int db_num_;
  114. int num_;
  115. int reads_;
  116. double start_;
  117. double last_op_finish_;
  118. int64_t bytes_;
  119. std::string message_;
  120. Histogram hist_;
  121. RandomGenerator gen_;
  122. Random rand_;
  123. kyotocabinet::LZOCompressor<kyotocabinet::LZO::RAW> comp_;
  124. // State kept for progress messages
  125. int done_;
  126. int next_report_; // When to report next
  127. void PrintHeader() {
  128. const int kKeySize = 16;
  129. PrintEnvironment();
  130. fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
  131. fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n",
  132. FLAGS_value_size,
  133. static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
  134. fprintf(stdout, "Entries: %d\n", num_);
  135. fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
  136. ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
  137. / 1048576.0));
  138. fprintf(stdout, "FileSize: %.1f MB (estimated)\n",
  139. (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
  140. / 1048576.0));
  141. PrintWarnings();
  142. fprintf(stdout, "------------------------------------------------\n");
  143. }
  // Warns on stdout when the binary was built in a way that slows the
  // benchmarks: a GCC-compatible build without optimization, or any build
  // with assertions enabled.
  void PrintWarnings() {
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
    fputs("WARNING: Optimization is disabled: benchmarks unnecessarily slow\n",
          stdout);
#endif
#ifndef NDEBUG
    fputs("WARNING: Assertions are enabled; benchmarks unnecessarily slow\n",
          stdout);
#endif
  }
  155. void PrintEnvironment() {
  156. fprintf(stderr, "Kyoto Cabinet: version %s, lib ver %d, lib rev %d\n",
  157. kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV);
  158. #if defined(__linux)
  159. time_t now = time(NULL);
  160. fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
  161. FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
  162. if (cpuinfo != NULL) {
  163. char line[1000];
  164. int num_cpus = 0;
  165. std::string cpu_type;
  166. std::string cache_size;
  167. while (fgets(line, sizeof(line), cpuinfo) != NULL) {
  168. const char* sep = strchr(line, ':');
  169. if (sep == NULL) {
  170. continue;
  171. }
  172. Slice key = TrimSpace(Slice(line, sep - 1 - line));
  173. Slice val = TrimSpace(Slice(sep + 1));
  174. if (key == "model name") {
  175. ++num_cpus;
  176. cpu_type = val.ToString();
  177. } else if (key == "cache size") {
  178. cache_size = val.ToString();
  179. }
  180. }
  181. fclose(cpuinfo);
  182. fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
  183. fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
  184. }
  185. #endif
  186. }
  187. void Start() {
  188. start_ = Env::Default()->NowMicros() * 1e-6;
  189. bytes_ = 0;
  190. message_.clear();
  191. last_op_finish_ = start_;
  192. hist_.Clear();
  193. done_ = 0;
  194. next_report_ = 100;
  195. }
  196. void FinishedSingleOp() {
  197. if (FLAGS_histogram) {
  198. double now = Env::Default()->NowMicros() * 1e-6;
  199. double micros = (now - last_op_finish_) * 1e6;
  200. hist_.Add(micros);
  201. if (micros > 20000) {
  202. fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
  203. fflush(stderr);
  204. }
  205. last_op_finish_ = now;
  206. }
  207. done_++;
  208. if (done_ >= next_report_) {
  209. if (next_report_ < 1000) next_report_ += 100;
  210. else if (next_report_ < 5000) next_report_ += 500;
  211. else if (next_report_ < 10000) next_report_ += 1000;
  212. else if (next_report_ < 50000) next_report_ += 5000;
  213. else if (next_report_ < 100000) next_report_ += 10000;
  214. else if (next_report_ < 500000) next_report_ += 50000;
  215. else next_report_ += 100000;
  216. fprintf(stderr, "... finished %d ops%30s\r", done_, "");
  217. fflush(stderr);
  218. }
  219. }
  220. void Stop(const Slice& name) {
  221. double finish = Env::Default()->NowMicros() * 1e-6;
  222. // Pretend at least one op was done in case we are running a benchmark
  223. // that does not call FinishedSingleOp().
  224. if (done_ < 1) done_ = 1;
  225. if (bytes_ > 0) {
  226. char rate[100];
  227. snprintf(rate, sizeof(rate), "%6.1f MB/s",
  228. (bytes_ / 1048576.0) / (finish - start_));
  229. if (!message_.empty()) {
  230. message_ = std::string(rate) + " " + message_;
  231. } else {
  232. message_ = rate;
  233. }
  234. }
  235. fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
  236. name.ToString().c_str(),
  237. (finish - start_) * 1e6 / done_,
  238. (message_.empty() ? "" : " "),
  239. message_.c_str());
  240. if (FLAGS_histogram) {
  241. fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
  242. }
  243. fflush(stdout);
  244. }
  245. public:
  // Key order used by a write benchmark.
  enum Order { SEQUENTIAL, RANDOM };

  // Whether a write benchmark starts from a newly created database (FRESH)
  // or writes into the one that is already open (EXISTING).
  enum DBState { FRESH, EXISTING };
  254. Benchmark()
  255. : db_(NULL),
  256. num_(FLAGS_num),
  257. reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
  258. bytes_(0),
  259. rand_(301) {
  260. std::vector<std::string> files;
  261. Env::Default()->GetChildren("/tmp", &files);
  262. if (!FLAGS_use_existing_db) {
  263. for (int i = 0; i < files.size(); i++) {
  264. if (Slice(files[i]).starts_with("dbbench_polyDB")) {
  265. Env::Default()->DeleteFile("/tmp/" + files[i]);
  266. }
  267. }
  268. }
  269. }
  270. ~Benchmark() {
  271. if (!db_->close()) {
  272. fprintf(stderr, "close error: %s\n", db_->error().name());
  273. }
  274. }
  275. void Run() {
  276. PrintHeader();
  277. Open(false);
  278. const char* benchmarks = FLAGS_benchmarks;
  279. while (benchmarks != NULL) {
  280. const char* sep = strchr(benchmarks, ',');
  281. Slice name;
  282. if (sep == NULL) {
  283. name = benchmarks;
  284. benchmarks = NULL;
  285. } else {
  286. name = Slice(benchmarks, sep - benchmarks);
  287. benchmarks = sep + 1;
  288. }
  289. Start();
  290. bool known = true;
  291. bool write_sync = false;
  292. if (name == Slice("fillseq")) {
  293. Write(write_sync, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1);
  294. } else if (name == Slice("fillrandom")) {
  295. Write(write_sync, RANDOM, FRESH, num_, FLAGS_value_size, 1);
  296. DBSynchronize(db_);
  297. } else if (name == Slice("overwrite")) {
  298. Write(write_sync, RANDOM, EXISTING, num_, FLAGS_value_size, 1);
  299. DBSynchronize(db_);
  300. } else if (name == Slice("fillrandsync")) {
  301. write_sync = true;
  302. Write(write_sync, RANDOM, FRESH, num_ / 100, FLAGS_value_size, 1);
  303. DBSynchronize(db_);
  304. } else if (name == Slice("fillseqsync")) {
  305. write_sync = true;
  306. Write(write_sync, SEQUENTIAL, FRESH, num_ / 100, FLAGS_value_size, 1);
  307. DBSynchronize(db_);
  308. } else if (name == Slice("fillrand100K")) {
  309. Write(write_sync, RANDOM, FRESH, num_ / 1000, 100 * 1000, 1);
  310. DBSynchronize(db_);
  311. } else if (name == Slice("fillseq100K")) {
  312. Write(write_sync, SEQUENTIAL, FRESH, num_ / 1000, 100 * 1000, 1);
  313. DBSynchronize(db_);
  314. } else if (name == Slice("readseq")) {
  315. ReadSequential();
  316. } else if (name == Slice("readrandom")) {
  317. ReadRandom();
  318. } else if (name == Slice("readrand100K")) {
  319. int n = reads_;
  320. reads_ /= 1000;
  321. ReadRandom();
  322. reads_ = n;
  323. } else if (name == Slice("readseq100K")) {
  324. int n = reads_;
  325. reads_ /= 1000;
  326. ReadSequential();
  327. reads_ = n;
  328. } else {
  329. known = false;
  330. if (name != Slice()) { // No error message for empty name
  331. fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
  332. }
  333. }
  334. if (known) {
  335. Stop(name);
  336. }
  337. }
  338. }
  339. private:
  340. void Open(bool sync) {
  341. assert(db_ == NULL);
  342. // Initialize db_
  343. db_ = new kyotocabinet::TreeDB();
  344. char file_name[100];
  345. db_num_++;
  346. snprintf(file_name, sizeof(file_name), "/tmp/dbbench_polyDB-%d.kct",
  347. db_num_);
  348. // Create tuning options and open the database
  349. int open_options = kyotocabinet::PolyDB::OWRITER |
  350. kyotocabinet::PolyDB::OCREATE;
  351. int tune_options = kyotocabinet::TreeDB::TSMALL |
  352. kyotocabinet::TreeDB::TLINEAR;
  353. if (FLAGS_compression) {
  354. tune_options |= kyotocabinet::TreeDB::TCOMPRESS;
  355. db_->tune_compressor(&comp_);
  356. }
  357. db_->tune_options(tune_options);
  358. db_->tune_page_cache(FLAGS_cache_size);
  359. db_->tune_page(FLAGS_page_size);
  360. db_->tune_map(256LL<<20);
  361. if (sync) {
  362. open_options |= kyotocabinet::PolyDB::OAUTOSYNC;
  363. }
  364. if (!db_->open(file_name, open_options)) {
  365. fprintf(stderr, "open error: %s\n", db_->error().name());
  366. }
  367. }
  368. void Write(bool sync, Order order, DBState state,
  369. int num_entries, int value_size, int entries_per_batch) {
  370. // Create new database if state == FRESH
  371. if (state == FRESH) {
  372. if (FLAGS_use_existing_db) {
  373. message_ = "skipping (--use_existing_db is true)";
  374. return;
  375. }
  376. delete db_;
  377. db_ = NULL;
  378. Open(sync);
  379. Start(); // Do not count time taken to destroy/open
  380. }
  381. if (num_entries != num_) {
  382. char msg[100];
  383. snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
  384. message_ = msg;
  385. }
  386. // Write to database
  387. for (int i = 0; i < num_entries; i++)
  388. {
  389. const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % num_entries);
  390. char key[100];
  391. snprintf(key, sizeof(key), "%016d", k);
  392. bytes_ += value_size + strlen(key);
  393. std::string cpp_key = key;
  394. if (!db_->set(cpp_key, gen_.Generate(value_size).ToString())) {
  395. fprintf(stderr, "set error: %s\n", db_->error().name());
  396. }
  397. FinishedSingleOp();
  398. }
  399. }
  400. void ReadSequential() {
  401. kyotocabinet::DB::Cursor* cur = db_->cursor();
  402. cur->jump();
  403. std::string ckey, cvalue;
  404. while (cur->get(&ckey, &cvalue, true)) {
  405. bytes_ += ckey.size() + cvalue.size();
  406. FinishedSingleOp();
  407. }
  408. delete cur;
  409. }
  410. void ReadRandom() {
  411. std::string value;
  412. for (int i = 0; i < reads_; i++) {
  413. char key[100];
  414. const int k = rand_.Next() % reads_;
  415. snprintf(key, sizeof(key), "%016d", k);
  416. db_->get(key, &value);
  417. FinishedSingleOp();
  418. }
  419. }
  420. };
  421. }
  422. int main(int argc, char** argv) {
  423. for (int i = 1; i < argc; i++) {
  424. double d;
  425. int n;
  426. char junk;
  427. if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
  428. FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
  429. } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
  430. FLAGS_compression_ratio = d;
  431. } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
  432. (n == 0 || n == 1)) {
  433. FLAGS_histogram = n;
  434. } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
  435. FLAGS_num = n;
  436. } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
  437. FLAGS_reads = n;
  438. } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
  439. FLAGS_value_size = n;
  440. } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
  441. FLAGS_cache_size = n;
  442. } else if (sscanf(argv[i], "--page_size=%d%c", &n, &junk) == 1) {
  443. FLAGS_page_size = n;
  444. } else if (sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 &&
  445. (n == 0 || n == 1)) {
  446. FLAGS_compression = (n == 1) ? true : false;
  447. } else {
  448. fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
  449. exit(1);
  450. }
  451. }
  452. leveldb::Benchmark benchmark;
  453. benchmark.Run();
  454. return 0;
  455. }