作者: 谢瑞阳 10225101483 徐翔宇 10225101535
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

635 lines
18 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include <sys/types.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include "db/db_impl.h"
  8. #include "db/version_set.h"
  9. #include "leveldb/cache.h"
  10. #include "leveldb/db.h"
  11. #include "leveldb/env.h"
  12. #include "leveldb/write_batch.h"
  13. #include "port/port.h"
  14. #include "util/crc32c.h"
  15. #include "util/histogram.h"
  16. #include "util/random.h"
  17. #include "util/testutil.h"
  // Comma-separated list of operations to run in the specified order
  //   Actual benchmarks:
  //      fillseq         -- write N values in sequential key order in async mode
  //      fillbatch       -- like fillseq, but with 1000 entries per write batch
  //      fillrandom      -- write N values in random key order in async mode
  //      overwrite       -- overwrite N values in random key order in async mode
  //      fillsync        -- write N/100 values in random key order in sync mode
  //      fill100K        -- write N/1000 100K values in random order in async mode
  //      readseq         -- read N values sequentially
  //      readreverse     -- read N values in reverse order
  //      readrandom      -- read N values in random order
  //      readrandomsmall -- read N/1000 values in random order
  //      crc32c          -- repeated crc32c of 4K of data
  //      sha1            -- repeated SHA1 computation over 4K of data
  //      snappycomp      -- repeated Snappy compression of one block of data
  //      snappyuncomp    -- repeated Snappy decompression of one block of data
  //   Meta operations:
  //      compact     -- Compact the entire DB
  //      stats       -- Print DB stats
  //      heapprofile -- Dump a heap profile (if supported by this port)
  // Default benchmark list; main() replaces it when --benchmarks=... is given.
  // The trailing comma after the last entry produces an empty final name,
  // which Benchmark::Run() skips without reporting an error.
  static const char* FLAGS_benchmarks =
      "fillseq,"
      "fillsync,"
      "fillrandom,"
      "overwrite,"
      "readrandom,"
      "readrandom," // Extra run to allow previous compactions to quiesce
      "readseq,"
      "readreverse,"
      "compact,"
      "readrandom,"
      "readseq,"
      "readreverse,"
      "fill100K,"
      "crc32c,"
      "sha1,"
      "snappycomp,"
      "snappyuncomp,"
      ;

  // Number of key/values to place in database (--num=).  Also used as the
  // modulus when picking random keys.
  static int FLAGS_num = 1000000;

  // Size of each value in bytes (--value_size=).
  static int FLAGS_value_size = 100;

  // Arrange to generate values that shrink to this fraction of
  // their original size after compression (--compression_ratio=).
  static double FLAGS_compression_ratio = 0.5;

  // Print histogram of operation timings (--histogram=0|1).
  static bool FLAGS_histogram = false;

  // Number of bytes to buffer in memtable before compacting
  // (initialized to default value by "main"; --write_buffer_size=).
  static int FLAGS_write_buffer_size = 0;

  // Number of bytes to use as a cache of uncompressed data (--cache_size=).
  // Negative means use default settings.
  static int FLAGS_cache_size = -1;
  68. namespace leveldb {
  69. // Helper for quickly generating random data.
  70. namespace {
  71. class RandomGenerator {
  72. private:
  73. std::string data_;
  74. int pos_;
  75. public:
  76. RandomGenerator() {
  77. // We use a limited amount of data over and over again and ensure
  78. // that it is larger than the compression window (32KB), and also
  79. // large enough to serve all typical value sizes we want to write.
  80. Random rnd(301);
  81. std::string piece;
  82. while (data_.size() < 1048576) {
  83. // Add a short fragment that is as compressible as specified
  84. // by FLAGS_compression_ratio.
  85. test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
  86. data_.append(piece);
  87. }
  88. pos_ = 0;
  89. }
  90. Slice Generate(int len) {
  91. if (pos_ + len > data_.size()) {
  92. pos_ = 0;
  93. assert(len < data_.size());
  94. }
  95. pos_ += len;
  96. return Slice(data_.data() + pos_ - len, len);
  97. }
  98. };
  99. static Slice TrimSpace(Slice s) {
  100. int start = 0;
  101. while (start < s.size() && isspace(s[start])) {
  102. start++;
  103. }
  104. int limit = s.size();
  105. while (limit > start && isspace(s[limit-1])) {
  106. limit--;
  107. }
  108. return Slice(s.data() + start, limit - start);
  109. }
  110. }
  111. class Benchmark {
  112. private:
    // Block cache handed to the DB in Open(); NULL when FLAGS_cache_size is
    // negative.  Deleted by the destructor.
    Cache* cache_;
    // Currently open database, or NULL while it is being recreated.
    DB* db_;
    // Number of operations a full-size benchmark performs (from FLAGS_num).
    int num_;
    // Sequence number used to name the next heap profile file.
    int heap_counter_;
    // Time, in seconds, at which the current benchmark was (re)started.
    double start_;
    // Time, in seconds, at which the previous operation finished; only
    // maintained when FLAGS_histogram is set.
    double last_op_finish_;
    // Bytes processed by the current benchmark; drives the MB/s figure.
    int64_t bytes_;
    // Extra text appended to the current benchmark's result line.
    std::string message_;
    // Text printed after the result line, then cleared (used by "stats").
    std::string post_message_;
    // Per-operation timings in microseconds, filled when FLAGS_histogram is set.
    Histogram hist_;
    // Source of the value bytes written to the database.
    RandomGenerator gen_;
    // Source of random key numbers.
    Random rand_;

    // State kept for progress messages
    int done_;         // Operations finished since the last Start()
    int next_report_; // When to report next
  128. void PrintHeader() {
  129. const int kKeySize = 16;
  130. PrintEnvironment();
  131. fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
  132. fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n",
  133. FLAGS_value_size,
  134. static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
  135. fprintf(stdout, "Entries: %d\n", num_);
  136. fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
  137. ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
  138. / 1048576.0));
  139. fprintf(stdout, "FileSize: %.1f MB (estimated)\n",
  140. (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
  141. / 1048576.0));
  142. PrintWarnings();
  143. fprintf(stdout, "------------------------------------------------\n");
  144. }
  145. void PrintWarnings() {
  146. #if defined(__GNUC__) && !defined(__OPTIMIZE__)
  147. fprintf(stdout,
  148. "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
  149. );
  150. #endif
  151. #ifndef NDEBUG
  152. fprintf(stdout,
  153. "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
  154. #endif
  155. // See if snappy is working by attempting to compress a compressible string
  156. const char text[] = "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy";
  157. std::string compressed;
  158. if (!port::Snappy_Compress(text, sizeof(text), &compressed)) {
  159. fprintf(stdout, "WARNING: Snappy compression is not enabled\n");
  160. } else if (compressed.size() >= sizeof(text)) {
  161. fprintf(stdout, "WARNING: Snappy compression is not effective\n");
  162. }
  163. }
  164. void PrintEnvironment() {
  165. fprintf(stderr, "LevelDB: version %d.%d\n",
  166. kMajorVersion, kMinorVersion);
  167. #if defined(__linux)
  168. time_t now = time(NULL);
  169. fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
  170. FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
  171. if (cpuinfo != NULL) {
  172. char line[1000];
  173. int num_cpus = 0;
  174. std::string cpu_type;
  175. std::string cache_size;
  176. while (fgets(line, sizeof(line), cpuinfo) != NULL) {
  177. const char* sep = strchr(line, ':');
  178. if (sep == NULL) {
  179. continue;
  180. }
  181. Slice key = TrimSpace(Slice(line, sep - 1 - line));
  182. Slice val = TrimSpace(Slice(sep + 1));
  183. if (key == "model name") {
  184. ++num_cpus;
  185. cpu_type = val.ToString();
  186. } else if (key == "cache size") {
  187. cache_size = val.ToString();
  188. }
  189. }
  190. fclose(cpuinfo);
  191. fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
  192. fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
  193. }
  194. #endif
  195. }
  196. void Start() {
  197. start_ = Env::Default()->NowMicros() * 1e-6;
  198. bytes_ = 0;
  199. message_.clear();
  200. last_op_finish_ = start_;
  201. hist_.Clear();
  202. done_ = 0;
  203. next_report_ = 100;
  204. }
  205. void FinishedSingleOp() {
  206. if (FLAGS_histogram) {
  207. double now = Env::Default()->NowMicros() * 1e-6;
  208. double micros = (now - last_op_finish_) * 1e6;
  209. hist_.Add(micros);
  210. if (micros > 20000) {
  211. fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
  212. fflush(stderr);
  213. }
  214. last_op_finish_ = now;
  215. }
  216. done_++;
  217. if (done_ >= next_report_) {
  218. if (next_report_ < 1000) next_report_ += 100;
  219. else if (next_report_ < 5000) next_report_ += 500;
  220. else if (next_report_ < 10000) next_report_ += 1000;
  221. else if (next_report_ < 50000) next_report_ += 5000;
  222. else if (next_report_ < 100000) next_report_ += 10000;
  223. else if (next_report_ < 500000) next_report_ += 50000;
  224. else next_report_ += 100000;
  225. fprintf(stderr, "... finished %d ops%30s\r", done_, "");
  226. fflush(stderr);
  227. }
  228. }
  229. void Stop(const Slice& name) {
  230. double finish = Env::Default()->NowMicros() * 1e-6;
  231. // Pretend at least one op was done in case we are running a benchmark
  232. // that does nto call FinishedSingleOp().
  233. if (done_ < 1) done_ = 1;
  234. if (bytes_ > 0) {
  235. char rate[100];
  236. snprintf(rate, sizeof(rate), "%6.1f MB/s",
  237. (bytes_ / 1048576.0) / (finish - start_));
  238. if (!message_.empty()) {
  239. message_ = std::string(rate) + " " + message_;
  240. } else {
  241. message_ = rate;
  242. }
  243. }
  244. fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
  245. name.ToString().c_str(),
  246. (finish - start_) * 1e6 / done_,
  247. (message_.empty() ? "" : " "),
  248. message_.c_str());
  249. if (FLAGS_histogram) {
  250. fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
  251. }
  252. fflush(stdout);
  253. if (!post_message_.empty()) {
  254. fprintf(stdout, "\n%s\n", post_message_.c_str());
  255. post_message_.clear();
  256. }
  257. }
  258. public:
    // Key ordering used by Write(): SEQUENTIAL uses the running entry index
    // as the key, RANDOM draws each key from rand_.
    enum Order {
      SEQUENTIAL,
      RANDOM
    };
    // Database state Write() starts from: FRESH destroys and reopens the
    // database first, EXISTING writes into the database as it is.
    enum DBState {
      FRESH,
      EXISTING
    };
  267. Benchmark()
  268. : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
  269. db_(NULL),
  270. num_(FLAGS_num),
  271. heap_counter_(0),
  272. bytes_(0),
  273. rand_(301) {
  274. std::vector<std::string> files;
  275. Env::Default()->GetChildren("/tmp/dbbench", &files);
  276. for (int i = 0; i < files.size(); i++) {
  277. if (Slice(files[i]).starts_with("heap-")) {
  278. Env::Default()->DeleteFile("/tmp/dbbench/" + files[i]);
  279. }
  280. }
  281. DestroyDB("/tmp/dbbench", Options());
  282. }
    // Deletes the database handle first and then the block cache that was
    // passed to it in Open().
    ~Benchmark() {
      delete db_;
      delete cache_;
    }
  287. void Run() {
  288. PrintHeader();
  289. Open();
  290. const char* benchmarks = FLAGS_benchmarks;
  291. while (benchmarks != NULL) {
  292. const char* sep = strchr(benchmarks, ',');
  293. Slice name;
  294. if (sep == NULL) {
  295. name = benchmarks;
  296. benchmarks = NULL;
  297. } else {
  298. name = Slice(benchmarks, sep - benchmarks);
  299. benchmarks = sep + 1;
  300. }
  301. Start();
  302. WriteOptions write_options;
  303. bool known = true;
  304. if (name == Slice("fillseq")) {
  305. Write(write_options, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1);
  306. } else if (name == Slice("fillbatch")) {
  307. Write(write_options, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1000);
  308. } else if (name == Slice("fillrandom")) {
  309. Write(write_options, RANDOM, FRESH, num_, FLAGS_value_size, 1);
  310. } else if (name == Slice("overwrite")) {
  311. Write(write_options, RANDOM, EXISTING, num_, FLAGS_value_size, 1);
  312. } else if (name == Slice("fillsync")) {
  313. write_options.sync = true;
  314. Write(write_options, RANDOM, FRESH, num_ / 100, FLAGS_value_size, 1);
  315. } else if (name == Slice("fill100K")) {
  316. Write(write_options, RANDOM, FRESH, num_ / 1000, 100 * 1000, 1);
  317. } else if (name == Slice("readseq")) {
  318. ReadSequential();
  319. } else if (name == Slice("readreverse")) {
  320. ReadReverse();
  321. } else if (name == Slice("readrandom")) {
  322. ReadRandom();
  323. } else if (name == Slice("readrandomsmall")) {
  324. int n = num_;
  325. num_ /= 1000;
  326. ReadRandom();
  327. num_ = n;
  328. } else if (name == Slice("compact")) {
  329. Compact();
  330. } else if (name == Slice("crc32c")) {
  331. Crc32c(4096, "(4K per op)");
  332. } else if (name == Slice("sha1")) {
  333. SHA1(4096, "(4K per op)");
  334. } else if (name == Slice("snappycomp")) {
  335. SnappyCompress();
  336. } else if (name == Slice("snappyuncomp")) {
  337. SnappyUncompress();
  338. } else if (name == Slice("heapprofile")) {
  339. HeapProfile();
  340. } else if (name == Slice("stats")) {
  341. PrintStats();
  342. } else {
  343. known = false;
  344. if (name != Slice()) { // No error message for empty name
  345. fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
  346. }
  347. }
  348. if (known) {
  349. Stop(name);
  350. }
  351. }
  352. }
  353. private:
  354. void Crc32c(int size, const char* label) {
  355. // Checksum about 500MB of data total
  356. std::string data(size, 'x');
  357. int64_t bytes = 0;
  358. uint32_t crc = 0;
  359. while (bytes < 500 * 1048576) {
  360. crc = crc32c::Value(data.data(), size);
  361. FinishedSingleOp();
  362. bytes += size;
  363. }
  364. // Print so result is not dead
  365. fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(crc));
  366. bytes_ = bytes;
  367. message_ = label;
  368. }
  369. void SHA1(int size, const char* label) {
  370. // SHA1 about 100MB of data total
  371. std::string data(size, 'x');
  372. int64_t bytes = 0;
  373. char sha1[20];
  374. while (bytes < 100 * 1048576) {
  375. port::SHA1_Hash(data.data(), size, sha1);
  376. FinishedSingleOp();
  377. bytes += size;
  378. }
  379. // Print so result is not dead
  380. fprintf(stderr, "... sha1=%02x...\r", static_cast<unsigned int>(sha1[0]));
  381. bytes_ = bytes;
  382. message_ = label;
  383. }
  384. void SnappyCompress() {
  385. Slice input = gen_.Generate(Options().block_size);
  386. int64_t bytes = 0;
  387. int64_t produced = 0;
  388. bool ok = true;
  389. std::string compressed;
  390. while (ok && bytes < 1024 * 1048576) { // Compress 1G
  391. ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
  392. produced += compressed.size();
  393. bytes += input.size();
  394. FinishedSingleOp();
  395. }
  396. if (!ok) {
  397. message_ = "(snappy failure)";
  398. } else {
  399. char buf[100];
  400. snprintf(buf, sizeof(buf), "(output: %.1f%%)",
  401. (produced * 100.0) / bytes);
  402. message_ = buf;
  403. bytes_ = bytes;
  404. }
  405. }
  406. void SnappyUncompress() {
  407. Slice input = gen_.Generate(Options().block_size);
  408. std::string compressed;
  409. bool ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
  410. int64_t bytes = 0;
  411. std::string uncompressed;
  412. while (ok && bytes < 1024 * 1048576) { // Compress 1G
  413. ok = port::Snappy_Uncompress(compressed.data(), compressed.size(),
  414. &uncompressed);
  415. bytes += uncompressed.size();
  416. FinishedSingleOp();
  417. }
  418. if (!ok) {
  419. message_ = "(snappy failure)";
  420. } else {
  421. bytes_ = bytes;
  422. }
  423. }
  424. void Open() {
  425. assert(db_ == NULL);
  426. Options options;
  427. options.create_if_missing = true;
  428. options.block_cache = cache_;
  429. options.write_buffer_size = FLAGS_write_buffer_size;
  430. Status s = DB::Open(options, "/tmp/dbbench", &db_);
  431. if (!s.ok()) {
  432. fprintf(stderr, "open error: %s\n", s.ToString().c_str());
  433. exit(1);
  434. }
  435. }
  436. void Write(const WriteOptions& options, Order order, DBState state,
  437. int num_entries, int value_size, int entries_per_batch) {
  438. if (state == FRESH) {
  439. delete db_;
  440. db_ = NULL;
  441. DestroyDB("/tmp/dbbench", Options());
  442. Open();
  443. Start(); // Do not count time taken to destroy/open
  444. }
  445. if (num_entries != num_) {
  446. char msg[100];
  447. snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
  448. message_ = msg;
  449. }
  450. WriteBatch batch;
  451. Status s;
  452. std::string val;
  453. for (int i = 0; i < num_entries; i += entries_per_batch) {
  454. batch.Clear();
  455. for (int j = 0; j < entries_per_batch; j++) {
  456. const int k = (order == SEQUENTIAL) ? i+j : (rand_.Next() % FLAGS_num);
  457. char key[100];
  458. snprintf(key, sizeof(key), "%016d", k);
  459. batch.Put(key, gen_.Generate(value_size));
  460. bytes_ += value_size + strlen(key);
  461. FinishedSingleOp();
  462. }
  463. s = db_->Write(options, &batch);
  464. if (!s.ok()) {
  465. fprintf(stderr, "put error: %s\n", s.ToString().c_str());
  466. exit(1);
  467. }
  468. }
  469. }
  470. void ReadSequential() {
  471. Iterator* iter = db_->NewIterator(ReadOptions());
  472. int i = 0;
  473. for (iter->SeekToFirst(); i < num_ && iter->Valid(); iter->Next()) {
  474. bytes_ += iter->key().size() + iter->value().size();
  475. FinishedSingleOp();
  476. ++i;
  477. }
  478. delete iter;
  479. }
  480. void ReadReverse() {
  481. Iterator* iter = db_->NewIterator(ReadOptions());
  482. int i = 0;
  483. for (iter->SeekToLast(); i < num_ && iter->Valid(); iter->Prev()) {
  484. bytes_ += iter->key().size() + iter->value().size();
  485. FinishedSingleOp();
  486. ++i;
  487. }
  488. delete iter;
  489. }
  490. void ReadRandom() {
  491. ReadOptions options;
  492. std::string value;
  493. for (int i = 0; i < num_; i++) {
  494. char key[100];
  495. const int k = rand_.Next() % FLAGS_num;
  496. snprintf(key, sizeof(key), "%016d", k);
  497. db_->Get(options, key, &value);
  498. FinishedSingleOp();
  499. }
  500. }
  501. void Compact() {
  502. DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  503. dbi->TEST_CompactMemTable();
  504. int max_level_with_files = 1;
  505. for (int level = 1; level < config::kNumLevels; level++) {
  506. std::string property;
  507. char name[100];
  508. snprintf(name, sizeof(name), "leveldb.num-files-at-level%d", level);
  509. if (db_->GetProperty(name, &property) && atoi(property.c_str()) > 0) {
  510. max_level_with_files = level;
  511. }
  512. }
  513. for (int level = 0; level < max_level_with_files; level++) {
  514. dbi->TEST_CompactRange(level, "", "~");
  515. }
  516. }
  517. void PrintStats() {
  518. std::string stats;
  519. if (!db_->GetProperty("leveldb.stats", &stats)) {
  520. message_ = "(failed)";
  521. } else {
  522. post_message_ = stats;
  523. }
  524. }
  525. static void WriteToFile(void* arg, const char* buf, int n) {
  526. reinterpret_cast<WritableFile*>(arg)->Append(Slice(buf, n));
  527. }
  528. void HeapProfile() {
  529. char fname[100];
  530. snprintf(fname, sizeof(fname), "/tmp/dbbench/heap-%04d", ++heap_counter_);
  531. WritableFile* file;
  532. Status s = Env::Default()->NewWritableFile(fname, &file);
  533. if (!s.ok()) {
  534. message_ = s.ToString();
  535. return;
  536. }
  537. bool ok = port::GetHeapProfile(WriteToFile, file);
  538. delete file;
  539. if (!ok) {
  540. message_ = "not supported";
  541. Env::Default()->DeleteFile(fname);
  542. }
  543. }
  544. };
  545. }
  546. int main(int argc, char** argv) {
  547. FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
  548. for (int i = 1; i < argc; i++) {
  549. double d;
  550. int n;
  551. char junk;
  552. if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
  553. FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
  554. } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
  555. FLAGS_compression_ratio = d;
  556. } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
  557. (n == 0 || n == 1)) {
  558. FLAGS_histogram = n;
  559. } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
  560. FLAGS_num = n;
  561. } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
  562. FLAGS_value_size = n;
  563. } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
  564. FLAGS_write_buffer_size = n;
  565. } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
  566. FLAGS_cache_size = n;
  567. } else {
  568. fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
  569. exit(1);
  570. }
  571. }
  572. leveldb::Benchmark benchmark;
  573. benchmark.Run();
  574. return 0;
  575. }