作者: 韩晨旭 10225101440 李畅 10225102463
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

613 lines
18 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include <sys/types.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include "db/db_impl.h"
  8. #include "db/version_set.h"
  9. #include "leveldb/cache.h"
  10. #include "leveldb/db.h"
  11. #include "leveldb/env.h"
  12. #include "leveldb/write_batch.h"
  13. #include "port/port.h"
  14. #include "util/crc32c.h"
  15. #include "util/histogram.h"
  16. #include "util/random.h"
  17. #include "util/testutil.h"
  18. // Comma-separated list of operations to run in the specified order
  19. // Actual benchmarks:
  20. // fillseq -- write N values in sequential key order in async mode
  21. // fillrandom -- write N values in random key order in async mode
  22. // overwrite -- overwrite N values in random key order in async mode
  23. // fillsync -- write N/100 values in random key order in sync mode
  24. // fill100K -- write N/1000 100K values in random order in async mode
  25. // readseq -- read N values sequentially
  26. // readreverse -- read N values in reverse order
  27. // readrandom -- read N values in random order
  28. // crc32c -- repeated crc32c of 4K of data
  29. // Meta operations:
  30. // compact -- Compact the entire DB
  31. // stats -- Print DB stats
  32. // heapprofile -- Dump a heap profile (if supported by this port)
  // Default benchmark sequence; replaced wholesale by --benchmarks=... in main().
  // The list ends with a trailing comma, which produces an empty final name;
  // Benchmark::Run() skips empty names without printing an error.
  33. static const char* FLAGS_benchmarks =
  34. "fillseq,"
  35. "fillsync,"
  36. "fillrandom,"
  37. "overwrite,"
  38. "readrandom,"
  39. "readrandom," // Extra run to allow previous compactions to quiesce
  40. "readseq,"
  41. "readreverse,"
  42. "compact,"
  43. "readrandom,"
  44. "readseq,"
  45. "readreverse,"
  46. "fill100K,"
  47. "crc32c,"
  48. "snappycomp,"
  49. "snappyuncomp,"
  50. ;
  51. // Number of key/values to place in database
  // (also the modulus used when picking random keys for writes and reads)
  52. static int FLAGS_num = 1000000;
  53. // Size of each value
  54. static int FLAGS_value_size = 100;
  55. // Arrange to generate values that shrink to this fraction of
  56. // their original size after compression
  57. static double FLAGS_compression_ratio = 0.5;
  58. // Print histogram of operation timings
  59. static bool FLAGS_histogram = false;
  60. // Number of bytes to buffer in memtable before compacting
  61. // (initialized to default value by "main")
  62. static int FLAGS_write_buffer_size = 0;
  63. // Number of bytes to use as a cache of uncompressed data.
  64. // Negative means use default settings.
  // (a negative value makes Benchmark pass a NULL block cache to DB::Open)
  65. static int FLAGS_cache_size = -1;
  66. namespace leveldb {
  67. // Helper for quickly generating random data.
  68. namespace {
  69. class RandomGenerator {
  70. private:
  71. std::string data_;
  72. int pos_;
  73. public:
  74. RandomGenerator() {
  75. // We use a limited amount of data over and over again and ensure
  76. // that it is larger than the compression window (32KB), and also
  77. // large enough to serve all typical value sizes we want to write.
  78. Random rnd(301);
  79. std::string piece;
  80. while (data_.size() < 1048576) {
  81. // Add a short fragment that is as compressible as specified
  82. // by FLAGS_compression_ratio.
  83. test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
  84. data_.append(piece);
  85. }
  86. pos_ = 0;
  87. }
  88. Slice Generate(int len) {
  89. if (pos_ + len > data_.size()) {
  90. pos_ = 0;
  91. assert(len < data_.size());
  92. }
  93. pos_ += len;
  94. return Slice(data_.data() + pos_ - len, len);
  95. }
  96. };
  97. static Slice TrimSpace(Slice s) {
  98. int start = 0;
  99. while (start < s.size() && isspace(s[start])) {
  100. start++;
  101. }
  102. int limit = s.size();
  103. while (limit > start && isspace(s[limit-1])) {
  104. limit--;
  105. }
  106. return Slice(s.data() + start, limit - start);
  107. }
  108. }
  109. class Benchmark {
  110. private:
  111. Cache* cache_; // Block cache handed to DB::Open(); NULL when FLAGS_cache_size < 0
  112. DB* db_; // Database under test; (re)created by Open()
  113. int num_; // Number of operations per benchmark (starts as FLAGS_num)
  114. int heap_counter_; // Incremented to number each heap profile file
  115. double start_; // Time, in seconds, at which Start() was last called
  116. double last_op_finish_; // Time, in seconds, the previous op finished (histogram mode)
  117. int64_t bytes_; // Bytes processed by the current benchmark; drives the MB/s figure
  118. std::string message_; // Extra text appended to the result line by Stop()
  119. std::string post_message_; // Text printed after the result line, then cleared
  120. Histogram hist_; // Per-op latencies in microseconds (only filled if FLAGS_histogram)
  121. RandomGenerator gen_; // Source of value bytes and snappy inputs
  122. Random rand_; // Picks random keys; constructed with seed 301
  123. // State kept for progress messages
  124. int done_; // Ops finished since the last Start()
  125. int next_report_; // When to report next
  126. void PrintHeader() {
  127. const int kKeySize = 16;
  128. PrintEnvironment();
  129. fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
  130. fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n",
  131. FLAGS_value_size,
  132. static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
  133. fprintf(stdout, "Entries: %d\n", num_);
  134. fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
  135. ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
  136. / 1048576.0));
  137. fprintf(stdout, "FileSize: %.1f MB (estimated)\n",
  138. (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
  139. / 1048576.0));
  140. PrintWarnings();
  141. fprintf(stdout, "------------------------------------------------\n");
  142. }
  143. void PrintWarnings() {
  144. #if defined(__GNUC__) && !defined(__OPTIMIZE__)
  145. fprintf(stdout,
  146. "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
  147. );
  148. #endif
  149. #ifndef NDEBUG
  150. fprintf(stdout,
  151. "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
  152. #endif
  153. // See if snappy is working by attempting to compress a compressible string
  154. const char text[] = "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy";
  155. std::string compressed;
  156. if (!port::Snappy_Compress(text, sizeof(text), &compressed)) {
  157. fprintf(stdout, "WARNING: Snappy compression is not enabled\n");
  158. } else if (compressed.size() >= sizeof(text)) {
  159. fprintf(stdout, "WARNING: Snappy compression is not effective\n");
  160. }
  161. }
  162. void PrintEnvironment() {
  163. fprintf(stderr, "LevelDB: version %d.%d\n",
  164. kMajorVersion, kMinorVersion);
  165. #if defined(__linux)
  166. time_t now = time(NULL);
  167. fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
  168. FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
  169. if (cpuinfo != NULL) {
  170. char line[1000];
  171. int num_cpus = 0;
  172. std::string cpu_type;
  173. std::string cache_size;
  174. while (fgets(line, sizeof(line), cpuinfo) != NULL) {
  175. const char* sep = strchr(line, ':');
  176. if (sep == NULL) {
  177. continue;
  178. }
  179. Slice key = TrimSpace(Slice(line, sep - 1 - line));
  180. Slice val = TrimSpace(Slice(sep + 1));
  181. if (key == "model name") {
  182. ++num_cpus;
  183. cpu_type = val.ToString();
  184. } else if (key == "cache size") {
  185. cache_size = val.ToString();
  186. }
  187. }
  188. fclose(cpuinfo);
  189. fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
  190. fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
  191. }
  192. #endif
  193. }
  194. void Start() {
  195. start_ = Env::Default()->NowMicros() * 1e-6;
  196. bytes_ = 0;
  197. message_.clear();
  198. last_op_finish_ = start_;
  199. hist_.Clear();
  200. done_ = 0;
  201. next_report_ = 100;
  202. }
  203. void FinishedSingleOp() {
  204. if (FLAGS_histogram) {
  205. double now = Env::Default()->NowMicros() * 1e-6;
  206. double micros = (now - last_op_finish_) * 1e6;
  207. hist_.Add(micros);
  208. if (micros > 20000) {
  209. fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
  210. fflush(stderr);
  211. }
  212. last_op_finish_ = now;
  213. }
  214. done_++;
  215. if (done_ >= next_report_) {
  216. if (next_report_ < 1000) next_report_ += 100;
  217. else if (next_report_ < 5000) next_report_ += 500;
  218. else if (next_report_ < 10000) next_report_ += 1000;
  219. else if (next_report_ < 50000) next_report_ += 5000;
  220. else if (next_report_ < 100000) next_report_ += 10000;
  221. else if (next_report_ < 500000) next_report_ += 50000;
  222. else next_report_ += 100000;
  223. fprintf(stderr, "... finished %d ops%30s\r", done_, "");
  224. fflush(stderr);
  225. }
  226. }
  227. void Stop(const Slice& name) {
  228. double finish = Env::Default()->NowMicros() * 1e-6;
  229. // Pretend at least one op was done in case we are running a benchmark
  230. // that does nto call FinishedSingleOp().
  231. if (done_ < 1) done_ = 1;
  232. if (bytes_ > 0) {
  233. char rate[100];
  234. snprintf(rate, sizeof(rate), "%6.1f MB/s",
  235. (bytes_ / 1048576.0) / (finish - start_));
  236. if (!message_.empty()) {
  237. message_ = std::string(rate) + " " + message_;
  238. } else {
  239. message_ = rate;
  240. }
  241. }
  242. fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
  243. name.ToString().c_str(),
  244. (finish - start_) * 1e6 / done_,
  245. (message_.empty() ? "" : " "),
  246. message_.c_str());
  247. if (FLAGS_histogram) {
  248. fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
  249. }
  250. fflush(stdout);
  251. if (!post_message_.empty()) {
  252. fprintf(stdout, "\n%s\n", post_message_.c_str());
  253. post_message_.clear();
  254. }
  255. }
  256. public:
  257. enum Order {
  258. SEQUENTIAL,
  259. RANDOM
  260. };
  261. enum DBState {
  262. FRESH,
  263. EXISTING
  264. };
  265. Benchmark()
  266. : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
  267. db_(NULL),
  268. num_(FLAGS_num),
  269. heap_counter_(0),
  270. bytes_(0),
  271. rand_(301) {
  272. std::vector<std::string> files;
  273. Env::Default()->GetChildren("/tmp/dbbench", &files);
  274. for (int i = 0; i < files.size(); i++) {
  275. if (Slice(files[i]).starts_with("heap-")) {
  276. Env::Default()->DeleteFile("/tmp/dbbench/" + files[i]);
  277. }
  278. }
  279. DestroyDB("/tmp/dbbench", Options());
  280. }
  281. ~Benchmark() {
  282. delete db_;
  283. delete cache_;
  284. }
  285. void Run() {
  286. PrintHeader();
  287. Open();
  288. const char* benchmarks = FLAGS_benchmarks;
  289. while (benchmarks != NULL) {
  290. const char* sep = strchr(benchmarks, ',');
  291. Slice name;
  292. if (sep == NULL) {
  293. name = benchmarks;
  294. benchmarks = NULL;
  295. } else {
  296. name = Slice(benchmarks, sep - benchmarks);
  297. benchmarks = sep + 1;
  298. }
  299. Start();
  300. WriteOptions write_options;
  301. bool known = true;
  302. if (name == Slice("fillseq")) {
  303. Write(write_options, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1);
  304. } else if (name == Slice("fillbatch")) {
  305. Write(write_options, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1000);
  306. } else if (name == Slice("fillrandom")) {
  307. Write(write_options, RANDOM, FRESH, num_, FLAGS_value_size, 1);
  308. } else if (name == Slice("overwrite")) {
  309. Write(write_options, RANDOM, EXISTING, num_, FLAGS_value_size, 1);
  310. } else if (name == Slice("fillsync")) {
  311. write_options.sync = true;
  312. Write(write_options, RANDOM, FRESH, num_ / 100, FLAGS_value_size, 1);
  313. } else if (name == Slice("fill100K")) {
  314. Write(write_options, RANDOM, FRESH, num_ / 1000, 100 * 1000, 1);
  315. } else if (name == Slice("readseq")) {
  316. ReadSequential();
  317. } else if (name == Slice("readreverse")) {
  318. ReadReverse();
  319. } else if (name == Slice("readrandom")) {
  320. ReadRandom();
  321. } else if (name == Slice("readrandomsmall")) {
  322. int n = num_;
  323. num_ /= 1000;
  324. ReadRandom();
  325. num_ = n;
  326. } else if (name == Slice("compact")) {
  327. Compact();
  328. } else if (name == Slice("crc32c")) {
  329. Crc32c(4096, "(4K per op)");
  330. } else if (name == Slice("snappycomp")) {
  331. SnappyCompress();
  332. } else if (name == Slice("snappyuncomp")) {
  333. SnappyUncompress();
  334. } else if (name == Slice("heapprofile")) {
  335. HeapProfile();
  336. } else if (name == Slice("stats")) {
  337. PrintStats();
  338. } else {
  339. known = false;
  340. if (name != Slice()) { // No error message for empty name
  341. fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
  342. }
  343. }
  344. if (known) {
  345. Stop(name);
  346. }
  347. }
  348. }
  349. private:
  350. void Crc32c(int size, const char* label) {
  351. // Checksum about 500MB of data total
  352. std::string data(size, 'x');
  353. int64_t bytes = 0;
  354. uint32_t crc = 0;
  355. while (bytes < 500 * 1048576) {
  356. crc = crc32c::Value(data.data(), size);
  357. FinishedSingleOp();
  358. bytes += size;
  359. }
  360. // Print so result is not dead
  361. fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(crc));
  362. bytes_ = bytes;
  363. message_ = label;
  364. }
  365. void SnappyCompress() {
  366. Slice input = gen_.Generate(Options().block_size);
  367. int64_t bytes = 0;
  368. int64_t produced = 0;
  369. bool ok = true;
  370. std::string compressed;
  371. while (ok && bytes < 1024 * 1048576) { // Compress 1G
  372. ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
  373. produced += compressed.size();
  374. bytes += input.size();
  375. FinishedSingleOp();
  376. }
  377. if (!ok) {
  378. message_ = "(snappy failure)";
  379. } else {
  380. char buf[100];
  381. snprintf(buf, sizeof(buf), "(output: %.1f%%)",
  382. (produced * 100.0) / bytes);
  383. message_ = buf;
  384. bytes_ = bytes;
  385. }
  386. }
  387. void SnappyUncompress() {
  388. Slice input = gen_.Generate(Options().block_size);
  389. std::string compressed;
  390. bool ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
  391. int64_t bytes = 0;
  392. std::string uncompressed;
  393. while (ok && bytes < 1024 * 1048576) { // Compress 1G
  394. ok = port::Snappy_Uncompress(compressed.data(), compressed.size(),
  395. &uncompressed);
  396. bytes += uncompressed.size();
  397. FinishedSingleOp();
  398. }
  399. if (!ok) {
  400. message_ = "(snappy failure)";
  401. } else {
  402. bytes_ = bytes;
  403. }
  404. }
  405. void Open() {
  406. assert(db_ == NULL);
  407. Options options;
  408. options.create_if_missing = true;
  409. options.block_cache = cache_;
  410. options.write_buffer_size = FLAGS_write_buffer_size;
  411. Status s = DB::Open(options, "/tmp/dbbench", &db_);
  412. if (!s.ok()) {
  413. fprintf(stderr, "open error: %s\n", s.ToString().c_str());
  414. exit(1);
  415. }
  416. }
  417. void Write(const WriteOptions& options, Order order, DBState state,
  418. int num_entries, int value_size, int entries_per_batch) {
  419. if (state == FRESH) {
  420. delete db_;
  421. db_ = NULL;
  422. DestroyDB("/tmp/dbbench", Options());
  423. Open();
  424. Start(); // Do not count time taken to destroy/open
  425. }
  426. if (num_entries != num_) {
  427. char msg[100];
  428. snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
  429. message_ = msg;
  430. }
  431. WriteBatch batch;
  432. Status s;
  433. std::string val;
  434. for (int i = 0; i < num_entries; i += entries_per_batch) {
  435. batch.Clear();
  436. for (int j = 0; j < entries_per_batch; j++) {
  437. const int k = (order == SEQUENTIAL) ? i+j : (rand_.Next() % FLAGS_num);
  438. char key[100];
  439. snprintf(key, sizeof(key), "%016d", k);
  440. batch.Put(key, gen_.Generate(value_size));
  441. bytes_ += value_size + strlen(key);
  442. FinishedSingleOp();
  443. }
  444. s = db_->Write(options, &batch);
  445. if (!s.ok()) {
  446. fprintf(stderr, "put error: %s\n", s.ToString().c_str());
  447. exit(1);
  448. }
  449. }
  450. }
  451. void ReadSequential() {
  452. Iterator* iter = db_->NewIterator(ReadOptions());
  453. int i = 0;
  454. for (iter->SeekToFirst(); i < num_ && iter->Valid(); iter->Next()) {
  455. bytes_ += iter->key().size() + iter->value().size();
  456. FinishedSingleOp();
  457. ++i;
  458. }
  459. delete iter;
  460. }
  461. void ReadReverse() {
  462. Iterator* iter = db_->NewIterator(ReadOptions());
  463. int i = 0;
  464. for (iter->SeekToLast(); i < num_ && iter->Valid(); iter->Prev()) {
  465. bytes_ += iter->key().size() + iter->value().size();
  466. FinishedSingleOp();
  467. ++i;
  468. }
  469. delete iter;
  470. }
  471. void ReadRandom() {
  472. ReadOptions options;
  473. std::string value;
  474. for (int i = 0; i < num_; i++) {
  475. char key[100];
  476. const int k = rand_.Next() % FLAGS_num;
  477. snprintf(key, sizeof(key), "%016d", k);
  478. db_->Get(options, key, &value);
  479. FinishedSingleOp();
  480. }
  481. }
  482. void Compact() {
  483. DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  484. dbi->TEST_CompactMemTable();
  485. int max_level_with_files = 1;
  486. for (int level = 1; level < config::kNumLevels; level++) {
  487. std::string property;
  488. char name[100];
  489. snprintf(name, sizeof(name), "leveldb.num-files-at-level%d", level);
  490. if (db_->GetProperty(name, &property) && atoi(property.c_str()) > 0) {
  491. max_level_with_files = level;
  492. }
  493. }
  494. for (int level = 0; level < max_level_with_files; level++) {
  495. dbi->TEST_CompactRange(level, "", "~");
  496. }
  497. }
  498. void PrintStats() {
  499. std::string stats;
  500. if (!db_->GetProperty("leveldb.stats", &stats)) {
  501. message_ = "(failed)";
  502. } else {
  503. post_message_ = stats;
  504. }
  505. }
  506. static void WriteToFile(void* arg, const char* buf, int n) {
  507. reinterpret_cast<WritableFile*>(arg)->Append(Slice(buf, n));
  508. }
  509. void HeapProfile() {
  510. char fname[100];
  511. snprintf(fname, sizeof(fname), "/tmp/dbbench/heap-%04d", ++heap_counter_);
  512. WritableFile* file;
  513. Status s = Env::Default()->NewWritableFile(fname, &file);
  514. if (!s.ok()) {
  515. message_ = s.ToString();
  516. return;
  517. }
  518. bool ok = port::GetHeapProfile(WriteToFile, file);
  519. delete file;
  520. if (!ok) {
  521. message_ = "not supported";
  522. Env::Default()->DeleteFile(fname);
  523. }
  524. }
  525. };
  526. }
  527. int main(int argc, char** argv) {
  528. FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
  529. for (int i = 1; i < argc; i++) {
  530. double d;
  531. int n;
  532. char junk;
  533. if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
  534. FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
  535. } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
  536. FLAGS_compression_ratio = d;
  537. } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
  538. (n == 0 || n == 1)) {
  539. FLAGS_histogram = n;
  540. } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
  541. FLAGS_num = n;
  542. } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
  543. FLAGS_value_size = n;
  544. } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
  545. FLAGS_write_buffer_size = n;
  546. } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
  547. FLAGS_cache_size = n;
  548. } else {
  549. fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
  550. exit(1);
  551. }
  552. }
  553. leveldb::Benchmark benchmark;
  554. benchmark.Run();
  555. return 0;
  556. }