小组成员:谢瑞阳、徐翔宇
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1540 lines
44 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/db.h"
  5. #include "db/db_impl.h"
  6. #include "db/filename.h"
  7. #include "db/version_set.h"
  8. #include "db/write_batch_internal.h"
  9. #include "leveldb/env.h"
  10. #include "leveldb/table.h"
  11. #include "util/logging.h"
  12. #include "util/mutexlock.h"
  13. #include "util/testharness.h"
  14. #include "util/testutil.h"
  15. namespace leveldb {
  16. static std::string RandomString(Random* rnd, int len) {
  17. std::string r;
  18. test::RandomString(rnd, len, &r);
  19. return r;
  20. }
  21. // Special Env used to delay background operations
  22. class SpecialEnv : public EnvWrapper {
  23. public:
  24. // sstable Sync() calls are blocked while this pointer is non-NULL.
  25. port::AtomicPointer delay_sstable_sync_;
  26. explicit SpecialEnv(Env* base) : EnvWrapper(base) {
  27. delay_sstable_sync_.Release_Store(NULL);
  28. }
  29. Status NewWritableFile(const std::string& f, WritableFile** r) {
  30. class SSTableFile : public WritableFile {
  31. private:
  32. SpecialEnv* env_;
  33. WritableFile* base_;
  34. public:
  35. SSTableFile(SpecialEnv* env, WritableFile* base)
  36. : env_(env),
  37. base_(base) {
  38. }
  39. ~SSTableFile() { delete base_; }
  40. Status Append(const Slice& data) { return base_->Append(data); }
  41. Status Close() { return base_->Close(); }
  42. Status Flush() { return base_->Flush(); }
  43. Status Sync() {
  44. while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
  45. env_->SleepForMicroseconds(100000);
  46. }
  47. return base_->Sync();
  48. }
  49. };
  50. Status s = target()->NewWritableFile(f, r);
  51. if (s.ok()) {
  52. if (strstr(f.c_str(), ".sst") != NULL) {
  53. *r = new SSTableFile(this, *r);
  54. }
  55. }
  56. return s;
  57. }
  58. };
  59. class DBTest {
  60. public:
  61. std::string dbname_;
  62. SpecialEnv* env_;
  63. DB* db_;
  64. Options last_options_;
  65. DBTest() : env_(new SpecialEnv(Env::Default())) {
  66. dbname_ = test::TmpDir() + "/db_test";
  67. DestroyDB(dbname_, Options());
  68. db_ = NULL;
  69. Reopen();
  70. }
  71. ~DBTest() {
  72. delete db_;
  73. DestroyDB(dbname_, Options());
  74. delete env_;
  75. }
  76. DBImpl* dbfull() {
  77. return reinterpret_cast<DBImpl*>(db_);
  78. }
  79. void Reopen(Options* options = NULL) {
  80. ASSERT_OK(TryReopen(options));
  81. }
  82. void DestroyAndReopen(Options* options = NULL) {
  83. delete db_;
  84. db_ = NULL;
  85. DestroyDB(dbname_, Options());
  86. ASSERT_OK(TryReopen(options));
  87. }
  88. Status TryReopen(Options* options) {
  89. delete db_;
  90. db_ = NULL;
  91. Options opts;
  92. if (options != NULL) {
  93. opts = *options;
  94. } else {
  95. opts.create_if_missing = true;
  96. }
  97. last_options_ = opts;
  98. return DB::Open(opts, dbname_, &db_);
  99. }
  100. Status Put(const std::string& k, const std::string& v) {
  101. return db_->Put(WriteOptions(), k, v);
  102. }
  103. Status Delete(const std::string& k) {
  104. return db_->Delete(WriteOptions(), k);
  105. }
  106. std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
  107. ReadOptions options;
  108. options.snapshot = snapshot;
  109. std::string result;
  110. Status s = db_->Get(options, k, &result);
  111. if (s.IsNotFound()) {
  112. result = "NOT_FOUND";
  113. } else if (!s.ok()) {
  114. result = s.ToString();
  115. }
  116. return result;
  117. }
  118. std::string AllEntriesFor(const Slice& user_key) {
  119. Iterator* iter = dbfull()->TEST_NewInternalIterator();
  120. InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
  121. iter->Seek(target.Encode());
  122. std::string result;
  123. if (!iter->status().ok()) {
  124. result = iter->status().ToString();
  125. } else {
  126. result = "[ ";
  127. bool first = true;
  128. while (iter->Valid()) {
  129. ParsedInternalKey ikey;
  130. if (!ParseInternalKey(iter->key(), &ikey)) {
  131. result += "CORRUPTED";
  132. } else {
  133. if (last_options_.comparator->Compare(
  134. ikey.user_key, user_key) != 0) {
  135. break;
  136. }
  137. if (!first) {
  138. result += ", ";
  139. }
  140. first = false;
  141. switch (ikey.type) {
  142. case kTypeValue:
  143. result += iter->value().ToString();
  144. break;
  145. case kTypeDeletion:
  146. result += "DEL";
  147. break;
  148. }
  149. }
  150. iter->Next();
  151. }
  152. if (!first) {
  153. result += " ";
  154. }
  155. result += "]";
  156. }
  157. delete iter;
  158. return result;
  159. }
  160. int NumTableFilesAtLevel(int level) {
  161. std::string property;
  162. ASSERT_TRUE(
  163. db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level),
  164. &property));
  165. return atoi(property.c_str());
  166. }
  167. int TotalTableFiles() {
  168. int result = 0;
  169. for (int level = 0; level < config::kNumLevels; level++) {
  170. result += NumTableFilesAtLevel(level);
  171. }
  172. return result;
  173. }
  174. // Return spread of files per level
  175. std::string FilesPerLevel() {
  176. std::string result;
  177. int last_non_zero_offset = 0;
  178. for (int level = 0; level < config::kNumLevels; level++) {
  179. int f = NumTableFilesAtLevel(level);
  180. char buf[100];
  181. snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
  182. result += buf;
  183. if (f > 0) {
  184. last_non_zero_offset = result.size();
  185. }
  186. }
  187. result.resize(last_non_zero_offset);
  188. return result;
  189. }
  190. uint64_t Size(const Slice& start, const Slice& limit) {
  191. Range r(start, limit);
  192. uint64_t size;
  193. db_->GetApproximateSizes(&r, 1, &size);
  194. return size;
  195. }
  196. void Compact(const Slice& start, const Slice& limit) {
  197. db_->CompactRange(&start, &limit);
  198. }
  199. // Do n memtable compactions, each of which produces an sstable
  200. // covering the range [small,large].
  201. void MakeTables(int n, const std::string& small, const std::string& large) {
  202. for (int i = 0; i < n; i++) {
  203. Put(small, "begin");
  204. Put(large, "end");
  205. dbfull()->TEST_CompactMemTable();
  206. }
  207. }
  208. // Prevent pushing of new sstables into deeper levels by adding
  209. // tables that cover a specified range to all levels.
  210. void FillLevels(const std::string& smallest, const std::string& largest) {
  211. MakeTables(config::kNumLevels, smallest, largest);
  212. }
  213. void DumpFileCounts(const char* label) {
  214. fprintf(stderr, "---\n%s:\n", label);
  215. fprintf(stderr, "maxoverlap: %lld\n",
  216. static_cast<long long>(
  217. dbfull()->TEST_MaxNextLevelOverlappingBytes()));
  218. for (int level = 0; level < config::kNumLevels; level++) {
  219. int num = NumTableFilesAtLevel(level);
  220. if (num > 0) {
  221. fprintf(stderr, " level %3d : %d files\n", level, num);
  222. }
  223. }
  224. }
  225. std::string DumpSSTableList() {
  226. std::string property;
  227. db_->GetProperty("leveldb.sstables", &property);
  228. return property;
  229. }
  230. std::string IterStatus(Iterator* iter) {
  231. std::string result;
  232. if (iter->Valid()) {
  233. result = iter->key().ToString() + "->" + iter->value().ToString();
  234. } else {
  235. result = "(invalid)";
  236. }
  237. return result;
  238. }
  239. };
  240. TEST(DBTest, Empty) {
  241. ASSERT_TRUE(db_ != NULL);
  242. ASSERT_EQ("NOT_FOUND", Get("foo"));
  243. }
  244. TEST(DBTest, ReadWrite) {
  245. ASSERT_OK(Put("foo", "v1"));
  246. ASSERT_EQ("v1", Get("foo"));
  247. ASSERT_OK(Put("bar", "v2"));
  248. ASSERT_OK(Put("foo", "v3"));
  249. ASSERT_EQ("v3", Get("foo"));
  250. ASSERT_EQ("v2", Get("bar"));
  251. }
  252. TEST(DBTest, PutDeleteGet) {
  253. ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
  254. ASSERT_EQ("v1", Get("foo"));
  255. ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
  256. ASSERT_EQ("v2", Get("foo"));
  257. ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
  258. ASSERT_EQ("NOT_FOUND", Get("foo"));
  259. }
  260. TEST(DBTest, GetFromImmutableLayer) {
  261. Options options;
  262. options.env = env_;
  263. options.write_buffer_size = 100000; // Small write buffer
  264. Reopen(&options);
  265. ASSERT_OK(Put("foo", "v1"));
  266. ASSERT_EQ("v1", Get("foo"));
  267. env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
  268. Put("k1", std::string(100000, 'x')); // Fill memtable
  269. Put("k2", std::string(100000, 'y')); // Trigger compaction
  270. ASSERT_EQ("v1", Get("foo"));
  271. env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls
  272. }
  273. TEST(DBTest, GetFromVersions) {
  274. ASSERT_OK(Put("foo", "v1"));
  275. dbfull()->TEST_CompactMemTable();
  276. ASSERT_EQ("v1", Get("foo"));
  277. }
  278. TEST(DBTest, GetSnapshot) {
  279. // Try with both a short key and a long key
  280. for (int i = 0; i < 2; i++) {
  281. std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
  282. ASSERT_OK(Put(key, "v1"));
  283. const Snapshot* s1 = db_->GetSnapshot();
  284. ASSERT_OK(Put(key, "v2"));
  285. ASSERT_EQ("v2", Get(key));
  286. ASSERT_EQ("v1", Get(key, s1));
  287. dbfull()->TEST_CompactMemTable();
  288. ASSERT_EQ("v2", Get(key));
  289. ASSERT_EQ("v1", Get(key, s1));
  290. db_->ReleaseSnapshot(s1);
  291. }
  292. }
  293. TEST(DBTest, GetLevel0Ordering) {
  294. // Check that we process level-0 files in correct order. The code
  295. // below generates two level-0 files where the earlier one comes
  296. // before the later one in the level-0 file list since the earlier
  297. // one has a smaller "smallest" key.
  298. ASSERT_OK(Put("bar", "b"));
  299. ASSERT_OK(Put("foo", "v1"));
  300. dbfull()->TEST_CompactMemTable();
  301. ASSERT_OK(Put("foo", "v2"));
  302. dbfull()->TEST_CompactMemTable();
  303. ASSERT_EQ("v2", Get("foo"));
  304. }
  305. TEST(DBTest, GetOrderedByLevels) {
  306. ASSERT_OK(Put("foo", "v1"));
  307. Compact("a", "z");
  308. ASSERT_EQ("v1", Get("foo"));
  309. ASSERT_OK(Put("foo", "v2"));
  310. ASSERT_EQ("v2", Get("foo"));
  311. dbfull()->TEST_CompactMemTable();
  312. ASSERT_EQ("v2", Get("foo"));
  313. }
  314. TEST(DBTest, GetPicksCorrectFile) {
  315. // Arrange to have multiple files in a non-level-0 level.
  316. ASSERT_OK(Put("a", "va"));
  317. Compact("a", "b");
  318. ASSERT_OK(Put("x", "vx"));
  319. Compact("x", "y");
  320. ASSERT_OK(Put("f", "vf"));
  321. Compact("f", "g");
  322. ASSERT_EQ("va", Get("a"));
  323. ASSERT_EQ("vf", Get("f"));
  324. ASSERT_EQ("vx", Get("x"));
  325. }
  326. TEST(DBTest, GetEncountersEmptyLevel) {
  327. // Arrange for the following to happen:
  328. // * sstable A in level 0
  329. // * nothing in level 1
  330. // * sstable B in level 2
  331. // Then do enough Get() calls to arrange for an automatic compaction
  332. // of sstable A. A bug would cause the compaction to be marked as
  333. // occuring at level 1 (instead of the correct level 0).
  334. // Step 1: First place sstables in levels 0 and 2
  335. int compaction_count = 0;
  336. while (NumTableFilesAtLevel(0) == 0 ||
  337. NumTableFilesAtLevel(2) == 0) {
  338. ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
  339. compaction_count++;
  340. Put("a", "begin");
  341. Put("z", "end");
  342. dbfull()->TEST_CompactMemTable();
  343. }
  344. // Step 2: clear level 1 if necessary.
  345. dbfull()->TEST_CompactRange(1, NULL, NULL);
  346. ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  347. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  348. ASSERT_EQ(NumTableFilesAtLevel(2), 1);
  349. // Step 3: read until level 0 compaction disappears.
  350. int read_count = 0;
  351. while (NumTableFilesAtLevel(0) > 0) {
  352. ASSERT_LE(read_count, 10000) << "did not trigger level 0 compaction";
  353. read_count++;
  354. ASSERT_EQ("NOT_FOUND", Get("missing"));
  355. }
  356. }
  357. TEST(DBTest, IterEmpty) {
  358. Iterator* iter = db_->NewIterator(ReadOptions());
  359. iter->SeekToFirst();
  360. ASSERT_EQ(IterStatus(iter), "(invalid)");
  361. iter->SeekToLast();
  362. ASSERT_EQ(IterStatus(iter), "(invalid)");
  363. iter->Seek("foo");
  364. ASSERT_EQ(IterStatus(iter), "(invalid)");
  365. delete iter;
  366. }
  367. TEST(DBTest, IterSingle) {
  368. ASSERT_OK(Put("a", "va"));
  369. Iterator* iter = db_->NewIterator(ReadOptions());
  370. iter->SeekToFirst();
  371. ASSERT_EQ(IterStatus(iter), "a->va");
  372. iter->Next();
  373. ASSERT_EQ(IterStatus(iter), "(invalid)");
  374. iter->SeekToFirst();
  375. ASSERT_EQ(IterStatus(iter), "a->va");
  376. iter->Prev();
  377. ASSERT_EQ(IterStatus(iter), "(invalid)");
  378. iter->SeekToLast();
  379. ASSERT_EQ(IterStatus(iter), "a->va");
  380. iter->Next();
  381. ASSERT_EQ(IterStatus(iter), "(invalid)");
  382. iter->SeekToLast();
  383. ASSERT_EQ(IterStatus(iter), "a->va");
  384. iter->Prev();
  385. ASSERT_EQ(IterStatus(iter), "(invalid)");
  386. iter->Seek("");
  387. ASSERT_EQ(IterStatus(iter), "a->va");
  388. iter->Next();
  389. ASSERT_EQ(IterStatus(iter), "(invalid)");
  390. iter->Seek("a");
  391. ASSERT_EQ(IterStatus(iter), "a->va");
  392. iter->Next();
  393. ASSERT_EQ(IterStatus(iter), "(invalid)");
  394. iter->Seek("b");
  395. ASSERT_EQ(IterStatus(iter), "(invalid)");
  396. delete iter;
  397. }
  398. TEST(DBTest, IterMulti) {
  399. ASSERT_OK(Put("a", "va"));
  400. ASSERT_OK(Put("b", "vb"));
  401. ASSERT_OK(Put("c", "vc"));
  402. Iterator* iter = db_->NewIterator(ReadOptions());
  403. iter->SeekToFirst();
  404. ASSERT_EQ(IterStatus(iter), "a->va");
  405. iter->Next();
  406. ASSERT_EQ(IterStatus(iter), "b->vb");
  407. iter->Next();
  408. ASSERT_EQ(IterStatus(iter), "c->vc");
  409. iter->Next();
  410. ASSERT_EQ(IterStatus(iter), "(invalid)");
  411. iter->SeekToFirst();
  412. ASSERT_EQ(IterStatus(iter), "a->va");
  413. iter->Prev();
  414. ASSERT_EQ(IterStatus(iter), "(invalid)");
  415. iter->SeekToLast();
  416. ASSERT_EQ(IterStatus(iter), "c->vc");
  417. iter->Prev();
  418. ASSERT_EQ(IterStatus(iter), "b->vb");
  419. iter->Prev();
  420. ASSERT_EQ(IterStatus(iter), "a->va");
  421. iter->Prev();
  422. ASSERT_EQ(IterStatus(iter), "(invalid)");
  423. iter->SeekToLast();
  424. ASSERT_EQ(IterStatus(iter), "c->vc");
  425. iter->Next();
  426. ASSERT_EQ(IterStatus(iter), "(invalid)");
  427. iter->Seek("");
  428. ASSERT_EQ(IterStatus(iter), "a->va");
  429. iter->Seek("a");
  430. ASSERT_EQ(IterStatus(iter), "a->va");
  431. iter->Seek("ax");
  432. ASSERT_EQ(IterStatus(iter), "b->vb");
  433. iter->Seek("b");
  434. ASSERT_EQ(IterStatus(iter), "b->vb");
  435. iter->Seek("z");
  436. ASSERT_EQ(IterStatus(iter), "(invalid)");
  437. // Switch from reverse to forward
  438. iter->SeekToLast();
  439. iter->Prev();
  440. iter->Prev();
  441. iter->Next();
  442. ASSERT_EQ(IterStatus(iter), "b->vb");
  443. // Switch from forward to reverse
  444. iter->SeekToFirst();
  445. iter->Next();
  446. iter->Next();
  447. iter->Prev();
  448. ASSERT_EQ(IterStatus(iter), "b->vb");
  449. // Make sure iter stays at snapshot
  450. ASSERT_OK(Put("a", "va2"));
  451. ASSERT_OK(Put("a2", "va3"));
  452. ASSERT_OK(Put("b", "vb2"));
  453. ASSERT_OK(Put("c", "vc2"));
  454. ASSERT_OK(Delete("b"));
  455. iter->SeekToFirst();
  456. ASSERT_EQ(IterStatus(iter), "a->va");
  457. iter->Next();
  458. ASSERT_EQ(IterStatus(iter), "b->vb");
  459. iter->Next();
  460. ASSERT_EQ(IterStatus(iter), "c->vc");
  461. iter->Next();
  462. ASSERT_EQ(IterStatus(iter), "(invalid)");
  463. iter->SeekToLast();
  464. ASSERT_EQ(IterStatus(iter), "c->vc");
  465. iter->Prev();
  466. ASSERT_EQ(IterStatus(iter), "b->vb");
  467. iter->Prev();
  468. ASSERT_EQ(IterStatus(iter), "a->va");
  469. iter->Prev();
  470. ASSERT_EQ(IterStatus(iter), "(invalid)");
  471. delete iter;
  472. }
  473. TEST(DBTest, IterSmallAndLargeMix) {
  474. ASSERT_OK(Put("a", "va"));
  475. ASSERT_OK(Put("b", std::string(100000, 'b')));
  476. ASSERT_OK(Put("c", "vc"));
  477. ASSERT_OK(Put("d", std::string(100000, 'd')));
  478. ASSERT_OK(Put("e", std::string(100000, 'e')));
  479. Iterator* iter = db_->NewIterator(ReadOptions());
  480. iter->SeekToFirst();
  481. ASSERT_EQ(IterStatus(iter), "a->va");
  482. iter->Next();
  483. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  484. iter->Next();
  485. ASSERT_EQ(IterStatus(iter), "c->vc");
  486. iter->Next();
  487. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  488. iter->Next();
  489. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  490. iter->Next();
  491. ASSERT_EQ(IterStatus(iter), "(invalid)");
  492. iter->SeekToLast();
  493. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  494. iter->Prev();
  495. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  496. iter->Prev();
  497. ASSERT_EQ(IterStatus(iter), "c->vc");
  498. iter->Prev();
  499. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  500. iter->Prev();
  501. ASSERT_EQ(IterStatus(iter), "a->va");
  502. iter->Prev();
  503. ASSERT_EQ(IterStatus(iter), "(invalid)");
  504. delete iter;
  505. }
  506. TEST(DBTest, IterMultiWithDelete) {
  507. ASSERT_OK(Put("a", "va"));
  508. ASSERT_OK(Put("b", "vb"));
  509. ASSERT_OK(Put("c", "vc"));
  510. ASSERT_OK(Delete("b"));
  511. ASSERT_EQ("NOT_FOUND", Get("b"));
  512. Iterator* iter = db_->NewIterator(ReadOptions());
  513. iter->Seek("c");
  514. ASSERT_EQ(IterStatus(iter), "c->vc");
  515. iter->Prev();
  516. ASSERT_EQ(IterStatus(iter), "a->va");
  517. delete iter;
  518. }
  519. TEST(DBTest, Recover) {
  520. ASSERT_OK(Put("foo", "v1"));
  521. ASSERT_OK(Put("baz", "v5"));
  522. Reopen();
  523. ASSERT_EQ("v1", Get("foo"));
  524. ASSERT_EQ("v1", Get("foo"));
  525. ASSERT_EQ("v5", Get("baz"));
  526. ASSERT_OK(Put("bar", "v2"));
  527. ASSERT_OK(Put("foo", "v3"));
  528. Reopen();
  529. ASSERT_EQ("v3", Get("foo"));
  530. ASSERT_OK(Put("foo", "v4"));
  531. ASSERT_EQ("v4", Get("foo"));
  532. ASSERT_EQ("v2", Get("bar"));
  533. ASSERT_EQ("v5", Get("baz"));
  534. }
  535. TEST(DBTest, RecoveryWithEmptyLog) {
  536. ASSERT_OK(Put("foo", "v1"));
  537. ASSERT_OK(Put("foo", "v2"));
  538. Reopen();
  539. Reopen();
  540. ASSERT_OK(Put("foo", "v3"));
  541. Reopen();
  542. ASSERT_EQ("v3", Get("foo"));
  543. }
  544. // Check that writes done during a memtable compaction are recovered
  545. // if the database is shutdown during the memtable compaction.
  546. TEST(DBTest, RecoverDuringMemtableCompaction) {
  547. Options options;
  548. options.env = env_;
  549. options.write_buffer_size = 1000000;
  550. Reopen(&options);
  551. // Trigger a long memtable compaction and reopen the database during it
  552. ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
  553. ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
  554. ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
  555. ASSERT_OK(Put("bar", "v2")); // Goes to new log file
  556. Reopen(&options);
  557. ASSERT_EQ("v1", Get("foo"));
  558. ASSERT_EQ("v2", Get("bar"));
  559. ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
  560. ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
  561. }
  // Builds the test key for index `i`: the text "key" followed by `i`
  // zero-padded to at least six characters (e.g. 42 -> "key000042").
  static std::string Key(int i) {
    char text[100];
    snprintf(text, sizeof(text), "key%06d", i);
    std::string key(text);
    return key;
  }
  567. TEST(DBTest, MinorCompactionsHappen) {
  568. Options options;
  569. options.write_buffer_size = 10000;
  570. Reopen(&options);
  571. const int N = 500;
  572. int starting_num_tables = TotalTableFiles();
  573. for (int i = 0; i < N; i++) {
  574. ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
  575. }
  576. int ending_num_tables = TotalTableFiles();
  577. ASSERT_GT(ending_num_tables, starting_num_tables);
  578. for (int i = 0; i < N; i++) {
  579. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  580. }
  581. Reopen();
  582. for (int i = 0; i < N; i++) {
  583. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  584. }
  585. }
  586. TEST(DBTest, RecoverWithLargeLog) {
  587. {
  588. Options options;
  589. Reopen(&options);
  590. ASSERT_OK(Put("big1", std::string(200000, '1')));
  591. ASSERT_OK(Put("big2", std::string(200000, '2')));
  592. ASSERT_OK(Put("small3", std::string(10, '3')));
  593. ASSERT_OK(Put("small4", std::string(10, '4')));
  594. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  595. }
  596. // Make sure that if we re-open with a small write buffer size that
  597. // we flush table files in the middle of a large log file.
  598. Options options;
  599. options.write_buffer_size = 100000;
  600. Reopen(&options);
  601. ASSERT_EQ(NumTableFilesAtLevel(0), 3);
  602. ASSERT_EQ(std::string(200000, '1'), Get("big1"));
  603. ASSERT_EQ(std::string(200000, '2'), Get("big2"));
  604. ASSERT_EQ(std::string(10, '3'), Get("small3"));
  605. ASSERT_EQ(std::string(10, '4'), Get("small4"));
  606. ASSERT_GT(NumTableFilesAtLevel(0), 1);
  607. }
  608. TEST(DBTest, CompactionsGenerateMultipleFiles) {
  609. Options options;
  610. options.write_buffer_size = 100000000; // Large write buffer
  611. Reopen(&options);
  612. Random rnd(301);
  613. // Write 8MB (80 values, each 100K)
  614. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  615. std::vector<std::string> values;
  616. for (int i = 0; i < 80; i++) {
  617. values.push_back(RandomString(&rnd, 100000));
  618. ASSERT_OK(Put(Key(i), values[i]));
  619. }
  620. // Reopening moves updates to level-0
  621. Reopen(&options);
  622. dbfull()->TEST_CompactRange(0, NULL, NULL);
  623. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  624. ASSERT_GT(NumTableFilesAtLevel(1), 1);
  625. for (int i = 0; i < 80; i++) {
  626. ASSERT_EQ(Get(Key(i)), values[i]);
  627. }
  628. }
  629. TEST(DBTest, RepeatedWritesToSameKey) {
  630. Options options;
  631. options.env = env_;
  632. options.write_buffer_size = 100000; // Small write buffer
  633. Reopen(&options);
  634. // We must have at most one file per level except for level-0,
  635. // which may have up to kL0_StopWritesTrigger files.
  636. const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
  637. Random rnd(301);
  638. std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
  639. for (int i = 0; i < 5 * kMaxFiles; i++) {
  640. Put("key", value);
  641. ASSERT_LE(TotalTableFiles(), kMaxFiles);
  642. fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
  643. }
  644. }
  645. TEST(DBTest, SparseMerge) {
  646. Options options;
  647. options.compression = kNoCompression;
  648. Reopen(&options);
  649. FillLevels("A", "Z");
  650. // Suppose there is:
  651. // small amount of data with prefix A
  652. // large amount of data with prefix B
  653. // small amount of data with prefix C
  654. // and that recent updates have made small changes to all three prefixes.
  655. // Check that we do not do a compaction that merges all of B in one shot.
  656. const std::string value(1000, 'x');
  657. Put("A", "va");
  658. // Write approximately 100MB of "B" values
  659. for (int i = 0; i < 100000; i++) {
  660. char key[100];
  661. snprintf(key, sizeof(key), "B%010d", i);
  662. Put(key, value);
  663. }
  664. Put("C", "vc");
  665. dbfull()->TEST_CompactMemTable();
  666. dbfull()->TEST_CompactRange(0, NULL, NULL);
  667. // Make sparse update
  668. Put("A", "va2");
  669. Put("B100", "bvalue2");
  670. Put("C", "vc2");
  671. dbfull()->TEST_CompactMemTable();
  672. // Compactions should not cause us to create a situation where
  673. // a file overlaps too much data at the next level.
  674. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  675. dbfull()->TEST_CompactRange(0, NULL, NULL);
  676. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  677. dbfull()->TEST_CompactRange(1, NULL, NULL);
  678. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  679. }
  // Returns true when low <= val <= high. On failure, also reports the
  // offending value and the expected range on stderr so that a failing
  // ASSERT_TRUE(Between(...)) is easier to diagnose.
  static bool Between(uint64_t val, uint64_t low, uint64_t high) {
    if (val >= low && val <= high) {
      return true;
    }
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            static_cast<unsigned long long>(val),
            static_cast<unsigned long long>(low),
            static_cast<unsigned long long>(high));
    return false;
  }
  690. TEST(DBTest, ApproximateSizes) {
  691. Options options;
  692. options.write_buffer_size = 100000000; // Large write buffer
  693. options.compression = kNoCompression;
  694. DestroyAndReopen();
  695. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  696. Reopen(&options);
  697. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  698. // Write 8MB (80 values, each 100K)
  699. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  700. const int N = 80;
  701. Random rnd(301);
  702. for (int i = 0; i < N; i++) {
  703. ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
  704. }
  705. // 0 because GetApproximateSizes() does not account for memtable space
  706. ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
  707. // Check sizes across recovery by reopening a few times
  708. for (int run = 0; run < 3; run++) {
  709. Reopen(&options);
  710. for (int compact_start = 0; compact_start < N; compact_start += 10) {
  711. for (int i = 0; i < N; i += 10) {
  712. ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000));
  713. ASSERT_TRUE(Between(Size("", Key(i)+".suffix"),
  714. 100000 * (i+1), 100000 * (i+1) + 10000));
  715. ASSERT_TRUE(Between(Size(Key(i), Key(i+10)),
  716. 100000 * 10, 100000 * 10 + 10000));
  717. }
  718. ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
  719. ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
  720. std::string cstart_str = Key(compact_start);
  721. std::string cend_str = Key(compact_start + 9);
  722. Slice cstart = cstart_str;
  723. Slice cend = cend_str;
  724. dbfull()->TEST_CompactRange(0, &cstart, &cend);
  725. }
  726. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  727. ASSERT_GT(NumTableFilesAtLevel(1), 0);
  728. }
  729. }
  730. TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  731. Options options;
  732. options.compression = kNoCompression;
  733. Reopen();
  734. Random rnd(301);
  735. std::string big1 = RandomString(&rnd, 100000);
  736. ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
  737. ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
  738. ASSERT_OK(Put(Key(2), big1));
  739. ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
  740. ASSERT_OK(Put(Key(4), big1));
  741. ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
  742. ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
  743. ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
  744. // Check sizes across recovery by reopening a few times
  745. for (int run = 0; run < 3; run++) {
  746. Reopen(&options);
  747. ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
  748. ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
  749. ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
  750. ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
  751. ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
  752. ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
  753. ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
  754. ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
  755. ASSERT_TRUE(Between(Size("", Key(8)), 550000, 551000));
  756. ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
  757. dbfull()->TEST_CompactRange(0, NULL, NULL);
  758. }
  759. }
  760. TEST(DBTest, IteratorPinsRef) {
  761. Put("foo", "hello");
  762. // Get iterator that will yield the current contents of the DB.
  763. Iterator* iter = db_->NewIterator(ReadOptions());
  764. // Write to force compactions
  765. Put("foo", "newvalue1");
  766. for (int i = 0; i < 100; i++) {
  767. ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
  768. }
  769. Put("foo", "newvalue2");
  770. iter->SeekToFirst();
  771. ASSERT_TRUE(iter->Valid());
  772. ASSERT_EQ("foo", iter->key().ToString());
  773. ASSERT_EQ("hello", iter->value().ToString());
  774. iter->Next();
  775. ASSERT_TRUE(!iter->Valid());
  776. delete iter;
  777. }
  778. TEST(DBTest, Snapshot) {
  779. Put("foo", "v1");
  780. const Snapshot* s1 = db_->GetSnapshot();
  781. Put("foo", "v2");
  782. const Snapshot* s2 = db_->GetSnapshot();
  783. Put("foo", "v3");
  784. const Snapshot* s3 = db_->GetSnapshot();
  785. Put("foo", "v4");
  786. ASSERT_EQ("v1", Get("foo", s1));
  787. ASSERT_EQ("v2", Get("foo", s2));
  788. ASSERT_EQ("v3", Get("foo", s3));
  789. ASSERT_EQ("v4", Get("foo"));
  790. db_->ReleaseSnapshot(s3);
  791. ASSERT_EQ("v1", Get("foo", s1));
  792. ASSERT_EQ("v2", Get("foo", s2));
  793. ASSERT_EQ("v4", Get("foo"));
  794. db_->ReleaseSnapshot(s1);
  795. ASSERT_EQ("v2", Get("foo", s2));
  796. ASSERT_EQ("v4", Get("foo"));
  797. db_->ReleaseSnapshot(s2);
  798. ASSERT_EQ("v4", Get("foo"));
  799. }
  800. TEST(DBTest, HiddenValuesAreRemoved) {
  801. Random rnd(301);
  802. FillLevels("a", "z");
  803. std::string big = RandomString(&rnd, 50000);
  804. Put("foo", big);
  805. Put("pastfoo", "v");
  806. const Snapshot* snapshot = db_->GetSnapshot();
  807. Put("foo", "tiny");
  808. Put("pastfoo2", "v2"); // Advance sequence number one more
  809. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  810. ASSERT_GT(NumTableFilesAtLevel(0), 0);
  811. ASSERT_EQ(big, Get("foo", snapshot));
  812. ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
  813. db_->ReleaseSnapshot(snapshot);
  814. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
  815. Slice x("x");
  816. dbfull()->TEST_CompactRange(0, NULL, &x);
  817. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  818. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  819. ASSERT_GE(NumTableFilesAtLevel(1), 1);
  820. dbfull()->TEST_CompactRange(1, NULL, &x);
  821. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  822. ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
  823. }
  824. TEST(DBTest, DeletionMarkers1) {
  825. Put("foo", "v1");
  826. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  827. const int last = config::kMaxMemCompactLevel;
  828. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  829. // Place a table at level last-1 to prevent merging with preceding mutation
  830. Put("a", "begin");
  831. Put("z", "end");
  832. dbfull()->TEST_CompactMemTable();
  833. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  834. ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
  835. Delete("foo");
  836. Put("foo", "v2");
  837. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  838. ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  839. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  840. Slice z("z");
  841. dbfull()->TEST_CompactRange(last-2, NULL, &z);
  842. // DEL eliminated, but v1 remains because we aren't compacting that level
  843. // (DEL can be eliminated because v2 hides v1).
  844. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
  845. dbfull()->TEST_CompactRange(last-1, NULL, NULL);
  846. // Merging last-1 w/ last, so we are the base level for "foo", so
  847. // DEL is removed. (as is v1).
  848. ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
  849. }
  850. TEST(DBTest, DeletionMarkers2) {
  851. Put("foo", "v1");
  852. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  853. const int last = config::kMaxMemCompactLevel;
  854. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  855. // Place a table at level last-1 to prevent merging with preceding mutation
  856. Put("a", "begin");
  857. Put("z", "end");
  858. dbfull()->TEST_CompactMemTable();
  859. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  860. ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
  861. Delete("foo");
  862. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  863. ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  864. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  865. dbfull()->TEST_CompactRange(last-2, NULL, NULL);
  866. // DEL kept: "last" file overlaps
  867. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  868. dbfull()->TEST_CompactRange(last-1, NULL, NULL);
  869. // Merging last-1 w/ last, so we are the base level for "foo", so
  870. // DEL is removed. (as is v1).
  871. ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
  872. }
  873. TEST(DBTest, OverlapInLevel0) {
  874. ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
  875. // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
  876. ASSERT_OK(Put("100", "v100"));
  877. ASSERT_OK(Put("999", "v999"));
  878. dbfull()->TEST_CompactMemTable();
  879. ASSERT_OK(Delete("100"));
  880. ASSERT_OK(Delete("999"));
  881. dbfull()->TEST_CompactMemTable();
  882. ASSERT_EQ("0,1,1", FilesPerLevel());
  883. // Make files spanning the following ranges in level-0:
  884. // files[0] 200 .. 900
  885. // files[1] 300 .. 500
  886. // Note that files are sorted by smallest key.
  887. ASSERT_OK(Put("300", "v300"));
  888. ASSERT_OK(Put("500", "v500"));
  889. dbfull()->TEST_CompactMemTable();
  890. ASSERT_OK(Put("200", "v200"));
  891. ASSERT_OK(Put("600", "v600"));
  892. ASSERT_OK(Put("900", "v900"));
  893. dbfull()->TEST_CompactMemTable();
  894. ASSERT_EQ("2,1,1", FilesPerLevel());
  895. // Compact away the placeholder files we created initially
  896. dbfull()->TEST_CompactRange(1, NULL, NULL);
  897. dbfull()->TEST_CompactRange(2, NULL, NULL);
  898. ASSERT_EQ("2", FilesPerLevel());
  899. // Do a memtable compaction. Before bug-fix, the compaction would
  900. // not detect the overlap with level-0 files and would incorrectly place
  901. // the deletion in a deeper level.
  902. ASSERT_OK(Delete("600"));
  903. dbfull()->TEST_CompactMemTable();
  904. ASSERT_EQ("3", FilesPerLevel());
  905. ASSERT_EQ("NOT_FOUND", Get("600"));
  906. }
  907. TEST(DBTest, ComparatorCheck) {
  908. class NewComparator : public Comparator {
  909. public:
  910. virtual const char* Name() const { return "leveldb.NewComparator"; }
  911. virtual int Compare(const Slice& a, const Slice& b) const {
  912. return BytewiseComparator()->Compare(a, b);
  913. }
  914. virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
  915. BytewiseComparator()->FindShortestSeparator(s, l);
  916. }
  917. virtual void FindShortSuccessor(std::string* key) const {
  918. BytewiseComparator()->FindShortSuccessor(key);
  919. }
  920. };
  921. NewComparator cmp;
  922. Options new_options;
  923. new_options.comparator = &cmp;
  924. Status s = TryReopen(&new_options);
  925. ASSERT_TRUE(!s.ok());
  926. ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
  927. << s.ToString();
  928. }
  929. TEST(DBTest, ManualCompaction) {
  930. ASSERT_EQ(config::kMaxMemCompactLevel, 2)
  931. << "Need to update this test to match kMaxMemCompactLevel";
  932. MakeTables(3, "p", "q");
  933. ASSERT_EQ("1,1,1", FilesPerLevel());
  934. // Compaction range falls before files
  935. Compact("", "c");
  936. ASSERT_EQ("1,1,1", FilesPerLevel());
  937. // Compaction range falls after files
  938. Compact("r", "z");
  939. ASSERT_EQ("1,1,1", FilesPerLevel());
  940. // Compaction range overlaps files
  941. Compact("p1", "p9");
  942. ASSERT_EQ("0,0,1", FilesPerLevel());
  943. // Populate a different range
  944. MakeTables(3, "c", "e");
  945. ASSERT_EQ("1,1,2", FilesPerLevel());
  946. // Compact just the new range
  947. Compact("b", "f");
  948. ASSERT_EQ("0,0,2", FilesPerLevel());
  949. // Compact all
  950. MakeTables(1, "a", "z");
  951. ASSERT_EQ("0,1,2", FilesPerLevel());
  952. db_->CompactRange(NULL, NULL);
  953. ASSERT_EQ("0,0,1", FilesPerLevel());
  954. }
  955. TEST(DBTest, DBOpen_Options) {
  956. std::string dbname = test::TmpDir() + "/db_options_test";
  957. DestroyDB(dbname, Options());
  958. // Does not exist, and create_if_missing == false: error
  959. DB* db = NULL;
  960. Options opts;
  961. opts.create_if_missing = false;
  962. Status s = DB::Open(opts, dbname, &db);
  963. ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL);
  964. ASSERT_TRUE(db == NULL);
  965. // Does not exist, and create_if_missing == true: OK
  966. opts.create_if_missing = true;
  967. s = DB::Open(opts, dbname, &db);
  968. ASSERT_OK(s);
  969. ASSERT_TRUE(db != NULL);
  970. delete db;
  971. db = NULL;
  972. // Does exist, and error_if_exists == true: error
  973. opts.create_if_missing = false;
  974. opts.error_if_exists = true;
  975. s = DB::Open(opts, dbname, &db);
  976. ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL);
  977. ASSERT_TRUE(db == NULL);
  978. // Does exist, and error_if_exists == false: OK
  979. opts.create_if_missing = true;
  980. opts.error_if_exists = false;
  981. s = DB::Open(opts, dbname, &db);
  982. ASSERT_OK(s);
  983. ASSERT_TRUE(db != NULL);
  984. delete db;
  985. db = NULL;
  986. }
  987. // Multi-threaded test:
  988. namespace {
  989. static const int kNumThreads = 4;
  990. static const int kTestSeconds = 10;
  991. static const int kNumKeys = 1000;
  992. struct MTState {
  993. DBTest* test;
  994. port::AtomicPointer stop;
  995. port::AtomicPointer counter[kNumThreads];
  996. port::AtomicPointer thread_done[kNumThreads];
  997. };
  998. struct MTThread {
  999. MTState* state;
  1000. int id;
  1001. };
  1002. static void MTThreadBody(void* arg) {
  1003. MTThread* t = reinterpret_cast<MTThread*>(arg);
  1004. DB* db = t->state->test->db_;
  1005. uintptr_t counter = 0;
  1006. fprintf(stderr, "... starting thread %d\n", t->id);
  1007. Random rnd(1000 + t->id);
  1008. std::string value;
  1009. char valbuf[1500];
  1010. while (t->state->stop.Acquire_Load() == NULL) {
  1011. t->state->counter[t->id].Release_Store(reinterpret_cast<void*>(counter));
  1012. int key = rnd.Uniform(kNumKeys);
  1013. char keybuf[20];
  1014. snprintf(keybuf, sizeof(keybuf), "%016d", key);
  1015. if (rnd.OneIn(2)) {
  1016. // Write values of the form <key, my id, counter>.
  1017. // We add some padding for force compactions.
  1018. snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d",
  1019. key, t->id, static_cast<int>(counter));
  1020. ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
  1021. } else {
  1022. // Read a value and verify that it matches the pattern written above.
  1023. Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
  1024. if (s.IsNotFound()) {
  1025. // Key has not yet been written
  1026. } else {
  1027. // Check that the writer thread counter is >= the counter in the value
  1028. ASSERT_OK(s);
  1029. int k, w, c;
  1030. ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
  1031. ASSERT_EQ(k, key);
  1032. ASSERT_GE(w, 0);
  1033. ASSERT_LT(w, kNumThreads);
  1034. ASSERT_LE(c, reinterpret_cast<uintptr_t>(
  1035. t->state->counter[w].Acquire_Load()));
  1036. }
  1037. }
  1038. counter++;
  1039. }
  1040. t->state->thread_done[t->id].Release_Store(t);
  1041. fprintf(stderr, "... stopping thread %d after %d ops\n", t->id, int(counter));
  1042. }
  1043. }
  1044. TEST(DBTest, MultiThreaded) {
  1045. // Initialize state
  1046. MTState mt;
  1047. mt.test = this;
  1048. mt.stop.Release_Store(0);
  1049. for (int id = 0; id < kNumThreads; id++) {
  1050. mt.counter[id].Release_Store(0);
  1051. mt.thread_done[id].Release_Store(0);
  1052. }
  1053. // Start threads
  1054. MTThread thread[kNumThreads];
  1055. for (int id = 0; id < kNumThreads; id++) {
  1056. thread[id].state = &mt;
  1057. thread[id].id = id;
  1058. env_->StartThread(MTThreadBody, &thread[id]);
  1059. }
  1060. // Let them run for a while
  1061. env_->SleepForMicroseconds(kTestSeconds * 1000000);
  1062. // Stop the threads and wait for them to finish
  1063. mt.stop.Release_Store(&mt);
  1064. for (int id = 0; id < kNumThreads; id++) {
  1065. while (mt.thread_done[id].Acquire_Load() == NULL) {
  1066. env_->SleepForMicroseconds(100000);
  1067. }
  1068. }
  1069. }
  1070. namespace {
  1071. typedef std::map<std::string, std::string> KVMap;
  1072. }
  1073. class ModelDB: public DB {
  1074. public:
  1075. class ModelSnapshot : public Snapshot {
  1076. public:
  1077. KVMap map_;
  1078. };
  1079. explicit ModelDB(const Options& options): options_(options) { }
  1080. ~ModelDB() { }
  1081. virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) {
  1082. return DB::Put(o, k, v);
  1083. }
  1084. virtual Status Delete(const WriteOptions& o, const Slice& key) {
  1085. return DB::Delete(o, key);
  1086. }
  1087. virtual Status Get(const ReadOptions& options,
  1088. const Slice& key, std::string* value) {
  1089. assert(false); // Not implemented
  1090. return Status::NotFound(key);
  1091. }
  1092. virtual Iterator* NewIterator(const ReadOptions& options) {
  1093. if (options.snapshot == NULL) {
  1094. KVMap* saved = new KVMap;
  1095. *saved = map_;
  1096. return new ModelIter(saved, true);
  1097. } else {
  1098. const KVMap* snapshot_state =
  1099. &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
  1100. return new ModelIter(snapshot_state, false);
  1101. }
  1102. }
  1103. virtual const Snapshot* GetSnapshot() {
  1104. ModelSnapshot* snapshot = new ModelSnapshot;
  1105. snapshot->map_ = map_;
  1106. return snapshot;
  1107. }
  1108. virtual void ReleaseSnapshot(const Snapshot* snapshot) {
  1109. delete reinterpret_cast<const ModelSnapshot*>(snapshot);
  1110. }
  1111. virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
  1112. class Handler : public WriteBatch::Handler {
  1113. public:
  1114. KVMap* map_;
  1115. virtual void Put(const Slice& key, const Slice& value) {
  1116. (*map_)[key.ToString()] = value.ToString();
  1117. }
  1118. virtual void Delete(const Slice& key) {
  1119. map_->erase(key.ToString());
  1120. }
  1121. };
  1122. Handler handler;
  1123. handler.map_ = &map_;
  1124. return batch->Iterate(&handler);
  1125. }
  1126. virtual bool GetProperty(const Slice& property, std::string* value) {
  1127. return false;
  1128. }
  1129. virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
  1130. for (int i = 0; i < n; i++) {
  1131. sizes[i] = 0;
  1132. }
  1133. }
  1134. virtual void CompactRange(const Slice* start, const Slice* end) {
  1135. }
  1136. private:
  1137. class ModelIter: public Iterator {
  1138. public:
  1139. ModelIter(const KVMap* map, bool owned)
  1140. : map_(map), owned_(owned), iter_(map_->end()) {
  1141. }
  1142. ~ModelIter() {
  1143. if (owned_) delete map_;
  1144. }
  1145. virtual bool Valid() const { return iter_ != map_->end(); }
  1146. virtual void SeekToFirst() { iter_ = map_->begin(); }
  1147. virtual void SeekToLast() {
  1148. if (map_->empty()) {
  1149. iter_ = map_->end();
  1150. } else {
  1151. iter_ = map_->find(map_->rbegin()->first);
  1152. }
  1153. }
  1154. virtual void Seek(const Slice& k) {
  1155. iter_ = map_->lower_bound(k.ToString());
  1156. }
  1157. virtual void Next() { ++iter_; }
  1158. virtual void Prev() { --iter_; }
  1159. virtual Slice key() const { return iter_->first; }
  1160. virtual Slice value() const { return iter_->second; }
  1161. virtual Status status() const { return Status::OK(); }
  1162. private:
  1163. const KVMap* const map_;
  1164. const bool owned_; // Do we own map_
  1165. KVMap::const_iterator iter_;
  1166. };
  1167. const Options options_;
  1168. KVMap map_;
  1169. };
  1170. static std::string RandomKey(Random* rnd) {
  1171. int len = (rnd->OneIn(3)
  1172. ? 1 // Short sometimes to encourage collisions
  1173. : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
  1174. return test::RandomKey(rnd, len);
  1175. }
  1176. static bool CompareIterators(int step,
  1177. DB* model,
  1178. DB* db,
  1179. const Snapshot* model_snap,
  1180. const Snapshot* db_snap) {
  1181. ReadOptions options;
  1182. options.snapshot = model_snap;
  1183. Iterator* miter = model->NewIterator(options);
  1184. options.snapshot = db_snap;
  1185. Iterator* dbiter = db->NewIterator(options);
  1186. bool ok = true;
  1187. int count = 0;
  1188. for (miter->SeekToFirst(), dbiter->SeekToFirst();
  1189. ok && miter->Valid() && dbiter->Valid();
  1190. miter->Next(), dbiter->Next()) {
  1191. count++;
  1192. if (miter->key().compare(dbiter->key()) != 0) {
  1193. fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
  1194. step,
  1195. EscapeString(miter->key()).c_str(),
  1196. EscapeString(dbiter->key()).c_str());
  1197. ok = false;
  1198. break;
  1199. }
  1200. if (miter->value().compare(dbiter->value()) != 0) {
  1201. fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
  1202. step,
  1203. EscapeString(miter->key()).c_str(),
  1204. EscapeString(miter->value()).c_str(),
  1205. EscapeString(miter->value()).c_str());
  1206. ok = false;
  1207. }
  1208. }
  1209. if (ok) {
  1210. if (miter->Valid() != dbiter->Valid()) {
  1211. fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
  1212. step, miter->Valid(), dbiter->Valid());
  1213. ok = false;
  1214. }
  1215. }
  1216. fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
  1217. delete miter;
  1218. delete dbiter;
  1219. return ok;
  1220. }
  1221. TEST(DBTest, Randomized) {
  1222. Random rnd(test::RandomSeed());
  1223. ModelDB model(last_options_);
  1224. const int N = 10000;
  1225. const Snapshot* model_snap = NULL;
  1226. const Snapshot* db_snap = NULL;
  1227. std::string k, v;
  1228. for (int step = 0; step < N; step++) {
  1229. if (step % 100 == 0) {
  1230. fprintf(stderr, "Step %d of %d\n", step, N);
  1231. }
  1232. int p = rnd.Uniform(100);
  1233. if (p < 45) { // Put
  1234. k = RandomKey(&rnd);
  1235. v = RandomString(&rnd,
  1236. rnd.OneIn(20)
  1237. ? 100 + rnd.Uniform(100)
  1238. : rnd.Uniform(8));
  1239. ASSERT_OK(model.Put(WriteOptions(), k, v));
  1240. ASSERT_OK(db_->Put(WriteOptions(), k, v));
  1241. } else if (p < 90) { // Delete
  1242. k = RandomKey(&rnd);
  1243. ASSERT_OK(model.Delete(WriteOptions(), k));
  1244. ASSERT_OK(db_->Delete(WriteOptions(), k));
  1245. } else { // Multi-element batch
  1246. WriteBatch b;
  1247. const int num = rnd.Uniform(8);
  1248. for (int i = 0; i < num; i++) {
  1249. if (i == 0 || !rnd.OneIn(10)) {
  1250. k = RandomKey(&rnd);
  1251. } else {
  1252. // Periodically re-use the same key from the previous iter, so
  1253. // we have multiple entries in the write batch for the same key
  1254. }
  1255. if (rnd.OneIn(2)) {
  1256. v = RandomString(&rnd, rnd.Uniform(10));
  1257. b.Put(k, v);
  1258. } else {
  1259. b.Delete(k);
  1260. }
  1261. }
  1262. ASSERT_OK(model.Write(WriteOptions(), &b));
  1263. ASSERT_OK(db_->Write(WriteOptions(), &b));
  1264. }
  1265. if ((step % 100) == 0) {
  1266. ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
  1267. ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
  1268. // Save a snapshot from each DB this time that we'll use next
  1269. // time we compare things, to make sure the current state is
  1270. // preserved with the snapshot
  1271. if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
  1272. if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
  1273. Reopen();
  1274. ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
  1275. model_snap = model.GetSnapshot();
  1276. db_snap = db_->GetSnapshot();
  1277. }
  1278. }
  1279. if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
  1280. if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
  1281. }
  // Renders `num` in decimal, left-padded with zeros to at least 16 characters.
  std::string MakeKey(unsigned int num) {
    char digits[30];
    snprintf(digits, sizeof(digits), "%016u", num);
    std::string key(digits);
    return key;
  }
  1287. void BM_LogAndApply(int iters, int num_base_files) {
  1288. std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
  1289. DestroyDB(dbname, Options());
  1290. DB* db = NULL;
  1291. Options opts;
  1292. opts.create_if_missing = true;
  1293. Status s = DB::Open(opts, dbname, &db);
  1294. ASSERT_OK(s);
  1295. ASSERT_TRUE(db != NULL);
  1296. delete db;
  1297. db = NULL;
  1298. Env* env = Env::Default();
  1299. port::Mutex mu;
  1300. MutexLock l(&mu);
  1301. InternalKeyComparator cmp(BytewiseComparator());
  1302. Options options;
  1303. VersionSet vset(dbname, &options, NULL, &cmp);
  1304. ASSERT_OK(vset.Recover());
  1305. VersionEdit vbase;
  1306. uint64_t fnum = 1;
  1307. for (int i = 0; i < num_base_files; i++) {
  1308. InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
  1309. InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
  1310. vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
  1311. }
  1312. ASSERT_OK(vset.LogAndApply(&vbase, &mu));
  1313. uint64_t start_micros = env->NowMicros();
  1314. for (int i = 0; i < iters; i++) {
  1315. VersionEdit vedit;
  1316. vedit.DeleteFile(2, fnum);
  1317. InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
  1318. InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
  1319. vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
  1320. vset.LogAndApply(&vedit, &mu);
  1321. }
  1322. uint64_t stop_micros = env->NowMicros();
  1323. unsigned int us = stop_micros - start_micros;
  1324. char buf[16];
  1325. snprintf(buf, sizeof(buf), "%d", num_base_files);
  1326. fprintf(stderr,
  1327. "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n",
  1328. buf, iters, us, ((float)us) / iters);
  1329. }
  1330. }
  1331. int main(int argc, char** argv) {
  1332. if (argc > 1 && std::string(argv[1]) == "--benchmark") {
  1333. leveldb::BM_LogAndApply(1000, 1);
  1334. leveldb::BM_LogAndApply(1000, 100);
  1335. leveldb::BM_LogAndApply(1000, 10000);
  1336. leveldb::BM_LogAndApply(100, 100000);
  1337. return 0;
  1338. }
  1339. return leveldb::test::RunAllTests();
  1340. }