10215300402 朱维清 10222140408 谷杰
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

1638 行
47 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/db.h"
  5. #include "db/db_impl.h"
  6. #include "db/filename.h"
  7. #include "db/version_set.h"
  8. #include "db/write_batch_internal.h"
  9. #include "leveldb/env.h"
  10. #include "leveldb/table.h"
  11. #include "util/logging.h"
  12. #include "util/mutexlock.h"
  13. #include "util/testharness.h"
  14. #include "util/testutil.h"
  15. namespace leveldb {
  16. static std::string RandomString(Random* rnd, int len) {
  17. std::string r;
  18. test::RandomString(rnd, len, &r);
  19. return r;
  20. }
  21. // Special Env used to delay background operations
  22. class SpecialEnv : public EnvWrapper {
  23. public:
  24. // sstable Sync() calls are blocked while this pointer is non-NULL.
  25. port::AtomicPointer delay_sstable_sync_;
  26. explicit SpecialEnv(Env* base) : EnvWrapper(base) {
  27. delay_sstable_sync_.Release_Store(NULL);
  28. }
  29. Status NewWritableFile(const std::string& f, WritableFile** r) {
  30. class SSTableFile : public WritableFile {
  31. private:
  32. SpecialEnv* env_;
  33. WritableFile* base_;
  34. public:
  35. SSTableFile(SpecialEnv* env, WritableFile* base)
  36. : env_(env),
  37. base_(base) {
  38. }
  39. ~SSTableFile() { delete base_; }
  40. Status Append(const Slice& data) { return base_->Append(data); }
  41. Status Close() { return base_->Close(); }
  42. Status Flush() { return base_->Flush(); }
  43. Status Sync() {
  44. while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
  45. env_->SleepForMicroseconds(100000);
  46. }
  47. return base_->Sync();
  48. }
  49. };
  50. Status s = target()->NewWritableFile(f, r);
  51. if (s.ok()) {
  52. if (strstr(f.c_str(), ".sst") != NULL) {
  53. *r = new SSTableFile(this, *r);
  54. }
  55. }
  56. return s;
  57. }
  58. };
  59. class DBTest {
  60. public:
  61. std::string dbname_;
  62. SpecialEnv* env_;
  63. DB* db_;
  64. Options last_options_;
  65. DBTest() : env_(new SpecialEnv(Env::Default())) {
  66. dbname_ = test::TmpDir() + "/db_test";
  67. DestroyDB(dbname_, Options());
  68. db_ = NULL;
  69. Reopen();
  70. }
  71. ~DBTest() {
  72. delete db_;
  73. DestroyDB(dbname_, Options());
  74. delete env_;
  75. }
  76. DBImpl* dbfull() {
  77. return reinterpret_cast<DBImpl*>(db_);
  78. }
  79. void Reopen(Options* options = NULL) {
  80. ASSERT_OK(TryReopen(options));
  81. }
  82. void DestroyAndReopen(Options* options = NULL) {
  83. delete db_;
  84. db_ = NULL;
  85. DestroyDB(dbname_, Options());
  86. ASSERT_OK(TryReopen(options));
  87. }
  88. Status TryReopen(Options* options) {
  89. delete db_;
  90. db_ = NULL;
  91. Options opts;
  92. if (options != NULL) {
  93. opts = *options;
  94. } else {
  95. opts.create_if_missing = true;
  96. }
  97. last_options_ = opts;
  98. return DB::Open(opts, dbname_, &db_);
  99. }
  100. Status Put(const std::string& k, const std::string& v) {
  101. return db_->Put(WriteOptions(), k, v);
  102. }
  103. Status Delete(const std::string& k) {
  104. return db_->Delete(WriteOptions(), k);
  105. }
  106. std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
  107. ReadOptions options;
  108. options.snapshot = snapshot;
  109. std::string result;
  110. Status s = db_->Get(options, k, &result);
  111. if (s.IsNotFound()) {
  112. result = "NOT_FOUND";
  113. } else if (!s.ok()) {
  114. result = s.ToString();
  115. }
  116. return result;
  117. }
  118. // Return a string that contains all key,value pairs in order,
  119. // formatted like "(k1->v1)(k2->v2)".
  120. std::string Contents() {
  121. std::vector<std::string> forward;
  122. std::string result;
  123. Iterator* iter = db_->NewIterator(ReadOptions());
  124. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  125. std::string s = IterStatus(iter);
  126. result.push_back('(');
  127. result.append(s);
  128. result.push_back(')');
  129. forward.push_back(s);
  130. }
  131. // Check reverse iteration results are the reverse of forward results
  132. int matched = 0;
  133. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  134. ASSERT_LT(matched, forward.size());
  135. ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
  136. matched++;
  137. }
  138. ASSERT_EQ(matched, forward.size());
  139. delete iter;
  140. return result;
  141. }
  142. std::string AllEntriesFor(const Slice& user_key) {
  143. Iterator* iter = dbfull()->TEST_NewInternalIterator();
  144. InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
  145. iter->Seek(target.Encode());
  146. std::string result;
  147. if (!iter->status().ok()) {
  148. result = iter->status().ToString();
  149. } else {
  150. result = "[ ";
  151. bool first = true;
  152. while (iter->Valid()) {
  153. ParsedInternalKey ikey;
  154. if (!ParseInternalKey(iter->key(), &ikey)) {
  155. result += "CORRUPTED";
  156. } else {
  157. if (last_options_.comparator->Compare(
  158. ikey.user_key, user_key) != 0) {
  159. break;
  160. }
  161. if (!first) {
  162. result += ", ";
  163. }
  164. first = false;
  165. switch (ikey.type) {
  166. case kTypeValue:
  167. result += iter->value().ToString();
  168. break;
  169. case kTypeDeletion:
  170. result += "DEL";
  171. break;
  172. }
  173. }
  174. iter->Next();
  175. }
  176. if (!first) {
  177. result += " ";
  178. }
  179. result += "]";
  180. }
  181. delete iter;
  182. return result;
  183. }
  184. int NumTableFilesAtLevel(int level) {
  185. std::string property;
  186. ASSERT_TRUE(
  187. db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level),
  188. &property));
  189. return atoi(property.c_str());
  190. }
  191. int TotalTableFiles() {
  192. int result = 0;
  193. for (int level = 0; level < config::kNumLevels; level++) {
  194. result += NumTableFilesAtLevel(level);
  195. }
  196. return result;
  197. }
  198. // Return spread of files per level
  199. std::string FilesPerLevel() {
  200. std::string result;
  201. int last_non_zero_offset = 0;
  202. for (int level = 0; level < config::kNumLevels; level++) {
  203. int f = NumTableFilesAtLevel(level);
  204. char buf[100];
  205. snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
  206. result += buf;
  207. if (f > 0) {
  208. last_non_zero_offset = result.size();
  209. }
  210. }
  211. result.resize(last_non_zero_offset);
  212. return result;
  213. }
  214. uint64_t Size(const Slice& start, const Slice& limit) {
  215. Range r(start, limit);
  216. uint64_t size;
  217. db_->GetApproximateSizes(&r, 1, &size);
  218. return size;
  219. }
  220. void Compact(const Slice& start, const Slice& limit) {
  221. db_->CompactRange(&start, &limit);
  222. }
  223. // Do n memtable compactions, each of which produces an sstable
  224. // covering the range [small,large].
  225. void MakeTables(int n, const std::string& small, const std::string& large) {
  226. for (int i = 0; i < n; i++) {
  227. Put(small, "begin");
  228. Put(large, "end");
  229. dbfull()->TEST_CompactMemTable();
  230. }
  231. }
  232. // Prevent pushing of new sstables into deeper levels by adding
  233. // tables that cover a specified range to all levels.
  234. void FillLevels(const std::string& smallest, const std::string& largest) {
  235. MakeTables(config::kNumLevels, smallest, largest);
  236. }
  237. void DumpFileCounts(const char* label) {
  238. fprintf(stderr, "---\n%s:\n", label);
  239. fprintf(stderr, "maxoverlap: %lld\n",
  240. static_cast<long long>(
  241. dbfull()->TEST_MaxNextLevelOverlappingBytes()));
  242. for (int level = 0; level < config::kNumLevels; level++) {
  243. int num = NumTableFilesAtLevel(level);
  244. if (num > 0) {
  245. fprintf(stderr, " level %3d : %d files\n", level, num);
  246. }
  247. }
  248. }
  249. std::string DumpSSTableList() {
  250. std::string property;
  251. db_->GetProperty("leveldb.sstables", &property);
  252. return property;
  253. }
  254. std::string IterStatus(Iterator* iter) {
  255. std::string result;
  256. if (iter->Valid()) {
  257. result = iter->key().ToString() + "->" + iter->value().ToString();
  258. } else {
  259. result = "(invalid)";
  260. }
  261. return result;
  262. }
  263. };
  264. TEST(DBTest, Empty) {
  265. ASSERT_TRUE(db_ != NULL);
  266. ASSERT_EQ("NOT_FOUND", Get("foo"));
  267. }
  268. TEST(DBTest, ReadWrite) {
  269. ASSERT_OK(Put("foo", "v1"));
  270. ASSERT_EQ("v1", Get("foo"));
  271. ASSERT_OK(Put("bar", "v2"));
  272. ASSERT_OK(Put("foo", "v3"));
  273. ASSERT_EQ("v3", Get("foo"));
  274. ASSERT_EQ("v2", Get("bar"));
  275. }
  276. TEST(DBTest, PutDeleteGet) {
  277. ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
  278. ASSERT_EQ("v1", Get("foo"));
  279. ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
  280. ASSERT_EQ("v2", Get("foo"));
  281. ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
  282. ASSERT_EQ("NOT_FOUND", Get("foo"));
  283. }
  284. TEST(DBTest, GetFromImmutableLayer) {
  285. Options options;
  286. options.env = env_;
  287. options.write_buffer_size = 100000; // Small write buffer
  288. Reopen(&options);
  289. ASSERT_OK(Put("foo", "v1"));
  290. ASSERT_EQ("v1", Get("foo"));
  291. env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
  292. Put("k1", std::string(100000, 'x')); // Fill memtable
  293. Put("k2", std::string(100000, 'y')); // Trigger compaction
  294. ASSERT_EQ("v1", Get("foo"));
  295. env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls
  296. }
  297. TEST(DBTest, GetFromVersions) {
  298. ASSERT_OK(Put("foo", "v1"));
  299. dbfull()->TEST_CompactMemTable();
  300. ASSERT_EQ("v1", Get("foo"));
  301. }
  302. TEST(DBTest, GetSnapshot) {
  303. // Try with both a short key and a long key
  304. for (int i = 0; i < 2; i++) {
  305. std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
  306. ASSERT_OK(Put(key, "v1"));
  307. const Snapshot* s1 = db_->GetSnapshot();
  308. ASSERT_OK(Put(key, "v2"));
  309. ASSERT_EQ("v2", Get(key));
  310. ASSERT_EQ("v1", Get(key, s1));
  311. dbfull()->TEST_CompactMemTable();
  312. ASSERT_EQ("v2", Get(key));
  313. ASSERT_EQ("v1", Get(key, s1));
  314. db_->ReleaseSnapshot(s1);
  315. }
  316. }
  317. TEST(DBTest, GetLevel0Ordering) {
  318. // Check that we process level-0 files in correct order. The code
  319. // below generates two level-0 files where the earlier one comes
  320. // before the later one in the level-0 file list since the earlier
  321. // one has a smaller "smallest" key.
  322. ASSERT_OK(Put("bar", "b"));
  323. ASSERT_OK(Put("foo", "v1"));
  324. dbfull()->TEST_CompactMemTable();
  325. ASSERT_OK(Put("foo", "v2"));
  326. dbfull()->TEST_CompactMemTable();
  327. ASSERT_EQ("v2", Get("foo"));
  328. }
  329. TEST(DBTest, GetOrderedByLevels) {
  330. ASSERT_OK(Put("foo", "v1"));
  331. Compact("a", "z");
  332. ASSERT_EQ("v1", Get("foo"));
  333. ASSERT_OK(Put("foo", "v2"));
  334. ASSERT_EQ("v2", Get("foo"));
  335. dbfull()->TEST_CompactMemTable();
  336. ASSERT_EQ("v2", Get("foo"));
  337. }
  338. TEST(DBTest, GetPicksCorrectFile) {
  339. // Arrange to have multiple files in a non-level-0 level.
  340. ASSERT_OK(Put("a", "va"));
  341. Compact("a", "b");
  342. ASSERT_OK(Put("x", "vx"));
  343. Compact("x", "y");
  344. ASSERT_OK(Put("f", "vf"));
  345. Compact("f", "g");
  346. ASSERT_EQ("va", Get("a"));
  347. ASSERT_EQ("vf", Get("f"));
  348. ASSERT_EQ("vx", Get("x"));
  349. }
  350. TEST(DBTest, GetEncountersEmptyLevel) {
  351. // Arrange for the following to happen:
  352. // * sstable A in level 0
  353. // * nothing in level 1
  354. // * sstable B in level 2
  355. // Then do enough Get() calls to arrange for an automatic compaction
  356. // of sstable A. A bug would cause the compaction to be marked as
  357. // occuring at level 1 (instead of the correct level 0).
  358. // Step 1: First place sstables in levels 0 and 2
  359. int compaction_count = 0;
  360. while (NumTableFilesAtLevel(0) == 0 ||
  361. NumTableFilesAtLevel(2) == 0) {
  362. ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
  363. compaction_count++;
  364. Put("a", "begin");
  365. Put("z", "end");
  366. dbfull()->TEST_CompactMemTable();
  367. }
  368. // Step 2: clear level 1 if necessary.
  369. dbfull()->TEST_CompactRange(1, NULL, NULL);
  370. ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  371. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  372. ASSERT_EQ(NumTableFilesAtLevel(2), 1);
  373. // Step 3: read until level 0 compaction disappears.
  374. int read_count = 0;
  375. while (NumTableFilesAtLevel(0) > 0) {
  376. ASSERT_LE(read_count, 10000) << "did not trigger level 0 compaction";
  377. read_count++;
  378. ASSERT_EQ("NOT_FOUND", Get("missing"));
  379. }
  380. }
  381. TEST(DBTest, IterEmpty) {
  382. Iterator* iter = db_->NewIterator(ReadOptions());
  383. iter->SeekToFirst();
  384. ASSERT_EQ(IterStatus(iter), "(invalid)");
  385. iter->SeekToLast();
  386. ASSERT_EQ(IterStatus(iter), "(invalid)");
  387. iter->Seek("foo");
  388. ASSERT_EQ(IterStatus(iter), "(invalid)");
  389. delete iter;
  390. }
  391. TEST(DBTest, IterSingle) {
  392. ASSERT_OK(Put("a", "va"));
  393. Iterator* iter = db_->NewIterator(ReadOptions());
  394. iter->SeekToFirst();
  395. ASSERT_EQ(IterStatus(iter), "a->va");
  396. iter->Next();
  397. ASSERT_EQ(IterStatus(iter), "(invalid)");
  398. iter->SeekToFirst();
  399. ASSERT_EQ(IterStatus(iter), "a->va");
  400. iter->Prev();
  401. ASSERT_EQ(IterStatus(iter), "(invalid)");
  402. iter->SeekToLast();
  403. ASSERT_EQ(IterStatus(iter), "a->va");
  404. iter->Next();
  405. ASSERT_EQ(IterStatus(iter), "(invalid)");
  406. iter->SeekToLast();
  407. ASSERT_EQ(IterStatus(iter), "a->va");
  408. iter->Prev();
  409. ASSERT_EQ(IterStatus(iter), "(invalid)");
  410. iter->Seek("");
  411. ASSERT_EQ(IterStatus(iter), "a->va");
  412. iter->Next();
  413. ASSERT_EQ(IterStatus(iter), "(invalid)");
  414. iter->Seek("a");
  415. ASSERT_EQ(IterStatus(iter), "a->va");
  416. iter->Next();
  417. ASSERT_EQ(IterStatus(iter), "(invalid)");
  418. iter->Seek("b");
  419. ASSERT_EQ(IterStatus(iter), "(invalid)");
  420. delete iter;
  421. }
  422. TEST(DBTest, IterMulti) {
  423. ASSERT_OK(Put("a", "va"));
  424. ASSERT_OK(Put("b", "vb"));
  425. ASSERT_OK(Put("c", "vc"));
  426. Iterator* iter = db_->NewIterator(ReadOptions());
  427. iter->SeekToFirst();
  428. ASSERT_EQ(IterStatus(iter), "a->va");
  429. iter->Next();
  430. ASSERT_EQ(IterStatus(iter), "b->vb");
  431. iter->Next();
  432. ASSERT_EQ(IterStatus(iter), "c->vc");
  433. iter->Next();
  434. ASSERT_EQ(IterStatus(iter), "(invalid)");
  435. iter->SeekToFirst();
  436. ASSERT_EQ(IterStatus(iter), "a->va");
  437. iter->Prev();
  438. ASSERT_EQ(IterStatus(iter), "(invalid)");
  439. iter->SeekToLast();
  440. ASSERT_EQ(IterStatus(iter), "c->vc");
  441. iter->Prev();
  442. ASSERT_EQ(IterStatus(iter), "b->vb");
  443. iter->Prev();
  444. ASSERT_EQ(IterStatus(iter), "a->va");
  445. iter->Prev();
  446. ASSERT_EQ(IterStatus(iter), "(invalid)");
  447. iter->SeekToLast();
  448. ASSERT_EQ(IterStatus(iter), "c->vc");
  449. iter->Next();
  450. ASSERT_EQ(IterStatus(iter), "(invalid)");
  451. iter->Seek("");
  452. ASSERT_EQ(IterStatus(iter), "a->va");
  453. iter->Seek("a");
  454. ASSERT_EQ(IterStatus(iter), "a->va");
  455. iter->Seek("ax");
  456. ASSERT_EQ(IterStatus(iter), "b->vb");
  457. iter->Seek("b");
  458. ASSERT_EQ(IterStatus(iter), "b->vb");
  459. iter->Seek("z");
  460. ASSERT_EQ(IterStatus(iter), "(invalid)");
  461. // Switch from reverse to forward
  462. iter->SeekToLast();
  463. iter->Prev();
  464. iter->Prev();
  465. iter->Next();
  466. ASSERT_EQ(IterStatus(iter), "b->vb");
  467. // Switch from forward to reverse
  468. iter->SeekToFirst();
  469. iter->Next();
  470. iter->Next();
  471. iter->Prev();
  472. ASSERT_EQ(IterStatus(iter), "b->vb");
  473. // Make sure iter stays at snapshot
  474. ASSERT_OK(Put("a", "va2"));
  475. ASSERT_OK(Put("a2", "va3"));
  476. ASSERT_OK(Put("b", "vb2"));
  477. ASSERT_OK(Put("c", "vc2"));
  478. ASSERT_OK(Delete("b"));
  479. iter->SeekToFirst();
  480. ASSERT_EQ(IterStatus(iter), "a->va");
  481. iter->Next();
  482. ASSERT_EQ(IterStatus(iter), "b->vb");
  483. iter->Next();
  484. ASSERT_EQ(IterStatus(iter), "c->vc");
  485. iter->Next();
  486. ASSERT_EQ(IterStatus(iter), "(invalid)");
  487. iter->SeekToLast();
  488. ASSERT_EQ(IterStatus(iter), "c->vc");
  489. iter->Prev();
  490. ASSERT_EQ(IterStatus(iter), "b->vb");
  491. iter->Prev();
  492. ASSERT_EQ(IterStatus(iter), "a->va");
  493. iter->Prev();
  494. ASSERT_EQ(IterStatus(iter), "(invalid)");
  495. delete iter;
  496. }
  497. TEST(DBTest, IterSmallAndLargeMix) {
  498. ASSERT_OK(Put("a", "va"));
  499. ASSERT_OK(Put("b", std::string(100000, 'b')));
  500. ASSERT_OK(Put("c", "vc"));
  501. ASSERT_OK(Put("d", std::string(100000, 'd')));
  502. ASSERT_OK(Put("e", std::string(100000, 'e')));
  503. Iterator* iter = db_->NewIterator(ReadOptions());
  504. iter->SeekToFirst();
  505. ASSERT_EQ(IterStatus(iter), "a->va");
  506. iter->Next();
  507. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  508. iter->Next();
  509. ASSERT_EQ(IterStatus(iter), "c->vc");
  510. iter->Next();
  511. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  512. iter->Next();
  513. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  514. iter->Next();
  515. ASSERT_EQ(IterStatus(iter), "(invalid)");
  516. iter->SeekToLast();
  517. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  518. iter->Prev();
  519. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  520. iter->Prev();
  521. ASSERT_EQ(IterStatus(iter), "c->vc");
  522. iter->Prev();
  523. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  524. iter->Prev();
  525. ASSERT_EQ(IterStatus(iter), "a->va");
  526. iter->Prev();
  527. ASSERT_EQ(IterStatus(iter), "(invalid)");
  528. delete iter;
  529. }
  530. TEST(DBTest, IterMultiWithDelete) {
  531. ASSERT_OK(Put("a", "va"));
  532. ASSERT_OK(Put("b", "vb"));
  533. ASSERT_OK(Put("c", "vc"));
  534. ASSERT_OK(Delete("b"));
  535. ASSERT_EQ("NOT_FOUND", Get("b"));
  536. Iterator* iter = db_->NewIterator(ReadOptions());
  537. iter->Seek("c");
  538. ASSERT_EQ(IterStatus(iter), "c->vc");
  539. iter->Prev();
  540. ASSERT_EQ(IterStatus(iter), "a->va");
  541. delete iter;
  542. }
  543. TEST(DBTest, Recover) {
  544. ASSERT_OK(Put("foo", "v1"));
  545. ASSERT_OK(Put("baz", "v5"));
  546. Reopen();
  547. ASSERT_EQ("v1", Get("foo"));
  548. ASSERT_EQ("v1", Get("foo"));
  549. ASSERT_EQ("v5", Get("baz"));
  550. ASSERT_OK(Put("bar", "v2"));
  551. ASSERT_OK(Put("foo", "v3"));
  552. Reopen();
  553. ASSERT_EQ("v3", Get("foo"));
  554. ASSERT_OK(Put("foo", "v4"));
  555. ASSERT_EQ("v4", Get("foo"));
  556. ASSERT_EQ("v2", Get("bar"));
  557. ASSERT_EQ("v5", Get("baz"));
  558. }
  559. TEST(DBTest, RecoveryWithEmptyLog) {
  560. ASSERT_OK(Put("foo", "v1"));
  561. ASSERT_OK(Put("foo", "v2"));
  562. Reopen();
  563. Reopen();
  564. ASSERT_OK(Put("foo", "v3"));
  565. Reopen();
  566. ASSERT_EQ("v3", Get("foo"));
  567. }
  568. // Check that writes done during a memtable compaction are recovered
  569. // if the database is shutdown during the memtable compaction.
  570. TEST(DBTest, RecoverDuringMemtableCompaction) {
  571. Options options;
  572. options.env = env_;
  573. options.write_buffer_size = 1000000;
  574. Reopen(&options);
  575. // Trigger a long memtable compaction and reopen the database during it
  576. ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
  577. ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
  578. ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
  579. ASSERT_OK(Put("bar", "v2")); // Goes to new log file
  580. Reopen(&options);
  581. ASSERT_EQ("v1", Get("foo"));
  582. ASSERT_EQ("v2", Get("bar"));
  583. ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
  584. ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
  585. }
  // Builds the test key for index i: "key" followed by i printed with
  // "%06d" (zero-padded to at least six characters).
  static std::string Key(int i) {
    char formatted[100];
    snprintf(formatted, sizeof(formatted), "key%06d", i);
    std::string key(formatted);
    return key;
  }
  591. TEST(DBTest, MinorCompactionsHappen) {
  592. Options options;
  593. options.write_buffer_size = 10000;
  594. Reopen(&options);
  595. const int N = 500;
  596. int starting_num_tables = TotalTableFiles();
  597. for (int i = 0; i < N; i++) {
  598. ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
  599. }
  600. int ending_num_tables = TotalTableFiles();
  601. ASSERT_GT(ending_num_tables, starting_num_tables);
  602. for (int i = 0; i < N; i++) {
  603. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  604. }
  605. Reopen();
  606. for (int i = 0; i < N; i++) {
  607. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  608. }
  609. }
  610. TEST(DBTest, RecoverWithLargeLog) {
  611. {
  612. Options options;
  613. Reopen(&options);
  614. ASSERT_OK(Put("big1", std::string(200000, '1')));
  615. ASSERT_OK(Put("big2", std::string(200000, '2')));
  616. ASSERT_OK(Put("small3", std::string(10, '3')));
  617. ASSERT_OK(Put("small4", std::string(10, '4')));
  618. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  619. }
  620. // Make sure that if we re-open with a small write buffer size that
  621. // we flush table files in the middle of a large log file.
  622. Options options;
  623. options.write_buffer_size = 100000;
  624. Reopen(&options);
  625. ASSERT_EQ(NumTableFilesAtLevel(0), 3);
  626. ASSERT_EQ(std::string(200000, '1'), Get("big1"));
  627. ASSERT_EQ(std::string(200000, '2'), Get("big2"));
  628. ASSERT_EQ(std::string(10, '3'), Get("small3"));
  629. ASSERT_EQ(std::string(10, '4'), Get("small4"));
  630. ASSERT_GT(NumTableFilesAtLevel(0), 1);
  631. }
  632. TEST(DBTest, CompactionsGenerateMultipleFiles) {
  633. Options options;
  634. options.write_buffer_size = 100000000; // Large write buffer
  635. Reopen(&options);
  636. Random rnd(301);
  637. // Write 8MB (80 values, each 100K)
  638. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  639. std::vector<std::string> values;
  640. for (int i = 0; i < 80; i++) {
  641. values.push_back(RandomString(&rnd, 100000));
  642. ASSERT_OK(Put(Key(i), values[i]));
  643. }
  644. // Reopening moves updates to level-0
  645. Reopen(&options);
  646. dbfull()->TEST_CompactRange(0, NULL, NULL);
  647. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  648. ASSERT_GT(NumTableFilesAtLevel(1), 1);
  649. for (int i = 0; i < 80; i++) {
  650. ASSERT_EQ(Get(Key(i)), values[i]);
  651. }
  652. }
  653. TEST(DBTest, RepeatedWritesToSameKey) {
  654. Options options;
  655. options.env = env_;
  656. options.write_buffer_size = 100000; // Small write buffer
  657. Reopen(&options);
  658. // We must have at most one file per level except for level-0,
  659. // which may have up to kL0_StopWritesTrigger files.
  660. const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
  661. Random rnd(301);
  662. std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
  663. for (int i = 0; i < 5 * kMaxFiles; i++) {
  664. Put("key", value);
  665. ASSERT_LE(TotalTableFiles(), kMaxFiles);
  666. fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
  667. }
  668. }
  669. TEST(DBTest, SparseMerge) {
  670. Options options;
  671. options.compression = kNoCompression;
  672. Reopen(&options);
  673. FillLevels("A", "Z");
  674. // Suppose there is:
  675. // small amount of data with prefix A
  676. // large amount of data with prefix B
  677. // small amount of data with prefix C
  678. // and that recent updates have made small changes to all three prefixes.
  679. // Check that we do not do a compaction that merges all of B in one shot.
  680. const std::string value(1000, 'x');
  681. Put("A", "va");
  682. // Write approximately 100MB of "B" values
  683. for (int i = 0; i < 100000; i++) {
  684. char key[100];
  685. snprintf(key, sizeof(key), "B%010d", i);
  686. Put(key, value);
  687. }
  688. Put("C", "vc");
  689. dbfull()->TEST_CompactMemTable();
  690. dbfull()->TEST_CompactRange(0, NULL, NULL);
  691. // Make sparse update
  692. Put("A", "va2");
  693. Put("B100", "bvalue2");
  694. Put("C", "vc2");
  695. dbfull()->TEST_CompactMemTable();
  696. // Compactions should not cause us to create a situation where
  697. // a file overlaps too much data at the next level.
  698. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  699. dbfull()->TEST_CompactRange(0, NULL, NULL);
  700. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  701. dbfull()->TEST_CompactRange(1, NULL, NULL);
  702. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  703. }
  // Returns true iff low <= val <= high.  When the check fails, the value
  // and the expected range are printed to stderr.
  static bool Between(uint64_t val, uint64_t low, uint64_t high) {
    if (val < low || val > high) {
      fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
              static_cast<unsigned long long>(val),
              static_cast<unsigned long long>(low),
              static_cast<unsigned long long>(high));
      return false;
    }
    return true;
  }
  714. TEST(DBTest, ApproximateSizes) {
  715. Options options;
  716. options.write_buffer_size = 100000000; // Large write buffer
  717. options.compression = kNoCompression;
  718. DestroyAndReopen();
  719. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  720. Reopen(&options);
  721. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  722. // Write 8MB (80 values, each 100K)
  723. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  724. const int N = 80;
  725. Random rnd(301);
  726. for (int i = 0; i < N; i++) {
  727. ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
  728. }
  729. // 0 because GetApproximateSizes() does not account for memtable space
  730. ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
  731. // Check sizes across recovery by reopening a few times
  732. for (int run = 0; run < 3; run++) {
  733. Reopen(&options);
  734. for (int compact_start = 0; compact_start < N; compact_start += 10) {
  735. for (int i = 0; i < N; i += 10) {
  736. ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000));
  737. ASSERT_TRUE(Between(Size("", Key(i)+".suffix"),
  738. 100000 * (i+1), 100000 * (i+1) + 10000));
  739. ASSERT_TRUE(Between(Size(Key(i), Key(i+10)),
  740. 100000 * 10, 100000 * 10 + 10000));
  741. }
  742. ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
  743. ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
  744. std::string cstart_str = Key(compact_start);
  745. std::string cend_str = Key(compact_start + 9);
  746. Slice cstart = cstart_str;
  747. Slice cend = cend_str;
  748. dbfull()->TEST_CompactRange(0, &cstart, &cend);
  749. }
  750. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  751. ASSERT_GT(NumTableFilesAtLevel(1), 0);
  752. }
  753. }
  754. TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  755. Options options;
  756. options.compression = kNoCompression;
  757. Reopen();
  758. Random rnd(301);
  759. std::string big1 = RandomString(&rnd, 100000);
  760. ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
  761. ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
  762. ASSERT_OK(Put(Key(2), big1));
  763. ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
  764. ASSERT_OK(Put(Key(4), big1));
  765. ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
  766. ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
  767. ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
  768. // Check sizes across recovery by reopening a few times
  769. for (int run = 0; run < 3; run++) {
  770. Reopen(&options);
  771. ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
  772. ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
  773. ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
  774. ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
  775. ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
  776. ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
  777. ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
  778. ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
  779. ASSERT_TRUE(Between(Size("", Key(8)), 550000, 551000));
  780. ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
  781. dbfull()->TEST_CompactRange(0, NULL, NULL);
  782. }
  783. }
  784. TEST(DBTest, IteratorPinsRef) {
  785. Put("foo", "hello");
  786. // Get iterator that will yield the current contents of the DB.
  787. Iterator* iter = db_->NewIterator(ReadOptions());
  788. // Write to force compactions
  789. Put("foo", "newvalue1");
  790. for (int i = 0; i < 100; i++) {
  791. ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
  792. }
  793. Put("foo", "newvalue2");
  794. iter->SeekToFirst();
  795. ASSERT_TRUE(iter->Valid());
  796. ASSERT_EQ("foo", iter->key().ToString());
  797. ASSERT_EQ("hello", iter->value().ToString());
  798. iter->Next();
  799. ASSERT_TRUE(!iter->Valid());
  800. delete iter;
  801. }
  802. TEST(DBTest, Snapshot) {
  803. Put("foo", "v1");
  804. const Snapshot* s1 = db_->GetSnapshot();
  805. Put("foo", "v2");
  806. const Snapshot* s2 = db_->GetSnapshot();
  807. Put("foo", "v3");
  808. const Snapshot* s3 = db_->GetSnapshot();
  809. Put("foo", "v4");
  810. ASSERT_EQ("v1", Get("foo", s1));
  811. ASSERT_EQ("v2", Get("foo", s2));
  812. ASSERT_EQ("v3", Get("foo", s3));
  813. ASSERT_EQ("v4", Get("foo"));
  814. db_->ReleaseSnapshot(s3);
  815. ASSERT_EQ("v1", Get("foo", s1));
  816. ASSERT_EQ("v2", Get("foo", s2));
  817. ASSERT_EQ("v4", Get("foo"));
  818. db_->ReleaseSnapshot(s1);
  819. ASSERT_EQ("v2", Get("foo", s2));
  820. ASSERT_EQ("v4", Get("foo"));
  821. db_->ReleaseSnapshot(s2);
  822. ASSERT_EQ("v4", Get("foo"));
  823. }
  824. TEST(DBTest, HiddenValuesAreRemoved) {
  825. Random rnd(301);
  826. FillLevels("a", "z");
  827. std::string big = RandomString(&rnd, 50000);
  828. Put("foo", big);
  829. Put("pastfoo", "v");
  830. const Snapshot* snapshot = db_->GetSnapshot();
  831. Put("foo", "tiny");
  832. Put("pastfoo2", "v2"); // Advance sequence number one more
  833. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  834. ASSERT_GT(NumTableFilesAtLevel(0), 0);
  835. ASSERT_EQ(big, Get("foo", snapshot));
  836. ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
  837. db_->ReleaseSnapshot(snapshot);
  838. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
  839. Slice x("x");
  840. dbfull()->TEST_CompactRange(0, NULL, &x);
  841. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  842. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  843. ASSERT_GE(NumTableFilesAtLevel(1), 1);
  844. dbfull()->TEST_CompactRange(1, NULL, &x);
  845. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  846. ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
  847. }
  848. TEST(DBTest, DeletionMarkers1) {
  849. Put("foo", "v1");
  850. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  851. const int last = config::kMaxMemCompactLevel;
  852. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  853. // Place a table at level last-1 to prevent merging with preceding mutation
  854. Put("a", "begin");
  855. Put("z", "end");
  856. dbfull()->TEST_CompactMemTable();
  857. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  858. ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
  859. Delete("foo");
  860. Put("foo", "v2");
  861. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  862. ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  863. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  864. Slice z("z");
  865. dbfull()->TEST_CompactRange(last-2, NULL, &z);
  866. // DEL eliminated, but v1 remains because we aren't compacting that level
  867. // (DEL can be eliminated because v2 hides v1).
  868. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
  869. dbfull()->TEST_CompactRange(last-1, NULL, NULL);
  870. // Merging last-1 w/ last, so we are the base level for "foo", so
  871. // DEL is removed. (as is v1).
  872. ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
  873. }
  874. TEST(DBTest, DeletionMarkers2) {
  875. Put("foo", "v1");
  876. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  877. const int last = config::kMaxMemCompactLevel;
  878. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  879. // Place a table at level last-1 to prevent merging with preceding mutation
  880. Put("a", "begin");
  881. Put("z", "end");
  882. dbfull()->TEST_CompactMemTable();
  883. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  884. ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
  885. Delete("foo");
  886. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  887. ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  888. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  889. dbfull()->TEST_CompactRange(last-2, NULL, NULL);
  890. // DEL kept: "last" file overlaps
  891. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  892. dbfull()->TEST_CompactRange(last-1, NULL, NULL);
  893. // Merging last-1 w/ last, so we are the base level for "foo", so
  894. // DEL is removed. (as is v1).
  895. ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
  896. }
  897. TEST(DBTest, OverlapInLevel0) {
  898. ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
  899. // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
  900. ASSERT_OK(Put("100", "v100"));
  901. ASSERT_OK(Put("999", "v999"));
  902. dbfull()->TEST_CompactMemTable();
  903. ASSERT_OK(Delete("100"));
  904. ASSERT_OK(Delete("999"));
  905. dbfull()->TEST_CompactMemTable();
  906. ASSERT_EQ("0,1,1", FilesPerLevel());
  907. // Make files spanning the following ranges in level-0:
  908. // files[0] 200 .. 900
  909. // files[1] 300 .. 500
  910. // Note that files are sorted by smallest key.
  911. ASSERT_OK(Put("300", "v300"));
  912. ASSERT_OK(Put("500", "v500"));
  913. dbfull()->TEST_CompactMemTable();
  914. ASSERT_OK(Put("200", "v200"));
  915. ASSERT_OK(Put("600", "v600"));
  916. ASSERT_OK(Put("900", "v900"));
  917. dbfull()->TEST_CompactMemTable();
  918. ASSERT_EQ("2,1,1", FilesPerLevel());
  919. // Compact away the placeholder files we created initially
  920. dbfull()->TEST_CompactRange(1, NULL, NULL);
  921. dbfull()->TEST_CompactRange(2, NULL, NULL);
  922. ASSERT_EQ("2", FilesPerLevel());
  923. // Do a memtable compaction. Before bug-fix, the compaction would
  924. // not detect the overlap with level-0 files and would incorrectly place
  925. // the deletion in a deeper level.
  926. ASSERT_OK(Delete("600"));
  927. dbfull()->TEST_CompactMemTable();
  928. ASSERT_EQ("3", FilesPerLevel());
  929. ASSERT_EQ("NOT_FOUND", Get("600"));
  930. }
  931. TEST(DBTest, L0_CompactionBug_Issue44_a) {
  932. Reopen();
  933. ASSERT_OK(Put("b", "v"));
  934. Reopen();
  935. ASSERT_OK(Delete("b"));
  936. ASSERT_OK(Delete("a"));
  937. Reopen();
  938. ASSERT_OK(Delete("a"));
  939. Reopen();
  940. ASSERT_OK(Put("a", "v"));
  941. Reopen();
  942. Reopen();
  943. ASSERT_EQ("(a->v)", Contents());
  944. env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
  945. ASSERT_EQ("(a->v)", Contents());
  946. }
  947. TEST(DBTest, L0_CompactionBug_Issue44_b) {
  948. Reopen();
  949. Put("","");
  950. Reopen();
  951. Delete("e");
  952. Put("","");
  953. Reopen();
  954. Put("c", "cv");
  955. Reopen();
  956. Put("","");
  957. Reopen();
  958. Put("","");
  959. env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
  960. Reopen();
  961. Put("d","dv");
  962. Reopen();
  963. Put("","");
  964. Reopen();
  965. Delete("d");
  966. Delete("b");
  967. Reopen();
  968. ASSERT_EQ("(->)(c->cv)", Contents());
  969. env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
  970. ASSERT_EQ("(->)(c->cv)", Contents());
  971. }
  972. TEST(DBTest, ComparatorCheck) {
  973. class NewComparator : public Comparator {
  974. public:
  975. virtual const char* Name() const { return "leveldb.NewComparator"; }
  976. virtual int Compare(const Slice& a, const Slice& b) const {
  977. return BytewiseComparator()->Compare(a, b);
  978. }
  979. virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
  980. BytewiseComparator()->FindShortestSeparator(s, l);
  981. }
  982. virtual void FindShortSuccessor(std::string* key) const {
  983. BytewiseComparator()->FindShortSuccessor(key);
  984. }
  985. };
  986. NewComparator cmp;
  987. Options new_options;
  988. new_options.comparator = &cmp;
  989. Status s = TryReopen(&new_options);
  990. ASSERT_TRUE(!s.ok());
  991. ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
  992. << s.ToString();
  993. }
  994. TEST(DBTest, CustomComparator) {
  995. class NumberComparator : public Comparator {
  996. public:
  997. virtual const char* Name() const { return "test.NumberComparator"; }
  998. virtual int Compare(const Slice& a, const Slice& b) const {
  999. return (strtol(a.ToString().c_str(), NULL, 0) -
  1000. strtol(b.ToString().c_str(), NULL, 0));
  1001. }
  1002. virtual void FindShortestSeparator(std::string* s, const Slice& l) const {}
  1003. virtual void FindShortSuccessor(std::string* key) const {}
  1004. };
  1005. NumberComparator cmp;
  1006. Options new_options;
  1007. new_options.create_if_missing = true;
  1008. new_options.comparator = &cmp;
  1009. DestroyAndReopen(&new_options);
  1010. ASSERT_OK(Put("10", "ten"));
  1011. ASSERT_OK(Put("0x14", "twenty"));
  1012. for (int i = 0; i < 2; i++) {
  1013. ASSERT_EQ("ten", Get("10"));
  1014. ASSERT_EQ("ten", Get("0xa"));
  1015. ASSERT_EQ("twenty", Get("20"));
  1016. ASSERT_EQ("twenty", Get("0x14"));
  1017. Compact("0", "9999");
  1018. fprintf(stderr, "ss\n%s\n", DumpSSTableList().c_str());
  1019. }
  1020. }
  1021. TEST(DBTest, ManualCompaction) {
  1022. ASSERT_EQ(config::kMaxMemCompactLevel, 2)
  1023. << "Need to update this test to match kMaxMemCompactLevel";
  1024. MakeTables(3, "p", "q");
  1025. ASSERT_EQ("1,1,1", FilesPerLevel());
  1026. // Compaction range falls before files
  1027. Compact("", "c");
  1028. ASSERT_EQ("1,1,1", FilesPerLevel());
  1029. // Compaction range falls after files
  1030. Compact("r", "z");
  1031. ASSERT_EQ("1,1,1", FilesPerLevel());
  1032. // Compaction range overlaps files
  1033. Compact("p1", "p9");
  1034. ASSERT_EQ("0,0,1", FilesPerLevel());
  1035. // Populate a different range
  1036. MakeTables(3, "c", "e");
  1037. ASSERT_EQ("1,1,2", FilesPerLevel());
  1038. // Compact just the new range
  1039. Compact("b", "f");
  1040. ASSERT_EQ("0,0,2", FilesPerLevel());
  1041. // Compact all
  1042. MakeTables(1, "a", "z");
  1043. ASSERT_EQ("0,1,2", FilesPerLevel());
  1044. db_->CompactRange(NULL, NULL);
  1045. ASSERT_EQ("0,0,1", FilesPerLevel());
  1046. }
  1047. TEST(DBTest, DBOpen_Options) {
  1048. std::string dbname = test::TmpDir() + "/db_options_test";
  1049. DestroyDB(dbname, Options());
  1050. // Does not exist, and create_if_missing == false: error
  1051. DB* db = NULL;
  1052. Options opts;
  1053. opts.create_if_missing = false;
  1054. Status s = DB::Open(opts, dbname, &db);
  1055. ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL);
  1056. ASSERT_TRUE(db == NULL);
  1057. // Does not exist, and create_if_missing == true: OK
  1058. opts.create_if_missing = true;
  1059. s = DB::Open(opts, dbname, &db);
  1060. ASSERT_OK(s);
  1061. ASSERT_TRUE(db != NULL);
  1062. delete db;
  1063. db = NULL;
  1064. // Does exist, and error_if_exists == true: error
  1065. opts.create_if_missing = false;
  1066. opts.error_if_exists = true;
  1067. s = DB::Open(opts, dbname, &db);
  1068. ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL);
  1069. ASSERT_TRUE(db == NULL);
  1070. // Does exist, and error_if_exists == false: OK
  1071. opts.create_if_missing = true;
  1072. opts.error_if_exists = false;
  1073. s = DB::Open(opts, dbname, &db);
  1074. ASSERT_OK(s);
  1075. ASSERT_TRUE(db != NULL);
  1076. delete db;
  1077. db = NULL;
  1078. }
  1079. // Multi-threaded test:
  1080. namespace {
  1081. static const int kNumThreads = 4;
  1082. static const int kTestSeconds = 10;
  1083. static const int kNumKeys = 1000;
  1084. struct MTState {
  1085. DBTest* test;
  1086. port::AtomicPointer stop;
  1087. port::AtomicPointer counter[kNumThreads];
  1088. port::AtomicPointer thread_done[kNumThreads];
  1089. };
  1090. struct MTThread {
  1091. MTState* state;
  1092. int id;
  1093. };
  1094. static void MTThreadBody(void* arg) {
  1095. MTThread* t = reinterpret_cast<MTThread*>(arg);
  1096. DB* db = t->state->test->db_;
  1097. uintptr_t counter = 0;
  1098. fprintf(stderr, "... starting thread %d\n", t->id);
  1099. Random rnd(1000 + t->id);
  1100. std::string value;
  1101. char valbuf[1500];
  1102. while (t->state->stop.Acquire_Load() == NULL) {
  1103. t->state->counter[t->id].Release_Store(reinterpret_cast<void*>(counter));
  1104. int key = rnd.Uniform(kNumKeys);
  1105. char keybuf[20];
  1106. snprintf(keybuf, sizeof(keybuf), "%016d", key);
  1107. if (rnd.OneIn(2)) {
  1108. // Write values of the form <key, my id, counter>.
  1109. // We add some padding for force compactions.
  1110. snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d",
  1111. key, t->id, static_cast<int>(counter));
  1112. ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
  1113. } else {
  1114. // Read a value and verify that it matches the pattern written above.
  1115. Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
  1116. if (s.IsNotFound()) {
  1117. // Key has not yet been written
  1118. } else {
  1119. // Check that the writer thread counter is >= the counter in the value
  1120. ASSERT_OK(s);
  1121. int k, w, c;
  1122. ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
  1123. ASSERT_EQ(k, key);
  1124. ASSERT_GE(w, 0);
  1125. ASSERT_LT(w, kNumThreads);
  1126. ASSERT_LE(c, reinterpret_cast<uintptr_t>(
  1127. t->state->counter[w].Acquire_Load()));
  1128. }
  1129. }
  1130. counter++;
  1131. }
  1132. t->state->thread_done[t->id].Release_Store(t);
  1133. fprintf(stderr, "... stopping thread %d after %d ops\n", t->id, int(counter));
  1134. }
  1135. } // namespace
  1136. TEST(DBTest, MultiThreaded) {
  1137. // Initialize state
  1138. MTState mt;
  1139. mt.test = this;
  1140. mt.stop.Release_Store(0);
  1141. for (int id = 0; id < kNumThreads; id++) {
  1142. mt.counter[id].Release_Store(0);
  1143. mt.thread_done[id].Release_Store(0);
  1144. }
  1145. // Start threads
  1146. MTThread thread[kNumThreads];
  1147. for (int id = 0; id < kNumThreads; id++) {
  1148. thread[id].state = &mt;
  1149. thread[id].id = id;
  1150. env_->StartThread(MTThreadBody, &thread[id]);
  1151. }
  1152. // Let them run for a while
  1153. env_->SleepForMicroseconds(kTestSeconds * 1000000);
  1154. // Stop the threads and wait for them to finish
  1155. mt.stop.Release_Store(&mt);
  1156. for (int id = 0; id < kNumThreads; id++) {
  1157. while (mt.thread_done[id].Acquire_Load() == NULL) {
  1158. env_->SleepForMicroseconds(100000);
  1159. }
  1160. }
  1161. }
  1162. namespace {
  1163. typedef std::map<std::string, std::string> KVMap;
  1164. }
  1165. class ModelDB: public DB {
  1166. public:
  1167. class ModelSnapshot : public Snapshot {
  1168. public:
  1169. KVMap map_;
  1170. };
  1171. explicit ModelDB(const Options& options): options_(options) { }
  1172. ~ModelDB() { }
  1173. virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) {
  1174. return DB::Put(o, k, v);
  1175. }
  1176. virtual Status Delete(const WriteOptions& o, const Slice& key) {
  1177. return DB::Delete(o, key);
  1178. }
  1179. virtual Status Get(const ReadOptions& options,
  1180. const Slice& key, std::string* value) {
  1181. assert(false); // Not implemented
  1182. return Status::NotFound(key);
  1183. }
  1184. virtual Iterator* NewIterator(const ReadOptions& options) {
  1185. if (options.snapshot == NULL) {
  1186. KVMap* saved = new KVMap;
  1187. *saved = map_;
  1188. return new ModelIter(saved, true);
  1189. } else {
  1190. const KVMap* snapshot_state =
  1191. &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
  1192. return new ModelIter(snapshot_state, false);
  1193. }
  1194. }
  1195. virtual const Snapshot* GetSnapshot() {
  1196. ModelSnapshot* snapshot = new ModelSnapshot;
  1197. snapshot->map_ = map_;
  1198. return snapshot;
  1199. }
  1200. virtual void ReleaseSnapshot(const Snapshot* snapshot) {
  1201. delete reinterpret_cast<const ModelSnapshot*>(snapshot);
  1202. }
  1203. virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
  1204. class Handler : public WriteBatch::Handler {
  1205. public:
  1206. KVMap* map_;
  1207. virtual void Put(const Slice& key, const Slice& value) {
  1208. (*map_)[key.ToString()] = value.ToString();
  1209. }
  1210. virtual void Delete(const Slice& key) {
  1211. map_->erase(key.ToString());
  1212. }
  1213. };
  1214. Handler handler;
  1215. handler.map_ = &map_;
  1216. return batch->Iterate(&handler);
  1217. }
  1218. virtual bool GetProperty(const Slice& property, std::string* value) {
  1219. return false;
  1220. }
  1221. virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
  1222. for (int i = 0; i < n; i++) {
  1223. sizes[i] = 0;
  1224. }
  1225. }
  1226. virtual void CompactRange(const Slice* start, const Slice* end) {
  1227. }
  1228. private:
  1229. class ModelIter: public Iterator {
  1230. public:
  1231. ModelIter(const KVMap* map, bool owned)
  1232. : map_(map), owned_(owned), iter_(map_->end()) {
  1233. }
  1234. ~ModelIter() {
  1235. if (owned_) delete map_;
  1236. }
  1237. virtual bool Valid() const { return iter_ != map_->end(); }
  1238. virtual void SeekToFirst() { iter_ = map_->begin(); }
  1239. virtual void SeekToLast() {
  1240. if (map_->empty()) {
  1241. iter_ = map_->end();
  1242. } else {
  1243. iter_ = map_->find(map_->rbegin()->first);
  1244. }
  1245. }
  1246. virtual void Seek(const Slice& k) {
  1247. iter_ = map_->lower_bound(k.ToString());
  1248. }
  1249. virtual void Next() { ++iter_; }
  1250. virtual void Prev() { --iter_; }
  1251. virtual Slice key() const { return iter_->first; }
  1252. virtual Slice value() const { return iter_->second; }
  1253. virtual Status status() const { return Status::OK(); }
  1254. private:
  1255. const KVMap* const map_;
  1256. const bool owned_; // Do we own map_
  1257. KVMap::const_iterator iter_;
  1258. };
  1259. const Options options_;
  1260. KVMap map_;
  1261. };
  1262. static std::string RandomKey(Random* rnd) {
  1263. int len = (rnd->OneIn(3)
  1264. ? 1 // Short sometimes to encourage collisions
  1265. : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
  1266. return test::RandomKey(rnd, len);
  1267. }
  1268. static bool CompareIterators(int step,
  1269. DB* model,
  1270. DB* db,
  1271. const Snapshot* model_snap,
  1272. const Snapshot* db_snap) {
  1273. ReadOptions options;
  1274. options.snapshot = model_snap;
  1275. Iterator* miter = model->NewIterator(options);
  1276. options.snapshot = db_snap;
  1277. Iterator* dbiter = db->NewIterator(options);
  1278. bool ok = true;
  1279. int count = 0;
  1280. for (miter->SeekToFirst(), dbiter->SeekToFirst();
  1281. ok && miter->Valid() && dbiter->Valid();
  1282. miter->Next(), dbiter->Next()) {
  1283. count++;
  1284. if (miter->key().compare(dbiter->key()) != 0) {
  1285. fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
  1286. step,
  1287. EscapeString(miter->key()).c_str(),
  1288. EscapeString(dbiter->key()).c_str());
  1289. ok = false;
  1290. break;
  1291. }
  1292. if (miter->value().compare(dbiter->value()) != 0) {
  1293. fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
  1294. step,
  1295. EscapeString(miter->key()).c_str(),
  1296. EscapeString(miter->value()).c_str(),
  1297. EscapeString(miter->value()).c_str());
  1298. ok = false;
  1299. }
  1300. }
  1301. if (ok) {
  1302. if (miter->Valid() != dbiter->Valid()) {
  1303. fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
  1304. step, miter->Valid(), dbiter->Valid());
  1305. ok = false;
  1306. }
  1307. }
  1308. fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
  1309. delete miter;
  1310. delete dbiter;
  1311. return ok;
  1312. }
  1313. TEST(DBTest, Randomized) {
  1314. Random rnd(test::RandomSeed());
  1315. ModelDB model(last_options_);
  1316. const int N = 10000;
  1317. const Snapshot* model_snap = NULL;
  1318. const Snapshot* db_snap = NULL;
  1319. std::string k, v;
  1320. for (int step = 0; step < N; step++) {
  1321. if (step % 100 == 0) {
  1322. fprintf(stderr, "Step %d of %d\n", step, N);
  1323. }
  1324. int p = rnd.Uniform(100);
  1325. if (p < 45) { // Put
  1326. k = RandomKey(&rnd);
  1327. v = RandomString(&rnd,
  1328. rnd.OneIn(20)
  1329. ? 100 + rnd.Uniform(100)
  1330. : rnd.Uniform(8));
  1331. ASSERT_OK(model.Put(WriteOptions(), k, v));
  1332. ASSERT_OK(db_->Put(WriteOptions(), k, v));
  1333. } else if (p < 90) { // Delete
  1334. k = RandomKey(&rnd);
  1335. ASSERT_OK(model.Delete(WriteOptions(), k));
  1336. ASSERT_OK(db_->Delete(WriteOptions(), k));
  1337. } else { // Multi-element batch
  1338. WriteBatch b;
  1339. const int num = rnd.Uniform(8);
  1340. for (int i = 0; i < num; i++) {
  1341. if (i == 0 || !rnd.OneIn(10)) {
  1342. k = RandomKey(&rnd);
  1343. } else {
  1344. // Periodically re-use the same key from the previous iter, so
  1345. // we have multiple entries in the write batch for the same key
  1346. }
  1347. if (rnd.OneIn(2)) {
  1348. v = RandomString(&rnd, rnd.Uniform(10));
  1349. b.Put(k, v);
  1350. } else {
  1351. b.Delete(k);
  1352. }
  1353. }
  1354. ASSERT_OK(model.Write(WriteOptions(), &b));
  1355. ASSERT_OK(db_->Write(WriteOptions(), &b));
  1356. }
  1357. if ((step % 100) == 0) {
  1358. ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
  1359. ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
  1360. // Save a snapshot from each DB this time that we'll use next
  1361. // time we compare things, to make sure the current state is
  1362. // preserved with the snapshot
  1363. if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
  1364. if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
  1365. Reopen();
  1366. ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
  1367. model_snap = model.GetSnapshot();
  1368. db_snap = db_->GetSnapshot();
  1369. }
  1370. }
  1371. if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
  1372. if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
  1373. }
  // Formats num as a decimal string, zero-padded on the left to 16 characters.
  std::string MakeKey(unsigned int num) {
    char digits[30];
    snprintf(digits, sizeof(digits), "%016u", num);
    std::string key(digits);
    return key;
  }
  1379. void BM_LogAndApply(int iters, int num_base_files) {
  1380. std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
  1381. DestroyDB(dbname, Options());
  1382. DB* db = NULL;
  1383. Options opts;
  1384. opts.create_if_missing = true;
  1385. Status s = DB::Open(opts, dbname, &db);
  1386. ASSERT_OK(s);
  1387. ASSERT_TRUE(db != NULL);
  1388. delete db;
  1389. db = NULL;
  1390. Env* env = Env::Default();
  1391. port::Mutex mu;
  1392. MutexLock l(&mu);
  1393. InternalKeyComparator cmp(BytewiseComparator());
  1394. Options options;
  1395. VersionSet vset(dbname, &options, NULL, &cmp);
  1396. ASSERT_OK(vset.Recover());
  1397. VersionEdit vbase;
  1398. uint64_t fnum = 1;
  1399. for (int i = 0; i < num_base_files; i++) {
  1400. InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
  1401. InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
  1402. vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
  1403. }
  1404. ASSERT_OK(vset.LogAndApply(&vbase, &mu));
  1405. uint64_t start_micros = env->NowMicros();
  1406. for (int i = 0; i < iters; i++) {
  1407. VersionEdit vedit;
  1408. vedit.DeleteFile(2, fnum);
  1409. InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
  1410. InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
  1411. vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
  1412. vset.LogAndApply(&vedit, &mu);
  1413. }
  1414. uint64_t stop_micros = env->NowMicros();
  1415. unsigned int us = stop_micros - start_micros;
  1416. char buf[16];
  1417. snprintf(buf, sizeof(buf), "%d", num_base_files);
  1418. fprintf(stderr,
  1419. "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n",
  1420. buf, iters, us, ((float)us) / iters);
  1421. }
  1422. } // namespace leveldb
  1423. int main(int argc, char** argv) {
  1424. if (argc > 1 && std::string(argv[1]) == "--benchmark") {
  1425. leveldb::BM_LogAndApply(1000, 1);
  1426. leveldb::BM_LogAndApply(1000, 100);
  1427. leveldb::BM_LogAndApply(1000, 10000);
  1428. leveldb::BM_LogAndApply(100, 100000);
  1429. return 0;
  1430. }
  1431. return leveldb::test::RunAllTests();
  1432. }