LevelDB二级索引实现 姚凯文(kevinyao0901) 姜嘉祺
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2360 lines
70 KiB

  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/db.h"
  5. #include <atomic>
  6. #include <cinttypes>
  7. #include <string>
  8. #include "gtest/gtest.h"
  9. #include "db/db_impl.h"
  10. #include "db/filename.h"
  11. #include "db/version_set.h"
  12. #include "db/write_batch_internal.h"
  13. #include "leveldb/cache.h"
  14. #include "leveldb/env.h"
  15. #include "leveldb/filter_policy.h"
  16. #include "leveldb/table.h"
  17. #include "port/port.h"
  18. #include "port/thread_annotations.h"
  19. #include "util/hash.h"
  20. #include "util/logging.h"
  21. #include "util/mutexlock.h"
  22. #include "util/testutil.h"
  23. namespace leveldb {
  24. static std::string RandomString(Random* rnd, int len) {
  25. std::string r;
  26. test::RandomString(rnd, len, &r);
  27. return r;
  28. }
  29. static std::string RandomKey(Random* rnd) {
  30. int len =
  31. (rnd->OneIn(3) ? 1 // Short sometimes to encourage collisions
  32. : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
  33. return test::RandomKey(rnd, len);
  34. }
  35. namespace {
  36. class AtomicCounter {
  37. public:
  38. AtomicCounter() : count_(0) {}
  39. void Increment() { IncrementBy(1); }
  40. void IncrementBy(int count) LOCKS_EXCLUDED(mu_) {
  41. MutexLock l(&mu_);
  42. count_ += count;
  43. }
  44. int Read() LOCKS_EXCLUDED(mu_) {
  45. MutexLock l(&mu_);
  46. return count_;
  47. }
  48. void Reset() LOCKS_EXCLUDED(mu_) {
  49. MutexLock l(&mu_);
  50. count_ = 0;
  51. }
  52. private:
  53. port::Mutex mu_;
  54. int count_ GUARDED_BY(mu_);
  55. };
  56. void DelayMilliseconds(int millis) {
  57. Env::Default()->SleepForMicroseconds(millis * 1000);
  58. }
  // Returns true if `f` contains the substring ".ldb" anywhere in it.
  // Uses std::string::find so the helper depends only on <string>, which this
  // file includes, rather than on strstr() from the un-included <cstring>.
  bool IsLdbFile(const std::string& f) {
    return f.find(".ldb") != std::string::npos;
  }
  // Returns true if `f` contains the substring ".log" anywhere in it.
  // Uses std::string::find so the helper depends only on <string>, which this
  // file includes, rather than on strstr() from the un-included <cstring>.
  bool IsLogFile(const std::string& f) {
    return f.find(".log") != std::string::npos;
  }
  // Returns true if `f` contains the substring "MANIFEST" anywhere in it.
  // Uses std::string::find so the helper depends only on <string>, which this
  // file includes, rather than on strstr() from the un-included <cstring>.
  bool IsManifestFile(const std::string& f) {
    return f.find("MANIFEST") != std::string::npos;
  }
  68. } // namespace
  69. // Test Env to override default Env behavior for testing.
  70. class TestEnv : public EnvWrapper {
  71. public:
  72. explicit TestEnv(Env* base) : EnvWrapper(base), ignore_dot_files_(false) {}
  73. void SetIgnoreDotFiles(bool ignored) { ignore_dot_files_ = ignored; }
  74. Status GetChildren(const std::string& dir,
  75. std::vector<std::string>* result) override {
  76. Status s = target()->GetChildren(dir, result);
  77. if (!s.ok() || !ignore_dot_files_) {
  78. return s;
  79. }
  80. std::vector<std::string>::iterator it = result->begin();
  81. while (it != result->end()) {
  82. if ((*it == ".") || (*it == "..")) {
  83. it = result->erase(it);
  84. } else {
  85. ++it;
  86. }
  87. }
  88. return s;
  89. }
  90. private:
  91. bool ignore_dot_files_;
  92. };
  93. // Special Env used to delay background operations.
  94. class SpecialEnv : public EnvWrapper {
  95. public:
  96. // For historical reasons, the std::atomic<> fields below are currently
  97. // accessed via acquired loads and release stores. We should switch
  98. // to plain load(), store() calls that provide sequential consistency.
  99. // sstable/log Sync() calls are blocked while this pointer is non-null.
  100. std::atomic<bool> delay_data_sync_;
  101. // sstable/log Sync() calls return an error.
  102. std::atomic<bool> data_sync_error_;
  103. // Simulate no-space errors while this pointer is non-null.
  104. std::atomic<bool> no_space_;
  105. // Simulate non-writable file system while this pointer is non-null.
  106. std::atomic<bool> non_writable_;
  107. // Force sync of manifest files to fail while this pointer is non-null.
  108. std::atomic<bool> manifest_sync_error_;
  109. // Force write to manifest files to fail while this pointer is non-null.
  110. std::atomic<bool> manifest_write_error_;
  111. // Force log file close to fail while this bool is true.
  112. std::atomic<bool> log_file_close_;
  113. bool count_random_reads_;
  114. AtomicCounter random_read_counter_;
  115. explicit SpecialEnv(Env* base)
  116. : EnvWrapper(base),
  117. delay_data_sync_(false),
  118. data_sync_error_(false),
  119. no_space_(false),
  120. non_writable_(false),
  121. manifest_sync_error_(false),
  122. manifest_write_error_(false),
  123. log_file_close_(false),
  124. count_random_reads_(false) {}
  125. Status NewWritableFile(const std::string& f, WritableFile** r) {
  126. class DataFile : public WritableFile {
  127. private:
  128. SpecialEnv* const env_;
  129. WritableFile* const base_;
  130. const std::string fname_;
  131. public:
  132. DataFile(SpecialEnv* env, WritableFile* base, const std::string& fname)
  133. : env_(env), base_(base), fname_(fname) {}
  134. ~DataFile() { delete base_; }
  135. Status Append(const Slice& data) {
  136. if (env_->no_space_.load(std::memory_order_acquire)) {
  137. // Drop writes on the floor
  138. return Status::OK();
  139. } else {
  140. return base_->Append(data);
  141. }
  142. }
  143. Status Close() {
  144. Status s = base_->Close();
  145. if (s.ok() && IsLogFile(fname_) &&
  146. env_->log_file_close_.load(std::memory_order_acquire)) {
  147. s = Status::IOError("simulated log file Close error");
  148. }
  149. return s;
  150. }
  151. Status Flush() { return base_->Flush(); }
  152. Status Sync() {
  153. if (env_->data_sync_error_.load(std::memory_order_acquire)) {
  154. return Status::IOError("simulated data sync error");
  155. }
  156. while (env_->delay_data_sync_.load(std::memory_order_acquire)) {
  157. DelayMilliseconds(100);
  158. }
  159. return base_->Sync();
  160. }
  161. };
  162. class ManifestFile : public WritableFile {
  163. private:
  164. SpecialEnv* env_;
  165. WritableFile* base_;
  166. public:
  167. ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) {}
  168. ~ManifestFile() { delete base_; }
  169. Status Append(const Slice& data) {
  170. if (env_->manifest_write_error_.load(std::memory_order_acquire)) {
  171. return Status::IOError("simulated writer error");
  172. } else {
  173. return base_->Append(data);
  174. }
  175. }
  176. Status Close() { return base_->Close(); }
  177. Status Flush() { return base_->Flush(); }
  178. Status Sync() {
  179. if (env_->manifest_sync_error_.load(std::memory_order_acquire)) {
  180. return Status::IOError("simulated sync error");
  181. } else {
  182. return base_->Sync();
  183. }
  184. }
  185. };
  186. if (non_writable_.load(std::memory_order_acquire)) {
  187. return Status::IOError("simulated write error");
  188. }
  189. Status s = target()->NewWritableFile(f, r);
  190. if (s.ok()) {
  191. if (IsLdbFile(f) || IsLogFile(f)) {
  192. *r = new DataFile(this, *r, f);
  193. } else if (IsManifestFile(f)) {
  194. *r = new ManifestFile(this, *r);
  195. }
  196. }
  197. return s;
  198. }
  199. Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
  200. class CountingFile : public RandomAccessFile {
  201. private:
  202. RandomAccessFile* target_;
  203. AtomicCounter* counter_;
  204. public:
  205. CountingFile(RandomAccessFile* target, AtomicCounter* counter)
  206. : target_(target), counter_(counter) {}
  207. ~CountingFile() override { delete target_; }
  208. Status Read(uint64_t offset, size_t n, Slice* result,
  209. char* scratch) const override {
  210. counter_->Increment();
  211. return target_->Read(offset, n, result, scratch);
  212. }
  213. };
  214. Status s = target()->NewRandomAccessFile(f, r);
  215. if (s.ok() && count_random_reads_) {
  216. *r = new CountingFile(*r, &random_read_counter_);
  217. }
  218. return s;
  219. }
  220. };
  221. class DBTest : public testing::Test {
  222. public:
  223. std::string dbname_;
  224. SpecialEnv* env_;
  225. DB* db_;
  226. Options last_options_;
  227. DBTest() : env_(new SpecialEnv(Env::Default())), option_config_(kDefault) {
  228. filter_policy_ = NewBloomFilterPolicy(10);
  229. dbname_ = testing::TempDir() + "db_test";
  230. DestroyDB(dbname_, Options());
  231. db_ = nullptr;
  232. Reopen();
  233. }
  234. ~DBTest() {
  235. delete db_;
  236. DestroyDB(dbname_, Options());
  237. delete env_;
  238. delete filter_policy_;
  239. }
  240. // Switch to a fresh database with the next option configuration to
  241. // test. Return false if there are no more configurations to test.
  242. bool ChangeOptions() {
  243. option_config_++;
  244. if (option_config_ >= kEnd) {
  245. return false;
  246. } else {
  247. DestroyAndReopen();
  248. return true;
  249. }
  250. }
  251. // Return the current option configuration.
  252. Options CurrentOptions() {
  253. Options options;
  254. options.reuse_logs = false;
  255. switch (option_config_) {
  256. case kReuse:
  257. options.reuse_logs = true;
  258. break;
  259. case kFilter:
  260. options.filter_policy = filter_policy_;
  261. break;
  262. case kUncompressed:
  263. options.compression = kNoCompression;
  264. break;
  265. default:
  266. break;
  267. }
  268. return options;
  269. }
  270. DBImpl* dbfull() { return reinterpret_cast<DBImpl*>(db_); }
  271. void Reopen(Options* options = nullptr) {
  272. ASSERT_LEVELDB_OK(TryReopen(options));
  273. }
  274. void Close() {
  275. delete db_;
  276. db_ = nullptr;
  277. }
  278. void DestroyAndReopen(Options* options = nullptr) {
  279. delete db_;
  280. db_ = nullptr;
  281. DestroyDB(dbname_, Options());
  282. ASSERT_LEVELDB_OK(TryReopen(options));
  283. }
  284. Status TryReopen(Options* options) {
  285. delete db_;
  286. db_ = nullptr;
  287. Options opts;
  288. if (options != nullptr) {
  289. opts = *options;
  290. } else {
  291. opts = CurrentOptions();
  292. opts.create_if_missing = true;
  293. }
  294. last_options_ = opts;
  295. return DB::Open(opts, dbname_, &db_);
  296. }
  297. Status Put(const std::string& k, const std::string& v) {
  298. return db_->Put(WriteOptions(), k, v);
  299. }
  300. Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); }
  301. std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
  302. ReadOptions options;
  303. options.snapshot = snapshot;
  304. std::string result;
  305. Status s = db_->Get(options, k, &result);
  306. if (s.IsNotFound()) {
  307. result = "NOT_FOUND";
  308. } else if (!s.ok()) {
  309. result = s.ToString();
  310. }
  311. return result;
  312. }
  313. // Return a string that contains all key,value pairs in order,
  314. // formatted like "(k1->v1)(k2->v2)".
  315. std::string Contents() {
  316. std::vector<std::string> forward;
  317. std::string result;
  318. Iterator* iter = db_->NewIterator(ReadOptions());
  319. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  320. std::string s = IterStatus(iter);
  321. result.push_back('(');
  322. result.append(s);
  323. result.push_back(')');
  324. forward.push_back(s);
  325. }
  326. // Check reverse iteration results are the reverse of forward results
  327. size_t matched = 0;
  328. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  329. EXPECT_LT(matched, forward.size());
  330. EXPECT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
  331. matched++;
  332. }
  333. EXPECT_EQ(matched, forward.size());
  334. delete iter;
  335. return result;
  336. }
  337. std::string AllEntriesFor(const Slice& user_key) {
  338. Iterator* iter = dbfull()->TEST_NewInternalIterator();
  339. InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
  340. iter->Seek(target.Encode());
  341. std::string result;
  342. if (!iter->status().ok()) {
  343. result = iter->status().ToString();
  344. } else {
  345. result = "[ ";
  346. bool first = true;
  347. while (iter->Valid()) {
  348. ParsedInternalKey ikey;
  349. if (!ParseInternalKey(iter->key(), &ikey)) {
  350. result += "CORRUPTED";
  351. } else {
  352. if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
  353. break;
  354. }
  355. if (!first) {
  356. result += ", ";
  357. }
  358. first = false;
  359. switch (ikey.type) {
  360. case kTypeValue:
  361. result += iter->value().ToString();
  362. break;
  363. case kTypeDeletion:
  364. result += "DEL";
  365. break;
  366. }
  367. }
  368. iter->Next();
  369. }
  370. if (!first) {
  371. result += " ";
  372. }
  373. result += "]";
  374. }
  375. delete iter;
  376. return result;
  377. }
  378. int NumTableFilesAtLevel(int level) {
  379. std::string property;
  380. EXPECT_TRUE(db_->GetProperty(
  381. "leveldb.num-files-at-level" + NumberToString(level), &property));
  382. return std::stoi(property);
  383. }
  384. int TotalTableFiles() {
  385. int result = 0;
  386. for (int level = 0; level < config::kNumLevels; level++) {
  387. result += NumTableFilesAtLevel(level);
  388. }
  389. return result;
  390. }
  391. // Return spread of files per level
  392. std::string FilesPerLevel() {
  393. std::string result;
  394. int last_non_zero_offset = 0;
  395. for (int level = 0; level < config::kNumLevels; level++) {
  396. int f = NumTableFilesAtLevel(level);
  397. char buf[100];
  398. std::snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
  399. result += buf;
  400. if (f > 0) {
  401. last_non_zero_offset = result.size();
  402. }
  403. }
  404. result.resize(last_non_zero_offset);
  405. return result;
  406. }
  407. int CountFiles() {
  408. std::vector<std::string> files;
  409. env_->GetChildren(dbname_, &files);
  410. return static_cast<int>(files.size());
  411. }
  412. uint64_t Size(const Slice& start, const Slice& limit) {
  413. Range r(start, limit);
  414. uint64_t size;
  415. db_->GetApproximateSizes(&r, 1, &size);
  416. return size;
  417. }
  418. void Compact(const Slice& start, const Slice& limit) {
  419. db_->CompactRange(&start, &limit);
  420. }
  421. // Do n memtable compactions, each of which produces an sstable
  422. // covering the range [small_key,large_key].
  423. void MakeTables(int n, const std::string& small_key,
  424. const std::string& large_key) {
  425. for (int i = 0; i < n; i++) {
  426. Put(small_key, "begin");
  427. Put(large_key, "end");
  428. dbfull()->TEST_CompactMemTable();
  429. }
  430. }
  431. // Prevent pushing of new sstables into deeper levels by adding
  432. // tables that cover a specified range to all levels.
  433. void FillLevels(const std::string& smallest, const std::string& largest) {
  434. MakeTables(config::kNumLevels, smallest, largest);
  435. }
  436. void DumpFileCounts(const char* label) {
  437. std::fprintf(stderr, "---\n%s:\n", label);
  438. std::fprintf(
  439. stderr, "maxoverlap: %lld\n",
  440. static_cast<long long>(dbfull()->TEST_MaxNextLevelOverlappingBytes()));
  441. for (int level = 0; level < config::kNumLevels; level++) {
  442. int num = NumTableFilesAtLevel(level);
  443. if (num > 0) {
  444. std::fprintf(stderr, " level %3d : %d files\n", level, num);
  445. }
  446. }
  447. }
  448. std::string DumpSSTableList() {
  449. std::string property;
  450. db_->GetProperty("leveldb.sstables", &property);
  451. return property;
  452. }
  453. std::string IterStatus(Iterator* iter) {
  454. std::string result;
  455. if (iter->Valid()) {
  456. result = iter->key().ToString() + "->" + iter->value().ToString();
  457. } else {
  458. result = "(invalid)";
  459. }
  460. return result;
  461. }
  462. bool DeleteAnSSTFile() {
  463. std::vector<std::string> filenames;
  464. EXPECT_LEVELDB_OK(env_->GetChildren(dbname_, &filenames));
  465. uint64_t number;
  466. FileType type;
  467. for (size_t i = 0; i < filenames.size(); i++) {
  468. if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
  469. EXPECT_LEVELDB_OK(env_->RemoveFile(TableFileName(dbname_, number)));
  470. return true;
  471. }
  472. }
  473. return false;
  474. }
  475. // Returns number of files renamed.
  476. int RenameLDBToSST() {
  477. std::vector<std::string> filenames;
  478. EXPECT_LEVELDB_OK(env_->GetChildren(dbname_, &filenames));
  479. uint64_t number;
  480. FileType type;
  481. int files_renamed = 0;
  482. for (size_t i = 0; i < filenames.size(); i++) {
  483. if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
  484. const std::string from = TableFileName(dbname_, number);
  485. const std::string to = SSTTableFileName(dbname_, number);
  486. EXPECT_LEVELDB_OK(env_->RenameFile(from, to));
  487. files_renamed++;
  488. }
  489. }
  490. return files_renamed;
  491. }
  492. private:
  493. // Sequence of option configurations to try
  494. enum OptionConfig { kDefault, kReuse, kFilter, kUncompressed, kEnd };
  495. const FilterPolicy* filter_policy_;
  496. int option_config_;
  497. };
  498. TEST_F(DBTest, Empty) {
  499. do {
  500. ASSERT_TRUE(db_ != nullptr);
  501. ASSERT_EQ("NOT_FOUND", Get("foo"));
  502. } while (ChangeOptions());
  503. }
  504. TEST_F(DBTest, EmptyKey) {
  505. do {
  506. ASSERT_LEVELDB_OK(Put("", "v1"));
  507. ASSERT_EQ("v1", Get(""));
  508. ASSERT_LEVELDB_OK(Put("", "v2"));
  509. ASSERT_EQ("v2", Get(""));
  510. } while (ChangeOptions());
  511. }
  512. TEST_F(DBTest, EmptyValue) {
  513. do {
  514. ASSERT_LEVELDB_OK(Put("key", "v1"));
  515. ASSERT_EQ("v1", Get("key"));
  516. ASSERT_LEVELDB_OK(Put("key", ""));
  517. ASSERT_EQ("", Get("key"));
  518. ASSERT_LEVELDB_OK(Put("key", "v2"));
  519. ASSERT_EQ("v2", Get("key"));
  520. } while (ChangeOptions());
  521. }
  522. TEST_F(DBTest, ReadWrite) {
  523. do {
  524. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  525. ASSERT_EQ("v1", Get("foo"));
  526. ASSERT_LEVELDB_OK(Put("bar", "v2"));
  527. ASSERT_LEVELDB_OK(Put("foo", "v3"));
  528. ASSERT_EQ("v3", Get("foo"));
  529. ASSERT_EQ("v2", Get("bar"));
  530. } while (ChangeOptions());
  531. }
  532. TEST_F(DBTest, PutDeleteGet) {
  533. do {
  534. ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v1"));
  535. ASSERT_EQ("v1", Get("foo"));
  536. ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), "foo", "v2"));
  537. ASSERT_EQ("v2", Get("foo"));
  538. ASSERT_LEVELDB_OK(db_->Delete(WriteOptions(), "foo"));
  539. ASSERT_EQ("NOT_FOUND", Get("foo"));
  540. } while (ChangeOptions());
  541. }
  542. TEST_F(DBTest, GetFromImmutableLayer) {
  543. do {
  544. Options options = CurrentOptions();
  545. options.env = env_;
  546. options.write_buffer_size = 100000; // Small write buffer
  547. Reopen(&options);
  548. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  549. ASSERT_EQ("v1", Get("foo"));
  550. // Block sync calls.
  551. env_->delay_data_sync_.store(true, std::memory_order_release);
  552. Put("k1", std::string(100000, 'x')); // Fill memtable.
  553. Put("k2", std::string(100000, 'y')); // Trigger compaction.
  554. ASSERT_EQ("v1", Get("foo"));
  555. // Release sync calls.
  556. env_->delay_data_sync_.store(false, std::memory_order_release);
  557. } while (ChangeOptions());
  558. }
  559. TEST_F(DBTest, GetFromVersions) {
  560. do {
  561. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  562. dbfull()->TEST_CompactMemTable();
  563. ASSERT_EQ("v1", Get("foo"));
  564. } while (ChangeOptions());
  565. }
  566. TEST_F(DBTest, GetMemUsage) {
  567. do {
  568. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  569. std::string val;
  570. ASSERT_TRUE(db_->GetProperty("leveldb.approximate-memory-usage", &val));
  571. int mem_usage = std::stoi(val);
  572. ASSERT_GT(mem_usage, 0);
  573. ASSERT_LT(mem_usage, 5 * 1024 * 1024);
  574. } while (ChangeOptions());
  575. }
  576. TEST_F(DBTest, GetSnapshot) {
  577. do {
  578. // Try with both a short key and a long key
  579. for (int i = 0; i < 2; i++) {
  580. std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
  581. ASSERT_LEVELDB_OK(Put(key, "v1"));
  582. const Snapshot* s1 = db_->GetSnapshot();
  583. ASSERT_LEVELDB_OK(Put(key, "v2"));
  584. ASSERT_EQ("v2", Get(key));
  585. ASSERT_EQ("v1", Get(key, s1));
  586. dbfull()->TEST_CompactMemTable();
  587. ASSERT_EQ("v2", Get(key));
  588. ASSERT_EQ("v1", Get(key, s1));
  589. db_->ReleaseSnapshot(s1);
  590. }
  591. } while (ChangeOptions());
  592. }
  593. TEST_F(DBTest, GetIdenticalSnapshots) {
  594. do {
  595. // Try with both a short key and a long key
  596. for (int i = 0; i < 2; i++) {
  597. std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
  598. ASSERT_LEVELDB_OK(Put(key, "v1"));
  599. const Snapshot* s1 = db_->GetSnapshot();
  600. const Snapshot* s2 = db_->GetSnapshot();
  601. const Snapshot* s3 = db_->GetSnapshot();
  602. ASSERT_LEVELDB_OK(Put(key, "v2"));
  603. ASSERT_EQ("v2", Get(key));
  604. ASSERT_EQ("v1", Get(key, s1));
  605. ASSERT_EQ("v1", Get(key, s2));
  606. ASSERT_EQ("v1", Get(key, s3));
  607. db_->ReleaseSnapshot(s1);
  608. dbfull()->TEST_CompactMemTable();
  609. ASSERT_EQ("v2", Get(key));
  610. ASSERT_EQ("v1", Get(key, s2));
  611. db_->ReleaseSnapshot(s2);
  612. ASSERT_EQ("v1", Get(key, s3));
  613. db_->ReleaseSnapshot(s3);
  614. }
  615. } while (ChangeOptions());
  616. }
  617. TEST_F(DBTest, IterateOverEmptySnapshot) {
  618. do {
  619. const Snapshot* snapshot = db_->GetSnapshot();
  620. ReadOptions read_options;
  621. read_options.snapshot = snapshot;
  622. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  623. ASSERT_LEVELDB_OK(Put("foo", "v2"));
  624. Iterator* iterator1 = db_->NewIterator(read_options);
  625. iterator1->SeekToFirst();
  626. ASSERT_TRUE(!iterator1->Valid());
  627. delete iterator1;
  628. dbfull()->TEST_CompactMemTable();
  629. Iterator* iterator2 = db_->NewIterator(read_options);
  630. iterator2->SeekToFirst();
  631. ASSERT_TRUE(!iterator2->Valid());
  632. delete iterator2;
  633. db_->ReleaseSnapshot(snapshot);
  634. } while (ChangeOptions());
  635. }
  636. TEST_F(DBTest, GetLevel0Ordering) {
  637. do {
  638. // Check that we process level-0 files in correct order. The code
  639. // below generates two level-0 files where the earlier one comes
  640. // before the later one in the level-0 file list since the earlier
  641. // one has a smaller "smallest" key.
  642. ASSERT_LEVELDB_OK(Put("bar", "b"));
  643. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  644. dbfull()->TEST_CompactMemTable();
  645. ASSERT_LEVELDB_OK(Put("foo", "v2"));
  646. dbfull()->TEST_CompactMemTable();
  647. ASSERT_EQ("v2", Get("foo"));
  648. } while (ChangeOptions());
  649. }
  650. TEST_F(DBTest, GetOrderedByLevels) {
  651. do {
  652. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  653. Compact("a", "z");
  654. ASSERT_EQ("v1", Get("foo"));
  655. ASSERT_LEVELDB_OK(Put("foo", "v2"));
  656. ASSERT_EQ("v2", Get("foo"));
  657. dbfull()->TEST_CompactMemTable();
  658. ASSERT_EQ("v2", Get("foo"));
  659. } while (ChangeOptions());
  660. }
  661. TEST_F(DBTest, GetPicksCorrectFile) {
  662. do {
  663. // Arrange to have multiple files in a non-level-0 level.
  664. ASSERT_LEVELDB_OK(Put("a", "va"));
  665. Compact("a", "b");
  666. ASSERT_LEVELDB_OK(Put("x", "vx"));
  667. Compact("x", "y");
  668. ASSERT_LEVELDB_OK(Put("f", "vf"));
  669. Compact("f", "g");
  670. ASSERT_EQ("va", Get("a"));
  671. ASSERT_EQ("vf", Get("f"));
  672. ASSERT_EQ("vx", Get("x"));
  673. } while (ChangeOptions());
  674. }
  675. TEST_F(DBTest, GetEncountersEmptyLevel) {
  676. do {
  677. // Arrange for the following to happen:
  678. // * sstable A in level 0
  679. // * nothing in level 1
  680. // * sstable B in level 2
  681. // Then do enough Get() calls to arrange for an automatic compaction
  682. // of sstable A. A bug would cause the compaction to be marked as
  683. // occurring at level 1 (instead of the correct level 0).
  684. // Step 1: First place sstables in levels 0 and 2
  685. int compaction_count = 0;
  686. while (NumTableFilesAtLevel(0) == 0 || NumTableFilesAtLevel(2) == 0) {
  687. ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
  688. compaction_count++;
  689. Put("a", "begin");
  690. Put("z", "end");
  691. dbfull()->TEST_CompactMemTable();
  692. }
  693. // Step 2: clear level 1 if necessary.
  694. dbfull()->TEST_CompactRange(1, nullptr, nullptr);
  695. ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  696. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  697. ASSERT_EQ(NumTableFilesAtLevel(2), 1);
  698. // Step 3: read a bunch of times
  699. for (int i = 0; i < 1000; i++) {
  700. ASSERT_EQ("NOT_FOUND", Get("missing"));
  701. }
  702. // Step 4: Wait for compaction to finish
  703. DelayMilliseconds(1000);
  704. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  705. } while (ChangeOptions());
  706. }
  707. TEST_F(DBTest, IterEmpty) {
  708. Iterator* iter = db_->NewIterator(ReadOptions());
  709. iter->SeekToFirst();
  710. ASSERT_EQ(IterStatus(iter), "(invalid)");
  711. iter->SeekToLast();
  712. ASSERT_EQ(IterStatus(iter), "(invalid)");
  713. iter->Seek("foo");
  714. ASSERT_EQ(IterStatus(iter), "(invalid)");
  715. delete iter;
  716. }
  717. TEST_F(DBTest, IterSingle) {
  718. ASSERT_LEVELDB_OK(Put("a", "va"));
  719. Iterator* iter = db_->NewIterator(ReadOptions());
  720. iter->SeekToFirst();
  721. ASSERT_EQ(IterStatus(iter), "a->va");
  722. iter->Next();
  723. ASSERT_EQ(IterStatus(iter), "(invalid)");
  724. iter->SeekToFirst();
  725. ASSERT_EQ(IterStatus(iter), "a->va");
  726. iter->Prev();
  727. ASSERT_EQ(IterStatus(iter), "(invalid)");
  728. iter->SeekToLast();
  729. ASSERT_EQ(IterStatus(iter), "a->va");
  730. iter->Next();
  731. ASSERT_EQ(IterStatus(iter), "(invalid)");
  732. iter->SeekToLast();
  733. ASSERT_EQ(IterStatus(iter), "a->va");
  734. iter->Prev();
  735. ASSERT_EQ(IterStatus(iter), "(invalid)");
  736. iter->Seek("");
  737. ASSERT_EQ(IterStatus(iter), "a->va");
  738. iter->Next();
  739. ASSERT_EQ(IterStatus(iter), "(invalid)");
  740. iter->Seek("a");
  741. ASSERT_EQ(IterStatus(iter), "a->va");
  742. iter->Next();
  743. ASSERT_EQ(IterStatus(iter), "(invalid)");
  744. iter->Seek("b");
  745. ASSERT_EQ(IterStatus(iter), "(invalid)");
  746. delete iter;
  747. }
  748. TEST_F(DBTest, IterMulti) {
  749. ASSERT_LEVELDB_OK(Put("a", "va"));
  750. ASSERT_LEVELDB_OK(Put("b", "vb"));
  751. ASSERT_LEVELDB_OK(Put("c", "vc"));
  752. Iterator* iter = db_->NewIterator(ReadOptions());
  753. iter->SeekToFirst();
  754. ASSERT_EQ(IterStatus(iter), "a->va");
  755. iter->Next();
  756. ASSERT_EQ(IterStatus(iter), "b->vb");
  757. iter->Next();
  758. ASSERT_EQ(IterStatus(iter), "c->vc");
  759. iter->Next();
  760. ASSERT_EQ(IterStatus(iter), "(invalid)");
  761. iter->SeekToFirst();
  762. ASSERT_EQ(IterStatus(iter), "a->va");
  763. iter->Prev();
  764. ASSERT_EQ(IterStatus(iter), "(invalid)");
  765. iter->SeekToLast();
  766. ASSERT_EQ(IterStatus(iter), "c->vc");
  767. iter->Prev();
  768. ASSERT_EQ(IterStatus(iter), "b->vb");
  769. iter->Prev();
  770. ASSERT_EQ(IterStatus(iter), "a->va");
  771. iter->Prev();
  772. ASSERT_EQ(IterStatus(iter), "(invalid)");
  773. iter->SeekToLast();
  774. ASSERT_EQ(IterStatus(iter), "c->vc");
  775. iter->Next();
  776. ASSERT_EQ(IterStatus(iter), "(invalid)");
  777. iter->Seek("");
  778. ASSERT_EQ(IterStatus(iter), "a->va");
  779. iter->Seek("a");
  780. ASSERT_EQ(IterStatus(iter), "a->va");
  781. iter->Seek("ax");
  782. ASSERT_EQ(IterStatus(iter), "b->vb");
  783. iter->Seek("b");
  784. ASSERT_EQ(IterStatus(iter), "b->vb");
  785. iter->Seek("z");
  786. ASSERT_EQ(IterStatus(iter), "(invalid)");
  787. // Switch from reverse to forward
  788. iter->SeekToLast();
  789. iter->Prev();
  790. iter->Prev();
  791. iter->Next();
  792. ASSERT_EQ(IterStatus(iter), "b->vb");
  793. // Switch from forward to reverse
  794. iter->SeekToFirst();
  795. iter->Next();
  796. iter->Next();
  797. iter->Prev();
  798. ASSERT_EQ(IterStatus(iter), "b->vb");
  799. // Make sure iter stays at snapshot
  800. ASSERT_LEVELDB_OK(Put("a", "va2"));
  801. ASSERT_LEVELDB_OK(Put("a2", "va3"));
  802. ASSERT_LEVELDB_OK(Put("b", "vb2"));
  803. ASSERT_LEVELDB_OK(Put("c", "vc2"));
  804. ASSERT_LEVELDB_OK(Delete("b"));
  805. iter->SeekToFirst();
  806. ASSERT_EQ(IterStatus(iter), "a->va");
  807. iter->Next();
  808. ASSERT_EQ(IterStatus(iter), "b->vb");
  809. iter->Next();
  810. ASSERT_EQ(IterStatus(iter), "c->vc");
  811. iter->Next();
  812. ASSERT_EQ(IterStatus(iter), "(invalid)");
  813. iter->SeekToLast();
  814. ASSERT_EQ(IterStatus(iter), "c->vc");
  815. iter->Prev();
  816. ASSERT_EQ(IterStatus(iter), "b->vb");
  817. iter->Prev();
  818. ASSERT_EQ(IterStatus(iter), "a->va");
  819. iter->Prev();
  820. ASSERT_EQ(IterStatus(iter), "(invalid)");
  821. delete iter;
  822. }
  823. TEST_F(DBTest, IterSmallAndLargeMix) {
  824. ASSERT_LEVELDB_OK(Put("a", "va"));
  825. ASSERT_LEVELDB_OK(Put("b", std::string(100000, 'b')));
  826. ASSERT_LEVELDB_OK(Put("c", "vc"));
  827. ASSERT_LEVELDB_OK(Put("d", std::string(100000, 'd')));
  828. ASSERT_LEVELDB_OK(Put("e", std::string(100000, 'e')));
  829. Iterator* iter = db_->NewIterator(ReadOptions());
  830. iter->SeekToFirst();
  831. ASSERT_EQ(IterStatus(iter), "a->va");
  832. iter->Next();
  833. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  834. iter->Next();
  835. ASSERT_EQ(IterStatus(iter), "c->vc");
  836. iter->Next();
  837. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  838. iter->Next();
  839. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  840. iter->Next();
  841. ASSERT_EQ(IterStatus(iter), "(invalid)");
  842. iter->SeekToLast();
  843. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  844. iter->Prev();
  845. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  846. iter->Prev();
  847. ASSERT_EQ(IterStatus(iter), "c->vc");
  848. iter->Prev();
  849. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  850. iter->Prev();
  851. ASSERT_EQ(IterStatus(iter), "a->va");
  852. iter->Prev();
  853. ASSERT_EQ(IterStatus(iter), "(invalid)");
  854. delete iter;
  855. }
  856. TEST_F(DBTest, IterMultiWithDelete) {
  857. do {
  858. ASSERT_LEVELDB_OK(Put("a", "va"));
  859. ASSERT_LEVELDB_OK(Put("b", "vb"));
  860. ASSERT_LEVELDB_OK(Put("c", "vc"));
  861. ASSERT_LEVELDB_OK(Delete("b"));
  862. ASSERT_EQ("NOT_FOUND", Get("b"));
  863. Iterator* iter = db_->NewIterator(ReadOptions());
  864. iter->Seek("c");
  865. ASSERT_EQ(IterStatus(iter), "c->vc");
  866. iter->Prev();
  867. ASSERT_EQ(IterStatus(iter), "a->va");
  868. delete iter;
  869. } while (ChangeOptions());
  870. }
  871. TEST_F(DBTest, IterMultiWithDeleteAndCompaction) {
  872. do {
  873. ASSERT_LEVELDB_OK(Put("b", "vb"));
  874. ASSERT_LEVELDB_OK(Put("c", "vc"));
  875. ASSERT_LEVELDB_OK(Put("a", "va"));
  876. dbfull()->TEST_CompactMemTable();
  877. ASSERT_LEVELDB_OK(Delete("b"));
  878. ASSERT_EQ("NOT_FOUND", Get("b"));
  879. Iterator* iter = db_->NewIterator(ReadOptions());
  880. iter->Seek("c");
  881. ASSERT_EQ(IterStatus(iter), "c->vc");
  882. iter->Prev();
  883. ASSERT_EQ(IterStatus(iter), "a->va");
  884. iter->Seek("b");
  885. ASSERT_EQ(IterStatus(iter), "c->vc");
  886. delete iter;
  887. } while (ChangeOptions());
  888. }
  889. TEST_F(DBTest, Recover) {
  890. do {
  891. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  892. ASSERT_LEVELDB_OK(Put("baz", "v5"));
  893. Reopen();
  894. ASSERT_EQ("v1", Get("foo"));
  895. ASSERT_EQ("v1", Get("foo"));
  896. ASSERT_EQ("v5", Get("baz"));
  897. ASSERT_LEVELDB_OK(Put("bar", "v2"));
  898. ASSERT_LEVELDB_OK(Put("foo", "v3"));
  899. Reopen();
  900. ASSERT_EQ("v3", Get("foo"));
  901. ASSERT_LEVELDB_OK(Put("foo", "v4"));
  902. ASSERT_EQ("v4", Get("foo"));
  903. ASSERT_EQ("v2", Get("bar"));
  904. ASSERT_EQ("v5", Get("baz"));
  905. } while (ChangeOptions());
  906. }
  907. TEST_F(DBTest, RecoveryWithEmptyLog) {
  908. do {
  909. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  910. ASSERT_LEVELDB_OK(Put("foo", "v2"));
  911. Reopen();
  912. Reopen();
  913. ASSERT_LEVELDB_OK(Put("foo", "v3"));
  914. Reopen();
  915. ASSERT_EQ("v3", Get("foo"));
  916. } while (ChangeOptions());
  917. }
  918. // Check that writes done during a memtable compaction are recovered
  919. // if the database is shutdown during the memtable compaction.
  920. TEST_F(DBTest, RecoverDuringMemtableCompaction) {
  921. do {
  922. Options options = CurrentOptions();
  923. options.env = env_;
  924. options.write_buffer_size = 1000000;
  925. Reopen(&options);
  926. // Trigger a long memtable compaction and reopen the database during it
  927. ASSERT_LEVELDB_OK(Put("foo", "v1")); // Goes to 1st log file
  928. ASSERT_LEVELDB_OK(
  929. Put("big1", std::string(10000000, 'x'))); // Fills memtable
  930. ASSERT_LEVELDB_OK(
  931. Put("big2", std::string(1000, 'y'))); // Triggers compaction
  932. ASSERT_LEVELDB_OK(Put("bar", "v2")); // Goes to new log file
  933. Reopen(&options);
  934. ASSERT_EQ("v1", Get("foo"));
  935. ASSERT_EQ("v2", Get("bar"));
  936. ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
  937. ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
  938. } while (ChangeOptions());
  939. }
  // Returns the test key for index `i`: the literal "key" followed by `i`
  // formatted with "%06d" (zero-padded to at least six characters).
  static std::string Key(int i) {
    char formatted[100];
    std::snprintf(formatted, sizeof(formatted), "key%06d", i);
    std::string result(formatted);
    return result;
  }
  945. TEST_F(DBTest, MinorCompactionsHappen) {
  946. Options options = CurrentOptions();
  947. options.write_buffer_size = 10000;
  948. Reopen(&options);
  949. const int N = 500;
  950. int starting_num_tables = TotalTableFiles();
  951. for (int i = 0; i < N; i++) {
  952. ASSERT_LEVELDB_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
  953. }
  954. int ending_num_tables = TotalTableFiles();
  955. ASSERT_GT(ending_num_tables, starting_num_tables);
  956. for (int i = 0; i < N; i++) {
  957. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  958. }
  959. Reopen();
  960. for (int i = 0; i < N; i++) {
  961. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  962. }
  963. }
  964. TEST_F(DBTest, RecoverWithLargeLog) {
  965. {
  966. Options options = CurrentOptions();
  967. Reopen(&options);
  968. ASSERT_LEVELDB_OK(Put("big1", std::string(200000, '1')));
  969. ASSERT_LEVELDB_OK(Put("big2", std::string(200000, '2')));
  970. ASSERT_LEVELDB_OK(Put("small3", std::string(10, '3')));
  971. ASSERT_LEVELDB_OK(Put("small4", std::string(10, '4')));
  972. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  973. }
  974. // Make sure that if we re-open with a small write buffer size that
  975. // we flush table files in the middle of a large log file.
  976. Options options = CurrentOptions();
  977. options.write_buffer_size = 100000;
  978. Reopen(&options);
  979. ASSERT_EQ(NumTableFilesAtLevel(0), 3);
  980. ASSERT_EQ(std::string(200000, '1'), Get("big1"));
  981. ASSERT_EQ(std::string(200000, '2'), Get("big2"));
  982. ASSERT_EQ(std::string(10, '3'), Get("small3"));
  983. ASSERT_EQ(std::string(10, '4'), Get("small4"));
  984. ASSERT_GT(NumTableFilesAtLevel(0), 1);
  985. }
  986. TEST_F(DBTest, CompactionsGenerateMultipleFiles) {
  987. Options options = CurrentOptions();
  988. options.write_buffer_size = 100000000; // Large write buffer
  989. Reopen(&options);
  990. Random rnd(301);
  991. // Write 8MB (80 values, each 100K)
  992. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  993. std::vector<std::string> values;
  994. for (int i = 0; i < 80; i++) {
  995. values.push_back(RandomString(&rnd, 100000));
  996. ASSERT_LEVELDB_OK(Put(Key(i), values[i]));
  997. }
  998. // Reopening moves updates to level-0
  999. Reopen(&options);
  1000. dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  1001. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  1002. ASSERT_GT(NumTableFilesAtLevel(1), 1);
  1003. for (int i = 0; i < 80; i++) {
  1004. ASSERT_EQ(Get(Key(i)), values[i]);
  1005. }
  1006. }
  1007. TEST_F(DBTest, RepeatedWritesToSameKey) {
  1008. Options options = CurrentOptions();
  1009. options.env = env_;
  1010. options.write_buffer_size = 100000; // Small write buffer
  1011. Reopen(&options);
  1012. // We must have at most one file per level except for level-0,
  1013. // which may have up to kL0_StopWritesTrigger files.
  1014. const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
  1015. Random rnd(301);
  1016. std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
  1017. for (int i = 0; i < 5 * kMaxFiles; i++) {
  1018. Put("key", value);
  1019. ASSERT_LE(TotalTableFiles(), kMaxFiles);
  1020. std::fprintf(stderr, "after %d: %d files\n", i + 1, TotalTableFiles());
  1021. }
  1022. }
  1023. TEST_F(DBTest, SparseMerge) {
  1024. Options options = CurrentOptions();
  1025. options.compression = kNoCompression;
  1026. Reopen(&options);
  1027. FillLevels("A", "Z");
  1028. // Suppose there is:
  1029. // small amount of data with prefix A
  1030. // large amount of data with prefix B
  1031. // small amount of data with prefix C
  1032. // and that recent updates have made small changes to all three prefixes.
  1033. // Check that we do not do a compaction that merges all of B in one shot.
  1034. const std::string value(1000, 'x');
  1035. Put("A", "va");
  1036. // Write approximately 100MB of "B" values
  1037. for (int i = 0; i < 100000; i++) {
  1038. char key[100];
  1039. std::snprintf(key, sizeof(key), "B%010d", i);
  1040. Put(key, value);
  1041. }
  1042. Put("C", "vc");
  1043. dbfull()->TEST_CompactMemTable();
  1044. dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  1045. // Make sparse update
  1046. Put("A", "va2");
  1047. Put("B100", "bvalue2");
  1048. Put("C", "vc2");
  1049. dbfull()->TEST_CompactMemTable();
  1050. // Compactions should not cause us to create a situation where
  1051. // a file overlaps too much data at the next level.
  1052. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
  1053. dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  1054. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
  1055. dbfull()->TEST_CompactRange(1, nullptr, nullptr);
  1056. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576);
  1057. }
  // Returns true when `val` lies in the closed interval [low, high].
  // On failure, prints the value and the interval to stderr and returns false.
  static bool Between(uint64_t val, uint64_t low, uint64_t high) {
    if (val >= low && val <= high) {
      return true;
    }
    std::fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
                 static_cast<unsigned long long>(val),
                 static_cast<unsigned long long>(low),
                 static_cast<unsigned long long>(high));
    return false;
  }
  1067. TEST_F(DBTest, ApproximateSizes) {
  1068. do {
  1069. Options options = CurrentOptions();
  1070. options.write_buffer_size = 100000000; // Large write buffer
  1071. options.compression = kNoCompression;
  1072. DestroyAndReopen();
  1073. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  1074. Reopen(&options);
  1075. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  1076. // Write 8MB (80 values, each 100K)
  1077. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  1078. const int N = 80;
  1079. static const int S1 = 100000;
  1080. static const int S2 = 105000; // Allow some expansion from metadata
  1081. Random rnd(301);
  1082. for (int i = 0; i < N; i++) {
  1083. ASSERT_LEVELDB_OK(Put(Key(i), RandomString(&rnd, S1)));
  1084. }
  1085. // 0 because GetApproximateSizes() does not account for memtable space
  1086. ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
  1087. if (options.reuse_logs) {
  1088. // Recovery will reuse memtable, and GetApproximateSizes() does not
  1089. // account for memtable usage;
  1090. Reopen(&options);
  1091. ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
  1092. continue;
  1093. }
  1094. // Check sizes across recovery by reopening a few times
  1095. for (int run = 0; run < 3; run++) {
  1096. Reopen(&options);
  1097. for (int compact_start = 0; compact_start < N; compact_start += 10) {
  1098. for (int i = 0; i < N; i += 10) {
  1099. ASSERT_TRUE(Between(Size("", Key(i)), S1 * i, S2 * i));
  1100. ASSERT_TRUE(Between(Size("", Key(i) + ".suffix"), S1 * (i + 1),
  1101. S2 * (i + 1)));
  1102. ASSERT_TRUE(Between(Size(Key(i), Key(i + 10)), S1 * 10, S2 * 10));
  1103. }
  1104. ASSERT_TRUE(Between(Size("", Key(50)), S1 * 50, S2 * 50));
  1105. ASSERT_TRUE(Between(Size("", Key(50) + ".suffix"), S1 * 50, S2 * 50));
  1106. std::string cstart_str = Key(compact_start);
  1107. std::string cend_str = Key(compact_start + 9);
  1108. Slice cstart = cstart_str;
  1109. Slice cend = cend_str;
  1110. dbfull()->TEST_CompactRange(0, &cstart, &cend);
  1111. }
  1112. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  1113. ASSERT_GT(NumTableFilesAtLevel(1), 0);
  1114. }
  1115. } while (ChangeOptions());
  1116. }
  1117. TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  1118. do {
  1119. Options options = CurrentOptions();
  1120. options.compression = kNoCompression;
  1121. Reopen();
  1122. Random rnd(301);
  1123. std::string big1 = RandomString(&rnd, 100000);
  1124. ASSERT_LEVELDB_OK(Put(Key(0), RandomString(&rnd, 10000)));
  1125. ASSERT_LEVELDB_OK(Put(Key(1), RandomString(&rnd, 10000)));
  1126. ASSERT_LEVELDB_OK(Put(Key(2), big1));
  1127. ASSERT_LEVELDB_OK(Put(Key(3), RandomString(&rnd, 10000)));
  1128. ASSERT_LEVELDB_OK(Put(Key(4), big1));
  1129. ASSERT_LEVELDB_OK(Put(Key(5), RandomString(&rnd, 10000)));
  1130. ASSERT_LEVELDB_OK(Put(Key(6), RandomString(&rnd, 300000)));
  1131. ASSERT_LEVELDB_OK(Put(Key(7), RandomString(&rnd, 10000)));
  1132. if (options.reuse_logs) {
  1133. // Need to force a memtable compaction since recovery does not do so.
  1134. ASSERT_LEVELDB_OK(dbfull()->TEST_CompactMemTable());
  1135. }
  1136. // Check sizes across recovery by reopening a few times
  1137. for (int run = 0; run < 3; run++) {
  1138. Reopen(&options);
  1139. ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
  1140. ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
  1141. ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
  1142. ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
  1143. ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
  1144. ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
  1145. ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
  1146. ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
  1147. ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000));
  1148. ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
  1149. dbfull()->TEST_CompactRange(0, nullptr, nullptr);
  1150. }
  1151. } while (ChangeOptions());
  1152. }
  1153. TEST_F(DBTest, IteratorPinsRef) {
  1154. Put("foo", "hello");
  1155. // Get iterator that will yield the current contents of the DB.
  1156. Iterator* iter = db_->NewIterator(ReadOptions());
  1157. // Write to force compactions
  1158. Put("foo", "newvalue1");
  1159. for (int i = 0; i < 100; i++) {
  1160. ASSERT_LEVELDB_OK(
  1161. Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
  1162. }
  1163. Put("foo", "newvalue2");
  1164. iter->SeekToFirst();
  1165. ASSERT_TRUE(iter->Valid());
  1166. ASSERT_EQ("foo", iter->key().ToString());
  1167. ASSERT_EQ("hello", iter->value().ToString());
  1168. iter->Next();
  1169. ASSERT_TRUE(!iter->Valid());
  1170. delete iter;
  1171. }
  1172. TEST_F(DBTest, Snapshot) {
  1173. do {
  1174. Put("foo", "v1");
  1175. const Snapshot* s1 = db_->GetSnapshot();
  1176. Put("foo", "v2");
  1177. const Snapshot* s2 = db_->GetSnapshot();
  1178. Put("foo", "v3");
  1179. const Snapshot* s3 = db_->GetSnapshot();
  1180. Put("foo", "v4");
  1181. ASSERT_EQ("v1", Get("foo", s1));
  1182. ASSERT_EQ("v2", Get("foo", s2));
  1183. ASSERT_EQ("v3", Get("foo", s3));
  1184. ASSERT_EQ("v4", Get("foo"));
  1185. db_->ReleaseSnapshot(s3);
  1186. ASSERT_EQ("v1", Get("foo", s1));
  1187. ASSERT_EQ("v2", Get("foo", s2));
  1188. ASSERT_EQ("v4", Get("foo"));
  1189. db_->ReleaseSnapshot(s1);
  1190. ASSERT_EQ("v2", Get("foo", s2));
  1191. ASSERT_EQ("v4", Get("foo"));
  1192. db_->ReleaseSnapshot(s2);
  1193. ASSERT_EQ("v4", Get("foo"));
  1194. } while (ChangeOptions());
  1195. }
  1196. TEST_F(DBTest, HiddenValuesAreRemoved) {
  1197. do {
  1198. Random rnd(301);
  1199. FillLevels("a", "z");
  1200. std::string big = RandomString(&rnd, 50000);
  1201. Put("foo", big);
  1202. Put("pastfoo", "v");
  1203. const Snapshot* snapshot = db_->GetSnapshot();
  1204. Put("foo", "tiny");
  1205. Put("pastfoo2", "v2"); // Advance sequence number one more
  1206. ASSERT_LEVELDB_OK(dbfull()->TEST_CompactMemTable());
  1207. ASSERT_GT(NumTableFilesAtLevel(0), 0);
  1208. ASSERT_EQ(big, Get("foo", snapshot));
  1209. ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
  1210. db_->ReleaseSnapshot(snapshot);
  1211. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
  1212. Slice x("x");
  1213. dbfull()->TEST_CompactRange(0, nullptr, &x);
  1214. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  1215. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  1216. ASSERT_GE(NumTableFilesAtLevel(1), 1);
  1217. dbfull()->TEST_CompactRange(1, nullptr, &x);
  1218. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  1219. ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
  1220. } while (ChangeOptions());
  1221. }
  1222. TEST_F(DBTest, DeletionMarkers1) {
  1223. Put("foo", "v1");
  1224. ASSERT_LEVELDB_OK(dbfull()->TEST_CompactMemTable());
  1225. const int last = config::kMaxMemCompactLevel;
  1226. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  1227. // Place a table at level last-1 to prevent merging with preceding mutation
  1228. Put("a", "begin");
  1229. Put("z", "end");
  1230. dbfull()->TEST_CompactMemTable();
  1231. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  1232. ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1);
  1233. Delete("foo");
  1234. Put("foo", "v2");
  1235. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  1236. ASSERT_LEVELDB_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  1237. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  1238. Slice z("z");
  1239. dbfull()->TEST_CompactRange(last - 2, nullptr, &z);
  1240. // DEL eliminated, but v1 remains because we aren't compacting that level
  1241. // (DEL can be eliminated because v2 hides v1).
  1242. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
  1243. dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr);
  1244. // Merging last-1 w/ last, so we are the base level for "foo", so
  1245. // DEL is removed. (as is v1).
  1246. ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
  1247. }
  1248. TEST_F(DBTest, DeletionMarkers2) {
  1249. Put("foo", "v1");
  1250. ASSERT_LEVELDB_OK(dbfull()->TEST_CompactMemTable());
  1251. const int last = config::kMaxMemCompactLevel;
  1252. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  1253. // Place a table at level last-1 to prevent merging with preceding mutation
  1254. Put("a", "begin");
  1255. Put("z", "end");
  1256. dbfull()->TEST_CompactMemTable();
  1257. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  1258. ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1);
  1259. Delete("foo");
  1260. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  1261. ASSERT_LEVELDB_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  1262. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  1263. dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr);
  1264. // DEL kept: "last" file overlaps
  1265. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  1266. dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr);
  1267. // Merging last-1 w/ last, so we are the base level for "foo", so
  1268. // DEL is removed. (as is v1).
  1269. ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
  1270. }
  1271. TEST_F(DBTest, OverlapInLevel0) {
  1272. do {
  1273. ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
  1274. // Fill levels 1 and 2 to disable the pushing of new memtables to levels >
  1275. // 0.
  1276. ASSERT_LEVELDB_OK(Put("100", "v100"));
  1277. ASSERT_LEVELDB_OK(Put("999", "v999"));
  1278. dbfull()->TEST_CompactMemTable();
  1279. ASSERT_LEVELDB_OK(Delete("100"));
  1280. ASSERT_LEVELDB_OK(Delete("999"));
  1281. dbfull()->TEST_CompactMemTable();
  1282. ASSERT_EQ("0,1,1", FilesPerLevel());
  1283. // Make files spanning the following ranges in level-0:
  1284. // files[0] 200 .. 900
  1285. // files[1] 300 .. 500
  1286. // Note that files are sorted by smallest key.
  1287. ASSERT_LEVELDB_OK(Put("300", "v300"));
  1288. ASSERT_LEVELDB_OK(Put("500", "v500"));
  1289. dbfull()->TEST_CompactMemTable();
  1290. ASSERT_LEVELDB_OK(Put("200", "v200"));
  1291. ASSERT_LEVELDB_OK(Put("600", "v600"));
  1292. ASSERT_LEVELDB_OK(Put("900", "v900"));
  1293. dbfull()->TEST_CompactMemTable();
  1294. ASSERT_EQ("2,1,1", FilesPerLevel());
  1295. // Compact away the placeholder files we created initially
  1296. dbfull()->TEST_CompactRange(1, nullptr, nullptr);
  1297. dbfull()->TEST_CompactRange(2, nullptr, nullptr);
  1298. ASSERT_EQ("2", FilesPerLevel());
  1299. // Do a memtable compaction. Before bug-fix, the compaction would
  1300. // not detect the overlap with level-0 files and would incorrectly place
  1301. // the deletion in a deeper level.
  1302. ASSERT_LEVELDB_OK(Delete("600"));
  1303. dbfull()->TEST_CompactMemTable();
  1304. ASSERT_EQ("3", FilesPerLevel());
  1305. ASSERT_EQ("NOT_FOUND", Get("600"));
  1306. } while (ChangeOptions());
  1307. }
  1308. TEST_F(DBTest, L0_CompactionBug_Issue44_a) {
  1309. Reopen();
  1310. ASSERT_LEVELDB_OK(Put("b", "v"));
  1311. Reopen();
  1312. ASSERT_LEVELDB_OK(Delete("b"));
  1313. ASSERT_LEVELDB_OK(Delete("a"));
  1314. Reopen();
  1315. ASSERT_LEVELDB_OK(Delete("a"));
  1316. Reopen();
  1317. ASSERT_LEVELDB_OK(Put("a", "v"));
  1318. Reopen();
  1319. Reopen();
  1320. ASSERT_EQ("(a->v)", Contents());
  1321. DelayMilliseconds(1000); // Wait for compaction to finish
  1322. ASSERT_EQ("(a->v)", Contents());
  1323. }
  1324. TEST_F(DBTest, L0_CompactionBug_Issue44_b) {
  1325. Reopen();
  1326. Put("", "");
  1327. Reopen();
  1328. Delete("e");
  1329. Put("", "");
  1330. Reopen();
  1331. Put("c", "cv");
  1332. Reopen();
  1333. Put("", "");
  1334. Reopen();
  1335. Put("", "");
  1336. DelayMilliseconds(1000); // Wait for compaction to finish
  1337. Reopen();
  1338. Put("d", "dv");
  1339. Reopen();
  1340. Put("", "");
  1341. Reopen();
  1342. Delete("d");
  1343. Delete("b");
  1344. Reopen();
  1345. ASSERT_EQ("(->)(c->cv)", Contents());
  1346. DelayMilliseconds(1000); // Wait for compaction to finish
  1347. ASSERT_EQ("(->)(c->cv)", Contents());
  1348. }
  1349. TEST_F(DBTest, Fflush_Issue474) {
  1350. static const int kNum = 100000;
  1351. Random rnd(test::RandomSeed());
  1352. for (int i = 0; i < kNum; i++) {
  1353. std::fflush(nullptr);
  1354. ASSERT_LEVELDB_OK(Put(RandomKey(&rnd), RandomString(&rnd, 100)));
  1355. }
  1356. }
  1357. TEST_F(DBTest, ComparatorCheck) {
  1358. class NewComparator : public Comparator {
  1359. public:
  1360. const char* Name() const override { return "leveldb.NewComparator"; }
  1361. int Compare(const Slice& a, const Slice& b) const override {
  1362. return BytewiseComparator()->Compare(a, b);
  1363. }
  1364. void FindShortestSeparator(std::string* s, const Slice& l) const override {
  1365. BytewiseComparator()->FindShortestSeparator(s, l);
  1366. }
  1367. void FindShortSuccessor(std::string* key) const override {
  1368. BytewiseComparator()->FindShortSuccessor(key);
  1369. }
  1370. };
  1371. NewComparator cmp;
  1372. Options new_options = CurrentOptions();
  1373. new_options.comparator = &cmp;
  1374. Status s = TryReopen(&new_options);
  1375. ASSERT_TRUE(!s.ok());
  1376. ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
  1377. << s.ToString();
  1378. }
  1379. TEST_F(DBTest, CustomComparator) {
  1380. class NumberComparator : public Comparator {
  1381. public:
  1382. const char* Name() const override { return "test.NumberComparator"; }
  1383. int Compare(const Slice& a, const Slice& b) const override {
  1384. return ToNumber(a) - ToNumber(b);
  1385. }
  1386. void FindShortestSeparator(std::string* s, const Slice& l) const override {
  1387. ToNumber(*s); // Check format
  1388. ToNumber(l); // Check format
  1389. }
  1390. void FindShortSuccessor(std::string* key) const override {
  1391. ToNumber(*key); // Check format
  1392. }
  1393. private:
  1394. static int ToNumber(const Slice& x) {
  1395. // Check that there are no extra characters.
  1396. EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
  1397. << EscapeString(x);
  1398. int val;
  1399. char ignored;
  1400. EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
  1401. << EscapeString(x);
  1402. return val;
  1403. }
  1404. };
  1405. NumberComparator cmp;
  1406. Options new_options = CurrentOptions();
  1407. new_options.create_if_missing = true;
  1408. new_options.comparator = &cmp;
  1409. new_options.filter_policy = nullptr; // Cannot use bloom filters
  1410. new_options.write_buffer_size = 1000; // Compact more often
  1411. DestroyAndReopen(&new_options);
  1412. ASSERT_LEVELDB_OK(Put("[10]", "ten"));
  1413. ASSERT_LEVELDB_OK(Put("[0x14]", "twenty"));
  1414. for (int i = 0; i < 2; i++) {
  1415. ASSERT_EQ("ten", Get("[10]"));
  1416. ASSERT_EQ("ten", Get("[0xa]"));
  1417. ASSERT_EQ("twenty", Get("[20]"));
  1418. ASSERT_EQ("twenty", Get("[0x14]"));
  1419. ASSERT_EQ("NOT_FOUND", Get("[15]"));
  1420. ASSERT_EQ("NOT_FOUND", Get("[0xf]"));
  1421. Compact("[0]", "[9999]");
  1422. }
  1423. for (int run = 0; run < 2; run++) {
  1424. for (int i = 0; i < 1000; i++) {
  1425. char buf[100];
  1426. std::snprintf(buf, sizeof(buf), "[%d]", i * 10);
  1427. ASSERT_LEVELDB_OK(Put(buf, buf));
  1428. }
  1429. Compact("[0]", "[1000000]");
  1430. }
  1431. }
  1432. TEST_F(DBTest, ManualCompaction) {
  1433. ASSERT_EQ(config::kMaxMemCompactLevel, 2)
  1434. << "Need to update this test to match kMaxMemCompactLevel";
  1435. MakeTables(3, "p", "q");
  1436. ASSERT_EQ("1,1,1", FilesPerLevel());
  1437. // Compaction range falls before files
  1438. Compact("", "c");
  1439. ASSERT_EQ("1,1,1", FilesPerLevel());
  1440. // Compaction range falls after files
  1441. Compact("r", "z");
  1442. ASSERT_EQ("1,1,1", FilesPerLevel());
  1443. // Compaction range overlaps files
  1444. Compact("p1", "p9");
  1445. ASSERT_EQ("0,0,1", FilesPerLevel());
  1446. // Populate a different range
  1447. MakeTables(3, "c", "e");
  1448. ASSERT_EQ("1,1,2", FilesPerLevel());
  1449. // Compact just the new range
  1450. Compact("b", "f");
  1451. ASSERT_EQ("0,0,2", FilesPerLevel());
  1452. // Compact all
  1453. MakeTables(1, "a", "z");
  1454. ASSERT_EQ("0,1,2", FilesPerLevel());
  1455. db_->CompactRange(nullptr, nullptr);
  1456. ASSERT_EQ("0,0,1", FilesPerLevel());
  1457. }
  1458. TEST_F(DBTest, DBOpen_Options) {
  1459. std::string dbname = testing::TempDir() + "db_options_test";
  1460. DestroyDB(dbname, Options());
  1461. // Does not exist, and create_if_missing == false: error
  1462. DB* db = nullptr;
  1463. Options opts;
  1464. opts.create_if_missing = false;
  1465. Status s = DB::Open(opts, dbname, &db);
  1466. ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
  1467. ASSERT_TRUE(db == nullptr);
  1468. // Does not exist, and create_if_missing == true: OK
  1469. opts.create_if_missing = true;
  1470. s = DB::Open(opts, dbname, &db);
  1471. ASSERT_LEVELDB_OK(s);
  1472. ASSERT_TRUE(db != nullptr);
  1473. delete db;
  1474. db = nullptr;
  1475. // Does exist, and error_if_exists == true: error
  1476. opts.create_if_missing = false;
  1477. opts.error_if_exists = true;
  1478. s = DB::Open(opts, dbname, &db);
  1479. ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
  1480. ASSERT_TRUE(db == nullptr);
  1481. // Does exist, and error_if_exists == false: OK
  1482. opts.create_if_missing = true;
  1483. opts.error_if_exists = false;
  1484. s = DB::Open(opts, dbname, &db);
  1485. ASSERT_LEVELDB_OK(s);
  1486. ASSERT_TRUE(db != nullptr);
  1487. delete db;
  1488. db = nullptr;
  1489. }
  1490. TEST_F(DBTest, DestroyEmptyDir) {
  1491. std::string dbname = testing::TempDir() + "db_empty_dir";
  1492. TestEnv env(Env::Default());
  1493. env.RemoveDir(dbname);
  1494. ASSERT_TRUE(!env.FileExists(dbname));
  1495. Options opts;
  1496. opts.env = &env;
  1497. ASSERT_LEVELDB_OK(env.CreateDir(dbname));
  1498. ASSERT_TRUE(env.FileExists(dbname));
  1499. std::vector<std::string> children;
  1500. ASSERT_LEVELDB_OK(env.GetChildren(dbname, &children));
  1501. #if defined(LEVELDB_PLATFORM_CHROMIUM)
  1502. // TODO(https://crbug.com/1428746): Chromium's file system abstraction always
  1503. // filters out '.' and '..'.
  1504. ASSERT_EQ(0, children.size());
  1505. #else
  1506. // The stock Env's do not filter out '.' and '..' special files.
  1507. ASSERT_EQ(2, children.size());
  1508. #endif // defined(LEVELDB_PLATFORM_CHROMIUM)
  1509. ASSERT_LEVELDB_OK(DestroyDB(dbname, opts));
  1510. ASSERT_TRUE(!env.FileExists(dbname));
  1511. // Should also be destroyed if Env is filtering out dot files.
  1512. env.SetIgnoreDotFiles(true);
  1513. ASSERT_LEVELDB_OK(env.CreateDir(dbname));
  1514. ASSERT_TRUE(env.FileExists(dbname));
  1515. ASSERT_LEVELDB_OK(env.GetChildren(dbname, &children));
  1516. ASSERT_EQ(0, children.size());
  1517. ASSERT_LEVELDB_OK(DestroyDB(dbname, opts));
  1518. ASSERT_TRUE(!env.FileExists(dbname));
  1519. }
  1520. TEST_F(DBTest, DestroyOpenDB) {
  1521. std::string dbname = testing::TempDir() + "open_db_dir";
  1522. env_->RemoveDir(dbname);
  1523. ASSERT_TRUE(!env_->FileExists(dbname));
  1524. Options opts;
  1525. opts.create_if_missing = true;
  1526. DB* db = nullptr;
  1527. ASSERT_LEVELDB_OK(DB::Open(opts, dbname, &db));
  1528. ASSERT_TRUE(db != nullptr);
  1529. // Must fail to destroy an open db.
  1530. ASSERT_TRUE(env_->FileExists(dbname));
  1531. ASSERT_TRUE(!DestroyDB(dbname, Options()).ok());
  1532. ASSERT_TRUE(env_->FileExists(dbname));
  1533. delete db;
  1534. db = nullptr;
  1535. // Should succeed destroying a closed db.
  1536. ASSERT_LEVELDB_OK(DestroyDB(dbname, Options()));
  1537. ASSERT_TRUE(!env_->FileExists(dbname));
  1538. }
  1539. TEST_F(DBTest, Locking) {
  1540. DB* db2 = nullptr;
  1541. Status s = DB::Open(CurrentOptions(), dbname_, &db2);
  1542. ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
  1543. }
  1544. // Check that number of files does not grow when we are out of space
  1545. TEST_F(DBTest, NoSpace) {
  1546. Options options = CurrentOptions();
  1547. options.env = env_;
  1548. Reopen(&options);
  1549. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  1550. ASSERT_EQ("v1", Get("foo"));
  1551. Compact("a", "z");
  1552. const int num_files = CountFiles();
  1553. // Force out-of-space errors.
  1554. env_->no_space_.store(true, std::memory_order_release);
  1555. for (int i = 0; i < 10; i++) {
  1556. for (int level = 0; level < config::kNumLevels - 1; level++) {
  1557. dbfull()->TEST_CompactRange(level, nullptr, nullptr);
  1558. }
  1559. }
  1560. env_->no_space_.store(false, std::memory_order_release);
  1561. ASSERT_LT(CountFiles(), num_files + 3);
  1562. }
  1563. TEST_F(DBTest, NonWritableFileSystem) {
  1564. Options options = CurrentOptions();
  1565. options.write_buffer_size = 1000;
  1566. options.env = env_;
  1567. Reopen(&options);
  1568. ASSERT_LEVELDB_OK(Put("foo", "v1"));
  1569. // Force errors for new files.
  1570. env_->non_writable_.store(true, std::memory_order_release);
  1571. std::string big(100000, 'x');
  1572. int errors = 0;
  1573. for (int i = 0; i < 20; i++) {
  1574. std::fprintf(stderr, "iter %d; errors %d\n", i, errors);
  1575. if (!Put("foo", big).ok()) {
  1576. errors++;
  1577. DelayMilliseconds(100);
  1578. }
  1579. }
  1580. ASSERT_GT(errors, 0);
  1581. env_->non_writable_.store(false, std::memory_order_release);
  1582. }
  1583. TEST_F(DBTest, WriteSyncError) {
  1584. // Check that log sync errors cause the DB to disallow future writes.
  1585. // (a) Cause log sync calls to fail
  1586. Options options = CurrentOptions();
  1587. options.env = env_;
  1588. Reopen(&options);
  1589. env_->data_sync_error_.store(true, std::memory_order_release);
  1590. // (b) Normal write should succeed
  1591. WriteOptions w;
  1592. ASSERT_LEVELDB_OK(db_->Put(w, "k1", "v1"));
  1593. ASSERT_EQ("v1", Get("k1"));
  1594. // (c) Do a sync write; should fail
  1595. w.sync = true;
  1596. ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok());
  1597. ASSERT_EQ("v1", Get("k1"));
  1598. ASSERT_EQ("NOT_FOUND", Get("k2"));
  1599. // (d) make sync behave normally
  1600. env_->data_sync_error_.store(false, std::memory_order_release);
  1601. // (e) Do a non-sync write; should fail
  1602. w.sync = false;
  1603. ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok());
  1604. ASSERT_EQ("v1", Get("k1"));
  1605. ASSERT_EQ("NOT_FOUND", Get("k2"));
  1606. ASSERT_EQ("NOT_FOUND", Get("k3"));
  1607. }
  1608. TEST_F(DBTest, ManifestWriteError) {
  1609. // Test for the following problem:
  1610. // (a) Compaction produces file F
  1611. // (b) Log record containing F is written to MANIFEST file, but Sync() fails
  1612. // (c) GC deletes F
  1613. // (d) After reopening DB, reads fail since deleted F is named in log record
  1614. // We iterate twice. In the second iteration, everything is the
  1615. // same except the log record never makes it to the MANIFEST file.
  1616. for (int iter = 0; iter < 2; iter++) {
  1617. std::atomic<bool>* error_type = (iter == 0) ? &env_->manifest_sync_error_
  1618. : &env_->manifest_write_error_;
  1619. // Insert foo=>bar mapping
  1620. Options options = CurrentOptions();
  1621. options.env = env_;
  1622. options.create_if_missing = true;
  1623. options.error_if_exists = false;
  1624. DestroyAndReopen(&options);
  1625. ASSERT_LEVELDB_OK(Put("foo", "bar"));
  1626. ASSERT_EQ("bar", Get("foo"));
  1627. // Memtable compaction (will succeed)
  1628. dbfull()->TEST_CompactMemTable();
  1629. ASSERT_EQ("bar", Get("foo"));
  1630. const int last = config::kMaxMemCompactLevel;
  1631. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level
  1632. // Merging compaction (will fail)
  1633. error_type->store(true, std::memory_order_release);
  1634. dbfull()->TEST_CompactRange(last, nullptr, nullptr); // Should fail
  1635. ASSERT_EQ("bar", Get("foo"));
  1636. // Recovery: should not lose data
  1637. error_type->store(false, std::memory_order_release);
  1638. Reopen(&options);
  1639. ASSERT_EQ("bar", Get("foo"));
  1640. }
  1641. }
  1642. TEST_F(DBTest, MissingSSTFile) {
  1643. ASSERT_LEVELDB_OK(Put("foo", "bar"));
  1644. ASSERT_EQ("bar", Get("foo"));
  1645. // Dump the memtable to disk.
  1646. dbfull()->TEST_CompactMemTable();
  1647. ASSERT_EQ("bar", Get("foo"));
  1648. Close();
  1649. ASSERT_TRUE(DeleteAnSSTFile());
  1650. Options options = CurrentOptions();
  1651. options.paranoid_checks = true;
  1652. Status s = TryReopen(&options);
  1653. ASSERT_TRUE(!s.ok());
  1654. ASSERT_TRUE(s.ToString().find("issing") != std::string::npos) << s.ToString();
  1655. }
  1656. TEST_F(DBTest, StillReadSST) {
  1657. ASSERT_LEVELDB_OK(Put("foo", "bar"));
  1658. ASSERT_EQ("bar", Get("foo"));
  1659. // Dump the memtable to disk.
  1660. dbfull()->TEST_CompactMemTable();
  1661. ASSERT_EQ("bar", Get("foo"));
  1662. Close();
  1663. ASSERT_GT(RenameLDBToSST(), 0);
  1664. Options options = CurrentOptions();
  1665. options.paranoid_checks = true;
  1666. Status s = TryReopen(&options);
  1667. ASSERT_TRUE(s.ok());
  1668. ASSERT_EQ("bar", Get("foo"));
  1669. }
  1670. TEST_F(DBTest, FilesDeletedAfterCompaction) {
  1671. ASSERT_LEVELDB_OK(Put("foo", "v2"));
  1672. Compact("a", "z");
  1673. const int num_files = CountFiles();
  1674. for (int i = 0; i < 10; i++) {
  1675. ASSERT_LEVELDB_OK(Put("foo", "v2"));
  1676. Compact("a", "z");
  1677. }
  1678. ASSERT_EQ(CountFiles(), num_files);
  1679. }
  1680. TEST_F(DBTest, BloomFilter) {
  1681. env_->count_random_reads_ = true;
  1682. Options options = CurrentOptions();
  1683. options.env = env_;
  1684. options.block_cache = NewLRUCache(0); // Prevent cache hits
  1685. options.filter_policy = NewBloomFilterPolicy(10);
  1686. Reopen(&options);
  1687. // Populate multiple layers
  1688. const int N = 10000;
  1689. for (int i = 0; i < N; i++) {
  1690. ASSERT_LEVELDB_OK(Put(Key(i), Key(i)));
  1691. }
  1692. Compact("a", "z");
  1693. for (int i = 0; i < N; i += 100) {
  1694. ASSERT_LEVELDB_OK(Put(Key(i), Key(i)));
  1695. }
  1696. dbfull()->TEST_CompactMemTable();
  1697. // Prevent auto compactions triggered by seeks
  1698. env_->delay_data_sync_.store(true, std::memory_order_release);
  1699. // Lookup present keys. Should rarely read from small sstable.
  1700. env_->random_read_counter_.Reset();
  1701. for (int i = 0; i < N; i++) {
  1702. ASSERT_EQ(Key(i), Get(Key(i)));
  1703. }
  1704. int reads = env_->random_read_counter_.Read();
  1705. std::fprintf(stderr, "%d present => %d reads\n", N, reads);
  1706. ASSERT_GE(reads, N);
  1707. ASSERT_LE(reads, N + 2 * N / 100);
  1708. // Lookup present keys. Should rarely read from either sstable.
  1709. env_->random_read_counter_.Reset();
  1710. for (int i = 0; i < N; i++) {
  1711. ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing"));
  1712. }
  1713. reads = env_->random_read_counter_.Read();
  1714. std::fprintf(stderr, "%d missing => %d reads\n", N, reads);
  1715. ASSERT_LE(reads, 3 * N / 100);
  1716. env_->delay_data_sync_.store(false, std::memory_order_release);
  1717. Close();
  1718. delete options.block_cache;
  1719. delete options.filter_policy;
  1720. }
  1721. TEST_F(DBTest, LogCloseError) {
  1722. // Regression test for bug where we could ignore log file
  1723. // Close() error when switching to a new log file.
  1724. const int kValueSize = 20000;
  1725. const int kWriteCount = 10;
  1726. const int kWriteBufferSize = (kValueSize * kWriteCount) / 2;
  1727. Options options = CurrentOptions();
  1728. options.env = env_;
  1729. options.write_buffer_size = kWriteBufferSize; // Small write buffer
  1730. Reopen(&options);
  1731. env_->log_file_close_.store(true, std::memory_order_release);
  1732. std::string value(kValueSize, 'x');
  1733. Status s;
  1734. for (int i = 0; i < kWriteCount && s.ok(); i++) {
  1735. s = Put(Key(i), value);
  1736. }
  1737. ASSERT_TRUE(!s.ok()) << "succeeded even after log file Close failure";
  1738. // Future writes should also fail after an earlier error.
  1739. s = Put("hello", "world");
  1740. ASSERT_TRUE(!s.ok()) << "write succeeded after log file Close failure";
  1741. env_->log_file_close_.store(false, std::memory_order_release);
  1742. }
  1743. // Multi-threaded test:
  1744. namespace {
  1745. static const int kNumThreads = 4;
  1746. static const int kTestSeconds = 10;
  1747. static const int kNumKeys = 1000;
  1748. struct MTState {
  1749. DBTest* test;
  1750. std::atomic<bool> stop;
  1751. std::atomic<int> counter[kNumThreads];
  1752. std::atomic<bool> thread_done[kNumThreads];
  1753. };
  1754. struct MTThread {
  1755. MTState* state;
  1756. int id;
  1757. };
  1758. static void MTThreadBody(void* arg) {
  1759. MTThread* t = reinterpret_cast<MTThread*>(arg);
  1760. int id = t->id;
  1761. DB* db = t->state->test->db_;
  1762. int counter = 0;
  1763. std::fprintf(stderr, "... starting thread %d\n", id);
  1764. Random rnd(1000 + id);
  1765. std::string value;
  1766. char valbuf[1500];
  1767. while (!t->state->stop.load(std::memory_order_acquire)) {
  1768. t->state->counter[id].store(counter, std::memory_order_release);
  1769. int key = rnd.Uniform(kNumKeys);
  1770. char keybuf[20];
  1771. std::snprintf(keybuf, sizeof(keybuf), "%016d", key);
  1772. if (rnd.OneIn(2)) {
  1773. // Write values of the form <key, my id, counter>.
  1774. // We add some padding for force compactions.
  1775. std::snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d", key, id,
  1776. static_cast<int>(counter));
  1777. ASSERT_LEVELDB_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
  1778. } else {
  1779. // Read a value and verify that it matches the pattern written above.
  1780. Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
  1781. if (s.IsNotFound()) {
  1782. // Key has not yet been written
  1783. } else {
  1784. // Check that the writer thread counter is >= the counter in the value
  1785. ASSERT_LEVELDB_OK(s);
  1786. int k, w, c;
  1787. ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
  1788. ASSERT_EQ(k, key);
  1789. ASSERT_GE(w, 0);
  1790. ASSERT_LT(w, kNumThreads);
  1791. ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
  1792. }
  1793. }
  1794. counter++;
  1795. }
  1796. t->state->thread_done[id].store(true, std::memory_order_release);
  1797. std::fprintf(stderr, "... stopping thread %d after %d ops\n", id, counter);
  1798. }
  1799. } // namespace
  1800. TEST_F(DBTest, MultiThreaded) {
  1801. do {
  1802. // Initialize state
  1803. MTState mt;
  1804. mt.test = this;
  1805. mt.stop.store(false, std::memory_order_release);
  1806. for (int id = 0; id < kNumThreads; id++) {
  1807. mt.counter[id].store(false, std::memory_order_release);
  1808. mt.thread_done[id].store(false, std::memory_order_release);
  1809. }
  1810. // Start threads
  1811. MTThread thread[kNumThreads];
  1812. for (int id = 0; id < kNumThreads; id++) {
  1813. thread[id].state = &mt;
  1814. thread[id].id = id;
  1815. env_->StartThread(MTThreadBody, &thread[id]);
  1816. }
  1817. // Let them run for a while
  1818. DelayMilliseconds(kTestSeconds * 1000);
  1819. // Stop the threads and wait for them to finish
  1820. mt.stop.store(true, std::memory_order_release);
  1821. for (int id = 0; id < kNumThreads; id++) {
  1822. while (!mt.thread_done[id].load(std::memory_order_acquire)) {
  1823. DelayMilliseconds(100);
  1824. }
  1825. }
  1826. } while (ChangeOptions());
  1827. }
  namespace {
  // Ordered string-to-string map.
  using KVMap = std::map<std::string, std::string>;
  }  // namespace
  1831. class ModelDB : public DB {
  1832. public:
  1833. class ModelSnapshot : public Snapshot {
  1834. public:
  1835. KVMap map_;
  1836. };
  1837. explicit ModelDB(const Options& options) : options_(options) {}
  1838. ~ModelDB() override = default;
  1839. Status Put(const WriteOptions& o, const Slice& k, const Slice& v) override {
  1840. return DB::Put(o, k, v);
  1841. }
  1842. Status Delete(const WriteOptions& o, const Slice& key) override {
  1843. return DB::Delete(o, key);
  1844. }
  1845. Status Get(const ReadOptions& options, const Slice& key,
  1846. std::string* value) override {
  1847. assert(false); // Not implemented
  1848. return Status::NotFound(key);
  1849. }
  1850. Iterator* NewIterator(const ReadOptions& options) override {
  1851. if (options.snapshot == nullptr) {
  1852. KVMap* saved = new KVMap;
  1853. *saved = map_;
  1854. return new ModelIter(saved, true);
  1855. } else {
  1856. const KVMap* snapshot_state =
  1857. &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
  1858. return new ModelIter(snapshot_state, false);
  1859. }
  1860. }
  1861. const Snapshot* GetSnapshot() override {
  1862. ModelSnapshot* snapshot = new ModelSnapshot;
  1863. snapshot->map_ = map_;
  1864. return snapshot;
  1865. }
  1866. void ReleaseSnapshot(const Snapshot* snapshot) override {
  1867. delete reinterpret_cast<const ModelSnapshot*>(snapshot);
  1868. }
  1869. Status Write(const WriteOptions& options, WriteBatch* batch) override {
  1870. class Handler : public WriteBatch::Handler {
  1871. public:
  1872. KVMap* map_;
  1873. void Put(const Slice& key, const Slice& value) override {
  1874. (*map_)[key.ToString()] = value.ToString();
  1875. }
  1876. void Delete(const Slice& key) override { map_->erase(key.ToString()); }
  1877. };
  1878. Handler handler;
  1879. handler.map_ = &map_;
  1880. return batch->Iterate(&handler);
  1881. }
  1882. bool GetProperty(const Slice& property, std::string* value) override {
  1883. return false;
  1884. }
  1885. void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) override {
  1886. for (int i = 0; i < n; i++) {
  1887. sizes[i] = 0;
  1888. }
  1889. }
  1890. void CompactRange(const Slice* start, const Slice* end) override {}
  1891. private:
  1892. class ModelIter : public Iterator {
  1893. public:
  1894. ModelIter(const KVMap* map, bool owned)
  1895. : map_(map), owned_(owned), iter_(map_->end()) {}
  1896. ~ModelIter() override {
  1897. if (owned_) delete map_;
  1898. }
  1899. bool Valid() const override { return iter_ != map_->end(); }
  1900. void SeekToFirst() override { iter_ = map_->begin(); }
  1901. void SeekToLast() override {
  1902. if (map_->empty()) {
  1903. iter_ = map_->end();
  1904. } else {
  1905. iter_ = map_->find(map_->rbegin()->first);
  1906. }
  1907. }
  1908. void Seek(const Slice& k) override {
  1909. iter_ = map_->lower_bound(k.ToString());
  1910. }
  1911. void Next() override { ++iter_; }
  1912. void Prev() override { --iter_; }
  1913. Slice key() const override { return iter_->first; }
  1914. Slice value() const override { return iter_->second; }
  1915. Status status() const override { return Status::OK(); }
  1916. private:
  1917. const KVMap* const map_;
  1918. const bool owned_; // Do we own map_
  1919. KVMap::const_iterator iter_;
  1920. };
  1921. const Options options_;
  1922. KVMap map_;
  1923. };
  1924. static bool CompareIterators(int step, DB* model, DB* db,
  1925. const Snapshot* model_snap,
  1926. const Snapshot* db_snap) {
  1927. ReadOptions options;
  1928. options.snapshot = model_snap;
  1929. Iterator* miter = model->NewIterator(options);
  1930. options.snapshot = db_snap;
  1931. Iterator* dbiter = db->NewIterator(options);
  1932. bool ok = true;
  1933. int count = 0;
  1934. std::vector<std::string> seek_keys;
  1935. // Compare equality of all elements using Next(). Save some of the keys for
  1936. // comparing Seek equality.
  1937. for (miter->SeekToFirst(), dbiter->SeekToFirst();
  1938. ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) {
  1939. count++;
  1940. if (miter->key().compare(dbiter->key()) != 0) {
  1941. std::fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step,
  1942. EscapeString(miter->key()).c_str(),
  1943. EscapeString(dbiter->key()).c_str());
  1944. ok = false;
  1945. break;
  1946. }
  1947. if (miter->value().compare(dbiter->value()) != 0) {
  1948. std::fprintf(stderr,
  1949. "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
  1950. step, EscapeString(miter->key()).c_str(),
  1951. EscapeString(miter->value()).c_str(),
  1952. EscapeString(miter->value()).c_str());
  1953. ok = false;
  1954. break;
  1955. }
  1956. if (count % 10 == 0) {
  1957. seek_keys.push_back(miter->key().ToString());
  1958. }
  1959. }
  1960. if (ok) {
  1961. if (miter->Valid() != dbiter->Valid()) {
  1962. std::fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
  1963. step, miter->Valid(), dbiter->Valid());
  1964. ok = false;
  1965. }
  1966. }
  1967. if (ok) {
  1968. // Validate iterator equality when performing seeks.
  1969. for (auto kiter = seek_keys.begin(); ok && kiter != seek_keys.end();
  1970. ++kiter) {
  1971. miter->Seek(*kiter);
  1972. dbiter->Seek(*kiter);
  1973. if (!miter->Valid() || !dbiter->Valid()) {
  1974. std::fprintf(stderr, "step %d: Seek iterators invalid: %d vs. %d\n",
  1975. step, miter->Valid(), dbiter->Valid());
  1976. ok = false;
  1977. }
  1978. if (miter->key().compare(dbiter->key()) != 0) {
  1979. std::fprintf(stderr, "step %d: Seek key mismatch: '%s' vs. '%s'\n",
  1980. step, EscapeString(miter->key()).c_str(),
  1981. EscapeString(dbiter->key()).c_str());
  1982. ok = false;
  1983. break;
  1984. }
  1985. if (miter->value().compare(dbiter->value()) != 0) {
  1986. std::fprintf(
  1987. stderr,
  1988. "step %d: Seek value mismatch for key '%s': '%s' vs. '%s'\n", step,
  1989. EscapeString(miter->key()).c_str(),
  1990. EscapeString(miter->value()).c_str(),
  1991. EscapeString(miter->value()).c_str());
  1992. ok = false;
  1993. break;
  1994. }
  1995. }
  1996. }
  1997. std::fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
  1998. delete miter;
  1999. delete dbiter;
  2000. return ok;
  2001. }
  2002. TEST_F(DBTest, Randomized) {
  2003. Random rnd(test::RandomSeed());
  2004. do {
  2005. ModelDB model(CurrentOptions());
  2006. const int N = 10000;
  2007. const Snapshot* model_snap = nullptr;
  2008. const Snapshot* db_snap = nullptr;
  2009. std::string k, v;
  2010. for (int step = 0; step < N; step++) {
  2011. if (step % 100 == 0) {
  2012. std::fprintf(stderr, "Step %d of %d\n", step, N);
  2013. }
  2014. // TODO(sanjay): Test Get() works
  2015. int p = rnd.Uniform(100);
  2016. if (p < 45) { // Put
  2017. k = RandomKey(&rnd);
  2018. v = RandomString(
  2019. &rnd, rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8));
  2020. ASSERT_LEVELDB_OK(model.Put(WriteOptions(), k, v));
  2021. ASSERT_LEVELDB_OK(db_->Put(WriteOptions(), k, v));
  2022. } else if (p < 90) { // Delete
  2023. k = RandomKey(&rnd);
  2024. ASSERT_LEVELDB_OK(model.Delete(WriteOptions(), k));
  2025. ASSERT_LEVELDB_OK(db_->Delete(WriteOptions(), k));
  2026. } else { // Multi-element batch
  2027. WriteBatch b;
  2028. const int num = rnd.Uniform(8);
  2029. for (int i = 0; i < num; i++) {
  2030. if (i == 0 || !rnd.OneIn(10)) {
  2031. k = RandomKey(&rnd);
  2032. } else {
  2033. // Periodically re-use the same key from the previous iter, so
  2034. // we have multiple entries in the write batch for the same key
  2035. }
  2036. if (rnd.OneIn(2)) {
  2037. v = RandomString(&rnd, rnd.Uniform(10));
  2038. b.Put(k, v);
  2039. } else {
  2040. b.Delete(k);
  2041. }
  2042. }
  2043. ASSERT_LEVELDB_OK(model.Write(WriteOptions(), &b));
  2044. ASSERT_LEVELDB_OK(db_->Write(WriteOptions(), &b));
  2045. }
  2046. if ((step % 100) == 0) {
  2047. ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
  2048. ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
  2049. // Save a snapshot from each DB this time that we'll use next
  2050. // time we compare things, to make sure the current state is
  2051. // preserved with the snapshot
  2052. if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
  2053. if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
  2054. Reopen();
  2055. ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
  2056. model_snap = model.GetSnapshot();
  2057. db_snap = db_->GetSnapshot();
  2058. }
  2059. }
  2060. if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
  2061. if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
  2062. } while (ChangeOptions());
  2063. }
  2064. } // namespace leveldb