From 13e3c4efc66b8d7317c7648766a930b5d7e48aa7 Mon Sep 17 00:00:00 2001 From: Sanjay Ghemawat Date: Thu, 20 May 2021 19:02:41 +0000 Subject: [PATCH] Fix compactions that could end up breaking a run of the same user key across multiple files. As reported in GitHub issue #339, it is incorrect to split the same user key across multiple compacted files since it causes tombstones/newer-versions to be dropped, thereby exposing obsolete data. There was a fix for #339, but it ended up not fully fixing the problem. (It checked for boundary problems in the first level being compacted, but not the second.) This problem was revealed by GitHub issue #887. We now adjust boundaries to avoid splitting user keys in both the first level and the second level. PiperOrigin-RevId: 374921082 --- db/version_set.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/db/version_set.cc b/db/version_set.cc index 1963353..8d85fce 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1392,6 +1392,7 @@ void VersionSet::SetupOtherInputs(Compaction* c) { current_->GetOverlappingInputs(level + 1, &smallest, &largest, &c->inputs_[1]); + AddBoundaryInputs(icmp_, current_->files_[level + 1], &c->inputs_[1]); // Get entire range covered by compaction InternalKey all_start, all_limit; @@ -1414,6 +1415,7 @@ void VersionSet::SetupOtherInputs(Compaction* c) { std::vector expanded1; current_->GetOverlappingInputs(level + 1, &new_start, &new_limit, &expanded1); + AddBoundaryInputs(icmp_, current_->files_[level + 1], &expanded1); if (expanded1.size() == c->inputs_[1].size()) { Log(options_->info_log, "Expanding@%d %d+%d (%ld+%ld bytes) to %d+%d (%ld+%ld bytes)\n",