Browse Source

leveldb: Report missing CURRENT manifest file as database corruption.

BTRFS reorders rename and write operations, so it is possible that a filesystem crash and recovery results in a situation where the file pointed to by CURRENT does not exist. DB::Open currently reports an I/O error in this case. Reporting database corruption is a better hint to the caller, which can attempt to recover the database or erase it and start over.

This issue is not merely theoretical. It was reported as having showed up in the wild at https://github.com/google/leveldb/issues/195 and at https://crbug.com/738961. Also, asides from the BTRFS case described above, incorrect data in CURRENT seems like a possible corruption case that should be handled gracefully.

The Env API changes here can be considered backwards compatible, because an implementation that returns Status::IOError instead of Status::NotFound will still get the same functionality as before.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=161432630
main
costan 7 years ago
committed by Victor Costan
parent
commit
8415f00eee
5 changed files with 76 additions and 31 deletions
  1. +19
    -2
      db/recovery_test.cc
  2. +4
    -0
      db/version_set.cc
  3. +4
    -2
      include/leveldb/env.h
  4. +31
    -27
      util/env_posix.cc
  5. +18
    -0
      util/env_test.cc

+ 19
- 2
db/recovery_test.cc View File

@ -47,7 +47,7 @@ class RecoveryTest {
db_ = NULL; db_ = NULL;
} }
void Open(Options* options = NULL) {
Status OpenWithStatus(Options* options = NULL) {
Close(); Close();
Options opts; Options opts;
if (options != NULL) { if (options != NULL) {
@ -59,7 +59,11 @@ class RecoveryTest {
if (opts.env == NULL) { if (opts.env == NULL) {
opts.env = env_; opts.env = env_;
} }
ASSERT_OK(DB::Open(opts, dbname_, &db_));
return DB::Open(opts, dbname_, &db_);
}
void Open(Options* options = NULL) {
ASSERT_OK(OpenWithStatus(options));
ASSERT_EQ(1, NumLogs()); ASSERT_EQ(1, NumLogs());
} }
@ -100,6 +104,10 @@ class RecoveryTest {
return logs.size(); return logs.size();
} }
void DeleteManifestFile() {
ASSERT_OK(env_->DeleteFile(ManifestFileName()));
}
uint64_t FirstLogFile() { uint64_t FirstLogFile() {
return GetFiles(kLogFile)[0]; return GetFiles(kLogFile)[0];
} }
@ -317,6 +325,15 @@ TEST(RecoveryTest, MultipleLogFiles) {
ASSERT_EQ("there", Get("hi")); ASSERT_EQ("there", Get("hi"));
} }
TEST(RecoveryTest, ManifestMissing) {
ASSERT_OK(Put("foo", "bar"));
Close();
DeleteManifestFile();
Status status = OpenWithStatus();
ASSERT_TRUE(status.IsCorruption());
}
} // namespace leveldb } // namespace leveldb
int main(int argc, char** argv) { int main(int argc, char** argv) {

+ 4
- 0
db/version_set.cc View File

@ -925,6 +925,10 @@ Status VersionSet::Recover(bool *save_manifest) {
SequentialFile* file; SequentialFile* file;
s = env_->NewSequentialFile(dscname, &file); s = env_->NewSequentialFile(dscname, &file);
if (!s.ok()) { if (!s.ok()) {
if (s.IsNotFound()) {
return Status::Corruption(
"CURRENT points to a non-existent file", s.ToString());
}
return s; return s;
} }

+ 4
- 2
include/leveldb/env.h View File

@ -43,7 +43,8 @@ class Env {
// Create a brand new sequentially-readable file with the specified name. // Create a brand new sequentially-readable file with the specified name.
// On success, stores a pointer to the new file in *result and returns OK. // On success, stores a pointer to the new file in *result and returns OK.
// On failure stores NULL in *result and returns non-OK. If the file does // On failure stores NULL in *result and returns non-OK. If the file does
// not exist, returns a non-OK status.
// not exist, returns a non-OK status. Implementations should return a
// NotFound status when the file does not exist.
// //
// The returned file will only be accessed by one thread at a time. // The returned file will only be accessed by one thread at a time.
virtual Status NewSequentialFile(const std::string& fname, virtual Status NewSequentialFile(const std::string& fname,
@ -53,7 +54,8 @@ class Env {
// specified name. On success, stores a pointer to the new file in // specified name. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores NULL in *result and // *result and returns OK. On failure stores NULL in *result and
// returns non-OK. If the file does not exist, returns a non-OK // returns non-OK. If the file does not exist, returns a non-OK
// status.
// status. Implementations should return a NotFound status when the file does
// not exist.
// //
// The returned file may be concurrently accessed by multiple threads. // The returned file may be concurrently accessed by multiple threads.
virtual Status NewRandomAccessFile(const std::string& fname, virtual Status NewRandomAccessFile(const std::string& fname,

+ 31
- 27
util/env_posix.cc View File

@ -34,8 +34,12 @@ namespace {
static int open_read_only_file_limit = -1; static int open_read_only_file_limit = -1;
static int mmap_limit = -1; static int mmap_limit = -1;
static Status IOError(const std::string& context, int err_number) {
return Status::IOError(context, strerror(err_number));
static Status PosixError(const std::string& context, int err_number) {
if (err_number == ENOENT) {
return Status::NotFound(context, strerror(err_number));
} else {
return Status::IOError(context, strerror(err_number));
}
} }
// Helper class to limit resource usage to avoid exhaustion. // Helper class to limit resource usage to avoid exhaustion.
@ -108,7 +112,7 @@ class PosixSequentialFile: public SequentialFile {
// We leave status as ok if we hit the end of the file // We leave status as ok if we hit the end of the file
} else { } else {
// A partial read with an error: return a non-ok status // A partial read with an error: return a non-ok status
s = IOError(filename_, errno);
s = PosixError(filename_, errno);
} }
} }
return s; return s;
@ -116,7 +120,7 @@ class PosixSequentialFile: public SequentialFile {
virtual Status Skip(uint64_t n) { virtual Status Skip(uint64_t n) {
if (fseek(file_, n, SEEK_CUR)) { if (fseek(file_, n, SEEK_CUR)) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
} }
return Status::OK(); return Status::OK();
} }
@ -154,7 +158,7 @@ class PosixRandomAccessFile: public RandomAccessFile {
if (temporary_fd_) { if (temporary_fd_) {
fd = open(filename_.c_str(), O_RDONLY); fd = open(filename_.c_str(), O_RDONLY);
if (fd < 0) { if (fd < 0) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
} }
} }
@ -163,7 +167,7 @@ class PosixRandomAccessFile: public RandomAccessFile {
*result = Slice(scratch, (r < 0) ? 0 : r); *result = Slice(scratch, (r < 0) ? 0 : r);
if (r < 0) { if (r < 0) {
// An error: return a non-ok status // An error: return a non-ok status
s = IOError(filename_, errno);
s = PosixError(filename_, errno);
} }
if (temporary_fd_) { if (temporary_fd_) {
// Close the temporary file descriptor opened earlier. // Close the temporary file descriptor opened earlier.
@ -199,7 +203,7 @@ class PosixMmapReadableFile: public RandomAccessFile {
Status s; Status s;
if (offset + n > length_) { if (offset + n > length_) {
*result = Slice(); *result = Slice();
s = IOError(filename_, EINVAL);
s = PosixError(filename_, EINVAL);
} else { } else {
*result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n); *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
} }
@ -226,7 +230,7 @@ class PosixWritableFile : public WritableFile {
virtual Status Append(const Slice& data) { virtual Status Append(const Slice& data) {
size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_); size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_);
if (r != data.size()) { if (r != data.size()) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
} }
return Status::OK(); return Status::OK();
} }
@ -234,7 +238,7 @@ class PosixWritableFile : public WritableFile {
virtual Status Close() { virtual Status Close() {
Status result; Status result;
if (fclose(file_) != 0) { if (fclose(file_) != 0) {
result = IOError(filename_, errno);
result = PosixError(filename_, errno);
} }
file_ = NULL; file_ = NULL;
return result; return result;
@ -242,7 +246,7 @@ class PosixWritableFile : public WritableFile {
virtual Status Flush() { virtual Status Flush() {
if (fflush_unlocked(file_) != 0) { if (fflush_unlocked(file_) != 0) {
return IOError(filename_, errno);
return PosixError(filename_, errno);
} }
return Status::OK(); return Status::OK();
} }
@ -263,10 +267,10 @@ class PosixWritableFile : public WritableFile {
if (basename.starts_with("MANIFEST")) { if (basename.starts_with("MANIFEST")) {
int fd = open(dir.c_str(), O_RDONLY); int fd = open(dir.c_str(), O_RDONLY);
if (fd < 0) { if (fd < 0) {
s = IOError(dir, errno);
s = PosixError(dir, errno);
} else { } else {
if (fsync(fd) < 0) { if (fsync(fd) < 0) {
s = IOError(dir, errno);
s = PosixError(dir, errno);
} }
close(fd); close(fd);
} }
@ -337,7 +341,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "r"); FILE* f = fopen(fname.c_str(), "r");
if (f == NULL) { if (f == NULL) {
*result = NULL; *result = NULL;
return IOError(fname, errno);
return PosixError(fname, errno);
} else { } else {
*result = new PosixSequentialFile(fname, f); *result = new PosixSequentialFile(fname, f);
return Status::OK(); return Status::OK();
@ -350,7 +354,7 @@ class PosixEnv : public Env {
Status s; Status s;
int fd = open(fname.c_str(), O_RDONLY); int fd = open(fname.c_str(), O_RDONLY);
if (fd < 0) { if (fd < 0) {
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else if (mmap_limit_.Acquire()) { } else if (mmap_limit_.Acquire()) {
uint64_t size; uint64_t size;
s = GetFileSize(fname, &size); s = GetFileSize(fname, &size);
@ -359,7 +363,7 @@ class PosixEnv : public Env {
if (base != MAP_FAILED) { if (base != MAP_FAILED) {
*result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_); *result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_);
} else { } else {
s = IOError(fname, errno);
s = PosixError(fname, errno);
} }
} }
close(fd); close(fd);
@ -378,7 +382,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "w"); FILE* f = fopen(fname.c_str(), "w");
if (f == NULL) { if (f == NULL) {
*result = NULL; *result = NULL;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else { } else {
*result = new PosixWritableFile(fname, f); *result = new PosixWritableFile(fname, f);
} }
@ -391,7 +395,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "a"); FILE* f = fopen(fname.c_str(), "a");
if (f == NULL) { if (f == NULL) {
*result = NULL; *result = NULL;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else { } else {
*result = new PosixWritableFile(fname, f); *result = new PosixWritableFile(fname, f);
} }
@ -407,7 +411,7 @@ class PosixEnv : public Env {
result->clear(); result->clear();
DIR* d = opendir(dir.c_str()); DIR* d = opendir(dir.c_str());
if (d == NULL) { if (d == NULL) {
return IOError(dir, errno);
return PosixError(dir, errno);
} }
struct dirent* entry; struct dirent* entry;
while ((entry = readdir(d)) != NULL) { while ((entry = readdir(d)) != NULL) {
@ -420,7 +424,7 @@ class PosixEnv : public Env {
virtual Status DeleteFile(const std::string& fname) { virtual Status DeleteFile(const std::string& fname) {
Status result; Status result;
if (unlink(fname.c_str()) != 0) { if (unlink(fname.c_str()) != 0) {
result = IOError(fname, errno);
result = PosixError(fname, errno);
} }
return result; return result;
} }
@ -428,7 +432,7 @@ class PosixEnv : public Env {
virtual Status CreateDir(const std::string& name) { virtual Status CreateDir(const std::string& name) {
Status result; Status result;
if (mkdir(name.c_str(), 0755) != 0) { if (mkdir(name.c_str(), 0755) != 0) {
result = IOError(name, errno);
result = PosixError(name, errno);
} }
return result; return result;
} }
@ -436,7 +440,7 @@ class PosixEnv : public Env {
virtual Status DeleteDir(const std::string& name) { virtual Status DeleteDir(const std::string& name) {
Status result; Status result;
if (rmdir(name.c_str()) != 0) { if (rmdir(name.c_str()) != 0) {
result = IOError(name, errno);
result = PosixError(name, errno);
} }
return result; return result;
} }
@ -446,7 +450,7 @@ class PosixEnv : public Env {
struct stat sbuf; struct stat sbuf;
if (stat(fname.c_str(), &sbuf) != 0) { if (stat(fname.c_str(), &sbuf) != 0) {
*size = 0; *size = 0;
s = IOError(fname, errno);
s = PosixError(fname, errno);
} else { } else {
*size = sbuf.st_size; *size = sbuf.st_size;
} }
@ -456,7 +460,7 @@ class PosixEnv : public Env {
virtual Status RenameFile(const std::string& src, const std::string& target) { virtual Status RenameFile(const std::string& src, const std::string& target) {
Status result; Status result;
if (rename(src.c_str(), target.c_str()) != 0) { if (rename(src.c_str(), target.c_str()) != 0) {
result = IOError(src, errno);
result = PosixError(src, errno);
} }
return result; return result;
} }
@ -466,12 +470,12 @@ class PosixEnv : public Env {
Status result; Status result;
int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644); int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
if (fd < 0) { if (fd < 0) {
result = IOError(fname, errno);
result = PosixError(fname, errno);
} else if (!locks_.Insert(fname)) { } else if (!locks_.Insert(fname)) {
close(fd); close(fd);
result = Status::IOError("lock " + fname, "already held by process"); result = Status::IOError("lock " + fname, "already held by process");
} else if (LockOrUnlock(fd, true) == -1) { } else if (LockOrUnlock(fd, true) == -1) {
result = IOError("lock " + fname, errno);
result = PosixError("lock " + fname, errno);
close(fd); close(fd);
locks_.Remove(fname); locks_.Remove(fname);
} else { } else {
@ -487,7 +491,7 @@ class PosixEnv : public Env {
PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock); PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
Status result; Status result;
if (LockOrUnlock(my_lock->fd_, false) == -1) { if (LockOrUnlock(my_lock->fd_, false) == -1) {
result = IOError("unlock", errno);
result = PosixError("unlock", errno);
} }
locks_.Remove(my_lock->name_); locks_.Remove(my_lock->name_);
close(my_lock->fd_); close(my_lock->fd_);
@ -524,7 +528,7 @@ class PosixEnv : public Env {
FILE* f = fopen(fname.c_str(), "w"); FILE* f = fopen(fname.c_str(), "w");
if (f == NULL) { if (f == NULL) {
*result = NULL; *result = NULL;
return IOError(fname, errno);
return PosixError(fname, errno);
} else { } else {
*result = new PosixLogger(f, &PosixEnv::gettid); *result = new PosixLogger(f, &PosixEnv::gettid);
return Status::OK(); return Status::OK();

+ 18
- 0
util/env_test.cc View File

@ -99,6 +99,24 @@ TEST(EnvTest, StartThread) {
ASSERT_EQ(state.val, 3); ASSERT_EQ(state.val, 3);
} }
TEST(EnvTest, TestOpenNonExistentFile) {
// Write some test data to a single file that will be opened |n| times.
std::string test_dir;
ASSERT_OK(env_->GetTestDirectory(&test_dir));
std::string non_existent_file = test_dir + "/non_existent_file";
ASSERT_TRUE(!env_->FileExists(non_existent_file));
RandomAccessFile* random_access_file;
Status status = env_->NewRandomAccessFile(
non_existent_file, &random_access_file);
ASSERT_TRUE(status.IsNotFound());
SequentialFile* sequential_file;
status = env_->NewSequentialFile(non_existent_file, &sequential_file);
ASSERT_TRUE(status.IsNotFound());
}
} // namespace leveldb } // namespace leveldb
int main(int argc, char** argv) { int main(int argc, char** argv) {

Loading…
Cancel
Save