git-svn-id: https://leveldb.googlecode.com/svn/trunk@2 62dab493-f737-651d-591e-8d6aee1b9529
| @ -0,0 +1,8 @@ | |||
| # Names should be added to this file like so: | |||
| # Name or Organization <email address> | |||
| Google Inc. | |||
| # Initial version authors: | |||
| Jeffrey Dean <jeff@google.com> | |||
| Sanjay Ghemawat <sanjay@google.com> | |||
| @ -0,0 +1,64 @@ | |||
| # Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| # Use of this source code is governed by a BSD-style license that can be | |||
| # found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| # INSTRUCTIONS | |||
| # After you've downloaded and installed the Android NDK from: | |||
| # http://developer.android.com/sdk/ndk/index.html | |||
| # 1. In the same directory as this file, Android.mk, type: | |||
| # $ ln -s leveldb ../jni | |||
| # (The Android NDK will only build native projects in | |||
| # subdirectories named "jni".) | |||
| # 2. $ cd .. | |||
| # 3. Execute ndk-build: | |||
| # $ $(ANDROID_NDK_DIR)/ndk-build | |||
LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE := leveldb

# Build flags:
# - LEVELDB_PLATFORM_ANDROID to use the correct port header: port_android.h
LOCAL_CFLAGS := -DLEVELDB_PLATFORM_ANDROID -std=gnu++0x

# Headers are included as "db/...", "table/...", etc., so the project root
# (two directories above this file) must be on the include path.
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../

# Sources use the .cc extension rather than the NDK default of .cpp.
LOCAL_CPP_EXTENSION := .cc

# NOTE(review): this list includes db_bench.cc and the test helpers
# (testharness.cc, testutil.cc) in the shared-library build — confirm they
# are intended to ship in the library rather than in a separate target.
LOCAL_SRC_FILES := ./db/builder.cc \
./db/db_bench.cc \
./db/db_impl.cc \
./db/db_iter.cc \
./db/filename.cc \
./db/dbformat.cc \
./db/log_reader.cc \
./db/log_writer.cc \
./db/memtable.cc \
./db/repair.cc \
./db/table_cache.cc \
./db/version_edit.cc \
./db/version_set.cc \
./db/write_batch.cc \
./port/port_android.cc \
./table/block.cc \
./table/block_builder.cc \
./table/format.cc \
./table/iterator.cc \
./table/merger.cc \
./table/table.cc \
./table/table_builder.cc \
./table/two_level_iterator.cc \
./util/arena.cc \
./util/cache.cc \
./util/coding.cc \
./util/comparator.cc \
./util/crc32c.cc \
./util/env.cc \
./util/env_posix.cc \
./util/hash.cc \
./util/histogram.cc \
./util/logging.cc \
./util/options.cc \
./util/status.cc \
./util/testharness.cc \
./util/testutil.cc
include $(BUILD_SHARED_LIBRARY)
| @ -0,0 +1,6 @@ | |||
| # Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| # Use of this source code is governed by a BSD-style license that can be | |||
| # found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
# Build only for the armeabi-v7a ABI.
APP_ABI := armeabi-v7a
# Link the GNU STL statically so the .so carries its own C++ runtime.
APP_STL := gnustl_static
| @ -0,0 +1,27 @@ | |||
| Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| * Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| * Redistributions in binary form must reproduce the above | |||
| copyright notice, this list of conditions and the following disclaimer | |||
| in the documentation and/or other materials provided with the | |||
| distribution. | |||
| * Neither the name of Google Inc. nor the names of its | |||
| contributors may be used to endorse or promote products derived from | |||
| this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| @ -0,0 +1,134 @@ | |||
| # Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| # Use of this source code is governed by a BSD-style license that can be | |||
| # found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
CC = g++

# Uncomment one of the following to switch between debug and opt mode
#OPT = -O2 -DNDEBUG
OPT = -g2

CFLAGS = -c -DLEVELDB_PLATFORM_POSIX -I. -std=c++0x $(OPT)

LDFLAGS=-lpthread

# NOTE: the dbformat object was previously listed as ./db/format.o, but the
# source file is db/dbformat.cc (cf. Android.mk and the db/dbformat.h
# includes), so the implicit .cc.o rule could never build it.
LIBOBJECTS = \
	./db/builder.o \
	./db/db_impl.o \
	./db/db_iter.o \
	./db/filename.o \
	./db/dbformat.o \
	./db/log_reader.o \
	./db/log_writer.o \
	./db/memtable.o \
	./db/repair.o \
	./db/table_cache.o \
	./db/version_edit.o \
	./db/version_set.o \
	./db/write_batch.o \
	./port/port_posix.o \
	./port/sha1_portable.o \
	./table/block.o \
	./table/block_builder.o \
	./table/format.o \
	./table/iterator.o \
	./table/merger.o \
	./table/table.o \
	./table/table_builder.o \
	./table/two_level_iterator.o \
	./util/arena.o \
	./util/cache.o \
	./util/coding.o \
	./util/comparator.o \
	./util/crc32c.o \
	./util/env.o \
	./util/env_posix.o \
	./util/hash.o \
	./util/histogram.o \
	./util/logging.o \
	./util/options.o \
	./util/status.o

TESTUTIL = ./util/testutil.o
TESTHARNESS = ./util/testharness.o $(TESTUTIL)

TESTS = \
	arena_test \
	cache_test \
	coding_test \
	corruption_test \
	crc32c_test \
	db_test \
	dbformat_test \
	env_test \
	filename_test \
	log_test \
	sha1_test \
	skiplist_test \
	table_test \
	version_edit_test \
	write_batch_test

PROGRAMS = db_bench $(TESTS)

all: $(PROGRAMS)

check: $(TESTS)
	for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done

clean:
	rm -f $(PROGRAMS) */*.o

# Libraries ($(LDFLAGS)) are placed after the object files so that linkers
# running with --as-needed still resolve the pthread symbols.
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
	$(CC) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) $(LDFLAGS) -o $@

arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

sha1_test: port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CC) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) $(LDFLAGS) -o $@

# Implicit rule: compile each .cc into a sibling .o.
.cc.o:
	$(CC) $(CFLAGS) $< -o $@

# TODO(gabor): dependencies for .o files
# TODO(gabor): Build library
| @ -0,0 +1,51 @@ | |||
| leveldb: A key-value store | |||
| Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com) | |||
| The code under this directory implements a system for maintaining a | |||
| persistent key/value store. | |||
| See doc/index.html for more explanation. | |||
| See doc/db_layout.txt for a brief overview of the implementation. | |||
| The public interface is in include/*.h. Callers should not include or | |||
| rely on the details of any other header files in this package. Those | |||
| internal APIs may be changed without warning. | |||
| Guide to header files: | |||
| include/db.h | |||
| Main interface to the DB: Start here | |||
| include/options.h | |||
| Control over the behavior of an entire database, and also | |||
| control over the behavior of individual reads and writes. | |||
| include/comparator.h | |||
| Abstraction for user-specified comparison function. If you want | |||
| just bytewise comparison of keys, you can use the default comparator, | |||
| but clients can write their own comparator implementations if they | |||
| want custom ordering (e.g. to handle different character | |||
| encodings, etc.) | |||
| include/iterator.h | |||
| Interface for iterating over data. You can get an iterator | |||
| from a DB object. | |||
| include/write_batch.h | |||
| Interface for atomically applying multiple updates to a database. | |||
| include/slice.h | |||
| A simple module for maintaining a pointer and a length into some | |||
| other byte array. | |||
| include/status.h | |||
| Status is returned from many of the public interfaces and is used | |||
| to report success and various kinds of errors. | |||
| include/env.h | |||
Abstraction of the OS environment. A POSIX implementation of
| this interface is in util/env_posix.cc | |||
| include/table.h | |||
| include/table_builder.h | |||
| Lower-level modules that most clients probably won't use directly | |||
| @ -0,0 +1,23 @@ | |||
| Before adding to chrome | |||
| ----------------------- | |||
| - multi-threaded test/benchmark | |||
| - Allow missing crc32c in Table format? | |||
| Maybe afterwards | |||
| ---------------- | |||
| ss | |||
| - Stats | |||
| - Speed up backwards scan (avoid three passes over data) | |||
| db | |||
| - Maybe implement DB::BulkDeleteForRange(start_key, end_key) | |||
| that would blow away files whose ranges are entirely contained | |||
| within [start_key..end_key]? For Chrome, deletion of obsolete | |||
| object stores, etc. can be done in the background anyway, so | |||
| probably not that important. | |||
| api changes? | |||
| - Efficient large value reading and writing | |||
| Faster Get implementation | |||
| @ -0,0 +1,97 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/builder.h" | |||
| #include "db/filename.h" | |||
| #include "db/dbformat.h" | |||
| #include "db/table_cache.h" | |||
| #include "db/version_edit.h" | |||
| #include "include/db.h" | |||
| #include "include/env.h" | |||
| #include "include/iterator.h" | |||
namespace leveldb {

// Builds a Table file from the contents of *iter (see db/builder.h for the
// full contract).  On success the file metadata is recorded in *meta and the
// file (plus any large-value refs encountered) is registered in *edit.  If
// *iter is empty, meta->file_size stays 0 and no file is produced.
// Assumes *iter yields keys in ascending order: the first key becomes
// meta->smallest and the last key seen becomes meta->largest.
Status BuildTable(const std::string& dbname,
                  Env* env,
                  const Options& options,
                  TableCache* table_cache,
                  Iterator* iter,
                  FileMetaData* meta,
                  VersionEdit* edit) {
  Status s;
  meta->file_size = 0;
  iter->SeekToFirst();

  std::string fname = TableFileName(dbname, meta->number);
  if (iter->Valid()) {
    WritableFile* file;
    s = env->NewWritableFile(fname, &file);
    if (!s.ok()) {
      return s;
    }

    TableBuilder* builder = new TableBuilder(options, file);
    meta->smallest.DecodeFrom(iter->key());
    for (; iter->Valid(); iter->Next()) {
      Slice key = iter->key();
      meta->largest.DecodeFrom(key);
      if (ExtractValueType(key) == kTypeLargeValueRef) {
        // A large-value entry stores a fixed-size reference hash as its
        // value; any other length means the input is corrupt.
        if (iter->value().size() != LargeValueRef::ByteSize()) {
          s = Status::Corruption("invalid indirect reference hash value (L0)");
          break;
        }
        edit->AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
                               meta->number,
                               iter->key());
      }
      builder->Add(key, iter->value());
    }

    // Finish and check for builder errors
    if (s.ok()) {
      s = builder->Finish();
      if (s.ok()) {
        meta->file_size = builder->FileSize();
        assert(meta->file_size > 0);
      }
    } else {
      builder->Abandon();
    }
    delete builder;

    // Finish and check for file errors.
    // NOTE(review): when an earlier error occurred, the file is deleted below
    // without Close() ever being called — confirm WritableFile's destructor
    // releases the underlying descriptor.
    if (s.ok()) {
      s = file->Sync();
    }
    if (s.ok()) {
      s = file->Close();
    }
    delete file;
    file = NULL;

    if (s.ok()) {
      // Verify that the table is usable
      Iterator* it = table_cache->NewIterator(ReadOptions(), meta->number);
      s = it->status();
      delete it;
    }
  }

  // Check for input iterator errors
  if (!iter->status().ok()) {
    s = iter->status();
  }

  if (s.ok() && meta->file_size > 0) {
    // Register the new table at level 0.
    edit->AddFile(0, meta->number, meta->file_size,
                  meta->smallest, meta->largest);
  } else {
    // Either nothing was written or something failed: leave no partial file.
    env->DeleteFile(fname);
  }
  return s;
}

}  // namespace leveldb
| @ -0,0 +1,36 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
#ifndef STORAGE_LEVELDB_DB_BUILDER_H_
#define STORAGE_LEVELDB_DB_BUILDER_H_

// NOTE(review): std::string is used below; confirm <string> is reached via
// include/status.h.
#include "include/status.h"

namespace leveldb {

// Forward declarations keep this header light for its callers.
struct Options;
struct FileMetaData;

class Env;
class Iterator;
class TableCache;
class VersionEdit;

// Build a Table file from the contents of *iter.  The generated file
// will be named according to meta->number.  On success, the rest of
// *meta will be filled with metadata about the generated table, and
// large value refs and the added file information will be added to
// *edit.  If no data is present in *iter, meta->file_size will be set
// to zero, and no Table file will be produced.
extern Status BuildTable(const std::string& dbname,
                         Env* env,
                         const Options& options,
                         TableCache* table_cache,
                         Iterator* iter,
                         FileMetaData* meta,
                         VersionEdit* edit);

}  // namespace leveldb

#endif  // STORAGE_LEVELDB_DB_BUILDER_H_
| @ -0,0 +1,366 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/db.h" | |||
| #include <errno.h> | |||
| #include <fcntl.h> | |||
| #include <sys/stat.h> | |||
| #include <sys/types.h> | |||
| #include "include/env.h" | |||
| #include "include/table.h" | |||
| #include "include/write_batch.h" | |||
| #include "db/db_impl.h" | |||
| #include "db/filename.h" | |||
| #include "db/version_set.h" | |||
| #include "util/logging.h" | |||
| #include "util/testharness.h" | |||
| #include "util/testutil.h" | |||
namespace leveldb {

// Length in bytes of every generated test value (see Value()).
static const int kValueSize = 1000;

// Test fixture that builds small databases, corrupts their on-disk files,
// and checks how the DB reacts on reopen/repair.
class CorruptionTest {
 public:
  test::ErrorEnv env_;    // Env wrapper that can inject writable-file errors
  Random rnd_;
  std::string dbname_;
  Options options_;
  DB* db_;

  CorruptionTest() : rnd_(test::RandomSeed()) {
    options_.env = &env_;
    dbname_ = test::TmpDir() + "/db_test";
    DestroyDB(dbname_, options_);
    db_ = NULL;
    // Create the database once, then flip create_if_missing back off so
    // later reopens exercise the "existing db" path.
    options_.create_if_missing = true;
    Reopen();
    options_.create_if_missing = false;
  }

  ~CorruptionTest() {
     delete db_;
     DestroyDB(dbname_, Options());
  }

  // Close and reopen the DB; returns the status instead of asserting.
  Status TryReopen(Options* options = NULL) {
    delete db_;
    db_ = NULL;
    Options opt = (options ? *options : options_);
    opt.env = &env_;  // always route through the error-injecting env
    return DB::Open(opt, dbname_, &db_);
  }

  void Reopen(Options* options = NULL) {
    ASSERT_OK(TryReopen(options));
  }

  // Close the DB and run the repair tool over its files.
  void RepairDB() {
    delete db_;
    db_ = NULL;
    ASSERT_OK(::leveldb::RepairDB(dbname_, options_));
  }

  // Write key/value pairs for keys 0..n-1, one WriteBatch per key.
  void Build(int n) {
    std::string key_space, value_space;
    WriteBatch batch;
    for (int i = 0; i < n; i++) {
      //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
      Slice key = Key(i, &key_space);
      batch.Clear();
      batch.Put(key, Value(i, &value_space));
      ASSERT_OK(db_->Write(WriteOptions(), &batch));
    }
  }

  // Scan the whole DB counting how many generated key/value pairs survived;
  // asserts the count of intact pairs lies in [min_expected, max_expected].
  void Check(int min_expected, int max_expected) {
    int next_expected = 0;
    int missed = 0;       // keys skipped over (lost)
    int bad_keys = 0;     // keys that fail to parse or are out of order
    int bad_values = 0;   // keys whose stored value differs from Value(key)
    int correct = 0;
    std::string value_space;
    Iterator* iter = db_->NewIterator(ReadOptions());
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      uint64_t key;
      Slice in(iter->key());
      if (!ConsumeDecimalNumber(&in, &key) ||
          !in.empty() ||
          key < next_expected) {
        bad_keys++;
        continue;
      }
      missed += (key - next_expected);
      next_expected = key + 1;
      if (iter->value() != Value(key, &value_space)) {
        bad_values++;
      } else {
        correct++;
      }
    }
    delete iter;

    fprintf(stderr,
            "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n",
            min_expected, max_expected, correct, bad_keys, bad_values, missed);
    ASSERT_LE(min_expected, correct);
    ASSERT_GE(max_expected, correct);
  }

  // Flip the high bit of bytes_to_corrupt bytes starting at offset (negative
  // offset counts back from end-of-file) in one randomly chosen file of the
  // given type.
  void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
    // Pick file to corrupt
    std::vector<std::string> filenames;
    ASSERT_OK(env_.GetChildren(dbname_, &filenames));
    uint64_t number;
    LargeValueRef large_ref;
    FileType type;
    std::vector<std::string> candidates;
    // NOTE(review): int vs. size() comparison triggers -Wsign-compare.
    for (int i = 0; i < filenames.size(); i++) {
      if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
          type == filetype) {
        candidates.push_back(dbname_ + "/" + filenames[i]);
      }
    }
    ASSERT_TRUE(!candidates.empty()) << filetype;
    std::string fname = candidates[rnd_.Uniform(candidates.size())];

    struct stat sbuf;
    if (stat(fname.c_str(), &sbuf) != 0) {
      const char* msg = strerror(errno);
      ASSERT_TRUE(false) << fname << ": " << msg;
    }

    if (offset < 0) {
      // Relative to end of file; make it absolute
      if (-offset > sbuf.st_size) {
        offset = 0;
      } else {
        offset = sbuf.st_size + offset;
      }
    }
    // Clamp the corruption range to the actual file size.
    if (offset > sbuf.st_size) {
      offset = sbuf.st_size;
    }
    if (offset + bytes_to_corrupt > sbuf.st_size) {
      bytes_to_corrupt = sbuf.st_size - offset;
    }

    // Do it
    std::string contents;
    Status s = ReadFileToString(Env::Default(), fname, &contents);
    ASSERT_TRUE(s.ok()) << s.ToString();
    for (int i = 0; i < bytes_to_corrupt; i++) {
      contents[i + offset] ^= 0x80;
    }
    s = WriteStringToFile(Env::Default(), contents, fname);
    ASSERT_TRUE(s.ok()) << s.ToString();
  }

  // Read an integer-valued DB property; returns all-ones if unknown.
  uint64_t Property(const std::string& name) {
    uint64_t result;
    if (!db_->GetProperty(name, &result)) {
      result = ~static_cast<uint64_t>(0);
    }
    return result;
  }

  // Return the ith key (zero-padded decimal, 16 digits).
  Slice Key(int i, std::string* storage) {
    char buf[100];
    snprintf(buf, sizeof(buf), "%016d", i);
    storage->assign(buf, strlen(buf));
    return Slice(*storage);
  }

  // Return the value to associate with the specified key; deterministic
  // because the generator is seeded with k itself.
  Slice Value(int k, std::string* storage) {
    Random r(k);
    return test::RandomString(&r, kValueSize, storage);
  }
};
// Corrupting the log should lose only the records that were damaged.
TEST(CorruptionTest, Recovery) {
  Build(10);
  Check(10, 10);
  Corrupt(kLogFile, 19, 1);      // WriteBatch tag for first record
  Corrupt(kLogFile, 2*kValueSize, 1);   // Somewhere in second log record?
  Reopen();
  Check(8, 8);
}

// Reopen must fail when every new writable file errors out.
TEST(CorruptionTest, RecoverWriteError) {
  env_.writable_file_error_ = true;
  Status s = TryReopen();
  ASSERT_TRUE(!s.ok());
}

// A failed minor compaction must surface as a write error, and the DB must
// still reopen once the error condition clears.
TEST(CorruptionTest, NewFileErrorDuringWrite) {
  // Do enough writing to force minor compaction
  env_.writable_file_error_ = true;
  const int num = 3 + (Options().write_buffer_size / kValueSize);
  std::string value_storage;
  Status s;
  for (int i = 0; s.ok() && i < num; i++) {
    WriteBatch batch;
    batch.Put("a", Value(100, &value_storage));
    s = db_->Write(WriteOptions(), &batch);
  }
  ASSERT_TRUE(!s.ok());
  ASSERT_GE(env_.num_writable_file_errors_, 1);
  env_.writable_file_error_ = false;
  Reopen();
}

// One corrupted byte in a table file should lose at most one entry.
TEST(CorruptionTest, TableFile) {
  Build(100);
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  dbi->TEST_CompactMemTable();
  dbi->TEST_CompactRange(0, "", "~");
  dbi->TEST_CompactRange(1, "", "~");

  Corrupt(kTableFile, 100, 1);
  Check(99, 99);
}
// Corrupting a table's index region should still leave most entries readable
// after a reopen.
TEST(CorruptionTest, TableFileIndexData) {
  Build(10000);  // Enough to build multiple Tables
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  dbi->TEST_CompactMemTable();
  dbi->TEST_CompactRange(0, "", "~");
  dbi->TEST_CompactRange(1, "", "~");

  Corrupt(kTableFile, -1000, 500);
  Reopen();
  Check(5000, 9999);
}

// RepairDB should reconstruct everything when only the descriptor is gone.
TEST(CorruptionTest, MissingDescriptor) {
  Build(1000);
  RepairDB();
  Reopen();
  Check(1000, 1000);
}

// After repair, new writes must get sequence numbers above all recovered
// entries — otherwise they would be hidden by older versions.
TEST(CorruptionTest, SequenceNumberRecovery) {
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v3"));
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v4"));
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v5"));
  RepairDB();
  Reopen();
  std::string v;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("v5", v);
  // Write something.  If sequence number was not recovered properly,
  // it will be hidden by an earlier write.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6"));
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("v6", v);
  Reopen();
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("v6", v);
}

// Values above large_value_threshold must survive repair and reopen.
TEST(CorruptionTest, LargeValueRecovery) {
  Options options;
  options.large_value_threshold = 10000;
  Reopen(&options);

  Random rnd(301);
  std::string big;
  ASSERT_OK(db_->Put(WriteOptions(),
                     "foo", test::RandomString(&rnd, 100000, &big)));
  std::string v;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ(big, v);

  RepairDB();
  Reopen();
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ(big, v);

  Reopen();
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ(big, v);
}
// A corrupted descriptor must fail to open, but be fixable with RepairDB.
TEST(CorruptionTest, CorruptedDescriptor) {
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  dbi->TEST_CompactMemTable();
  dbi->TEST_CompactRange(0, "", "~");

  Corrupt(kDescriptorFile, 0, 1000);
  Status s = TryReopen();
  ASSERT_TRUE(!s.ok());

  RepairDB();
  Reopen();
  std::string v;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("hello", v);
}

// Without paranoid checks, compaction should proceed past a corrupt input
// file and eventually drain level 0.
TEST(CorruptionTest, CompactionInputError) {
  Build(10);
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  dbi->TEST_CompactMemTable();
  ASSERT_EQ(1, Property("leveldb.num-files-at-level0"));

  Corrupt(kTableFile, 100, 1);
  Check(9, 9);

  // Force compactions by writing lots of values
  Build(10000);
  Check(10000, 10000);
  dbi->TEST_CompactRange(0, "", "~");
  ASSERT_EQ(0, Property("leveldb.num-files-at-level0"));
}

// With paranoid checks enabled, writes into a corrupted DB must eventually
// fail rather than silently proceeding.
TEST(CorruptionTest, CompactionInputErrorParanoid) {
  Options options;
  options.paranoid_checks = true;
  Reopen(&options);

  Build(10);
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  dbi->TEST_CompactMemTable();
  ASSERT_EQ(1, Property("leveldb.num-files-at-level0"));

  Corrupt(kTableFile, 100, 1);
  Check(9, 9);

  // Write must eventually fail because of corrupted table
  Status s;
  std::string tmp1, tmp2;
  for (int i = 0; i < 10000 && s.ok(); i++) {
    s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2));
  }
  ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
}

// Corruption in one table must not affect reads/writes of unrelated keys.
TEST(CorruptionTest, UnrelatedKeys) {
  Build(10);
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
  dbi->TEST_CompactMemTable();
  Corrupt(kTableFile, 100, 1);

  std::string tmp1, tmp2;
  ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2)));
  std::string v;
  ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
  ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
  dbi->TEST_CompactMemTable();
  ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
  ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
}

}  // namespace leveldb
// Test driver: runs every TEST(CorruptionTest, ...) registered above.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,376 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include <sys/types.h> | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include "db/db_impl.h" | |||
| #include "db/version_set.h" | |||
| #include "include/cache.h" | |||
| #include "include/db.h" | |||
| #include "include/env.h" | |||
| #include "include/write_batch.h" | |||
| #include "util/histogram.h" | |||
| #include "util/random.h" | |||
| #include "util/testutil.h" | |||
// Comma-separated list of operations to run in the specified order
//   Actual benchmarks:
//      writeseq      -- write N values in sequential key order
//      writerandom   -- write N values in random key order
//      writebig      -- write N/1000 100K values in random order
//      readseq       -- read N values sequentially
//      readrandom    -- read N values in random order
//   Meta operations:
//      compact     -- Compact the entire DB
//      heapprofile -- Dump a heap profile (if supported by this port)
//      sync        -- switch to synchronous writes (not the default)
//      nosync      -- switch to asynchronous writes (the default)
//      tenth       -- divide N by 10 (i.e., following benchmarks are smaller)
//      normal      -- reset N back to its normal value (1000000)
static const char* FLAGS_benchmarks =
    "writeseq,"
    "writeseq,"
    "writerandom,"
    "sync,tenth,tenth,writerandom,nosync,normal,"
    "readseq,"
    "readrandom,"
    "compact,"
    "readseq,"
    "readrandom,"
    "writebig";

// Number of key/values to place in database
static int FLAGS_num = 1000000;

// Size of each value
static int FLAGS_value_size = 100;

// Arrange to generate values that shrink to this fraction of
// their original size after compression
static double FLAGS_compression_ratio = 0.25;

// Print histogram of operation timings
static bool FLAGS_histogram = false;

// Number of bytes to buffer in memtable before compacting
static int FLAGS_write_buffer_size = 1 << 20;
| namespace leveldb { | |||
| // Helper for quickly generating random data. | |||
| namespace { | |||
| class RandomGenerator { | |||
| private: | |||
| std::string data_; | |||
| int pos_; | |||
| public: | |||
| RandomGenerator() { | |||
| // We use a limited amount of data over and over again and ensure | |||
| // that it is larger than the compression window (32KB), and also | |||
| // large enough to serve all typical value sizes we want to write. | |||
| Random rnd(301); | |||
| std::string piece; | |||
| while (data_.size() < 1048576) { | |||
| // Add a short fragment that is as compressible as specified | |||
| // by FLAGS_compression_ratio. | |||
| test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece); | |||
| data_.append(piece); | |||
| } | |||
| pos_ = 0; | |||
| } | |||
| Slice Generate(int len) { | |||
| if (pos_ + len > data_.size()) { | |||
| pos_ = 0; | |||
| assert(len < data_.size()); | |||
| } | |||
| pos_ += len; | |||
| return Slice(data_.data() + pos_ - len, len); | |||
| } | |||
| }; | |||
| } | |||
| class Benchmark { | |||
 private:
  Cache* cache_;           // block cache (200 MB, created in the ctor)
  DB* db_;
  int num_;                // ops per benchmark; starts at FLAGS_num
  bool sync_;              // presumably toggled by "sync"/"nosync" meta ops — confirm
  int heap_counter_;
  double start_;           // benchmark start time, in seconds
  double last_op_finish_;  // finish time of the previous op, in seconds
  int64_t bytes_;          // bytes processed; reported as MB/s in Stop()
  std::string message_;    // extra text appended to the report line
  Histogram hist_;         // per-op latency histogram, in microseconds
  RandomGenerator gen_;
  Random rand_;

  // State kept for progress messages
  int done_;
  int next_report_;     // When to report next
  // Reset all per-benchmark counters; called before each benchmark runs.
  void Start() {
    start_ = Env::Default()->NowMicros() * 1e-6;
    bytes_ = 0;
    message_.clear();
    last_op_finish_ = start_;
    hist_.Clear();
    done_ = 0;
    next_report_ = 100;
  }

  // Record completion of one operation: feeds the latency histogram (when
  // enabled) and prints coarse progress to stderr.
  void FinishedSingleOp() {
    if (FLAGS_histogram) {
      double now = Env::Default()->NowMicros() * 1e-6;
      double micros = (now - last_op_finish_) * 1e6;
      hist_.Add(micros);
      if (micros > 20000) {
        // Flag unusually slow operations (> 20ms) immediately.
        fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
        fflush(stderr);
      }
      last_op_finish_ = now;
    }

    done_++;
    if (done_ >= next_report_) {
      // Report at 100, 200, ..., 1000, 2000, ..., 10000, 20000, ... ops.
      if (next_report_ < 1000) {
        next_report_ += 100;
      } else if (next_report_ < 10000) {
        next_report_ += 1000;
      } else if (next_report_ < 100000) {
        next_report_ += 10000;
      } else {
        next_report_ += 100000;
      }
      fprintf(stderr, "... finished %d ops%30s\r", done_, "");
      fflush(stderr);
    }
  }

  // Print the summary line for the benchmark named 'name': micros/op plus
  // an optional MB/s figure and latency histogram.
  void Stop(const Slice& name) {
    double finish = Env::Default()->NowMicros() * 1e-6;

    // Pretend at least one op was done in case we are running a benchmark
    // that does not call FinishedSingleOp().
    if (done_ < 1) done_ = 1;

    if (bytes_ > 0) {
      char rate[100];
      snprintf(rate, sizeof(rate), "%5.1f MB/s",
               (bytes_ / 1048576.0) / (finish - start_));
      if (!message_.empty()) {
        message_.push_back(' ');
      }
      message_.append(rate);
    }

    fprintf(stdout, "%-12s : %10.3f micros/op;%s%s\n",
            name.ToString().c_str(),
            (finish - start_) * 1e6 / done_,
            (message_.empty() ? "" : " "),
            message_.c_str());
    if (FLAGS_histogram) {
      fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
    }
    fflush(stdout);
  }
| public: | |||
| enum Order { SEQUENTIAL, RANDOM }; | |||
| Benchmark() : cache_(NewLRUCache(200<<20)), | |||
| db_(NULL), | |||
| num_(FLAGS_num), | |||
| sync_(false), | |||
| heap_counter_(0), | |||
| bytes_(0), | |||
| rand_(301) { | |||
| std::vector<std::string> files; | |||
| Env::Default()->GetChildren("/tmp/dbbench", &files); | |||
| for (int i = 0; i < files.size(); i++) { | |||
| if (Slice(files[i]).starts_with("heap-")) { | |||
| Env::Default()->DeleteFile("/tmp/dbbench/" + files[i]); | |||
| } | |||
| } | |||
| DestroyDB("/tmp/dbbench", Options()); | |||
| } | |||
| ~Benchmark() { | |||
| delete db_; | |||
| delete cache_; | |||
| } | |||
| void Run() { | |||
| Options options; | |||
| options.create_if_missing = true; | |||
| options.max_open_files = 10000; | |||
| options.block_cache = cache_; | |||
| options.write_buffer_size = FLAGS_write_buffer_size; | |||
| Start(); | |||
| Status s = DB::Open(options, "/tmp/dbbench", &db_); | |||
| Stop("open"); | |||
| if (!s.ok()) { | |||
| fprintf(stderr, "open error: %s\n", s.ToString().c_str()); | |||
| exit(1); | |||
| } | |||
| const char* benchmarks = FLAGS_benchmarks; | |||
| while (benchmarks != NULL) { | |||
| const char* sep = strchr(benchmarks, ','); | |||
| Slice name; | |||
| if (sep == NULL) { | |||
| name = benchmarks; | |||
| benchmarks = NULL; | |||
| } else { | |||
| name = Slice(benchmarks, sep - benchmarks); | |||
| benchmarks = sep + 1; | |||
| } | |||
| Start(); | |||
| if (name == Slice("writeseq")) { | |||
| Write(SEQUENTIAL, num_, FLAGS_value_size); | |||
| } else if (name == Slice("writerandom")) { | |||
| Write(RANDOM, num_, FLAGS_value_size); | |||
| } else if (name == Slice("writebig")) { | |||
| Write(RANDOM, num_ / 1000, 100 * 1000); | |||
| } else if (name == Slice("readseq")) { | |||
| Read(SEQUENTIAL); | |||
| } else if (name == Slice("readrandom")) { | |||
| Read(RANDOM); | |||
| } else if (name == Slice("compact")) { | |||
| Compact(); | |||
| } else if (name == Slice("heapprofile")) { | |||
| HeapProfile(); | |||
| } else if (name == Slice("sync")) { | |||
| sync_ = true; | |||
| } else if (name == Slice("nosync")) { | |||
| sync_ = false; | |||
| } else if (name == Slice("tenth")) { | |||
| num_ = num_ / 10; | |||
| } else if (name == Slice("normal")) { | |||
| num_ = FLAGS_num; | |||
| } else { | |||
| fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str()); | |||
| } | |||
| Stop(name); | |||
| } | |||
| } | |||
| void Write(Order order, int num_entries, int value_size) { | |||
| WriteBatch batch; | |||
| Status s; | |||
| std::string val; | |||
| WriteOptions options; | |||
| options.sync = sync_; | |||
| for (int i = 0; i < num_entries; i++) { | |||
| const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % FLAGS_num); | |||
| char key[100]; | |||
| snprintf(key, sizeof(key), "%012d", k); | |||
| batch.Clear(); | |||
| batch.Put(key, gen_.Generate(value_size)); | |||
| s = db_->Write(options, &batch); | |||
| bytes_ += value_size + strlen(key); | |||
| if (!s.ok()) { | |||
| fprintf(stderr, "put error: %s\n", s.ToString().c_str()); | |||
| exit(1); | |||
| } | |||
| FinishedSingleOp(); | |||
| } | |||
| } | |||
| void Read(Order order) { | |||
| ReadOptions options; | |||
| if (order == SEQUENTIAL) { | |||
| Iterator* iter = db_->NewIterator(options); | |||
| int i = 0; | |||
| for (iter->SeekToFirst(); i < num_ && iter->Valid(); iter->Next()) { | |||
| bytes_ += iter->key().size() + iter->value().size(); | |||
| FinishedSingleOp(); | |||
| ++i; | |||
| } | |||
| delete iter; | |||
| } else { | |||
| std::string value; | |||
| for (int i = 0; i < num_; i++) { | |||
| char key[100]; | |||
| const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % FLAGS_num); | |||
| snprintf(key, sizeof(key), "%012d", k); | |||
| db_->Get(options, key, &value); | |||
| FinishedSingleOp(); | |||
| } | |||
| } | |||
| } | |||
| void Compact() { | |||
| DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |||
| dbi->TEST_CompactMemTable(); | |||
| int max_level_with_files = 1; | |||
| for (int level = 1; level < config::kNumLevels; level++) { | |||
| uint64_t v; | |||
| char name[100]; | |||
| snprintf(name, sizeof(name), "leveldb.num-files-at-level%d", level); | |||
| if (db_->GetProperty(name, &v) && v > 0) { | |||
| max_level_with_files = level; | |||
| } | |||
| } | |||
| for (int level = 0; level < max_level_with_files; level++) { | |||
| dbi->TEST_CompactRange(level, "", "~"); | |||
| } | |||
| } | |||
| static void WriteToFile(void* arg, const char* buf, int n) { | |||
| reinterpret_cast<WritableFile*>(arg)->Append(Slice(buf, n)); | |||
| } | |||
| void HeapProfile() { | |||
| char fname[100]; | |||
| snprintf(fname, sizeof(fname), "/tmp/dbbench/heap-%04d", ++heap_counter_); | |||
| WritableFile* file; | |||
| Status s = Env::Default()->NewWritableFile(fname, &file); | |||
| if (!s.ok()) { | |||
| message_ = s.ToString(); | |||
| return; | |||
| } | |||
| bool ok = port::GetHeapProfile(WriteToFile, file); | |||
| delete file; | |||
| if (!ok) { | |||
| message_ = "not supported"; | |||
| Env::Default()->DeleteFile(fname); | |||
| } | |||
| } | |||
| }; | |||
| } | |||
| int main(int argc, char** argv) { | |||
| for (int i = 1; i < argc; i++) { | |||
| double d; | |||
| int n; | |||
| char junk; | |||
| if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) { | |||
| FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); | |||
| } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { | |||
| FLAGS_compression_ratio = d; | |||
| } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && | |||
| (n == 0 || n == 1)) { | |||
| FLAGS_histogram = n; | |||
| } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { | |||
| FLAGS_num = n; | |||
| } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) { | |||
| FLAGS_value_size = n; | |||
| } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) { | |||
| FLAGS_write_buffer_size = n; | |||
| } else { | |||
| fprintf(stderr, "Invalid flag '%s'\n", argv[i]); | |||
| exit(1); | |||
| } | |||
| } | |||
| leveldb::Benchmark benchmark; | |||
| benchmark.Run(); | |||
| return 0; | |||
| } | |||
| @ -0,0 +1,192 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_ | |||
| #define STORAGE_LEVELDB_DB_DB_IMPL_H_ | |||
| #include <set> | |||
| #include "db/dbformat.h" | |||
| #include "db/log_writer.h" | |||
| #include "db/snapshot.h" | |||
| #include "include/db.h" | |||
| #include "include/env.h" | |||
| #include "port/port.h" | |||
| namespace leveldb { | |||
| class MemTable; | |||
| class TableCache; | |||
| class Version; | |||
| class VersionEdit; | |||
| class VersionSet; | |||
// Concrete implementation of the DB interface.  One instance per open
// database; most state is guarded by mutex_ (see member comments below).
class DBImpl : public DB {
 public:
  DBImpl(const Options& options, const std::string& dbname);
  virtual ~DBImpl();

  // Implementations of the DB interface
  virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
  virtual Status Delete(const WriteOptions&, const Slice& key);
  virtual Status Write(const WriteOptions& options, WriteBatch* updates);
  virtual Status Get(const ReadOptions& options,
                     const Slice& key,
                     std::string* value);
  virtual Iterator* NewIterator(const ReadOptions&);
  virtual const Snapshot* GetSnapshot();
  virtual void ReleaseSnapshot(const Snapshot* snapshot);
  virtual bool GetProperty(const Slice& property, uint64_t* value);
  virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);

  // Extra methods (for testing) that are not in the public DB interface

  // Compact any files in the named level that overlap [begin,end]
  void TEST_CompactRange(
      int level,
      const std::string& begin,
      const std::string& end);

  // Force current memtable contents to be compacted.
  Status TEST_CompactMemTable();

  // Return an internal iterator over the current state of the database.
  // The keys of this iterator are internal keys (see format.h).
  // The returned iterator should be deleted when no longer needed.
  Iterator* TEST_NewInternalIterator();

 private:
  friend class DB;

  // Build an iterator over internal keys; *latest_snapshot is presumably
  // set to the sequence number visible to the iterator (see db_impl.cc).
  Iterator* NewInternalIterator(const ReadOptions&,
                                SequenceNumber* latest_snapshot);

  // Create a fresh descriptor for a brand-new (empty) database.
  Status NewDB();

  // Recover the descriptor from persistent storage.  May do a significant
  // amount of work to recover recently logged updates.  Any changes to
  // be made to the descriptor are added to *edit.
  Status Recover(VersionEdit* edit);

  // Apply the specified updates and save the resulting descriptor to
  // persistent storage.  If cleanup_mem is non-NULL, arrange to
  // delete it when all existing snapshots have gone away iff Install()
  // returns OK.
  Status Install(VersionEdit* edit,
                 uint64_t new_log_number,
                 MemTable* cleanup_mem);

  // May clear *s (i.e. ignore the error), depending on run-time options
  // — presumably paranoid_checks; confirm in db_impl.cc.
  void MaybeIgnoreError(Status* s) const;

  // Delete any unneeded files and stale in-memory entries.
  void DeleteObsoleteFiles();

  // Called when an iterator over a particular version of the
  // descriptor goes away.
  static void Unref(void* arg1, void* arg2);

  // Compact the in-memory write buffer to disk.  Switches to a new
  // log-file/memtable and writes a new descriptor iff successful.
  Status CompactMemTable();

  // Replay the named log file during recovery, adding its effects to
  // *edit and recording the largest sequence number seen in *max_sequence.
  Status RecoverLogFile(uint64_t log_number,
                        VersionEdit* edit,
                        SequenceNumber* max_sequence);

  // Write the contents of *mem to a new level-0 table recorded in *edit.
  Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);

  // Does "batch" contain any values large enough to be stored externally?
  bool HasLargeValues(const WriteBatch& batch) const;

  // Process data in "*updates" and return a status.  "assigned_seq"
  // is the sequence number assigned to the first mod in "*updates".
  // If no large values are encountered, "*final" is set to "updates".
  // If large values were encountered, registers the references of the
  // large values with the VersionSet, writes the large values to
  // files (if appropriate), and allocates a new WriteBatch with the
  // large values replaced with indirect references and stores a
  // pointer to the new WriteBatch in *final.  If *final != updates on
  // return, then the client should delete *final when no longer
  // needed.  Returns OK on success, and an appropriate error
  // otherwise.
  Status HandleLargeValues(SequenceNumber assigned_seq,
                           WriteBatch* updates,
                           WriteBatch** final);

  // Helper routine for HandleLargeValues
  void MaybeCompressLargeValue(
      const Slice& raw_value,
      Slice* file_bytes,
      std::string* scratch,
      LargeValueRef* ref);

  struct CompactionState;

  // Background-compaction machinery: BGWork is the thunk handed to the
  // Env scheduler; BackgroundCall/BackgroundCompaction perform the work.
  void MaybeScheduleCompaction();
  static void BGWork(void* db);
  void BackgroundCall();
  void BackgroundCompaction();
  void CleanupCompaction(CompactionState* compact);
  Status DoCompactionWork(CompactionState* compact);

  Status OpenCompactionOutputFile(CompactionState* compact);
  Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
  Status InstallCompactionResults(CompactionState* compact);

  // Constant after construction
  Env* const env_;
  const InternalKeyComparator internal_comparator_;
  const Options options_;  // options_.comparator == &internal_comparator_
  bool owns_info_log_;     // Delete options_.info_log in destructor?
  const std::string dbname_;

  // table_cache_ provides its own synchronization
  TableCache* table_cache_;

  // Lock over the persistent DB state.  Non-NULL iff successfully acquired.
  FileLock* db_lock_;

  // State below is protected by mutex_
  port::Mutex mutex_;
  port::AtomicPointer shutting_down_;
  port::CondVar bg_cv_;          // Signalled when !bg_compaction_scheduled_
  port::CondVar compacting_cv_;  // Signalled when !compacting_
  SequenceNumber last_sequence_;
  MemTable* mem_;          // Active in-memory write buffer
  WritableFile* logfile_;  // File backing the current log
  log::Writer* log_;       // Writer for the current log file
  uint64_t log_number_;
  SnapshotList snapshots_;

  // Set of table files to protect from deletion because they are
  // part of ongoing compactions.
  std::set<uint64_t> pending_outputs_;

  // Has a background compaction been scheduled or is running?
  bool bg_compaction_scheduled_;

  // Is there a compaction running?
  bool compacting_;

  VersionSet* versions_;

  // Have we encountered a background error in paranoid mode?
  Status bg_error_;

  // No copying allowed
  DBImpl(const DBImpl&);
  void operator=(const DBImpl&);

  // Shorthand for the user-supplied comparator wrapped by
  // internal_comparator_.
  const Comparator* user_comparator() const {
    return internal_comparator_.user_comparator();
  }
};
// Sanitize db options: returns a copy of "src" with out-of-range values
// clamped and required fields filled in.  The caller should delete
// result.info_log if it is not equal to src.info_log.
extern Options SanitizeOptions(const std::string& db,
                               const InternalKeyComparator* icmp,
                               const Options& src);
| } | |||
| #endif // STORAGE_LEVELDB_DB_DB_IMPL_H_ | |||
| @ -0,0 +1,412 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/db_iter.h" | |||
| #include "db/filename.h" | |||
| #include "db/dbformat.h" | |||
| #include "include/env.h" | |||
| #include "include/iterator.h" | |||
| #include "port/port.h" | |||
| #include "util/logging.h" | |||
| #include "util/mutexlock.h" | |||
| namespace leveldb { | |||
#if 0
// Debugging helper (currently compiled out): dump every entry of an
// internal-key iterator to stderr, flagging unparseable entries.
static void DumpInternalIter(Iterator* iter) {
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey k;
    if (!ParseInternalKey(iter->key(), &k)) {
      fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
    } else {
      fprintf(stderr, "@ '%s'\n", k.DebugString().c_str());
    }
  }
}
#endif
| namespace { | |||
| // Memtables and sstables that make the DB representation contain | |||
| // (userkey,seq,type) => uservalue entries. DBIter | |||
| // combines multiple entries for the same userkey found in the DB | |||
| // representation into a single entry while accounting for sequence | |||
| // numbers, deletion markers, overwrites, etc. | |||
class DBIter: public Iterator {
 public:
  // "s" is the snapshot: internal entries with a larger sequence number
  // are invisible through this iterator.  Takes ownership of "iter"
  // (deleted in the destructor); "dbname", "env", "cmp" must outlive
  // this iterator.
  DBIter(const std::string* dbname, Env* env,
         const Comparator* cmp, Iterator* iter, SequenceNumber s)
      : dbname_(dbname),
        env_(env),
        user_comparator_(cmp),
        iter_(iter),
        sequence_(s),
        large_(NULL),
        valid_(false) {
  }
  virtual ~DBIter() {
    delete iter_;
    delete large_;
  }
  virtual bool Valid() const { return valid_; }
  virtual Slice key() const {
    assert(valid_);
    return key_;
  }
  // For ordinary entries, returns the saved value.  For a large-value
  // reference, the backing file is read lazily on first access and the
  // result cached in large_->value; large_->mutex ensures the fetch
  // happens only once.
  virtual Slice value() const {
    assert(valid_);
    if (large_ == NULL) {
      return value_;
    } else {
      MutexLock l(&large_->mutex);
      if (!large_->produced) {
        ReadIndirectValue();
      }
      return large_->value;
    }
  }

  virtual void Next() {
    assert(valid_);
    // iter_ is already positioned past DBIter::key()
    FindNextUserEntry();
  }

  virtual void Prev() {
    assert(valid_);
    bool ignored;
    ScanUntilBeforeCurrentKey(&ignored);
    FindPrevUserEntry();
  }

  virtual void Seek(const Slice& target) {
    // Seek to the newest internal entry for "target" visible at sequence_.
    ParsedInternalKey ikey(target, sequence_, kValueTypeForSeek);
    std::string tmp;
    AppendInternalKey(&tmp, ikey);
    iter_->Seek(tmp);
    FindNextUserEntry();
  }
  virtual void SeekToFirst() {
    iter_->SeekToFirst();
    FindNextUserEntry();
  }
  virtual void SeekToLast();

  // Reports, in order of precedence: an error recorded by this iterator,
  // a pending large-value read error, then the underlying iterator's
  // status.
  virtual Status status() const {
    if (status_.ok()) {
      if (large_ != NULL && !large_->status.ok()) return large_->status;
      return iter_->status();
    } else {
      return status_;
    }
  }

 private:
  void FindNextUserEntry();
  void FindPrevUserEntry();
  void SaveKey(const Slice& k) { key_.assign(k.data(), k.size()); }
  // Copy "v" into value_, first releasing value_'s buffer if its
  // capacity is more than 1MB beyond what is needed (bounds memory use).
  void SaveValue(const Slice& v) {
    if (value_.capacity() > v.size() + 1048576) {
      std::string empty;
      swap(empty, value_);
    }
    value_.assign(v.data(), v.size());
  }
  bool ParseKey(ParsedInternalKey* key);
  void SkipPast(const Slice& k);
  void ScanUntilBeforeCurrentKey(bool* found_live);

  void ReadIndirectValue() const;

  // Lazily-materialized state for a large (indirect) value.
  struct Large {
    port::Mutex mutex;  // Guards the fields below
    std::string value;  // File contents, once produced
    bool produced;      // Has the fetch been attempted yet?
    Status status;      // Outcome of the fetch
  };

  const std::string* const dbname_;
  Env* const env_;

  const Comparator* const user_comparator_;

  // iter_ is positioned just past current entry for DBIter if valid_
  Iterator* const iter_;

  SequenceNumber const sequence_;
  Status status_;
  std::string key_;    // Always a user key
  std::string value_;  // Value, or serialized large-value ref if large_ != NULL
  Large* large_;       // Non-NULL if value is an indirect reference
  bool valid_;

  // No copying allowed
  DBIter(const DBIter&);
  void operator=(const DBIter&);
};
| inline bool DBIter::ParseKey(ParsedInternalKey* ikey) { | |||
| if (!ParseInternalKey(iter_->key(), ikey)) { | |||
| status_ = Status::Corruption("corrupted internal key in DBIter"); | |||
| return false; | |||
| } else { | |||
| return true; | |||
| } | |||
| } | |||
// Position this iterator at the first live user entry at or after the
// current position of iter_.  On success sets valid_, key_, value_
// (and large_ for an indirect value) and leaves iter_ just past the
// run of entries for that user key; otherwise marks the iterator
// invalid.
void DBIter::FindNextUserEntry() {
  // Propagate any deferred large-value read error into status_ and
  // discard the previous entry's large-value state.
  if (large_ != NULL) {
    if (status_.ok() && !large_->status.ok()) {
      status_ = large_->status;
    }
    delete large_;
    large_ = NULL;
  }
  while (iter_->Valid()) {
    ParsedInternalKey ikey;
    if (!ParseKey(&ikey)) {
      // Skip past corrupted entry
      iter_->Next();
      continue;
    }
    if (ikey.sequence > sequence_) {
      // Ignore entries newer than the snapshot
      iter_->Next();
      continue;
    }

    switch (ikey.type) {
      case kTypeDeletion:
        SaveKey(ikey.user_key);  // Make local copy for use by SkipPast()
        iter_->Next();
        SkipPast(key_);
        // Do not return deleted entries.  Instead keep looping.
        break;

      case kTypeValue:
        SaveKey(ikey.user_key);
        SaveValue(iter_->value());
        iter_->Next();
        SkipPast(key_);
        // Yield the value we just found.
        valid_ = true;
        return;

      case kTypeLargeValueRef:
        SaveKey(ikey.user_key);
        // Save the large value ref as value_, and read it lazily on a call
        // to value()
        SaveValue(iter_->value());
        large_ = new Large;
        large_->produced = false;
        iter_->Next();
        SkipPast(key_);
        // Yield the value we just found.
        valid_ = true;
        return;
    }
  }
  // Ran off the end of the underlying iterator: no more live entries.
  valid_ = false;
  key_.clear();
  value_.clear();
  assert(large_ == NULL);
}
| void DBIter::SkipPast(const Slice& k) { | |||
| while (iter_->Valid()) { | |||
| ParsedInternalKey ikey; | |||
| // Note that if we cannot parse an internal key, we keep looping | |||
| // so that if we have a run like the following: | |||
| // <x,100,v> => value100 | |||
| // <corrupted entry for user key x> | |||
| // <x,50,v> => value50 | |||
| // we will skip over the corrupted entry as well as value50. | |||
| if (ParseKey(&ikey) && user_comparator_->Compare(ikey.user_key, k) != 0) { | |||
| break; | |||
| } | |||
| iter_->Next(); | |||
| } | |||
| } | |||
void DBIter::SeekToLast() {
  // Position iter_ at the last uncorrupted user key and then
  // let FindPrevUserEntry() do the heavy lifting to find
  // a user key that is live.
  iter_->SeekToLast();
  ParsedInternalKey current;
  // Walk backwards over any trailing corrupted entries.
  while (iter_->Valid() && !ParseKey(&current)) {
    iter_->Prev();
  }
  if (iter_->Valid()) {
    // Seed key_ for FindPrevUserEntry()'s backwards scan.
    SaveKey(current.user_key);
  }
  FindPrevUserEntry();
}
// Let X be the user key at which iter_ is currently positioned.
// Adjust DBIter to point at the last entry with a key <= X that
// has a live value.
void DBIter::FindPrevUserEntry() {
  // Consider the following example:
  //
  //     A@540
  //     A@400
  //
  //     B@300
  //     B@200
  //     B@100        <- iter_
  //
  //     C@301
  //     C@201
  //
  // The comments marked "(first iteration)" below relate what happens
  // for the preceding example in the first iteration of the while loop
  // below.  There may be more than one iteration either if there are
  // no live values for B, or if there is a corruption.
  while (iter_->Valid()) {
    std::string saved = key_;
    bool found_live;
    ScanUntilBeforeCurrentKey(&found_live);
    // (first iteration) iter_ at A@400
    if (found_live) {
      // Step forward into range of entries with user key >= saved
      if (!iter_->Valid()) {
        // Backwards scan ran off the front: restart at the beginning.
        iter_->SeekToFirst();
      } else {
        iter_->Next();
      }
      // (first iteration) iter_ at B@300
      FindNextUserEntry();  // Sets key_ to the key of the next value it found
      if (valid_ && user_comparator_->Compare(key_, saved) == 0) {
        // (first iteration) iter_ at C@301
        return;
      }

      // FindNextUserEntry() could not find any entries under the
      // user key "saved".  This is probably a corruption since
      // ScanUntilBefore(saved) found a live value.  So we skip
      // backwards to an earlier key and ignore the corrupted
      // entries for "saved".
      //
      // (first iteration) iter_ at C@301 and saved == "B"
      key_ = saved;
      bool ignored;
      ScanUntilBeforeCurrentKey(&ignored);
      // (first iteration) iter_ at A@400
    }
  }
  // No live entry with user key <= X exists.
  valid_ = false;
  key_.clear();
  value_.clear();
}
// Scan iter_ backwards until it is positioned at an entry whose user
// key is strictly less than key_ (that key is saved into key_), or
// until the front of the database is passed.  *found_live reports
// whether a live (non-deleted, snapshot-visible) value for the
// original key_ was seen along the way; the last equal-key entry
// examined during the backwards scan wins.  If iter_ is initially
// invalid, the scan restarts from the last entry.
void DBIter::ScanUntilBeforeCurrentKey(bool* found_live) {
  *found_live = false;
  if (!iter_->Valid()) {
    iter_->SeekToLast();
  }

  while (iter_->Valid()) {
    ParsedInternalKey current;
    if (!ParseKey(&current)) {
      // Skip backwards over corrupted entries.
      iter_->Prev();
      continue;
    }

    if (current.sequence > sequence_) {
      // Ignore entries that are serialized after this read
      iter_->Prev();
      continue;
    }

    const int cmp = user_comparator_->Compare(current.user_key, key_);
    if (cmp < 0) {
      SaveKey(current.user_key);
      return;
    } else if (cmp == 0) {
      switch (current.type) {
        case kTypeDeletion:
          *found_live = false;
          break;

        case kTypeValue:
        case kTypeLargeValueRef:
          *found_live = true;
          break;
      }
    } else {  // cmp > 0
      // We passed entries for a later user key; they do not count.
      *found_live = false;
    }

    iter_->Prev();
  }
}
| void DBIter::ReadIndirectValue() const { | |||
| assert(!large_->produced); | |||
| large_->produced = true; | |||
| LargeValueRef large_ref; | |||
| if (value_.size() != LargeValueRef::ByteSize()) { | |||
| large_->status = Status::Corruption("malformed large value reference"); | |||
| return; | |||
| } | |||
| memcpy(large_ref.data, value_.data(), LargeValueRef::ByteSize()); | |||
| std::string fname = LargeValueFileName(*dbname_, large_ref); | |||
| RandomAccessFile* file; | |||
| Status s = env_->NewRandomAccessFile(fname, &file); | |||
| if (s.ok()) { | |||
| uint64_t file_size = file->Size(); | |||
| uint64_t value_size = large_ref.ValueSize(); | |||
| large_->value.resize(value_size); | |||
| Slice result; | |||
| s = file->Read(0, file_size, &result, | |||
| const_cast<char*>(large_->value.data())); | |||
| if (s.ok()) { | |||
| if (result.size() == file_size) { | |||
| switch (large_ref.compression_type()) { | |||
| case kNoCompression: { | |||
| if (result.data() != large_->value.data()) { | |||
| large_->value.assign(result.data(), result.size()); | |||
| } | |||
| break; | |||
| } | |||
| case kLightweightCompression: { | |||
| std::string uncompressed; | |||
| if (port::Lightweight_Uncompress(result.data(), result.size(), | |||
| &uncompressed) && | |||
| uncompressed.size() == large_ref.ValueSize()) { | |||
| swap(uncompressed, large_->value); | |||
| } else { | |||
| s = Status::Corruption( | |||
| "Unable to read entire compressed large value file"); | |||
| } | |||
| } | |||
| } | |||
| } else { | |||
| s = Status::Corruption("Unable to read entire large value file"); | |||
| } | |||
| } | |||
| delete file; // Ignore errors on closing | |||
| } | |||
| if (!s.ok()) { | |||
| large_->value.clear(); | |||
| large_->status = s; | |||
| } | |||
| } | |||
| } // anonymous namespace | |||
| Iterator* NewDBIterator( | |||
| const std::string* dbname, | |||
| Env* env, | |||
| const Comparator* user_key_comparator, | |||
| Iterator* internal_iter, | |||
| const SequenceNumber& sequence) { | |||
| return new DBIter(dbname, env, user_key_comparator, internal_iter, sequence); | |||
| } | |||
| } | |||
| @ -0,0 +1,26 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_DB_ITER_H_ | |||
| #define STORAGE_LEVELDB_DB_DB_ITER_H_ | |||
| #include <stdint.h> | |||
| #include "include/db.h" | |||
| #include "db/dbformat.h" | |||
| namespace leveldb { | |||
// Return a new iterator that converts internal keys (yielded by
// "*internal_iter") that were live at the specified "sequence" number
// into appropriate user keys.  The returned iterator takes ownership
// of "internal_iter"; "dbname" and "env" must outlive it.
extern Iterator* NewDBIterator(
    const std::string* dbname,
    Env* env,
    const Comparator* user_key_comparator,
    Iterator* internal_iter,
    const SequenceNumber& sequence);
| } | |||
| #endif // STORAGE_LEVELDB_DB_DB_ITER_H_ | |||
| @ -0,0 +1,963 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/db.h" | |||
| #include "db/db_impl.h" | |||
| #include "db/filename.h" | |||
| #include "db/version_set.h" | |||
| #include "db/write_batch_internal.h" | |||
| #include "include/env.h" | |||
| #include "include/table.h" | |||
| #include "util/logging.h" | |||
| #include "util/testharness.h" | |||
| #include "util/testutil.h" | |||
| namespace leveldb { | |||
| static std::string RandomString(Random* rnd, int len) { | |||
| std::string r; | |||
| test::RandomString(rnd, len, &r); | |||
| return r; | |||
| } | |||
// Test fixture: creates a fresh database under the test tmp directory
// and provides convenience wrappers around the DB interface.  The
// database (and its files) are destroyed again on teardown.
class DBTest {
 public:
  std::string dbname_;    // On-disk location of the test database
  Env* env_;
  DB* db_;                // Database under test; reopened by helpers below

  Options last_options_;  // Options used by the most recent (re)open

  DBTest() : env_(Env::Default()) {
    dbname_ = test::TmpDir() + "/db_test";
    DestroyDB(dbname_, Options());
    db_ = NULL;
    Reopen();
  }

  ~DBTest() {
    delete db_;
    DestroyDB(dbname_, Options());
  }

  // Access the concrete implementation to reach the TEST_* hooks.
  DBImpl* dbfull() {
    return reinterpret_cast<DBImpl*>(db_);
  }

  // Close and reopen the database; asserts on failure.
  void Reopen(Options* options = NULL) {
    ASSERT_OK(TryReopen(options));
  }

  // Wipe the on-disk database and open a fresh one.
  void DestroyAndReopen(Options* options = NULL) {
    delete db_;
    db_ = NULL;
    DestroyDB(dbname_, Options());
    ASSERT_OK(TryReopen(options));
  }

  // Close and reopen, returning (rather than asserting on) the status.
  // A NULL "options" means default options with create_if_missing set.
  Status TryReopen(Options* options) {
    delete db_;
    db_ = NULL;
    Options opts;
    if (options != NULL) {
      opts = *options;
    } else {
      opts.create_if_missing = true;
    }
    last_options_ = opts;
    return DB::Open(opts, dbname_, &db_);
  }

  // Write k=>v through a one-entry batch.
  Status Put(const std::string& k, const std::string& v) {
    WriteBatch batch;
    batch.Put(k, v);
    return db_->Write(WriteOptions(), &batch);
  }

  // Delete k through a one-entry batch.
  Status Delete(const std::string& k) {
    WriteBatch batch;
    batch.Delete(k);
    return db_->Write(WriteOptions(), &batch);
  }

  // Read k (optionally at "snapshot").  Returns "NOT_FOUND" for a
  // missing key and the status string for any other error, so tests can
  // assert on a single string.
  std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
    ReadOptions options;
    options.snapshot = snapshot;
    std::string result;
    Status s = db_->Get(options, k, &result);
    if (s.IsNotFound()) {
      result = "NOT_FOUND";
    } else if (!s.ok()) {
      result = s.ToString();
    }
    return result;
  }

  // Render every internal entry for "user_key", in internal-key order,
  // as e.g. "[ v2, DEL, v1 ]"; used to inspect compaction/merge results.
  std::string AllEntriesFor(const Slice& user_key) {
    Iterator* iter = dbfull()->TEST_NewInternalIterator();
    InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
    iter->Seek(target.Encode());
    std::string result;
    if (!iter->status().ok()) {
      result = iter->status().ToString();
    } else {
      result = "[ ";
      bool first = true;
      while (iter->Valid()) {
        ParsedInternalKey ikey;
        if (!ParseInternalKey(iter->key(), &ikey)) {
          result += "CORRUPTED";
        } else {
          if (last_options_.comparator->Compare(
                  ikey.user_key, user_key) != 0) {
            break;
          }
          if (!first) {
            result += ", ";
          }
          first = false;
          switch (ikey.type) {
            case kTypeValue:
              result += iter->value().ToString();
              break;
            case kTypeLargeValueRef:
              result += "LARGEVALUE(" + EscapeString(iter->value()) + ")";
              break;
            case kTypeDeletion:
              result += "DEL";
              break;
          }
        }
        iter->Next();
      }
      if (!first) {
        result += " ";
      }
      result += "]";
    }
    delete iter;
    return result;
  }

  // Number of table files at "level", read via the property interface.
  int NumTableFilesAtLevel(int level) {
    uint64_t val;
    ASSERT_TRUE(
        db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level),
                         &val));
    return val;
  }

  // Approximate on-disk size of the key range [start,limit).
  uint64_t Size(const Slice& start, const Slice& limit) {
    Range r(start, limit);
    uint64_t size;
    db_->GetApproximateSizes(&r, 1, &size);
    return size;
  }

  std::set<LargeValueRef> LargeValueFiles() const {
    // Return the set of large value files that exist in the database
    std::vector<std::string> filenames;
    env_->GetChildren(dbname_, &filenames);  // Ignoring errors on purpose
    uint64_t number;
    LargeValueRef large_ref;
    FileType type;
    std::set<LargeValueRef> live;
    for (int i = 0; i < filenames.size(); i++) {
      if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
          type == kLargeValueFile) {
        fprintf(stderr, " live: %s\n",
                LargeValueRefToFilenameString(large_ref).c_str());
        live.insert(large_ref);
      }
    }
    fprintf(stderr, "Found %d live large value files\n", (int)live.size());
    return live;
  }
};
// A freshly created database opens successfully and contains no entries.
TEST(DBTest, Empty) {
  ASSERT_TRUE(db_ != NULL);
  ASSERT_EQ("NOT_FOUND", Get("foo"));
}
// Basic put/get behavior, including overwriting an existing key.
TEST(DBTest, ReadWrite) {
  ASSERT_OK(Put("foo", "v1"));
  ASSERT_EQ("v1", Get("foo"));
  ASSERT_OK(Put("bar", "v2"));
  ASSERT_OK(Put("foo", "v3"));  // overwrite "foo"
  ASSERT_EQ("v3", Get("foo"));
  ASSERT_EQ("v2", Get("bar"));
}
// Put, overwrite, then delete through the DB interface directly; a deleted
// key must read back as not-found.
TEST(DBTest, PutDeleteGet) {
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
  ASSERT_EQ("v1", Get("foo"));
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
  ASSERT_EQ("v2", Get("foo"));
  ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
  ASSERT_EQ("NOT_FOUND", Get("foo"));
}
// Values written before a Reopen() must survive log recovery, and the
// recovered database must remain fully writable.
TEST(DBTest, Recover) {
  ASSERT_OK(Put("foo", "v1"));
  ASSERT_OK(Put("baz", "v5"));
  Reopen();
  ASSERT_EQ("v1", Get("foo"));
  ASSERT_EQ("v1", Get("foo"));  // repeated read of the recovered value
  ASSERT_EQ("v5", Get("baz"));
  ASSERT_OK(Put("bar", "v2"));
  ASSERT_OK(Put("foo", "v3"));
  Reopen();
  ASSERT_EQ("v3", Get("foo"));
  ASSERT_OK(Put("foo", "v4"));
  ASSERT_EQ("v4", Get("foo"));
  ASSERT_EQ("v2", Get("bar"));
  ASSERT_EQ("v5", Get("baz"));
}
// Reopening twice in a row leaves an empty log file behind; recovery must
// handle it and still apply subsequent writes correctly.
TEST(DBTest, RecoveryWithEmptyLog) {
  ASSERT_OK(Put("foo", "v1"));
  ASSERT_OK(Put("foo", "v2"));
  Reopen();
  Reopen();  // second reopen recovers from an empty log
  ASSERT_OK(Put("foo", "v3"));
  Reopen();
  ASSERT_EQ("v3", Get("foo"));
}
// Returns the fixed-width test key for index i, e.g. Key(7) == "key000007".
static std::string Key(int i) {
  char name[100];
  snprintf(name, sizeof(name), "key%06d", i);
  return std::string(name);
}
// With a small write buffer, writing ~100KB must trigger minor compactions
// that produce new level-0 table files, without losing any data across
// compaction or reopen.
TEST(DBTest, MinorCompactionsHappen) {
  Options options;
  options.write_buffer_size = 10000;  // tiny buffer forces memtable flushes
  Reopen(&options);
  const int N = 100;
  int starting_num_tables = NumTableFilesAtLevel(0);
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
  }
  int ending_num_tables = NumTableFilesAtLevel(0);
  ASSERT_GT(ending_num_tables, starting_num_tables);
  for (int i = 0; i < N; i++) {
    ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  }
  Reopen();
  for (int i = 0; i < N; i++) {
    ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  }
}
// Write a large log (no flushes), then reopen with a small write buffer:
// recovery must flush table files mid-way through replaying the log.
TEST(DBTest, RecoverWithLargeLog) {
  {
    Options options;
    options.large_value_threshold = 1048576;  // keep values inline in the log
    Reopen(&options);
    ASSERT_OK(Put("big1", std::string(200000, '1')));
    ASSERT_OK(Put("big2", std::string(200000, '2')));
    ASSERT_OK(Put("small3", std::string(10, '3')));
    ASSERT_OK(Put("small4", std::string(10, '4')));
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);  // nothing flushed yet
  }
  // Make sure that if we re-open with a small write buffer size that
  // we flush table files in the middle of a large log file.
  Options options;
  options.write_buffer_size = 100000;
  options.large_value_threshold = 1048576;
  Reopen(&options);
  ASSERT_EQ(NumTableFilesAtLevel(0), 3);  // recovery produced multiple flushes
  ASSERT_EQ(std::string(200000, '1'), Get("big1"));
  ASSERT_EQ(std::string(200000, '2'), Get("big2"));
  ASSERT_EQ(std::string(10, '3'), Get("small3"));
  ASSERT_EQ(std::string(10, '4'), Get("small4"));
  ASSERT_GT(NumTableFilesAtLevel(0), 1);
}
// Compacting 8MB from level-0 must split the output into multiple level-1
// files rather than one giant file, and all values must survive.
TEST(DBTest, CompactionsGenerateMultipleFiles) {
  Options options;
  options.write_buffer_size = 100000000;        // Large write buffer
  options.large_value_threshold = 1048576;      // keep values inline
  Reopen(&options);
  Random rnd(301);
  // Write 8MB (80 values, each 100K)
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  std::vector<std::string> values;
  for (int i = 0; i < 80; i++) {
    values.push_back(RandomString(&rnd, 100000));
    ASSERT_OK(Put(Key(i), values[i]));
  }
  // Reopening moves updates to level-0
  Reopen(&options);
  dbfull()->TEST_CompactRange(0, "", Key(100000));
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_GT(NumTableFilesAtLevel(1), 1);  // output split across files
  for (int i = 0; i < 80; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }
}
// Returns true iff low <= val <= high; logs the violation to stderr
// otherwise so a failing size assertion is easy to diagnose.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  const bool in_range = (low <= val) && (val <= high);
  if (!in_range) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            (unsigned long long)(val),
            (unsigned long long)(low),
            (unsigned long long)(high));
  }
  return in_range;
}
// GetApproximateSizes(): sizes should be ~0 for an empty DB, reflect large
// values while still in the memtable (test==0), be ~0 for inline values in
// the memtable (test==1), and track cumulative key ranges accurately across
// reopens and incremental compactions.
TEST(DBTest, ApproximateSizes) {
  for (int test = 0; test < 2; test++) {
    // test==0: default large_value_threshold
    // test==1: 1 MB large_value_threshold
    Options options;
    options.large_value_threshold = (test == 0) ? 65536 : 1048576;
    options.write_buffer_size = 100000000;        // Large write buffer
    options.compression = kNoCompression;         // sizes must be predictable
    DestroyAndReopen();
    ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
    Reopen(&options);
    ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
    // Write 8MB (80 values, each 100K)
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
    const int N = 80;
    Random rnd(301);
    for (int i = 0; i < N; i++) {
      ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
    }
    if (test == 1) {
      // 0 because GetApproximateSizes() does not account for memtable space for
      // non-large values
      ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
    } else {
      // Large values live in their own files, so they are counted even
      // before any table file is written.
      ASSERT_TRUE(Between(Size("", Key(50)), 100000*50, 100000*50 + 10000));
      ASSERT_TRUE(Between(Size(Key(20), Key(30)),
                          100000*10, 100000*10 + 10000));
    }
    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      Reopen(&options);
      // Compact a different 10-key slice each pass; sizes must stay stable.
      for (int compact_start = 0; compact_start < N; compact_start += 10) {
        for (int i = 0; i < N; i += 10) {
          ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000));
          ASSERT_TRUE(Between(Size("", Key(i)+".suffix"),
                              100000 * (i+1), 100000 * (i+1) + 10000));
          ASSERT_TRUE(Between(Size(Key(i), Key(i+10)),
                              100000 * 10, 100000 * 10 + 10000));
        }
        ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
        ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
        dbfull()->TEST_CompactRange(0,
                                    Key(compact_start),
                                    Key(compact_start + 9));
      }
      ASSERT_EQ(NumTableFilesAtLevel(0), 0);
      ASSERT_GT(NumTableFilesAtLevel(1), 0);
    }
  }
}
// Approximate sizes with an interleaved mix of small values and large
// (separately-stored) values: cumulative prefix sizes must reflect both.
TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  Options options;
  options.large_value_threshold = 65536;
  options.compression = kNoCompression;
  // NOTE(review): this first Reopen() ignores the options configured above;
  // the loop below reopens with &options.  Presumably intentional since the
  // default large_value_threshold matches — confirm against upstream.
  Reopen();
  Random rnd(301);
  std::string big1 = RandomString(&rnd, 100000);
  ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
  ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
  ASSERT_OK(Put(Key(2), big1));
  ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
  ASSERT_OK(Put(Key(4), big1));   // same large value stored twice
  ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
  ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
  ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
  // Check sizes across recovery by reopening a few times
  for (int run = 0; run < 3; run++) {
    Reopen(&options);
    // Each bound below is the cumulative size of all entries before Key(i).
    ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
    ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
    ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
    ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
    ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
    ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
    ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
    ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
    ASSERT_TRUE(Between(Size("", Key(8)), 550000, 551000));
    ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
    dbfull()->TEST_CompactRange(0, Key(0), Key(100));
  }
}
// An open iterator must pin the DB state it was created from: writes and
// compactions performed afterwards must not change what it yields.
TEST(DBTest, IteratorPinsRef) {
  Put("foo", "hello");
  // Get iterator that will yield the current contents of the DB.
  Iterator* iter = db_->NewIterator(ReadOptions());
  // Write to force compactions
  Put("foo", "newvalue1");
  for (int i = 0; i < 100; i++) {
    ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
  }
  Put("foo", "newvalue2");
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key().ToString());
  ASSERT_EQ("hello", iter->value().ToString());  // pre-iterator value
  iter->Next();
  ASSERT_TRUE(!iter->Valid());  // none of the later writes are visible
  delete iter;
}
// Each snapshot must keep exposing the value current at its creation, and
// releasing one snapshot must not disturb the others.
TEST(DBTest, Snapshot) {
  Put("foo", "v1");
  const Snapshot* s1 = db_->GetSnapshot();
  Put("foo", "v2");
  const Snapshot* s2 = db_->GetSnapshot();
  Put("foo", "v3");
  const Snapshot* s3 = db_->GetSnapshot();
  Put("foo", "v4");
  ASSERT_EQ("v1", Get("foo", s1));
  ASSERT_EQ("v2", Get("foo", s2));
  ASSERT_EQ("v3", Get("foo", s3));
  ASSERT_EQ("v4", Get("foo"));
  db_->ReleaseSnapshot(s3);   // release out of order
  ASSERT_EQ("v1", Get("foo", s1));
  ASSERT_EQ("v2", Get("foo", s2));
  ASSERT_EQ("v4", Get("foo"));
  db_->ReleaseSnapshot(s1);
  ASSERT_EQ("v2", Get("foo", s2));
  ASSERT_EQ("v4", Get("foo"));
  db_->ReleaseSnapshot(s2);
  ASSERT_EQ("v4", Get("foo"));
}
// Once the snapshot protecting an overwritten value is released, compaction
// must drop the hidden value and reclaim its space.
TEST(DBTest, HiddenValuesAreRemoved) {
  Random rnd(301);
  std::string big = RandomString(&rnd, 50000);
  Put("foo", big);
  Put("pastfoo", "v");
  const Snapshot* snapshot = db_->GetSnapshot();  // pins "foo" -> big
  Put("foo", "tiny");
  Put("pastfoo2", "v2");  // Advance sequence number one more
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  ASSERT_GT(NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(big, Get("foo", snapshot));
  ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
  db_->ReleaseSnapshot(snapshot);
  // Both versions still present (newest first) until compaction runs.
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
  dbfull()->TEST_CompactRange(0, "", "x");
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");  // hidden value dropped
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_GE(NumTableFilesAtLevel(1), 1);
  dbfull()->TEST_CompactRange(1, "", "x");
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));  // space reclaimed
}
// Deletion-marker lifetime when a newer value hides the deleted one:
// the DEL can be dropped as soon as the overwrite (v2) shadows it, but v1
// survives until its own level is compacted.
TEST(DBTest, DeletionMarkers1) {
  Put("foo", "v1");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  dbfull()->TEST_CompactRange(0, "", "z");
  dbfull()->TEST_CompactRange(1, "", "z");
  ASSERT_EQ(NumTableFilesAtLevel(2), 1); // foo => v1 is now in level 2 file
  Delete("foo");
  Put("foo", "v2");
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  dbfull()->TEST_CompactRange(0, "", "z");
  // DEL eliminated, but v1 remains because we aren't compacting that level
  // (DEL can be eliminated because v2 hides v1).
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
  dbfull()->TEST_CompactRange(1, "", "z");
  // Merging L1 w/ L2, so we are the base level for "foo", so DEL is removed.
  // (as is v1).
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
}
// Deletion-marker lifetime with no newer value: the DEL must be kept until
// the compaction reaches the level that actually holds v1, at which point
// both disappear.
TEST(DBTest, DeletionMarkers2) {
  Put("foo", "v1");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  dbfull()->TEST_CompactRange(0, "", "z");
  dbfull()->TEST_CompactRange(1, "", "z");
  ASSERT_EQ(NumTableFilesAtLevel(2), 1); // foo => v1 is now in level 2 file
  Delete("foo");
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(0, "", "z");
  // DEL kept: L2 file overlaps
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  dbfull()->TEST_CompactRange(1, "", "z");
  // Merging L1 w/ L2, so we are the base level for "foo", so DEL is removed.
  // (as is v1).
  ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
}
// Reopening a database with a comparator whose Name() differs from the one
// it was created with must fail, and the error must mention "comparator".
TEST(DBTest, ComparatorCheck) {
  // Behaves identically to BytewiseComparator but reports a different name,
  // which is all the compatibility check looks at.
  class NewComparator : public Comparator {
   public:
    virtual const char* Name() const { return "leveldb.NewComparator"; }
    virtual int Compare(const Slice& a, const Slice& b) const {
      return BytewiseComparator()->Compare(a, b);
    }
    virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
      BytewiseComparator()->FindShortestSeparator(s, l);
    }
    virtual void FindShortSuccessor(std::string* key) const {
      BytewiseComparator()->FindShortSuccessor(key);
    }
  };
  NewComparator cmp;
  Options new_options;
  new_options.comparator = &cmp;
  Status s = TryReopen(&new_options);
  ASSERT_TRUE(!s.ok());
  ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
      << s.ToString();
}
| static bool LargeValuesOK(DBTest* db, | |||
| const std::set<LargeValueRef>& expected) { | |||
| std::set<LargeValueRef> actual = db->LargeValueFiles(); | |||
| if (actual.size() != expected.size()) { | |||
| fprintf(stderr, "Sets differ in size: %d vs %d\n", | |||
| (int)actual.size(), (int)expected.size()); | |||
| return false; | |||
| } | |||
| for (std::set<LargeValueRef>::const_iterator it = expected.begin(); | |||
| it != expected.end(); | |||
| ++it) { | |||
| if (actual.count(*it) != 1) { | |||
| fprintf(stderr, " key '%s' not found in actual set\n", | |||
| LargeValueRefToFilenameString(*it).c_str()); | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
// Lifecycle of a single large-value file: created on Put, kept while any
// reference (even a deleted one awaiting compaction) exists, removed once a
// real compaction processes the deletion.
TEST(DBTest, LargeValues1) {
  Options options;
  options.large_value_threshold = 10000;
  Reopen(&options);
  Random rnd(301);
  std::string big1;
  test::CompressibleString(&rnd, 1.0, 100000, &big1); // Not compressible
  std::set<LargeValueRef> expected;
  ASSERT_OK(Put("big1", big1));
  expected.insert(LargeValueRef::Make(big1, kNoCompression));
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(Delete("big1"));
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  // No handling of deletion markers on memtable compactions, so big1 remains
  ASSERT_TRUE(LargeValuesOK(this, expected));
  dbfull()->TEST_CompactRange(0, "", "z");
  expected.erase(LargeValueRef::Make(big1, kNoCompression));  // file now gone
  ASSERT_TRUE(LargeValuesOK(this, expected));
}
// Large-value files with multiple keys referencing the same value: the file
// must persist while any reference remains, survive reopen, and disappear
// only when the last reference is compacted away.
TEST(DBTest, LargeValues2) {
  Options options;
  options.large_value_threshold = 10000;
  Reopen(&options);
  Random rnd(301);
  std::string big1, big2;
  test::CompressibleString(&rnd, 1.0, 20000, &big1); // Not compressible
  test::CompressibleString(&rnd, 0.6, 40000, &big2); // Compressible
  std::set<LargeValueRef> expected;
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(Put("big1", big1));
  expected.insert(LargeValueRef::Make(big1, kNoCompression));
  ASSERT_EQ(big1, Get("big1"));
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(Put("big2", big2));
  ASSERT_EQ(big2, Get("big2"));
#if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_CHROMIUM)
  // TODO(sanjay) Reenable after compression support is added
  expected.insert(LargeValueRef::Make(big2, kNoCompression));
#else
  expected.insert(LargeValueRef::Make(big2, kLightweightCompression));
#endif
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  ASSERT_TRUE(LargeValuesOK(this, expected));
  dbfull()->TEST_CompactRange(0, "", "z");
  ASSERT_TRUE(LargeValuesOK(this, expected));
  // Re-reference big2 under two keys; no new file should appear since the
  // value (and hence its content hash) is identical.
  ASSERT_OK(Put("big2", big2));
  ASSERT_OK(Put("big2_b", big2));
  ASSERT_EQ(big1, Get("big1"));
  ASSERT_EQ(big2, Get("big2"));
  ASSERT_EQ(big2, Get("big2_b"));
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(Delete("big1"));
  ASSERT_EQ("NOT_FOUND", Get("big1"));
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  ASSERT_TRUE(LargeValuesOK(this, expected));
  dbfull()->TEST_CompactRange(0, "", "z");
  expected.erase(LargeValueRef::Make(big1, kNoCompression));  // big1 gone
  ASSERT_TRUE(LargeValuesOK(this, expected));
  dbfull()->TEST_CompactRange(1, "", "z");
  // Deleting one of big2's two references must NOT remove its file.
  ASSERT_OK(Delete("big2"));
  ASSERT_EQ("NOT_FOUND", Get("big2"));
  ASSERT_EQ(big2, Get("big2_b"));
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  ASSERT_TRUE(LargeValuesOK(this, expected));
  dbfull()->TEST_CompactRange(0, "", "z");
  ASSERT_TRUE(LargeValuesOK(this, expected));
  // Make sure the large value refs survive a reload and compactions after
  // the reload.
  Reopen();
  ASSERT_TRUE(LargeValuesOK(this, expected));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
  dbfull()->TEST_CompactRange(0, "", "z");
  ASSERT_TRUE(LargeValuesOK(this, expected));
}
// Make sure we don't compress large values when options.compression is
// kNoCompression, even for highly compressible data.
TEST(DBTest, LargeValues3) {
  Options options;
  options.large_value_threshold = 10000;
  options.compression = kNoCompression;
  Reopen(&options);
  Random rnd(301);
  std::string big1 = std::string(100000, 'x'); // Very compressible
  std::set<LargeValueRef> expected;
  ASSERT_OK(Put("big1", big1));
  ASSERT_EQ(big1, Get("big1"));
  // The stored ref must carry kNoCompression despite the compressibility.
  expected.insert(LargeValueRef::Make(big1, kNoCompression));
  ASSERT_TRUE(LargeValuesOK(this, expected));
}
// DB::Open must honor create_if_missing and error_if_exists in all four
// relevant combinations.
TEST(DBTest, DBOpen_Options) {
  std::string dbname = test::TmpDir() + "/db_options_test";
  DestroyDB(dbname, Options());
  // Does not exist, and create_if_missing == false: error
  DB* db = NULL;
  Options opts;
  opts.create_if_missing = false;
  Status s = DB::Open(opts, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL);
  ASSERT_TRUE(db == NULL);
  // Does not exist, and create_if_missing == true: OK
  opts.create_if_missing = true;
  s = DB::Open(opts, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != NULL);
  delete db;
  db = NULL;
  // Does exist, and error_if_exists == true: error
  opts.create_if_missing = false;
  opts.error_if_exists = true;
  s = DB::Open(opts, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL);
  ASSERT_TRUE(db == NULL);
  // Does exist, and error_if_exists == false: OK
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  s = DB::Open(opts, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != NULL);
  delete db;
  db = NULL;
}
// In-memory reference implementation of the DB interface, used by the
// Randomized test as a model to compare the real DB against.  State is a
// plain std::map; snapshots are deep copies of the map whose pointer is
// smuggled through the Snapshot's sequence-number field.
class ModelDB: public DB {
 public:
  explicit ModelDB(const Options& options): options_(options) { }
  ~ModelDB() { }
  virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) {
    return DB::Put(o, k, v);   // route through default batch-based impl
  }
  virtual Status Delete(const WriteOptions& o, const Slice& key) {
    return DB::Delete(o, key);
  }
  // Point lookup is not used by the model-vs-db comparison; iteration is.
  virtual Status Get(const ReadOptions& options,
                     const Slice& key, std::string* value) {
    assert(false);      // Not implemented
    return Status::NotFound(key);
  }
  virtual Iterator* NewIterator(const ReadOptions& options) {
    if (options.snapshot == NULL) {
      // No snapshot: iterate over a private copy of the current state so
      // later writes cannot perturb the iterator.
      KVMap* saved = new KVMap;
      *saved = map_;
      return new ModelIter(saved, true);
    } else {
      // Snapshot: recover the saved map pointer stashed in number_.
      const KVMap* snapshot_state =
          reinterpret_cast<const KVMap*>(options.snapshot->number_);
      return new ModelIter(snapshot_state, false);
    }
  }
  virtual const Snapshot* GetSnapshot() {
    // Deep-copy the map and store its pointer in the snapshot's
    // sequence-number slot (freed in ReleaseSnapshot).
    KVMap* saved = new KVMap;
    *saved = map_;
    return snapshots_.New(
        reinterpret_cast<SequenceNumber>(saved));
  }
  virtual void ReleaseSnapshot(const Snapshot* snapshot) {
    const KVMap* saved = reinterpret_cast<const KVMap*>(snapshot->number_);
    delete saved;
    snapshots_.Delete(snapshot);
  }
  virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
    assert(options.post_write_snapshot == NULL);   // Not supported
    // Apply each batch entry to the map in order.
    for (WriteBatchInternal::Iterator it(*batch); !it.Done(); it.Next()) {
      switch (it.op()) {
        case kTypeValue:
          map_[it.key().ToString()] = it.value().ToString();
          break;
        case kTypeLargeValueRef:
          assert(false);   // Should not occur
          break;
        case kTypeDeletion:
          map_.erase(it.key().ToString());
          break;
      }
    }
    return Status::OK();
  }
  virtual bool GetProperty(const Slice& property, uint64_t* value) {
    return false;          // no properties supported
  }
  virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
    for (int i = 0; i < n; i++) {
      sizes[i] = 0;        // model has no on-disk footprint
    }
  }
 private:
  typedef std::map<std::string, std::string> KVMap;
  // Iterator over a KVMap; optionally owns (and deletes) the map.
  class ModelIter: public Iterator {
   public:
    ModelIter(const KVMap* map, bool owned)
        : map_(map), owned_(owned), iter_(map_->end()) {
    }
    ~ModelIter() {
      if (owned_) delete map_;
    }
    virtual bool Valid() const { return iter_ != map_->end(); }
    virtual void SeekToFirst() { iter_ = map_->begin(); }
    virtual void SeekToLast() {
      if (map_->empty()) {
        iter_ = map_->end();
      } else {
        // Position on the largest key (find via rbegin's key so that
        // iter_ stays a forward iterator).
        iter_ = map_->find(map_->rbegin()->first);
      }
    }
    virtual void Seek(const Slice& k) {
      iter_ = map_->lower_bound(k.ToString());
    }
    virtual void Next() { ++iter_; }
    virtual void Prev() { --iter_; }
    virtual Slice key() const { return iter_->first; }
    virtual Slice value() const { return iter_->second; }
    virtual Status status() const { return Status::OK(); }
   private:
    const KVMap* const map_;
    const bool owned_;  // Do we own map_
    KVMap::const_iterator iter_;
  };
  const Options options_;
  KVMap map_;             // current (non-snapshot) contents
  SnapshotList snapshots_;
};
| static std::string RandomKey(Random* rnd) { | |||
| int len = (rnd->OneIn(3) | |||
| ? 1 // Short sometimes to encourage collisions | |||
| : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); | |||
| return test::RandomKey(rnd, len); | |||
| } | |||
| static bool CompareIterators(int step, | |||
| DB* model, | |||
| DB* db, | |||
| const Snapshot* model_snap, | |||
| const Snapshot* db_snap) { | |||
| ReadOptions options; | |||
| options.snapshot = model_snap; | |||
| Iterator* miter = model->NewIterator(options); | |||
| options.snapshot = db_snap; | |||
| Iterator* dbiter = db->NewIterator(options); | |||
| bool ok = true; | |||
| int count = 0; | |||
| for (miter->SeekToFirst(), dbiter->SeekToFirst(); | |||
| ok && miter->Valid() && dbiter->Valid(); | |||
| miter->Next(), dbiter->Next()) { | |||
| count++; | |||
| if (miter->key().compare(dbiter->key()) != 0) { | |||
| fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", | |||
| step, | |||
| EscapeString(miter->key()).c_str(), | |||
| EscapeString(dbiter->key()).c_str()); | |||
| ok = false; | |||
| break; | |||
| } | |||
| if (miter->value().compare(dbiter->value()) != 0) { | |||
| fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n", | |||
| step, | |||
| EscapeString(miter->key()).c_str(), | |||
| EscapeString(miter->value()).c_str(), | |||
| EscapeString(miter->value()).c_str()); | |||
| ok = false; | |||
| } | |||
| } | |||
| if (ok) { | |||
| if (miter->Valid() != dbiter->Valid()) { | |||
| fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n", | |||
| step, miter->Valid(), dbiter->Valid()); | |||
| ok = false; | |||
| } | |||
| } | |||
| fprintf(stderr, "%d entries compared: ok=%d\n", count, ok); | |||
| delete miter; | |||
| delete dbiter; | |||
| return ok; | |||
| } | |||
// Randomized differential test: applies the same random sequence of Puts,
// Deletes, and multi-element batches to both the real DB and the in-memory
// ModelDB, periodically comparing full iterations (live and snapshot) and
// reopening the real DB to exercise recovery.
TEST(DBTest, Randomized) {
  Random rnd(test::RandomSeed());
  ModelDB model(last_options_);
  const int N = 10000;
  const Snapshot* model_snap = NULL;
  const Snapshot* db_snap = NULL;
  std::string k, v;
  for (int step = 0; step < N; step++) {
    if (step % 100 == 0) {
      fprintf(stderr, "Step %d of %d\n", step, N);
    }
    int p = rnd.Uniform(100);
    if (p < 45) {                               // Put
      k = RandomKey(&rnd);
      // Occasionally use a larger value to vary storage behavior.
      v = RandomString(&rnd,
                       rnd.OneIn(20)
                       ? 100 + rnd.Uniform(100)
                       : rnd.Uniform(8));
      ASSERT_OK(model.Put(WriteOptions(), k, v));
      ASSERT_OK(db_->Put(WriteOptions(), k, v));
    } else if (p < 90) {                        // Delete
      k = RandomKey(&rnd);
      ASSERT_OK(model.Delete(WriteOptions(), k));
      ASSERT_OK(db_->Delete(WriteOptions(), k));
    } else {                                    // Multi-element batch
      WriteBatch b;
      const int num = rnd.Uniform(8);
      for (int i = 0; i < num; i++) {
        if (i == 0 || !rnd.OneIn(10)) {
          k = RandomKey(&rnd);
        } else {
          // Periodically re-use the same key from the previous iter, so
          // we have multiple entries in the write batch for the same key
        }
        if (rnd.OneIn(2)) {
          v = RandomString(&rnd, rnd.Uniform(10));
          b.Put(k, v);
        } else {
          b.Delete(k);
        }
      }
      ASSERT_OK(model.Write(WriteOptions(), &b));
      ASSERT_OK(db_->Write(WriteOptions(), &b));
    }
    if ((step % 100) == 0) {
      ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
      ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
      // Save a snapshot from each DB this time that we'll use next
      // time we compare things, to make sure the current state is
      // preserved with the snapshot
      if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
      if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
      Reopen();  // exercise recovery mid-test
      ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
      model_snap = model.GetSnapshot();
      db_snap = db_->GetSnapshot();
    }
  }
  if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
  if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
}
| } | |||
// Test driver: runs every TEST registered above.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,152 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include <stdio.h> | |||
| #include "db/dbformat.h" | |||
| #include "port/port.h" | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
// Packs a sequence number and a value type into the 8-byte trailer used by
// internal keys: (seq << 8) | type.  Sequence numbers are therefore limited
// to 56 bits (kMaxSequenceNumber).
static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
  assert(seq <= kMaxSequenceNumber);
  assert(t <= kValueTypeForSeek);
  return (seq << 8) | t;
}
// Appends the serialization of `key` to *result: the raw user key followed
// by the fixed64 (sequence, type) trailer.
void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
  result->append(key.user_key.data(), key.user_key.size());
  PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
}
// Returns a human-readable rendering of this key for debugging, of the
// form: 'user_key' @ sequence : type
std::string ParsedInternalKey::DebugString() const {
  char buf[50];    // large enough for " @ <20-digit seq> : <type>"
  snprintf(buf, sizeof(buf), "' @ %llu : %d",
           (unsigned long long) sequence,
           int(type));
  std::string result = "'";
  result += user_key.ToString();
  result += buf;
  return result;
}
// Identifies this comparator; stored in the DB and checked on reopen.
const char* InternalKeyComparator::Name() const {
  return "leveldb.InternalKeyComparator";
}
int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
  // Order by:
  //    increasing user key (according to user-supplied comparator)
  //    decreasing sequence number
  //    decreasing type (though sequence# should be enough to disambiguate)
  int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
  if (r == 0) {
    // Tie-break on the packed (seq << 8 | type) trailer; comparing the
    // packed value directly yields decreasing seq then decreasing type.
    const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
    const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
    if (anum > bnum) {
      r = -1;
    } else if (anum < bnum) {
      r = +1;
    }
  }
  return r;
}
// Shortens *start (an internal key) to a shorter internal key in
// [*start, limit), if the user comparator can shorten the user-key portion.
void InternalKeyComparator::FindShortestSeparator(
    std::string* start,
    const Slice& limit) const {
  // Attempt to shorten the user portion of the key
  Slice user_start = ExtractUserKey(*start);
  Slice user_limit = ExtractUserKey(limit);
  std::string tmp(user_start.data(), user_start.size());
  user_comparator_->FindShortestSeparator(&tmp, user_limit);
  // NOTE(review): this compares the full internal key *start (including its
  // 8-byte trailer) against the shortened user key tmp using the user
  // comparator; later leveldb compares user_start instead — confirm this
  // matches the intended upstream behavior of this revision.
  if (user_comparator_->Compare(*start, tmp) < 0) {
    // User key has become larger.  Tack on the earliest possible
    // number to the shortened user key.
    PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
    assert(this->Compare(*start, tmp) < 0);
    assert(this->Compare(tmp, limit) < 0);
    start->swap(tmp);
  }
}
// Replaces *key (an internal key) with a short internal key >= *key, if the
// user comparator can produce a short successor for the user-key portion.
void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
  Slice user_key = ExtractUserKey(*key);
  std::string tmp(user_key.data(), user_key.size());
  user_comparator_->FindShortSuccessor(&tmp);
  if (user_comparator_->Compare(user_key, tmp) < 0) {
    // User key has become larger.  Tack on the earliest possible
    // number to the shortened user key.
    PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
    assert(this->Compare(*key, tmp) < 0);
    key->swap(tmp);
  }
}
// Builds the 29-byte reference for a large value: bytes [0,20) hold the
// SHA1 of the contents, [20,28) the fixed64 value size, [28] the
// compression type.
LargeValueRef LargeValueRef::Make(const Slice& value, CompressionType ctype) {
  LargeValueRef result;
  port::SHA1_Hash(value.data(), value.size(), &result.data[0]);
  EncodeFixed64(&result.data[20], value.size());
  result.data[28] = static_cast<unsigned char>(ctype);
  return result;
}
| std::string LargeValueRefToFilenameString(const LargeValueRef& h) { | |||
| assert(sizeof(h.data) == LargeValueRef::ByteSize()); | |||
| assert(sizeof(h.data) == 29); // So we can hardcode the array size of buf | |||
| static const char tohex[] = "0123456789abcdef"; | |||
| char buf[20*2]; | |||
| for (int i = 0; i < 20; i++) { | |||
| buf[2*i] = tohex[(h.data[i] >> 4) & 0xf]; | |||
| buf[2*i+1] = tohex[h.data[i] & 0xf]; | |||
| } | |||
| std::string result = std::string(buf, sizeof(buf)); | |||
| result += "-"; | |||
| result += NumberToString(h.ValueSize()); | |||
| result += "-"; | |||
| result += NumberToString(static_cast<uint64_t>(h.compression_type())); | |||
| return result; | |||
| } | |||
// Map one hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its numeric value.
// Any other character trips the assert; callers pre-screen with
// isxdigit().
static uint32_t hexvalue(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  assert(c >= 'a' && c <= 'f');
  return c - 'a' + 10;
}
| bool FilenameStringToLargeValueRef(const Slice& s, LargeValueRef* h) { | |||
| Slice in = s; | |||
| if (in.size() < 40) { | |||
| return false; | |||
| } | |||
| for (int i = 0; i < 20; i++) { | |||
| if (!isxdigit(in[i*2]) || !isxdigit(in[i*2+1])) { | |||
| return false; | |||
| } | |||
| unsigned char c = (hexvalue(in[i*2])<<4) | hexvalue(in[i*2+1]); | |||
| h->data[i] = c; | |||
| } | |||
| in.remove_prefix(40); | |||
| uint64_t value_size, ctype; | |||
| if (ConsumeChar(&in, '-') && | |||
| ConsumeDecimalNumber(&in, &value_size) && | |||
| ConsumeChar(&in, '-') && | |||
| ConsumeDecimalNumber(&in, &ctype) && | |||
| in.empty() && | |||
| (ctype <= kLightweightCompression)) { | |||
| EncodeFixed64(&h->data[20], value_size); | |||
| h->data[28] = static_cast<unsigned char>(ctype); | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,198 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_FORMAT_H_ | |||
| #define STORAGE_LEVELDB_DB_FORMAT_H_ | |||
| #include <stdio.h> | |||
| #include "include/comparator.h" | |||
| #include "include/db.h" | |||
| #include "include/slice.h" | |||
| #include "include/table_builder.h" | |||
| #include "util/coding.h" | |||
| #include "util/logging.h" | |||
| namespace leveldb { | |||
class InternalKey;

// Value types encoded as the last component of internal keys.
// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
// data structures.
enum ValueType {
  kTypeDeletion = 0x0,
  kTypeValue = 0x1,
  kTypeLargeValueRef = 0x2,
};
// kValueTypeForSeek defines the ValueType that should be passed when
// constructing a ParsedInternalKey object for seeking to a particular
// sequence number (since we sort sequence numbers in decreasing order
// and the value type is embedded as the low 8 bits in the sequence
// number in internal keys, we need to use the highest-numbered
// ValueType, not the lowest).
static const ValueType kValueTypeForSeek = kTypeLargeValueRef;

typedef uint64_t SequenceNumber;

// We leave eight bits empty at the bottom so a type and sequence#
// can be packed together into 64-bits.  (2^56 - 1 is therefore the
// largest representable sequence number.)
static const SequenceNumber kMaxSequenceNumber =
    ((0x1ull << 56) - 1);
// The decomposed form of an internal key: user key plus the
// sequence/type pair from the 8-byte trailer.
struct ParsedInternalKey {
  Slice user_key;
  SequenceNumber sequence;
  ValueType type;

  ParsedInternalKey() { }  // Intentionally left uninitialized (for speed)
  ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
      : user_key(u), sequence(seq), type(t) { }
  std::string DebugString() const;
};
| // Return the length of the encoding of "key". | |||
| inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { | |||
| return key.user_key.size() + 8; | |||
| } | |||
// Append the serialization of "key" to *result.
extern void AppendInternalKey(std::string* result,
                              const ParsedInternalKey& key);

// Attempt to parse an internal key from "internal_key".  On success,
// stores the parsed data in "*result", and returns true.
//
// On error, returns false, leaves "*result" in an undefined state.
extern bool ParseInternalKey(const Slice& internal_key,
                             ParsedInternalKey* result);
// Returns the user key portion of an internal key (everything except
// the trailing 8-byte sequence/type word).  The returned Slice aliases
// internal_key's storage.
inline Slice ExtractUserKey(const Slice& internal_key) {
  assert(internal_key.size() >= 8);
  return Slice(internal_key.data(), internal_key.size() - 8);
}
| inline ValueType ExtractValueType(const Slice& internal_key) { | |||
| assert(internal_key.size() >= 8); | |||
| const size_t n = internal_key.size(); | |||
| uint64_t num = DecodeFixed64(internal_key.data() + n - 8); | |||
| unsigned char c = num & 0xff; | |||
| return static_cast<ValueType>(c); | |||
| } | |||
// A comparator for internal keys that uses a specified comparator for
// the user key portion and breaks ties by decreasing sequence number.
class InternalKeyComparator : public Comparator {
 private:
  const Comparator* user_comparator_;  // Not deleted here; caller-owned.
 public:
  explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
  virtual const char* Name() const;
  virtual int Compare(const Slice& a, const Slice& b) const;
  virtual void FindShortestSeparator(
      std::string* start,
      const Slice& limit) const;
  virtual void FindShortSuccessor(std::string* key) const;

  // Access the underlying user-key comparator.
  const Comparator* user_comparator() const { return user_comparator_; }

  // Convenience overload for InternalKey wrappers.
  int Compare(const InternalKey& a, const InternalKey& b) const;
};
// Modules in this directory should keep internal keys wrapped inside
// the following class instead of plain strings so that we do not
// incorrectly use string comparisons instead of an InternalKeyComparator.
class InternalKey {
 private:
  std::string rep_;  // Serialized form: user key followed by 8-byte trailer.
 public:
  InternalKey() { }   // Leave rep_ as empty to indicate it is invalid
  InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
    AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
  }

  // Replace this key with the serialized bytes in "s".
  void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }

  // Serialized form; the key must be valid (non-empty).
  Slice Encode() const {
    assert(!rep_.empty());
    return rep_;
  }

  Slice user_key() const { return ExtractUserKey(rep_); }

  // Re-serialize from a parsed key, discarding the previous contents.
  void SetFrom(const ParsedInternalKey& p) {
    rep_.clear();
    AppendInternalKey(&rep_, p);
  }

  // Mark the key invalid again.
  void Clear() { rep_.clear(); }
};
// Compare two wrapped internal keys by delegating to the Slice overload.
inline int InternalKeyComparator::Compare(
    const InternalKey& a, const InternalKey& b) const {
  return Compare(a.Encode(), b.Encode());
}
// LargeValueRef is a 160-bit hash value (20 bytes), plus an 8 byte
// uncompressed size, and a 1 byte CompressionType code.  An
// encoded form of it is embedded in the filenames of large value
// files stored in the database, and the raw binary form is stored as
// the iter->value() result for values of type kTypeLargeValueRef in
// the table and log files that make up the database.
struct LargeValueRef {
  // Layout: [0,20) hash, [20,28) fixed64 value size, [28] compression type.
  char data[29];

  // Initialize a large value ref for the given data
  static LargeValueRef Make(const Slice& data,
                            CompressionType compression_type);

  // Initialize a large value ref from a serialized, 29-byte reference value
  static LargeValueRef FromRef(const Slice& ref) {
    LargeValueRef result;
    assert(ref.size() == sizeof(result.data));
    memcpy(result.data, ref.data(), sizeof(result.data));
    return result;
  }

  // Return the number of bytes in a LargeValueRef (not the
  // number of bytes in the value referenced).
  static size_t ByteSize() { return sizeof(LargeValueRef().data); }

  // Return the number of bytes in the value referenced by "*this".
  uint64_t ValueSize() const { return DecodeFixed64(&data[20]); }

  CompressionType compression_type() const {
    return static_cast<CompressionType>(data[28]);
  }

  // Byte-wise equality/ordering over the full 29-byte encoding.
  bool operator==(const LargeValueRef& b) const {
    return memcmp(data, b.data, sizeof(data)) == 0;
  }
  bool operator<(const LargeValueRef& b) const {
    return memcmp(data, b.data, sizeof(data)) < 0;
  }
};
// Convert the large value ref to a human-readable string suitable
// for embedding in a large value filename (hex hash, size, type).
extern std::string LargeValueRefToFilenameString(const LargeValueRef& h);

// Parse the large value filename string in "input" and store it in
// "*h". If successful, returns true. Otherwise returns false.
extern bool FilenameStringToLargeValueRef(const Slice& in, LargeValueRef* ref);
| inline bool ParseInternalKey(const Slice& internal_key, | |||
| ParsedInternalKey* result) { | |||
| const size_t n = internal_key.size(); | |||
| if (n < 8) return false; | |||
| uint64_t num = DecodeFixed64(internal_key.data() + n - 8); | |||
| unsigned char c = num & 0xff; | |||
| result->sequence = num >> 8; | |||
| result->type = static_cast<ValueType>(c); | |||
| result->user_key = Slice(internal_key.data(), n - 8); | |||
| return (c <= static_cast<unsigned char>(kTypeLargeValueRef)); | |||
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_DB_FORMAT_H_ | |||
| @ -0,0 +1,127 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/dbformat.h" | |||
| #include "util/logging.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
| static std::string IKey(const std::string& user_key, | |||
| uint64_t seq, | |||
| ValueType vt) { | |||
| std::string encoded; | |||
| AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt)); | |||
| return encoded; | |||
| } | |||
| static std::string Shorten(const std::string& s, const std::string& l) { | |||
| std::string result = s; | |||
| InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l); | |||
| return result; | |||
| } | |||
| static std::string ShortSuccessor(const std::string& s) { | |||
| std::string result = s; | |||
| InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result); | |||
| return result; | |||
| } | |||
// Round-trip check: encode (key, seq, vt), re-parse it, and verify all
// three components survive.  Also checks that "bar" (shorter than the
// 8-byte trailer) fails to parse.
static void TestKey(const std::string& key,
                    uint64_t seq,
                    ValueType vt) {
  std::string encoded = IKey(key, seq, vt);
  Slice in(encoded);
  ParsedInternalKey decoded("", 0, kTypeValue);

  ASSERT_TRUE(ParseInternalKey(in, &decoded));
  ASSERT_EQ(key, decoded.user_key.ToString());
  ASSERT_EQ(seq, decoded.sequence);
  ASSERT_EQ(vt, decoded.type);

  ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded));
}
// Empty test fixture; presumably required by the TEST macro from
// util/testharness -- confirm against that header.
class FormatTest { };
| TEST(FormatTest, InternalKey_EncodeDecode) { | |||
| const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" }; | |||
| const uint64_t seq[] = { | |||
| 1, 2, 3, | |||
| (1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1, | |||
| (1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1, | |||
| (1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1 | |||
| }; | |||
| for (int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) { | |||
| for (int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) { | |||
| TestKey(keys[k], seq[s], kTypeValue); | |||
| TestKey("hello", 1, kTypeDeletion); | |||
| } | |||
| } | |||
| } | |||
// Checks FindShortestSeparator over internal keys: identical user keys
// must come back unchanged regardless of sequence/type, while ordered
// distinct user keys may shorten.
TEST(FormatTest, InternalKeyShortSeparator) {
  // When user keys are same
  ASSERT_EQ(IKey("foo", 100, kTypeValue),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("foo", 99, kTypeValue)));
  ASSERT_EQ(IKey("foo", 100, kTypeValue),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("foo", 101, kTypeValue)));
  ASSERT_EQ(IKey("foo", 100, kTypeValue),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("foo", 100, kTypeValue)));
  ASSERT_EQ(IKey("foo", 100, kTypeValue),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("foo", 100, kTypeDeletion)));
  ASSERT_EQ(IKey("foo", 100, kTypeValue),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("foo", 100, kTypeLargeValueRef)));

  // When user keys are misordered (limit < start): no change expected
  ASSERT_EQ(IKey("foo", 100, kTypeValue),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("bar", 99, kTypeValue)));

  // When user keys are different, but correctly ordered
  ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("hello", 200, kTypeValue)));

  // When start user key is prefix of limit user key
  ASSERT_EQ(IKey("foo", 100, kTypeValue),
            Shorten(IKey("foo", 100, kTypeValue),
                    IKey("foobar", 200, kTypeValue)));

  // When limit user key is prefix of start user key
  ASSERT_EQ(IKey("foobar", 100, kTypeValue),
            Shorten(IKey("foobar", 100, kTypeValue),
                    IKey("foo", 200, kTypeValue)));
}
TEST(FormatTest, InternalKeyShortestSuccessor) {
  // "foo" can be bumped to the shorter successor "g".
  ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
            ShortSuccessor(IKey("foo", 100, kTypeValue)));
  // An all-0xff user key has no short successor; it must come back
  // unchanged.
  ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue),
            ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
}
TEST(FormatTest, SHA1) {
  // Check that we are computing the same value as sha1
  // ("aaf4...434d" is the well-known SHA-1 digest of "hello").
  // Note that the last two numbers are the length of the input and the
  // compression type.
  ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-0", // SHA1, uncompr
            LargeValueRefToFilenameString(
                LargeValueRef::Make("hello", kNoCompression)));
  ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-1", // SHA1, lwcompr
            LargeValueRefToFilenameString(
                LargeValueRef::Make("hello", kLightweightCompression)));
}
| } | |||
// Test-runner entry point: delegates to the shared test harness.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,154 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include <ctype.h> | |||
| #include <stdio.h> | |||
| #include "db/filename.h" | |||
| #include "db/dbformat.h" | |||
| #include "include/env.h" | |||
| #include "util/logging.h" | |||
| namespace leveldb { | |||
// Construct "<name>/<number zero-padded to at least 6 digits>.<suffix>",
// e.g. MakeFileName("db", 123, "log") == "db/000123.log".
static std::string MakeFileName(const std::string& name, uint64_t number,
                                const char* suffix) {
  char scratch[100];
  snprintf(scratch, sizeof(scratch), "/%06llu.%s",
           static_cast<unsigned long long>(number), suffix);
  std::string result = name;
  result += scratch;
  return result;
}
// Name of the write-ahead log file: "<name>/<number>.log".
std::string LogFileName(const std::string& name, uint64_t number) {
  assert(number > 0);
  return MakeFileName(name, number, "log");
}

// Name of an sstable file: "<name>/<number>.sst".
std::string TableFileName(const std::string& name, uint64_t number) {
  assert(number > 0);
  return MakeFileName(name, number, "sst");
}
| std::string LargeValueFileName(const std::string& name, | |||
| const LargeValueRef& large_ref) { | |||
| std::string result = name + "/"; | |||
| result += LargeValueRefToFilenameString(large_ref); | |||
| result += ".val"; | |||
| return result; | |||
| } | |||
// Name of a descriptor (manifest) file:
// "<dbname>/MANIFEST-<number zero-padded to at least 6 digits>".
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
  assert(number > 0);
  char scratch[100];
  snprintf(scratch, sizeof(scratch), "/MANIFEST-%06llu",
           static_cast<unsigned long long>(number));
  return dbname + scratch;
}
// The CURRENT file records which manifest is live (see SetCurrentFile).
std::string CurrentFileName(const std::string& dbname) {
  return dbname + "/CURRENT";
}

std::string LockFileName(const std::string& dbname) {
  return dbname + "/LOCK";
}

// Temp files are written and then renamed into place (see SetCurrentFile).
std::string TempFileName(const std::string& dbname, uint64_t number) {
  assert(number > 0);
  return MakeFileName(dbname, number, "dbtmp");
}

std::string InfoLogFileName(const std::string& dbname) {
  return dbname + "/LOG";
}

// Return the name of the old info log file for "dbname".
std::string OldInfoLogFileName(const std::string& dbname) {
  return dbname + "/LOG.old";
}
// Owned filenames have the form:
//    dbname/CURRENT
//    dbname/LOCK
//    dbname/LOG
//    dbname/LOG.old
//    dbname/MANIFEST-[0-9]+
//    dbname/[0-9a-f]{20}-[0-9]+-[0-9]+.val
//    dbname/[0-9]+.(log|sst)
//
// Classifies the bare filename "fname" (no "dbname/" prefix).  On
// success *type is always set; *large_ref is set only for
// kLargeValueFile and *number only for the numbered types.
bool ParseFileName(const std::string& fname,
                   uint64_t* number,
                   LargeValueRef* large_ref,
                   FileType* type) {
  Slice rest(fname);
  if (rest == "CURRENT") {
    *number = 0;
    *type = kCurrentFile;
  } else if (rest == "LOCK") {
    *number = 0;
    *type = kDBLockFile;
  } else if (rest == "LOG" || rest == "LOG.old") {
    *number = 0;
    *type = kInfoLogFile;
  } else if (rest.size() >= 4 &&
             Slice(rest.data() + rest.size() - 4, 4) == ".val") {
    // Everything before the ".val" suffix must parse as a serialized
    // LargeValueRef.  *number is deliberately left untouched here.
    LargeValueRef h;
    if (!FilenameStringToLargeValueRef(Slice(rest.data(), rest.size() - 4),
                                       &h)) {
      return false;
    }
    *large_ref = h;
    *type = kLargeValueFile;
  } else if (rest.starts_with("MANIFEST-")) {
    rest.remove_prefix(strlen("MANIFEST-"));
    uint64_t num;
    if (!ConsumeDecimalNumber(&rest, &num)) {
      return false;
    }
    // Trailing garbage after the number is an error.
    if (!rest.empty()) {
      return false;
    }
    *type = kDescriptorFile;
    *number = num;
  } else {
    // Avoid strtoull() to keep filename format independent of the
    // current locale
    uint64_t num;
    if (!ConsumeDecimalNumber(&rest, &num)) {
      return false;
    }
    // Whatever remains after the number must be exactly one of the
    // known suffixes.
    Slice suffix = rest;
    if (suffix == Slice(".log")) {
      *type = kLogFile;
    } else if (suffix == Slice(".sst")) {
      *type = kTableFile;
    } else if (suffix == Slice(".dbtmp")) {
      *type = kTempFile;
    } else {
      return false;
    }
    *number = num;
  }
  return true;
}
// Point CURRENT at the descriptor with the given number: write
// "MANIFEST-<number>\n" to a temp file, then rename the temp file onto
// CURRENT.  On any failure the temp file is deleted and the error
// Status is returned.
Status SetCurrentFile(Env* env, const std::string& dbname,
                      uint64_t descriptor_number) {
  // Remove leading "dbname/" and add newline to manifest file name
  std::string manifest = DescriptorFileName(dbname, descriptor_number);
  Slice contents = manifest;
  assert(contents.starts_with(dbname + "/"));
  contents.remove_prefix(dbname.size() + 1);
  std::string tmp = TempFileName(dbname, descriptor_number);
  Status s = WriteStringToFile(env, contents.ToString() + "\n", tmp);
  if (s.ok()) {
    s = env->RenameFile(tmp, CurrentFileName(dbname));
  }
  if (!s.ok()) {
    // Best-effort cleanup; the DeleteFile result is intentionally ignored.
    env->DeleteFile(tmp);
  }
  return s;
}
| } | |||
| @ -0,0 +1,92 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // File names used by DB code | |||
| #ifndef STORAGE_LEVELDB_DB_FILENAME_H_ | |||
| #define STORAGE_LEVELDB_DB_FILENAME_H_ | |||
| #include <stdint.h> | |||
| #include <string> | |||
| #include "include/slice.h" | |||
| #include "include/status.h" | |||
| #include "port/port.h" | |||
| namespace leveldb { | |||
class Env;
struct LargeValueRef;

// The kinds of files found in a leveldb database directory.
enum FileType {
  kLogFile,
  kDBLockFile,
  kTableFile,
  kLargeValueFile,
  kDescriptorFile,
  kCurrentFile,
  kTempFile,
  kInfoLogFile,  // Either the current one, or an old one
};
// Return the name of the log file with the specified number
// in the db named by "dbname".  The result will be prefixed with
// "dbname".
extern std::string LogFileName(const std::string& dbname, uint64_t number);

// Return the name of the sstable with the specified number
// in the db named by "dbname".  The result will be prefixed with
// "dbname".
extern std::string TableFileName(const std::string& dbname, uint64_t number);

// Return the name of the large value file with the specified large
// value reference in the db named by "dbname".  The result will be
// prefixed with "dbname".
extern std::string LargeValueFileName(const std::string& dbname,
                                      const LargeValueRef& large_ref);

// Return the name of the descriptor file for the db named by
// "dbname" and the specified incarnation number.  The result will be
// prefixed with "dbname".
extern std::string DescriptorFileName(const std::string& dbname,
                                      uint64_t number);

// Return the name of the current file.  This file contains the name
// of the current manifest file.  The result will be prefixed with
// "dbname".
extern std::string CurrentFileName(const std::string& dbname);

// Return the name of the lock file for the db named by
// "dbname".  The result will be prefixed with "dbname".
extern std::string LockFileName(const std::string& dbname);

// Return the name of a temporary file owned by the db named "dbname".
// The result will be prefixed with "dbname".
extern std::string TempFileName(const std::string& dbname, uint64_t number);

// Return the name of the info log file for "dbname".
extern std::string InfoLogFileName(const std::string& dbname);

// Return the name of the old info log file for "dbname".
extern std::string OldInfoLogFileName(const std::string& dbname);

// If filename is a leveldb file, store the type of the file in *type.
// If *type is kLargeValueFile, then the large value reference data
// from the filename is stored in "*large_ref".  For all other types of
// files, the number encoded in the filename is stored in *number.  If
// the filename was successfully parsed, returns true.  Else return
// false.
extern bool ParseFileName(const std::string& filename,
                          uint64_t* number,
                          LargeValueRef* large_ref,
                          FileType* type);

// Make the CURRENT file point to the descriptor file with the
// specified number.
extern Status SetCurrentFile(Env* env, const std::string& dbname,
                             uint64_t descriptor_number);
| } | |||
| #endif // STORAGE_LEVELDB_DB_FILENAME_H_ | |||
| @ -0,0 +1,156 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/filename.h" | |||
| #include "db/dbformat.h" | |||
| #include "port/port.h" | |||
| #include "util/logging.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
// Empty test fixture; presumably required by the TEST macro from
// util/testharness -- confirm against that header.
class FileNameTest { };
| TEST(FileNameTest, Parse) { | |||
| Slice db; | |||
| FileType type; | |||
| uint64_t number; | |||
| LargeValueRef large_ref; | |||
| // Successful parses | |||
| static struct { | |||
| const char* fname; | |||
| uint64_t number; | |||
| const char* large_ref; | |||
| FileType type; | |||
| } cases[] = { | |||
| { "100.log", 100, "", kLogFile }, | |||
| { "0.log", 0, "", kLogFile }, | |||
| { "0.sst", 0, "", kTableFile }, | |||
| { "CURRENT", 0, "", kCurrentFile }, | |||
| { "LOCK", 0, "", kDBLockFile }, | |||
| { "MANIFEST-2", 2, "", kDescriptorFile }, | |||
| { "MANIFEST-7", 7, "", kDescriptorFile }, | |||
| { "LOG", 0, "", kInfoLogFile }, | |||
| { "LOG.old", 0, "", kInfoLogFile }, | |||
| { "18446744073709551615.log", 18446744073709551615ull, "", | |||
| kLogFile }, | |||
| { "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0.val", 0, | |||
| "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0", kLargeValueFile }, | |||
| { "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0.val", 0, | |||
| "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0", | |||
| kLargeValueFile }, | |||
| }; | |||
| for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { | |||
| std::string f = cases[i].fname; | |||
| ASSERT_TRUE(ParseFileName(f, &number, &large_ref, &type)) << f; | |||
| ASSERT_EQ(cases[i].type, type) << f; | |||
| if (type == kLargeValueFile) { | |||
| ASSERT_EQ(cases[i].large_ref, LargeValueRefToFilenameString(large_ref)) | |||
| << f; | |||
| } else { | |||
| ASSERT_EQ(cases[i].number, number) << f; | |||
| } | |||
| } | |||
| // Errors | |||
| static const char* errors[] = { | |||
| "", | |||
| "foo", | |||
| "foo-dx-100.log", | |||
| ".log", | |||
| "", | |||
| "manifest", | |||
| "CURREN", | |||
| "CURRENTX", | |||
| "MANIFES", | |||
| "MANIFEST", | |||
| "MANIFEST-", | |||
| "XMANIFEST-3", | |||
| "MANIFEST-3x", | |||
| "LOC", | |||
| "LOCKx", | |||
| "LO", | |||
| "LOGx", | |||
| "18446744073709551616.log", | |||
| "184467440737095516150.log", | |||
| "100", | |||
| "100.", | |||
| "100.lop", | |||
| "100.val", | |||
| ".val", | |||
| "123456789012345678901234567890123456789-12340.val", | |||
| "1234567890123456789012345678901234567-123-0.val", | |||
| "12345678901234567890123456789012345678902-100-1-.val", | |||
| // Overflow on value size | |||
| "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000000000000000000-1.val", | |||
| // '03.val' is a bad compression type | |||
| "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000-3.val" }; | |||
| for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { | |||
| std::string f = errors[i]; | |||
| ASSERT_TRUE(!ParseFileName(f, &number, &large_ref, &type)) << f; | |||
| }; | |||
| } | |||
// Round-trips each *FileName constructor through ParseFileName: every
// constructed name must start with "<dbname>/" and parse back to the
// same number/type (or large-value ref).
TEST(FileNameTest, Construction) {
  uint64_t number;
  FileType type;
  LargeValueRef large_ref;
  std::string fname;

  fname = CurrentFileName("foo");
  ASSERT_EQ("foo/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
  ASSERT_EQ(0, number);
  ASSERT_EQ(kCurrentFile, type);

  fname = LockFileName("foo");
  ASSERT_EQ("foo/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
  ASSERT_EQ(0, number);
  ASSERT_EQ(kDBLockFile, type);

  fname = LogFileName("foo", 192);
  ASSERT_EQ("foo/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
  ASSERT_EQ(192, number);
  ASSERT_EQ(kLogFile, type);

  fname = TableFileName("bar", 200);
  ASSERT_EQ("bar/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
  ASSERT_EQ(200, number);
  ASSERT_EQ(kTableFile, type);

  fname = DescriptorFileName("bar", 100);
  ASSERT_EQ("bar/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
  ASSERT_EQ(100, number);
  ASSERT_EQ(kDescriptorFile, type);

  fname = TempFileName("tmp", 999);
  ASSERT_EQ("tmp/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
  ASSERT_EQ(999, number);
  ASSERT_EQ(kTempFile, type);

  // Large value names must round-trip for every compression type.
  for (int i = 0; i <= kLightweightCompression; i++) {
    CompressionType ctype = static_cast<CompressionType>(i);
    std::string value = "abcdef";
    LargeValueRef real_large_ref = LargeValueRef::Make(Slice(value), ctype);
    fname = LargeValueFileName("tmp", real_large_ref);
    ASSERT_EQ("tmp/", std::string(fname.data(), 4));
    ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
    ASSERT_TRUE(real_large_ref == large_ref);
    ASSERT_EQ(kLargeValueFile, type);
    ASSERT_EQ(large_ref.compression_type(), ctype);
  }
}
| } | |||
// Test-runner entry point: delegates to the shared test harness.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,35 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // Log format information shared by reader and writer. | |||
| // See ../doc/log_format.txt for more detail. | |||
| #ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_ | |||
| #define STORAGE_LEVELDB_DB_LOG_FORMAT_H_ | |||
| namespace leveldb { | |||
| namespace log { | |||
// Physical record types stored in the one-byte "type" field of each
// log record header.
enum RecordType {
  // Zero is reserved for preallocated files
  kZeroType = 0,

  kFullType = 1,

  // For fragments: first/middle/last pieces of one logical record.
  kFirstType = 2,
  kMiddleType = 3,
  kLastType = 4,
};
static const int kMaxRecordType = kLastType;

// Log files are read and written in fixed-size blocks.
static const int kBlockSize = 32768;

// Header is checksum (4 bytes), type (1 byte), length (2 bytes).
static const int kHeaderSize = 4 + 1 + 2;
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_DB_LOG_FORMAT_H_ | |||
| @ -0,0 +1,172 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/log_reader.h" | |||
| #include <stdint.h> | |||
| #include "include/env.h" | |||
| #include "util/coding.h" | |||
| #include "util/crc32c.h" | |||
| namespace leveldb { | |||
| namespace log { | |||
// Out-of-line destructor anchor for the abstract Reporter interface.
Reader::Reporter::~Reporter() {
}
// "file" and "reporter" are borrowed (never deleted here; the
// destructor only frees the block buffer).  "checksum" enables record
// checksum verification.
Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum)
    : file_(file),
      reporter_(reporter),
      checksum_(checksum),
      backing_store_(new char[kBlockSize]),  // one block of read buffer
      buffer_(),
      eof_(false) {
}
// Frees only the block buffer; file_ and reporter_ are not owned.
Reader::~Reader() {
  delete[] backing_store_;
}
// Read the next complete logical record into *record.  Returns true on
// success, false at end of input.  *scratch is the backing store when
// the record was split into first/middle/last fragments; for a
// kFullType record, *record aliases the internal block buffer instead.
bool Reader::ReadRecord(Slice* record, std::string* scratch) {
  scratch->clear();
  record->clear();
  bool in_fragmented_record = false;

  Slice fragment;
  while (true) {
    switch (ReadPhysicalRecord(&fragment)) {
      case kFullType:
        if (in_fragmented_record) {
          // A complete record arrived while an earlier fragment run was
          // still open: drop the accumulated partial record.
          ReportDrop(scratch->size(), "partial record without end");
        }
        scratch->clear();
        *record = fragment;
        return true;

      case kFirstType:
        if (in_fragmented_record) {
          ReportDrop(scratch->size(), "partial record without end");
        }
        scratch->assign(fragment.data(), fragment.size());
        in_fragmented_record = true;
        break;

      case kMiddleType:
        if (!in_fragmented_record) {
          ReportDrop(fragment.size(), "missing start of fragmented record");
        } else {
          scratch->append(fragment.data(), fragment.size());
        }
        break;

      case kLastType:
        if (!in_fragmented_record) {
          ReportDrop(fragment.size(), "missing start of fragmented record");
        } else {
          scratch->append(fragment.data(), fragment.size());
          *record = Slice(*scratch);
          return true;
        }
        break;

      case kEof:
        // NOTE(review): kEof/kBadRecord appear to be reader-internal
        // extensions of RecordType -- confirm in log_reader.h.
        if (in_fragmented_record) {
          ReportDrop(scratch->size(), "partial record without end");
          scratch->clear();
        }
        return false;

      case kBadRecord:
        if (in_fragmented_record) {
          ReportDrop(scratch->size(), "error in middle of record");
          in_fragmented_record = false;
          scratch->clear();
        }
        break;

      default:
        ReportDrop(
            (fragment.size() + (in_fragmented_record ? scratch->size() : 0)),
            "unknown record type");
        in_fragmented_record = false;
        scratch->clear();
        break;
    }
  }
  return false;  // unreachable: the loop exits only via return
}
| void Reader::ReportDrop(size_t bytes, const char* reason) { | |||
| if (reporter_ != NULL) { | |||
| reporter_->Corruption(bytes, Status::Corruption(reason)); | |||
| } | |||
| } | |||
// Reads the next physical record, refilling the one-block internal
// buffer from the file as needed.  On success stores the payload in
// *result and returns the record's type byte; otherwise returns kEof
// (input exhausted or truncated tail) or kBadRecord (bad length,
// checksum failure, or a zero-padding record that should be skipped).
unsigned int Reader::ReadPhysicalRecord(Slice* result) {
  while (true) {
    if (buffer_.size() <= kHeaderSize) {
      if (!eof_) {
        // Last read was a full read, so this is a trailer to skip
        buffer_.clear();
        Status status = file_->Read(kBlockSize, &buffer_, backing_store_);
        if (!status.ok()) {
          if (reporter_ != NULL) {
            reporter_->Corruption(kBlockSize, status);
          }
          buffer_.clear();
          eof_ = true;
          return kEof;
        } else if (buffer_.size() < kBlockSize) {
          // A short read means we have reached the end of the file.
          eof_ = true;
        }
        continue;
      } else if (buffer_.size() == 0) {
        // End of file
        return kEof;
      } else if (buffer_.size() < kHeaderSize) {
        // Fewer bytes remain than even a header needs: truncated write.
        ReportDrop(buffer_.size(), "truncated record at end of file");
        buffer_.clear();
        return kEof;
      } else {
        // We have a trailing zero-length record.  Fall through and check it.
      }
    }

    // Parse the header: bytes [0..3] masked crc32c of type+payload,
    // bytes [4..5] little-endian payload length, byte [6] record type.
    const char* header = buffer_.data();
    const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff;
    const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff;
    const unsigned int type = header[6];
    const uint32_t length = a | (b << 8);
    if (kHeaderSize + length > buffer_.size()) {
      ReportDrop(buffer_.size(), "bad record length");
      buffer_.clear();
      return kBadRecord;
    }

    // Check crc
    if (checksum_) {
      if (type == kZeroType && length == 0) {
        // Skip zero length record without reporting a drop: the writer
        // pads block trailers with zero bytes (see Writer::AddRecord).
        buffer_.remove_prefix(kHeaderSize + length);
        return kBadRecord;
      }
      uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header));
      uint32_t actual_crc = crc32c::Value(header + 6, 1 + length);
      if (actual_crc != expected_crc) {
        ReportDrop(length, "checksum mismatch");
        buffer_.remove_prefix(kHeaderSize + length);
        return kBadRecord;
      }
    }

    buffer_.remove_prefix(kHeaderSize + length);
    *result = Slice(header + kHeaderSize, length);
    return type;
  }
}
| } | |||
| } | |||
| @ -0,0 +1,75 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_LOG_READER_H_ | |||
| #define STORAGE_LEVELDB_DB_LOG_READER_H_ | |||
| #include "db/log_format.h" | |||
| #include "include/slice.h" | |||
| #include "include/status.h" | |||
| namespace leveldb { | |||
| class SequentialFile; | |||
| namespace log { | |||
// Reader extracts the logical records that log::Writer stored as a
// sequence of kBlockSize blocks, verifying per-record checksums and
// reporting corrupted or truncated regions through Reporter.
class Reader {
 public:
  // Interface for reporting errors.
  class Reporter {
   public:
    virtual ~Reporter();

    // Some corruption was detected.  "bytes" is the approximate number
    // of bytes dropped due to the corruption.
    virtual void Corruption(size_t bytes, const Status& status) = 0;
  };

  // Create a reader that will return log records from "*file".
  // "*file" must remain live while this Reader is in use.
  //
  // If "reporter" is non-NULL, it is notified whenever some data is
  // dropped due to a detected corruption.  "*reporter" must remain
  // live while this Reader is in use.
  //
  // If "checksum" is true, verify checksums if available.
  Reader(SequentialFile* file, Reporter* reporter, bool checksum);

  ~Reader();

  // Read the next record into *record.  Returns true if read
  // successfully, false if we hit end of the input.  May use
  // "*scratch" as temporary storage.  The contents filled in *record
  // will only be valid until the next mutating operation on this
  // reader or the next mutation to *scratch.
  bool ReadRecord(Slice* record, std::string* scratch);

 private:
  SequentialFile* const file_;
  Reporter* const reporter_;
  bool const checksum_;
  char* const backing_store_;   // Heap buffer of kBlockSize bytes
  Slice buffer_;                // Unconsumed portion of backing_store_
  bool eof_;   // Last Read() indicated EOF by returning < kBlockSize

  // Extend record types with the following special values
  enum {
    kEof = kMaxRecordType + 1,
    kBadRecord = kMaxRecordType + 2
  };

  // Return type, or one of the preceding special values
  unsigned int ReadPhysicalRecord(Slice* result);

  // Reports "bytes" dropped bytes (with "reason") to the reporter, if any.
  void ReportDrop(size_t bytes, const char* reason);

  // No copying allowed
  Reader(const Reader&);
  void operator=(const Reader&);
};
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_DB_LOG_READER_H_ | |||
| @ -0,0 +1,361 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/log_reader.h" | |||
| #include "db/log_writer.h" | |||
| #include "include/env.h" | |||
| #include "util/coding.h" | |||
| #include "util/crc32c.h" | |||
| #include "util/random.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
| namespace log { | |||
// Construct a string of exactly "n" bytes made out of repetitions of the
// supplied partial string.  An empty "partial_string" yields "n" zero
// bytes (the previous implementation looped forever in that case).
static std::string BigString(const std::string& partial_string, size_t n) {
  std::string result;
  if (partial_string.empty()) {
    // Guard against an infinite loop: appending "" never grows result.
    result.resize(n);
    return result;
  }
  result.reserve(n + partial_string.size());
  while (result.size() < n) {
    result.append(partial_string);
  }
  result.resize(n);
  return result;
}
// Render "n" as its decimal digits followed by a trailing period,
// e.g. 7 -> "7." and -12 -> "-12.".
static std::string NumberString(int n) {
  char buf[50];
  const int written = snprintf(buf, sizeof(buf), "%d.", n);
  return std::string(buf, written);
}
| // Return a skewed potentially long string | |||
| static std::string RandomSkewedString(int i, Random* rnd) { | |||
| return BigString(NumberString(i), rnd->Skewed(17)); | |||
| } | |||
// Harness shared by all log tests: records are written through
// log::Writer into an in-memory string, then read back through
// log::Reader, with hooks for corrupting the bytes in between.
class LogTest {
 private:
  // WritableFile that accumulates all appended bytes in a std::string.
  class StringDest : public WritableFile {
   public:
    std::string contents_;

    virtual Status Close() { return Status::OK(); }
    virtual Status Flush() { return Status::OK(); }
    virtual Status Sync() { return Status::OK(); }
    virtual Status Append(const Slice& slice) {
      contents_.append(slice.data(), slice.size());
      return Status::OK();
    }
  };

  // SequentialFile over a Slice, with an optional one-shot forced error.
  // Reads must stop once a partial read or error has been returned.
  class StringSource : public SequentialFile {
   public:
    Slice contents_;
    bool force_error_;
    bool returned_partial_;
    StringSource() : force_error_(false), returned_partial_(false) { }

    virtual Status Read(size_t n, Slice* result, char* scratch) {
      ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error";
      ASSERT_EQ(kBlockSize, n);
      if (force_error_) {
        force_error_ = false;
        returned_partial_ = true;
        return Status::Corruption("read error");
      }
      if (contents_.size() < n) {
        n = contents_.size();
        returned_partial_ = true;
      }
      *result = Slice(contents_.data(), n);
      contents_.remove_prefix(n);
      return Status::OK();
    }
  };

  // Reporter that tallies dropped bytes and concatenates status messages.
  class ReportCollector : public Reader::Reporter {
   public:
    size_t dropped_bytes_;
    std::string message_;

    ReportCollector() : dropped_bytes_(0) { }
    virtual void Corruption(size_t bytes, const Status& status) {
      dropped_bytes_ += bytes;
      message_.append(status.ToString());
    }
  };

  StringDest dest_;
  StringSource source_;
  ReportCollector report_;
  bool reading_;   // Set on first Read(); Write() is illegal afterwards
  Writer writer_;
  Reader reader_;

 public:
  LogTest() : reading_(false),
              writer_(&dest_),
              reader_(&source_, &report_, true/*checksum*/) {
  }

  // Appends "msg" as one logical record.  Must precede all Read() calls.
  void Write(const std::string& msg) {
    ASSERT_TRUE(!reading_) << "Write() after starting to read";
    writer_.AddRecord(Slice(msg));
  }

  size_t WrittenBytes() const {
    return dest_.contents_.size();
  }

  // Returns the next record's contents, or "EOF" when input is
  // exhausted.  The first call snapshots the written bytes (possibly
  // after corruption below) as the read source.
  std::string Read() {
    if (!reading_) {
      reading_ = true;
      source_.contents_ = Slice(dest_.contents_);
    }
    std::string scratch;
    Slice record;
    if (reader_.ReadRecord(&record, &scratch)) {
      return record.ToString();
    } else {
      return "EOF";
    }
  }

  // --- Corruption helpers: mutate the written bytes before reading ---

  void IncrementByte(int offset, int delta) {
    dest_.contents_[offset] += delta;
  }

  void SetByte(int offset, char new_byte) {
    dest_.contents_[offset] = new_byte;
  }

  void ShrinkSize(int bytes) {
    dest_.contents_.resize(dest_.contents_.size() - bytes);
  }

  // Recomputes the masked crc over the type byte and "len" payload bytes
  // of the record whose header starts at "header_offset", so deliberate
  // edits elsewhere in the record are not hidden by a checksum failure.
  void FixChecksum(int header_offset, int len) {
    // Compute crc of type/len/data
    uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len);
    crc = crc32c::Mask(crc);
    EncodeFixed32(&dest_.contents_[header_offset], crc);
  }

  // Makes the next source Read() fail with a corruption error.
  void ForceError() {
    source_.force_error_ = true;
  }

  size_t DroppedBytes() const {
    return report_.dropped_bytes_;
  }

  // Returns "OK" iff the recorded error message contains "msg";
  // otherwise returns the accumulated message for diagnosis.
  std::string MatchError(const std::string& msg) const {
    if (report_.message_.find(msg) == std::string::npos) {
      return report_.message_;
    } else {
      return "OK";
    }
  }
};
// Reading an empty log yields EOF immediately.
TEST(LogTest, Empty) {
  ASSERT_EQ("EOF", Read());
}

// Simple round-trip of several records, including an empty one.
TEST(LogTest, ReadWrite) {
  Write("foo");
  Write("bar");
  Write("");
  Write("xxxx");
  ASSERT_EQ("foo", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("xxxx", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ("EOF", Read());  // Make sure reads at eof work
}

// Many small records force the log across many block boundaries.
TEST(LogTest, ManyBlocks) {
  for (int i = 0; i < 100000; i++) {
    Write(NumberString(i));
  }
  for (int i = 0; i < 100000; i++) {
    ASSERT_EQ(NumberString(i), Read());
  }
  ASSERT_EQ("EOF", Read());
}

// Records larger than a block must be fragmented and reassembled.
TEST(LogTest, Fragmentation) {
  Write("small");
  Write(BigString("medium", 50000));
  Write(BigString("large", 100000));
  ASSERT_EQ("small", Read());
  ASSERT_EQ(BigString("medium", 50000), Read());
  ASSERT_EQ(BigString("large", 100000), Read());
  ASSERT_EQ("EOF", Read());
}
TEST(LogTest, MarginalTrailer) {
  // Make a trailer that is exactly the same length as an empty record.
  const int n = kBlockSize - 2*kHeaderSize;
  Write(BigString("foo", n));
  ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
  Write("");
  Write("bar");
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}

// Leaves a trailer shorter than a full header at the end of a block.
TEST(LogTest, ShortTrailer) {
  const int n = kBlockSize - 2*kHeaderSize + 4;
  Write(BigString("foo", n));
  ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
  Write("");
  Write("bar");
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}

// The file ends exactly where a trailer would begin.
TEST(LogTest, AlignedEof) {
  const int n = kBlockSize - 2*kHeaderSize + 4;
  Write(BigString("foo", n));
  ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_EQ("EOF", Read());
}

// Round-trips many randomly sized records; the two identically seeded
// generators must reproduce the same sequence of strings.
TEST(LogTest, RandomRead) {
  const int N = 500;
  Random write_rnd(301);
  for (int i = 0; i < N; i++) {
    Write(RandomSkewedString(i, &write_rnd));
  }
  Random read_rnd(301);
  for (int i = 0; i < N; i++) {
    ASSERT_EQ(RandomSkewedString(i, &read_rnd), Read());
  }
  ASSERT_EQ("EOF", Read());
}
// Tests of all the error paths in log_reader.cc follow:

// A failed file Read() surfaces as EOF plus a whole-block drop report.
TEST(LogTest, ReadError) {
  Write("foo");
  ForceError();
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(kBlockSize, DroppedBytes());
  ASSERT_EQ("OK", MatchError("read error"));
}

// An out-of-range type byte (with a matching crc) drops the payload.
TEST(LogTest, BadRecordType) {
  Write("foo");
  // Type is stored in header[6]
  IncrementByte(6, 100);
  FixChecksum(0, 3);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("unknown record type"));
}

// The file ends with fewer bytes than a complete header.
TEST(LogTest, TruncatedTrailingRecord) {
  Write("foo");
  ShrinkSize(4);   // Drop all payload as well as a header byte
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(kHeaderSize - 1, DroppedBytes());
  ASSERT_EQ("OK", MatchError("truncated record at end of file"));
}

// The header is intact but claims more payload than remains in the file.
TEST(LogTest, BadLength) {
  Write("foo");
  ShrinkSize(1);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(kHeaderSize + 2, DroppedBytes());
  ASSERT_EQ("OK", MatchError("bad record length"));
}

// Corrupting the stored crc (byte 0) must trigger a checksum failure.
TEST(LogTest, ChecksumMismatch) {
  Write("foo");
  IncrementByte(0, 10);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("checksum mismatch"));
}

// A kMiddleType fragment with no preceding kFirstType is reported.
TEST(LogTest, UnexpectedMiddleType) {
  Write("foo");
  SetByte(6, kMiddleType);
  FixChecksum(0, 3);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("missing start"));
}

// Likewise for a stray kLastType fragment.
TEST(LogTest, UnexpectedLastType) {
  Write("foo");
  SetByte(6, kLastType);
  FixChecksum(0, 3);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("missing start"));
}
// A kFirstType never followed by kLast: the next full record must still
// be returned, and the abandoned partial record must be reported.
TEST(LogTest, UnexpectedFullType) {
  Write("foo");
  Write("bar");
  SetByte(6, kFirstType);
  FixChecksum(0, 3);
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("partial record without end"));
}

// Same as above, but the following record is itself fragmented.
TEST(LogTest, UnexpectedFirstType) {
  Write("foo");
  Write(BigString("bar", 100000));
  SetByte(6, kFirstType);
  FixChecksum(0, 3);
  ASSERT_EQ(BigString("bar", 100000), Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("partial record without end"));
}

TEST(LogTest, ErrorJoinsRecords) {
  // Consider two fragmented records:
  //    first(R1) last(R1) first(R2) last(R2)
  // where the middle two fragments disappear.  We do not want
  // first(R1),last(R2) to get joined and returned as a valid record.

  // Write records that span two blocks
  Write(BigString("foo", kBlockSize));
  Write(BigString("bar", kBlockSize));
  Write("correct");

  // Wipe the middle block
  for (int offset = kBlockSize; offset < 2*kBlockSize; offset++) {
    SetByte(offset, 'x');
  }

  ASSERT_EQ("correct", Read());
  ASSERT_EQ("EOF", Read());
  const int dropped = DroppedBytes();
  ASSERT_LE(dropped, 2*kBlockSize + 100);
  ASSERT_GE(dropped, 2*kBlockSize);
}
| } | |||
| } | |||
| int main(int argc, char** argv) { | |||
| return leveldb::test::RunAllTests(); | |||
| } | |||
| @ -0,0 +1,101 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/log_writer.h" | |||
| #include <stdint.h> | |||
| #include "include/env.h" | |||
| #include "util/coding.h" | |||
| #include "util/crc32c.h" | |||
| namespace leveldb { | |||
| namespace log { | |||
| Writer::Writer(WritableFile* dest) | |||
| : dest_(dest), | |||
| block_offset_(0) { | |||
| for (int i = 0; i <= kMaxRecordType; i++) { | |||
| char t = static_cast<char>(i); | |||
| type_crc_[i] = crc32c::Value(&t, 1); | |||
| } | |||
| } | |||
// Writer does not own dest_; nothing to release here.
Writer::~Writer() {
}
// Appends "slice" to the log as one logical record, fragmenting it
// across blocks as needed.  Returns the status of the first failed
// write, or OK.
Status Writer::AddRecord(const Slice& slice) {
  const char* ptr = slice.data();
  size_t left = slice.size();

  // Fragment the record if necessary and emit it.  Note that if slice
  // is empty, we still want to iterate once to emit a single
  // zero-length record
  Status s;
  do {
    const int leftover = kBlockSize - block_offset_;
    assert(leftover >= 0);
    if (leftover <= kHeaderSize) {
      // Switch to a new block
      if (leftover > 0) {
        // Fill the trailer with zero bytes (the literal supplies 7
        // zeros; this assumes kHeaderSize <= 7)
        dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00", leftover));
      }
      block_offset_ = 0;
    }

    // Invariant: we never leave <= kHeaderSize bytes in a block.
    const int avail = kBlockSize - block_offset_ - kHeaderSize;
    assert(avail > 0);

    const size_t fragment_length = (left < avail) ? left : avail;

    // Pick the type that tells the reader where this fragment falls
    // within the logical record.
    RecordType type;
    const bool begin = (ptr == slice.data());
    const bool end = (left == fragment_length);
    if (begin && end) {
      type = kFullType;
    } else if (begin) {
      type = kFirstType;
    } else if (end) {
      type = kLastType;
    } else {
      type = kMiddleType;
    }

    s = EmitPhysicalRecord(type, ptr, fragment_length);
    ptr += fragment_length;
    left -= fragment_length;
  } while (s.ok() && left > 0);
  return s;
}
// Writes a single physical record (header followed by payload) to the
// destination and advances block_offset_.  "n" must fit in the 16-bit
// length field, and the record must fit in the current block.
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
  assert(n <= 0xffff);  // Must fit in two bytes
  assert(block_offset_ + kHeaderSize + n <= kBlockSize);

  // Format the header: bytes [4..5] little-endian length, byte [6] type;
  // bytes [0..3] are filled in with the masked crc below.
  char buf[kHeaderSize];
  buf[4] = static_cast<char>(n & 0xff);
  buf[5] = static_cast<char>(n >> 8);
  buf[6] = static_cast<char>(t);

  // Compute the crc of the record type and the payload.
  uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n);
  crc = crc32c::Mask(crc);  // Adjust for storage
  EncodeFixed32(buf, crc);

  // Write the header and the payload
  Status s = dest_->Append(Slice(buf, kHeaderSize));
  if (s.ok()) {
    s = dest_->Append(Slice(ptr, n));
    if (s.ok()) {
      s = dest_->Flush();
    }
  }
  // NOTE(review): the offset advances even when the write failed —
  // presumably so later records never overlap the bad region; confirm.
  block_offset_ += kHeaderSize + n;
  return s;
}
| } | |||
| } | |||
| @ -0,0 +1,48 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_ | |||
| #define STORAGE_LEVELDB_DB_LOG_WRITER_H_ | |||
| #include <stdint.h> | |||
| #include "db/log_format.h" | |||
| #include "include/slice.h" | |||
| #include "include/status.h" | |||
| namespace leveldb { | |||
| class WritableFile; | |||
| namespace log { | |||
// Writer appends logical records to a log file, fragmenting each record
// across kBlockSize blocks as needed.  log::Reader is the read side of
// this format.
class Writer {
 public:
  // Create a writer that will append data to "*dest".
  // "*dest" must be initially empty.
  // "*dest" must remain live while this Writer is in use.
  explicit Writer(WritableFile* dest);
  ~Writer();

  // Append "slice" as one logical record; returns the first write error.
  Status AddRecord(const Slice& slice);

 private:
  WritableFile* dest_;
  int block_offset_;       // Current offset in block

  // crc32c values for all supported record types.  These are
  // pre-computed to reduce the overhead of computing the crc of the
  // record type stored in the header.
  uint32_t type_crc_[kMaxRecordType + 1];

  Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);

  // No copying allowed
  Writer(const Writer&);
  void operator=(const Writer&);
};
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_ | |||
| @ -0,0 +1,109 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/memtable.h" | |||
| #include "db/dbformat.h" | |||
| #include "include/comparator.h" | |||
| #include "include/env.h" | |||
| #include "include/iterator.h" | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
| static Slice GetLengthPrefixedSlice(const char* data) { | |||
| uint32_t len; | |||
| const char* p = data; | |||
| p = GetVarint32Ptr(p, p + 5, &len); // +5: we assume "p" is not corrupted | |||
| return Slice(p, len); | |||
| } | |||
// The skiplist shares this table's arena, so all entry storage is freed
// together when the MemTable is destroyed.
MemTable::MemTable(const InternalKeyComparator& cmp)
    : comparator_(cmp),
      table_(comparator_, &arena_) {
}

MemTable::~MemTable() {
}

// All entry memory comes from arena_, so its usage is the table's usage.
size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); }
// Orders two skiplist entries: decode each length-prefixed internal key
// and defer to the wrapped InternalKeyComparator.
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
    const {
  // Internal keys are encoded as length-prefixed strings.
  Slice a = GetLengthPrefixedSlice(aptr);
  Slice b = GetLengthPrefixedSlice(bptr);
  return comparator.Compare(a, b);
}
| // Encode a suitable internal key target for "target" and return it. | |||
| // Uses *scratch as scratch space, and the returned pointer will point | |||
| // into this scratch space. | |||
| static const char* EncodeKey(std::string* scratch, const Slice& target) { | |||
| scratch->clear(); | |||
| PutVarint32(scratch, target.size()); | |||
| scratch->append(target.data(), target.size()); | |||
| return scratch->data(); | |||
| } | |||
// Iterator over a MemTable's skiplist.  key() yields the decoded
// internal key; value() is read from the bytes stored immediately after
// the key entry (see MemTable::Add for the entry layout).
class MemTableIterator: public Iterator {
 public:
  explicit MemTableIterator(MemTable::Table* table) {
    iter_ = new MemTable::Table::Iterator(table);
  }
  virtual ~MemTableIterator() { delete iter_; }

  virtual bool Valid() const { return iter_->Valid(); }
  // Seek re-encodes "k" as a length-prefixed key to match the table's
  // entry format; tmp_ provides the backing storage.
  virtual void Seek(const Slice& k) { iter_->Seek(EncodeKey(&tmp_, k)); }
  virtual void SeekToFirst() { iter_->SeekToFirst(); }
  virtual void SeekToLast() { iter_->SeekToLast(); }
  virtual void Next() { iter_->Next(); }
  virtual void Prev() { iter_->Prev(); }
  virtual Slice key() const { return GetLengthPrefixedSlice(iter_->key()); }
  virtual Slice value() const {
    // The value is a second length-prefixed slice right after the key.
    Slice key_slice = GetLengthPrefixedSlice(iter_->key());
    return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
  }

  // Memtable iteration itself cannot fail.
  virtual Status status() const { return Status::OK(); }

 private:
  MemTable::Table::Iterator* iter_;
  std::string tmp_;       // For passing to EncodeKey

  // No copying allowed
  MemTableIterator(const MemTableIterator&);
  void operator=(const MemTableIterator&);
};
// Returns a heap-allocated iterator over the skiplist; the caller owns
// it and must not outlive this MemTable.
Iterator* MemTable::NewIterator() {
  return new MemTableIterator(&table_);
}
// Inserts an entry mapping "key" -> "value" at sequence number "s" with
// the given type.  The entry is arena-allocated (never individually
// freed); the skiplist stores a pointer to the encoded bytes.
void MemTable::Add(SequenceNumber s, ValueType type,
                   const Slice& key,
                   const Slice& value) {
  // Format of an entry is concatenation of:
  //  key_size     : varint32 of internal_key.size()
  //  key bytes    : char[internal_key.size()]
  //  value_size   : varint32 of value.size()
  //  value bytes  : char[value.size()]
  size_t key_size = key.size();
  size_t val_size = value.size();
  size_t internal_key_size = key_size + 8;  // +8 for the sequence/type tag
  const size_t encoded_len =
      VarintLength(internal_key_size) + internal_key_size +
      VarintLength(val_size) + val_size;
  char* buf = arena_.Allocate(encoded_len);
  char* p = EncodeVarint32(buf, internal_key_size);
  memcpy(p, key.data(), key_size);
  p += key_size;
  // Internal-key trailer: (sequence << 8) | type, fixed 8 bytes.
  EncodeFixed64(p, (s << 8) | type);
  p += 8;
  p = EncodeVarint32(p, val_size);
  memcpy(p, value.data(), val_size);
  assert((p + val_size) - buf == encoded_len);
  table_.Insert(buf);
}
| } | |||
| @ -0,0 +1,69 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_ | |||
| #define STORAGE_LEVELDB_DB_MEMTABLE_H_ | |||
| #include <string> | |||
| #include "include/db.h" | |||
| #include "db/dbformat.h" | |||
| #include "db/skiplist.h" | |||
| #include "util/arena.h" | |||
| namespace leveldb { | |||
| class InternalKeyComparator; | |||
| class Mutex; | |||
| class MemTableIterator; | |||
// In-memory write buffer: an arena-backed skiplist of encoded entries
// ordered by internal key.  Not internally synchronized — see the
// REQUIRES clauses below.
class MemTable {
 public:
  explicit MemTable(const InternalKeyComparator& comparator);
  ~MemTable();

  // Returns an estimate of the number of bytes of data in use by this
  // data structure.
  //
  // REQUIRES: external synchronization to prevent simultaneous
  // operations on the same MemTable.
  size_t ApproximateMemoryUsage();

  // Return an iterator that yields the contents of the memtable.
  //
  // The caller must ensure that the underlying MemTable remains live
  // while the returned iterator is live.  The keys returned by this
  // iterator are internal keys encoded by AppendInternalKey in the
  // db/format.{h,cc} module.
  Iterator* NewIterator();

  // Add an entry into memtable that maps key to value at the
  // specified sequence number and with the specified type.
  // Typically value will be empty if type==kTypeDeletion.
  void Add(SequenceNumber seq, ValueType type,
           const Slice& key,
           const Slice& value);

 private:
  // Adapts InternalKeyComparator to compare the length-prefixed
  // entries stored in the skiplist.
  struct KeyComparator {
    const InternalKeyComparator comparator;
    explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
    int operator()(const char* a, const char* b) const;
  };
  friend class MemTableIterator;
  friend class MemTableBackwardIterator;

  typedef SkipList<const char*, KeyComparator> Table;

  KeyComparator comparator_;
  Arena arena_;      // Owns all entry storage
  Table table_;

  // No copying allowed
  MemTable(const MemTable&);
  void operator=(const MemTable&);
};
| } | |||
| #endif // STORAGE_LEVELDB_DB_MEMTABLE_H_ | |||
| @ -0,0 +1,396 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // We recover the contents of the descriptor from the other files we find. | |||
| // (1) Any log files are first converted to tables | |||
| // (2) We scan every table to compute | |||
| // (a) smallest/largest for the table | |||
| // (b) large value refs from the table | |||
| // (c) largest sequence number in the table | |||
| // (3) We generate descriptor contents: | |||
| // - log number is set to zero | |||
| // - next-file-number is set to 1 + largest file number we found | |||
| // - last-sequence-number is set to largest sequence# found across | |||
| // all tables (see 2c) | |||
| // - compaction pointers are cleared | |||
| // - every table file is added at level 0 | |||
| // | |||
| // Possible optimization 1: | |||
| // (a) Compute total size and use to pick appropriate max-level M | |||
| // (b) Sort tables by largest sequence# in the table | |||
| // (c) For each table: if it overlaps earlier table, place in level-0, | |||
| // else place in level-M. | |||
| // Possible optimization 2: | |||
| // Store per-table metadata (smallest, largest, largest-seq#, | |||
| // large-value-refs, ...) in the table's meta section to speed up | |||
| // ScanTable. | |||
| #include "db/builder.h" | |||
| #include "db/db_impl.h" | |||
| #include "db/dbformat.h" | |||
| #include "db/filename.h" | |||
| #include "db/log_reader.h" | |||
| #include "db/log_writer.h" | |||
| #include "db/memtable.h" | |||
| #include "db/table_cache.h" | |||
| #include "db/version_edit.h" | |||
| #include "db/write_batch_internal.h" | |||
| #include "include/comparator.h" | |||
| #include "include/db.h" | |||
| #include "include/env.h" | |||
| namespace leveldb { | |||
| namespace { | |||
| class Repairer { | |||
| public: | |||
// Sets up a Repairer for "dbname".  Sanitizes the caller's options and
// remembers whether SanitizeOptions substituted its own info log, so
// the destructor knows whether to delete it.
Repairer(const std::string& dbname, const Options& options)
    : dbname_(dbname),
      env_(options.env),
      icmp_(options.comparator),
      options_(SanitizeOptions(dbname, &icmp_, options)),
      owns_info_log_(options_.info_log != options.info_log),
      next_file_number_(1) {
  // TableCache can be small since we expect each table to be opened once.
  table_cache_ = new TableCache(dbname_, &options_, 10);
}
~Repairer() {
  delete table_cache_;
  if (owns_info_log_) {
    // Only delete the log when SanitizeOptions allocated it for us.
    delete options_.info_log;
  }
}
| Status Run() { | |||
| Status status = FindFiles(); | |||
| if (status.ok()) { | |||
| ConvertLogFilesToTables(); | |||
| ExtractMetaData(); | |||
| status = WriteDescriptor(); | |||
| } | |||
| if (status.ok()) { | |||
| unsigned long long bytes = 0; | |||
| for (int i = 0; i < tables_.size(); i++) { | |||
| bytes += tables_[i].meta.file_size; | |||
| } | |||
| Log(env_, options_.info_log, | |||
| "**** Repaired leveldb %s; " | |||
| "recovered %d files; %llu bytes. " | |||
| "Some data may have been lost. " | |||
| "****", | |||
| dbname_.c_str(), | |||
| static_cast<int>(tables_.size()), | |||
| bytes); | |||
| } | |||
| return status; | |||
| } | |||
| private: | |||
| struct TableInfo { | |||
| FileMetaData meta; | |||
| SequenceNumber max_sequence; | |||
| }; | |||
| std::string const dbname_; | |||
| Env* const env_; | |||
| InternalKeyComparator const icmp_; | |||
| Options const options_; | |||
| bool owns_info_log_; | |||
| TableCache* table_cache_; | |||
| VersionEdit edit_; | |||
| std::vector<std::string> manifests_; | |||
| std::vector<uint64_t> table_numbers_; | |||
| std::vector<uint64_t> logs_; | |||
| std::vector<TableInfo> tables_; | |||
| uint64_t next_file_number_; | |||
| Status FindFiles() { | |||
| std::vector<std::string> filenames; | |||
| Status status = env_->GetChildren(dbname_, &filenames); | |||
| if (!status.ok()) { | |||
| return status; | |||
| } | |||
| if (filenames.empty()) { | |||
| return Status::IOError(dbname_, "repair found no files"); | |||
| } | |||
| uint64_t number; | |||
| LargeValueRef large_ref; | |||
| FileType type; | |||
| for (int i = 0; i < filenames.size(); i++) { | |||
| if (ParseFileName(filenames[i], &number, &large_ref, &type)) { | |||
| if (type == kLargeValueFile) { | |||
| // Will be picked up when we process a Table that points to it | |||
| } else if (type == kDescriptorFile) { | |||
| manifests_.push_back(filenames[i]); | |||
| } else { | |||
| if (number + 1 > next_file_number_) { | |||
| next_file_number_ = number + 1; | |||
| } | |||
| if (type == kLogFile) { | |||
| logs_.push_back(number); | |||
| } else if (type == kTableFile) { | |||
| table_numbers_.push_back(number); | |||
| } else { | |||
| // Ignore other files | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return status; | |||
| } | |||
| void ConvertLogFilesToTables() { | |||
| for (int i = 0; i < logs_.size(); i++) { | |||
| std::string logname = LogFileName(dbname_, logs_[i]); | |||
| Status status = ConvertLogToTable(logs_[i]); | |||
| if (!status.ok()) { | |||
| Log(env_, options_.info_log, "Log #%llu: ignoring conversion error: %s", | |||
| (unsigned long long) logs_[i], | |||
| status.ToString().c_str()); | |||
| } | |||
| ArchiveFile(logname); | |||
| } | |||
| } | |||
  // Replays the log file with the given number into a MemTable and dumps
  // the result into a new table file.  Corrupt records are reported and
  // skipped rather than aborting the repair.
  Status ConvertLogToTable(uint64_t log) {
    struct LogReporter : public log::Reader::Reporter {
      Env* env;
      WritableFile* info_log;
      uint64_t lognum;
      // Called by log::Reader for corrupt regions; we log and keep going
      // so the rest of the file can still be salvaged.
      virtual void Corruption(size_t bytes, const Status& s) {
        // We print error messages for corruption, but continue repairing.
        Log(env, info_log, "Log #%llu: dropping %d bytes; %s",
            (unsigned long long) lognum,
            static_cast<int>(bytes),
            s.ToString().c_str());
      }
    };

    // Open the log file
    std::string logname = LogFileName(dbname_, log);
    SequentialFile* lfile;
    Status status = env_->NewSequentialFile(logname, &lfile);
    if (!status.ok()) {
      return status;
    }

    // Create the log reader.
    LogReporter reporter;
    reporter.env = env_;
    reporter.info_log = options_.info_log;
    reporter.lognum = log;
    // NOTE(review): the original comment here claimed checksumming is
    // intentionally enabled, but the flag below passes false (checksums
    // disabled).  Disabling checksums lets repair salvage more records
    // from partially corrupted logs at the risk of propagating bad data
    // (e.g. overly large sequence numbers) — confirm which behavior is
    // intended.
    log::Reader reader(lfile, &reporter, false/*do not checksum*/);

    // Read all the records and add to a memtable
    std::string scratch;
    Slice record;
    WriteBatch batch;
    MemTable mem(icmp_);
    int counter = 0;  // Number of operations successfully applied
    while (reader.ReadRecord(&record, &scratch)) {
      if (record.size() < 12) {
        // 12 is presumably the WriteBatch header size (8-byte sequence
        // plus 4-byte count) — confirm against write_batch.cc.
        reporter.Corruption(
            record.size(), Status::Corruption("log record too small"));
        continue;
      }
      WriteBatchInternal::SetContents(&batch, record);
      status = WriteBatchInternal::InsertInto(&batch, &mem);
      if (status.ok()) {
        counter += WriteBatchInternal::Count(&batch);
      } else {
        Log(env_, options_.info_log, "Log #%llu: ignoring %s",
            (unsigned long long) log,
            status.ToString().c_str());
        status = Status::OK();  // Keep going with rest of file
      }
    }
    delete lfile;

    // We ignore any version edits generated by the conversion to a Table
    // since ExtractMetaData() will also generate edits.
    VersionEdit skipped;
    FileMetaData meta;
    meta.number = next_file_number_++;
    Iterator* iter = mem.NewIterator();
    status = BuildTable(dbname_, env_, options_, table_cache_, iter,
                        &meta, &skipped);
    delete iter;
    if (status.ok()) {
      if (meta.file_size > 0) {
        // Empty tables (file_size == 0) are not registered.
        table_numbers_.push_back(meta.number);
      }
    }
    Log(env_, options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
        (unsigned long long) log,
        counter,
        (unsigned long long) meta.number,
        status.ToString().c_str());
    return status;
  }
| void ExtractMetaData() { | |||
| std::vector<TableInfo> kept; | |||
| for (int i = 0; i < table_numbers_.size(); i++) { | |||
| TableInfo t; | |||
| t.meta.number = table_numbers_[i]; | |||
| Status status = ScanTable(&t); | |||
| if (!status.ok()) { | |||
| std::string fname = TableFileName(dbname_, table_numbers_[i]); | |||
| Log(env_, options_.info_log, "Table #%llu: ignoring %s", | |||
| (unsigned long long) table_numbers_[i], | |||
| status.ToString().c_str()); | |||
| ArchiveFile(fname); | |||
| } else { | |||
| tables_.push_back(t); | |||
| } | |||
| } | |||
| } | |||
  // Iterates over all entries of table t->meta.number, filling in its
  // file size, smallest/largest keys, and maximum sequence number.
  // Unparsable keys are logged and skipped.  Returns non-OK if the file
  // size cannot be determined or the iterator reports an error.
  Status ScanTable(TableInfo* t) {
    std::string fname = TableFileName(dbname_, t->meta.number);
    int counter = 0;  // Number of successfully parsed entries
    Status status = env_->GetFileSize(fname, &t->meta.file_size);
    if (status.ok()) {
      Iterator* iter = table_cache_->NewIterator(
          ReadOptions(), t->meta.number);
      bool empty = true;
      ParsedInternalKey parsed;
      t->max_sequence = 0;
      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
        Slice key = iter->key();
        if (!ParseInternalKey(key, &parsed)) {
          Log(env_, options_.info_log, "Table #%llu: unparsable key %s",
              (unsigned long long) t->meta.number,
              EscapeString(key).c_str());
          continue;
        }

        counter++;
        if (empty) {
          // First parsable key is the smallest (table iteration is ordered).
          empty = false;
          t->meta.smallest.DecodeFrom(key);
        }
        t->meta.largest.DecodeFrom(key);
        if (parsed.sequence > t->max_sequence) {
          t->max_sequence = parsed.sequence;
        }

        if (ExtractValueType(key) == kTypeLargeValueRef) {
          if (iter->value().size() != LargeValueRef::ByteSize()) {
            Log(env_, options_.info_log, "Table #%llu: bad large value ref",
                (unsigned long long) t->meta.number);
          } else {
            // Re-register the large value so the new descriptor keeps it alive.
            edit_.AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
                                   t->meta.number,
                                   key);
          }
        }
      }
      if (!iter->status().ok()) {
        status = iter->status();
      }
      delete iter;
    }
    Log(env_, options_.info_log, "Table #%llu: %d entries %s",
        (unsigned long long) t->meta.number,
        counter,
        status.ToString().c_str());
    return status;
  }
  // Writes a brand new MANIFEST describing all recovered tables (all
  // placed in level 0), archives any pre-existing manifests, and points
  // CURRENT at the new descriptor.  The new file is written to a temp
  // name first and only installed on success.
  Status WriteDescriptor() {
    std::string tmp = TempFileName(dbname_, 1);
    WritableFile* file;
    Status status = env_->NewWritableFile(tmp, &file);
    if (!status.ok()) {
      return status;
    }

    // The recovered last-sequence is the max over all scanned tables.
    SequenceNumber max_sequence = 0;
    for (int i = 0; i < tables_.size(); i++) {
      if (max_sequence < tables_[i].max_sequence) {
        max_sequence = tables_[i].max_sequence;
      }
    }

    edit_.SetComparatorName(icmp_.user_comparator()->Name());
    edit_.SetLogNumber(0);
    edit_.SetNextFile(next_file_number_);
    edit_.SetLastSequence(max_sequence);

    for (int i = 0; i < tables_.size(); i++) {
      // TODO(opt): separate out into multiple levels
      const TableInfo& t = tables_[i];
      edit_.AddFile(0, t.meta.number, t.meta.file_size,
                    t.meta.smallest, t.meta.largest);
    }

    //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
    {
      // Scoped so the log::Writer is destroyed before the file is closed.
      log::Writer log(file);
      std::string record;
      edit_.EncodeTo(&record);
      status = log.AddRecord(record);
    }
    if (status.ok()) {
      status = file->Close();
    }
    delete file;
    file = NULL;

    if (!status.ok()) {
      env_->DeleteFile(tmp);
    } else {
      // Discard older manifests
      for (int i = 0; i < manifests_.size(); i++) {
        ArchiveFile(dbname_ + "/" + manifests_[i]);
      }

      // Install new manifest
      status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1));
      if (status.ok()) {
        status = SetCurrentFile(env_, dbname_, 1);
      } else {
        env_->DeleteFile(tmp);
      }
    }
    return status;
  }
| void ArchiveFile(const std::string& fname) { | |||
| // Move into another directory. E.g., for | |||
| // dir/foo | |||
| // rename to | |||
| // dir/lost/foo | |||
| const char* slash = strrchr(fname.c_str(), '/'); | |||
| std::string new_dir; | |||
| if (slash != NULL) { | |||
| new_dir.assign(fname.data(), slash - fname.data()); | |||
| } | |||
| new_dir.append("/lost"); | |||
| env_->CreateDir(new_dir); // Ignore error | |||
| std::string new_file = new_dir; | |||
| new_file.append("/"); | |||
| new_file.append((slash == NULL) ? fname.c_str() : slash + 1); | |||
| Status s = env_->RenameFile(fname, new_file); | |||
| Log(env_, options_.info_log, "Archiving %s: %s\n", | |||
| fname.c_str(), s.ToString().c_str()); | |||
| } | |||
| }; | |||
| } | |||
| Status RepairDB(const std::string& dbname, const Options& options) { | |||
| Repairer repairer(dbname, options); | |||
| return repairer.Run(); | |||
| } | |||
| } | |||
| @ -0,0 +1,378 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // Thread safety | |||
| // ------------- | |||
| // | |||
| // Writes require external synchronization, most likely a mutex. | |||
| // Reads require a guarantee that the SkipList will not be destroyed | |||
| // while the read is in progress. Apart from that, reads progress | |||
| // without any internal locking or synchronization. | |||
| // | |||
| // Invariants: | |||
| // | |||
| // (1) Allocated nodes are never deleted until the SkipList is | |||
| // destroyed. This is trivially guaranteed by the code since we | |||
| // never delete any skip list nodes. | |||
| // | |||
| // (2) The contents of a Node except for the next/prev pointers are | |||
| // immutable after the Node has been linked into the SkipList. | |||
| // Only Insert() modifies the list, and it is careful to initialize | |||
| // a node and use release-stores to publish the nodes in one or | |||
| // more lists. | |||
| // | |||
| // ... prev vs. next pointer ordering ... | |||
| #include <assert.h> | |||
| #include <stdlib.h> | |||
| #include "port/port.h" | |||
| #include "util/arena.h" | |||
| #include "util/random.h" | |||
| namespace leveldb { | |||
| class Arena; | |||
// Lock-free-read skip list keyed by Key and ordered by Comparator.
// See the file header for the full thread-safety contract: one external
// writer, any number of readers with no synchronization.
template<typename Key, class Comparator>
class SkipList {
 private:
  struct Node;

 public:
  // Create a new SkipList object that will use "cmp" for comparing keys,
  // and will allocate memory using "*arena".  Objects allocated in the arena
  // must remain allocated for the lifetime of the skiplist object.
  explicit SkipList(Comparator cmp, Arena* arena);

  // Insert key into the list.
  // REQUIRES: nothing that compares equal to key is currently in the list.
  void Insert(const Key& key);

  // Returns true iff an entry that compares equal to key is in the list.
  bool Contains(const Key& key) const;

  // Iteration over the contents of a skip list
  class Iterator {
   public:
    // Initialize an iterator over the specified list.
    // The returned iterator is not valid.
    explicit Iterator(const SkipList* list);

    // Returns true iff the iterator is positioned at a valid node.
    bool Valid() const;

    // Returns the key at the current position.
    // REQUIRES: Valid()
    const Key& key() const;

    // Advances to the next position.
    // REQUIRES: Valid()
    void Next();

    // Advances to the previous position.
    // REQUIRES: Valid()
    void Prev();

    // Advance to the first entry with a key >= target
    void Seek(const Key& target);

    // Position at the first entry in list.
    // Final state of iterator is Valid() iff list is not empty.
    void SeekToFirst();

    // Position at the last entry in list.
    // Final state of iterator is Valid() iff list is not empty.
    void SeekToLast();

   private:
    const SkipList* list_;
    Node* node_;
    // Intentionally copyable
  };

 private:
  enum { kMaxHeight = 12 };

  // Immutable after construction
  Comparator const compare_;
  Arena* const arena_;    // Arena used for allocations of nodes

  Node* const head_;

  // Modified only by Insert().  Read racily by readers, but stale
  // values are ok.
  port::AtomicPointer max_height_;   // Height of the entire list

  // The height integer is smuggled through the AtomicPointer's value.
  inline int GetMaxHeight() const {
    return reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load());
  }

  // Read/written only by Insert().
  Random rnd_;

  Node* NewNode(const Key& key, int height);
  int RandomHeight();
  bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }

  // Return true if key is greater than the data stored in "n"
  bool KeyIsAfterNode(const Key& key, Node* n) const;

  // Return the earliest node that comes at or after key.
  // Return NULL if there is no such node.
  //
  // If prev is non-NULL, fills prev[level] with pointer to previous
  // node at "level" for every level in [0..max_height_-1].
  Node* FindGreaterOrEqual(const Key& key, Node** prev) const;

  // Return the latest node with a key < key.
  // Return head_ if there is no such node.
  Node* FindLessThan(const Key& key) const;

  // Return the last node in the list.
  // Return head_ if list is empty.
  Node* FindLast() const;

  // No copying allowed
  SkipList(const SkipList&);
  void operator=(const SkipList&);
};
| // Implementation details follow | |||
// A single skip list node.  The key is immutable after construction;
// the next links are published with the barriers documented on each
// accessor below.
template<typename Key, class Comparator>
struct SkipList<Key,Comparator>::Node {
  explicit Node(const Key& k) : key(k) { }

  Key const key;

  // Accessors/mutators for links.  Wrapped in methods so we can
  // add the appropriate barriers as necessary.
  Node* Next(int n) {
    assert(n >= 0);
    // Use an 'acquire load' so that we observe a fully initialized
    // version of the returned Node.
    return reinterpret_cast<Node*>(next_[n].Acquire_Load());
  }
  void SetNext(int n, Node* x) {
    assert(n >= 0);
    // Use a 'release store' so that anybody who reads through this
    // pointer observes a fully initialized version of the inserted node.
    next_[n].Release_Store(x);
  }

  // No-barrier variants that can be safely used in a few locations.
  Node* NoBarrier_Next(int n) {
    assert(n >= 0);
    return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
  }
  void NoBarrier_SetNext(int n, Node* x) {
    assert(n >= 0);
    next_[n].NoBarrier_Store(x);
  }

 private:
  // Array of length equal to the node height.  next_[0] is lowest level link.
  // Extra link slots beyond the first are allocated immediately after the
  // node by NewNode().
  port::AtomicPointer next_[1];
};
| template<typename Key, class Comparator> | |||
| typename SkipList<Key,Comparator>::Node* | |||
| SkipList<Key,Comparator>::NewNode(const Key& key, int height) { | |||
| char* mem = arena_->AllocateAligned( | |||
| sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1)); | |||
| return new (mem) Node(key); | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) { | |||
| list_ = list; | |||
| node_ = NULL; | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline bool SkipList<Key,Comparator>::Iterator::Valid() const { | |||
| return node_ != NULL; | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline const Key& SkipList<Key,Comparator>::Iterator::key() const { | |||
| assert(Valid()); | |||
| return node_->key; | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline void SkipList<Key,Comparator>::Iterator::Next() { | |||
| assert(Valid()); | |||
| node_ = node_->Next(0); | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline void SkipList<Key,Comparator>::Iterator::Prev() { | |||
| // Instead of using explicit "prev" links, we just search for the | |||
| // last node that falls before key. | |||
| assert(Valid()); | |||
| node_ = list_->FindLessThan(node_->key); | |||
| if (node_ == list_->head_) { | |||
| node_ = NULL; | |||
| } | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) { | |||
| node_ = list_->FindGreaterOrEqual(target, NULL); | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() { | |||
| node_ = list_->head_->Next(0); | |||
| } | |||
| template<typename Key, class Comparator> | |||
| inline void SkipList<Key,Comparator>::Iterator::SeekToLast() { | |||
| node_ = list_->FindLast(); | |||
| if (node_ == list_->head_) { | |||
| node_ = NULL; | |||
| } | |||
| } | |||
| template<typename Key, class Comparator> | |||
| int SkipList<Key,Comparator>::RandomHeight() { | |||
| // Increase height with probability 1 in kBranching | |||
| static const unsigned int kBranching = 4; | |||
| int height = 1; | |||
| while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) { | |||
| height++; | |||
| } | |||
| assert(height > 0); | |||
| assert(height <= kMaxHeight); | |||
| return height; | |||
| } | |||
| template<typename Key, class Comparator> | |||
| bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const { | |||
| // NULL n is considered infinite | |||
| return (n != NULL) && (compare_(n->key, key) < 0); | |||
| } | |||
// Descends from the highest level, moving right while key is after the
// next node, and dropping a level otherwise.  Records the rightmost node
// visited at each level in prev[] (when non-NULL) for use by Insert().
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)
    const {
  Node* x = head_;
  int level = GetMaxHeight() - 1;
  while (true) {
    Node* next = x->Next(level);
    if (KeyIsAfterNode(key, next)) {
      // Keep searching in this list
      x = next;
    } else {
      if (prev != NULL) prev[level] = x;
      if (level == 0) {
        // Bottom level reached: next is the first node >= key (or NULL).
        return next;
      } else {
        // Switch to next list
        level--;
      }
    }
  }
}
// Returns the rightmost node whose key is strictly less than key, or
// head_ if every node's key is >= key.  Standard top-down traversal.
template<typename Key, class Comparator>
typename SkipList<Key,Comparator>::Node*
SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
  Node* x = head_;
  int level = GetMaxHeight() - 1;
  while (true) {
    // Loop invariant: x is either the dummy head or sorts before key.
    assert(x == head_ || compare_(x->key, key) < 0);
    Node* next = x->Next(level);
    if (next == NULL || compare_(next->key, key) >= 0) {
      if (level == 0) {
        return x;
      } else {
        // Switch to next list
        level--;
      }
    } else {
      x = next;
    }
  }
}
| template<typename Key, class Comparator> | |||
| typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast() | |||
| const { | |||
| Node* x = head_; | |||
| int level = GetMaxHeight() - 1; | |||
| while (true) { | |||
| Node* next = x->Next(level); | |||
| if (next == NULL) { | |||
| if (level == 0) { | |||
| return x; | |||
| } else { | |||
| // Switch to next list | |||
| level--; | |||
| } | |||
| } else { | |||
| x = next; | |||
| } | |||
| } | |||
| } | |||
// Constructs an empty list: height starts at 1 and head_ is a dummy node
// (its key is never compared, so any value works) with all kMaxHeight
// links cleared.
template<typename Key, class Comparator>
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
    : compare_(cmp),
      arena_(arena),
      head_(NewNode(0 /* any key will do */, kMaxHeight)),
      max_height_(reinterpret_cast<void*>(1)),
      rnd_(0xdeadbeef) {
  for (int i = 0; i < kMaxHeight; i++) {
    head_->SetNext(i, NULL);
  }
}
// Inserts key into the list.  Requires external synchronization among
// writers; concurrent readers are safe because the new node is fully
// initialized before being published via release-stores into prev[].
template<typename Key, class Comparator>
void SkipList<Key,Comparator>::Insert(const Key& key) {
  // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
  // here since Insert() is externally synchronized.
  Node* prev[kMaxHeight];
  Node* x = FindGreaterOrEqual(key, prev);

  // Our data structure does not allow duplicate insertion
  assert(x == NULL || !Equal(key, x->key));

  int height = RandomHeight();
  if (height > GetMaxHeight()) {
    // New node is taller than the list: the head becomes the predecessor
    // on the newly activated levels.
    for (int i = GetMaxHeight(); i < height; i++) {
      prev[i] = head_;
    }
    //fprintf(stderr, "Change height from %d to %d\n", max_height_, height);

    // It is ok to mutate max_height_ without any synchronization
    // with concurrent readers.  A concurrent reader that observes
    // the new value of max_height_ will see either the old value of
    // new level pointers from head_ (NULL), or a new value set in
    // the loop below.  In the former case the reader will
    // immediately drop to the next level since NULL sorts after all
    // keys.  In the latter case the reader will use the new node.
    max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
  }

  x = NewNode(key, height);
  for (int i = 0; i < height; i++) {
    // NoBarrier_SetNext() suffices since we will add a barrier when
    // we publish a pointer to "x" in prev[i].
    x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
    prev[i]->SetNext(i, x);
  }
}
| template<typename Key, class Comparator> | |||
| bool SkipList<Key,Comparator>::Contains(const Key& key) const { | |||
| Node* x = FindGreaterOrEqual(key, NULL); | |||
| if (x != NULL && Equal(key, x->key)) { | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,378 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/skiplist.h" | |||
| #include <set> | |||
| #include "include/env.h" | |||
| #include "util/arena.h" | |||
| #include "util/hash.h" | |||
| #include "util/random.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
typedef uint64_t Key;

// Numeric three-way comparator for test keys: <0, 0, >0 like strcmp.
struct Comparator {
  int operator()(const Key& a, const Key& b) const {
    if (a == b) {
      return 0;
    }
    return (a < b) ? -1 : +1;
  }
};
// Empty fixture class required by the TEST macro of the test harness.
class SkipTest { };
// An empty list: Contains() is false and every positioning operation
// leaves the iterator invalid.
TEST(SkipTest, Empty) {
  Arena arena;
  Comparator cmp;
  SkipList<Key, Comparator> list(cmp, &arena);
  ASSERT_TRUE(!list.Contains(10));

  SkipList<Key, Comparator>::Iterator iter(&list);
  ASSERT_TRUE(!iter.Valid());
  iter.SeekToFirst();
  ASSERT_TRUE(!iter.Valid());
  iter.Seek(100);
  ASSERT_TRUE(!iter.Valid());
  iter.SeekToLast();
  ASSERT_TRUE(!iter.Valid());
}
// Inserts up to N random keys and validates Contains(), Seek(), and both
// iteration directions against a std::set used as the reference model.
TEST(SkipTest, InsertAndLookup) {
  const int N = 2000;
  const int R = 5000;
  Random rnd(1000);
  std::set<Key> keys;  // Model: the authoritative set of inserted keys
  Arena arena;
  Comparator cmp;
  SkipList<Key, Comparator> list(cmp, &arena);
  for (int i = 0; i < N; i++) {
    Key key = rnd.Next() % R;
    // Only insert keys not already present (duplicates are disallowed).
    if (keys.insert(key).second) {
      list.Insert(key);
    }
  }

  // Membership must agree with the model over the whole key range.
  for (int i = 0; i < R; i++) {
    if (list.Contains(i)) {
      ASSERT_EQ(keys.count(i), 1);
    } else {
      ASSERT_EQ(keys.count(i), 0);
    }
  }

  // Simple iterator tests
  {
    SkipList<Key, Comparator>::Iterator iter(&list);
    ASSERT_TRUE(!iter.Valid());

    iter.Seek(0);
    ASSERT_TRUE(iter.Valid());
    ASSERT_EQ(*(keys.begin()), iter.key());

    iter.SeekToFirst();
    ASSERT_TRUE(iter.Valid());
    ASSERT_EQ(*(keys.begin()), iter.key());

    iter.SeekToLast();
    ASSERT_TRUE(iter.Valid());
    ASSERT_EQ(*(keys.rbegin()), iter.key());
  }

  // Forward iteration test
  for (int i = 0; i < R; i++) {
    SkipList<Key, Comparator>::Iterator iter(&list);
    iter.Seek(i);

    // Compare against model iterator
    std::set<Key>::iterator model_iter = keys.lower_bound(i);
    // Check the next three entries after each seek position.
    for (int j = 0; j < 3; j++) {
      if (model_iter == keys.end()) {
        ASSERT_TRUE(!iter.Valid());
        break;
      } else {
        ASSERT_TRUE(iter.Valid());
        ASSERT_EQ(*model_iter, iter.key());
        ++model_iter;
        iter.Next();
      }
    }
  }

  // Backward iteration test
  {
    SkipList<Key, Comparator>::Iterator iter(&list);
    iter.SeekToLast();

    // Compare against model iterator
    for (std::set<Key>::reverse_iterator model_iter = keys.rbegin();
         model_iter != keys.rend();
         ++model_iter) {
      ASSERT_TRUE(iter.Valid());
      ASSERT_EQ(*model_iter, iter.key());
      iter.Prev();
    }
    ASSERT_TRUE(!iter.Valid());
  }
}
| // We want to make sure that with a single writer and multiple | |||
| // concurrent readers (with no synchronization other than when a | |||
| // reader's iterator is created), the reader always observes all the | |||
| // data that was present in the skip list when the iterator was | |||
// constructed.  Because insertions are happening concurrently, we may
| // also observe new values that were inserted since the iterator was | |||
| // constructed, but we should never miss any values that were present | |||
| // at iterator construction time. | |||
| // | |||
| // We generate multi-part keys: | |||
| // <key,gen,hash> | |||
| // where: | |||
| // key is in range [0..K-1] | |||
| // gen is a generation number for key | |||
| // hash is hash(key,gen) | |||
| // | |||
| // The insertion code picks a random key, sets gen to be 1 + the last | |||
| // generation number inserted for that key, and sets hash to Hash(key,gen). | |||
| // | |||
| // At the beginning of a read, we snapshot the last inserted | |||
| // generation number for each key. We then iterate, including random | |||
| // calls to Next() and Seek(). For every key we encounter, we | |||
| // check that it is either expected given the initial snapshot or has | |||
| // been concurrently added since the iterator started. | |||
// Test scaffolding for the single-writer/multi-reader stress test
// described in the comment above.  Keys pack <key:24><gen:32><hash:8>
// into a uint64_t (layout established by the shifts in key/gen/hash
// and MakeKey below).
class ConcurrentTest {
 private:
  static const uint32_t K = 4;

  // Field extractors for the packed key layout.
  static uint64_t key(Key key) { return (key >> 40); }
  static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; }
  static uint64_t hash(Key key) { return key & 0xff; }

  static uint64_t HashNumbers(uint64_t k, uint64_t g) {
    uint64_t data[2] = { k, g };
    return Hash(reinterpret_cast<char*>(data), sizeof(data), 0);
  }

  static Key MakeKey(uint64_t k, uint64_t g) {
    assert(sizeof(Key) == sizeof(uint64_t));
    assert(k <= K);  // We sometimes pass K to seek to the end of the skiplist
    assert(g <= 0xffffffffu);
    return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff));
  }

  // True iff the embedded hash matches the (key,gen) pair; detects torn
  // or fabricated keys observed during concurrent reads.
  static bool IsValidKey(Key k) {
    return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff);
  }

  static Key RandomTarget(Random* rnd) {
    switch (rnd->Next() % 10) {
      case 0:
        // Seek to beginning
        return MakeKey(0, 0);
      case 1:
        // Seek to end
        return MakeKey(K, 0);
      default:
        // Seek to middle
        return MakeKey(rnd->Next() % K, 0);
    }
  }

  // Per-key generation
  struct State {
    port::AtomicPointer generation[K];
    void Set(int k, intptr_t v) {
      generation[k].Release_Store(reinterpret_cast<void*>(v));
    }
    intptr_t Get(int k) {
      return reinterpret_cast<intptr_t>(generation[k].Acquire_Load());
    }
    State() {
      for (int k = 0; k < K; k++) {
        Set(k, 0);
      }
    }
  };

  // Current state of the test
  State current_;

  Arena arena_;

  // SkipList is not protected by mu_.  We just use a single writer
  // thread to modify it.
  SkipList<Key, Comparator> list_;

 public:
  ConcurrentTest() : list_(Comparator(), &arena_) { }

  // REQUIRES: External synchronization
  void WriteStep(Random* rnd) {
    const uint32_t k = rnd->Next() % K;
    const intptr_t g = current_.Get(k) + 1;
    const Key key = MakeKey(k, g);
    // Insert before publishing the new generation so readers never
    // expect a key that is not yet in the list.
    list_.Insert(key);
    current_.Set(k, g);
  }

  // One reader pass: snapshot the committed generations, then iterate
  // (mixing Next() and Seek()) verifying nothing in the snapshot is missed.
  void ReadStep(Random* rnd) {
    // Remember the initial committed state of the skiplist.
    State initial_state;
    for (int k = 0; k < K; k++) {
      initial_state.Set(k, current_.Get(k));
    }

    Key pos = RandomTarget(rnd);
    SkipList<Key, Comparator>::Iterator iter(&list_);
    iter.Seek(pos);
    while (true) {
      Key current;
      if (!iter.Valid()) {
        // Treat end-of-list as the sentinel key <K,0,...>.
        current = MakeKey(K, 0);
      } else {
        current = iter.key();
        ASSERT_TRUE(IsValidKey(current)) << std::hex << current;
      }
      ASSERT_LE(pos, current) << "should not go backwards";

      // Verify that everything in [pos,current) was not present in
      // initial_state.
      while (pos < current) {
        ASSERT_LT(key(pos), K) << std::hex << pos;

        // Note that generation 0 is never inserted, so it is ok if
        // <*,0,*> is missing.
        ASSERT_TRUE((gen(pos) == 0) ||
                    (gen(pos) > initial_state.Get(key(pos)))
                    ) << "key: " << key(pos)
                      << "; gen: " << gen(pos)
                      << "; initgen: "
                      << initial_state.Get(key(pos));

        // Advance to next key in the valid key space
        if (key(pos) < key(current)) {
          pos = MakeKey(key(pos) + 1, 0);
        } else {
          pos = MakeKey(key(pos), gen(pos) + 1);
        }
      }

      if (!iter.Valid()) {
        break;
      }

      // Randomly either step forward or seek to a new random target.
      if (rnd->Next() % 2) {
        iter.Next();
        pos = MakeKey(key(pos), gen(pos) + 1);
      } else {
        Key new_target = RandomTarget(rnd);
        if (new_target > pos) {
          pos = new_target;
          iter.Seek(new_target);
        }
      }
    }
  }
};
// Out-of-class definition for the static member K (needed when it is odr-used).
const uint32_t ConcurrentTest::K;
// Simple test that does single-threaded testing of the ConcurrentTest
// scaffolding.  Alternating read/write steps on one thread validates the
// checking logic itself before the threaded variants run.
TEST(SkipTest, ConcurrentWithoutThreads) {
  ConcurrentTest test;
  Random rnd(test::RandomSeed());
  for (int i = 0; i < 10000; i++) {
    test.ReadStep(&rnd);
    test.WriteStep(&rnd);
  }
}
// Shared state between the main (writer) thread and the background
// reader.  The reader publishes its lifecycle via state_/state_cv_, and
// the writer requests shutdown through quit_flag_.
class TestState {
 public:
  ConcurrentTest t_;
  int seed_;
  port::AtomicPointer quit_flag_;  // Non-NULL once the reader should stop

  enum ReaderState {
    STARTING,
    RUNNING,
    DONE
  };

  explicit TestState(int s)
      : seed_(s),
        quit_flag_(NULL),
        state_(STARTING),
        state_cv_(&mu_) {}

  // Blocks until the reader reports state s.
  void Wait(ReaderState s) {
    mu_.Lock();
    while (state_ != s) {
      state_cv_.Wait();
    }
    mu_.Unlock();
  }

  // Publishes a new reader state and wakes any waiter.
  void Change(ReaderState s) {
    mu_.Lock();
    state_ = s;
    state_cv_.Signal();
    mu_.Unlock();
  }

 private:
  port::Mutex mu_;
  ReaderState state_;
  port::CondVar state_cv_;
};
// Entry point for the background reader thread: performs read-validation
// steps until the writer sets quit_flag_.
static void ConcurrentReader(void* arg) {
  TestState* state = reinterpret_cast<TestState*>(arg);
  Random rnd(state->seed_);
  int64_t reads = 0;  // Counted but not otherwise consumed
  state->Change(TestState::RUNNING);
  while (!state->quit_flag_.Acquire_Load()) {
    state->t_.ReadStep(&rnd);
    ++reads;
  }
  state->Change(TestState::DONE);
}
| static void RunConcurrent(int run) { | |||
| const int seed = test::RandomSeed() + (run * 100); | |||
| Random rnd(seed); | |||
| const int N = 1000; | |||
| const int kSize = 1000; | |||
| for (int i = 0; i < N; i++) { | |||
| if ((i % 100) == 0) { | |||
| fprintf(stderr, "Run %d of %d\n", i, N); | |||
| } | |||
| TestState state(seed + 1); | |||
| Env::Default()->Schedule(ConcurrentReader, &state); | |||
| state.Wait(TestState::RUNNING); | |||
| for (int i = 0; i < kSize; i++) { | |||
| state.t_.WriteStep(&rnd); | |||
| } | |||
| state.quit_flag_.Release_Store(&state); // Any non-NULL arg will do | |||
| state.Wait(TestState::DONE); | |||
| } | |||
| } | |||
// Several runs with different seeds to increase the chance of exposing
// races between the writer and the lock-free reader.
TEST(SkipTest, Concurrent1) { RunConcurrent(1); }
TEST(SkipTest, Concurrent2) { RunConcurrent(2); }
TEST(SkipTest, Concurrent3) { RunConcurrent(3); }
TEST(SkipTest, Concurrent4) { RunConcurrent(4); }
TEST(SkipTest, Concurrent5) { RunConcurrent(5); }
| } | |||
// Test-harness entry point: runs every TEST registered above.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,66 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_ | |||
| #define STORAGE_LEVELDB_DB_SNAPSHOT_H_ | |||
| #include "include/db.h" | |||
| namespace leveldb { | |||
| class SnapshotList; | |||
// Snapshots are kept in a doubly-linked list in the DB.
// Each Snapshot corresponds to a particular sequence number.
class Snapshot {
 public:
  SequenceNumber number_;  // const after creation

 private:
  // Only SnapshotList may create, link, and destroy snapshots.
  friend class SnapshotList;

  // Snapshot is kept in a doubly-linked circular list
  Snapshot* prev_;
  Snapshot* next_;

  SnapshotList* list_;     // just for sanity checks
};
| class SnapshotList { | |||
| public: | |||
| SnapshotList() { | |||
| list_.prev_ = &list_; | |||
| list_.next_ = &list_; | |||
| } | |||
| bool empty() const { return list_.next_ == &list_; } | |||
| Snapshot* oldest() const { assert(!empty()); return list_.next_; } | |||
| Snapshot* newest() const { assert(!empty()); return list_.prev_; } | |||
| const Snapshot* New(SequenceNumber seq) { | |||
| Snapshot* s = new Snapshot; | |||
| s->number_ = seq; | |||
| s->list_ = this; | |||
| s->next_ = &list_; | |||
| s->prev_ = list_.prev_; | |||
| s->prev_->next_ = s; | |||
| s->next_->prev_ = s; | |||
| return s; | |||
| } | |||
| void Delete(const Snapshot* s) { | |||
| assert(s->list_ == this); | |||
| s->prev_->next_ = s->next_; | |||
| s->next_->prev_ = s->prev_; | |||
| delete s; | |||
| } | |||
| private: | |||
| // Dummy head of doubly-linked list of snapshots | |||
| Snapshot list_; | |||
| }; | |||
| } | |||
| #endif // STORAGE_LEVELDB_DB_SNAPSHOT_H_ | |||
| @ -0,0 +1,94 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/table_cache.h" | |||
| #include "db/filename.h" | |||
| #include "include/env.h" | |||
| #include "include/table.h" | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
// Value stored in the table cache: a parsed Table plus the open file it
// reads from.  Both are freed together by DeleteEntry on eviction.
struct TableAndFile {
  RandomAccessFile* file;
  Table* table;
};
| static void DeleteEntry(const Slice& key, void* value) { | |||
| TableAndFile* tf = reinterpret_cast<TableAndFile*>(value); | |||
| delete tf->table; | |||
| delete tf->file; | |||
| delete tf; | |||
| } | |||
| static void UnrefEntry(void* arg1, void* arg2) { | |||
| Cache* cache = reinterpret_cast<Cache*>(arg1); | |||
| Cache::Handle* h = reinterpret_cast<Cache::Handle*>(arg2); | |||
| cache->Release(h); | |||
| } | |||
// Creates a cache holding at most "entries" open tables for the
// database named "dbname".
TableCache::TableCache(const std::string& dbname,
                       const Options* options,
                       int entries)
    : env_(options->env),
      dbname_(dbname),
      options_(options),
      cache_(NewLRUCache(entries)) {
}
// Destroying the cache runs DeleteEntry for remaining entries, closing
// the cached tables and their files.
TableCache::~TableCache() {
  delete cache_;
}
// Returns an iterator over table "file_number", opening and caching the
// table on a cache miss.  The cache handle stays pinned until the
// returned iterator is destroyed (released via the UnrefEntry cleanup).
Iterator* TableCache::NewIterator(const ReadOptions& options,
                                  uint64_t file_number,
                                  Table** tableptr) {
  if (tableptr != NULL) {
    *tableptr = NULL;
  }

  // The cache key is the fixed-width 8-byte encoding of the file number.
  char buf[sizeof(file_number)];
  EncodeFixed64(buf, file_number);
  Slice key(buf, sizeof(buf));
  Cache::Handle* handle = cache_->Lookup(key);
  if (handle == NULL) {
    // Miss: open the file, parse the table, and insert into the cache.
    std::string fname = TableFileName(dbname_, file_number);
    RandomAccessFile* file = NULL;
    Table* table = NULL;
    Status s = env_->NewRandomAccessFile(fname, &file);
    if (s.ok()) {
      s = Table::Open(*options_, file, &table);
    }

    if (!s.ok()) {
      assert(table == NULL);
      delete file;
      // We do not cache error results so that if the error is transient,
      // or somebody repairs the file, we recover automatically.
      return NewErrorIterator(s);
    }

    TableAndFile* tf = new TableAndFile;
    tf->file = file;
    tf->table = table;
    // Charge 1 unit per entry; the cache now owns *tf via DeleteEntry.
    handle = cache_->Insert(key, tf, 1, &DeleteEntry);
  }

  Table* table = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
  Iterator* result = table->NewIterator(options);
  result->RegisterCleanup(&UnrefEntry, cache_, handle);
  if (tableptr != NULL) {
    *tableptr = table;
  }
  return result;
}
| void TableCache::Evict(uint64_t file_number) { | |||
| char buf[sizeof(file_number)]; | |||
| EncodeFixed64(buf, file_number); | |||
| cache_->Erase(Slice(buf, sizeof(buf))); | |||
| } | |||
| } | |||
| @ -0,0 +1,49 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // Thread-safe (provides internal synchronization) | |||
| #ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_ | |||
| #define STORAGE_LEVELDB_DB_TABLE_CACHE_H_ | |||
| #include <string> | |||
| #include <stdint.h> | |||
| #include "db/dbformat.h" | |||
| #include "include/cache.h" | |||
| #include "include/table.h" | |||
| #include "port/port.h" | |||
| namespace leveldb { | |||
| class Env; | |||
// Cache of open Table objects keyed by file number.  Thread-safe
// (see header comment: provides internal synchronization).
class TableCache {
 public:
  TableCache(const std::string& dbname, const Options* options, int entries);
  ~TableCache();

  // Get an iterator for the specified file number and return it.  If
  // "tableptr" is non-NULL, also sets "*tableptr" to point to the
  // Table object underlying the returned iterator, or NULL if no
  // Table object underlies the returned iterator.  The returned
  // "*tableptr" object is owned by the cache and should not be
  // deleted, and is valid for as long as the returned iterator is
  // live.
  Iterator* NewIterator(const ReadOptions& options,
                        uint64_t file_number,
                        Table** tableptr = NULL);

  // Evict any entry for the specified file number
  void Evict(uint64_t file_number);

 private:
  Env* const env_;            // used to open table files
  const std::string dbname_;
  const Options* options_;
  Cache* cache_;              // maps encoded file number -> TableAndFile
};
| } | |||
| #endif // STORAGE_LEVELDB_DB_TABLE_CACHE_H_ | |||
| @ -0,0 +1,282 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/version_edit.h" | |||
| #include "db/version_set.h" | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
// Tag numbers for serialized VersionEdit.  These numbers are written to
// disk and should not be changed.  Payloads (see EncodeTo):
enum Tag {
  kComparator = 1,       // length-prefixed comparator name
  kLogNumber = 2,        // varint64 log number
  kNextFileNumber = 3,   // varint64 next file number
  kLastSequence = 4,     // varint64 last sequence
  kCompactPointer = 5,   // level + length-prefixed internal key
  kDeletedFile = 6,      // level + varint64 file number
  kNewFile = 7,          // level + number + size + smallest + largest keys
  kLargeValueRef = 8,    // large ref bytes + file number + internal key
};
| void VersionEdit::Clear() { | |||
| comparator_.clear(); | |||
| log_number_ = 0; | |||
| last_sequence_ = 0; | |||
| next_file_number_ = 0; | |||
| has_comparator_ = false; | |||
| has_log_number_ = false; | |||
| has_next_file_number_ = false; | |||
| has_last_sequence_ = false; | |||
| deleted_files_.clear(); | |||
| new_files_.clear(); | |||
| large_refs_added_.clear(); | |||
| } | |||
| void VersionEdit::EncodeTo(std::string* dst) const { | |||
| if (has_comparator_) { | |||
| PutVarint32(dst, kComparator); | |||
| PutLengthPrefixedSlice(dst, comparator_); | |||
| } | |||
| if (has_log_number_) { | |||
| PutVarint32(dst, kLogNumber); | |||
| PutVarint64(dst, log_number_); | |||
| } | |||
| if (has_next_file_number_) { | |||
| PutVarint32(dst, kNextFileNumber); | |||
| PutVarint64(dst, next_file_number_); | |||
| } | |||
| if (has_last_sequence_) { | |||
| PutVarint32(dst, kLastSequence); | |||
| PutVarint64(dst, last_sequence_); | |||
| } | |||
| for (int i = 0; i < compact_pointers_.size(); i++) { | |||
| PutVarint32(dst, kCompactPointer); | |||
| PutVarint32(dst, compact_pointers_[i].first); // level | |||
| PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode()); | |||
| } | |||
| for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); | |||
| iter != deleted_files_.end(); | |||
| ++iter) { | |||
| PutVarint32(dst, kDeletedFile); | |||
| PutVarint32(dst, iter->first); // level | |||
| PutVarint64(dst, iter->second); // file number | |||
| } | |||
| for (int i = 0; i < new_files_.size(); i++) { | |||
| const FileMetaData& f = new_files_[i].second; | |||
| PutVarint32(dst, kNewFile); | |||
| PutVarint32(dst, new_files_[i].first); // level | |||
| PutVarint64(dst, f.number); | |||
| PutVarint64(dst, f.file_size); | |||
| PutLengthPrefixedSlice(dst, f.smallest.Encode()); | |||
| PutLengthPrefixedSlice(dst, f.largest.Encode()); | |||
| } | |||
| for (int i = 0; i < large_refs_added_.size(); i++) { | |||
| const VersionEdit::Large& l = large_refs_added_[i]; | |||
| PutVarint32(dst, kLargeValueRef); | |||
| PutLengthPrefixedSlice(dst, | |||
| Slice(l.large_ref.data, LargeValueRef::ByteSize())); | |||
| PutVarint64(dst, l.fnum); | |||
| PutLengthPrefixedSlice(dst, l.internal_key.Encode()); | |||
| } | |||
| } | |||
| static bool GetInternalKey(Slice* input, InternalKey* dst) { | |||
| Slice str; | |||
| if (GetLengthPrefixedSlice(input, &str)) { | |||
| dst->DecodeFrom(str); | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
| static bool GetLevel(Slice* input, int* level) { | |||
| uint32_t v; | |||
| if (GetVarint32(input, &v) && | |||
| v < config::kNumLevels) { | |||
| *level = v; | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| } | |||
// Parses a serialized VersionEdit (as produced by EncodeTo) from "src".
// On a malformed record, returns a Corruption status naming the field
// that failed to parse; otherwise returns OK.
Status VersionEdit::DecodeFrom(const Slice& src) {
  Clear();
  Slice input = src;
  const char* msg = NULL;   // non-NULL once a parse error is seen
  uint32_t tag;

  // Temporary storage for parsing
  int level;
  uint64_t number;
  FileMetaData f;
  Slice str;
  Large large;
  InternalKey key;

  // Each record is a varint32 tag followed by a tag-specific payload.
  while (msg == NULL && GetVarint32(&input, &tag)) {
    switch (tag) {
      case kComparator:
        if (GetLengthPrefixedSlice(&input, &str)) {
          comparator_ = str.ToString();
          has_comparator_ = true;
        } else {
          msg = "comparator name";
        }
        break;

      case kLogNumber:
        if (GetVarint64(&input, &log_number_)) {
          has_log_number_ = true;
        } else {
          msg = "log number";
        }
        break;

      case kNextFileNumber:
        if (GetVarint64(&input, &next_file_number_)) {
          has_next_file_number_ = true;
        } else {
          msg = "next file number";
        }
        break;

      case kLastSequence:
        if (GetVarint64(&input, &last_sequence_)) {
          has_last_sequence_ = true;
        } else {
          msg = "last sequence number";
        }
        break;

      case kCompactPointer:
        if (GetLevel(&input, &level) &&
            GetInternalKey(&input, &key)) {
          compact_pointers_.push_back(std::make_pair(level, key));
        } else {
          msg = "compaction pointer";
        }
        break;

      case kDeletedFile:
        if (GetLevel(&input, &level) &&
            GetVarint64(&input, &number)) {
          deleted_files_.insert(std::make_pair(level, number));
        } else {
          msg = "deleted file";
        }
        break;

      case kNewFile:
        if (GetLevel(&input, &level) &&
            GetVarint64(&input, &f.number) &&
            GetVarint64(&input, &f.file_size) &&
            GetInternalKey(&input, &f.smallest) &&
            GetInternalKey(&input, &f.largest)) {
          new_files_.push_back(std::make_pair(level, f));
        } else {
          msg = "new-file entry";
        }
        break;

      case kLargeValueRef:
        // The ref payload must be exactly ByteSize() bytes.
        if (GetLengthPrefixedSlice(&input, &str) &&
            (str.size() == LargeValueRef::ByteSize()) &&
            GetVarint64(&input, &large.fnum) &&
            GetInternalKey(&input, &large.internal_key)) {
          large.large_ref = LargeValueRef::FromRef(str);
          large_refs_added_.push_back(large);
        } else {
          msg = "large ref";
        }
        break;

      default:
        msg = "unknown tag";
        break;
    }
  }

  // Leftover bytes that don't start a valid tag also count as corruption.
  if (msg == NULL && !input.empty()) {
    msg = "invalid tag";
  }

  Status result;
  if (msg != NULL) {
    result = Status::Corruption("VersionEdit", msg);
  }
  return result;
}
| std::string VersionEdit::DebugString() const { | |||
| std::string r; | |||
| r.append("VersionEdit {"); | |||
| if (has_comparator_) { | |||
| r.append("\n Comparator: "); | |||
| r.append(comparator_); | |||
| } | |||
| if (has_log_number_) { | |||
| r.append("\n LogNumber: "); | |||
| AppendNumberTo(&r, log_number_); | |||
| } | |||
| if (has_next_file_number_) { | |||
| r.append("\n NextFile: "); | |||
| AppendNumberTo(&r, next_file_number_); | |||
| } | |||
| if (has_last_sequence_) { | |||
| r.append("\n LastSeq: "); | |||
| AppendNumberTo(&r, last_sequence_); | |||
| } | |||
| for (int i = 0; i < compact_pointers_.size(); i++) { | |||
| r.append("\n CompactPointer: "); | |||
| AppendNumberTo(&r, compact_pointers_[i].first); | |||
| r.append(" '"); | |||
| AppendEscapedStringTo(&r, compact_pointers_[i].second.Encode()); | |||
| r.append("'"); | |||
| } | |||
| for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); | |||
| iter != deleted_files_.end(); | |||
| ++iter) { | |||
| r.append("\n DeleteFile: "); | |||
| AppendNumberTo(&r, iter->first); | |||
| r.append(" "); | |||
| AppendNumberTo(&r, iter->second); | |||
| } | |||
| for (int i = 0; i < new_files_.size(); i++) { | |||
| const FileMetaData& f = new_files_[i].second; | |||
| r.append("\n AddFile: "); | |||
| AppendNumberTo(&r, new_files_[i].first); | |||
| r.append(" "); | |||
| AppendNumberTo(&r, f.number); | |||
| r.append(" "); | |||
| AppendNumberTo(&r, f.file_size); | |||
| r.append(" '"); | |||
| AppendEscapedStringTo(&r, f.smallest.Encode()); | |||
| r.append("' .. '"); | |||
| AppendEscapedStringTo(&r, f.largest.Encode()); | |||
| r.append("'"); | |||
| } | |||
| for (int i = 0; i < large_refs_added_.size(); i++) { | |||
| const VersionEdit::Large& l = large_refs_added_[i]; | |||
| r.append("\n LargeRef: "); | |||
| AppendNumberTo(&r, l.fnum); | |||
| r.append(" "); | |||
| r.append(LargeValueRefToFilenameString(l.large_ref)); | |||
| r.append(" '"); | |||
| AppendEscapedStringTo(&r, l.internal_key.Encode()); | |||
| r.append("'"); | |||
| } | |||
| r.append("\n}\n"); | |||
| return r; | |||
| } | |||
| } | |||
| @ -0,0 +1,118 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_VERSION_EDIT_H_ | |||
| #define STORAGE_LEVELDB_DB_VERSION_EDIT_H_ | |||
| #include <set> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "db/dbformat.h" | |||
| namespace leveldb { | |||
| class VersionSet; | |||
| struct FileMetaData { | |||
| int refs; | |||
| uint64_t number; | |||
| uint64_t file_size; // File size in bytes | |||
| InternalKey smallest; // Smallest internal key served by table | |||
| InternalKey largest; // Largest internal key served by table | |||
| FileMetaData() : refs(0), file_size(0) { } | |||
| }; | |||
// A VersionEdit is a delta applied to a Version to form the next one:
// files added/deleted per level, compaction pointer updates, large-value
// references, and optional metadata (comparator name, log number, next
// file number, last sequence).  It can be serialized with EncodeTo and
// parsed back with DecodeFrom.
class VersionEdit {
 public:
  VersionEdit() { Clear(); }
  ~VersionEdit() { }

  // Reset to an empty edit.
  void Clear();

  void SetComparatorName(const Slice& name) {
    has_comparator_ = true;
    comparator_ = name.ToString();
  }
  void SetLogNumber(uint64_t num) {
    has_log_number_ = true;
    log_number_ = num;
  }
  void SetNextFile(uint64_t num) {
    has_next_file_number_ = true;
    next_file_number_ = num;
  }
  void SetLastSequence(SequenceNumber seq) {
    has_last_sequence_ = true;
    last_sequence_ = seq;
  }
  // Record where the next compaction at "level" should start.
  void SetCompactPointer(int level, const InternalKey& key) {
    compact_pointers_.push_back(std::make_pair(level, key));
  }

  // Add the specified file at the specified number.
  // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
  // REQUIRES: "smallest" and "largest" are smallest and largest keys in file
  void AddFile(int level, uint64_t file,
               uint64_t file_size,
               const InternalKey& smallest,
               const InternalKey& largest) {
    FileMetaData f;
    f.number = file;
    f.file_size = file_size;
    f.smallest = smallest;
    f.largest = largest;
    new_files_.push_back(std::make_pair(level, f));
  }

  // Delete the specified "file" from the specified "level".
  void DeleteFile(int level, uint64_t file) {
    deleted_files_.insert(std::make_pair(level, file));
  }

  // Record that a large value with the specified large_ref was
  // written to the output file numbered "fnum"
  void AddLargeValueRef(const LargeValueRef& large_ref,
                        uint64_t fnum,
                        const Slice& internal_key) {
    // resize + back() constructs the Large in place without needing a
    // multi-argument constructor.
    large_refs_added_.resize(large_refs_added_.size() + 1);
    Large* large = &(large_refs_added_.back());
    large->large_ref = large_ref;
    large->fnum = fnum;
    large->internal_key.DecodeFrom(internal_key);
  }

  // Serialize this edit to *dst (appended).
  void EncodeTo(std::string* dst) const;
  // Parse an edit previously produced by EncodeTo().
  Status DecodeFrom(const Slice& src);

  // Human-readable summary, for debugging.
  std::string DebugString() const;

 private:
  friend class VersionSet;

  typedef std::set< std::pair<int, uint64_t> > DeletedFileSet;

  std::string comparator_;
  uint64_t log_number_;
  uint64_t next_file_number_;
  SequenceNumber last_sequence_;
  // The has_* flags record which of the optional fields above are set.
  bool has_comparator_;
  bool has_log_number_;
  bool has_next_file_number_;
  bool has_last_sequence_;

  std::vector< std::pair<int, InternalKey> > compact_pointers_;
  DeletedFileSet deleted_files_;
  std::vector< std::pair<int, FileMetaData> > new_files_;

  // One record per large value written while this edit was accumulating.
  struct Large {
    LargeValueRef large_ref;
    uint64_t fnum;
    InternalKey internal_key;
  };
  std::vector<Large> large_refs_added_;
};
| } | |||
| #endif // STORAGE_LEVELDB_DB_VERSION_EDIT_H_ | |||
| @ -0,0 +1,50 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "db/version_edit.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
| static void TestEncodeDecode(const VersionEdit& edit) { | |||
| std::string encoded, encoded2; | |||
| edit.EncodeTo(&encoded); | |||
| VersionEdit parsed; | |||
| Status s = parsed.DecodeFrom(encoded); | |||
| ASSERT_TRUE(s.ok()) << s.ToString(); | |||
| parsed.EncodeTo(&encoded2); | |||
| ASSERT_EQ(encoded, encoded2); | |||
| } | |||
// Empty fixture class required by the TEST macro.
class VersionEditTest { };

TEST(VersionEditTest, EncodeDecode) {
  static const uint64_t kBig = 1ull << 50;  // exercises multi-byte varints

  VersionEdit edit;
  for (int i = 0; i < 4; i++) {
    // Round-trip before each batch of additions so partially filled
    // edits are exercised as well as the final fully populated one.
    TestEncodeDecode(edit);
    edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
                 InternalKey("foo", kBig + 500 + i, kTypeLargeValueRef),
                 InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
    edit.DeleteFile(4, kBig + 700 + i);
    edit.AddLargeValueRef(LargeValueRef::Make("big", kNoCompression),
                          kBig + 800 + i, "foobar");
    edit.AddLargeValueRef(LargeValueRef::Make("big2", kLightweightCompression),
                          kBig + 801 + i, "baz");
    edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
  }

  edit.SetComparatorName("foo");
  edit.SetLogNumber(kBig + 100);
  edit.SetNextFile(kBig + 200);
  edit.SetLastSequence(kBig + 1000);
  TestEncodeDecode(edit);
}
| } | |||
// Test-harness entry point: runs every TEST registered above.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,290 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // The representation of a DBImpl consists of a set of Versions. The | |||
| // newest version is called "current". Older versions may be kept | |||
| // around to provide a consistent view to live iterators. | |||
| // | |||
| // Each Version keeps track of a set of Table files per level. The | |||
| // entire set of versions is maintained in a VersionSet. | |||
| // | |||
| // Version,VersionSet are thread-compatible, but require external | |||
| // synchronization on all accesses. | |||
| #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ | |||
| #define STORAGE_LEVELDB_DB_VERSION_SET_H_ | |||
| #include <map> | |||
| #include <set> | |||
| #include <vector> | |||
| #include "db/dbformat.h" | |||
| #include "db/version_edit.h" | |||
| #include "port/port.h" | |||
| namespace leveldb { | |||
// Grouping of constants.  We may want to make some of these
// parameters set via options.
namespace config {
// Number of levels; sizes the per-level arrays in Version/VersionSet.
static const int kNumLevels = 7;
}
| namespace log { class Writer; } | |||
| class Compaction; | |||
| class Iterator; | |||
| class MemTable; | |||
| class TableBuilder; | |||
| class TableCache; | |||
| class Version; | |||
| class VersionSet; | |||
| class WritableFile; | |||
// One immutable state of the database: the set of table files at each
// level.  Created and owned by a VersionSet; kept alive by refcounting.
class Version {
 public:
  // Append to *iters a sequence of iterators that will
  // yield the contents of this Version when merged together.
  // REQUIRES: This version has been saved (see VersionSet::SaveTo)
  void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);

  // Reference count management (so Versions do not disappear out from
  // under live iterators)
  void Ref();
  void Unref();

  // Return a human readable string that describes this version's contents.
  std::string DebugString() const;

 private:
  friend class Compaction;
  friend class VersionSet;

  class LevelFileNumIterator;
  // Iterator over all files of one level, concatenated.
  Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;

  VersionSet* vset_;            // VersionSet to which this Version belongs
  Version* next_;               // Next version in linked list
  int refs_;                    // Number of live refs to this version
  MemTable* cleanup_mem_;       // NULL, or table to delete when version dropped

  // List of files per level
  std::vector<FileMetaData*> files_[config::kNumLevels];

  // Level that should be compacted next and its compaction score.
  // Score < 1 means compaction is not strictly needed.  These fields
  // are initialized by Finalize().
  double compaction_score_;
  int compaction_level_;

  // Only VersionSet creates Versions.
  explicit Version(VersionSet* vset)
      : vset_(vset), next_(NULL), refs_(0),
        cleanup_mem_(NULL),
        compaction_score_(-1),
        compaction_level_(-1) {
  }

  ~Version();

  // No copying allowed
  Version(const Version&);
  void operator=(const Version&);
};
// Owns the list of Versions, the MANIFEST (descriptor) log, file number
// allocation, compaction selection, and large-value reference tracking.
// Requires external synchronization (see file header comment).
class VersionSet {
 public:
  VersionSet(const std::string& dbname,
             const Options* options,
             TableCache* table_cache,
             const InternalKeyComparator*);
  ~VersionSet();

  // Apply *edit to the current version to form a new descriptor that
  // is both saved to persistent state and installed as the new
  // current version.  Iff Apply() returns OK, arrange to delete
  // cleanup_mem (if cleanup_mem != NULL) when it is no longer needed
  // by older versions.
  Status LogAndApply(VersionEdit* edit, MemTable* cleanup_mem);

  // Recover the last saved descriptor from persistent storage.
  Status Recover(uint64_t* log_number, SequenceNumber* last_sequence);

  // Save current contents to *log
  Status WriteSnapshot(log::Writer* log);

  // Return the current version.
  Version* current() const { return current_; }

  // Return the current manifest file number
  uint64_t ManifestFileNumber() const { return manifest_file_number_; }

  // Allocate and return a new file number
  uint64_t NewFileNumber() { return next_file_number_++; }

  // Return the number of Table files at the specified level.
  int NumLevelFiles(int level) const;

  // Pick level and inputs for a new compaction.
  // Returns NULL if there is no compaction to be done.
  // Otherwise returns a pointer to a heap-allocated object that
  // describes the compaction.  Caller should delete the result.
  Compaction* PickCompaction();

  // Return a compaction object for compacting the range [begin,end] in
  // the specified level.  Returns NULL if there is nothing in that
  // level that overlaps the specified range.  Caller should delete
  // the result.
  Compaction* CompactRange(
      int level,
      const InternalKey& begin,
      const InternalKey& end);

  // Create an iterator that reads over the compaction inputs for "*c".
  // The caller should delete the iterator when no longer needed.
  Iterator* MakeInputIterator(Compaction* c);

  // Returns true iff some level needs a compaction.
  bool NeedsCompaction() const { return current_->compaction_score_ >= 1; }

  // Add all files listed in any live version to *live.
  // May also mutate some internal state.
  void AddLiveFiles(std::set<uint64_t>* live);

  // Return the approximate offset in the database of the data for
  // "key" as of version "v".
  uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);

  // Register a reference to a large value with the specified
  // large_ref from the specified file number.  Returns "true" if this
  // is the first recorded reference to the "large_ref" value in the
  // database, and false otherwise.
  bool RegisterLargeValueRef(const LargeValueRef& large_ref,
                             uint64_t filenum,
                             const InternalKey& internal_key);

  // Cleanup the large value reference state by eliminating any
  // references from files that are not included in either "live_tables"
  // or "log_file".
  void CleanupLargeValueRefs(const std::set<uint64_t>& live_tables,
                             uint64_t log_file_num);

  // Returns true if a large value with the given reference is live.
  bool LargeValueIsLive(const LargeValueRef& large_ref);

 private:
  class Builder;

  friend class Compaction;
  friend class Version;

  // Compute compaction_score_/compaction_level_ for "v".
  Status Finalize(Version* v);

  // Delete any old versions that are no longer needed.
  void MaybeDeleteOldVersions();

  struct BySmallestKey;
  // Sort the files at "level" of "v" by smallest key.
  Status SortLevel(Version* v, uint64_t level);

  // Store in *inputs the current files at "level" overlapping [begin,end].
  void GetOverlappingInputs(
      int level,
      const InternalKey& begin,
      const InternalKey& end,
      std::vector<FileMetaData*>* inputs);

  // Store in *smallest/*largest the key range spanned by "inputs".
  void GetRange(const std::vector<FileMetaData*>& inputs,
                InternalKey* smallest,
                InternalKey* largest);

  Env* const env_;
  const std::string dbname_;
  const Options* const options_;
  TableCache* const table_cache_;
  const InternalKeyComparator icmp_;
  uint64_t next_file_number_;
  uint64_t manifest_file_number_;

  // Opened lazily
  WritableFile* descriptor_file_;
  log::Writer* descriptor_log_;

  // Versions are kept in a singly linked list that is never empty
  Version* current_;    // Pointer to the last (newest) list entry
  Version* oldest_;     // Pointer to the first (oldest) list entry

  // Map from large value reference to the set of <file numbers,internal_key>
  // values containing references to the value.  We keep the
  // internal key as a std::string rather than as an InternalKey because
  // we want to be able to easily use a set.
  typedef std::set<std::pair<uint64_t, std::string> > LargeReferencesSet;
  typedef std::map<LargeValueRef, LargeReferencesSet> LargeValueMap;
  LargeValueMap large_value_refs_;

  // Per-level key at which the next compaction at that level should start.
  // Either an empty string, or a valid InternalKey.
  std::string compact_pointer_[config::kNumLevels];

  // No copying allowed
  VersionSet(const VersionSet&);
  void operator=(const VersionSet&);
};
// A Compaction encapsulates information about a compaction.
// Constructed only by VersionSet (PickCompaction / CompactRange).
class Compaction {
 public:
  ~Compaction();

  // Return the level that is being compacted.  Inputs from "level"
  // and "level+1" will be merged to produce a set of "level+1" files.
  int level() const { return level_; }

  // Return the object that holds the edits to the descriptor done
  // by this compaction.
  VersionEdit* edit() { return &edit_; }

  // "which" must be either 0 or 1
  int num_input_files(int which) const { return inputs_[which].size(); }

  // Return the ith input file at "level()+which" ("which" must be 0 or 1).
  FileMetaData* input(int which, int i) const { return inputs_[which][i]; }

  // Maximum size of files to build during this compaction.
  uint64_t MaxOutputFileSize() const { return max_output_file_size_; }

  // Add all inputs to this compaction as delete operations to *edit.
  void AddInputDeletions(VersionEdit* edit);

  // Returns true if the information we have available guarantees that
  // the compaction is producing data in "level+1" for which no data exists
  // in levels greater than "level+1".
  bool IsBaseLevelForKey(const Slice& user_key);

  // Release the input version for the compaction, once the compaction
  // is successful.
  void ReleaseInputs();

 private:
  friend class Version;
  friend class VersionSet;

  explicit Compaction(int level);

  int level_;
  uint64_t max_output_file_size_;
  Version* input_version_;   // Version the inputs were taken from
  VersionEdit edit_;

  // Each compaction reads inputs from "level_" and "level_+1"
  std::vector<FileMetaData*> inputs_[2];      // The two sets of inputs

  // State for implementing IsBaseLevelForKey

  // level_ptrs_ holds indices into input_version_->levels_: our state
  // is that we are positioned at one of the file ranges for each
  // higher level than the ones involved in this compaction (i.e. for
  // all L >= level_ + 2).
  int level_ptrs_[config::kNumLevels];
};
| } | |||
| #endif // STORAGE_LEVELDB_DB_VERSION_SET_H_ | |||
| @ -0,0 +1,164 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // WriteBatch::rep_ := | |||
| // sequence: fixed64 | |||
| // count: fixed32 | |||
| // data: record[count] | |||
| // record := | |||
| // kTypeValue varstring varstring | | |||
| // kTypeLargeValueRef varstring varstring | | |||
| // kTypeDeletion varstring | |||
| // varstring := | |||
| // len: varint32 | |||
| // data: uint8[len] | |||
| #include "include/write_batch.h" | |||
| #include "include/db.h" | |||
| #include "db/dbformat.h" | |||
| #include "db/memtable.h" | |||
| #include "db/write_batch_internal.h" | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
// Construct an empty batch: Clear() installs the zeroed header.
WriteBatch::WriteBatch() {
  Clear();
}
// rep_ is a std::string, so there is nothing to release explicitly.
WriteBatch::~WriteBatch() { }
// Reset the batch to its empty state: a zeroed 12-byte header
// consisting of an 8-byte sequence number followed by a 4-byte record
// count (see the rep_ format comment at the top of this file).
void WriteBatch::Clear() {
  rep_.clear();
  rep_.resize(12);
}
// Return the number of records in the batch, decoded from the fixed32
// count field at byte offset 8 of the header.
int WriteBatchInternal::Count(const WriteBatch* b) {
  return DecodeFixed32(b->rep_.data() + 8);
}
// Overwrite the record count (fixed32 at byte offset 8) with "n".
void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
  EncodeFixed32(&b->rep_[8], n);
}
// Return the sequence number for the start of this batch, decoded
// from the fixed64 field at byte offset 0 of the header.
SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) {
  return SequenceNumber(DecodeFixed64(b->rep_.data()));
}
// Overwrite the starting sequence number (fixed64 at offset 0).
void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
  EncodeFixed64(&b->rep_[0], seq);
}
// Append a kTypeValue record: one tag byte followed by the
// length-prefixed key and value, and bump the header's record count.
void WriteBatch::Put(const Slice& key, const Slice& value) {
  WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
  rep_.push_back(static_cast<char>(kTypeValue));
  PutLengthPrefixedSlice(&rep_, key);
  PutLengthPrefixedSlice(&rep_, value);
}
// Append a kTypeLargeValueRef record.  The value stored in the batch
// is the serialized reference (large_ref.data), not the large value
// itself.
void WriteBatchInternal::PutLargeValueRef(WriteBatch* b,
                                          const Slice& key,
                                          const LargeValueRef& large_ref) {
  WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
  b->rep_.push_back(static_cast<char>(kTypeLargeValueRef));
  PutLengthPrefixedSlice(&b->rep_, key);
  PutLengthPrefixedSlice(&b->rep_,
                         Slice(large_ref.data, sizeof(large_ref.data)));
}
// Append a kTypeDeletion record: one tag byte followed by the
// length-prefixed key only (no value), and bump the record count.
void WriteBatch::Delete(const Slice& key) {
  WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
  rep_.push_back(static_cast<char>(kTypeDeletion));
  PutLengthPrefixedSlice(&rep_, key);
}
| Status WriteBatchInternal::InsertInto(const WriteBatch* b, | |||
| MemTable* memtable) { | |||
| const int count = WriteBatchInternal::Count(b); | |||
| int found = 0; | |||
| Iterator it(*b); | |||
| for (; !it.Done(); it.Next()) { | |||
| switch (it.op()) { | |||
| case kTypeDeletion: | |||
| memtable->Add(it.sequence_number(), kTypeDeletion, it.key(), Slice()); | |||
| break; | |||
| case kTypeValue: | |||
| memtable->Add(it.sequence_number(), kTypeValue, it.key(), it.value()); | |||
| break; | |||
| case kTypeLargeValueRef: | |||
| memtable->Add(it.sequence_number(), kTypeLargeValueRef, | |||
| it.key(), it.value()); | |||
| break; | |||
| } | |||
| found++; | |||
| } | |||
| if (!it.status().ok()) { | |||
| return it.status(); | |||
| } else if (found != count) { | |||
| return Status::Corruption("wrong count in WriteBatch"); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
// Replace the batch's serialized contents wholesale; "contents" must
// include at least the 12-byte header.
void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
  assert(contents.size() >= 12);
  b->rep_.assign(contents.data(), contents.size());
}
| WriteBatchInternal::Iterator::Iterator(const WriteBatch& batch) | |||
| : input_(WriteBatchInternal::Contents(&batch)), | |||
| done_(false) { | |||
| if (input_.size() < 12) { | |||
| done_ = true; | |||
| } else { | |||
| seq_ = WriteBatchInternal::Sequence(&batch), | |||
| input_.remove_prefix(12); | |||
| GetNextEntry(); | |||
| } | |||
| } | |||
// Advance to the next record.  Each record in a batch consumes one
// sequence number, so the per-record sequence is incremented here.
// Must not be called once Done() is true.
void WriteBatchInternal::Iterator::Next() {
  assert(!done_);
  seq_++;
  GetNextEntry();
}
| void WriteBatchInternal::Iterator::GetNextEntry() { | |||
| if (input_.empty()) { | |||
| done_ = true; | |||
| return; | |||
| } | |||
| char tag = input_[0]; | |||
| input_.remove_prefix(1); | |||
| switch (tag) { | |||
| case kTypeValue: | |||
| case kTypeLargeValueRef: | |||
| if (GetLengthPrefixedSlice(&input_, &key_) && | |||
| GetLengthPrefixedSlice(&input_, &value_)) { | |||
| op_ = static_cast<ValueType>(tag); | |||
| } else { | |||
| status_ = Status::Corruption("bad WriteBatch Put"); | |||
| done_ = true; | |||
| input_.clear(); | |||
| } | |||
| break; | |||
| case kTypeDeletion: | |||
| if (GetLengthPrefixedSlice(&input_, &key_)) { | |||
| op_ = kTypeDeletion; | |||
| } else { | |||
| status_ = Status::Corruption("bad WriteBatch Delete"); | |||
| done_ = true; | |||
| input_.clear(); | |||
| } | |||
| break; | |||
| default: | |||
| status_ = Status::Corruption("unknown WriteBatch tag"); | |||
| done_ = true; | |||
| input_.clear(); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,73 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ | |||
| #define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ | |||
| #include "include/write_batch.h" | |||
| namespace leveldb { | |||
| // WriteBatchInternal provides static methods for manipulating a | |||
| // WriteBatch that we don't want in the public WriteBatch interface. | |||
// WriteBatchInternal provides static methods for manipulating a
// WriteBatch that we don't want in the public WriteBatch interface.
class WriteBatchInternal {
 public:
  // Append a record referencing a large value; the reference, not the
  // large value itself, is stored in the batch.
  static void PutLargeValueRef(WriteBatch* batch,
                               const Slice& key,
                               const LargeValueRef& large_ref);

  // Return the number of entries in the batch.
  static int Count(const WriteBatch* batch);

  // Set the count for the number of entries in the batch.
  static void SetCount(WriteBatch* batch, int n);

  // Return the sequence number for the start of this batch.
  static SequenceNumber Sequence(const WriteBatch* batch);

  // Store the specified number as the sequence number for the start of
  // this batch.
  static void SetSequence(WriteBatch* batch, SequenceNumber seq);

  // Serialized contents of the batch (header plus records).
  static Slice Contents(const WriteBatch* batch) {
    return Slice(batch->rep_);
  }

  // Size in bytes of the serialized batch.
  static size_t ByteSize(const WriteBatch* batch) {
    return batch->rep_.size();
  }

  // Replace the batch's contents with the given serialized form.
  static void SetContents(WriteBatch* batch, const Slice& contents);

  // Apply every record in the batch to *memtable.
  static Status InsertInto(const WriteBatch* batch, MemTable* memtable);

  // Iterate over the contents of a write batch.
  class Iterator {
   public:
    explicit Iterator(const WriteBatch& batch);
    bool Done() const { return done_; }
    void Next();

    // Accessors for the current record; meaningful only while !Done().
    ValueType op() const { return op_; }
    const Slice& key() const { return key_; }
    const Slice& value() const { return value_; }
    SequenceNumber sequence_number() const { return seq_; }

    // First parse error encountered, if any.
    Status status() const { return status_; }

   private:
    void GetNextEntry();

    Slice input_;         // Unparsed remainder of the batch contents
    bool done_;           // True once input is exhausted or corrupt
    ValueType op_;        // Type of the current record
    Slice key_;           // Key of the current record
    Slice value_;         // Value of the current record (Put records)
    SequenceNumber seq_;  // Sequence number of the current record
    Status status_;       // Parse error, if one occurred
  };
};
| } | |||
| #endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ | |||
| @ -0,0 +1,110 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/db.h" | |||
| #include "db/memtable.h" | |||
| #include "db/write_batch_internal.h" | |||
| #include "include/env.h" | |||
| #include "util/logging.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
// Apply *b to a fresh MemTable and render the resulting entries as one
// concatenated string, e.g. "Put(k, v)@100Delete(k2)@101".  Entries
// appear in memtable iteration order, not insertion order.  Appends
// "ParseError()" if the batch could not be applied cleanly.
static std::string PrintContents(WriteBatch* b) {
  InternalKeyComparator cmp(BytewiseComparator());
  MemTable mem(cmp);
  std::string state;
  Status s = WriteBatchInternal::InsertInto(b, &mem);
  Iterator* iter = mem.NewIterator();
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ParsedInternalKey ikey;
    ASSERT_TRUE(ParseInternalKey(iter->key(), &ikey));
    switch (ikey.type) {
      case kTypeValue:
        state.append("Put(");
        state.append(ikey.user_key.ToString());
        state.append(", ");
        state.append(iter->value().ToString());
        state.append(")");
        break;
      case kTypeLargeValueRef:
        state.append("PutRef(");
        state.append(ikey.user_key.ToString());
        state.append(", ");
        state.append(iter->value().ToString());
        state.append(")");
        break;
      case kTypeDeletion:
        state.append("Delete(");
        state.append(ikey.user_key.ToString());
        state.append(")");
        break;
    }
    // Every entry is tagged with its per-record sequence number.
    state.append("@");
    state.append(NumberToString(ikey.sequence));
  }
  delete iter;
  if (!s.ok()) {
    state.append("ParseError()");
  }
  return state;
}
// Empty fixture class used by the TEST() macros below.
class WriteBatchTest { };
// A freshly constructed batch contains no records.
TEST(WriteBatchTest, Empty) {
  WriteBatch batch;
  ASSERT_EQ("", PrintContents(&batch));
  ASSERT_EQ(0, WriteBatchInternal::Count(&batch));
}
// Puts and Deletes coexist in one batch; each record consumes one
// sequence number starting at the batch's base sequence.
TEST(WriteBatchTest, Multiple) {
  WriteBatch batch;
  batch.Put(Slice("foo"), Slice("bar"));
  batch.Delete(Slice("box"));
  batch.Put(Slice("baz"), Slice("boo"));
  WriteBatchInternal::SetSequence(&batch, 100);
  ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
  ASSERT_EQ(3, WriteBatchInternal::Count(&batch));
  // PrintContents reports entries in memtable (key) order:
  // baz@102, box@101, foo@100.
  ASSERT_EQ("Put(baz, boo)@102"
            "Delete(box)@101"
            "Put(foo, bar)@100",
            PrintContents(&batch));
}
// A large-value reference is stored in the batch verbatim; the
// rendered value is the raw bytes of the reference itself.
TEST(WriteBatchTest, PutIndirect) {
  WriteBatch batch;
  batch.Put(Slice("baz"), Slice("boo"));
  LargeValueRef h;
  // Fill the reference with a recognizable pattern: first 20 bytes
  // 'a', remainder 'b' (matches the expected string below).
  for (int i = 0; i < LargeValueRef::ByteSize(); i++) {
    h.data[i] = (i < 20) ? 'a' : 'b';
  }
  WriteBatchInternal::PutLargeValueRef(&batch, Slice("foo"), h);
  WriteBatchInternal::SetSequence(&batch, 100);
  ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
  ASSERT_EQ(2, WriteBatchInternal::Count(&batch));
  ASSERT_EQ("Put(baz, boo)@100"
            "PutRef(foo, aaaaaaaaaaaaaaaaaaaabbbbbbbbb)@101",
            PrintContents(&batch));
}
// Truncating the serialized batch by one byte leaves the first record
// parseable and turns the damaged second record into a parse error.
TEST(WriteBatchTest, Corruption) {
  WriteBatch batch;
  batch.Put(Slice("foo"), Slice("bar"));
  batch.Delete(Slice("box"));
  WriteBatchInternal::SetSequence(&batch, 200);
  Slice contents = WriteBatchInternal::Contents(&batch);
  // Reinstall the contents minus the final byte.
  WriteBatchInternal::SetContents(&batch,
                                  Slice(contents.data(),contents.size()-1));
  ASSERT_EQ("Put(foo, bar)@200"
            "ParseError()",
            PrintContents(&batch));
}
| } | |||
// Test-binary entry point: run every TEST registered above.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,89 @@ | |||
/* Page defaults: modest side margins, black text on white. */
body {
  margin-left: 0.5in;
  margin-right: 0.5in;
  background: white;
  color: black;
}

/* Top-level headings hang slightly into the left margin. */
h1 {
  margin-left: -0.2in;
  font-size: 14pt;
}

h2 {
  margin-left: -0in;
  font-size: 12pt;
}

h3 {
  margin-left: -0in;
}

h4 {
  margin-left: -0in;
}

hr {
  margin-left: -0in;
}
/* Definition lists: definition term bold */
dt {
  font-weight: bold;
}

/* Author line is centered under the title. */
address {
  text-align: center;
}

/* Inline code-like elements shown in blue. */
code,samp,var {
  color: blue;
}

/* Keyboard input shown in dark red. */
kbd {
  color: #600000;
}

/* Side notes float right in a bordered, cream-colored box. */
div.note p {
  float: right;
  width: 3in;
  margin-right: 0%;
  padding: 1px;
  border: 2px solid #6060a0;
  background-color: #fffff0;
}
/* Lists: collapse default vertical spacing.
   NOTE(review): "-0em" is equivalent to 0 — presumably intended as
   plain 0; confirm before normalizing. */
ul {
  margin-top: -0em;
  margin-bottom: -0em;
}

ol {
  margin-top: -0em;
  margin-bottom: -0em;
}

/* Bullet-less list variant, pulled left to align with body text. */
UL.nobullets {
  list-style-type: none;
  list-style-image: none;
  margin-left: -1em;
}

p {
  margin: 1em 0 1em 0;
  padding: 0 0 0 0;
}
/* Code blocks: slightly loose leading, rendered blue like inline code. */
pre {
  line-height: 1.3em;
  padding: 0.4em 0 0.8em 0;
  margin: 0 0 0 0;
  /* Was "border: 0 0 0 0": invalid — the border shorthand takes
     width/style/color, not four lengths.  "0" keeps the intent
     (no border) in valid form. */
  border: 0;
  color: blue;
}
/* Centered data tables with generous vertical whitespace. */
.datatable {
  margin-left: auto;
  margin-right: auto;
  margin-top: 2em;
  margin-bottom: 2em;
  border: 1px solid;
}

/* Numeric cells: right-aligned with horizontal padding. */
.datatable td,th {
  padding: 0 0.5em 0 0.5em;
  text-align: right;
}
| @ -0,0 +1,222 @@ | |||
| <!DOCTYPE html> | |||
| <html> | |||
| <head> | |||
| <link rel="stylesheet" type="text/css" href="doc.css" /> | |||
| <title>Leveldb file layout and compactions</title> | |||
| </head> | |||
| <body> | |||
| <h1>Files</h1> | |||
| The implementation of leveldb is similar in spirit to the | |||
| representation of a single | |||
| <a href="http://labs.google.com/papers/bigtable.html"> | |||
| Bigtable tablet (section 5.3)</a>. | |||
| However the organization of the files that make up the representation | |||
| is somewhat different and is explained below. | |||
| <p> | |||
Each database is represented by a set of files stored in a directory.
| There are several different types of files as documented below: | |||
| <p> | |||
| <h2>Log files</h2> | |||
| <p> | |||
| A log file (*.log) stores a sequence of recent updates. Each update | |||
| is appended to the current log file. When the log file reaches a | |||
| pre-determined size (approximately 1MB by default), it is converted | |||
| to a sorted table (see below) and a new log file is created for future | |||
| updates. | |||
| <p> | |||
| A copy of the current log file is kept in an in-memory structure (the | |||
| <code>memtable</code>). This copy is consulted on every read so that read | |||
| operations reflect all logged updates. | |||
| <p> | |||
| <h2>Sorted tables</h2> | |||
| <p> | |||
| A sorted table (*.sst) stores a sequence of entries sorted by key. | |||
| Each entry is either a value for the key, or a deletion marker for the | |||
| key. (Deletion markers are kept around to hide obsolete values | |||
| present in older sorted tables). | |||
| <p> | |||
| The set of sorted tables are organized into a sequence of levels. The | |||
| sorted table generated from a log file is placed in a special <code>young</code> | |||
| level (also called level-0). When the number of young files exceeds a | |||
| certain threshold (currently four), all of the young files are merged | |||
| together with all of the overlapping level-1 files to produce a | |||
| sequence of new level-1 files (we create a new level-1 file for every | |||
| 2MB of data.) | |||
| <p> | |||
| Files in the young level may contain overlapping keys. However files | |||
| in other levels have distinct non-overlapping key ranges. Consider | |||
| level number L where L >= 1. When the combined size of files in | |||
| level-L exceeds (10^L) MB (i.e., 10MB for level-1, 100MB for level-2, | |||
| ...), one file in level-L, and all of the overlapping files in | |||
| level-(L+1) are merged to form a set of new files for level-(L+1). | |||
| These merges have the effect of gradually migrating new updates from | |||
| the young level to the largest level using only bulk reads and writes | |||
| (i.e., minimizing expensive seeks). | |||
| <h2>Large value files</h2> | |||
| <p> | |||
| Each large value (greater than 64KB by default) is placed in a large | |||
| value file (*.val) of its own. An entry is maintained in the log | |||
| and/or sorted tables that maps from the corresponding key to the | |||
| name of this large value file. The name of the large value file | |||
| is derived from a SHA1 hash of the value and its length so that | |||
| identical values share the same file. | |||
| <p> | |||
| <h2>Manifest</h2> | |||
| <p> | |||
| A MANIFEST file lists the set of sorted tables that make up each | |||
| level, the corresponding key ranges, and other important metadata. | |||
| A new MANIFEST file (with a new number embedded in the file name) | |||
| is created whenever the database is reopened. The MANIFEST file is | |||
| formatted as a log, and changes made to the serving state (as files | |||
| are added or removed) are appended to this log. | |||
| <p> | |||
| <h2>Current</h2> | |||
| <p> | |||
| CURRENT is a simple text file that contains the name of the latest | |||
| MANIFEST file. | |||
| <p> | |||
| <h2>Info logs</h2> | |||
| <p> | |||
| Informational messages are printed to files named LOG and LOG.old. | |||
| <p> | |||
| <h2>Others</h2> | |||
| <p> | |||
| Other files used for miscellaneous purposes may also be present | |||
| (LOCK, *.dbtmp). | |||
| <h1>Level 0</h1> | |||
| When the log file grows above a certain size (1MB by default): | |||
| <ul> | |||
| <li>Write the contents of the current memtable to an sstable | |||
| <li>Replace the current memtable by a brand new empty memtable | |||
| <li>Switch to a new log file | |||
| <li>Delete the old log file and the old memtable | |||
| </ul> | |||
| Experimental measurements show that generating an sstable from a 1MB | |||
| log file takes ~12ms, which seems like an acceptable latency hiccup to | |||
| add infrequently to a log write. | |||
| <p> | |||
| The new sstable is added to a special level-0 level. level-0 contains | |||
| a set of files (up to 4 by default). However unlike other levels, | |||
| these files do not cover disjoint ranges, but may overlap each other. | |||
| <h1>Compactions</h1> | |||
| <p> | |||
| When the size of level L exceeds its limit, we compact it in a | |||
| background thread. The compaction picks a file from level L and all | |||
| overlapping files from the next level L+1. Note that if a level-L | |||
| file overlaps only part of a level-(L+1) file, the entire file at | |||
| level-(L+1) is used as an input to the compaction and will be | |||
| discarded after the compaction. Aside: because level-0 is special | |||
| (files in it may overlap each other), we treat compactions from | |||
| level-0 to level-1 specially: a level-0 compaction may pick more than | |||
| one level-0 file in case some of these files overlap each other. | |||
| <p> | |||
| A compaction merges the contents of the picked files to produce a | |||
| sequence of level-(L+1) files. We switch to producing a new | |||
| level-(L+1) file after the current output file has reached the target | |||
| file size (2MB). The old files are discarded and the new files are | |||
| added to the serving state. | |||
| <p> | |||
| Compactions for a particular level rotate through the key space. In | |||
| more detail, for each level L, we remember the ending key of the last | |||
| compaction at level L. The next compaction for level L will pick the | |||
| first file that starts after this key (wrapping around to the | |||
| beginning of the key space if there is no such file). | |||
| <p> | |||
| Compactions drop overwritten values. They also drop deletion markers | |||
| if there are no higher numbered levels that contain a file whose range | |||
| overlaps the current key. | |||
| <h2>Timing</h2> | |||
| Level-0 compactions will read up to four 1MB files from level-0, and | |||
| at worst all the level-1 files (10MB). I.e., we will read 14MB and | |||
| write 14MB. | |||
| <p> | |||
| Other than the special level-0 compactions, we will pick one 2MB file | |||
| from level L. In the worst case, this will overlap ~ 12 files from | |||
| level L+1 (10 because level-(L+1) is ten times the size of level-L, | |||
| and another two at the boundaries since the file ranges at level-L | |||
| will usually not be aligned with the file ranges at level-L+1). The | |||
| compaction will therefore read 26MB and write 26MB. Assuming a disk | |||
| IO rate of 100MB/s (ballpark range for modern drives), the worst | |||
| compaction cost will be approximately 0.5 second. | |||
| <p> | |||
| If we throttle the background writing to something small, say 10% of | |||
| the full 100MB/s speed, a compaction may take up to 5 seconds. If the | |||
| user is writing at 10MB/s, we might build up lots of level-0 files | |||
(~50 to hold the 5*10MB).  This may significantly increase the cost of
| reads due to the overhead of merging more files together on every | |||
| read. | |||
| <p> | |||
| Solution 1: To reduce this problem, we might want to increase the log | |||
| switching threshold when the number of level-0 files is large. Though | |||
| the downside is that the larger this threshold, the larger the delay | |||
| that we will add to write latency when a write triggers a log switch. | |||
| <p> | |||
| Solution 2: We might want to decrease write rate artificially when the | |||
| number of level-0 files goes up. | |||
| <p> | |||
| Solution 3: We work on reducing the cost of very wide merges. | |||
| Perhaps most of the level-0 files will have their blocks sitting | |||
| uncompressed in the cache and we will only need to worry about the | |||
| O(N) complexity in the merging iterator. | |||
| <h2>Number of files</h2> | |||
| Instead of always making 2MB files, we could make larger files for | |||
| larger levels to reduce the total file count, though at the expense of | |||
| more bursty compactions. Alternatively, we could shard the set of | |||
| files into multiple directories. | |||
| <p> | |||
| An experiment on an <code>ext3</code> filesystem on Feb 04, 2011 shows | |||
| the following timings to do 100K file opens in directories with | |||
| varying number of files: | |||
| <table class="datatable"> | |||
| <tr><th>Files in directory</th><th>Microseconds to open a file</th></tr> | |||
| <tr><td>1000</td><td>9</td> | |||
| <tr><td>10000</td><td>10</td> | |||
| <tr><td>100000</td><td>16</td> | |||
| </table> | |||
| So maybe even the sharding is not necessary on modern filesystems? | |||
| <h1>Recovery</h1> | |||
| <ul> | |||
| <li> Read CURRENT to find name of the latest committed MANIFEST | |||
| <li> Read the named MANIFEST file | |||
| <li> Clean up stale files | |||
| <li> We could open all sstables here, but it is probably better to be lazy... | |||
| <li> Convert log chunk to a new level-0 sstable | |||
| <li> Start directing new writes to a new log file with recovered sequence# | |||
| </ul> | |||
| <h1>Garbage collection of files</h1> | |||
| <code>DeleteObsoleteFiles()</code> is called at the end of every | |||
| compaction and at the end of recovery. It finds the names of all | |||
| files in the database. It deletes all log files that are not the | |||
| current log file. It deletes all table files that are not referenced | |||
| from some level and are not the output of an active compaction. It | |||
| deletes all large value files that are not referenced from any live | |||
| table or log file. | |||
| </body> | |||
| </html> | |||
| @ -0,0 +1,508 @@ | |||
| <!DOCTYPE html> | |||
| <html> | |||
| <head> | |||
| <link rel="stylesheet" type="text/css" href="doc.css" /> | |||
| <title>Leveldb</title> | |||
| </head> | |||
| <body> | |||
| <h1>Leveldb</h1> | |||
| <address>Jeff Dean, Sanjay Ghemawat</address> | |||
| <p> | |||
| The <code>leveldb</code> library provides a persistent key value store. Keys and | |||
| values are arbitrary byte arrays. The keys are ordered within the key | |||
| value store according to a user-specified comparator function. | |||
| <p> | |||
| <h1>Opening A Database</h1> | |||
| <p> | |||
| A <code>leveldb</code> database has a name which corresponds to a file system | |||
| directory. All of the contents of database are stored in this | |||
| directory. The following example shows how to open a database, | |||
| creating it if necessary: | |||
| <p> | |||
| <pre> | |||
#include <cassert>
| #include "leveldb/include/db.h" | |||
| leveldb::DB* db; | |||
| leveldb::Options options; | |||
| options.create_if_missing = true; | |||
| leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db); | |||
| assert(status.ok()); | |||
| ... | |||
| </pre> | |||
| If you want to raise an error if the database already exists, add | |||
| the following line before the <code>leveldb::DB::Open</code> call: | |||
| <pre> | |||
| options.error_if_exists = true; | |||
| </pre> | |||
| <h1>Status</h1> | |||
| <p> | |||
| You may have noticed the <code>leveldb::Status</code> type above. Values of this | |||
| type are returned by most functions in <code>leveldb</code> that may encounter an | |||
| error. You can check if such a result is ok, and also print an | |||
| associated error message: | |||
| <p> | |||
| <pre> | |||
| leveldb::Status s = ...; | |||
| if (!s.ok()) cerr << s.ToString() << endl; | |||
| </pre> | |||
| <h1>Closing A Database</h1> | |||
| <p> | |||
| When you are done with a database, just delete the database object. | |||
| Example: | |||
| <p> | |||
| <pre> | |||
| ... open the db as described above ... | |||
| ... do something with db ... | |||
| delete db; | |||
| </pre> | |||
| <h1>Reads And Writes</h1> | |||
| <p> | |||
| The database provides <code>Put</code>, <code>Delete</code>, and <code>Get</code> methods to | |||
| modify/query the database. For example, the following code | |||
| moves the value stored under key1 to key2. | |||
| <p> | |||
| <pre> | |||
| std::string value; | |||
| leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value); | |||
| if (s.ok()) s = db->Put(leveldb::WriteOptions(), key2, value); | |||
| if (s.ok()) s = db->Delete(leveldb::WriteOptions(), key1); | |||
| </pre> | |||
| See <a href="#async">important performance note</a> below for how to | |||
| speed up writes significantly. | |||
| <h1>Atomic Updates</h1> | |||
| <p> | |||
| Note that if the process dies after the Put of key2 but before the | |||
| delete of key1, the same value may be left stored under multiple keys. | |||
| Such problems can be avoided by using the <code>WriteBatch</code> class to | |||
| atomically apply a set of updates: | |||
| <p> | |||
| <pre> | |||
| #include "leveldb/include/write_batch.h" | |||
| ... | |||
| std::string value; | |||
| leveldb::Status s = db->Get(leveldb::ReadOptions(), key1, &value); | |||
| if (s.ok()) { | |||
| leveldb::WriteBatch batch; | |||
| batch.Delete(key1); | |||
| batch.Put(key2, value); | |||
| s = db->Write(leveldb::WriteOptions(), &batch); | |||
| } | |||
| </pre> | |||
| The <code>WriteBatch</code> holds a sequence of edits to be made to the database, | |||
| and these edits within the batch are applied in order. Note that we | |||
| called <code>Delete</code> before <code>Put</code> so that if <code>key1</code> is identical to <code>key2</code>, | |||
| we do not end up erroneously dropping the value entirely. | |||
| <p> | |||
| Apart from its atomicity benefits, <code>WriteBatch</code> may also be used to | |||
| speed up bulk updates by placing lots of individual mutations into the | |||
| same batch. | |||
| <p> | |||
| <h1>Concurrency</h1> | |||
| <p> | |||
| A database may only be opened by one process at a time. The <code>leveldb</code> | |||
| implementation acquires a lock from the operating system to prevent | |||
| misuse. Within a single process, the same <code>leveldb::DB</code> object may | |||
| be safely used by multiple concurrent threads. | |||
| <p> | |||
| <h1>Iteration</h1> | |||
| <p> | |||
| The following example demonstrates how to print all key,value pairs | |||
| in a database. | |||
| <p> | |||
| <pre> | |||
| leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions()); | |||
| for (it->SeekToFirst(); it->Valid(); it->Next()) { | |||
| cout << it->key().ToString() << ": " << it->value().ToString() << endl; | |||
| } | |||
| assert(it->status().ok()); // Check for any errors found during the scan | |||
| delete it; | |||
| </pre> | |||
| The following variation shows how to process just the keys in the | |||
| range <code>[start,limit)</code>: | |||
| <p> | |||
| <pre> | |||
| for (it->Seek(start); | |||
| it->Valid() && it->key().ToString() < limit; | |||
| it->Next()) { | |||
| ... | |||
| } | |||
| </pre> | |||
| You can also process entries in reverse order. (Caveat: reverse | |||
| iteration is currently a factor of two or three slower than forward | |||
| iteration.) | |||
| <p> | |||
| <pre> | |||
| for (it->SeekToLast(); it->Valid(); it->Prev()) { | |||
| ... | |||
| } | |||
| </pre> | |||
| <h1>Snapshots</h1> | |||
| <p> | |||
| Snapshots provide consistent read-only views over the entire state of | |||
| the key-value store. <code>ReadOptions::snapshot</code> may be non-NULL to indicate | |||
| that a read should operate on a particular version of the DB state. | |||
| If <code>ReadOptions::snapshot</code> is NULL, the read will operate on an | |||
| implicit snapshot of the current state. | |||
| <p> | |||
| Snapshots typically are created by the DB::GetSnapshot() method: | |||
| <p> | |||
| <pre> | |||
| leveldb::ReadOptions options; | |||
| options.snapshot = db->GetSnapshot(); | |||
| ... apply some updates to db ... | |||
| leveldb::Iterator* iter = db->NewIterator(options); | |||
| ... read using iter to view the state when the snapshot was created ... | |||
| delete iter; | |||
| db->ReleaseSnapshot(options.snapshot); | |||
| </pre> | |||
| Note that when a snapshot is no longer needed, it should be released | |||
| using the DB::ReleaseSnapshot interface. This allows the | |||
| implementation to get rid of state that was being maintained just to | |||
| support reading as of that snapshot. | |||
| <p> | |||
| A Write operation can also return a snapshot that | |||
| represents the state of the database just after applying a particular | |||
| set of updates: | |||
| <p> | |||
| <pre> | |||
| leveldb::Snapshot* snapshot; | |||
| leveldb::WriteOptions write_options; | |||
| write_options.post_write_snapshot = &snapshot; | |||
| leveldb::Status status = db->Write(write_options, ...); | |||
| ... perform other mutations to db ... | |||
| leveldb::ReadOptions read_options; | |||
| read_options.snapshot = snapshot; | |||
| leveldb::Iterator* iter = db->NewIterator(read_options); | |||
| ... read as of the state just after the Write call returned ... | |||
| delete iter; | |||
| db->ReleaseSnapshot(snapshot); | |||
| </pre> | |||
| <h1>Slice</h1> | |||
| <p> | |||
| The return value of the <code>it->key()</code> and <code>it->value()</code> calls above | |||
| are instances of the <code>leveldb::Slice</code> type. <code>Slice</code> is a simple | |||
| structure that contains a length and a pointer to an external byte | |||
| array. Returning a <code>Slice</code> is a cheaper alternative to returning a | |||
| <code>std::string</code> since we do not need to copy potentially large keys and | |||
| values. In addition, <code>leveldb</code> methods do not return null-terminated | |||
| C-style strings since <code>leveldb</code> keys and values are allowed to | |||
| contain '\0' bytes. | |||
| <p> | |||
| C++ strings and null-terminated C-style strings can be easily converted | |||
| to a Slice: | |||
| <p> | |||
| <pre> | |||
| leveldb::Slice s1 = "hello"; | |||
| std::string str("world"); | |||
| leveldb::Slice s2 = str; | |||
| </pre> | |||
| A Slice can be easily converted back to a C++ string: | |||
| <pre> | |||
| std::string str = s1.ToString(); | |||
| assert(str == std::string("hello")); | |||
| </pre> | |||
| Be careful when using Slices since it is up to the caller to ensure that | |||
| the external byte array into which the Slice points remains live while | |||
| the Slice is in use. For example, the following is buggy: | |||
| <p> | |||
| <pre> | |||
| leveldb::Slice slice; | |||
| if (...) { | |||
| std::string str = ...; | |||
| slice = str; | |||
| } | |||
| Use(slice); | |||
| </pre> | |||
| When the <code>if</code> statement goes out of scope, <code>str</code> will be destroyed and the | |||
| backing storage for <code>slice</code> will disappear. | |||
| <p> | |||
| <h1>Comparators</h1> | |||
| <p> | |||
| The preceding examples used the default ordering function for key, | |||
| which orders bytes lexicographically. You can however supply a custom | |||
| comparator when opening a database. For example, suppose each | |||
| database key consists of two numbers and we should sort by the first | |||
| number, breaking ties by the second number. First, define a proper | |||
| subclass of <code>leveldb::Comparator</code> that expresses these rules: | |||
| <p> | |||
| <pre> | |||
| class TwoPartComparator : public leveldb::Comparator { | |||
| public: | |||
| // Three-way comparison function: | |||
| // if a < b: negative result | |||
| // if a > b: positive result | |||
| // else: zero result | |||
| int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const { | |||
| int a1, a2, b1, b2; | |||
| ParseKey(a, &a1, &a2); | |||
| ParseKey(b, &b1, &b2); | |||
| if (a1 < b1) return -1; | |||
| if (a1 > b1) return +1; | |||
| if (a2 < b2) return -1; | |||
| if (a2 > b2) return +1; | |||
| return 0; | |||
| } | |||
| // Ignore the following methods for now: | |||
const char* Name() const { return "TwoPartComparator"; }
| void FindShortestSeparator(std::string*, const leveldb::Slice&) const { } | |||
| void FindShortSuccessor(std::string*) const { } | |||
| }; | |||
| </pre> | |||
| Now create a database using this custom comparator: | |||
| <p> | |||
| <pre> | |||
| TwoPartComparator cmp; | |||
| leveldb::DB* db; | |||
| leveldb::Options options; | |||
| options.create_if_missing = true; | |||
| options.comparator = &cmp; | |||
| leveldb::Status status = leveldb::DB::Open(options, "/tmp/testdb", &db); | |||
| ... | |||
| </pre> | |||
| <h2>Backwards compatibility</h2> | |||
| <p> | |||
| The result of the comparator's <code>Name</code> method is attached to the | |||
| database when it is created, and is checked on every subsequent | |||
| database open. If the name changes, the <code>leveldb::DB::Open</code> call will | |||
| fail. Therefore, change the name if and only if the new key format | |||
| and comparison function are incompatible with existing databases, and | |||
| it is ok to discard the contents of all existing databases. | |||
| <p> | |||
| You can however still gradually evolve your key format over time with | |||
| a little bit of pre-planning. For example, you could store a version | |||
| number at the end of each key (one byte should suffice for most uses). | |||
| When you wish to switch to a new key format (e.g., adding an optional | |||
| third part to the keys processed by <code>TwoPartComparator</code>), | |||
| (a) keep the same comparator name (b) increment the version number | |||
| for new keys (c) change the comparator function so it uses the | |||
| version numbers found in the keys to decide how to interpret them. | |||
| <p> | |||
| <h1>Performance</h1> | |||
| <p> | |||
| Performance can be tuned by changing the default values of the | |||
| types defined in <code>leveldb/include/options.h</code>. | |||
| <p> | |||
| <h2><a name="async">Asynchronous Writes</a></h2> | |||
| By default, each write to <code>leveldb</code> is synchronous: it does | |||
| not return until the write has been pushed from memory to persistent | |||
| storage. (On Posix systems, this is implemented by calling either | |||
| <code>fdatasync(...)</code> or <code>msync(..., MS_SYNC)</code>.) | |||
| <strong>Synchronous writes may be very slow and the synchrony can be | |||
| optionally disabled</strong>: | |||
| <pre> | |||
| leveldb::WriteOptions write_options; | |||
| write_options.sync = false; | |||
| db->Put(write_options, ...); | |||
| </pre> | |||
| Asynchronous writes are often more than a hundred times as fast as | |||
| synchronous writes. The downside of asynchronous writes is that a | |||
| crash of the machine may cause the last few updates to be lost. Note | |||
| that a crash of just the writing process (i.e., not a reboot) will not | |||
| cause any loss since even when <code>sync</code> is false, an update | |||
| is pushed from the process memory into the operating system before it | |||
| is considered done. | |||
| <p> | |||
| Asynchronous writes can be particularly beneficial when loading a | |||
| large amount of data into the database since you can mitigate the | |||
| problem of lost updates by restarting the bulk load. A hybrid scheme | |||
| is also possible where every Nth write is synchronous, and in the | |||
| event of a crash, the bulk load is restarted just after the last | |||
| synchronous write finished by the previous run. | |||
| <p> | |||
| <code>WriteBatch</code> provides an alternative to asynchronous writes. | |||
| Multiple updates may be placed in the same <code>WriteBatch</code> and | |||
| applied together using a synchronous write. The extra cost of the | |||
| synchronous write will be amortized across all of the writes in the batch. | |||
| <p> | |||
| <h2>Block size</h2> | |||
| <p> | |||
| <code>leveldb</code> groups adjacent keys together into the same block and such a | |||
| block is the unit of transfer to and from persistent storage. The | |||
| default block size is approximately 8192 uncompressed bytes. | |||
| Applications that mostly do bulk scans over the contents of the | |||
| database may wish to increase this size. Applications that do a lot | |||
| of point reads of small values may wish to switch to a smaller block | |||
| size if performance measurements indicate an improvement. There isn't | |||
| much benefit in using blocks smaller than one kilobyte, or larger than | |||
| a few megabytes. Also note that compression will be more effective | |||
| with larger block sizes. | |||
| <p> | |||
| <h2>Compression</h2> | |||
| <p> | |||
| Each block is individually compressed before being written to | |||
| persistent storage. Compression is on by default since the default | |||
| compression method is very fast, and is automatically disabled for | |||
| uncompressible data. In rare cases, applications may want to disable | |||
| compression entirely, but should only do so if benchmarks show a | |||
| performance improvement: | |||
| <p> | |||
| <pre> | |||
| leveldb::Options options; | |||
| options.compression = leveldb::kNoCompression; | |||
| ... leveldb::DB::Open(options, name, ...) .... | |||
| </pre> | |||
| <h2>Cache</h2> | |||
| <p> | |||
| The contents of the database are stored in a set of files in the | |||
| filesystem and each file stores a sequence of compressed blocks. If | |||
| <code>options.cache</code> is non-NULL, it is used to cache frequently used | |||
| uncompressed block contents. | |||
| <p> | |||
| <pre> | |||
| #include "leveldb/include/cache.h" | |||
| leveldb::Options options; | |||
| options.cache = leveldb::NewLRUCache(100 * 1048576); // 100MB cache | |||
| leveldb::DB* db; | |||
| leveldb::DB::Open(options, name, &db); | |||
| ... use the db ... | |||
delete db;
| delete options.cache; | |||
| </pre> | |||
| Note that the cache holds uncompressed data, and therefore it should | |||
| be sized according to application level data sizes, without any | |||
| reduction from compression. (Caching of compressed blocks is left to | |||
| the operating system buffer cache, or any custom <code>Env</code> | |||
| implementation provided by the client.) | |||
| <p> | |||
| When performing a bulk read, the application may wish to disable | |||
| caching so that the data processed by the bulk read does not end up | |||
| displacing most of the cached contents. A per-iterator option can be | |||
| used to achieve this: | |||
| <p> | |||
| <pre> | |||
| leveldb::ReadOptions options; | |||
| options.fill_cache = false; | |||
| leveldb::Iterator* it = db->NewIterator(options); | |||
| for (it->SeekToFirst(); it->Valid(); it->Next()) { | |||
| ... | |||
| } | |||
| </pre> | |||
| <h2>Key Layout</h2> | |||
| <p> | |||
| Note that the unit of disk transfer and caching is a block. Adjacent | |||
| keys (according to the database sort order) will usually be placed in | |||
| the same block. Therefore the application can improve its performance | |||
| by placing keys that are accessed together near each other and placing | |||
| infrequently used keys in a separate region of the key space. | |||
| <p> | |||
| For example, suppose we are implementing a simple file system on top | |||
| of <code>leveldb</code>. The types of entries we might wish to store are: | |||
| <p> | |||
| <pre> | |||
| filename -> permission-bits, length, list of file_block_ids | |||
| file_block_id -> data | |||
| </pre> | |||
| We might want to prefix <code>filename</code> keys with one letter (say '/') and the | |||
| <code>file_block_id</code> keys with a different letter (say '0') so that scans | |||
| over just the metadata do not force us to fetch and cache bulky file | |||
| contents. | |||
| <p> | |||
| <h2>Large Values</h2> | |||
| <p> | |||
| <code>leveldb</code> has special treatment of large values (by default, a value | |||
| of length greater than or equal to 64K is considered large, though a | |||
| field in Options can be used to adjust this threshold). Each such | |||
| large value is placed in a separate operating system file, and the | |||
| normal database blocks just contain pointers to such files. | |||
| <p> | |||
| Furthermore, if the same large value occurs multiple times in a single | |||
| database, it will be stored just once. | |||
| <p> | |||
| <h1>Checksums</h1> | |||
| <p> | |||
| <code>leveldb</code> associates checksums with all data it stores in the file system. | |||
| There are two separate controls provided over how aggressively these | |||
| checksums are verified: | |||
| <p> | |||
| <ul> | |||
| <li> <code>ReadOptions::verify_checksums</code> may be set to true to force | |||
| checksum verification of all data that is read from the file system on | |||
| behalf of a particular read. By default, no such verification is | |||
| done. | |||
| <p> | |||
| <li> <code>Options::paranoid_checks</code> may be set to true before opening a | |||
| database to make the database implementation raise an error as soon as | |||
| it detects an internal corruption. Depending on which portion of the | |||
| database has been corrupted, the error may be raised when the database | |||
| is opened, or later by another database operation. By default, | |||
| paranoid checking is off so that the database can be used even if | |||
| parts of its persistent storage have been corrupted. | |||
| <p> | |||
| If a database is corrupted (perhaps it cannot be opened when | |||
| paranoid checking is turned on), the <code>leveldb::RepairDB</code> function | |||
may be used to recover as much of the data as possible.
| <p> | |||
| </ul> | |||
| <h1>Approximate Sizes</h1> | |||
| <p> | |||
The <code>GetApproximateSizes</code> method can be used to get the approximate
| number of bytes of file system space used by one or more key ranges. | |||
| <p> | |||
| <pre> | |||
| leveldb::Range ranges[2]; | |||
| ranges[0] = leveldb::Range("a", "c"); | |||
| ranges[1] = leveldb::Range("x", "z"); | |||
| uint64_t sizes[2]; | |||
| leveldb::Status s = db->GetApproximateSizes(ranges, 2, sizes); | |||
| </pre> | |||
| The preceding call will set <code>sizes[0]</code> to the approximate number of | |||
| bytes of file system space used by the key range <code>[a..c)</code> and | |||
| <code>sizes[1]</code> to the approximate number of bytes used by the key range | |||
| <code>[x..z)</code>. | |||
| <p> | |||
| <h1>Environment</h1> | |||
| <p> | |||
| All file operations (and other operating system calls) issued by the | |||
| <code>leveldb</code> implementation are routed through a <code>leveldb::Env</code> object. | |||
| Sophisticated clients may wish to provide their own <code>Env</code> | |||
| implementation to get better control. For example, an application may | |||
| introduce artificial delays in the file IO paths to limit the impact | |||
| of <code>leveldb</code> on other activities in the system. | |||
| <p> | |||
| <pre> | |||
| class SlowEnv : public leveldb::Env { | |||
| .. implementation of the Env interface ... | |||
| }; | |||
| SlowEnv env; | |||
| leveldb::Options options; | |||
| options.env = &env; | |||
| Status s = leveldb::DB::Open(options, ...); | |||
| </pre> | |||
| <h1>Porting</h1> | |||
| <p> | |||
| <code>leveldb</code> may be ported to a new platform by providing platform | |||
| specific implementations of the types/methods/functions exported by | |||
| <code>leveldb/port/port.h</code>. See <code>leveldb/port/port_example.h</code> for more | |||
| details. | |||
| <p> | |||
| In addition, the new platform may need a new default <code>leveldb::Env</code> | |||
| implementation. See <code>leveldb/util/env_posix.h</code> for an example. | |||
| <h1>Other Information</h1> | |||
| <p> | |||
| Details about the <code>leveldb</code> implementation may be found in | |||
| the following documents: | |||
| <ul> | |||
| <li> <a href="impl.html">Implementation notes</a> | |||
| <li> <a href="table_format.txt">Format of an immutable Table file</a> | |||
| <li> <a href="log_format.txt">Format of a log file</a> | |||
| </ul> | |||
| </body> | |||
| </html> | |||
| @ -0,0 +1,72 @@ | |||
| The log file contents are a sequence of 32KB blocks. The only | |||
| exception is that the tail of the file may contain a partial block. | |||
| Each block consists of a sequence of records: | |||
| block := record* trailer? | |||
| record := | |||
| checksum: uint32 // crc32c of type and data[] | |||
| length: uint16 | |||
| type: uint8 // One of FULL, FIRST, MIDDLE, LAST | |||
| data: uint8[length] | |||
| A record never starts within the last seven bytes of a block. Any | |||
| leftover bytes here form the trailer, which must consist entirely of | |||
| zero bytes and must be skipped by readers. In particular, even if | |||
| there are exactly seven bytes left in the block, and a zero-length | |||
| user record is added (which will fit in these seven bytes), the writer | |||
| must skip these trailer bytes and add the record to the next block. | |||
| More types may be added in the future. Some Readers may skip record | |||
| types they do not understand, others may report that some data was | |||
| skipped. | |||
| FULL == 1 | |||
| FIRST == 2 | |||
| MIDDLE == 3 | |||
| LAST == 4 | |||
| The FULL record contains the contents of an entire user record. | |||
| FIRST, MIDDLE, LAST are types used for user records that have been | |||
| split into multiple fragments (typically because of block boundaries). | |||
| FIRST is the type of the first fragment of a user record, LAST is the | |||
type of the last fragment of a user record, and MIDDLE is the type of all
| interior fragments of a user record. | |||
| Example: consider a sequence of user records: | |||
| A: length 1000 | |||
| B: length 97270 | |||
| C: length 8000 | |||
| A will be stored as a FULL record in the first block. | |||
| B will be split into three fragments: first fragment occupies the rest | |||
| of the first block, second fragment occupies the entirety of the | |||
| second block, and the third fragment occupies a prefix of the third | |||
| block. This will leave six bytes free in the third block, which will | |||
| be left empty as the trailer. | |||
| C will be stored as a FULL record in the fourth block. | |||
| =================== | |||
| Some benefits over the recordio format: | |||
| (1) We do not need any heuristics for resyncing - just go to next | |||
| block boundary and scan. If there is a corruption, skip to the next | |||
| block. As a side-benefit, we do not get confused when part of the | |||
| contents of one log file are embedded as a record inside another log | |||
| file. | |||
| (2) Splitting at approximate boundaries (e.g., for mapreduce) is | |||
| simple: find the next block boundary and skip records until we | |||
| hit a FULL or FIRST record. | |||
| (3) We do not need extra buffering for large records. | |||
| Some downsides compared to recordio format: | |||
| (1) No packing of tiny records. This could be fixed by adding a new | |||
| record type, so it is a shortcoming of the current implementation, | |||
| not necessarily the format. | |||
| (2) No compression. Again, this could be fixed by adding new record types. | |||
| @ -0,0 +1,61 @@ | |||
| File format | |||
| =========== | |||
| <beginning_of_file> | |||
| [data block 1] | |||
| [data block 2] | |||
| ... | |||
| [data block N] | |||
| [meta block 1] | |||
| ... | |||
| [meta block K] | |||
| [metaindex block] | |||
| [index block] | |||
| [Footer] (fixed size; starts at file_size - sizeof(Footer)) | |||
| <end_of_file> | |||
| The file contains internal pointers. Each such pointer is called | |||
| a BlockHandle and contains the following information: | |||
| offset: varint64 | |||
| size: varint64 | |||
| (1) The sequence of key/value pairs in the file are stored in sorted | |||
| order and partitioned into a sequence of data blocks. These blocks | |||
| come one after another at the beginning of the file. Each data block | |||
| is formatted according to the code in block_builder.cc, and then | |||
| optionally compressed. | |||
| (2) After the data blocks we store a bunch of meta blocks. The | |||
| supported meta block types are described below. More meta block types | |||
| may be added in the future. Each meta block is again formatted using | |||
| block_builder.cc and then optionally compressed. | |||
| (3) A "metaindex" block. It contains one entry for every other meta | |||
| block where the key is the name of the meta block and the value is a | |||
| BlockHandle pointing to that meta block. | |||
| (4) An "index" block. This block contains one entry per data block, | |||
| where the key is a string >= last key in that data block and before | |||
| the first key in the successive data block. The value is the | |||
| BlockHandle for the data block. | |||
(5) At the very end of the file is a fixed length footer that contains
| the BlockHandle of the metaindex and index blocks as well as a magic number. | |||
| metaindex_handle: char[p]; // Block handle for metaindex | |||
| index_handle: char[q]; // Block handle for index | |||
| padding: char[40-p-q]; // 0 bytes to make fixed length | |||
| // (40==2*BlockHandle::kMaxEncodedLength) | |||
| magic: fixed64; // == 0xdb4775248b80fb57 | |||
| "stats" Meta Block | |||
| ------------------ | |||
| This meta block contains a bunch of stats. The key is the name | |||
| of the statistic. The value contains the statistic. | |||
| TODO(postrelease): record following stats. | |||
| data size | |||
| index size | |||
| key size (uncompressed) | |||
| value size (uncompressed) | |||
| number of entries | |||
| number of data blocks | |||
| @ -0,0 +1,99 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // A Cache is an interface that maps keys to values. It has internal | |||
| // synchronization and may be safely accessed concurrently from | |||
| // multiple threads. It may automatically evict entries to make room | |||
| // for new entries. Values have a specified charge against the cache | |||
| // capacity. For example, a cache where the values are variable | |||
| // length strings, may use the length of the string as the charge for | |||
| // the string. | |||
| // | |||
| // A builtin cache implementation with a least-recently-used eviction | |||
| // policy is provided. Clients may use their own implementations if | |||
| // they want something more sophisticated (like scan-resistance, a | |||
| // custom eviction policy, variable cache sizing, etc.) | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_CACHE_H_ | |||
| #include <stdint.h> | |||
| #include "include/slice.h" | |||
| namespace leveldb { | |||
| class Cache; | |||
// Create a new cache with a fixed size capacity.  This implementation
// of Cache uses a least-recently-used eviction policy.
// "capacity" is expressed in the same units as the per-entry "charge"
// passed to Cache::Insert().  The caller owns the returned cache and
// should delete it when it is no longer needed.
extern Cache* NewLRUCache(size_t capacity);
// Abstract cache interface.  Maps keys to opaque values, has internal
// synchronization (safe for concurrent use from multiple threads), and
// may evict entries to make room for new ones; see the file comment
// above for the full contract.
class Cache {
 public:
  Cache() { }

  // Destroys all existing entries by calling the "deleter"
  // function that was passed to the constructor.
  virtual ~Cache();

  // Opaque handle to an entry stored in the cache.
  struct Handle { };

  // Insert a mapping from key->value into the cache and assign it
  // the specified charge against the total cache capacity.
  //
  // Returns a handle that corresponds to the mapping.  The caller
  // must call this->Release(handle) when the returned mapping is no
  // longer needed.
  //
  // When the inserted entry is no longer needed, the key and
  // value will be passed to "deleter".
  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
                         void (*deleter)(const Slice& key, void* value)) = 0;

  // If the cache has no mapping for "key", returns NULL.
  //
  // Else return a handle that corresponds to the mapping.  The caller
  // must call this->Release(handle) when the returned mapping is no
  // longer needed.
  virtual Handle* Lookup(const Slice& key) = 0;

  // Release a mapping returned by a previous Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void Release(Handle* handle) = 0;

  // Return the value encapsulated in a handle returned by a
  // successful Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void* Value(Handle* handle) = 0;

  // If the cache contains entry for key, erase it.  Note that the
  // underlying entry will be kept around until all existing handles
  // to it have been released.
  virtual void Erase(const Slice& key) = 0;

  // Return a new numeric id.  May be used by multiple clients who are
  // sharing the same cache to partition the key space.  Typically the
  // client will allocate a new id at startup and prepend the id to
  // its cache keys.
  virtual uint64_t NewId() = 0;

 private:
  // NOTE(review): the members below look like LRU-implementation
  // details rather than part of this abstract interface; nothing in
  // this header uses them — confirm whether they belong in the
  // concrete cache implementation instead.
  void LRU_Remove(Handle* e);
  void LRU_Append(Handle* e);
  void Unref(Handle* e);

  struct Rep;
  Rep* rep_;

  // No copying allowed
  Cache(const Cache&);
  void operator=(const Cache&);
};
| } | |||
#endif  // STORAGE_LEVELDB_INCLUDE_CACHE_H_
| @ -0,0 +1,61 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ | |||
| #include <string> | |||
| namespace leveldb { | |||
| class Slice; | |||
| // A Comparator object provides a total order across slices that are | |||
| // used as keys in an sstable or a database. | |||
class Comparator {
 public:
  virtual ~Comparator();

  // Three-way comparison.  Returns value:
  //   < 0 iff "a" < "b",
  //   == 0 iff "a" == "b",
  //   > 0 iff "a" > "b"
  virtual int Compare(const Slice& a, const Slice& b) const = 0;

  // The name of the comparator.  Used to check for comparator
  // mismatches (i.e., a DB created with one comparator is
  // accessed using a different comparator).
  //
  // The name is recorded with the database when it is created and
  // checked on every subsequent open, so the client of this package
  // should switch to a new name whenever the comparator implementation
  // changes in a way that will cause the relative ordering of any two
  // keys to change.
  //
  // Names starting with "leveldb." are reserved and should not be used
  // by any clients of this package.
  virtual const char* Name() const = 0;

  // Advanced functions: these are used to reduce the space requirements
  // for internal data structures like index blocks.

  // If *start < limit, changes *start to a short string in [start,limit).
  // Simple comparator implementations may return with *start unchanged,
  // i.e., an implementation of this method that does nothing is correct.
  virtual void FindShortestSeparator(
      std::string* start,
      const Slice& limit) const = 0;

  // Changes *key to a short string >= *key.
  // Simple comparator implementations may return with *key unchanged,
  // i.e., an implementation of this method that does nothing is correct.
  virtual void FindShortSuccessor(std::string* key) const = 0;
};
| // Return a builtin comparator that uses lexicographic byte-wise | |||
| // ordering. The result remains the property of this module and | |||
| // must not be deleted. | |||
| extern const Comparator* BytewiseComparator(); | |||
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ | |||
| @ -0,0 +1,137 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_DB_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_DB_H_ | |||
| #include <stdint.h> | |||
| #include <stdio.h> | |||
| #include "include/iterator.h" | |||
| #include "include/options.h" | |||
| namespace leveldb { | |||
| struct Options; | |||
| struct ReadOptions; | |||
| struct WriteOptions; | |||
| class Snapshot; | |||
| class WriteBatch; | |||
| // Some internal types. Clients should ignore. | |||
| class WriteBatchInternal; | |||
| struct Range { | |||
| Slice start; | |||
| Slice limit; | |||
| Range(const Slice& s, const Slice& l) : start(s), limit(l) { } | |||
| }; | |||
| // A DB is a persistent ordered map from keys to values. | |||
// A DB is an abstract interface; obtain a heap-allocated concrete
// instance via DB::Open().
class DB {
 public:
  // Open the database with the specified "name".
  // Stores a pointer to a heap-allocated database in *dbptr and returns
  // OK on success.
  // Stores NULL in *dbptr and returns a non-OK status on error.
  // Caller should delete *dbptr when it is no longer needed.
  //
  // The name of options.comparator is checked against the one recorded
  // when the database was created; a mismatch makes Open fail (see
  // include/comparator.h).
  static Status Open(const Options& options,
                     const std::string& name,
                     DB** dbptr);

  DB() { }
  virtual ~DB();

  // Set the database entry for "key" to "value".  Returns OK on success,
  // and a non-OK status on error.
  // Note: consider setting options.sync = false.
  virtual Status Put(const WriteOptions& options,
                     const Slice& key,
                     const Slice& value) = 0;

  // Remove the database entry (if any) for "key".  Returns OK on
  // success, and a non-OK status on error.  It is not an error if "key"
  // did not exist in the database.
  // Note: consider setting options.sync = false.
  virtual Status Delete(const WriteOptions& options, const Slice& key) = 0;

  // Apply the specified updates to the database.
  // Returns OK on success, non-OK on failure.
  // Note: consider setting options.sync = false.  Batching updates into
  // one Write amortizes the cost of a synchronous write across all of
  // the updates in the batch.
  virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;

  // If the database contains an entry for "key" store the
  // corresponding value in *value and return OK.
  //
  // If there is no entry for "key" leave *value unchanged and return
  // a status for which Status::IsNotFound() returns true.
  //
  // May return some other Status on an error.
  virtual Status Get(const ReadOptions& options,
                     const Slice& key, std::string* value) = 0;

  // Return a heap-allocated iterator over the contents of the database.
  // The result of NewIterator() is initially invalid (caller must
  // call one of the Seek methods on the iterator before using it).
  //
  // Caller should delete the iterator when it is no longer needed.
  // The returned iterator should be deleted before this db is deleted.
  virtual Iterator* NewIterator(const ReadOptions& options) = 0;

  // Return a handle to the current DB state.  Iterators created with
  // this handle will all observe a stable snapshot of the current DB
  // state.  The caller must call ReleaseSnapshot(result) when the
  // snapshot is no longer needed.
  virtual const Snapshot* GetSnapshot() = 0;

  // Release a previously acquired snapshot.  The caller must not
  // use "snapshot" after this call.  This allows the implementation to
  // discard state that was kept only to support reads as of that
  // snapshot.
  virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;

  // DB implementations can export properties about their state
  // via this method.  If "property" is a valid property understood by this
  // DB implementation, fills "*value" with its current value and returns
  // true.  Otherwise returns false.
  //
  // Valid property names include:
  //
  //  "leveldb.num-files-at-level<N>" - return the number of files at level <N>,
  //     where <N> is an ASCII representation of a level number (e.g. "0").
  virtual bool GetProperty(const Slice& property, uint64_t* value) = 0;

  // For each i in [0,n-1], store in "sizes[i]", the approximate
  // file system space used by keys in "[range[i].start .. range[i].limit)".
  //
  // Note that the returned sizes measure file system space usage, so
  // if the user data compresses by a factor of ten, the returned
  // sizes will be one-tenth the size of the corresponding user data size.
  //
  // The results may not include the sizes of recently written data.
  virtual void GetApproximateSizes(const Range* range, int n,
                                   uint64_t* sizes) = 0;

  // Possible extensions:
  // (1) Add a method to compact a range of keys

 private:
  // No copying allowed
  DB(const DB&);
  void operator=(const DB&);
};
// Destroy the contents of the specified database.
// Be very careful using this method: the data cannot be recovered
// afterwards.  Returns a non-OK status on failure.
Status DestroyDB(const std::string& name, const Options& options);

// If a DB cannot be opened, you may attempt to call this method to
// resurrect as much of the contents of the database as possible.
// Some data may be lost, so be careful when calling this function
// on a database that contains important information.
// Returns a non-OK status on failure.
Status RepairDB(const std::string& dbname, const Options& options);
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_DB_H_ | |||
| @ -0,0 +1,293 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // An Env is an interface used by the leveldb implementation to access | |||
| // operating system functionality like the filesystem etc. Callers | |||
| // may wish to provide a custom Env object when opening a database to | |||
// get fine grain control; e.g., to rate limit file system operations.
| #ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_ENV_H_ | |||
| #include <cstdarg> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <stdint.h> | |||
| #include "include/status.h" | |||
| namespace leveldb { | |||
| class FileLock; | |||
| class RandomAccessFile; | |||
| class SequentialFile; | |||
| class Slice; | |||
| class WritableFile; | |||
// Env abstracts the operating-system services (filesystem, threads,
// clock) that the leveldb implementation depends on.
class Env {
 public:
  Env() { }
  virtual ~Env();

  // Return a default environment suitable for the current operating
  // system.  Sophisticated users may wish to provide their own Env
  // implementation instead of relying on this default environment.
  //
  // The result of Default() belongs to leveldb and must never be deleted.
  static Env* Default();

  // Create a brand new sequentially-readable file with the specified name.
  // On success, stores a pointer to the new file in *result and returns OK.
  // On failure stores NULL in *result and returns non-OK.  If the file does
  // not exist, returns a non-OK status.
  //
  // The returned file will only be accessed by one thread at a time.
  virtual Status NewSequentialFile(const std::string& fname,
                                   SequentialFile** result) = 0;

  // Create a brand new random access read-only file with the
  // specified name.  On success, stores a pointer to the new file in
  // *result and returns OK.  On failure stores NULL in *result and
  // returns non-OK.  If the file does not exist, returns a non-OK
  // status.
  //
  // The returned file may be concurrently accessed by multiple threads.
  virtual Status NewRandomAccessFile(const std::string& fname,
                                     RandomAccessFile** result) = 0;

  // Create an object that writes to a new file with the specified
  // name.  Deletes any existing file with the same name and creates a
  // new file.  On success, stores a pointer to the new file in
  // *result and returns OK.  On failure stores NULL in *result and
  // returns non-OK.
  //
  // The returned file will only be accessed by one thread at a time.
  virtual Status NewWritableFile(const std::string& fname,
                                 WritableFile** result) = 0;

  // Returns true iff the named file exists.
  virtual bool FileExists(const std::string& fname) = 0;

  // Store in *result the names of the children of the specified directory.
  // The names are relative to "dir".
  // Original contents of *results are dropped.
  virtual Status GetChildren(const std::string& dir,
                             std::vector<std::string>* result) = 0;

  // Delete the named file.
  virtual Status DeleteFile(const std::string& fname) = 0;

  // Create the specified directory.
  virtual Status CreateDir(const std::string& dirname) = 0;

  // Delete the specified directory.
  virtual Status DeleteDir(const std::string& dirname) = 0;

  // Store the size of fname in *file_size.
  virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;

  // Rename file src to target.
  virtual Status RenameFile(const std::string& src,
                            const std::string& target) = 0;

  // Lock the specified file.  Used to prevent concurrent access to
  // the same db by multiple processes.  On failure, stores NULL in
  // *lock and returns non-OK.
  //
  // On success, stores a pointer to the object that represents the
  // acquired lock in *lock and returns OK.  The caller should call
  // UnlockFile(*lock) to release the lock.  If the process exits,
  // the lock will be automatically released.
  //
  // If somebody else already holds the lock, finishes immediately
  // with a failure.  I.e., this call does not wait for existing locks
  // to go away.
  //
  // May create the named file if it does not already exist.
  virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;

  // Release the lock acquired by a previous successful call to LockFile.
  // REQUIRES: lock was returned by a successful LockFile() call
  // REQUIRES: lock has not already been unlocked.
  virtual Status UnlockFile(FileLock* lock) = 0;

  // Arrange to run "(*function)(arg)" once in a background thread.
  //
  // "function" may run in an unspecified thread.  Multiple functions
  // added to the same Env may run concurrently in different threads.
  // I.e., the caller may not assume that background work items are
  // serialized.
  virtual void Schedule(
      void (*function)(void* arg),
      void* arg) = 0;

  // Start a new thread, invoking "function(arg)" within the new thread.
  // When "function(arg)" returns, the thread will be destroyed.
  virtual void StartThread(void (*function)(void* arg), void* arg) = 0;

  // *path is set to a temporary directory that can be used for testing.  It
  // may or may not have just been created.  The directory may or may not
  // differ between runs of the same process, but subsequent calls will
  // return the same directory.
  virtual Status GetTestDirectory(std::string* path) = 0;

  // Write an entry to the log file with the specified printf-style format.
  virtual void Logv(WritableFile* log, const char* format, va_list ap) = 0;

  // Returns the number of micro-seconds since some fixed point in time.
  // Only useful for computing deltas of time.
  virtual uint64_t NowMicros() = 0;

  // Sleep/delay the thread for the prescribed number of micro-seconds.
  virtual void SleepForMicroseconds(int micros) = 0;

 private:
  // No copying allowed
  Env(const Env&);
  void operator=(const Env&);
};
| // A file abstraction for reading sequentially through a file | |||
| class SequentialFile { | |||
| public: | |||
| SequentialFile() { } | |||
| virtual ~SequentialFile(); | |||
| // Read up to "n" bytes from the file. "scratch[0..n-1]" may be | |||
| // written by this routine. Sets "*result" to the data that was | |||
| // read (including if fewer than "n" bytes were successfully read). | |||
| // If an error was encountered, returns a non-OK status. | |||
| // | |||
| // REQUIRES: External synchronization | |||
| virtual Status Read(size_t n, Slice* result, char* scratch) = 0; | |||
| }; | |||
| // A file abstraction for randomly reading the contents of a file. | |||
| class RandomAccessFile { | |||
| public: | |||
| RandomAccessFile() { } | |||
| virtual ~RandomAccessFile(); | |||
| // Return the length of this file in bytes. | |||
| virtual uint64_t Size() const = 0; | |||
| // Read up to "n" bytes from the file starting at "offset". | |||
| // "scratch[0..n-1]" may be written by this routine. Sets "*result" | |||
| // to the data that was read (including if fewer than "n" bytes were | |||
| // successfully read). If an error was encountered, returns a | |||
| // non-OK status. | |||
| // | |||
| // Safe for concurrent use by multiple threads. | |||
| virtual Status Read(uint64_t offset, size_t n, Slice* result, | |||
| char* scratch) const = 0; | |||
| }; | |||
// A file abstraction for sequential writing.  The implementation
// must provide buffering since callers may append small fragments
// at a time to the file.
class WritableFile {
 public:
  WritableFile() { }
  virtual ~WritableFile();

  // Append "data" to the end of the file.
  virtual Status Append(const Slice& data) = 0;
  // Close the file; no further operations should follow.
  virtual Status Close() = 0;
  // Flush the implementation's internal buffer (presumably to the OS;
  // durability semantics live in Sync() — confirm in port implementations).
  virtual Status Flush() = 0;
  // Sync the file contents (presumably fsync-like stable-storage
  // persistence — confirm in the per-platform Env implementations).
  virtual Status Sync() = 0;

 private:
  // No copying allowed
  WritableFile(const WritableFile&);
  void operator=(const WritableFile&);
};
// Identifies a locked file.  Instances are handed out by
// Env::LockFile() and must be released with Env::UnlockFile();
// the type itself carries no operations.
class FileLock {
 public:
  FileLock() { }
  virtual ~FileLock();
 private:
  // No copying allowed
  FileLock(const FileLock&);
  void operator=(const FileLock&);
};
// Log the specified data to *info_log if info_log is non-NULL.
// The attribute below makes GCC/Clang type-check the printf-style
// "format" string (parameter 3) against the varargs (starting at 4).
extern void Log(Env* env, WritableFile* info_log, const char* format, ...)
#   if defined(__GNUC__) || defined(__clang__)
    __attribute__((__format__ (__printf__, 3, 4)))
#   endif
    ;

// A utility routine: write "data" to the named file.
extern Status WriteStringToFile(Env* env, const Slice& data,
                                const std::string& fname);

// A utility routine: read contents of named file into *data
extern Status ReadFileToString(Env* env, const std::string& fname,
                               std::string* data);
| // An implementation of Env that forwards all calls to another Env. | |||
| // May be useful to clients who wish to override just part of the | |||
| // functionality of another Env. | |||
| class EnvWrapper : public Env { | |||
| public: | |||
| // Initialize an EnvWrapper that delegates all calls to *target | |||
| explicit EnvWrapper(Env* target) : target_(target) { } | |||
| virtual ~EnvWrapper(); | |||
| // Return the target to which this Env forwards all calls | |||
| Env* target() const { return target_; } | |||
| // The following text is boilerplate that forwards all methods to target() | |||
| Status NewSequentialFile(const std::string& f, SequentialFile** r) { | |||
| return target_->NewSequentialFile(f, r); | |||
| } | |||
| Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) { | |||
| return target_->NewRandomAccessFile(f, r); | |||
| } | |||
| Status NewWritableFile(const std::string& f, WritableFile** r) { | |||
| return target_->NewWritableFile(f, r); | |||
| } | |||
| bool FileExists(const std::string& f) { return target_->FileExists(f); } | |||
| Status GetChildren(const std::string& dir, std::vector<std::string>* r) { | |||
| return target_->GetChildren(dir, r); | |||
| } | |||
| Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); } | |||
| Status CreateDir(const std::string& d) { return target_->CreateDir(d); } | |||
| Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); } | |||
| Status GetFileSize(const std::string& f, uint64_t* s) { | |||
| return target_->GetFileSize(f, s); | |||
| } | |||
| Status RenameFile(const std::string& s, const std::string& t) { | |||
| return target_->RenameFile(s, t); | |||
| } | |||
| Status LockFile(const std::string& f, FileLock** l) { | |||
| return target_->LockFile(f, l); | |||
| } | |||
| Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); } | |||
| void Schedule(void (*f)(void*), void* a) { | |||
| return target_->Schedule(f, a); | |||
| } | |||
| void StartThread(void (*f)(void*), void* a) { | |||
| return target_->StartThread(f, a); | |||
| } | |||
| virtual Status GetTestDirectory(std::string* path) { | |||
| return target_->GetTestDirectory(path); | |||
| } | |||
| virtual void Logv(WritableFile* log, const char* format, va_list ap) { | |||
| return target_->Logv(log, format, ap); | |||
| } | |||
| uint64_t NowMicros() { | |||
| return target_->NowMicros(); | |||
| } | |||
| void SleepForMicroseconds(int micros) { | |||
| target_->SleepForMicroseconds(micros); | |||
| } | |||
| private: | |||
| Env* target_; | |||
| }; | |||
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_ENV_H_ | |||
| @ -0,0 +1,95 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // An iterator yields a sequence of key/value pairs from a source. | |||
| // The following class defines the interface. Multiple implementations | |||
| // are provided by this library. In particular, iterators are provided | |||
| // to access the contents of a Table or a DB. | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ | |||
| #include "include/slice.h" | |||
| #include "include/status.h" | |||
| namespace leveldb { | |||
// Abstract cursor over an ordered sequence of key/value pairs.
class Iterator {
 public:
  Iterator();
  virtual ~Iterator();

  // An iterator is either positioned at a key/value pair, or
  // not valid.  This method returns true iff the iterator is valid.
  virtual bool Valid() const = 0;

  // Position at the first key in the source.  The iterator is Valid()
  // after this call iff the source is not empty.
  virtual void SeekToFirst() = 0;

  // Position at the last key in the source.  The iterator is
  // Valid() after this call iff the source is not empty.
  virtual void SeekToLast() = 0;

  // Position at the first key in the source that is at or past target.
  // The iterator is Valid() after this call iff the source contains
  // an entry that comes at or past target.
  virtual void Seek(const Slice& target) = 0;

  // Moves to the next entry in the source.  After this call, Valid() is
  // true iff the iterator was not positioned at the last entry in the source.
  // REQUIRES: Valid()
  virtual void Next() = 0;

  // Moves to the previous entry in the source.  After this call, Valid() is
  // true iff the iterator was not positioned at the first entry in source.
  // REQUIRES: Valid()
  virtual void Prev() = 0;

  // Return the key for the current entry.  The underlying storage for
  // the returned slice is valid only until the next modification of
  // the iterator.
  // REQUIRES: Valid()
  virtual Slice key() const = 0;

  // Return the value for the current entry.  The underlying storage for
  // the returned slice is valid only until the next modification of
  // the iterator.
  // REQUIRES: Valid()
  // (The previous comment said "!AtEnd() && !AtStart()", which refers to
  // methods this interface does not declare.)
  virtual Slice value() const = 0;

  // If an error has occurred, return it.  Else return an ok status.
  virtual Status status() const = 0;

  // Clients are allowed to register function/arg1/arg2 triples that
  // will be invoked when this iterator is destroyed.
  //
  // Note that unlike all of the preceding methods, this method is
  // not abstract and therefore clients should not override it.
  typedef void (*CleanupFunction)(void* arg1, void* arg2);
  void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);

 private:
  // One registered cleanup entry; "next" suggests entries form a
  // singly-linked list headed by cleanup_ — confirm in iterator.cc.
  struct Cleanup {
    CleanupFunction function;
    void* arg1;
    void* arg2;
    Cleanup* next;
  };
  Cleanup cleanup_;

  // No copying allowed
  Iterator(const Iterator&);
  void operator=(const Iterator&);
};
// Return an empty iterator (yields nothing).
// (Presumably the caller takes ownership of the returned iterator —
// confirm against iterator.cc before relying on it.)
extern Iterator* NewEmptyIterator();

// Return an empty iterator with the specified status.
extern Iterator* NewErrorIterator(const Status& status);
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ | |||
| @ -0,0 +1,203 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ | |||
| #include <stddef.h> | |||
| namespace leveldb { | |||
| class Cache; | |||
| class Comparator; | |||
| class Env; | |||
| class Snapshot; | |||
| class WritableFile; | |||
// DB contents are stored in a set of blocks, each of which holds a
// sequence of key,value pairs.  Each block may be compressed before
// being stored in a file.  The following enum describes which
// compression method (if any) is used to compress a block.
enum CompressionType {
  // NOTE: do not change the values of existing entries, as these are
  // part of the persistent format on disk.
  kNoCompression = 0x0,          // Store block bytes verbatim.
  kLightweightCompression = 0x1, // Fast block compression (see Options::compression).
};
// Options to control the behavior of a database (passed to DB::Open)
struct Options {
  // -------------------
  // Parameters that affect behavior

  // Comparator used to define the order of keys in the table.
  // Default: a comparator that uses lexicographic byte-wise ordering
  //
  // REQUIRES: The client must ensure that the comparator supplied
  // here has the same name and orders keys *exactly* the same as the
  // comparator provided to previous open calls on the same DB.
  const Comparator* comparator;

  // If true, the database will be created if it is missing.
  // Default: false
  bool create_if_missing;

  // If true, an error is raised if the database already exists.
  // Default: false
  bool error_if_exists;

  // If true, the implementation will do aggressive checking of the
  // data it is processing and will stop early if it detects any
  // errors.  This may have unforeseen ramifications: for example, a
  // corruption of one DB entry may cause a large number of entries to
  // become unreadable or for the entire DB to become unopenable.
  // Default: false
  bool paranoid_checks;

  // Use the specified object to interact with the environment,
  // e.g. to read/write files, schedule background work, etc.
  // Default: Env::Default()
  Env* env;

  // Any internal progress/error information generated by the db will
  // be written to info_log if it is non-NULL, or to a file stored
  // in the same directory as the DB contents if info_log is NULL.
  // Default: NULL
  WritableFile* info_log;

  // -------------------
  // Parameters that affect performance

  // Amount of data to build up in memory before converting to an
  // on-disk file.
  //
  // Some DB operations may encounter a delay proportional to the size
  // of this parameter.  Therefore we recommend against increasing
  // this parameter unless you are willing to live with an occasional
  // slow operation in exchange for faster bulk loading throughput.
  //
  // Default: 1MB
  size_t write_buffer_size;

  // Number of open files that can be used by the DB.  You may need to
  // increase this if your database has a large working set (budget
  // one open file per 2MB of working set).
  //
  // Default: 1000
  int max_open_files;

  // Handle values larger than "large_value_threshold" bytes
  // specially, by writing them into their own files (to avoid
  // compaction overhead) and doing content-based elimination of
  // duplicate values to save space.
  //
  // We recommend against changing this value.
  //
  // Default: 64K
  size_t large_value_threshold;

  // Control over blocks (user data is stored in a set of blocks, and
  // a block is the unit of reading from disk).

  // Use the specified cache for blocks (if non-NULL).
  // Default: NULL
  Cache* block_cache;

  // Approximate size of user data packed per block.  Note that the
  // block size specified here corresponds to uncompressed data.  The
  // actual size of the unit read from disk may be smaller if
  // compression is enabled.  This parameter can be changed dynamically.
  //
  // Default: 8K
  int block_size;

  // Number of keys between restart points for delta encoding of keys.
  // This parameter can be changed dynamically.  Most clients should
  // leave this parameter alone.
  //
  // Default: 16
  int block_restart_interval;

  // Compress blocks using the specified compression algorithm.  This
  // parameter can be changed dynamically.
  //
  // Default: kLightweightCompression, which gives lightweight but fast
  // compression.
  //
  // Typical speeds of kLightweightCompression on an Intel(R) Core(TM)2 2.4GHz:
  //    ~200-500MB/s compression
  //    ~400-800MB/s decompression
  // Note that these speeds are significantly faster than most
  // persistent storage speeds, and therefore it is typically never
  // worth switching to kNoCompression.  Even if the input data is
  // incompressible, the kLightweightCompression implementation will
  // efficiently detect that and will switch to uncompressed mode.
  CompressionType compression;

  // Create an Options object with default values for all fields.
  Options();
};
// Options that control read operations
struct ReadOptions {
  // If true, all data read from underlying storage will be
  // verified against corresponding checksums.
  // Default: false
  bool verify_checksums;

  // Should the data read for this iteration be cached in memory?
  // Callers may wish to set this field to false for bulk scans.
  // Default: true
  bool fill_cache;

  // If "snapshot" is non-NULL, read as of the supplied snapshot
  // (which must belong to the DB that is being read and which must
  // not have been released).  If "snapshot" is NULL, use an implicit
  // snapshot of the state at the beginning of this read operation.
  // Default: NULL
  const Snapshot* snapshot;

  ReadOptions()
      : verify_checksums(false),
        fill_cache(true),
        snapshot(NULL) {
  }
};
// Options that control write operations
struct WriteOptions {
  // If true, the write will be flushed from the operating system
  // buffer cache (by calling WritableFile::Sync()) before the write
  // is considered complete.  If this flag is true, writes will be
  // slower.
  //
  // If this flag is false, and the machine crashes, some recent
  // writes may be lost.  Note that if it is just the process that
  // crashes (i.e., the machine does not reboot), no writes will be
  // lost even if sync==false.
  //
  // Default: true
  bool sync;

  // If "post_write_snapshot" is non-NULL, and the write succeeds,
  // *post_write_snapshot will be modified to point to a snapshot of
  // the DB state immediately after this write.  The caller must call
  // DB::ReleaseSnapshot(*post_write_snapshot) when the
  // snapshot is no longer needed.
  //
  // If "post_write_snapshot" is non-NULL, and the write fails,
  // *post_write_snapshot will be set to NULL.
  //
  // Default: NULL
  const Snapshot** post_write_snapshot;

  WriteOptions()
      : sync(true),
        post_write_snapshot(NULL) {
  }
};
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ | |||
| @ -0,0 +1,104 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // Slice is a simple structure containing a pointer into some external | |||
| // storage and a size. The user of a Slice must ensure that the slice | |||
| // is not used after the corresponding external storage has been | |||
| // deallocated. | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_SLICE_H_ | |||
| #include <assert.h> | |||
| #include <stddef.h> | |||
| #include <string.h> | |||
| #include <string> | |||
| namespace leveldb { | |||
// A Slice is a non-owning (pointer, length) view of external bytes.
// The bytes it points at must outlive the Slice itself.
class Slice {
 public:
  // An empty slice: points at a static "" so data() is never NULL.
  Slice() : data_(""), size_(0) { }

  // View of the n bytes starting at data.
  Slice(const char* data, size_t n) : data_(data), size_(n) { }

  // View of the contents of "s" (not a copy).
  Slice(const std::string& s) : data_(s.data()), size_(s.size()) { }

  // View of the NUL-terminated C string s (NUL excluded).
  Slice(const char* s) : data_(s), size_(strlen(s)) { }

  // First byte of the viewed data.
  const char* data() const { return data_; }

  // Number of bytes in the view.
  size_t size() const { return size_; }

  // True iff the view contains no bytes.
  bool empty() const { return 0 == size_; }

  // Byte at position n.
  // REQUIRES: n < size()
  char operator[](size_t n) const {
    assert(n < size());
    return data_[n];
  }

  // Reset to the empty view.
  void clear() {
    data_ = "";
    size_ = 0;
  }

  // Advance the view past its first "n" bytes.
  void remove_prefix(size_t n) {
    assert(n <= size());
    data_ += n;
    size_ -= n;
  }

  // Materialize the viewed bytes as an owning std::string.
  std::string ToString() const { return std::string(data_, size_); }

  // Three-way comparison.  Returns value:
  //   <  0 iff "*this" <  "b",
  //   == 0 iff "*this" == "b",
  //   >  0 iff "*this" >  "b"
  int compare(const Slice& b) const;

  // Return true iff "x" is a prefix of "*this"
  bool starts_with(const Slice& x) const {
    if (size_ < x.size_) return false;
    return memcmp(data_, x.data_, x.size_) == 0;
  }

 private:
  const char* data_;
  size_t size_;

  // Intentionally copyable
};
| inline bool operator==(const Slice& x, const Slice& y) { | |||
| return ((x.size() == y.size()) && | |||
| (memcmp(x.data(), y.data(), x.size()) == 0)); | |||
| } | |||
| inline bool operator!=(const Slice& x, const Slice& y) { | |||
| return !(x == y); | |||
| } | |||
| inline int Slice::compare(const Slice& b) const { | |||
| const int min_len = (size_ < b.size_) ? size_ : b.size_; | |||
| int r = memcmp(data_, b.data_, min_len); | |||
| if (r == 0) { | |||
| if (size_ < b.size_) r = -1; | |||
| else if (size_ > b.size_) r = +1; | |||
| } | |||
| return r; | |||
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_SLICE_H_ | |||
| @ -0,0 +1,86 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // A Status encapsulates the result of an operation. It may indicate success, | |||
| // or it may indicate an error with an associated error message. | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_STATUS_H_ | |||
| #include <string> | |||
| #include <utility> | |||
| #include "include/slice.h" | |||
| namespace leveldb { | |||
| class Status { | |||
| public: | |||
| // Create a success status. | |||
| Status() : state_(NULL) { } | |||
| ~Status() { delete state_; } | |||
| // Copy the specified status. | |||
| Status(const Status& s); | |||
| void operator=(const Status& s); | |||
| // Return a success status. | |||
| static Status OK() { return Status(); } | |||
| // Return error status of an appropriate type. | |||
| static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) { | |||
| return Status(kNotFound, msg, Slice()); | |||
| } | |||
| static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) { | |||
| return Status(kCorruption, msg, msg2); | |||
| } | |||
| static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) { | |||
| return Status(kNotSupported, msg, msg2); | |||
| } | |||
| static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) { | |||
| return Status(kInvalidArgument, msg, msg2); | |||
| } | |||
| static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) { | |||
| return Status(kIOError, msg, msg2); | |||
| } | |||
| // Returns true iff the status indicates success. | |||
| bool ok() const { return (state_ == NULL); } | |||
| // Returns true iff the status indicates a NotFound error. | |||
| bool IsNotFound() const { return code() == kNotFound; } | |||
| // Return a string representation of this status suitable for printing. | |||
| // Returns the string "OK" for success. | |||
| std::string ToString() const; | |||
| private: | |||
| enum Code { | |||
| kOk = 0, | |||
| kNotFound = 1, | |||
| kCorruption = 2, | |||
| kNotSupported = 3, | |||
| kInvalidArgument = 4, | |||
| kIOError = 5, | |||
| }; | |||
| Code code() const { return (state_ == NULL) ? kOk : state_->first; } | |||
| Status(Code code, const Slice& msg, const Slice& msg2); | |||
| typedef std::pair<Code, std::string> State; | |||
| State* state_; | |||
| }; | |||
| inline Status::Status(const Status& s) { | |||
| state_ = (s.state_ == NULL) ? NULL : new State(*s.state_); | |||
| } | |||
| inline void Status::operator=(const Status& s) { | |||
| if (this != &s) { | |||
| delete state_; | |||
| state_ = (s.state_ == NULL) ? NULL : new State(*s.state_); | |||
| } | |||
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_STATUS_H_ | |||
| @ -0,0 +1,67 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_TABLE_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_TABLE_H_ | |||
| #include <stdint.h> | |||
| #include "include/iterator.h" | |||
| namespace leveldb { | |||
| class Block; | |||
| class BlockHandle; | |||
| struct Options; | |||
| class RandomAccessFile; | |||
| struct ReadOptions; | |||
// A Table is a sorted map from strings to strings.  Tables are
// immutable and persistent.
class Table {
 public:
  // Attempt to open the table that is stored in "file", and read the
  // metadata entries necessary to allow retrieving data from the table.
  //
  // If successful, returns ok and sets "*table" to the newly opened
  // table.  The client should delete "*table" when no longer needed.
  // If there was an error while initializing the table, sets "*table"
  // to NULL and returns a non-ok status.  Does not take ownership of
  // "*file", but the client must ensure that "file" remains live
  // for the duration of the returned table's lifetime.
  // (The previous comment referred to a "*source" parameter that does
  // not exist; the parameter is "file".)
  static Status Open(const Options& options,
                     RandomAccessFile* file,
                     Table** table);

  ~Table();

  // Returns a new iterator over the table contents.
  // The result of NewIterator() is initially invalid (caller must
  // call one of the Seek methods on the iterator before using it).
  Iterator* NewIterator(const ReadOptions&) const;

  // Given a key, return an approximate byte offset in the file where
  // the data for that key begins (or would begin if the key were
  // present in the file).  The returned value is in terms of file
  // bytes, and so includes effects like compression of the underlying data.
  // E.g., the approximate offset of the last key in the table will
  // be close to the file length.
  uint64_t ApproximateOffsetOf(const Slice& key) const;

 private:
  // All table state lives behind an opaque Rep (defined in table.cc).
  struct Rep;
  Rep* rep_;

  // Tables are constructed only via Open(), which builds the Rep.
  explicit Table(Rep* rep) { rep_ = rep; }

  // Callback that reads a data block on demand; the Slice argument is
  // presumably an encoded block handle — confirm in table.cc.
  static Iterator* BlockReader(void*, const ReadOptions&, const Slice&);

  // No copying allowed
  Table(const Table&);
  void operator=(const Table&);
};
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_TABLE_H_ | |||
| @ -0,0 +1,86 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // TableBuilder provides the interface used to build a Table | |||
| // (an immutable and sorted map from keys to values). | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ | |||
| #include <stdint.h> | |||
| #include "include/options.h" | |||
| #include "include/status.h" | |||
| namespace leveldb { | |||
| class BlockBuilder; | |||
| class BlockHandle; | |||
| class WritableFile; | |||
// Builds a Table file incrementally from a sorted sequence of key/value
// pairs.  See the Add()/Finish()/Abandon() contracts below.
class TableBuilder {
 public:
  // Create a builder that will store the contents of the table it is
  // building in *file.  Does not close the file.  It is up to the
  // caller to close the file after calling Finish().
  TableBuilder(const Options& options, WritableFile* file);

  // REQUIRES: Either Finish() or Abandon() has been called.
  ~TableBuilder();

  // Change the options used by this builder.  Note: only some of the
  // option fields can be changed after construction.  If a field is
  // not allowed to change dynamically and its value in the structure
  // passed to the constructor is different from its value in the
  // structure passed to this method, this method will return an error
  // without changing any fields.
  Status ChangeOptions(const Options& options);

  // Add key,value to the table being constructed.
  // REQUIRES: key is after any previously added key according to comparator.
  // REQUIRES: Finish(), Abandon() have not been called
  void Add(const Slice& key, const Slice& value);

  // Advanced operation: flush any buffered key/value pairs to file.
  // Can be used to ensure that two adjacent entries never live in
  // the same data block.  Most clients should not need to use this method.
  // REQUIRES: Finish(), Abandon() have not been called
  void Flush();

  // Return non-ok iff some error has been detected.
  Status status() const;

  // Finish building the table.  Stops using the file passed to the
  // constructor after this function returns.
  // REQUIRES: Finish(), Abandon() have not been called
  Status Finish();

  // Indicate that the contents of this builder should be abandoned.  Stops
  // using the file passed to the constructor after this function returns.
  // If the caller is not going to call Finish(), it must call Abandon()
  // before destroying this builder.
  // REQUIRES: Finish(), Abandon() have not been called
  void Abandon();

  // Number of calls to Add() so far.
  uint64_t NumEntries() const;

  // Size of the file generated so far.  If invoked after a successful
  // Finish() call, returns the size of the final generated file.
  uint64_t FileSize() const;

 private:
  // True iff no error has been detected so far.
  bool ok() const { return status().ok(); }

  // Writes "block" to the output file and fills *handle with its
  // location (see table_builder.cc).
  void WriteBlock(BlockBuilder* block, BlockHandle* handle);

  struct Rep;  // Opaque implementation state (defined in table_builder.cc).
  Rep* rep_;

  // No copying allowed
  TableBuilder(const TableBuilder&);
  void operator=(const TableBuilder&);
};
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ | |||
| @ -0,0 +1,49 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // WriteBatch holds a collection of updates to apply atomically to a DB. | |||
| // | |||
| // The updates are applied in the order in which they are added | |||
| // to the WriteBatch. For example, the value of "key" will be "v3" | |||
| // after the following batch is written: | |||
| // | |||
| // batch.Put("key", "v1"); | |||
| // batch.Delete("key"); | |||
| // batch.Put("key", "v2"); | |||
| // batch.Put("key", "v3"); | |||
| #ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ | |||
| #define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ | |||
| #include <string> | |||
| namespace leveldb { | |||
| class Slice; | |||
// A batch of Put/Delete updates that are applied atomically, in the
// order in which they were added (see the file comment above).
class WriteBatch {
 public:
  WriteBatch();
  ~WriteBatch();

  // Store the mapping "key->value" in the database.
  void Put(const Slice& key, const Slice& value);

  // If the database contains a mapping for "key", erase it.  Else do nothing.
  void Delete(const Slice& key);

  // Clear all updates buffered in this batch.
  void Clear();

 private:
  // Internal helper with privileged access to rep_; see
  // db/write_batch_internal.h.
  friend class WriteBatchInternal;

  std::string rep_;  // See comment in write_batch.cc for the format of rep_

  // Intentionally copyable
};
| } | |||
| #endif // STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ | |||
| @ -0,0 +1,329 @@ | |||
| # Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| # Use of this source code is governed by a BSD-style license that can be | |||
| # found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| { | |||
| 'variables': { | |||
| 'use_snappy%': 0, | |||
| }, | |||
| 'target_defaults': { | |||
| 'defines': [ | |||
| 'LEVELDB_PLATFORM_CHROMIUM=1', | |||
| ], | |||
| 'include_dirs': [ | |||
| # MOE:begin_strip | |||
| '../..', | |||
| # MOE:end_strip_and_replace '.', | |||
| ], | |||
| 'conditions': [ | |||
| ['OS == "win"', { | |||
| 'include_dirs': [ | |||
| 'port/win', | |||
| ], | |||
| }], | |||
| ['use_snappy', { | |||
| 'defines': [ | |||
| 'USE_SNAPPY=1', | |||
| ], | |||
| }], | |||
| ], | |||
| }, | |||
| 'targets': [ | |||
| { | |||
| 'target_name': 'leveldb', | |||
| 'type': '<(library)', | |||
| 'dependencies': [ | |||
        # The base library is a lightweight abstraction layer for things like
| # threads and IO. http://src.chromium.org/viewvc/chrome/trunk/src/base/ | |||
| # MOE:begin_strip | |||
| '../../../../base/base.gyp:base', | |||
| # MOE:end_strip_and_replace '../../base/base.gyp:base', | |||
| ], | |||
| 'conditions': [ | |||
| ['use_snappy', { | |||
| 'dependencies': [ | |||
| '../../../../third_party/snappy/snappy.gyp:snappy', | |||
| ], | |||
| }], | |||
| ], | |||
| 'sources': [ | |||
| # Include and then exclude so that all files show up in IDEs, even if | |||
| # they don't build. | |||
| 'db/builder.cc', | |||
| 'db/builder.h', | |||
| 'db/db_impl.cc', | |||
| 'db/db_impl.h', | |||
| 'db/db_iter.cc', | |||
| 'db/db_iter.h', | |||
| 'db/filename.cc', | |||
| 'db/filename.h', | |||
| 'db/dbformat.cc', | |||
| 'db/dbformat.h', | |||
| 'db/log_format.h', | |||
| 'db/log_reader.cc', | |||
| 'db/log_reader.h', | |||
| 'db/log_writer.cc', | |||
| 'db/log_writer.h', | |||
| 'db/memtable.cc', | |||
| 'db/memtable.h', | |||
| 'db/repair.cc', | |||
| 'db/skiplist.h', | |||
| 'db/snapshot.h', | |||
| 'db/table_cache.cc', | |||
| 'db/table_cache.h', | |||
| 'db/version_edit.cc', | |||
| 'db/version_edit.h', | |||
| 'db/version_set.cc', | |||
| 'db/version_set.h', | |||
| 'db/write_batch.cc', | |||
| 'db/write_batch_internal.h', | |||
| 'include/cache.h', | |||
| 'include/comparator.h', | |||
| 'include/db.h', | |||
| 'include/env.h', | |||
| 'include/iterator.h', | |||
| 'include/options.h', | |||
| 'include/slice.h', | |||
| 'include/status.h', | |||
| 'include/table.h', | |||
| 'include/table_builder.h', | |||
| 'include/write_batch.h', | |||
| 'port/port.h', | |||
| 'port/port_chromium.cc', | |||
| 'port/port_chromium.h', | |||
| 'port/port_example.h', | |||
| 'port/port_posix.cc', | |||
| 'port/port_posix.h', | |||
| 'port/sha1_portable.cc', | |||
| 'port/sha1_portable.h', | |||
| 'table/block.cc', | |||
| 'table/block.h', | |||
| 'table/block_builder.cc', | |||
| 'table/block_builder.h', | |||
| 'table/format.cc', | |||
| 'table/format.h', | |||
| 'table/iterator.cc', | |||
| 'table/iterator_wrapper.h', | |||
| 'table/merger.cc', | |||
| 'table/merger.h', | |||
| 'table/table.cc', | |||
| 'table/table_builder.cc', | |||
| 'table/two_level_iterator.cc', | |||
| 'table/two_level_iterator.h', | |||
| 'util/arena.cc', | |||
| 'util/arena.h', | |||
| 'util/cache.cc', | |||
| 'util/coding.cc', | |||
| 'util/coding.h', | |||
| 'util/comparator.cc', | |||
| 'util/crc32c.cc', | |||
| 'util/crc32c.h', | |||
| 'util/env.cc', | |||
| 'util/env_chromium.cc', | |||
| 'util/env_posix.cc', | |||
| 'util/hash.cc', | |||
| 'util/hash.h', | |||
| 'util/logging.cc', | |||
| 'util/logging.h', | |||
| 'util/mutexlock.h', | |||
| 'util/options.cc', | |||
| 'util/random.h', | |||
| 'util/status.cc', | |||
| ], | |||
| 'sources/': [ | |||
| ['exclude', '_(android|example|portable|posix)\\.cc$'], | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_testutil', | |||
| 'type': '<(library)', | |||
| 'dependencies': [ | |||
| # MOE:begin_strip | |||
| '../../../../base/base.gyp:base', | |||
| # MOE:end_strip_and_replace '../../base/base.gyp:base', | |||
| 'leveldb', | |||
| ], | |||
| 'export_dependent_settings': [ | |||
| # The tests use include directories from these projects. | |||
| # MOE:begin_strip | |||
| '../../../../base/base.gyp:base', | |||
| # MOE:end_strip_and_replace '../../base/base.gyp:base', | |||
| 'leveldb', | |||
| ], | |||
| 'sources': [ | |||
| 'util/histogram.cc', | |||
| 'util/histogram.h', | |||
| 'util/testharness.cc', | |||
| 'util/testharness.h', | |||
| 'util/testutil.cc', | |||
| 'util/testutil.h', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_arena_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'util/arena_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_cache_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'util/cache_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_coding_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'util/coding_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_corruption_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/corruption_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_crc32c_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'util/crc32c_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_db_bench', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/db_bench.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_db_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/db_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_dbformat_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/dbformat_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_env_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'util/env_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_filename_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/filename_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_log_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/log_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_sha1_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'port/sha1_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_skiplist_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/skiplist_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_table_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'table/table_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_version_edit_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/version_edit_test.cc', | |||
| ], | |||
| }, | |||
| { | |||
| 'target_name': 'leveldb_write_batch_test', | |||
| 'type': 'executable', | |||
| 'dependencies': [ | |||
| 'leveldb_testutil', | |||
| ], | |||
| 'sources': [ | |||
| 'db/write_batch_test.cc', | |||
| ], | |||
| }, | |||
| ], | |||
| } | |||
| # Local Variables: | |||
| # tab-width:2 | |||
| # indent-tabs-mode:nil | |||
| # End: | |||
| # vim: set expandtab tabstop=2 shiftwidth=2: | |||
| @ -0,0 +1,10 @@ | |||
| This directory contains interfaces and implementations that isolate the | |||
| rest of the package from platform details. | |||
| Code in the rest of the package includes "port.h" from this directory. | |||
| "port.h" in turn includes a platform specific "port_<platform>.h" file | |||
| that provides the platform specific implementation. | |||
| See port_posix.h for an example of what must be provided in a platform | |||
| specific header file. | |||
| @ -0,0 +1,21 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
#ifndef STORAGE_LEVELDB_PORT_PORT_H_
#define STORAGE_LEVELDB_PORT_PORT_H_

#include <string.h>

// Include the appropriate platform specific file below.  If you are
// porting to a new platform, see "port_example.h" for documentation
// of what the new port_<platform>.h file must provide.
// NOTE(review): if none of the LEVELDB_PLATFORM_* macros is defined,
// no port header is included at all and compilation fails later with
// confusing errors — consider adding an #else branch with #error.
#if defined(LEVELDB_PLATFORM_POSIX)
# include "port/port_posix.h"
#elif defined(LEVELDB_PLATFORM_CHROMIUM)
# include "port/port_chromium.h"
#elif defined(LEVELDB_PLATFORM_ANDROID)
# include "port/port_android.h"
#endif

#endif  // STORAGE_LEVELDB_PORT_PORT_H_
| @ -0,0 +1,65 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "port/port_android.h" | |||
| #include <cstdlib> | |||
extern "C" {

// Definitions for stdio functions the rest of the package calls but
// that are presumably missing from this platform's libc — the
// *_unlocked variants simply forward to the plain (locking) versions.
size_t fread_unlocked(void *a, size_t b, size_t c, FILE *d) {
  return fread(a, b, c, d);
}

size_t fwrite_unlocked(const void *a, size_t b, size_t c, FILE *d) {
  return fwrite(a, b, c, d);
}

int fflush_unlocked(FILE *f) {
  return fflush(f);
}

// fdatasync is unavailable here; fsync is a strict superset (it also
// flushes file metadata), so it is a safe substitute.
int fdatasync(int fd) {
  return fsync(fd);
}

}
| // TODO(gabor): This is copied from port_posix.cc - not sure if I should do this? | |||
| namespace leveldb { | |||
| namespace port { | |||
// Check the return code of a pthread call.  "result" is the value the
// pthread function returned (zero means success); on failure, print a
// diagnostic naming the operation ("label") and abort the process.
static void PthreadCall(const char* label, int result) {
  if (result == 0) {
    return;
  }
  fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
  abort();
}
// Mutex is a thin wrapper around a pthread mutex; any pthread failure
// aborts the process via PthreadCall rather than being reported.
Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); }

Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); }

void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); }

void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); }
// CondVar wraps a pthread condition variable bound to the Mutex that is
// supplied at construction time.
CondVar::CondVar(Mutex* mu)
    : mu_(mu) {
  PthreadCall("init cv", pthread_cond_init(&cv_, NULL));
}

CondVar::~CondVar() {
  PthreadCall("destroy cv", pthread_cond_destroy(&cv_));
}

// REQUIRES: the associated mutex is held by the calling thread.
void CondVar::Wait() {
  PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_));
}

// Wake at least one waiting thread, if any.
void CondVar::Signal(){
  PthreadCall("signal", pthread_cond_signal(&cv_));
}

// Wake all waiting threads.
void CondVar::SignalAll() {
  PthreadCall("broadcast", pthread_cond_broadcast(&cv_));
}
| } | |||
| } | |||
| @ -0,0 +1,131 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // See port_example.h for documentation for the following types/functions. | |||
| #ifndef STORAGE_LEVELDB_PORT_PORT_ANDROID_H_ | |||
| #define STORAGE_LEVELDB_PORT_PORT_ANDROID_H_ | |||
| #include <endian.h> | |||
| #include <pthread.h> | |||
| #include <stdint.h> | |||
| #include <sha1.h> | |||
| #include <cstdatomic> | |||
| #include <string> | |||
| #include <cctype> | |||
| extern "C" { | |||
| size_t fread_unlocked(void *a, size_t b, size_t c, FILE *d); | |||
| size_t fwrite_unlocked(const void *a, size_t b, size_t c, FILE *d); | |||
| int fflush_unlocked(FILE *f); | |||
| int fdatasync (int fd); | |||
| } | |||
| namespace leveldb { | |||
| namespace port { | |||
| static const bool kLittleEndian = __BYTE_ORDER == __LITTLE_ENDIAN; | |||
| class CondVar; | |||
// Exclusive lock built on a pthread mutex.  See port_example.h for the
// contract each port must satisfy.
class Mutex {
 public:
  Mutex();
  ~Mutex();

  void Lock();
  void Unlock();
  // Supposed to crash if this thread does not hold the mutex; this port
  // performs no check (a no-op is allowed by the contract).
  void AssertHeld() {
    //TODO(gabor): How can I implement this?
  }

 private:
  friend class CondVar;  // CondVar::Wait() needs direct access to mu_.
  pthread_mutex_t mu_;

  // No copying
  Mutex(const Mutex&);
  void operator=(const Mutex&);
};
// Condition variable bound to the Mutex passed to the constructor.
// See port_example.h for the contract.
class CondVar {
 public:
  explicit CondVar(Mutex* mu);
  ~CondVar();
  void Wait();
  void Signal();
  void SignalAll();

 private:
  Mutex* mu_;         // Not owned.
  pthread_cond_t cv_;
};
// Storage for a lock-free pointer.
// Implemented with std::atomic<void*>; each method's memory_order
// argument maps directly onto the Acquire/Release/NoBarrier contract
// documented in port_example.h.
class AtomicPointer {
 private:
  std::atomic<void*> rep_;
 public:
  // Leaves the stored pointer uninitialized.
  AtomicPointer() { }
  explicit AtomicPointer(void* v) : rep_(v) { }

  // Load such that no later memory access by this thread can be
  // reordered ahead of this read.
  inline void* Acquire_Load() const {
    return rep_.load(std::memory_order_acquire);
  }

  // Store such that no earlier memory access by this thread can be
  // reordered after this write.
  inline void Release_Store(void* v) {
    rep_.store(v, std::memory_order_release);
  }

  // Load with no ordering guarantees.
  inline void* NoBarrier_Load() const {
    return rep_.load(std::memory_order_relaxed);
  }

  // Store with no ordering guarantees.
  inline void NoBarrier_Store(void* v) {
    rep_.store(v, std::memory_order_relaxed);
  }
};
| /** | |||
| * TODO(gabor): Implement actual compress | |||
| * This is a hack - it just copies input to output. | |||
| * No actual compression occurs. | |||
| */ | |||
| inline void Lightweight_Compress( | |||
| const char* input, | |||
| size_t input_length, | |||
| std::string* output) { | |||
| output->copy((char*)input,0,input_length); | |||
| } | |||
| /** | |||
| * TODO(gabor): Implement actual compress | |||
| * This is a hack - it just copies input to output. | |||
| * No actual uncompression occurs. | |||
| */ | |||
| inline bool Lightweight_Uncompress( | |||
| const char* input_data, | |||
| size_t input_length, | |||
| std::string* output) { | |||
| output->copy((char*)input_data,0,input_length); | |||
| return (bool)1; | |||
| } | |||
// Store a 160-bit SHA-1 hash of data[0..len-1] in hash_array[0..19]
// (see port_example.h), using the platform's <sha1.h> implementation.
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
  SHA1_CTX sha1_ctx;
  SHA1Init(&sha1_ctx);
  SHA1Update(&sha1_ctx, (const u_char*)data, len);
  SHA1Final((u_char*)hash_array, &sha1_ctx);
}
// Return a 64-bit identifier for the calling thread, derived from the
// raw bytes of pthread_self().  pthread_t is an opaque type whose size
// varies by platform, so only as many bytes as fit in both types are
// copied; the rest of the result stays zero.
inline uint64_t ThreadIdentifier() {
  pthread_t self = pthread_self();
  uint64_t id = 0;
  size_t nbytes = sizeof(id);
  if (sizeof(self) < nbytes) {
    nbytes = sizeof(self);
  }
  memcpy(&id, &self, nbytes);
  return id;
}
// Heap profiling is not supported on this platform: report that by
// returning false without ever invoking the callback.
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
  (void)func;
  (void)arg;
  return false;
}
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_PORT_PORT_ANDROID_H_ | |||
| @ -0,0 +1,83 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "port/port_chromium.h" | |||
| #include "util/logging.h" | |||
| #if defined(USE_SNAPPY) | |||
| # include "third_party/snappy/src/snappy.h" | |||
| # include "third_party/snappy/src/snappy-stubs.h" | |||
| #endif | |||
| namespace leveldb { | |||
| namespace port { | |||
// Mutex delegates entirely to Chromium's base::Lock; construction and
// destruction need no extra work beyond the member's own.
Mutex::Mutex() {
}

Mutex::~Mutex() {
}

void Mutex::Lock() {
  mu_.Acquire();
}

void Mutex::Unlock() {
  mu_.Release();
}

// Delegates the holder check to base::Lock::AssertAcquired().
void Mutex::AssertHeld() {
  mu_.AssertAcquired();
}
// CondVar delegates to Chromium's base::ConditionVariable, bound to the
// base::Lock inside the supplied Mutex (hence the friend access).
CondVar::CondVar(Mutex* mu)
    : cv_(&mu->mu_) {
}

CondVar::~CondVar() { }

void CondVar::Wait() {
  cv_.Wait();
}

// Wake at least one waiting thread, if any.
void CondVar::Signal(){
  cv_.Signal();
}

// Wake all waiting threads.
void CondVar::SignalAll() {
  cv_.Broadcast();
}
// Compress input[0,input_length-1] into *output.  With USE_SNAPPY this
// really compresses; otherwise it degrades to a plain copy.
// NOTE(review): the two build modes produce incompatible byte streams —
// data written by one is not readable by the other; confirm that files
// never cross build configurations.
void Lightweight_Compress(const char* input, size_t input_length,
                          std::string* output) {
#if defined(USE_SNAPPY)
  // Size the buffer for the worst case, compress, then trim to the
  // actual compressed length.
  output->resize(snappy::MaxCompressedLength(input_length));
  size_t outlen;
  snappy::RawCompress(snappy::StringPiece(input, input_length),
                      &(*output)[0], &outlen);
  output->resize(outlen);
#else
  output->assign(input, input_length);
#endif
}
// Uncompress input[0,input_length-1] into *output.  Returns false iff
// the input is not valid compressed data (snappy builds only; the
// non-snappy fallback is a plain copy and always succeeds).
bool Lightweight_Uncompress(const char* input_data, size_t input_length,
                            std::string* output) {
#if defined(USE_SNAPPY)
  snappy::StringPiece input(input_data, input_length);
  size_t ulength;
  // First recover the uncompressed length from the snappy stream...
  if (!snappy::GetUncompressedLength(input, &ulength)) {
    return false;
  }
  // ...then decompress directly into the resized output buffer.
  output->resize(ulength);
  return snappy::RawUncompress(input, &(*output)[0]);
#else
  output->assign(input_data, input_length);
  return true;
#endif
}
| } | |||
| } | |||
| @ -0,0 +1,104 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // See port_example.h for documentation for the following types/functions. | |||
| #ifndef STORAGE_LEVELDB_PORT_PORT_CHROMIUM_H_ | |||
| #define STORAGE_LEVELDB_PORT_PORT_CHROMIUM_H_ | |||
| #include <stdint.h> | |||
| #include <string> | |||
| #include <cstring> | |||
| #include "base/atomicops.h" | |||
| #include "base/basictypes.h" | |||
| #include "base/logging.h" | |||
| #include "base/sha1.h" | |||
| #include "base/synchronization/condition_variable.h" | |||
| #include "base/synchronization/lock.h" | |||
| // Linux's ThreadIdentifier() needs this. | |||
| #if defined(OS_LINUX) | |||
| # include <linux/unistd.h> | |||
| #endif | |||
| #if defined(OS_WIN) | |||
| #define snprintf _snprintf | |||
| #define va_copy(a, b) do { (a) = (b); } while (0) | |||
| #endif | |||
| namespace leveldb { | |||
| namespace port { | |||
| // Chromium only supports little endian. | |||
| static const bool kLittleEndian = true; | |||
// Exclusive lock implemented on top of Chromium's base::Lock.
// See port_example.h for the contract each port must satisfy.
class Mutex {
 public:
  Mutex();
  ~Mutex();
  void Lock();
  void Unlock();
  void AssertHeld();

 private:
  base::Lock mu_;
  friend class CondVar;  // CondVar's base::ConditionVariable needs mu_.
  DISALLOW_COPY_AND_ASSIGN(Mutex);
};
// Condition variable bound to the Mutex passed to the constructor,
// implemented with Chromium's base::ConditionVariable.
class CondVar {
 public:
  explicit CondVar(Mutex* mu);
  ~CondVar();
  void Wait();
  void Signal();
  void SignalAll();

 private:
  base::ConditionVariable cv_;
  DISALLOW_COPY_AND_ASSIGN(CondVar);
};
// Lock-free pointer cell built on Chromium's base::subtle atomic ops;
// the pointer is stored as an AtomicWord and reinterpret_cast on the
// way in/out.  See port_example.h for the memory-ordering contract.
class AtomicPointer {
 private:
  typedef base::subtle::AtomicWord Rep;
  Rep rep_;
 public:
  // Leaves the stored pointer uninitialized.
  AtomicPointer() { }
  explicit AtomicPointer(void* p) : rep_(reinterpret_cast<Rep>(p)) {}
  // Load with acquire semantics: later accesses cannot be reordered
  // ahead of this read.
  inline void* Acquire_Load() const {
    return reinterpret_cast<void*>(::base::subtle::Acquire_Load(&rep_));
  }
  // Store with release semantics: earlier accesses cannot be reordered
  // after this write.
  inline void Release_Store(void* v) {
    ::base::subtle::Release_Store(&rep_, reinterpret_cast<Rep>(v));
  }
  // Load/store with no ordering guarantees.
  inline void* NoBarrier_Load() const {
    return reinterpret_cast<void*>(::base::subtle::NoBarrier_Load(&rep_));
  }
  inline void NoBarrier_Store(void* v) {
    ::base::subtle::NoBarrier_Store(&rep_, reinterpret_cast<Rep>(v));
  }
};
// Store a 160-bit SHA-1 hash of data[0..len-1] in hash_array[0..19]
// (see port_example.h), delegating to Chromium's base::SHA1HashBytes.
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
  return ::base::SHA1HashBytes(reinterpret_cast<const unsigned char*>(data),
                               len,
                               reinterpret_cast<unsigned char*>(hash_array));
}
| void Lightweight_Compress(const char* input, size_t input_length, | |||
| std::string* output); | |||
| bool Lightweight_Uncompress(const char* input_data, size_t input_length, | |||
| std::string* output); | |||
// Heap profiling is not supported in this port: return false without
// invoking the callback.
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
  (void)func;
  (void)arg;
  return false;
}
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_PORT_PORT_CHROMIUM_H_ | |||
| @ -0,0 +1,119 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // This file contains the specification, but not the implementations, | |||
| // of the types/operations/etc. that should be defined by a platform | |||
| // specific port_<platform>.h file. Use this file as a reference for | |||
| // how to port this package to a new platform. | |||
| #ifndef STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ | |||
| #define STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ | |||
| namespace leveldb { | |||
| namespace port { | |||
| // TODO(jorlow): Many of these belong more in the environment class rather than | |||
| // here. We should try moving them and see if it affects perf. | |||
| // The following boolean constant must be true on a little-endian machine | |||
| // and false otherwise. | |||
| static const bool kLittleEndian = true /* or some other expression */; | |||
| // ------------------ Threading ------------------- | |||
// A Mutex represents an exclusive lock.
// (Specification only: ports provide the implementation; this header is
// never compiled into the library.)
class Mutex {
 public:
  Mutex();
  ~Mutex();

  // Lock the mutex.  Waits until other lockers have exited.
  // Will deadlock if the mutex is already locked by this thread.
  void Lock();

  // Unlock the mutex.
  // REQUIRES: This mutex was locked by this thread.
  void Unlock();

  // Optionally crash if this thread does not hold this mutex.
  // The implementation must be fast, especially if NDEBUG is
  // defined.  The implementation is allowed to skip all checks.
  void AssertHeld();
};
| class CondVar { | |||
| public: | |||
| explicit CondVar(Mutex* mu); | |||
| ~CondVar(); | |||
| // Atomically release *mu and block on this condition variable until | |||
| // either a call to SignalAll(), or a call to Signal() that picks | |||
| // this thread to wakeup. | |||
| // REQUIRES: this thread holds *mu | |||
| void Wait(); | |||
| // If there are some threads waiting, wake up at least one of them. | |||
| void Signal(); | |||
| // Wake up all waiting threads. | |||
| void SignallAll(); | |||
| }; | |||
// A type that holds a pointer that can be read or written atomically
// (i.e., without word-tearing.)
class AtomicPointer {
 private:
  intptr_t rep_;
 public:
  // Initialize to arbitrary value
  AtomicPointer();

  // Initialize to hold v.
  // (Fixed: intptr_t cannot be initialized directly from void* — the
  // conversion requires an explicit reinterpret_cast, as the real ports
  // do; the previous "rep_(v)" was ill-formed.)
  explicit AtomicPointer(void* v) : rep_(reinterpret_cast<intptr_t>(v)) { }

  // Read and return the stored pointer with the guarantee that no
  // later memory access (read or write) by this thread can be
  // reordered ahead of this read.
  void* Acquire_Load() const;

  // Set v as the stored pointer with the guarantee that no earlier
  // memory access (read or write) by this thread can be reordered
  // after this store.
  void Release_Store(void* v);

  // Read the stored pointer with no ordering guarantees.
  void* NoBarrier_Load() const;

  // Set v as the stored pointer with no ordering guarantees.
  void NoBarrier_Store(void* v);
};
| // ------------------ Checksumming ------------------- | |||
| // Store a 160-bit hash of "data[0..len-1]" in "hash_array[0]..hash_array[19]" | |||
| extern void SHA1_Hash(const char* data, size_t len, char* hash_array); | |||
| // ------------------ Compression ------------------- | |||
| // Store the lightweight compression of "input[0,input_length-1]" in *output. | |||
| extern void Lightweight_Compress(const char* input, size_t input_length, | |||
| std::string* output); | |||
| // Attempt to lightweight uncompress input[0,input_length-1] into *output. | |||
| // Returns true if successful, false if the input is invalid lightweight | |||
| // compressed data. | |||
| extern bool Lightweight_Uncompress(const char* input_data, size_t input_length, | |||
| std::string* output); | |||
| // ------------------ Miscellaneous ------------------- | |||
| // If heap profiling is not supported, returns false. | |||
| // Else repeatedly calls (*func)(arg, data, n) and then returns true. | |||
| // The concatenation of all "data[0,n-1]" fragments is the heap profile. | |||
| extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg); | |||
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ | |||
| @ -0,0 +1,50 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "port/port_posix.h" | |||
| #include <cstdlib> | |||
| #include <stdio.h> | |||
| #include <string.h> | |||
| #include "util/logging.h" | |||
| namespace leveldb { | |||
| namespace port { | |||
// Validate the return code of a pthread call: zero is success, anything
// else prints a diagnostic naming the operation ("label") and aborts.
static void PthreadCall(const char* label, int result) {
  if (result == 0) {
    return;
  }
  fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
  abort();
}
// Mutex is a thin wrapper around a pthread mutex; any pthread failure
// aborts the process via PthreadCall rather than being reported.
Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); }

Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); }

void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); }

void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); }
// CondVar wraps a pthread condition variable bound to the Mutex that is
// supplied at construction time.
CondVar::CondVar(Mutex* mu)
    : mu_(mu) {
  PthreadCall("init cv", pthread_cond_init(&cv_, NULL));
}

CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); }

// REQUIRES: the associated mutex is held by the calling thread.
void CondVar::Wait() {
  PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_));
}

// Wake at least one waiting thread, if any.
void CondVar::Signal() {
  PthreadCall("signal", pthread_cond_signal(&cv_));
}

// Wake all waiting threads.
void CondVar::SignalAll() {
  PthreadCall("broadcast", pthread_cond_broadcast(&cv_));
}
| } | |||
| } | |||
| @ -0,0 +1,108 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // See port_example.h for documentation for the following types/functions. | |||
| #ifndef STORAGE_LEVELDB_PORT_PORT_POSIX_H_ | |||
| #define STORAGE_LEVELDB_PORT_PORT_POSIX_H_ | |||
| #include <endian.h> | |||
| #include <pthread.h> | |||
| #include <stdint.h> | |||
| #include <string> | |||
| #include <cstdatomic> | |||
| #include <cstring> | |||
| #include "port/sha1_portable.h" | |||
| namespace leveldb { | |||
| namespace port { | |||
| static const bool kLittleEndian = (__BYTE_ORDER == __LITTLE_ENDIAN); | |||
| class CondVar; | |||
// Non-recursive mutual-exclusion lock over pthread_mutex_t.
// Any underlying pthread error aborts the process (see port_posix.cc).
class Mutex {
 public:
  Mutex();
  ~Mutex();

  void Lock();
  void Unlock();
  // No-op on this platform; exists so callers can document lock
  // requirements in a portable way.
  void AssertHeld() { }

 private:
  friend class CondVar;  // CondVar::Wait() needs direct access to mu_
  pthread_mutex_t mu_;

  // No copying
  Mutex(const Mutex&);
  void operator=(const Mutex&);
};
// Condition variable associated with a single Mutex (non-owning pointer;
// the mutex must outlive the CondVar).
class CondVar {
 public:
  explicit CondVar(Mutex* mu);
  ~CondVar();
  // REQUIRES: the associated mutex is held by the caller.
  void Wait();
  void Signal();      // Wake at least one waiter
  void SignalAll();   // Wake all waiters

 private:
  pthread_cond_t cv_;
  Mutex* mu_;
};
// Storage for a lock-free pointer built on std::atomic<void*>.
// Acquire_Load/Release_Store pair to publish data between threads;
// the NoBarrier_* variants use relaxed ordering for callers that
// provide their own synchronization.
class AtomicPointer {
 public:
  AtomicPointer() { }  // NOTE: leaves rep_ deliberately uninitialized
  explicit AtomicPointer(void* v) : rep_(v) { }

  inline void* Acquire_Load() const { return rep_.load(std::memory_order_acquire); }
  inline void Release_Store(void* v) { rep_.store(v, std::memory_order_release); }
  inline void* NoBarrier_Load() const { return rep_.load(std::memory_order_relaxed); }
  inline void NoBarrier_Store(void* v) { rep_.store(v, std::memory_order_relaxed); }

 private:
  std::atomic<void*> rep_;
};
// Compute the SHA1 hash of data[0,len-1] into hash_array (20 bytes).
// The POSIX port has no platform-optimized SHA1, so this simply forwards
// to the portable implementation.
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
  SHA1_Hash_Portable(data, len, hash_array);
}
// TODO(gabor): Implement actual compression.
// Placeholder: copies the input bytes into *output verbatim; no
// compression is performed.
inline void Lightweight_Compress(const char* input, size_t input_length,
                                 std::string* output) {
  output->clear();
  output->append(input, input_length);
}
// TODO(gabor): Implement actual uncompression.
// Placeholder matching Lightweight_Compress above: copies the bytes
// through unchanged and always reports success.
inline bool Lightweight_Uncompress(const char* input_data, size_t input_length,
                                   std::string* output) {
  output->clear();
  output->append(input_data, input_length);
  return true;
}
// Heap profiling is not supported on this port: func is never invoked and
// the function always reports failure.
inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
  (void) func;
  (void) arg;
  return false;
}
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_ | |||
| @ -0,0 +1,298 @@ | |||
| // Portions copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // This module provides a slow but portable implementation of | |||
| // the SHA1 hash function. | |||
| // | |||
| // It is adapted from free code written by Paul E. Jones | |||
| // <paulej@packetizer.com>. See http://www.packetizer.com/security/sha1/ | |||
| // | |||
| // The license for the original code is: | |||
| /* | |||
| Copyright (C) 1998, 2009 | |||
| Paul E. Jones <paulej@packetizer.com> | |||
| Freeware Public License (FPL) | |||
| This software is licensed as "freeware." Permission to distribute | |||
| this software in source and binary forms, including incorporation | |||
| into other products, is hereby granted without a fee. THIS SOFTWARE | |||
| IS PROVIDED 'AS IS' AND WITHOUT ANY EXPRESSED OR IMPLIED WARRANTIES, | |||
| INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |||
| AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHOR SHALL NOT BE HELD | |||
| LIABLE FOR ANY DAMAGES RESULTING FROM THE USE OF THIS SOFTWARE, EITHER | |||
| DIRECTLY OR INDIRECTLY, INCLUDING, BUT NOT LIMITED TO, LOSS OF DATA | |||
| OR DATA BEING RENDERED INACCURATE. | |||
| */ | |||
| #include "port/sha1_portable.h" | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <stdint.h> | |||
| namespace leveldb { | |||
| namespace port { | |||
| /* | |||
| * Description: | |||
| * This class implements the Secure Hashing Standard as defined | |||
| * in FIPS PUB 180-1 published April 17, 1995. | |||
| */ | |||
/*
 * This structure will hold context information for the hashing
 * operation
 */
typedef struct SHA1Context {
  unsigned Message_Digest[5];      /* Message Digest (output)          */
  unsigned Length_Low;             /* Low 32 bits of message length in bits  */
  unsigned Length_High;            /* High 32 bits of message length in bits */
  unsigned char Message_Block[64]; /* 512-bit message blocks           */
  int Message_Block_Index;         /* Index into message block array   */
  bool Computed;                   /* Is the digest computed?          */
  bool Corrupted;                  /* Is the message digest corrupted? */
} SHA1Context;
| /* | |||
| * Portability Issues: | |||
| * SHA-1 is defined in terms of 32-bit "words". This code was | |||
| * written with the expectation that the processor has at least | |||
| * a 32-bit machine word size. If the machine word size is larger, | |||
| * the code should still function properly. One caveat to that | |||
| * is that the input functions taking characters and character | |||
| * arrays assume that only 8 bits of information are stored in each | |||
| * character. | |||
| */ | |||
/*
 * Define the circular shift macro: rotate "word" left by "bits".  The
 * 0xFFFFFFFF mask keeps the result within 32 bits even when the native
 * "unsigned" type is wider.
 */
#define SHA1CircularShift(bits,word) \
        ((((word) << (bits)) & 0xFFFFFFFF) | \
        ((word) >> (32-(bits))))

/* Function prototypes (definitions appear below) */
static void SHA1ProcessMessageBlock(SHA1Context *);
static void SHA1PadMessage(SHA1Context *);
// Initialize the SHA1Context in preparation for computing a new
// message digest.
static void SHA1Reset(SHA1Context* context) {
  context->Length_Low = 0;
  context->Length_High = 0;
  context->Message_Block_Index = 0;

  // Initial hash values H0..H4 from FIPS PUB 180-1.
  context->Message_Digest[0] = 0x67452301;
  context->Message_Digest[1] = 0xEFCDAB89;
  context->Message_Digest[2] = 0x98BADCFE;
  context->Message_Digest[3] = 0x10325476;
  context->Message_Digest[4] = 0xC3D2E1F0;

  context->Computed = false;
  context->Corrupted = false;
}
// This function will return the 160-bit message digest into the
// Message_Digest array within the SHA1Context provided.
// Returns false if the context was marked corrupted (too much input or
// input after finalization); the digest is then unusable.
static bool SHA1Result(SHA1Context *context) {
  if (context->Corrupted) {
    return false;
  }
  if (!context->Computed) {
    // Finalize lazily so padding/processing happens exactly once.
    SHA1PadMessage(context);
    context->Computed = true;
  }
  return true;
}
// This function accepts an array of bytes as the next portion of
// the message.  May be called repeatedly to hash a stream.
// NOTE(review): "length" is a 32-bit unsigned; callers passing a larger
// size_t (see SHA1_Hash_Portable) would silently truncate — confirm no
// caller exceeds 4GB per call.
static void SHA1Input(SHA1Context *context,
                      const unsigned char *message_array,
                      unsigned length) {
  if (!length) return;

  // Feeding data after the digest was computed (or after corruption)
  // poisons the context rather than producing a wrong digest.
  if (context->Computed || context->Corrupted) {
    context->Corrupted = true;
    return;
  }

  while(length-- && !context->Corrupted) {
    context->Message_Block[context->Message_Block_Index++] =
        (*message_array & 0xFF);

    // Track total message length in bits as a 64-bit value split across
    // two 32-bit counters.
    context->Length_Low += 8;
    /* Force it to 32 bits */
    context->Length_Low &= 0xFFFFFFFF;
    if (context->Length_Low == 0) {
      context->Length_High++;
      /* Force it to 32 bits */
      context->Length_High &= 0xFFFFFFFF;
      if (context->Length_High == 0)
      {
        /* Message is too long (>= 2^64 bits) */
        context->Corrupted = true;
      }
    }

    // Process each full 512-bit block as soon as it fills up.
    if (context->Message_Block_Index == 64)
    {
      SHA1ProcessMessageBlock(context);
    }

    message_array++;
  }
}
// This function will process the next 512 bits of the message stored
// in the Message_Block array, folding them into Message_Digest per the
// FIPS 180-1 compression function, and resets Message_Block_Index.
static void SHA1ProcessMessageBlock(SHA1Context *context) {
  const unsigned K[] =            // Round constants defined in SHA-1
      {
        0x5A827999,
        0x6ED9EBA1,
        0x8F1BBCDC,
        0xCA62C1D6
      };
  int         t;                  // Loop counter
  unsigned    temp;               // Temporary word value
  unsigned    W[80];              // Word sequence (message schedule)
  unsigned    A, B, C, D, E;      // Word buffers

  // Initialize the first 16 words in the array W: the 64 block bytes
  // interpreted as big-endian 32-bit words.
  for(t = 0; t < 16; t++) {
    W[t] = ((unsigned) context->Message_Block[t * 4]) << 24;
    W[t] |= ((unsigned) context->Message_Block[t * 4 + 1]) << 16;
    W[t] |= ((unsigned) context->Message_Block[t * 4 + 2]) << 8;
    W[t] |= ((unsigned) context->Message_Block[t * 4 + 3]);
  }

  // Expand to 80 words via the rotate-by-one recurrence.
  for(t = 16; t < 80; t++) {
    W[t] = SHA1CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
  }

  A = context->Message_Digest[0];
  B = context->Message_Digest[1];
  C = context->Message_Digest[2];
  D = context->Message_Digest[3];
  E = context->Message_Digest[4];

  // Rounds 0-19: f = Ch(B,C,D)
  for(t = 0; t < 20; t++) {
    temp =  SHA1CircularShift(5,A) +
        ((B & C) | ((~B) & D)) + E + W[t] + K[0];
    temp &= 0xFFFFFFFF;
    E = D;
    D = C;
    C = SHA1CircularShift(30,B);
    B = A;
    A = temp;
  }

  // Rounds 20-39: f = Parity(B,C,D)
  for(t = 20; t < 40; t++) {
    temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
    temp &= 0xFFFFFFFF;
    E = D;
    D = C;
    C = SHA1CircularShift(30,B);
    B = A;
    A = temp;
  }

  // Rounds 40-59: f = Maj(B,C,D)
  for(t = 40; t < 60; t++) {
    temp = SHA1CircularShift(5,A) +
        ((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
    temp &= 0xFFFFFFFF;
    E = D;
    D = C;
    C = SHA1CircularShift(30,B);
    B = A;
    A = temp;
  }

  // Rounds 60-79: f = Parity(B,C,D)
  for(t = 60; t < 80; t++) {
    temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
    temp &= 0xFFFFFFFF;
    E = D;
    D = C;
    C = SHA1CircularShift(30,B);
    B = A;
    A = temp;
  }

  // Add this block's result into the running digest (mod 2^32).
  context->Message_Digest[0] = (context->Message_Digest[0] + A) & 0xFFFFFFFF;
  context->Message_Digest[1] = (context->Message_Digest[1] + B) & 0xFFFFFFFF;
  context->Message_Digest[2] = (context->Message_Digest[2] + C) & 0xFFFFFFFF;
  context->Message_Digest[3] = (context->Message_Digest[3] + D) & 0xFFFFFFFF;
  context->Message_Digest[4] = (context->Message_Digest[4] + E) & 0xFFFFFFFF;

  context->Message_Block_Index = 0;
}
// According to the standard, the message must be padded to an even
// 512 bits.  The first padding bit must be a '1'.  The last 64 bits
// represent the length of the original message.  All bits in between
// should be 0.  This function will pad the message according to those
// rules by filling the Message_Block array accordingly.  It will also
// call SHA1ProcessMessageBlock() appropriately.  When it returns, it
// can be assumed that the message digest has been computed.
static void SHA1PadMessage(SHA1Context *context) {
  // Check to see if the current message block is too small to hold
  // the initial padding bits and length.  If so, we will pad the
  // block, process it, and then continue padding into a second block.
  if (context->Message_Block_Index > 55) {
    // Not enough room for the 8-byte length: finish this block with the
    // 0x80 marker and zeros, process it, then zero-fill a second block
    // up to the length field.
    context->Message_Block[context->Message_Block_Index++] = 0x80;
    while(context->Message_Block_Index < 64) {
      context->Message_Block[context->Message_Block_Index++] = 0;
    }

    SHA1ProcessMessageBlock(context);

    while(context->Message_Block_Index < 56) {
      context->Message_Block[context->Message_Block_Index++] = 0;
    }
  } else {
    // The marker and zero padding fit before the length field.
    context->Message_Block[context->Message_Block_Index++] = 0x80;
    while(context->Message_Block_Index < 56) {
      context->Message_Block[context->Message_Block_Index++] = 0;
    }
  }

  // Store the message length (in bits) as the last 8 octets, big-endian.
  context->Message_Block[56] = (context->Length_High >> 24) & 0xFF;
  context->Message_Block[57] = (context->Length_High >> 16) & 0xFF;
  context->Message_Block[58] = (context->Length_High >> 8) & 0xFF;
  context->Message_Block[59] = (context->Length_High) & 0xFF;
  context->Message_Block[60] = (context->Length_Low >> 24) & 0xFF;
  context->Message_Block[61] = (context->Length_Low >> 16) & 0xFF;
  context->Message_Block[62] = (context->Length_Low >> 8) & 0xFF;
  context->Message_Block[63] = (context->Length_Low) & 0xFF;

  SHA1ProcessMessageBlock(context);
}
// One-shot SHA-1 over data[0,len-1]: reset a stack-local context, feed all
// input, finalize, and serialize the digest big-endian into
// hash_array[0..19].
// NOTE(review): SHA1Input takes "unsigned length", so a size_t len larger
// than 32 bits would be truncated on LP64 — confirm callers stay below 4GB.
void SHA1_Hash_Portable(const char* data, size_t len, char* hash_array) {
  SHA1Context context;
  SHA1Reset(&context);
  SHA1Input(&context, reinterpret_cast<const unsigned char*>(data), len);
  bool ok = SHA1Result(&context);
  if (!ok) {
    // Only reachable if the context was corrupted (e.g. >= 2^64 input bits).
    fprintf(stderr, "Unexpected error in SHA1_Hash_Portable code\n");
    exit(1);
  }
  // Emit each 32-bit digest word most-significant byte first.
  for (int i = 0; i < 5; i++) {
    uint32_t value = context.Message_Digest[i];
    hash_array[i*4 + 0] = (value >> 24) & 0xff;
    hash_array[i*4 + 1] = (value >> 16) & 0xff;
    hash_array[i*4 + 2] = (value >> 8) & 0xff;
    hash_array[i*4 + 3] = value & 0xff;
  }
}
| } | |||
| } | |||
| @ -0,0 +1,25 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_ | |||
| #define STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_ | |||
| #include <stddef.h> | |||
| namespace leveldb { | |||
| namespace port { | |||
// Compute the SHA1 hash value of "data[0..len-1]" and store it in
// "hash_array[0..19]". hash_array must have 20 bytes of space available.
//
// This function is portable but may not be as fast as a version
// optimized for your platform.  It is provided as a default method
// that can be used when porting leveldb to a new platform if no
// better SHA1 hash implementation is available.
// (Uses only stack-local state; see sha1_portable.cc.)
void SHA1_Hash_Portable(const char* data, size_t len, char* hash_array);
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_ | |||
| @ -0,0 +1,55 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "port/port.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
| namespace port { | |||
| class SHA1 { }; | |||
| static std::string TestSHA1(const char* data, size_t len) { | |||
| char hash_val[20]; | |||
| SHA1_Hash(data, len, hash_val); | |||
| char buf[41]; | |||
| for (int i = 0; i < 20; i++) { | |||
| snprintf(buf + i * 2, 41 - i * 2, | |||
| "%02x", | |||
| static_cast<unsigned int>(static_cast<unsigned char>( | |||
| hash_val[i]))); | |||
| } | |||
| return std::string(buf, 40); | |||
| } | |||
// Known-answer tests: the expected strings are the published SHA-1
// digests of "", "hello", and 10000 repetitions of 'x'.
TEST(SHA1, Simple) {
  ASSERT_EQ("da39a3ee5e6b4b0d3255bfef95601890afd80709", TestSHA1("", 0));
  ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d", TestSHA1("hello", 5));
  std::string x(10000, 'x');
  ASSERT_EQ("f8c5cde791c5056cf515881e701c8a9ecb439a75",
            TestSHA1(x.data(), x.size()));
}
// Throughput benchmark: hashes 100MB of data kIters times and reports
// MB/s to stderr.  Not a correctness test.
TEST(SHA1, Benchmark) {
  std::string data(1048576 * 100, 'x');  // 100 MB of input
  double start = Env::Default()->NowMicros() * 1e-6;
  static const int kIters = 10;
  // OR one digest byte into "sha1" so the hash computation cannot be
  // optimized away; printed as dummy below.
  uint32_t sha1 = 0;
  for (int i = 0; i < kIters; i++) {
    char hash_val[20];
    SHA1_Hash(data.data(), data.size(), hash_val);
    sha1 |= hash_val[0];
  }
  double finish = Env::Default()->NowMicros() * 1e-6;
  double mb = (static_cast<long long int>(data.size()) * kIters) / 1048576.0;
  fprintf(stderr, "SHA1 %0.0f MB: %.3f secs; %.1f MB/s, dummy=0x%02x\n",
          mb, (finish - start), mb / (finish - start), sha1);
}
| } | |||
| } | |||
// Test-harness entry point: runs every TEST registered in this file.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,261 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // Decodes the blocks generated by block_builder.cc. | |||
| #include "table/block.h" | |||
| #include <vector> | |||
| #include <algorithm> | |||
| #include "include/comparator.h" | |||
| #include "util/coding.h" | |||
| #include "util/logging.h" | |||
| namespace leveldb { | |||
// Number of restart points, stored as a fixed32 in the last 4 bytes of
// the block (see the trailer format in block_builder.cc).
inline uint32_t Block::NumRestarts() const {
  assert(size_ >= 2*sizeof(uint32_t));
  return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
}
| Block::Block(const char* data, size_t size) | |||
| : data_(data), | |||
| size_(size) { | |||
| if (size_ < sizeof(uint32_t)) { | |||
| size_ = 0; // Error marker | |||
| } else { | |||
| restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); | |||
| if (restart_offset_ > size_ - sizeof(uint32_t)) { | |||
| // The size is too small for NumRestarts() and therefore | |||
| // restart_offset_ wrapped around. | |||
| size_ = 0; | |||
| } | |||
| } | |||
| } | |||
// Releases the block contents; Block owns data_ (see constructor contract).
Block::~Block() {
  delete[] data_;
}
| // Helper routine: decode the next block entry starting at "p", | |||
| // storing the number of shared key bytes, non_shared key bytes, | |||
| // and the length of the value in "*shared", "*non_shared", and | |||
| // "*value_length", respectively. Will not derefence past "limit". | |||
| // | |||
| // If any errors are detected, returns NULL. Otherwise, returns a | |||
| // pointer to the key delta (just past the three decoded values). | |||
| static inline const char* DecodeEntry(const char* p, const char* limit, | |||
| uint32_t* shared, | |||
| uint32_t* non_shared, | |||
| uint32_t* value_length) { | |||
| if (limit - p < 3) return NULL; | |||
| *shared = reinterpret_cast<const unsigned char*>(p)[0]; | |||
| *non_shared = reinterpret_cast<const unsigned char*>(p)[1]; | |||
| *value_length = reinterpret_cast<const unsigned char*>(p)[2]; | |||
| if ((*shared | *non_shared | *value_length) < 128) { | |||
| // Fast path: all three values are encoded in one byte each | |||
| p += 3; | |||
| } else { | |||
| if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL; | |||
| if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL; | |||
| if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL; | |||
| } | |||
| if (limit - p < (*non_shared + *value_length)) return NULL; | |||
| return p; | |||
| } | |||
// Iterator over the entries of a single block.  Keys are materialized in
// key_ by reassembling the shared prefix from the previous entry; value_
// points directly into the block's data (valid while the Block lives).
class Block::Iter : public Iterator {
 private:
  const Comparator* const comparator_;
  const char* const data_;      // underlying block contents
  uint32_t const restarts_;     // Offset of restart array (list of fixed32)
  uint32_t const num_restarts_; // Number of uint32_t entries in restart array

  // current_ is offset in data_ of current entry.  >= restarts_ if !Valid
  uint32_t current_;
  uint32_t restart_index_;  // Index of restart block in which current_ falls
  std::string key_;         // Full key of current entry (prefix reassembled)
  Slice value_;             // Points into data_
  Status status_;

  inline int Compare(const Slice& a, const Slice& b) const {
    return comparator_->Compare(a, b);
  }

  // Return the offset in data_ just past the end of the current entry.
  inline uint32_t NextEntryOffset() const {
    return (value_.data() + value_.size()) - data_;
  }

  // Absolute offset in data_ of the index-th restart point.
  uint32_t GetRestartPoint(uint32_t index) {
    assert(index < num_restarts_);
    return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
  }

  // Position just before the entry at restart point "index"; the next
  // ParseNextKey() will decode that entry.
  void SeekToRestartPoint(uint32_t index) {
    key_.clear();
    restart_index_ = index;
    // current_ will be fixed by ParseNextKey();

    // ParseNextKey() starts at the end of value_, so set value_ accordingly
    uint32_t offset = GetRestartPoint(index);
    value_ = Slice(data_ + offset, 0);
  }

 public:
  Iter(const Comparator* comparator,
       const char* data,
       uint32_t restarts,
       uint32_t num_restarts)
      : comparator_(comparator),
        data_(data),
        restarts_(restarts),
        num_restarts_(num_restarts),
        current_(restarts_),          // Start invalid (current_ >= restarts_)
        restart_index_(num_restarts_) {
    assert(num_restarts_ > 0);
  }

  virtual bool Valid() const { return current_ < restarts_; }
  virtual Status status() const { return status_; }
  virtual Slice key() const {
    assert(Valid());
    return key_;
  }
  virtual Slice value() const {
    assert(Valid());
    return value_;
  }

  virtual void Next() {
    assert(Valid());
    ParseNextKey();
  }

  // Entries are forward-delta-encoded, so stepping backwards requires
  // re-scanning forward from the nearest preceding restart point.
  virtual void Prev() {
    assert(Valid());

    // Scan backwards to a restart point before current_
    const uint32_t original = current_;
    while (GetRestartPoint(restart_index_) >= original) {
      if (restart_index_ == 0) {
        // No more entries
        current_ = restarts_;
        restart_index_ = num_restarts_;
        return;
      }
      restart_index_--;
    }

    SeekToRestartPoint(restart_index_);
    do {
      // Loop until end of current entry hits the start of original entry
    } while (ParseNextKey() && NextEntryOffset() < original);
  }

  virtual void Seek(const Slice& target) {
    // Binary search in restart array to find the last restart point
    // with a key < target (upper-biased midpoint keeps it terminating).
    uint32_t left = 0;
    uint32_t right = num_restarts_ - 1;
    while (left < right) {
      uint32_t mid = (left + right + 1) / 2;
      uint32_t region_offset = GetRestartPoint(mid);
      uint32_t shared, non_shared, value_length;
      const char* key_ptr = DecodeEntry(data_ + region_offset,
                                        data_ + restarts_,
                                        &shared, &non_shared, &value_length);
      if (key_ptr == NULL || (shared != 0)) {
        // A restart entry must store its full key (shared == 0).
        CorruptionError();
        return;
      }
      Slice mid_key(key_ptr, non_shared);
      if (Compare(mid_key, target) < 0) {
        // Key at "mid" is smaller than "target".  Therefore all
        // blocks before "mid" are uninteresting.
        left = mid;
      } else {
        // Key at "mid" is >= "target".  Therefore all blocks at or
        // after "mid" are uninteresting.
        right = mid - 1;
      }
    }

    // Linear search (within restart block) for first key >= target
    SeekToRestartPoint(left);
    while (true) {
      if (!ParseNextKey()) {
        return;
      }
      if (Compare(key_, target) >= 0) {
        return;
      }
    }
  }

  virtual void SeekToFirst() {
    SeekToRestartPoint(0);
    ParseNextKey();
  }

  virtual void SeekToLast() {
    // Walk forward from the last restart point to the final entry.
    SeekToRestartPoint(num_restarts_ - 1);
    while (ParseNextKey() && NextEntryOffset() < restarts_) {
      // Keep skipping
    }
  }

 private:
  // Mark the iterator invalid and record a corruption status.
  void CorruptionError() {
    current_ = restarts_;
    restart_index_ = num_restarts_;
    status_ = Status::Corruption("bad entry in block");
    key_.clear();
    value_.clear();
  }

  // Decode the entry following the current one into key_/value_.
  // Returns false (and invalidates the iterator) at end-of-block or on
  // a malformed entry.
  bool ParseNextKey() {
    current_ = NextEntryOffset();
    const char* p = data_ + current_;
    const char* limit = data_ + restarts_;  // Restarts come right after data
    if (p >= limit) {
      // No more entries to return.  Mark as invalid.
      current_ = restarts_;
      restart_index_ = num_restarts_;
      return false;
    }

    // Decode next entry
    uint32_t shared, non_shared, value_length;
    p = DecodeEntry(p, limit, &shared, &non_shared, &value_length);
    if (p == NULL || key_.size() < shared) {
      CorruptionError();
      return false;
    } else {
      // Keep the shared prefix of the previous key, append the delta.
      key_.resize(shared);
      key_.append(p, non_shared);
      value_ = Slice(p + non_shared, value_length);
      // Advance restart_index_ to the restart block containing current_.
      while (restart_index_ + 1 < num_restarts_ &&
             GetRestartPoint(restart_index_ + 1) < current_) {
        ++restart_index_;
      }
      return true;
    }
  }
};
// Returns a new iterator over this block; the caller is responsible for
// deleting it, and it must not outlive this Block (values point into data_).
Iterator* Block::NewIterator(const Comparator* cmp) {
  // Too small to hold even one restart entry plus the restart count.
  if (size_ < 2*sizeof(uint32_t)) {
    return NewErrorIterator(Status::Corruption("bad block contents"));
  }
  const uint32_t num_restarts = NumRestarts();
  if (num_restarts == 0) {
    return NewEmptyIterator();
  } else {
    return new Iter(cmp, data_, restart_offset_, num_restarts);
  }
}
| } | |||
| @ -0,0 +1,43 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_TABLE_BLOCK_H_ | |||
| #define STORAGE_LEVELDB_TABLE_BLOCK_H_ | |||
| #include <stddef.h> | |||
| #include <stdint.h> | |||
| #include "include/iterator.h" | |||
| namespace leveldb { | |||
| class Comparator; | |||
// An immutable, decodable table block (see block_builder.cc for the
// on-disk format).  Iteration and lookup are provided via NewIterator().
class Block {
 public:
  // Initialize the block with the specified contents.
  // Takes ownership of data[] and will delete[] it when done.
  Block(const char* data, size_t size);
  ~Block();

  size_t size() const { return size_; }
  // Caller owns the returned iterator; it must not outlive this Block.
  Iterator* NewIterator(const Comparator* comparator);

 private:
  // Reads the restart count from the block trailer.
  uint32_t NumRestarts() const;

  const char* data_;
  size_t size_;
  uint32_t restart_offset_;     // Offset in data_ of restart array

  // No copying allowed
  Block(const Block&);
  void operator=(const Block&);

  class Iter;
};
| } | |||
| #endif // STORAGE_LEVELDB_TABLE_BLOCK_H_ | |||
| @ -0,0 +1,109 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // BlockBuilder generates blocks where keys are prefix-compressed: | |||
| // | |||
| // When we store a key, we drop the prefix shared with the previous | |||
| // string. This helps reduce the space requirement significantly. | |||
| // Furthermore, once every K keys, we do not apply the prefix | |||
| // compression and store the entire key. We call this a "restart | |||
| // point". The tail end of the block stores the offsets of all of the | |||
| // restart points, and can be used to do a binary search when looking | |||
| // for a particular key. Values are stored as-is (without compression) | |||
| // immediately following the corresponding key. | |||
| // | |||
| // An entry for a particular key-value pair has the form: | |||
| // shared_bytes: varint32 | |||
| // unshared_bytes: varint32 | |||
| // value_length: varint32 | |||
| // key_delta: char[unshared_bytes] | |||
| // value: char[value_length] | |||
| // shared_bytes == 0 for restart points. | |||
| // | |||
| // The trailer of the block has the form: | |||
| // restarts: uint32[num_restarts] | |||
| // num_restarts: uint32 | |||
| // restarts[i] contains the offset within the block of the ith restart point. | |||
| #include "table/block_builder.h" | |||
| #include <algorithm> | |||
| #include <assert.h> | |||
| #include "include/comparator.h" | |||
| #include "include/table_builder.h" | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
// options must outlive this builder (held by pointer; supplies the
// comparator and block_restart_interval).
BlockBuilder::BlockBuilder(const Options* options)
    : options_(options),
      restarts_(),
      counter_(0),
      finished_(false) {
  assert(options->block_restart_interval >= 1);
  restarts_.push_back(0);       // First restart point is at offset 0
}
| void BlockBuilder::Reset() { | |||
| buffer_.clear(); | |||
| restarts_.clear(); | |||
| restarts_.push_back(0); // First restart point is at offset 0 | |||
| counter_ = 0; | |||
| finished_ = false; | |||
| last_key_.clear(); | |||
| } | |||
// Size the finished block would occupy: entry data plus the restart
// array and its length word (exact before Finish() appends the trailer).
size_t BlockBuilder::CurrentSizeEstimate() const {
  return (buffer_.size() +                       // Raw data buffer
          restarts_.size() * sizeof(uint32_t) +  // Restart array
          sizeof(uint32_t));                     // Restart array length
}
| Slice BlockBuilder::Finish() { | |||
| // Append restart array | |||
| for (int i = 0; i < restarts_.size(); i++) { | |||
| PutFixed32(&buffer_, restarts_[i]); | |||
| } | |||
| PutFixed32(&buffer_, restarts_.size()); | |||
| finished_ = true; | |||
| return Slice(buffer_); | |||
| } | |||
| void BlockBuilder::Add(const Slice& key, const Slice& value) { | |||
| Slice last_key_piece(last_key_); | |||
| assert(!finished_); | |||
| assert(counter_ <= options_->block_restart_interval); | |||
| assert(buffer_.empty() // No values yet? | |||
| || options_->comparator->Compare(key, last_key_piece) > 0); | |||
| size_t shared = 0; | |||
| if (counter_ < options_->block_restart_interval) { | |||
| // See how much sharing to do with previous string | |||
| const size_t min_length = std::min(last_key_piece.size(), key.size()); | |||
| while ((shared < min_length) && (last_key_[shared] == key[shared])) { | |||
| shared++; | |||
| } | |||
| } else { | |||
| // Restart compression | |||
| restarts_.push_back(buffer_.size()); | |||
| counter_ = 0; | |||
| } | |||
| const size_t non_shared = key.size() - shared; | |||
| // Add "<shared><non_shared><value_size>" to buffer_ | |||
| PutVarint32(&buffer_, shared); | |||
| PutVarint32(&buffer_, non_shared); | |||
| PutVarint32(&buffer_, value.size()); | |||
| // Add string delta to buffer_ followed by value | |||
| buffer_.append(key.data() + shared, non_shared); | |||
| buffer_.append(value.data(), value.size()); | |||
| // Update state | |||
| last_key_.resize(shared); | |||
| last_key_.append(key.data() + shared, non_shared); | |||
| assert(Slice(last_key_) == key); | |||
| counter_++; | |||
| } | |||
| } | |||
| @ -0,0 +1,57 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ | |||
| #define STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ | |||
| #include <vector> | |||
| #include <stdint.h> | |||
| #include "include/slice.h" | |||
| namespace leveldb { | |||
| struct Options; | |||
// Builds the prefix-compressed block format documented at the top of
// block_builder.cc.  Reusable: call Reset() between blocks.
class BlockBuilder {
 public:
  explicit BlockBuilder(const Options* options);

  // Reset the contents as if the BlockBuilder was just constructed.
  void Reset();

  // REQUIRES: Finish() has not been called since the last call to Reset().
  // REQUIRES: key is larger than any previously added key
  void Add(const Slice& key, const Slice& value);

  // Finish building the block and return a slice that refers to the
  // block contents.  The returned slice will remain valid for the
  // lifetime of this builder or until Reset() is called.
  Slice Finish();

  // Returns an estimate of the current (uncompressed) size of the block
  // we are building.
  size_t CurrentSizeEstimate() const;

  // Return true iff no entries have been added since the last Reset()
  bool empty() const {
    return buffer_.empty();
  }

 private:
  const Options*        options_;   // Not owned
  std::string           buffer_;    // Destination buffer
  std::vector<uint32_t> restarts_;  // Restart points
  int                   counter_;   // Number of entries emitted since restart
  bool                  finished_;  // Has Finish() been called?
  std::string           last_key_;  // Most recently added key (for deltas)

  // No copying allowed
  BlockBuilder(const BlockBuilder&);
  void operator=(const BlockBuilder&);
};
| } | |||
| #endif // STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ | |||
| @ -0,0 +1,131 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "table/format.h" | |||
| #include "include/env.h" | |||
| #include "port/port.h" | |||
| #include "table/block.h" | |||
| #include "util/coding.h" | |||
| #include "util/crc32c.h" | |||
| namespace leveldb { | |||
| void BlockHandle::EncodeTo(std::string* dst) const { | |||
| // Sanity check that all fields have been set | |||
| assert(offset_ != ~static_cast<uint64_t>(0)); | |||
| assert(size_ != ~static_cast<uint64_t>(0)); | |||
| PutVarint64(dst, offset_); | |||
| PutVarint64(dst, size_); | |||
| } | |||
| Status BlockHandle::DecodeFrom(Slice* input) { | |||
| if (GetVarint64(input, &offset_) && | |||
| GetVarint64(input, &size_)) { | |||
| return Status::OK(); | |||
| } else { | |||
| return Status::Corruption("bad block handle"); | |||
| } | |||
| } | |||
| void Footer::EncodeTo(std::string* dst) const { | |||
| #ifndef NDEBUG | |||
| const size_t original_size = dst->size(); | |||
| #endif | |||
| metaindex_handle_.EncodeTo(dst); | |||
| index_handle_.EncodeTo(dst); | |||
| dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding | |||
| PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber)); | |||
| PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32)); | |||
| assert(dst->size() == original_size + kEncodedLength); | |||
| } | |||
| Status Footer::DecodeFrom(Slice* input) { | |||
| const char* magic_ptr = input->data() + kEncodedLength - 8; | |||
| const uint32_t magic_lo = DecodeFixed32(magic_ptr); | |||
| const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4); | |||
| const uint64_t magic = ((static_cast<uint64_t>(magic_hi) << 32) | | |||
| (static_cast<uint64_t>(magic_lo))); | |||
| if (magic != kTableMagicNumber) { | |||
| return Status::InvalidArgument("not an sstable (bad magic number)"); | |||
| } | |||
| Status result = metaindex_handle_.DecodeFrom(input); | |||
| if (result.ok()) { | |||
| result = index_handle_.DecodeFrom(input); | |||
| } | |||
| if (result.ok()) { | |||
| // We skip over any leftover data (just padding for now) in "input" | |||
| const char* end = magic_ptr + 8; | |||
| *input = Slice(end, input->data() + input->size() - end); | |||
| } | |||
| return result; | |||
| } | |||
// Reads the block identified by "handle" from "file", verifying the
// trailer crc if requested, and returns a heap-allocated Block in
// *block.  On every failure path *block is left NULL and the scratch
// buffer is freed before returning.
Status ReadBlock(RandomAccessFile* file,
                 const ReadOptions& options,
                 const BlockHandle& handle,
                 Block** block) {
  *block = NULL;

  // Read the block contents as well as the type/crc footer.
  // See table_builder.cc for the code that built this structure.
  size_t n = handle.size();
  char* buf = new char[n + kBlockTrailerSize];
  Slice contents;
  Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
  if (!s.ok()) {
    delete[] buf;
    return s;
  }
  if (contents.size() != n + kBlockTrailerSize) {
    delete[] buf;
    return Status::Corruption("truncated block read");
  }

  // Check the crc of the type and the block contents
  const char* data = contents.data();  // Pointer to where Read put the data
  if (options.verify_checksums) {
    // The stored crc covers the n data bytes plus the 1-byte type tag
    // (see TableBuilder::WriteBlock).
    const uint32_t crc = crc32c::Unmask(DecodeFixed32(data + n + 1));
    const uint32_t actual = crc32c::Value(data, n + 1);
    if (actual != crc) {
      delete[] buf;
      s = Status::Corruption("block checksum mismatch");
      return s;
    }
  }

  // data[n] is the 1-byte compression type tag.
  switch (data[n]) {
    case kNoCompression:
      if (data != buf) {
        // File implementation gave us pointer to some other data.
        // Copy into buf[].
        memcpy(buf, data, n + kBlockTrailerSize);
      }
      // Ok
      break;
    case kLightweightCompression: {
      std::string decompressed;
      if (!port::Lightweight_Uncompress(data, n, &decompressed)) {
        delete[] buf;
        s = Status::Corruption("corrupted compressed block contents");
        return s;
      }
      // Replace buf with a copy of the decompressed contents.
      delete[] buf;  // Done with the compressed data
      buf = new char[decompressed.size()];
      memcpy(buf, decompressed.data(), decompressed.size());
      n = decompressed.size();
      break;
    }
    default:
      delete[] buf;
      return Status::Corruption("bad block type");
  }

  *block = new Block(buf, n);  // Block takes ownership of buf[]
  return Status::OK();
}
| } | |||
| @ -0,0 +1,103 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_TABLE_FORMAT_H_ | |||
| #define STORAGE_LEVELDB_TABLE_FORMAT_H_ | |||
| #include <string> | |||
| #include <stdint.h> | |||
| #include "include/slice.h" | |||
| #include "include/status.h" | |||
| #include "include/table_builder.h" | |||
| namespace leveldb { | |||
| class Block; | |||
| class RandomAccessFile; | |||
| struct ReadOptions; | |||
| // BlockHandle is a pointer to the extent of a file that stores a data | |||
| // block or a meta block. | |||
class BlockHandle {
 public:
  // Constructs with both fields set to the ~0 "not set" sentinel
  // (see the inline definition at the bottom of this file).
  BlockHandle();

  // The offset of the block in the file.
  uint64_t offset() const { return offset_; }
  void set_offset(uint64_t offset) { offset_ = offset; }

  // The size of the stored block
  uint64_t size() const { return size_; }
  void set_size(uint64_t size) { size_ = size; }

  // Appends offset then size as varint64s.
  void EncodeTo(std::string* dst) const;
  // Parses offset then size from *input.
  Status DecodeFrom(Slice* input);

  // Maximum encoding length of a BlockHandle
  // (two varint64s of at most 10 bytes each).
  enum { kMaxEncodedLength = 10 + 10 };

 private:
  uint64_t offset_;
  uint64_t size_;
};
| // Footer encapsulates the fixed information stored at the tail | |||
| // end of every table file. | |||
class Footer {
 public:
  Footer() { }

  // The block handle for the metaindex block of the table
  const BlockHandle& metaindex_handle() const { return metaindex_handle_; }
  void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; }

  // The block handle for the index block of the table
  const BlockHandle& index_handle() const {
    return index_handle_;
  }
  void set_index_handle(const BlockHandle& h) {
    index_handle_ = h;
  }

  // Appends the fixed-length footer encoding (handles, padding,
  // magic number) to *dst.
  void EncodeTo(std::string* dst) const;
  // Parses a footer from *input; checks the magic number first.
  Status DecodeFrom(Slice* input);

  // Encoded length of a Footer.  Note that the serialization of a
  // Footer will always occupy exactly this many bytes.  It consists
  // of two block handles and a magic number.
  enum {
    kEncodedLength = 2*BlockHandle::kMaxEncodedLength + 8
  };

 private:
  BlockHandle metaindex_handle_;
  BlockHandle index_handle_;
};
| // kTableMagicNumber was picked by running | |||
| // echo http://code.google.com/p/leveldb/ | sha1sum | |||
| // and taking the leading 64 bits. | |||
| static const uint64_t kTableMagicNumber = 0xdb4775248b80fb57ull; | |||
| // 1-byte type + 32-bit crc | |||
| static const size_t kBlockTrailerSize = 5; | |||
| // Read the block identified by "handle" from "file". On success, | |||
| // store a pointer to the heap-allocated result in *block and return | |||
| // OK. On failure store NULL in *block and return non-OK. | |||
| extern Status ReadBlock(RandomAccessFile* file, | |||
| const ReadOptions& options, | |||
| const BlockHandle& handle, | |||
| Block** block); | |||
// Implementation details follow.  Clients should ignore.
// Initialize both fields to ~0 as a "not set" sentinel; EncodeTo()
// asserts that real values have been assigned before encoding.
inline BlockHandle::BlockHandle()
    : offset_(~static_cast<uint64_t>(0)),
      size_(~static_cast<uint64_t>(0)) {
}
| } | |||
| #endif // STORAGE_LEVELDB_TABLE_FORMAT_H_ | |||
| @ -0,0 +1,68 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/iterator.h" | |||
| #include "util/logging.h" | |||
| namespace leveldb { | |||
| Iterator::Iterator() { | |||
| cleanup_.function = NULL; | |||
| cleanup_.next = NULL; | |||
| } | |||
| Iterator::~Iterator() { | |||
| if (cleanup_.function != NULL) { | |||
| (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); | |||
| for (Cleanup* c = cleanup_.next; c != NULL; ) { | |||
| (*c->function)(c->arg1, c->arg2); | |||
| Cleanup* next = c->next; | |||
| delete c; | |||
| c = next; | |||
| } | |||
| } | |||
| } | |||
| void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { | |||
| assert(func != NULL); | |||
| Cleanup* c; | |||
| if (cleanup_.function == NULL) { | |||
| c = &cleanup_; | |||
| } else { | |||
| c = new Cleanup; | |||
| c->next = cleanup_.next; | |||
| cleanup_.next = c; | |||
| } | |||
| c->function = func; | |||
| c->arg1 = arg1; | |||
| c->arg2 = arg2; | |||
| } | |||
| namespace { | |||
| class EmptyIterator : public Iterator { | |||
| public: | |||
| EmptyIterator(const Status& s) : status_(s) { } | |||
| virtual bool Valid() const { return false; } | |||
| virtual void Seek(const Slice& target) { } | |||
| virtual void SeekToFirst() { } | |||
| virtual void SeekToLast() { } | |||
| virtual void Next() { assert(false); } | |||
| virtual void Prev() { assert(false); } | |||
| Slice key() const { assert(false); return Slice(); } | |||
| Slice value() const { assert(false); return Slice(); } | |||
| virtual Status status() const { return status_; } | |||
| private: | |||
| Status status_; | |||
| }; | |||
| } | |||
// Returns an iterator over an empty sequence with an OK status.
Iterator* NewEmptyIterator() {
  return new EmptyIterator(Status::OK());
}

// Returns an iterator over an empty sequence whose status() reports
// the given error.
Iterator* NewErrorIterator(const Status& status) {
  return new EmptyIterator(status);
}
| } | |||
| @ -0,0 +1,64 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ | |||
| #define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ | |||
| namespace leveldb { | |||
// An internal wrapper class with an interface similar to Iterator that
| // caches the valid() and key() results for an underlying iterator. | |||
| // This can help avoid virtual function calls and also gives better | |||
| // cache locality. | |||
class IteratorWrapper {
 private:
  Iterator* iter_;  // Owned; deleted by the destructor and by Set()
  bool valid_;      // Cached copy of iter_->Valid()
  // Cached copy of the Slice returned by iter_->key(); only
  // meaningful when valid_ is true.  NOTE(review): the Slice
  // presumably refers to storage owned by the iterator, which is why
  // it is refreshed via Update() after every mutating call — confirm
  // against the Iterator::key() lifetime contract.
  Slice key_;
 public:
  IteratorWrapper(): iter_(NULL), valid_(false) { }
  explicit IteratorWrapper(Iterator* iter): iter_(NULL) {
    Set(iter);
  }
  ~IteratorWrapper() { delete iter_; }
  Iterator* iter() const { return iter_; }

  // Takes ownership of "iter" and will delete it when destroyed, or
  // when Set() is invoked again.
  void Set(Iterator* iter) {
    delete iter_;
    iter_ = iter;
    if (iter_ == NULL) {
      valid_ = false;
    } else {
      Update();
    }
  }

  // Iterator interface methods (Valid()/key() answer from the cache;
  // everything else forwards to iter_ and refreshes the cache).
  bool Valid() const { return valid_; }
  Slice key() const { assert(Valid()); return key_; }
  Slice value() const { assert(Valid()); return iter_->value(); }
  // Methods below require iter() != NULL
  Status status() const { assert(iter_); return iter_->status(); }
  void Next() { assert(iter_); iter_->Next(); Update(); }
  void Prev() { assert(iter_); iter_->Prev(); Update(); }
  void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); }
  void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); }
  void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); }

 private:
  // Re-read validity and key from the underlying iterator.
  void Update() {
    valid_ = iter_->Valid();
    if (valid_) {
      key_ = iter_->key();
    }
  }
};
| } | |||
| #endif // STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ | |||
| @ -0,0 +1,143 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "table/merger.h" | |||
| #include "include/comparator.h" | |||
| #include "include/iterator.h" | |||
| #include "table/iterator_wrapper.h" | |||
| namespace leveldb { | |||
| namespace { | |||
| class MergingIterator : public Iterator { | |||
| public: | |||
| MergingIterator(const Comparator* comparator, Iterator** children, int n) | |||
| : comparator_(comparator), | |||
| children_(new IteratorWrapper[n]), | |||
| n_(n), | |||
| current_(NULL) { | |||
| for (int i = 0; i < n; i++) { | |||
| children_[i].Set(children[i]); | |||
| } | |||
| } | |||
| virtual ~MergingIterator() { | |||
| delete[] children_; | |||
| } | |||
| virtual bool Valid() const { | |||
| return (current_ != NULL); | |||
| } | |||
| virtual void SeekToFirst() { | |||
| for (int i = 0; i < n_; i++) { | |||
| children_[i].SeekToFirst(); | |||
| } | |||
| FindSmallest(); | |||
| } | |||
| virtual void SeekToLast() { | |||
| for (int i = 0; i < n_; i++) { | |||
| children_[i].SeekToLast(); | |||
| } | |||
| FindLargest(); | |||
| } | |||
| virtual void Seek(const Slice& target) { | |||
| for (int i = 0; i < n_; i++) { | |||
| children_[i].Seek(target); | |||
| } | |||
| FindSmallest(); | |||
| } | |||
| virtual void Next() { | |||
| assert(Valid()); | |||
| current_->Next(); | |||
| FindSmallest(); | |||
| } | |||
| virtual void Prev() { | |||
| assert(Valid()); | |||
| current_->Prev(); | |||
| FindLargest(); | |||
| } | |||
| virtual Slice key() const { | |||
| assert(Valid()); | |||
| return current_->key(); | |||
| } | |||
| virtual Slice value() const { | |||
| assert(Valid()); | |||
| return current_->value(); | |||
| } | |||
| virtual Status status() const { | |||
| Status status; | |||
| for (int i = 0; i < n_; i++) { | |||
| status = children_[i].status(); | |||
| if (!status.ok()) { | |||
| break; | |||
| } | |||
| } | |||
| return status; | |||
| } | |||
| private: | |||
| void FindSmallest(); | |||
| void FindLargest(); | |||
| // We might want to use a heap in case there are lots of children. | |||
| // For now we use a simple array since we expect a very small number | |||
| // of children in leveldb. | |||
| const Comparator* comparator_; | |||
| IteratorWrapper* children_; | |||
| int n_; | |||
| IteratorWrapper* current_; | |||
| }; | |||
| void MergingIterator::FindSmallest() { | |||
| IteratorWrapper* smallest = NULL; | |||
| for (int i = 0; i < n_; i++) { | |||
| IteratorWrapper* child = &children_[i]; | |||
| if (child->Valid()) { | |||
| if (smallest == NULL) { | |||
| smallest = child; | |||
| } else if (comparator_->Compare(child->key(), smallest->key()) < 0) { | |||
| smallest = child; | |||
| } | |||
| } | |||
| } | |||
| current_ = smallest; | |||
| } | |||
| void MergingIterator::FindLargest() { | |||
| IteratorWrapper* largest = NULL; | |||
| for (int i = n_-1; i >= 0; i--) { | |||
| IteratorWrapper* child = &children_[i]; | |||
| if (child->Valid()) { | |||
| if (largest == NULL) { | |||
| largest = child; | |||
| } else if (comparator_->Compare(child->key(), largest->key()) > 0) { | |||
| largest = child; | |||
| } | |||
| } | |||
| } | |||
| current_ = largest; | |||
| } | |||
| } | |||
| Iterator* NewMergingIterator(const Comparator* cmp, Iterator** list, int n) { | |||
| assert(n >= 0); | |||
| if (n == 0) { | |||
| return NewEmptyIterator(); | |||
| } else if (n == 1) { | |||
| return list[0]; | |||
| } else { | |||
| return new MergingIterator(cmp, list, n); | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,26 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_TABLE_MERGER_H_ | |||
| #define STORAGE_LEVELDB_TABLE_MERGER_H_ | |||
| namespace leveldb { | |||
class Comparator;
class Iterator;

// Return an iterator that provides the union of the data in
// children[0,n-1].  Takes ownership of the child iterators and
// will delete them when the result iterator is deleted.
//
// The result does no duplicate suppression.  I.e., if a particular
// key is present in K child iterators, it will be yielded K times.
//
// REQUIRES: n >= 0
extern Iterator* NewMergingIterator(
    const Comparator* comparator, Iterator** children, int n);
| } | |||
| #endif // STORAGE_LEVELDB_TABLE_MERGER_H_ | |||
| @ -0,0 +1,175 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/table.h" | |||
| #include "include/cache.h" | |||
| #include "include/env.h" | |||
| #include "table/block.h" | |||
| #include "table/format.h" | |||
| #include "table/two_level_iterator.h" | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
// Private state of a Table.  Owns the index block; the file pointer
// is NOT deleted here.
struct Table::Rep {
  ~Rep() {
    delete index_block;
  }

  Options options;
  Status status;
  RandomAccessFile* file;        // Not deleted by ~Rep()
  uint64_t cache_id;             // Prefix for block-cache keys (see BlockReader)
  BlockHandle metaindex_handle;  // Handle to metaindex_block: saved from footer
  Block* index_block;            // Owned; deleted by ~Rep()
};
| Status Table::Open(const Options& options, | |||
| RandomAccessFile* file, | |||
| Table** table) { | |||
| *table = NULL; | |||
| const uint64_t size = file->Size(); | |||
| if (size < Footer::kEncodedLength) { | |||
| return Status::InvalidArgument("file is too short to be an sstable"); | |||
| } | |||
| char footer_space[Footer::kEncodedLength]; | |||
| Slice footer_input; | |||
| Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength, | |||
| &footer_input, footer_space); | |||
| if (!s.ok()) return s; | |||
| Footer footer; | |||
| s = footer.DecodeFrom(&footer_input); | |||
| if (!s.ok()) return s; | |||
| // Read the index block | |||
| Block* index_block = NULL; | |||
| if (s.ok()) { | |||
| s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block); | |||
| } | |||
| if (s.ok()) { | |||
| // We've successfully read the footer and the index block: we're | |||
| // ready to serve requests. | |||
| Rep* rep = new Table::Rep; | |||
| rep->options = options; | |||
| rep->file = file; | |||
| rep->metaindex_handle = footer.metaindex_handle(); | |||
| rep->index_block = index_block; | |||
| rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0); | |||
| *table = new Table(rep); | |||
| } else { | |||
| if (index_block) delete index_block; | |||
| } | |||
| return s; | |||
| } | |||
Table::~Table() {
  // Rep's destructor frees the index block; the file is not owned.
  delete rep_;
}
// Cleanup callback for iterators over blocks that are NOT in the
// block cache: the iterator owns the block outright and deletes it.
static void DeleteBlock(void* arg, void* ignored) {
  delete reinterpret_cast<Block*>(arg);
}

// Deleter registered with the block cache in BlockReader(); invoked
// by the cache when an entry is evicted.
static void DeleteCachedBlock(const Slice& key, void* value) {
  Block* block = reinterpret_cast<Block*>(value);
  delete block;
}

// Cleanup callback for iterators over cached blocks: releases the
// cache handle; the cache itself deletes the block on eviction.
static void ReleaseBlock(void* arg, void* h) {
  Cache* cache = reinterpret_cast<Cache*>(arg);
  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
  cache->Release(handle);
}
| // Convert an index iterator value (i.e., an encoded BlockHandle) | |||
| // into an iterator over the contents of the corresponding block. | |||
// Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block.
// Consults the block cache (when configured) before reading from the
// file, and registers the appropriate cleanup so the block is either
// deleted (uncached) or its cache handle released (cached) when the
// returned iterator is destroyed.
Iterator* Table::BlockReader(void* arg,
                             const ReadOptions& options,
                             const Slice& index_value) {
  Table* table = reinterpret_cast<Table*>(arg);
  Cache* block_cache = table->rep_->options.block_cache;
  Block* block = NULL;
  Cache::Handle* cache_handle = NULL;

  BlockHandle handle;
  Slice input = index_value;
  Status s = handle.DecodeFrom(&input);
  // We intentionally allow extra stuff in index_value so that we
  // can add more features in the future.

  if (s.ok()) {
    if (block_cache != NULL) {
      // Cache key: 16 bytes = this table's cache_id followed by the
      // block's offset within the file.
      char cache_key_buffer[16];
      EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
      EncodeFixed64(cache_key_buffer+8, handle.offset());
      Slice key(cache_key_buffer, sizeof(cache_key_buffer));
      cache_handle = block_cache->Lookup(key);
      if (cache_handle != NULL) {
        block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
      } else {
        s = ReadBlock(table->rep_->file, options, handle, &block);
        if (s.ok() && options.fill_cache) {
          // Hand the block to the cache; DeleteCachedBlock frees it
          // on eviction.
          cache_handle = block_cache->Insert(
              key, block, block->size(), &DeleteCachedBlock);
        }
      }
    } else {
      s = ReadBlock(table->rep_->file, options, handle, &block);
    }
  }

  Iterator* iter;
  if (block != NULL) {
    iter = block->NewIterator(table->rep_->options.comparator);
    if (cache_handle == NULL) {
      // Not cached: iterator owns the block.
      iter->RegisterCleanup(&DeleteBlock, block, NULL);
    } else {
      // Cached: keep the handle pinned until the iterator dies.
      iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
    }
  } else {
    iter = NewErrorIterator(s);
  }
  return iter;
}
// Returns a two-level iterator: the outer level walks the index
// block and the inner level (via BlockReader) walks each data block.
Iterator* Table::NewIterator(const ReadOptions& options) const {
  return NewTwoLevelIterator(
      rep_->index_block->NewIterator(rep_->options.comparator),
      &Table::BlockReader, const_cast<Table*>(this), options);
}
// Returns the approximate file offset at which the data for "key"
// begins (or would begin), by locating the index entry that covers
// "key" and reporting that block's offset.
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
  Iterator* index_iter =
      rep_->index_block->NewIterator(rep_->options.comparator);
  index_iter->Seek(key);
  uint64_t result;
  if (index_iter->Valid()) {
    BlockHandle handle;
    Slice input = index_iter->value();
    Status s = handle.DecodeFrom(&input);
    if (s.ok()) {
      result = handle.offset();
    } else {
      // Strange: we can't decode the block handle in the index block.
      // We'll just return the offset of the metaindex block, which is
      // close to the whole file size for this case.
      result = rep_->metaindex_handle.offset();
    }
  } else {
    // key is past the last key in the file.  Approximate the offset
    // by returning the offset of the metaindex block (which is
    // right near the end of the file).
    result = rep_->metaindex_handle.offset();
  }
  delete index_iter;
  return result;
}
| } | |||
| @ -0,0 +1,224 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/table_builder.h" | |||
| #include <assert.h> | |||
| #include <stdio.h> | |||
| #include "include/comparator.h" | |||
| #include "include/env.h" | |||
| #include "table/block_builder.h" | |||
| #include "table/format.h" | |||
| #include "util/coding.h" | |||
| #include "util/crc32c.h" | |||
| #include "util/logging.h" | |||
| namespace leveldb { | |||
// Private state of a TableBuilder.
struct TableBuilder::Rep {
  Options options;              // Options for data blocks
  Options index_block_options;  // Same, with block_restart_interval forced to 1
  WritableFile* file;           // Not owned
  uint64_t offset;              // Number of bytes appended to file so far
  Status status;                // Sticky: once !ok(), later ops are no-ops
  BlockBuilder data_block;
  BlockBuilder index_block;
  std::string last_key;         // Last key Added (or a shortened separator)
  int64_t num_entries;          // Number of calls to Add()
  bool closed;                  // Either Finish() or Abandon() has been called.

  // We do not emit the index entry for a block until we have seen the
  // first key for the next data block.  This allows us to use shorter
  // keys in the index block.  For example, consider a block boundary
  // between the keys "the quick brown fox" and "the who".  We can use
  // "the r" as the key for the index block entry since it is >= all
  // entries in the first block and < all entries in subsequent
  // blocks.
  //
  // Invariant: r->pending_index_entry is true only if data_block is empty.
  bool pending_index_entry;
  BlockHandle pending_handle;   // Handle to add to index block

  std::string compressed_output;  // Scratch buffer reused by WriteBlock()

  Rep(const Options& opt, WritableFile* f)
      : options(opt),
        index_block_options(opt),
        file(f),
        offset(0),
        data_block(&options),
        index_block(&index_block_options),
        num_entries(0),
        closed(false),
        pending_index_entry(false) {
    index_block_options.block_restart_interval = 1;
  }
};
// All builder state lives in rep_; "file" is used for output but is
// never deleted by the builder.
TableBuilder::TableBuilder(const Options& options, WritableFile* file)
    : rep_(new Rep(options, file)) {
}

TableBuilder::~TableBuilder() {
  assert(rep_->closed);  // Catch errors where caller forgot to call Finish()
  delete rep_;
}
// Replaces the builder's options mid-build.  The comparator may not
// change, since already-emitted keys were ordered by the old one.
Status TableBuilder::ChangeOptions(const Options& options) {
  // Note: if more fields are added to Options, update
  // this function to catch changes that should not be allowed to
  // change in the middle of building a Table.
  if (options.comparator != rep_->options.comparator) {
    return Status::InvalidArgument("changing comparator while building table");
  }

  // Note that any live BlockBuilders point to rep_->options and therefore
  // will automatically pick up the updated options.
  rep_->options = options;
  rep_->index_block_options = options;
  rep_->index_block_options.block_restart_interval = 1;
  return Status::OK();
}
// Appends a key/value pair.  REQUIRES: keys are added in strictly
// increasing order; Finish()/Abandon() have not been called.
void TableBuilder::Add(const Slice& key, const Slice& value) {
  Rep* r = rep_;
  assert(!r->closed);
  if (!ok()) return;
  if (r->num_entries > 0) {
    // Enforce strictly increasing key order.
    assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0);
  }

  if (r->pending_index_entry) {
    // First Add() after a Flush(): now that the next key is known we
    // can shorten last_key to a separator (>= every key in the
    // finished block, < every key in this one) and emit the deferred
    // index entry.  See the comment on Rep::pending_index_entry.
    assert(r->data_block.empty());
    r->options.comparator->FindShortestSeparator(&r->last_key, key);
    std::string handle_encoding;
    r->pending_handle.EncodeTo(&handle_encoding);
    r->index_block.Add(r->last_key, Slice(handle_encoding));
    r->pending_index_entry = false;
  }

  r->last_key.assign(key.data(), key.size());
  r->num_entries++;
  r->data_block.Add(key, value);

  // Cut a new data block once the current one reaches the target size.
  const size_t estimated_block_size = r->data_block.CurrentSizeEstimate();
  if (estimated_block_size >= r->options.block_size) {
    Flush();
  }
}
// Writes the current data block (if non-empty) to the file and
// flushes the file.  The block's index entry is deferred: see the
// comment on Rep::pending_index_entry.
void TableBuilder::Flush() {
  Rep* r = rep_;
  assert(!r->closed);
  if (!ok()) return;
  if (r->data_block.empty()) return;
  assert(!r->pending_index_entry);
  WriteBlock(&r->data_block, &r->pending_handle);
  if (ok()) {
    r->pending_index_entry = true;
    r->status = r->file->Flush();
  }
}
// Serializes "block" to the file (optionally compressed), records its
// offset/size into *handle, and resets the block builder.
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
  // File format contains a sequence of blocks where each block has:
  //    block_data: uint8[n]
  //    type: uint8
  //    crc: uint32
  assert(ok());
  Rep* r = rep_;
  Slice raw = block->Finish();

  Slice block_contents;
  CompressionType type = r->options.compression;
  // TODO(postrelease): Support more compression options: zlib?
  switch (type) {
    case kNoCompression:
      block_contents = raw;
      break;
    case kLightweightCompression: {
      port::Lightweight_Compress(raw.data(), raw.size(), &r->compressed_output);
      block_contents = r->compressed_output;
      if (block_contents.size() >= raw.size() - (raw.size() / 8u)) {
        // Compressed less than 12.5%, so just store uncompressed form
        block_contents = raw;
        type = kNoCompression;
      }
      break;
    }
  }

  // Record where this block lives so the caller can index it.
  handle->set_offset(r->offset);
  handle->set_size(block_contents.size());
  r->status = r->file->Append(block_contents);
  if (r->status.ok()) {
    char trailer[kBlockTrailerSize];
    trailer[0] = type;
    // The crc covers the block contents AND the 1-byte type tag;
    // ReadBlock verifies the same span.
    uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size());
    crc = crc32c::Extend(crc, trailer, 1);  // Extend crc to cover block type
    EncodeFixed32(trailer+1, crc32c::Mask(crc));
    r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
    if (r->status.ok()) {
      r->offset += block_contents.size() + kBlockTrailerSize;
    }
  }
  r->compressed_output.clear();
  block->Reset();
}
// Returns the builder's sticky status: once a write fails, later
// operations no-op (see the !ok() early returns in Add()/Flush()).
Status TableBuilder::status() const {
  return rep_->status;
}
// Flushes remaining data, then writes the metaindex block, the index
// block, and the footer.  Marks the builder closed regardless of
// success; returns the final status.
Status TableBuilder::Finish() {
  Rep* r = rep_;
  Flush();
  assert(!r->closed);
  r->closed = true;
  BlockHandle metaindex_block_handle;
  BlockHandle index_block_handle;
  if (ok()) {
    // Currently empty; reserved for future use.
    BlockBuilder meta_index_block(&r->options);
    // TODO(postrelease): Add stats and other meta blocks
    WriteBlock(&meta_index_block, &metaindex_block_handle);
  }
  if (ok()) {
    if (r->pending_index_entry) {
      // Emit the index entry for the final data block.  There is no
      // following key, so use a short successor of the last key.
      r->options.comparator->FindShortSuccessor(&r->last_key);
      std::string handle_encoding;
      r->pending_handle.EncodeTo(&handle_encoding);
      r->index_block.Add(r->last_key, Slice(handle_encoding));
      r->pending_index_entry = false;
    }
    WriteBlock(&r->index_block, &index_block_handle);
  }
  if (ok()) {
    // The footer points at the metaindex and index blocks just written.
    Footer footer;
    footer.set_metaindex_handle(metaindex_block_handle);
    footer.set_index_handle(index_block_handle);
    std::string footer_encoding;
    footer.EncodeTo(&footer_encoding);
    r->status = r->file->Append(footer_encoding);
    if (r->status.ok()) {
      r->offset += footer_encoding.size();
    }
  }
  return r->status;
}
| void TableBuilder::Abandon() { | |||
| Rep* r = rep_; | |||
| assert(!r->closed); | |||
| r->closed = true; | |||
| } | |||
| uint64_t TableBuilder::NumEntries() const { | |||
| return rep_->num_entries; | |||
| } | |||
// Number of bytes written to the underlying file so far (only advanced
// after a successful Append).
uint64_t TableBuilder::FileSize() const {
  return rep_->offset;
}
| } | |||
| @ -0,0 +1,808 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/table.h" | |||
| #include <map> | |||
| #include "db/dbformat.h" | |||
| #include "db/memtable.h" | |||
| #include "db/write_batch_internal.h" | |||
| #include "include/db.h" | |||
| #include "include/env.h" | |||
| #include "include/iterator.h" | |||
| #include "include/table_builder.h" | |||
| #include "table/block.h" | |||
| #include "table/block_builder.h" | |||
| #include "table/format.h" | |||
| #include "util/random.h" | |||
| #include "util/testharness.h" | |||
| #include "util/testutil.h" | |||
| namespace leveldb { | |||
| // Return reverse of "key". | |||
| // Used to test non-lexicographic comparators. | |||
| static std::string Reverse(const Slice& key) { | |||
| std::string str(key.ToString()); | |||
| std::string rev(str.rbegin(), str.rend()); | |||
| return rev; | |||
| } | |||
| namespace { | |||
| class ReverseKeyComparator : public Comparator { | |||
| public: | |||
| virtual const char* Name() const { | |||
| return "leveldb.ReverseBytewiseComparator"; | |||
| } | |||
| virtual int Compare(const Slice& a, const Slice& b) const { | |||
| return BytewiseComparator()->Compare(Reverse(a), Reverse(b)); | |||
| } | |||
| virtual void FindShortestSeparator( | |||
| std::string* start, | |||
| const Slice& limit) const { | |||
| std::string s = Reverse(*start); | |||
| std::string l = Reverse(limit); | |||
| BytewiseComparator()->FindShortestSeparator(&s, l); | |||
| *start = Reverse(s); | |||
| } | |||
| virtual void FindShortSuccessor(std::string* key) const { | |||
| std::string s = Reverse(*key); | |||
| BytewiseComparator()->FindShortSuccessor(&s); | |||
| *key = Reverse(s); | |||
| } | |||
| }; | |||
| } | |||
| static ReverseKeyComparator reverse_key_comparator; | |||
| static void Increment(const Comparator* cmp, std::string* key) { | |||
| if (cmp == BytewiseComparator()) { | |||
| key->push_back('\0'); | |||
| } else { | |||
| assert(cmp == &reverse_key_comparator); | |||
| std::string rev = Reverse(*key); | |||
| rev.push_back('\0'); | |||
| *key = Reverse(rev); | |||
| } | |||
| } | |||
| // An STL comparator that uses a Comparator | |||
| namespace { | |||
| struct STLLessThan { | |||
| const Comparator* cmp; | |||
| STLLessThan() : cmp(BytewiseComparator()) { } | |||
| STLLessThan(const Comparator* c) : cmp(c) { } | |||
| bool operator()(const std::string& a, const std::string& b) const { | |||
| return cmp->Compare(Slice(a), Slice(b)) < 0; | |||
| } | |||
| }; | |||
| } | |||
// A WritableFile that accumulates everything written to it in an
// in-memory string, so tests can build a table without touching the
// file system.
class StringSink: public WritableFile {
 public:
  ~StringSink() { }

  // All bytes Append()ed so far, in order.
  const std::string& contents() const { return contents_; }

  // Close/Flush/Sync are no-ops for an in-memory sink.
  virtual Status Close() { return Status::OK(); }
  virtual Status Flush() { return Status::OK(); }
  virtual Status Sync() { return Status::OK(); }

  virtual Status Append(const Slice& data) {
    contents_.append(data.data(), data.size());
    return Status::OK();
  }

 private:
  std::string contents_;
};
// A RandomAccessFile backed by an in-memory copy of its contents;
// the read-side counterpart of StringSink.
class StringSource: public RandomAccessFile {
 public:
  StringSource(const Slice& contents)
      : contents_(contents.data(), contents.size()) {
  }

  virtual ~StringSource() { }

  virtual uint64_t Size() const { return contents_.size(); }

  // Reads up to "n" bytes at "offset" into "scratch".  Like a real
  // file, a read past the end is clamped rather than failing.
  virtual Status Read(uint64_t offset, size_t n, Slice* result,
                       char* scratch) const {
    if (offset > contents_.size()) {
      return Status::InvalidArgument("invalid Read offset");
    }
    if (offset + n > contents_.size()) {
      n = contents_.size() - offset;  // clamp to available bytes
    }
    memcpy(scratch, &contents_[offset], n);
    *result = Slice(scratch, n);
    return Status::OK();
  }

 private:
  std::string contents_;
};
| typedef std::map<std::string, std::string, STLLessThan> KVMap; | |||
| // Helper class for tests to unify the interface between | |||
| // BlockBuilder/TableBuilder and Block/Table. | |||
| class Constructor { | |||
| public: | |||
| explicit Constructor(const Comparator* cmp) : data_(STLLessThan(cmp)) { } | |||
| virtual ~Constructor() { } | |||
| void Add(const std::string& key, const Slice& value) { | |||
| data_[key] = value.ToString(); | |||
| } | |||
| // Finish constructing the data structure with all the keys that have | |||
| // been added so far. Returns the keys in sorted order in "*keys" | |||
| // and stores the key/value pairs in "*kvmap" | |||
| void Finish(const Options& options, | |||
| std::vector<std::string>* keys, | |||
| KVMap* kvmap) { | |||
| *kvmap = data_; | |||
| keys->clear(); | |||
| for (KVMap::const_iterator it = data_.begin(); | |||
| it != data_.end(); | |||
| ++it) { | |||
| keys->push_back(it->first); | |||
| } | |||
| data_.clear(); | |||
| Status s = FinishImpl(options, *kvmap); | |||
| ASSERT_TRUE(s.ok()) << s.ToString(); | |||
| } | |||
| // Construct the data structure from the data in "data" | |||
| virtual Status FinishImpl(const Options& options, const KVMap& data) = 0; | |||
| virtual size_t NumBytes() const = 0; | |||
| virtual Iterator* NewIterator() const = 0; | |||
| virtual const KVMap& data() { return data_; } | |||
| private: | |||
| KVMap data_; | |||
| }; | |||
| class BlockConstructor: public Constructor { | |||
| public: | |||
| explicit BlockConstructor(const Comparator* cmp) | |||
| : Constructor(cmp), | |||
| comparator_(cmp), | |||
| block_size_(-1), | |||
| block_(NULL) { } | |||
| ~BlockConstructor() { | |||
| delete block_; | |||
| } | |||
| virtual Status FinishImpl(const Options& options, const KVMap& data) { | |||
| delete block_; | |||
| block_ = NULL; | |||
| BlockBuilder builder(&options); | |||
| for (KVMap::const_iterator it = data.begin(); | |||
| it != data.end(); | |||
| ++it) { | |||
| builder.Add(it->first, it->second); | |||
| } | |||
| // Open the block | |||
| Slice block_data = builder.Finish(); | |||
| block_size_ = block_data.size(); | |||
| char* block_data_copy = new char[block_size_]; | |||
| memcpy(block_data_copy, block_data.data(), block_size_); | |||
| block_ = new Block(block_data_copy, block_size_); | |||
| return Status::OK(); | |||
| } | |||
| virtual size_t NumBytes() const { return block_size_; } | |||
| virtual Iterator* NewIterator() const { | |||
| return block_->NewIterator(comparator_); | |||
| } | |||
| private: | |||
| const Comparator* comparator_; | |||
| int block_size_; | |||
| Block* block_; | |||
| BlockConstructor(); | |||
| }; | |||
| class TableConstructor: public Constructor { | |||
| public: | |||
| TableConstructor(const Comparator* cmp) | |||
| : Constructor(cmp), | |||
| source_(NULL), table_(NULL) { | |||
| } | |||
| ~TableConstructor() { | |||
| Reset(); | |||
| } | |||
| virtual Status FinishImpl(const Options& options, const KVMap& data) { | |||
| Reset(); | |||
| StringSink sink; | |||
| TableBuilder builder(options, &sink); | |||
| for (KVMap::const_iterator it = data.begin(); | |||
| it != data.end(); | |||
| ++it) { | |||
| builder.Add(it->first, it->second); | |||
| ASSERT_TRUE(builder.status().ok()); | |||
| } | |||
| Status s = builder.Finish(); | |||
| ASSERT_TRUE(s.ok()) << s.ToString(); | |||
| ASSERT_EQ(sink.contents().size(), builder.FileSize()); | |||
| // Open the table | |||
| source_ = new StringSource(sink.contents()); | |||
| Options table_options; | |||
| table_options.comparator = options.comparator; | |||
| return Table::Open(table_options, source_, &table_); | |||
| } | |||
| virtual size_t NumBytes() const { return source_->Size(); } | |||
| virtual Iterator* NewIterator() const { | |||
| return table_->NewIterator(ReadOptions()); | |||
| } | |||
| uint64_t ApproximateOffsetOf(const Slice& key) const { | |||
| return table_->ApproximateOffsetOf(key); | |||
| } | |||
| private: | |||
| void Reset() { | |||
| delete table_; | |||
| delete source_; | |||
| table_ = NULL; | |||
| source_ = NULL; | |||
| } | |||
| StringSource* source_; | |||
| Table* table_; | |||
| TableConstructor(); | |||
| }; | |||
| // A helper class that converts internal format keys into user keys | |||
| class KeyConvertingIterator: public Iterator { | |||
| public: | |||
| explicit KeyConvertingIterator(Iterator* iter) : iter_(iter) { } | |||
| virtual ~KeyConvertingIterator() { delete iter_; } | |||
| virtual bool Valid() const { return iter_->Valid(); } | |||
| virtual void Seek(const Slice& target) { | |||
| ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); | |||
| std::string encoded; | |||
| AppendInternalKey(&encoded, ikey); | |||
| iter_->Seek(encoded); | |||
| } | |||
| virtual void SeekToFirst() { iter_->SeekToFirst(); } | |||
| virtual void SeekToLast() { iter_->SeekToLast(); } | |||
| virtual void Next() { iter_->Next(); } | |||
| virtual void Prev() { iter_->Prev(); } | |||
| virtual Slice key() const { | |||
| assert(Valid()); | |||
| ParsedInternalKey key; | |||
| if (!ParseInternalKey(iter_->key(), &key)) { | |||
| status_ = Status::Corruption("malformed internal key"); | |||
| return Slice("corrupted key"); | |||
| } | |||
| return key.user_key; | |||
| } | |||
| virtual Slice value() const { return iter_->value(); } | |||
| virtual Status status() const { | |||
| return status_.ok() ? iter_->status() : status_; | |||
| } | |||
| private: | |||
| mutable Status status_; | |||
| Iterator* iter_; | |||
| // No copying allowed | |||
| KeyConvertingIterator(const KeyConvertingIterator&); | |||
| void operator=(const KeyConvertingIterator&); | |||
| }; | |||
| class MemTableConstructor: public Constructor { | |||
| public: | |||
| explicit MemTableConstructor(const Comparator* cmp) | |||
| : Constructor(cmp), | |||
| internal_comparator_(cmp) { | |||
| memtable_ = new MemTable(internal_comparator_); | |||
| } | |||
| ~MemTableConstructor() { | |||
| delete memtable_; | |||
| } | |||
| virtual Status FinishImpl(const Options& options, const KVMap& data) { | |||
| delete memtable_; | |||
| memtable_ = new MemTable(internal_comparator_); | |||
| int seq = 1; | |||
| for (KVMap::const_iterator it = data.begin(); | |||
| it != data.end(); | |||
| ++it) { | |||
| memtable_->Add(seq, kTypeValue, it->first, it->second); | |||
| seq++; | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| virtual size_t NumBytes() const { | |||
| return memtable_->ApproximateMemoryUsage(); | |||
| } | |||
| virtual Iterator* NewIterator() const { | |||
| return new KeyConvertingIterator(memtable_->NewIterator()); | |||
| } | |||
| private: | |||
| InternalKeyComparator internal_comparator_; | |||
| MemTable* memtable_; | |||
| }; | |||
| class DBConstructor: public Constructor { | |||
| public: | |||
| explicit DBConstructor(const Comparator* cmp) | |||
| : Constructor(cmp), | |||
| comparator_(cmp) { | |||
| db_ = NULL; | |||
| NewDB(); | |||
| } | |||
| ~DBConstructor() { | |||
| delete db_; | |||
| } | |||
| virtual Status FinishImpl(const Options& options, const KVMap& data) { | |||
| delete db_; | |||
| db_ = NULL; | |||
| NewDB(); | |||
| for (KVMap::const_iterator it = data.begin(); | |||
| it != data.end(); | |||
| ++it) { | |||
| WriteBatch batch; | |||
| batch.Put(it->first, it->second); | |||
| ASSERT_TRUE(db_->Write(WriteOptions(), &batch).ok()); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| virtual size_t NumBytes() const { | |||
| Range r("", "\xff\xff"); | |||
| uint64_t size; | |||
| db_->GetApproximateSizes(&r, 1, &size); | |||
| return size; | |||
| } | |||
| virtual Iterator* NewIterator() const { | |||
| return db_->NewIterator(ReadOptions()); | |||
| } | |||
| private: | |||
| void NewDB() { | |||
| std::string name = test::TmpDir() + "/table_testdb"; | |||
| Options options; | |||
| options.comparator = comparator_; | |||
| Status status = DestroyDB(name, options); | |||
| ASSERT_TRUE(status.ok()) << status.ToString(); | |||
| options.create_if_missing = true; | |||
| options.error_if_exists = true; | |||
| status = DB::Open(options, name, &db_); | |||
| ASSERT_TRUE(status.ok()) << status.ToString(); | |||
| } | |||
| const Comparator* comparator_; | |||
| DB* db_; | |||
| }; | |||
// Which implementation a Harness run exercises.
enum TestType {
  TABLE_TEST,
  BLOCK_TEST,
  MEMTABLE_TEST,
  DB_TEST,
};
// One Harness configuration: which structure to test, whether to use
// the reversing comparator, and the block restart interval.
struct TestArgs {
  TestType type;
  bool reverse_compare;
  int restart_interval;
};
// Every configuration each Harness test is run against.
static const TestArgs kTestArgList[] = {
  { TABLE_TEST, false, 16 },
  { TABLE_TEST, false, 1 },
  { TABLE_TEST, false, 1024 },
  { TABLE_TEST, true, 16 },
  { TABLE_TEST, true, 1 },
  { TABLE_TEST, true, 1024 },
  { BLOCK_TEST, false, 16 },
  { BLOCK_TEST, false, 1 },
  { BLOCK_TEST, false, 1024 },
  { BLOCK_TEST, true, 16 },
  { BLOCK_TEST, true, 1 },
  { BLOCK_TEST, true, 1024 },
  // Restart interval does not matter for memtables
  { MEMTABLE_TEST, false, 16 },
  { MEMTABLE_TEST, true, 16 },
  // Do not bother with restart interval variations for DB
  { DB_TEST, false, 16 },
  { DB_TEST, true, 16 },
};
static const int kNumTestArgs = sizeof(kTestArgList) / sizeof(kTestArgList[0]);
// Drives one Constructor implementation and checks its iterator against
// an in-memory KVMap model with forward, backward, and randomized scans.
class Harness {
 public:
  Harness() : constructor_(NULL) { }

  // (Re)configure for one TestArgs combination.  May be called multiple
  // times; any previously built constructor is discarded.
  void Init(const TestArgs& args) {
    delete constructor_;
    constructor_ = NULL;
    options_ = Options();

    options_.block_restart_interval = args.restart_interval;
    // Use shorter block size for tests to exercise block boundary
    // conditions more.
    options_.block_size = 256;
    if (args.reverse_compare) {
      options_.comparator = &reverse_key_comparator;
    }
    switch (args.type) {
      case TABLE_TEST:
        constructor_ = new TableConstructor(options_.comparator);
        break;
      case BLOCK_TEST:
        constructor_ = new BlockConstructor(options_.comparator);
        break;
      case MEMTABLE_TEST:
        constructor_ = new MemTableConstructor(options_.comparator);
        break;
      case DB_TEST:
        constructor_ = new DBConstructor(options_.comparator);
        break;
    }
  }

  ~Harness() {
    delete constructor_;
  }

  void Add(const std::string& key, const std::string& value) {
    constructor_->Add(key, value);
  }

  // Build the structure from everything Add()ed so far and verify it.
  void Test(Random* rnd) {
    std::vector<std::string> keys;
    KVMap data;
    constructor_->Finish(options_, &keys, &data);

    TestForwardScan(keys, data);
    TestBackwardScan(keys, data);
    TestRandomAccess(rnd, keys, data);
  }

  // Walk the iterator front-to-back and compare each entry to the model.
  void TestForwardScan(const std::vector<std::string>& keys,
                       const KVMap& data) {
    Iterator* iter = constructor_->NewIterator();
    ASSERT_TRUE(!iter->Valid());  // fresh iterators start unpositioned
    iter->SeekToFirst();
    for (KVMap::const_iterator model_iter = data.begin();
         model_iter != data.end();
         ++model_iter) {
      ASSERT_EQ(ToString(data, model_iter), ToString(iter));
      iter->Next();
    }
    ASSERT_TRUE(!iter->Valid());
    delete iter;
  }

  // Walk the iterator back-to-front and compare each entry to the model.
  void TestBackwardScan(const std::vector<std::string>& keys,
                        const KVMap& data) {
    Iterator* iter = constructor_->NewIterator();
    ASSERT_TRUE(!iter->Valid());
    iter->SeekToLast();
    for (KVMap::const_reverse_iterator model_iter = data.rbegin();
         model_iter != data.rend();
         ++model_iter) {
      ASSERT_EQ(ToString(data, model_iter), ToString(iter));
      iter->Prev();
    }
    ASSERT_TRUE(!iter->Valid());
    delete iter;
  }

  // Perform 200 random Next/Prev/Seek operations, mirroring each one on
  // a model iterator over the KVMap and checking they stay in lockstep.
  void TestRandomAccess(Random* rnd,
                        const std::vector<std::string>& keys,
                        const KVMap& data) {
    static const bool kVerbose = false;
    Iterator* iter = constructor_->NewIterator();
    ASSERT_TRUE(!iter->Valid());
    KVMap::const_iterator model_iter = data.begin();
    if (kVerbose) fprintf(stderr, "---\n");
    for (int i = 0; i < 200; i++) {
      const int toss = rnd->Uniform(5);
      switch (toss) {
        case 0: {
          // Next (only when positioned)
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Next\n");
            iter->Next();
            ++model_iter;
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 1: {
          if (kVerbose) fprintf(stderr, "SeekToFirst\n");
          iter->SeekToFirst();
          model_iter = data.begin();
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 2: {
          // Seek to a key that may or may not be present
          std::string key = PickRandomKey(rnd, keys);
          model_iter = data.lower_bound(key);
          if (kVerbose) fprintf(stderr, "Seek '%s'\n",
                                EscapeString(key).c_str());
          iter->Seek(Slice(key));
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }

        case 3: {
          // Prev (only when positioned)
          if (iter->Valid()) {
            if (kVerbose) fprintf(stderr, "Prev\n");
            iter->Prev();
            if (model_iter == data.begin()) {
              model_iter = data.end();  // Wrap around to invalid value
            } else {
              --model_iter;
            }
            ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          }
          break;
        }

        case 4: {
          if (kVerbose) fprintf(stderr, "SeekToLast\n");
          iter->SeekToLast();
          if (keys.empty()) {
            model_iter = data.end();
          } else {
            // Position the model on its last entry (lower_bound of the
            // largest key), matching SeekToLast on the iterator.
            std::string last = data.rbegin()->first;
            model_iter = data.lower_bound(last);
          }
          ASSERT_EQ(ToString(data, model_iter), ToString(iter));
          break;
        }
      }
    }
    delete iter;
  }

  // Render a model position as "'key->value'", or "END" when invalid,
  // so positions can be compared via string equality.
  std::string ToString(const KVMap& data, const KVMap::const_iterator& it) {
    if (it == data.end()) {
      return "END";
    } else {
      return "'" + it->first + "->" + it->second + "'";
    }
  }

  std::string ToString(const KVMap& data,
                       const KVMap::const_reverse_iterator& it) {
    if (it == data.rend()) {
      return "END";
    } else {
      return "'" + it->first + "->" + it->second + "'";
    }
  }

  std::string ToString(const Iterator* it) {
    if (!it->Valid()) {
      return "END";
    } else {
      return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
    }
  }

  // Pick a seek target: an existing key, something slightly smaller, or
  // something slightly larger (per the active comparator's order).
  std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
    if (keys.empty()) {
      return "foo";
    } else {
      const int index = rnd->Uniform(keys.size());
      std::string result = keys[index];
      switch (rnd->Uniform(3)) {
        case 0:
          // Return an existing key
          break;
        case 1: {
          // Attempt to return something smaller than an existing key
          if (result.size() > 0 && result[result.size()-1] > '\0') {
            result[result.size()-1]--;
          }
          break;
        }
        case 2: {
          // Return something larger than an existing key
          Increment(options_.comparator, &result);
          break;
        }
      }
      return result;
    }
  }

 private:
  Options options_;
  Constructor* constructor_;
};
| // Test the empty key | |||
| TEST(Harness, SimpleEmptyKey) { | |||
| for (int i = 0; i < kNumTestArgs; i++) { | |||
| Init(kTestArgList[i]); | |||
| Random rnd(test::RandomSeed() + 1); | |||
| Add("", "v"); | |||
| Test(&rnd); | |||
| } | |||
| } | |||
| TEST(Harness, SimpleSingle) { | |||
| for (int i = 0; i < kNumTestArgs; i++) { | |||
| Init(kTestArgList[i]); | |||
| Random rnd(test::RandomSeed() + 2); | |||
| Add("abc", "v"); | |||
| Test(&rnd); | |||
| } | |||
| } | |||
| TEST(Harness, SimpleMulti) { | |||
| for (int i = 0; i < kNumTestArgs; i++) { | |||
| Init(kTestArgList[i]); | |||
| Random rnd(test::RandomSeed() + 3); | |||
| Add("abc", "v"); | |||
| Add("abcd", "v"); | |||
| Add("ac", "v2"); | |||
| Test(&rnd); | |||
| } | |||
| } | |||
| TEST(Harness, SimpleSpecialKey) { | |||
| for (int i = 0; i < kNumTestArgs; i++) { | |||
| Init(kTestArgList[i]); | |||
| Random rnd(test::RandomSeed() + 4); | |||
| Add("\xff\xff", "v3"); | |||
| Test(&rnd); | |||
| } | |||
| } | |||
| TEST(Harness, Randomized) { | |||
| for (int i = 0; i < kNumTestArgs; i++) { | |||
| Init(kTestArgList[i]); | |||
| Random rnd(test::RandomSeed() + 5); | |||
| for (int num_entries = 0; num_entries < 2000; | |||
| num_entries += (num_entries < 50 ? 1 : 200)) { | |||
| if ((num_entries % 10) == 0) { | |||
| fprintf(stderr, "case %d of %d: num_entries = %d\n", | |||
| (i + 1), int(kNumTestArgs), num_entries); | |||
| } | |||
| for (int e = 0; e < num_entries; e++) { | |||
| std::string v; | |||
| Add(test::RandomKey(&rnd, rnd.Skewed(4)), | |||
| test::RandomString(&rnd, rnd.Skewed(5), &v).ToString()); | |||
| } | |||
| Test(&rnd); | |||
| } | |||
| } | |||
| } | |||
| class MemTableTest { }; | |||
| TEST(MemTableTest, Simple) { | |||
| InternalKeyComparator cmp(BytewiseComparator()); | |||
| MemTable memtable(cmp); | |||
| WriteBatch batch; | |||
| WriteBatchInternal::SetSequence(&batch, 100); | |||
| batch.Put(std::string("k1"), std::string("v1")); | |||
| batch.Put(std::string("k2"), std::string("v2")); | |||
| batch.Put(std::string("k3"), std::string("v3")); | |||
| batch.Put(std::string("largekey"), std::string("vlarge")); | |||
| ASSERT_TRUE(WriteBatchInternal::InsertInto(&batch, &memtable).ok()); | |||
| Iterator* iter = memtable.NewIterator(); | |||
| iter->SeekToFirst(); | |||
| while (iter->Valid()) { | |||
| fprintf(stderr, "key: '%s' -> '%s'\n", | |||
| iter->key().ToString().c_str(), | |||
| iter->value().ToString().c_str()); | |||
| iter->Next(); | |||
| } | |||
| delete iter; | |||
| } | |||
// Returns true iff low <= val <= high; logs the violation otherwise so
// a failing range assertion shows the offending values.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  const bool in_range = (val >= low) && (val <= high);
  if (!in_range) {
    fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
            (unsigned long long)(val),
            (unsigned long long)(low),
            (unsigned long long)(high));
  }
  return in_range;
}
class TableTest { };

// Verifies that ApproximateOffsetOf() for a key tracks the cumulative
// size of the uncompressed values stored before that key.
TEST(TableTest, ApproximateOffsetOfPlain) {
  TableConstructor c(BytewiseComparator());
  c.Add("k01", "hello");
  c.Add("k02", "hello2");
  c.Add("k03", std::string(10000, 'x'));
  c.Add("k04", std::string(200000, 'x'));
  c.Add("k05", std::string(300000, 'x'));
  c.Add("k06", "hello3");
  c.Add("k07", std::string(100000, 'x'));
  std::vector<std::string> keys;
  KVMap kvmap;
  Options options;
  options.block_size = 1024;
  options.compression = kNoCompression;  // offsets map 1:1 to raw sizes
  c.Finish(options, &keys, &kvmap);

  // Expected offsets are the running totals of the value sizes above,
  // with ~1KB of slack for block/format overhead.
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"),      0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),   10000,  11000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 210000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"),  210000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"),  510000, 511000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"),  510000, 511000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),  610000, 611000));
}
// Same idea as ApproximateOffsetOfPlain, but with compressible values:
// offsets should reflect the (smaller) compressed sizes on disk.
TEST(TableTest, ApproximateOffsetOfCompressed) {
#if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_CHROMIUM)
  // Compression not supported yet, so skip this test.
  // TODO(sanjay) Reenable after compression support is added
  return;
#endif

  Random rnd(301);
  TableConstructor c(BytewiseComparator());
  std::string tmp;
  c.Add("k01", "hello");
  c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  c.Add("k03", "hello3");
  c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
  std::vector<std::string> keys;
  KVMap kvmap;
  Options options;
  options.block_size = 1024;
  options.compression = kLightweightCompression;
  c.Finish(options, &keys, &kvmap);

  // The 10000-byte values compress roughly 4:1 (ratio 0.25), so each
  // contributes ~2500 bytes to the on-disk offsets.
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),    0,    0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),    0,    0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),    0,    0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000));
}
| } | |||
| int main(int argc, char** argv) { | |||
| return leveldb::test::RunAllTests(); | |||
| } | |||
| @ -0,0 +1,182 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "table/two_level_iterator.h" | |||
| #include "include/table.h" | |||
| #include "table/block.h" | |||
| #include "table/format.h" | |||
| #include "table/iterator_wrapper.h" | |||
| namespace leveldb { | |||
| namespace { | |||
| typedef Iterator* (*BlockFunction)(void*, const ReadOptions&, const Slice&); | |||
// Iterator over the concatenation of the data blocks named by an index
// iterator; see NewTwoLevelIterator() in two_level_iterator.h.  The
// iterator is positioned iff the current data iterator is positioned.
class TwoLevelIterator: public Iterator {
 public:
  TwoLevelIterator(
      Iterator* index_iter,
      BlockFunction block_function,
      void* arg,
      const ReadOptions& options);

  virtual ~TwoLevelIterator();

  virtual void Seek(const Slice& target);
  virtual void SeekToFirst();
  virtual void SeekToLast();
  virtual void Next();
  virtual void Prev();

  virtual bool Valid() const {
    return data_iter_.Valid();
  }
  virtual Slice key() const {
    assert(Valid());
    return data_iter_.key();
  }
  virtual Slice value() const {
    assert(Valid());
    return data_iter_.value();
  }
  // Reports errors in priority order: index errors, then data-block
  // errors, then any error recorded by SaveError().
  virtual Status status() const {
    // It'd be nice if status() returned a const Status& instead of a Status
    if (!index_iter_.status().ok()) {
      return index_iter_.status();
    } else if (data_iter_.iter() != NULL && !data_iter_.status().ok()) {
      return data_iter_.status();
    } else {
      return status_;
    }
  }

 private:
  // Remember the first error seen so it survives iterator replacement.
  void SaveError(const Status& s) {
    if (status_.ok() && !s.ok()) status_ = s;
  }
  void SkipEmptyDataBlocksForward();
  void SkipEmptyDataBlocksBackward();
  void SetDataIterator(Iterator* data_iter);
  void InitDataBlock();

  BlockFunction block_function_;  // converts an index value to a data iterator
  void* arg_;                     // opaque argument passed to block_function_
  const ReadOptions options_;
  Status status_;
  IteratorWrapper index_iter_;
  IteratorWrapper data_iter_; // May be NULL
  // If data_iter_ is non-NULL, then "data_block_handle_" holds the
  // "index_value" passed to block_function_ to create the data_iter_.
  std::string data_block_handle_;
};
// Takes ownership of "index_iter".  Data iterators are created lazily
// (via block_function) as the iterator is positioned.
TwoLevelIterator::TwoLevelIterator(
    Iterator* index_iter,
    BlockFunction block_function,
    void* arg,
    const ReadOptions& options)
    : block_function_(block_function),
      arg_(arg),
      options_(options),
      index_iter_(index_iter),
      data_iter_(NULL) {
}
// NOTE(review): nothing to do here — the IteratorWrapper members appear
// to own and delete the wrapped iterators; confirm in iterator_wrapper.h.
TwoLevelIterator::~TwoLevelIterator() {
}
// Position both levels at the first entry >= "target".
void TwoLevelIterator::Seek(const Slice& target) {
  index_iter_.Seek(target);
  InitDataBlock();
  if (data_iter_.iter() != NULL) data_iter_.Seek(target);
  // The selected block may have no entry >= target; move forward to the
  // first block that does.
  SkipEmptyDataBlocksForward();
}
// Position at the first entry of the first non-empty data block.
void TwoLevelIterator::SeekToFirst() {
  index_iter_.SeekToFirst();
  InitDataBlock();
  if (data_iter_.iter() != NULL) data_iter_.SeekToFirst();
  SkipEmptyDataBlocksForward();
}
// Position at the last entry of the last non-empty data block.
void TwoLevelIterator::SeekToLast() {
  index_iter_.SeekToLast();
  InitDataBlock();
  if (data_iter_.iter() != NULL) data_iter_.SeekToLast();
  SkipEmptyDataBlocksBackward();
}
// Advance within the current block, rolling over to the next non-empty
// block when the current one is exhausted.
void TwoLevelIterator::Next() {
  assert(Valid());
  data_iter_.Next();
  SkipEmptyDataBlocksForward();
}
// Step back within the current block, rolling back to the previous
// non-empty block when the current one is exhausted.
void TwoLevelIterator::Prev() {
  assert(Valid());
  data_iter_.Prev();
  SkipEmptyDataBlocksBackward();
}
// Advance through index entries until data_iter_ is positioned on a
// valid entry, or the index is exhausted (making this iterator invalid).
void TwoLevelIterator::SkipEmptyDataBlocksForward() {
  while (data_iter_.iter() == NULL || !data_iter_.Valid()) {
    // Move to next block
    if (!index_iter_.Valid()) {
      SetDataIterator(NULL);
      return;
    }
    index_iter_.Next();
    InitDataBlock();
    if (data_iter_.iter() != NULL) data_iter_.SeekToFirst();
  }
}
// Mirror of SkipEmptyDataBlocksForward: step back through index entries
// until data_iter_ is positioned, or the index is exhausted.
void TwoLevelIterator::SkipEmptyDataBlocksBackward() {
  while (data_iter_.iter() == NULL || !data_iter_.Valid()) {
    // Move to the preceding block
    if (!index_iter_.Valid()) {
      SetDataIterator(NULL);
      return;
    }
    index_iter_.Prev();
    InitDataBlock();
    if (data_iter_.iter() != NULL) data_iter_.SeekToLast();
  }
}
// Replace the current data iterator, preserving any error status the
// outgoing iterator had accumulated.
void TwoLevelIterator::SetDataIterator(Iterator* data_iter) {
  if (data_iter_.iter() != NULL) SaveError(data_iter_.status());
  data_iter_.Set(data_iter);
}
// Create data_iter_ for the block that index_iter_ currently points at,
// reusing the existing iterator when the block handle is unchanged.
void TwoLevelIterator::InitDataBlock() {
  if (!index_iter_.Valid()) {
    SetDataIterator(NULL);
  } else {
    Slice handle = index_iter_.value();
    if (data_iter_.iter() != NULL && handle.compare(data_block_handle_) == 0) {
      // data_iter_ is already constructed with this iterator, so
      // no need to change anything
    } else {
      Iterator* iter = (*block_function_)(arg_, options_, handle);
      data_block_handle_.assign(handle.data(), handle.size());
      SetDataIterator(iter);
    }
  }
}
| } | |||
// Public factory; see two_level_iterator.h for the contract.  Takes
// ownership of "index_iter".
Iterator* NewTwoLevelIterator(
    Iterator* index_iter,
    BlockFunction block_function,
    void* arg,
    const ReadOptions& options) {
  return new TwoLevelIterator(index_iter, block_function, arg, options);
}
| } | |||
| @ -0,0 +1,34 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ | |||
| #define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ | |||
| #include "include/iterator.h" | |||
| namespace leveldb { | |||
| struct ReadOptions; | |||
// Return a new two level iterator.  A two-level iterator contains an
// index iterator whose values point to a sequence of blocks where
// each block is itself a sequence of key,value pairs.  The returned
// two-level iterator yields the concatenation of all key/value pairs
// in the sequence of blocks.  Takes ownership of "index_iter" and
// will delete it when no longer needed.
//
// Uses a supplied function to convert an index_iter value into
// an iterator over the contents of the corresponding block.
// "arg" is forwarded, unchanged, as the first argument on every
// invocation of "block_function".
extern Iterator* NewTwoLevelIterator(
    Iterator* index_iter,
    Iterator* (*block_function)(
        void* arg,
        const ReadOptions& options,
        const Slice& index_value),
    void* arg,
    const ReadOptions& options);
| } | |||
| #endif // STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ | |||
| @ -0,0 +1,68 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "util/arena.h" | |||
| #include <assert.h> | |||
| namespace leveldb { | |||
| static const int kBlockSize = 4096; | |||
| Arena::Arena() { | |||
| blocks_memory_ = 0; | |||
| alloc_ptr_ = NULL; // First allocation will allocate a block | |||
| alloc_bytes_remaining_ = 0; | |||
| } | |||
| Arena::~Arena() { | |||
| for (int i = 0; i < blocks_.size(); i++) { | |||
| delete[] blocks_[i]; | |||
| } | |||
| } | |||
| char* Arena::AllocateFallback(size_t bytes) { | |||
| if (bytes > kBlockSize / 4) { | |||
| // Object is more than a quarter of our block size. Allocate it separately | |||
| // to avoid wasting too much space in leftover bytes. | |||
| char* result = AllocateNewBlock(bytes); | |||
| return result; | |||
| } | |||
| // We waste the remaining space in the current block. | |||
| alloc_ptr_ = AllocateNewBlock(kBlockSize); | |||
| alloc_bytes_remaining_ = kBlockSize; | |||
| char* result = alloc_ptr_; | |||
| alloc_ptr_ += bytes; | |||
| alloc_bytes_remaining_ -= bytes; | |||
| return result; | |||
| } | |||
| char* Arena::AllocateAligned(size_t bytes) { | |||
| const int align = sizeof(void*); // We'll align to pointer size | |||
| assert((align & (align-1)) == 0); // Pointer size should be a power of 2 | |||
| size_t current_mod = reinterpret_cast<uintptr_t>(alloc_ptr_) & (align-1); | |||
| size_t slop = (current_mod == 0 ? 0 : align - current_mod); | |||
| size_t needed = bytes + slop; | |||
| char* result; | |||
| if (needed <= alloc_bytes_remaining_) { | |||
| result = alloc_ptr_ + slop; | |||
| alloc_ptr_ += needed; | |||
| alloc_bytes_remaining_ -= needed; | |||
| } else { | |||
| // AllocateFallback always returned aligned memory | |||
| result = AllocateFallback(bytes); | |||
| } | |||
| assert((reinterpret_cast<uintptr_t>(result) & (align-1)) == 0); | |||
| return result; | |||
| } | |||
| char* Arena::AllocateNewBlock(size_t block_bytes) { | |||
| char* result = new char[block_bytes]; | |||
| blocks_memory_ += block_bytes; | |||
| blocks_.push_back(result); | |||
| return result; | |||
| } | |||
| } | |||
| @ -0,0 +1,68 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_UTIL_ARENA_H_ | |||
| #define STORAGE_LEVELDB_UTIL_ARENA_H_ | |||
| #include <cstddef> | |||
| #include <vector> | |||
| #include <assert.h> | |||
| #include <stdint.h> | |||
| namespace leveldb { | |||
// Arena hands out small allocations carved from large new[]-ed blocks.
// Individual allocations are never freed; everything is released at
// once when the arena is destroyed.  Not internally synchronized.
class Arena {
 public:
  Arena();
  ~Arena();

  // Return a pointer to a newly allocated memory block of "bytes" bytes.
  // REQUIRES: bytes > 0 (asserted in the inline definition below).
  char* Allocate(size_t bytes);

  // Allocate memory with the normal alignment guarantees provided by malloc
  // (pointer-size alignment; see AllocateAligned in arena.cc).
  char* AllocateAligned(size_t bytes);

  // Returns an estimate of the total memory usage of data allocated
  // by the arena (including space allocated but not yet used for user
  // allocations).
  size_t MemoryUsage() const {
    return blocks_memory_ + blocks_.capacity() * sizeof(char*);
  }

 private:
  // Slow path: start a new block (or a dedicated oversized block) when
  // the current block cannot satisfy "bytes".
  char* AllocateFallback(size_t bytes);
  // new[] a block of "block_bytes" and record it for the destructor.
  char* AllocateNewBlock(size_t block_bytes);

  // Allocation state
  char* alloc_ptr_;               // Next free byte in the current block
  size_t alloc_bytes_remaining_;  // Bytes left in the current block

  // Array of new[] allocated memory blocks
  std::vector<char*> blocks_;

  // Bytes of memory in blocks allocated so far
  size_t blocks_memory_;

  // No copying allowed
  Arena(const Arena&);
  void operator=(const Arena&);
};
| inline char* Arena::Allocate(size_t bytes) { | |||
| // The semantics of what to return are a bit messy if we allow | |||
| // 0-byte allocations, so we disallow them here (we don't need | |||
| // them for our internal use). | |||
| assert(bytes > 0); | |||
| if (bytes <= alloc_bytes_remaining_) { | |||
| char* result = alloc_ptr_; | |||
| alloc_ptr_ += bytes; | |||
| alloc_bytes_remaining_ -= bytes; | |||
| return result; | |||
| } | |||
| return AllocateFallback(bytes); | |||
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_UTIL_ARENA_H_ | |||
| @ -0,0 +1,68 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "util/arena.h" | |||
| #include "util/random.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
// Empty fixture: Arena needs no shared setup.
class ArenaTest { };

// Constructing and destroying an arena with no allocations must work.
TEST(ArenaTest, Empty) {
  Arena arena;
}
| TEST(ArenaTest, Simple) { | |||
| std::vector<std::pair<size_t, char*> > allocated; | |||
| Arena arena; | |||
| const int N = 100000; | |||
| size_t bytes = 0; | |||
| Random rnd(301); | |||
| for (int i = 0; i < N; i++) { | |||
| size_t s; | |||
| if (i % (N / 10) == 0) { | |||
| s = i; | |||
| } else { | |||
| s = rnd.OneIn(4000) ? rnd.Uniform(6000) : | |||
| (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); | |||
| } | |||
| if (s == 0) { | |||
| // Our arena disallows size 0 allocations. | |||
| s = 1; | |||
| } | |||
| char* r; | |||
| if (rnd.OneIn(10)) { | |||
| r = arena.AllocateAligned(s); | |||
| } else { | |||
| r = arena.Allocate(s); | |||
| } | |||
| for (int b = 0; b < s; b++) { | |||
| // Fill the "i"th allocation with a known bit pattern | |||
| r[b] = i % 256; | |||
| } | |||
| bytes += s; | |||
| allocated.push_back(std::make_pair(s, r)); | |||
| ASSERT_GE(arena.MemoryUsage(), bytes); | |||
| if (i > N/10) { | |||
| ASSERT_LE(arena.MemoryUsage(), bytes * 1.10); | |||
| } | |||
| } | |||
| for (int i = 0; i < allocated.size(); i++) { | |||
| size_t num_bytes = allocated[i].first; | |||
| const char* p = allocated[i].second; | |||
| for (int b = 0; b < num_bytes; b++) { | |||
| // Check the "i"th allocation for the known bit pattern | |||
| ASSERT_EQ(int(p[b]) & 0xff, i % 256); | |||
| } | |||
| } | |||
| } | |||
| } | |||
// Runs every test registered above via the TEST macro.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,253 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_ANDROID) | |||
| #include <unordered_set> | |||
| #elif defined(LEVELDB_PLATFORM_CHROMIUM) | |||
| #include "base/hash_tables.h" | |||
| #else | |||
| #include <hash_set> // TODO(sanjay): Switch to unordered_set when possible. | |||
| #endif | |||
| #include <assert.h> | |||
| #include "include/cache.h" | |||
| #include "port/port.h" | |||
| #include "util/hash.h" | |||
| #include "util/mutexlock.h" | |||
| namespace leveldb { | |||
// Intentionally empty; defined out of line so the destructor of the
// abstract Cache interface has a home in this translation unit.
Cache::~Cache() {
}
| namespace { | |||
| // LRU cache implementation | |||
| // An entry is a variable length heap-allocated structure. Entries | |||
| // are kept in a circular doubly linked list ordered by access time. | |||
struct LRUHandle {
  void* value;                                 // Client payload; passed to deleter
  void (*deleter)(const Slice&, void* value);  // Called when refs drops to 0
  LRUHandle* next;                             // Circular LRU list links
  LRUHandle* prev;
  size_t charge;      // TODO(opt): Only allow uint32_t?
  size_t key_length;  // Number of bytes in key_data
  size_t refs;        // TODO(opt): Pack with "key_length"?
  char key_data[1];   // Beginning of key

  Slice key() const {
    // For cheaper lookups, we allow a temporary Handle object
    // to store a pointer to a key in "value".  Such a stack probe marks
    // itself with next == this (see LRUCache::Lookup/Erase); a real
    // entry is always linked into the circular list, so next != this.
    if (next == this) {
      return *(reinterpret_cast<Slice*>(value));
    } else {
      return Slice(key_data, key_length);
    }
  }
};
// Pick a platform specific hash_set instantiation.  In every variant
// the elements are LRUHandle* hashed/compared by the entry's key, so a
// stack-allocated probe handle (see LRUHandle::key) can be used for
// find/erase without building a real entry.
#if defined(LEVELDB_PLATFORM_CHROMIUM) && defined(OS_WIN)
  // Microsoft's hash_set deviates from the standard. See
  // http://msdn.microsoft.com/en-us/library/1t4xas78(v=vs.80).aspx
  // for details. Basically the 2 param () operator is a less than and
  // the 1 param () operator is a hash function.
  struct HandleHashCompare : public stdext::hash_compare<LRUHandle*> {
    size_t operator() (LRUHandle* h) const {
      Slice k = h->key();
      return Hash(k.data(), k.size(), 0);
    }
    bool operator() (LRUHandle* a, LRUHandle* b) const {
      return a->key().compare(b->key()) < 0;
    }
  };
  typedef base::hash_set<LRUHandle*, HandleHashCompare> HandleTable;
#else
  struct HandleHash {
    inline size_t operator()(LRUHandle* h) const {
      Slice k = h->key();
      return Hash(k.data(), k.size(), 0);
    }
  };
  struct HandleEq {
    inline bool operator()(LRUHandle* a, LRUHandle* b) const {
      return a->key() == b->key();
    }
  };
#  if defined(LEVELDB_PLATFORM_CHROMIUM)
  typedef base::hash_set<LRUHandle*, HandleHash, HandleEq> HandleTable;
#  elif defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_ANDROID)
  typedef std::unordered_set<LRUHandle*, HandleHash, HandleEq> HandleTable;
#  else
  typedef __gnu_cxx::hash_set<LRUHandle*, HandleHash, HandleEq> HandleTable;
#  endif
#endif
// LRU cache: entries live in a hash table for lookup and a circular
// doubly-linked list ordered by recency for eviction.
class LRUCache : public Cache {
 public:
  explicit LRUCache(size_t capacity);
  virtual ~LRUCache();

  // Cache interface; see include/cache.h for the contracts.
  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
                         void (*deleter)(const Slice& key, void* value));
  virtual Handle* Lookup(const Slice& key);
  virtual void Release(Handle* handle);
  virtual void* Value(Handle* handle);
  virtual void Erase(const Slice& key);
  virtual uint64_t NewId();

 private:
  // List/refcount helpers; callers hold mutex_.
  void LRU_Remove(LRUHandle* e);
  void LRU_Append(LRUHandle* e);
  void Unref(LRUHandle* e);

  // Constructor parameters
  const size_t capacity_;  // Eviction threshold for the sum of charges

  // mutex_ protects the following state.
  port::Mutex mutex_;
  size_t usage_;           // Sum of charges of resident entries
  uint64_t last_id_;       // Counter backing NewId()

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  LRUHandle lru_;

  HandleTable table_;      // Hash index over the entries in the list
};
LRUCache::LRUCache(size_t capacity)
    : capacity_(capacity),
      usage_(0),
      last_id_(0) {
  // Make empty circular linked list: the dummy head points at itself.
  lru_.next = &lru_;
  lru_.prev = &lru_;
}
LRUCache::~LRUCache() {
  table_.clear();
  // Drop the cache's own reference on every remaining entry.  e->next
  // is saved first because Unref() may free "e".
  for (LRUHandle* e = lru_.next; e != &lru_; ) {
    LRUHandle* next = e->next;
    assert(e->refs == 1);  // Error if caller has an unreleased handle
    Unref(e);
    e = next;
  }
}
| void LRUCache::Unref(LRUHandle* e) { | |||
| assert(e->refs > 0); | |||
| e->refs--; | |||
| if (e->refs <= 0) { | |||
| usage_ -= e->charge; | |||
| (*e->deleter)(e->key(), e->value); | |||
| free(e); | |||
| } | |||
| } | |||
| void LRUCache::LRU_Remove(LRUHandle* e) { | |||
| e->next->prev = e->prev; | |||
| e->prev->next = e->next; | |||
| } | |||
| void LRUCache::LRU_Append(LRUHandle* e) { | |||
| // Make "e" newest entry by inserting just before lru_ | |||
| e->next = &lru_; | |||
| e->prev = lru_.prev; | |||
| e->prev->next = e; | |||
| e->next->prev = e; | |||
| } | |||
Cache::Handle* LRUCache::Lookup(const Slice& key) {
  MutexLock l(&mutex_);
  // Probe the table with a stack-allocated dummy.  next == &dummy makes
  // LRUHandle::key() read the key out of "value" instead of key_data,
  // so no heap entry has to be built just to search.
  LRUHandle dummy;
  dummy.next = &dummy;
  dummy.value = const_cast<Slice*>(&key);
  HandleTable::iterator iter = table_.find(&dummy);
  if (iter == table_.end()) {
    return NULL;
  } else {
    LRUHandle* e = const_cast<LRUHandle*>(*iter);
    e->refs++;  // For the returned handle; caller must Release() it
    // Move the entry to the newest end of the LRU list.
    LRU_Remove(e);
    LRU_Append(e);
    return reinterpret_cast<Handle*>(e);
  }
}
void* LRUCache::Value(Handle* handle) {
  // No lock taken: nothing in this file writes e->value after Insert().
  return reinterpret_cast<LRUHandle*>(handle)->value;
}
void LRUCache::Release(Handle* handle) {
  MutexLock l(&mutex_);
  // Drop the reference handed out by Insert()/Lookup(); frees the entry
  // if it was already evicted or erased.
  Unref(reinterpret_cast<LRUHandle*>(handle));
}
Cache::Handle* LRUCache::Insert(const Slice& key, void* value, size_t charge,
                                void (*deleter)(const Slice& key, void* value)) {
  MutexLock l(&mutex_);

  // Allocate the handle and its key in one malloc'd chunk: key_data[1]
  // is the first byte of key.size() trailing bytes (hence the "-1").
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
      malloc(sizeof(LRUHandle)-1 + key.size()));
  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->key_length = key.size();
  e->refs = 2;  // One from LRUCache, one for the returned handle
  memcpy(e->key_data, key.data(), key.size());
  LRU_Append(e);
  usage_ += charge;

  std::pair<HandleTable::iterator,bool> p = table_.insert(e);
  if (!p.second) {
    // Kill existing entry: the table held an entry with an equal key,
    // so swap the new handle in and drop the cache's reference to the
    // old one (its deleter runs once all outstanding handles go away).
    LRUHandle* old = const_cast<LRUHandle*>(*(p.first));
    LRU_Remove(old);
    table_.erase(p.first);
    table_.insert(e);
    Unref(old);
  }

  // Evict from the oldest end (lru_.next) until back under capacity.
  // Pinned entries keep their storage alive via refs even after
  // leaving the table.
  while (usage_ > capacity_ && lru_.next != &lru_) {
    LRUHandle* old = lru_.next;
    LRU_Remove(old);
    table_.erase(old);
    Unref(old);
  }

  return reinterpret_cast<Handle*>(e);
}
void LRUCache::Erase(const Slice& key) {
  MutexLock l(&mutex_);
  // Same stack-probe trick as Lookup(): next == &dummy makes key()
  // read the key out of "value".
  LRUHandle dummy;
  dummy.next = &dummy;
  dummy.value = const_cast<Slice*>(&key);
  HandleTable::iterator iter = table_.find(&dummy);
  if (iter != table_.end()) {
    LRUHandle* e = const_cast<LRUHandle*>(*iter);
    LRU_Remove(e);
    table_.erase(iter);
    // Drops the cache's reference; outstanding handles keep "e" alive.
    Unref(e);
  }
}
uint64_t LRUCache::NewId() {
  MutexLock l(&mutex_);
  // Pre-increment: ids start at 1 and increase monotonically.
  return ++(last_id_);
}
| } // end anonymous namespace | |||
// Public factory; the caller owns the returned cache.
Cache* NewLRUCache(size_t capacity) {
  return new LRUCache(capacity);
}
| } | |||
| @ -0,0 +1,169 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "include/cache.h" | |||
| #include <vector> | |||
| #include "util/coding.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
// Conversions between numeric keys/values and the types expected by Cache.
// Keys become 4-byte fixed32 strings; values are stored directly in the
// void* rather than on the heap.
static std::string EncodeKey(int k) {
  std::string result;
  PutFixed32(&result, k);
  return result;
}
static int DecodeKey(const Slice& k) {
  assert(k.size() == 4);
  return DecodeFixed32(k.data());
}
static void* EncodeValue(uintptr_t v) { return reinterpret_cast<void*>(v); }
static int DecodeValue(void* v) { return reinterpret_cast<uintptr_t>(v); }
class CacheTest {
 public:
  // The cache's deleter callback is a plain function pointer, so it
  // reaches the running test instance through this static pointer.
  static CacheTest* current_;

  // Records each (key, value) pair the cache hands back when an entry
  // is evicted, erased, or replaced.
  static void Deleter(const Slice& key, void* v) {
    current_->deleted_keys_.push_back(DecodeKey(key));
    current_->deleted_values_.push_back(DecodeValue(v));
  }

  static const int kCacheSize = 100;
  std::vector<int> deleted_keys_;
  std::vector<int> deleted_values_;
  Cache* cache_;

  CacheTest() : cache_(NewLRUCache(kCacheSize)) {
    current_ = this;
  }

  ~CacheTest() {
    delete cache_;
  }

  // Returns the cached value for "key", or -1 if absent.
  int Lookup(int key) {
    Cache::Handle* handle = cache_->Lookup(EncodeKey(key));
    const int r = (handle == NULL) ? -1 : DecodeValue(cache_->Value(handle));
    if (handle != NULL) {
      cache_->Release(handle);
    }
    return r;
  }

  // Inserts and immediately releases the returned handle, leaving only
  // the cache's own reference.
  void Insert(int key, int value, int charge = 1) {
    cache_->Release(cache_->Insert(EncodeKey(key), EncodeValue(value), charge,
                                   &CacheTest::Deleter));
  }

  void Erase(int key) {
    cache_->Erase(EncodeKey(key));
  }
};
CacheTest* CacheTest::current_;
// Lookup returns the most recent value per key, -1 for absent keys, and
// replacing a key fires the deleter for the old (key, value) pair.
TEST(CacheTest, HitAndMiss) {
  ASSERT_EQ(-1, Lookup(100));

  Insert(100, 101);
  ASSERT_EQ(101, Lookup(100));
  ASSERT_EQ(-1,  Lookup(200));
  ASSERT_EQ(-1,  Lookup(300));

  Insert(200, 201);
  ASSERT_EQ(101, Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(-1,  Lookup(300));

  Insert(100, 102);
  ASSERT_EQ(102, Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(-1,  Lookup(300));

  ASSERT_EQ(1, deleted_keys_.size());
  ASSERT_EQ(100, deleted_keys_[0]);
  ASSERT_EQ(101, deleted_values_[0]);
}
// Erase removes exactly the named key, fires its deleter once, and is a
// no-op on absent keys (including double-erase).
TEST(CacheTest, Erase) {
  Erase(200);
  ASSERT_EQ(0, deleted_keys_.size());

  Insert(100, 101);
  Insert(200, 201);
  Erase(100);
  ASSERT_EQ(-1,  Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(1, deleted_keys_.size());
  ASSERT_EQ(100, deleted_keys_[0]);
  ASSERT_EQ(101, deleted_values_[0]);

  Erase(100);
  ASSERT_EQ(-1,  Lookup(100));
  ASSERT_EQ(201, Lookup(200));
  ASSERT_EQ(1, deleted_keys_.size());
}
// An entry stays alive (deleter deferred) while a handle pins it, even
// after it is replaced or erased.
TEST(CacheTest, EntriesArePinned) {
  Insert(100, 101);
  Cache::Handle* h1 = cache_->Lookup(EncodeKey(100));
  ASSERT_EQ(101, DecodeValue(cache_->Value(h1)));

  Insert(100, 102);
  Cache::Handle* h2 = cache_->Lookup(EncodeKey(100));
  ASSERT_EQ(102, DecodeValue(cache_->Value(h2)));
  ASSERT_EQ(0, deleted_keys_.size());

  cache_->Release(h1);
  ASSERT_EQ(1, deleted_keys_.size());
  ASSERT_EQ(100, deleted_keys_[0]);
  ASSERT_EQ(101, deleted_values_[0]);

  Erase(100);
  ASSERT_EQ(-1, Lookup(100));
  ASSERT_EQ(1, deleted_keys_.size());

  cache_->Release(h2);
  ASSERT_EQ(2, deleted_keys_.size());
  ASSERT_EQ(100, deleted_keys_[1]);
  ASSERT_EQ(102, deleted_values_[1]);
}
// Eviction removes the least-recently-used entry; touching key 100 each
// iteration keeps it resident while untouched entries fall out.
TEST(CacheTest, EvictionPolicy) {
  Insert(100, 101);
  Insert(200, 201);

  // Frequently used entry must be kept around
  for (int i = 0; i < kCacheSize; i++) {
    Insert(1000+i, 2000+i);
    ASSERT_EQ(2000+i, Lookup(1000+i));
    ASSERT_EQ(101, Lookup(100));
  }
  ASSERT_EQ(101, Lookup(100));
  ASSERT_EQ(2, deleted_keys_.size());
  ASSERT_EQ(200, deleted_keys_[0]);
  ASSERT_EQ(201, deleted_values_[0]);
}
// A single entry whose charge equals the whole capacity evicts what was
// there before it.
TEST(CacheTest, HeavyEntry) {
  Insert(100, 101);
  Insert(200, 201, kCacheSize);
  ASSERT_EQ(1, deleted_keys_.size());
  ASSERT_EQ(100, deleted_keys_[0]);
  ASSERT_EQ(101, deleted_values_[0]);
}
// Consecutive NewId() calls must hand out distinct ids.
TEST(CacheTest, NewId) {
  uint64_t a = cache_->NewId();
  uint64_t b = cache_->NewId();
  ASSERT_NE(a, b);
}
| } | |||
// Runs every test registered above via the TEST macro.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,194 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
// Write "value" into buf[0..3], least-significant byte first.
void EncodeFixed32(char* buf, uint32_t value) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
  // Host order already matches the encoded order: one raw store.
  memcpy(buf, &value, sizeof(value));
#else
  for (int i = 0; i < 4; i++) {
    buf[i] = (value >> (8 * i)) & 0xff;
  }
#endif
}
// Write "value" into buf[0..7], least-significant byte first.
void EncodeFixed64(char* buf, uint64_t value) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
  // Host order already matches the encoded order: one raw store.
  memcpy(buf, &value, sizeof(value));
#else
  for (int i = 0; i < 8; i++) {
    buf[i] = (value >> (8 * i)) & 0xff;
  }
#endif
}
| void PutFixed32(std::string* dst, uint32_t value) { | |||
| char buf[sizeof(value)]; | |||
| EncodeFixed32(buf, value); | |||
| dst->append(buf, sizeof(buf)); | |||
| } | |||
| void PutFixed64(std::string* dst, uint64_t value) { | |||
| char buf[sizeof(value)]; | |||
| EncodeFixed64(buf, value); | |||
| dst->append(buf, sizeof(buf)); | |||
| } | |||
// Write the varint32 encoding of "v" at dst (1-5 bytes) and return a
// pointer just past the last byte written.  Each byte carries 7 payload
// bits; the high bit marks "more bytes follow".
char* EncodeVarint32(char* dst, uint32_t v) {
  // Operate on characters as unsigneds
  unsigned char* out = reinterpret_cast<unsigned char*>(dst);
  static const uint32_t kMoreBit = 128;
  while (v >= kMoreBit) {
    *(out++) = (v & (kMoreBit - 1)) | kMoreBit;
    v >>= 7;
  }
  *(out++) = static_cast<unsigned char>(v);
  return reinterpret_cast<char*>(out);
}
| void PutVarint32(std::string* dst, uint32_t v) { | |||
| char buf[5]; | |||
| char* ptr = EncodeVarint32(buf, v); | |||
| dst->append(buf, ptr - buf); | |||
| } | |||
// Write the varint64 encoding of "v" at dst (1-10 bytes) and return a
// pointer just past the last byte written.
char* EncodeVarint64(char* dst, uint64_t v) {
  static const uint32_t kMoreBit = 128;
  unsigned char* out = reinterpret_cast<unsigned char*>(dst);
  while (v >= kMoreBit) {
    // Low 7 bits plus the continuation marker.
    *(out++) = (v & (kMoreBit - 1)) | kMoreBit;
    v >>= 7;
  }
  *(out++) = static_cast<unsigned char>(v);
  return reinterpret_cast<char*>(out);
}
| void PutVarint64(std::string* dst, uint64_t v) { | |||
| char buf[10]; | |||
| char* ptr = EncodeVarint64(buf, v); | |||
| dst->append(buf, ptr - buf); | |||
| } | |||
// Append "<varint32 length><raw bytes of value>" to *dst.
// NOTE(review): value.size() (size_t) is narrowed to uint32_t by
// PutVarint32 — assumes slices are < 4GB; confirm with callers.
void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
  PutVarint32(dst, value.size());
  dst->append(value.data(), value.size());
}
// Number of bytes the varint encoding of "v" occupies (1-10): one byte
// per 7 payload bits.
int VarintLength(uint64_t v) {
  int len;
  for (len = 1; v >= 128; len++) {
    v >>= 7;
  }
  return len;
}
// Slow path of GetVarint32Ptr: decode a multi-byte varint32 from
// [p, limit).  Returns a pointer just past the varint and stores the
// decoded value in *value, or returns NULL if the input is truncated
// or the encoding exceeds 5 bytes (shifts 0,7,...,28).
const char* GetVarint32PtrFallback(const char* p,
                                   const char* limit,
                                   uint32_t* value) {
  uint32_t result = 0;
  uint32_t shift = 0;
  while (shift <= 28 && p < limit) {
    const uint32_t byte = *(reinterpret_cast<const unsigned char*>(p));
    p++;
    if ((byte & 128) == 0) {
      // Final byte: no continuation bit set.
      *value = result | (byte << shift);
      return p;
    }
    result |= (byte & 127) << shift;
    shift += 7;
  }
  return NULL;
}
// Parse a varint32 from the front of *input into *value and advance
// *input past it.  Returns false (leaving *input unchanged) on bad or
// truncated input.
bool GetVarint32(Slice* input, uint32_t* value) {
  const char* p = input->data();
  const char* limit = p + input->size();
  const char* q = GetVarint32Ptr(p, limit, value);
  if (q == NULL) {
    return false;
  } else {
    // Re-point *input at the unconsumed suffix.
    *input = Slice(q, limit - q);
    return true;
  }
}
// Decode a varint64 from [p, limit).  Returns a pointer just past the
// varint and stores the decoded value in *value, or NULL if the input
// is truncated or the encoding exceeds 10 bytes (shifts 0,7,...,63).
const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
  uint64_t result = 0;
  uint32_t shift = 0;
  while (shift <= 63 && p < limit) {
    const uint64_t byte = *(reinterpret_cast<const unsigned char*>(p));
    p++;
    if ((byte & 128) == 0) {
      // Final byte: no continuation bit set.
      *value = result | (byte << shift);
      return p;
    }
    result |= (byte & 127) << shift;
    shift += 7;
  }
  return NULL;
}
// Parse a varint64 from the front of *input into *value and advance
// *input past it.  Returns false (leaving *input unchanged) on bad or
// truncated input.
bool GetVarint64(Slice* input, uint64_t* value) {
  const char* p = input->data();
  const char* limit = p + input->size();
  const char* q = GetVarint64Ptr(p, limit, value);
  if (q == NULL) {
    return false;
  } else {
    // Re-point *input at the unconsumed suffix.
    *input = Slice(q, limit - q);
    return true;
  }
}
| const char* GetLengthPrefixedSlice(const char* p, const char* limit, | |||
| Slice* result) { | |||
| uint32_t len; | |||
| p = GetVarint32Ptr(p, limit, &len); | |||
| if (p == NULL) return NULL; | |||
| if (p + len > limit) return NULL; | |||
| *result = Slice(p, len); | |||
| return p + len; | |||
| } | |||
// Parse a length-prefixed slice from the front of *input; on success
// *result aliases input's data (no copy) and *input is advanced past it.
// NOTE(review): when the length prefix parses but the payload is short,
// *input has already been advanced past the prefix by GetVarint32 —
// callers should treat *input as unusable after a false return.
bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
  uint32_t len;
  if (GetVarint32(input, &len) &&
      input->size() >= len) {
    *result = Slice(input->data(), len);
    input->remove_prefix(len);
    return true;
  } else {
    return false;
  }
}
| } | |||
| @ -0,0 +1,104 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // Endian-neutral encoding: | |||
| // * Fixed-length numbers are encoded with least-significant byte first | |||
| // * In addition we support variable length "varint" encoding | |||
| // * Strings are encoded prefixed by their length in varint format | |||
| #ifndef STORAGE_LEVELDB_UTIL_CODING_H_ | |||
| #define STORAGE_LEVELDB_UTIL_CODING_H_ | |||
| #include <stdint.h> | |||
| #include <string.h> | |||
| #include <string> | |||
| #include "include/slice.h" | |||
| #include "port/port.h" | |||
| namespace leveldb { | |||
| // Standard Put... routines append to a string | |||
| extern void PutFixed32(std::string* dst, uint32_t value); | |||
| extern void PutFixed64(std::string* dst, uint64_t value); | |||
| extern void PutVarint32(std::string* dst, uint32_t value); | |||
| extern void PutVarint64(std::string* dst, uint64_t value); | |||
| extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value); | |||
| // Standard Get... routines parse a value from the beginning of a Slice | |||
| // and advance the slice past the parsed value. | |||
| extern bool GetVarint32(Slice* input, uint32_t* value); | |||
| extern bool GetVarint64(Slice* input, uint64_t* value); | |||
| extern bool GetLengthPrefixedSlice(Slice* input, Slice* result); | |||
| // Pointer-based variants of GetVarint... These either store a value | |||
| // in *v and return a pointer just past the parsed value, or return | |||
| // NULL on error. These routines only look at bytes in the range | |||
| // [p..limit-1] | |||
| extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v); | |||
| extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v); | |||
| // Returns the length of the varint32 or varint64 encoding of "v" | |||
| extern int VarintLength(uint64_t v); | |||
| // Lower-level versions of Put... that write directly into a character buffer | |||
| // REQUIRES: dst has enough space for the value being written | |||
| extern void EncodeFixed32(char* dst, uint32_t value); | |||
| extern void EncodeFixed64(char* dst, uint64_t value); | |||
| // Lower-level versions of Put... that write directly into a character buffer | |||
| // and return a pointer just past the last byte written. | |||
| // REQUIRES: dst has enough space for the value being written | |||
| extern char* EncodeVarint32(char* dst, uint32_t value); | |||
| extern char* EncodeVarint64(char* dst, uint64_t value); | |||
| // Lower-level versions of Get... that read directly from a character buffer | |||
| // without any bounds checking. | |||
| inline uint32_t DecodeFixed32(const char* ptr) { | |||
| if (port::kLittleEndian) { | |||
| // Load the raw bytes | |||
| uint32_t result; | |||
| memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load | |||
| return result; | |||
| } else { | |||
| return ((static_cast<uint32_t>(ptr[0])) | |||
| | (static_cast<uint32_t>(ptr[1]) << 8) | |||
| | (static_cast<uint32_t>(ptr[2]) << 16) | |||
| | (static_cast<uint32_t>(ptr[3]) << 24)); | |||
| } | |||
| } | |||
// Decode 8 little-endian bytes at ptr into a uint64_t.
inline uint64_t DecodeFixed64(const char* ptr) {
  if (port::kLittleEndian) {
    // Load the raw bytes
    uint64_t result;
    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
    return result;
  } else {
    // Assemble from two little-endian 32-bit halves.
    uint64_t lo = DecodeFixed32(ptr);
    uint64_t hi = DecodeFixed32(ptr + 4);
    return (hi << 32) | lo;
  }
}
| // Internal routine for use by fallback path of GetVarint32Ptr | |||
| extern const char* GetVarint32PtrFallback(const char* p, | |||
| const char* limit, | |||
| uint32_t* value); | |||
| inline const char* GetVarint32Ptr(const char* p, | |||
| const char* limit, | |||
| uint32_t* value) { | |||
| if (p < limit) { | |||
| uint32_t result = *(reinterpret_cast<const unsigned char*>(p)); | |||
| if ((result & 128) == 0) { | |||
| *value = result; | |||
| return p + 1; | |||
| } | |||
| } | |||
| return GetVarint32PtrFallback(p, limit, value); | |||
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_UTIL_CODING_H_ | |||
| @ -0,0 +1,173 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #include "util/coding.h" | |||
| #include "util/testharness.h" | |||
| namespace leveldb { | |||
// Empty fixture: the coding routines are free functions.
class Coding { };

// Round-trip 100000 consecutive values through PutFixed32/DecodeFixed32.
TEST(Coding, Fixed32) {
  std::string s;
  for (uint32_t v = 0; v < 100000; v++) {
    PutFixed32(&s, v);
  }

  const char* p = s.data();
  for (uint32_t v = 0; v < 100000; v++) {
    uint32_t actual = DecodeFixed32(p);
    ASSERT_EQ(v, actual);
    p += sizeof(uint32_t);
  }
}
// Round-trip values near every power of two through PutFixed64/DecodeFixed64.
TEST(Coding, Fixed64) {
  std::string s;
  for (int power = 0; power <= 63; power++) {
    uint64_t v = static_cast<uint64_t>(1) << power;
    PutFixed64(&s, v - 1);
    PutFixed64(&s, v + 0);
    PutFixed64(&s, v + 1);
  }

  const char* p = s.data();
  for (int power = 0; power <= 63; power++) {
    uint64_t v = static_cast<uint64_t>(1) << power;
    uint64_t actual;
    actual = DecodeFixed64(p);
    ASSERT_EQ(v-1, actual);
    p += sizeof(uint64_t);

    actual = DecodeFixed64(p);
    ASSERT_EQ(v+0, actual);
    p += sizeof(uint64_t);

    actual = DecodeFixed64(p);
    ASSERT_EQ(v+1, actual);
    p += sizeof(uint64_t);
  }
}
// Round-trip 32*32 shifted values through PutVarint32/GetVarint32Ptr and
// check each encoding's length matches VarintLength.
TEST(Coding, Varint32) {
  std::string s;
  for (uint32_t i = 0; i < (32 * 32); i++) {
    uint32_t v = (i / 32) << (i % 32);
    PutVarint32(&s, v);
  }

  const char* p = s.data();
  const char* limit = p + s.size();
  for (uint32_t i = 0; i < (32 * 32); i++) {
    uint32_t expected = (i / 32) << (i % 32);
    uint32_t actual;
    const char* start = p;
    p = GetVarint32Ptr(p, limit, &actual);
    ASSERT_TRUE(p != NULL);
    ASSERT_EQ(expected, actual);
    ASSERT_EQ(VarintLength(actual), p - start);
  }
  ASSERT_EQ(p, s.data() + s.size());
}
| TEST(Coding, Varint64) { | |||
| // Construct the list of values to check | |||
| std::vector<uint64_t> values; | |||
| // Some special values | |||
| values.push_back(0); | |||
| values.push_back(100); | |||
| values.push_back(~static_cast<uint64_t>(0)); | |||
| values.push_back(~static_cast<uint64_t>(0) - 1); | |||
| for (uint32_t k = 0; k < 64; k++) { | |||
| // Test values near powers of two | |||
| const uint64_t power = 1ull << k; | |||
| values.push_back(power); | |||
| values.push_back(power-1); | |||
| values.push_back(power+1); | |||
| }; | |||
| std::string s; | |||
| for (int i = 0; i < values.size(); i++) { | |||
| PutVarint64(&s, values[i]); | |||
| } | |||
| const char* p = s.data(); | |||
| const char* limit = p + s.size(); | |||
| for (int i = 0; i < values.size(); i++) { | |||
| ASSERT_TRUE(p < limit); | |||
| uint64_t actual; | |||
| const char* start = p; | |||
| p = GetVarint64Ptr(p, limit, &actual); | |||
| ASSERT_TRUE(p != NULL); | |||
| ASSERT_EQ(values[i], actual); | |||
| ASSERT_EQ(VarintLength(actual), p - start); | |||
| } | |||
| ASSERT_EQ(p, limit); | |||
| } | |||
| TEST(Coding, Varint32Overflow) { | |||
| uint32_t result; | |||
| std::string input("\x81\x82\x83\x84\x85\x11"); | |||
| ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(), &result) | |||
| == NULL); | |||
| } | |||
| TEST(Coding, Varint32Truncation) { | |||
| uint32_t large_value = (1u << 31) + 100; | |||
| std::string s; | |||
| PutVarint32(&s, large_value); | |||
| uint32_t result; | |||
| for (int len = 0; len < s.size() - 1; len++) { | |||
| ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == NULL); | |||
| } | |||
| ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + s.size(), &result) != NULL); | |||
| ASSERT_EQ(large_value, result); | |||
| } | |||
| TEST(Coding, Varint64Overflow) { | |||
| uint64_t result; | |||
| std::string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11"); | |||
| ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(), &result) | |||
| == NULL); | |||
| } | |||
| TEST(Coding, Varint64Truncation) { | |||
| uint64_t large_value = (1ull << 63) + 100ull; | |||
| std::string s; | |||
| PutVarint64(&s, large_value); | |||
| uint64_t result; | |||
| for (int len = 0; len < s.size() - 1; len++) { | |||
| ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == NULL); | |||
| } | |||
| ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + s.size(), &result) != NULL); | |||
| ASSERT_EQ(large_value, result); | |||
| } | |||
| TEST(Coding, Strings) { | |||
| std::string s; | |||
| PutLengthPrefixedSlice(&s, Slice("")); | |||
| PutLengthPrefixedSlice(&s, Slice("foo")); | |||
| PutLengthPrefixedSlice(&s, Slice("bar")); | |||
| PutLengthPrefixedSlice(&s, Slice(std::string(200, 'x'))); | |||
| Slice input(s); | |||
| Slice v; | |||
| ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); | |||
| ASSERT_EQ("", v.ToString()); | |||
| ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); | |||
| ASSERT_EQ("foo", v.ToString()); | |||
| ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); | |||
| ASSERT_EQ("bar", v.ToString()); | |||
| ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); | |||
| ASSERT_EQ(std::string(200, 'x'), v.ToString()); | |||
| ASSERT_EQ("", input.ToString()); | |||
| } | |||
| } | |||
// Test entry point: runs every test registered via the TEST macro.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
| @ -0,0 +1,72 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
#include "include/comparator.h"

#include <assert.h>
#include <stdint.h>

#include <algorithm>

#include "include/slice.h"
#include "util/logging.h"
| namespace leveldb { | |||
| Comparator::~Comparator() { } | |||
| namespace { | |||
| class BytewiseComparatorImpl : public Comparator { | |||
| public: | |||
| BytewiseComparatorImpl() { } | |||
| virtual const char* Name() const { | |||
| return "leveldb.BytewiseComparator"; | |||
| } | |||
| virtual int Compare(const Slice& a, const Slice& b) const { | |||
| return a.compare(b); | |||
| } | |||
| virtual void FindShortestSeparator( | |||
| std::string* start, | |||
| const Slice& limit) const { | |||
| // Find length of common prefix | |||
| size_t min_length = std::min(start->size(), limit.size()); | |||
| size_t diff_index = 0; | |||
| while ((diff_index < min_length) && | |||
| ((*start)[diff_index] == limit[diff_index])) { | |||
| diff_index++; | |||
| } | |||
| if (diff_index >= min_length) { | |||
| // Do not shorten if one string is a prefix of the other | |||
| } else { | |||
| uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]); | |||
| if (diff_byte < static_cast<uint8_t>(0xff) && | |||
| diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) { | |||
| (*start)[diff_index]++; | |||
| start->resize(diff_index + 1); | |||
| assert(Compare(*start, limit) < 0); | |||
| } | |||
| } | |||
| } | |||
| virtual void FindShortSuccessor(std::string* key) const { | |||
| // Find first character that can be incremented | |||
| size_t n = key->size(); | |||
| for (int i = 0; i < n; i++) { | |||
| const uint8_t byte = (*key)[i]; | |||
| if (byte != static_cast<uint8_t>(0xff)) { | |||
| (*key)[i] = byte + 1; | |||
| key->resize(i+1); | |||
| return; | |||
| } | |||
| } | |||
| // *key is a run of 0xffs. Leave it alone. | |||
| } | |||
| }; | |||
| } | |||
| static const BytewiseComparatorImpl bytewise; | |||
| const Comparator* BytewiseComparator() { | |||
| return &bytewise; | |||
| } | |||
| } | |||
| @ -0,0 +1,332 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| // | |||
| // A portable implementation of crc32c, optimized to handle | |||
| // four bytes at a time. | |||
| #include "util/crc32c.h" | |||
| #include <stdint.h> | |||
| #include "util/coding.h" | |||
| namespace leveldb { | |||
| namespace crc32c { | |||
// CRC32C lookup tables (generated data; do not edit by hand).
// table0_ is used by STEP1 in Extend() for byte-at-a-time updates and
// by STEP4 for the high-order (c >> 24) lane of each 4-byte word.
static const uint32_t table0_[256] = {
  0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
  0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
  0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
  0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
  0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
  0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
  0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
  0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
  0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
  0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
  0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
  0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
  0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
  0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
  0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
  0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
  0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
  0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
  0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
  0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
  0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
  0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
  0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
  0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
  0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
  0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
  0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
  0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
  0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
  0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
  0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
  0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
  0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
  0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
  0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
  0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
  0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
  0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
  0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
  0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
  0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
  0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
  0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
  0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
  0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
  0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
  0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
  0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
  0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
  0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
  0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
  0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
  0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
  0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
  0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
  0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
  0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
  0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
  0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
  0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
  0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
  0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
  0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
  0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351
};
// Slicing table for the (c >> 16) & 0xff lane of STEP4 in Extend().
// Generated data; do not edit by hand.
static const uint32_t table1_[256] = {
  0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899,
  0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945,
  0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21,
  0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd,
  0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918,
  0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4,
  0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0,
  0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c,
  0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b,
  0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47,
  0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823,
  0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff,
  0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a,
  0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6,
  0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2,
  0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e,
  0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d,
  0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41,
  0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25,
  0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9,
  0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c,
  0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0,
  0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4,
  0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78,
  0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f,
  0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43,
  0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27,
  0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb,
  0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e,
  0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2,
  0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6,
  0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a,
  0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260,
  0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc,
  0x66d73941, 0x7575a136, 0x419209af, 0x523091d8,
  0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004,
  0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1,
  0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d,
  0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059,
  0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185,
  0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162,
  0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be,
  0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da,
  0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306,
  0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3,
  0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f,
  0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b,
  0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287,
  0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464,
  0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8,
  0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc,
  0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600,
  0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5,
  0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439,
  0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d,
  0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781,
  0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766,
  0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba,
  0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de,
  0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502,
  0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7,
  0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b,
  0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f,
  0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483
};
// Slicing table for the (c >> 8) & 0xff lane of STEP4 in Extend().
// Generated data; do not edit by hand.
static const uint32_t table2_[256] = {
  0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073,
  0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469,
  0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6,
  0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac,
  0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9,
  0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3,
  0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c,
  0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726,
  0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67,
  0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d,
  0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2,
  0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8,
  0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed,
  0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7,
  0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828,
  0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32,
  0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa,
  0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0,
  0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f,
  0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75,
  0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20,
  0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a,
  0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5,
  0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff,
  0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe,
  0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4,
  0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b,
  0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161,
  0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634,
  0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e,
  0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1,
  0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb,
  0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730,
  0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a,
  0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5,
  0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def,
  0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba,
  0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0,
  0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f,
  0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065,
  0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24,
  0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e,
  0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1,
  0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb,
  0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae,
  0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4,
  0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b,
  0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71,
  0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9,
  0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3,
  0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c,
  0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36,
  0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63,
  0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79,
  0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6,
  0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc,
  0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd,
  0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7,
  0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238,
  0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622,
  0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177,
  0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d,
  0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2,
  0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8
};
// Slicing table for the low-order (c & 0xff) lane of STEP4 in
// Extend().  Generated data; do not edit by hand.
static const uint32_t table3_[256] = {
  0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939,
  0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca,
  0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf,
  0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c,
  0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804,
  0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7,
  0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2,
  0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11,
  0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2,
  0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41,
  0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54,
  0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7,
  0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f,
  0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c,
  0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69,
  0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a,
  0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de,
  0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d,
  0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538,
  0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb,
  0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3,
  0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610,
  0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405,
  0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6,
  0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255,
  0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6,
  0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3,
  0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040,
  0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368,
  0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b,
  0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e,
  0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d,
  0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006,
  0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5,
  0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0,
  0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213,
  0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b,
  0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8,
  0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd,
  0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e,
  0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d,
  0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e,
  0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b,
  0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698,
  0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0,
  0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443,
  0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656,
  0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5,
  0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1,
  0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12,
  0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07,
  0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4,
  0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc,
  0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f,
  0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a,
  0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9,
  0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a,
  0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99,
  0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c,
  0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f,
  0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57,
  0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4,
  0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1,
  0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842
};
| // Used to fetch a naturally-aligned 32-bit word in little endian byte-order | |||
| static inline uint32_t LE_LOAD32(const uint8_t *p) { | |||
| return DecodeFixed32(reinterpret_cast<const char*>(p)); | |||
| } | |||
// Returns the crc32c of concat(A, buf[0,size-1]) where "crc" is the
// crc32c of some string A.
//
// Implementation: slicing-by-4.  Bytes are consumed one at a time
// until p reaches a 4-byte boundary, then four at a time (unrolled to
// 16 per iteration) using the four lookup tables, then one at a time
// for the tail.
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
  const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
  const uint8_t *e = p + size;
  // The crc state is kept inverted; invert on entry and again on exit.
  uint32_t l = crc ^ 0xffffffffu;

// Advance the crc state by a single input byte via table0_.
#define STEP1 do {                              \
    int c = (l & 0xff) ^ *p++;                  \
    l = table0_[c] ^ (l >> 8);                  \
} while (0)
// Advance the crc state by four input bytes at once: each byte of
// (l ^ word) indexes its own table and the results are xor-ed.
#define STEP4 do {                              \
    uint32_t c = l ^ LE_LOAD32(p);              \
    p += 4;                                     \
    l = table3_[c & 0xff] ^                     \
        table2_[(c >> 8) & 0xff] ^              \
        table1_[(c >> 16) & 0xff] ^             \
        table0_[c >> 24];                       \
} while (0)

  // Point x at first 4-byte aligned byte in string.  This might be
  // just past the end of the string.
  const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
  const uint8_t* x = reinterpret_cast<const uint8_t*>(((pval + 3) >> 2) << 2);
  if (x <= e) {
    // Process bytes until finished or p is 4-byte aligned
    while (p != x) {
      STEP1;
    }
  }
  // Process bytes 16 at a time
  while ((e-p) >= 16) {
    STEP4; STEP4; STEP4; STEP4;
  }
  // Process bytes 4 at a time
  while ((e-p) >= 4) {
    STEP4;
  }
  // Process the last few bytes
  while (p != e) {
    STEP1;
  }
#undef STEP4
#undef STEP1
  return l ^ 0xffffffffu;
}
| } | |||
| } | |||
| @ -0,0 +1,45 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
| #ifndef STORAGE_LEVELDB_UTIL_CRC32C_H_ | |||
| #define STORAGE_LEVELDB_UTIL_CRC32C_H_ | |||
| #include <stddef.h> | |||
| #include <stdint.h> | |||
| namespace leveldb { | |||
| namespace crc32c { | |||
// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the
// crc32c of some string A.  Extend() is often used to maintain the
// crc32c of a stream of data.
extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n);

// Return the crc32c of data[0,n-1].  Equivalent to Extend(0, data, n).
inline uint32_t Value(const char* data, size_t n) {
  return Extend(0, data, n);
}
static const uint32_t kMaskDelta = 0xa282ead8ul;

// Return a masked representation of crc.
//
// Motivation: it is problematic to compute the CRC of a string that
// contains embedded CRCs.  Therefore we recommend that CRCs stored
// somewhere (e.g., in files) should be masked before being stored.
inline uint32_t Mask(uint32_t crc) {
  // Rotate right by 15 bits and add a constant.
  const uint32_t rotated = (crc >> 15) | (crc << 17);
  return rotated + kMaskDelta;
}

// Return the crc whose masked representation is masked_crc.
inline uint32_t Unmask(uint32_t masked_crc) {
  // Subtract the constant, then rotate left by 15 bits.
  const uint32_t unrotated = masked_crc - kMaskDelta;
  return (unrotated >> 17) | (unrotated << 15);
}
| } | |||
| } | |||
| #endif // STORAGE_LEVELDB_UTIL_CRC32C_H_ | |||
| @ -0,0 +1,86 @@ | |||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |||
| // Use of this source code is governed by a BSD-style license that can be | |||
| // found in the LICENSE file. See the AUTHORS file for names of contributors. | |||
#include "util/crc32c.h"

#include <stdio.h>
#include <string.h>

#include <string>

#include "util/testharness.h"
| namespace leveldb { | |||
| namespace crc32c { | |||
| class CRC { }; | |||
| TEST(CRC, StandardResults) { | |||
| // From rfc3720 section B.4. | |||
| char buf[32]; | |||
| memset(buf, 0, sizeof(buf)); | |||
| ASSERT_EQ(0x8a9136aa, Value(buf, sizeof(buf))); | |||
| memset(buf, 0xff, sizeof(buf)); | |||
| ASSERT_EQ(0x62a8ab43, Value(buf, sizeof(buf))); | |||
| for (int i = 0; i < 32; i++) { | |||
| buf[i] = i; | |||
| } | |||
| ASSERT_EQ(0x46dd794e, Value(buf, sizeof(buf))); | |||
| for (int i = 0; i < 32; i++) { | |||
| buf[i] = 31 - i; | |||
| } | |||
| ASSERT_EQ(0x113fdb5c, Value(buf, sizeof(buf))); | |||
| unsigned char data[48] = { | |||
| 0x01, 0xc0, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, | |||
| 0x14, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x04, 0x00, | |||
| 0x00, 0x00, 0x00, 0x14, | |||
| 0x00, 0x00, 0x00, 0x18, | |||
| 0x28, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, | |||
| 0x02, 0x00, 0x00, 0x00, | |||
| 0x00, 0x00, 0x00, 0x00, | |||
| }; | |||
| ASSERT_EQ(0xd9963a56, Value(reinterpret_cast<char*>(data), sizeof(data))); | |||
| } | |||
| TEST(CRC, Values) { | |||
| ASSERT_NE(Value("a", 1), Value("foo", 3)); | |||
| } | |||
| TEST(CRC, Extend) { | |||
| ASSERT_EQ(Value("hello world", 11), | |||
| Extend(Value("hello ", 6), "world", 5)); | |||
| } | |||
| TEST(CRC, Mask) { | |||
| uint32_t crc = Value("foo", 3); | |||
| ASSERT_NE(crc, Mask(crc)); | |||
| ASSERT_NE(crc, Mask(Mask(crc))); | |||
| ASSERT_EQ(crc, Unmask(Mask(crc))); | |||
| ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc))))); | |||
| } | |||
| TEST(CRC, Benchmark) { | |||
| std::string data(1048576 * 100, 'x'); | |||
| double start = Env::Default()->NowMicros() * 1e-6; | |||
| static const int kIters = 10; | |||
| uint32_t crc = 0; | |||
| for (int i = 0; i < kIters; i++) { | |||
| crc |= Value(data.data(), data.size()); | |||
| } | |||
| double finish = Env::Default()->NowMicros() * 1e-6; | |||
| double mb = (static_cast<long long int>(data.size()) * kIters) / 1048576.0; | |||
| fprintf(stderr, "CRC %0.0f MB: %.3f secs; %.1f MB/s, crc=0x%08x\n", | |||
| mb, (finish - start), mb / (finish - start), crc); | |||
| } | |||
| } | |||
| } | |||
// Test entry point: runs every test registered via the TEST macro.
int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}