From 946e5b5a4ce7980917b22a408f090a4e86c3fa44 Mon Sep 17 00:00:00 2001 From: David Grogan Date: Fri, 12 Oct 2012 11:53:12 -0700 Subject: [PATCH] Update to leveldb 1.6 Highlights ---------- Mmap at most 1000 files on Posix to improve performance for large databases. Support for more architectures (thanks to Alexander K.) Building and porting -------------------- HP/UX support (issue 126) AtomicPointer for ia64 (issue 123) Sparc v9 support (issue 124) Atomic ops for powerpc Use -fno-builtin-memcmp only when using g++ Simplify IOS build rules (issue 114) Use CXXFLAGS instead of CFLAGS when invoking C++ compiler (issue 118) Fix snappy shared library problem (issue 94) Fix shared library installation path regression Endian-ness detection tweak for FreeBSD Bug fixes --------- Stop ignoring FLAGS_open_files in db_bench Make bloom test behavior agnostic to endian-ness Performance ----------- Limit number of mmapped files to 1000 to improve perf for large dbs Do not delay for 1 second on shutdown path (issue 125) Misc ---- Make InMemoryEnv return a no-op logger C binding now has a wrapper for free (issue 117) Add thread-safety annotations Added an in-process lock table (issue 120) Make RandomAccessFile and SequentialFile non-copyable --- Makefile | 11 ++--- TODO | 1 + build_detect_platform | 49 +++++++++++++------- db/c.cc | 4 ++ db/c_test.c | 6 +++ db/db_bench.cc | 1 + db/db_impl.cc | 6 ++- db/db_impl.h | 28 +++++++----- db/db_test.cc | 6 +++ db/version_set.cc | 2 +- db/version_set.h | 4 +- doc/bench/db_bench_sqlite3.cc | 2 +- doc/index.html | 2 +- helpers/memenv/memenv.cc | 10 +++++ include/leveldb/c.h | 10 +++++ include/leveldb/db.h | 2 +- include/leveldb/env.h | 10 +++++ port/atomic_pointer.h | 72 ++++++++++++++++++++++++++++++ port/port_posix.h | 11 ++++- port/thread_annotations.h | 59 ++++++++++++++++++++++++ table/block.cc | 4 +- util/bloom_test.cc | 5 ++- util/coding.cc | 40 ++++++++--------- util/env_posix.cc | 101 +++++++++++++++++++++++++++++++++++++++--- util/mutexlock.h | 8 ++-- 25 files changed, 382 insertions(+), 72 deletions(-) create mode 100644 port/thread_annotations.h diff --git a/Makefile b/Makefile index c648a28..4dd1366 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,8 @@ OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode) #----------------------------------------------- # detect what platform we're building on -$(shell ./build_detect_platform build_config.mk) +$(shell CC=$(CC) CXX=$(CXX) TARGET_OS=$(TARGET_OS) \ + ./build_detect_platform build_config.mk ./) # this file is generated by the previous line to set build flags and sources include build_config.mk @@ -70,7 +71,7 @@ SHARED = $(SHARED1) else # Update db.h if you change these. SHARED_MAJOR = 1 -SHARED_MINOR = 5 +SHARED_MINOR = 6 SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT) SHARED2 = $(SHARED1).$(SHARED_MAJOR) SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR) @@ -82,7 +83,7 @@ $(SHARED2): $(SHARED3) endif $(SHARED3): - $(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) + $(CXX) $(SOURCES) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(INSTALL_PATH)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -o $(SHARED3) endif # PLATFORM_SHARED_EXT @@ -179,14 +180,14 @@ IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBu .cc.o: mkdir -p ios-x86/$(dir $@) - $(SIMULATORROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ + $(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ mkdir -p ios-arm/$(dir $@) $(DEVICEROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@ lipo ios-x86/$@ ios-arm/$@ -create -output $@ .c.o: mkdir -p ios-x86/$(dir $@) - $(SIMULATORROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ + $(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ mkdir -p ios-arm/$(dir $@) $(DEVICEROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@ lipo ios-x86/$@ ios-arm/$@ -create -output $@ diff --git a/TODO b/TODO index 9130b6a..e603c07 100644 --- a/TODO +++ b/TODO @@ -7,6 +7,7 @@ db within [start_key..end_key]? For Chrome, deletion of obsolete object stores, etc. can be done in the background anyway, so probably not that important. +- There have been requests for MultiGet. After a range is completely deleted, what gets rid of the corresponding files if we do no future changes to that range. Make diff --git a/build_detect_platform b/build_detect_platform index 959a7d6..83bbe42 100755 --- a/build_detect_platform +++ b/build_detect_platform @@ -23,8 +23,9 @@ # OUTPUT=$1 -if test -z "$OUTPUT"; then - echo "usage: $0 " >&2 +PREFIX=$2 +if test -z "$OUTPUT" || test -z "$PREFIX"; then + echo "usage: $0 " >&2 exit 1 fi @@ -55,58 +56,72 @@ PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl," PLATFORM_SHARED_CFLAGS="-fPIC" PLATFORM_SHARED_VERSIONED=true -# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp +MEMCMP_FLAG= +if [ "$CXX" = "g++" ]; then + # Use libc's memcmp instead of GCC's memcmp. This results in ~40% + # performance improvement on readrandom under gcc 4.4.3 on Linux/x86. + MEMCMP_FLAG="-fno-builtin-memcmp" +fi + case "$TARGET_OS" in Darwin) PLATFORM=OS_MACOSX - COMMON_FLAGS="-fno-builtin-memcmp -DOS_MACOSX" + COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX" PLATFORM_SHARED_EXT=dylib PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " PORT_FILE=port/port_posix.cc ;; Linux) PLATFORM=OS_LINUX - COMMON_FLAGS="-fno-builtin-memcmp -pthread -DOS_LINUX" + COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX" PLATFORM_LDFLAGS="-pthread" PORT_FILE=port/port_posix.cc ;; SunOS) PLATFORM=OS_SOLARIS - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS" PLATFORM_LDFLAGS="-lpthread -lrt" PORT_FILE=port/port_posix.cc ;; FreeBSD) PLATFORM=OS_FREEBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD" PLATFORM_LDFLAGS="-lpthread" PORT_FILE=port/port_posix.cc ;; NetBSD) PLATFORM=OS_NETBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD" PLATFORM_LDFLAGS="-lpthread -lgcc_s" PORT_FILE=port/port_posix.cc ;; OpenBSD) PLATFORM=OS_OPENBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD" PLATFORM_LDFLAGS="-pthread" PORT_FILE=port/port_posix.cc ;; DragonFly) PLATFORM=OS_DRAGONFLYBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD" PLATFORM_LDFLAGS="-lpthread" PORT_FILE=port/port_posix.cc ;; OS_ANDROID_CROSSCOMPILE) PLATFORM=OS_ANDROID - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX" PLATFORM_LDFLAGS="" # All pthread features are in the Android C library PORT_FILE=port/port_posix.cc CROSS_COMPILE=true ;; + HP-UX) + PLATFORM=OS_HPUX + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX" + PLATFORM_LDFLAGS="-pthread" + PORT_FILE=port/port_posix.cc + # man ld: +h internal_name + PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl," + ;; *) echo "Unknown platform!" >&2 exit 1 @@ -116,11 +131,13 @@ esac # except for the test and benchmark files. By default, find will output a list # of all files matching either rule, so we need to append -print to make the # prune take effect. -DIRS="util db table" +DIRS="$PREFIX/db $PREFIX/util $PREFIX/table" + set -f # temporarily disable globbing so that our patterns aren't expanded PRUNE_TEST="-name *test*.cc -prune" PRUNE_BENCH="-name *_bench.cc -prune" -PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | tr "\n" " "` +PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "` + set +f # re-enable globbing # The sources consist of the portable files, plus the platform-specific port @@ -133,7 +150,7 @@ if [ "$CROSS_COMPILE" = "true" ]; then true else # If -std=c++0x works, use . Otherwise use port_posix.h. - $CXX $CFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() {} EOF @@ -146,7 +163,7 @@ EOF # Test whether Snappy library is installed # http://code.google.com/p/snappy/ - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() {} EOF @@ -156,7 +173,7 @@ EOF fi # Test whether tcmalloc is available - $CXX $CFLAGS -x c++ - -o /dev/null -ltcmalloc 2>/dev/null </dev/null <Name()) { s = Status::InvalidArgument( - edit.comparator_ + "does not match existing comparator ", + edit.comparator_ + " does not match existing comparator ", icmp_.user_comparator()->Name()); } } diff --git a/db/version_set.h b/db/version_set.h index 61c4c99..792899b 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -21,6 +21,7 @@ #include "db/dbformat.h" #include "db/version_edit.h" #include "port/port.h" +#include "port/thread_annotations.h" namespace leveldb { @@ -159,7 +160,8 @@ class VersionSet { // current version. Will release *mu while actually writing to the file. // REQUIRES: *mu is held on entry. // REQUIRES: no other thread concurrently calls LogAndApply() - Status LogAndApply(VersionEdit* edit, port::Mutex* mu); + Status LogAndApply(VersionEdit* edit, port::Mutex* mu) + EXCLUSIVE_LOCKS_REQUIRED(mu); // Recover the last saved descriptor from persistent storage. Status Recover(); diff --git a/doc/bench/db_bench_sqlite3.cc b/doc/bench/db_bench_sqlite3.cc index 256793a..e63aaa8 100644 --- a/doc/bench/db_bench_sqlite3.cc +++ b/doc/bench/db_bench_sqlite3.cc @@ -618,7 +618,7 @@ class Benchmark { ErrorCheck(status); // Execute read statement - while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW); + while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {} StepErrorCheck(status); // Reset SQLite statement for another use diff --git a/doc/index.html b/doc/index.html index 521d2ba..cd29341 100644 --- a/doc/index.html +++ b/doc/index.html @@ -408,7 +408,7 @@ The optional FilterPolicy mechanism can be used to reduce the number of disk reads substantially.
    leveldb::Options options;
-   options.filter_policy = NewBloomFilter(10);
+   options.filter_policy = NewBloomFilterPolicy(10);
    leveldb::DB* db;
    leveldb::DB::Open(options, "/tmp/testdb", &db);
    ... use the database ...
diff --git a/helpers/memenv/memenv.cc b/helpers/memenv/memenv.cc
index 2082083..5879de1 100644
--- a/helpers/memenv/memenv.cc
+++ b/helpers/memenv/memenv.cc
@@ -221,6 +221,11 @@ class WritableFileImpl : public WritableFile {
   FileState* file_;
 };
 
+class NoOpLogger : public Logger {
+ public:
+  virtual void Logv(const char* format, va_list ap) { }
+};
+
 class InMemoryEnv : public EnvWrapper {
  public:
   explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) { }
@@ -358,6 +363,11 @@ class InMemoryEnv : public EnvWrapper {
     return Status::OK();
   }
 
+  virtual Status NewLogger(const std::string& fname, Logger** result) {
+    *result = new NoOpLogger;
+    return Status::OK();
+  }
+
  private:
   // Map from filenames to FileState objects, representing a simple file system.
   typedef std::map FileSystem;
diff --git a/include/leveldb/c.h b/include/leveldb/c.h
index 70e3cc6..9cee971 100644
--- a/include/leveldb/c.h
+++ b/include/leveldb/c.h
@@ -28,6 +28,7 @@
   be true on entry:
      *errptr == NULL
      *errptr points to a malloc()ed null-terminated error message
+       (On Windows, *errptr must have been malloc()-ed by this library.)
   On success, a leveldb routine leaves *errptr unchanged.
   On failure, leveldb frees the old value of *errptr and
   set *errptr to a malloc()ed error message.
@@ -268,6 +269,15 @@ extern void leveldb_cache_destroy(leveldb_cache_t* cache);
 extern leveldb_env_t* leveldb_create_default_env();
 extern void leveldb_env_destroy(leveldb_env_t*);
 
+/* Utility */
+
+/* Calls free(ptr).
+   REQUIRES: ptr was malloc()-ed and returned by one of the routines
+   in this file.  Note that in certain cases (typically on Windows), you
+   may need to call this routine instead of free(ptr) to dispose of
+   malloc()-ed memory returned by this library. */
+extern void leveldb_free(void* ptr);
+
 #ifdef __cplusplus
 }  /* end extern "C" */
 #endif
diff --git a/include/leveldb/db.h b/include/leveldb/db.h
index ed56b87..a50ac69 100644
--- a/include/leveldb/db.h
+++ b/include/leveldb/db.h
@@ -14,7 +14,7 @@ namespace leveldb {
 
 // Update Makefile if you change these
 static const int kMajorVersion = 1;
-static const int kMinorVersion = 5;
+static const int kMinorVersion = 6;
 
 struct Options;
 struct ReadOptions;
diff --git a/include/leveldb/env.h b/include/leveldb/env.h
index 2720667..fa32289 100644
--- a/include/leveldb/env.h
+++ b/include/leveldb/env.h
@@ -175,6 +175,11 @@ class SequentialFile {
   //
   // REQUIRES: External synchronization
   virtual Status Skip(uint64_t n) = 0;
+
+ private:
+  // No copying allowed
+  SequentialFile(const SequentialFile&);
+  void operator=(const SequentialFile&);
 };
 
 // A file abstraction for randomly reading the contents of a file.
@@ -194,6 +199,11 @@ class RandomAccessFile {
   // Safe for concurrent use by multiple threads.
   virtual Status Read(uint64_t offset, size_t n, Slice* result,
                       char* scratch) const = 0;
+
+ private:
+  // No copying allowed
+  RandomAccessFile(const RandomAccessFile&);
+  void operator=(const RandomAccessFile&);
 };
 
 // A file abstraction for sequential writing.  The implementation
diff --git a/port/atomic_pointer.h b/port/atomic_pointer.h
index c58bffb..e17bf43 100644
--- a/port/atomic_pointer.h
+++ b/port/atomic_pointer.h
@@ -36,6 +36,8 @@
 #define ARCH_CPU_X86_FAMILY 1
 #elif defined(__ARMEL__)
 #define ARCH_CPU_ARM_FAMILY 1
+#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
+#define ARCH_CPU_PPC_FAMILY 1
 #endif
 
 namespace leveldb {
@@ -91,6 +93,15 @@ inline void MemoryBarrier() {
 }
 #define LEVELDB_HAVE_MEMORY_BARRIER
 
+// PPC
+#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
+inline void MemoryBarrier() {
+  // TODO for some powerpc expert: is there a cheaper suitable variant?
+  // Perhaps by having separate barriers for acquire and release ops.
+  asm volatile("sync" : : : "memory");
+}
+#define LEVELDB_HAVE_MEMORY_BARRIER
+
 #endif
 
 // AtomicPointer built using platform-specific MemoryBarrier()
@@ -136,6 +147,66 @@ class AtomicPointer {
   }
 };
 
+// Atomic pointer based on sparc memory barriers
+#elif defined(__sparcv9) && defined(__GNUC__)
+class AtomicPointer {
+ private:
+  void* rep_;
+ public:
+  AtomicPointer() { }
+  explicit AtomicPointer(void* v) : rep_(v) { }
+  inline void* Acquire_Load() const {
+    void* val;
+    __asm__ __volatile__ (
+        "ldx [%[rep_]], %[val] \n\t"
+         "membar #LoadLoad|#LoadStore \n\t"
+        : [val] "=r" (val)
+        : [rep_] "r" (&rep_)
+        : "memory");
+    return val;
+  }
+  inline void Release_Store(void* v) {
+    __asm__ __volatile__ (
+        "membar #LoadStore|#StoreStore \n\t"
+        "stx %[v], [%[rep_]] \n\t"
+        :
+        : [rep_] "r" (&rep_), [v] "r" (v)
+        : "memory");
+  }
+  inline void* NoBarrier_Load() const { return rep_; }
+  inline void NoBarrier_Store(void* v) { rep_ = v; }
+};
+
+// Atomic pointer based on ia64 acq/rel
+#elif defined(__ia64) && defined(__GNUC__)
+class AtomicPointer {
+ private:
+  void* rep_;
+ public:
+  AtomicPointer() { }
+  explicit AtomicPointer(void* v) : rep_(v) { }
+  inline void* Acquire_Load() const {
+    void* val    ;
+    __asm__ __volatile__ (
+        "ld8.acq %[val] = [%[rep_]] \n\t"
+        : [val] "=r" (val)
+        : [rep_] "r" (&rep_)
+        : "memory"
+        );
+    return val;
+  }
+  inline void Release_Store(void* v) {
+    __asm__ __volatile__ (
+        "st8.rel [%[rep_]] = %[v]  \n\t"
+        :
+        : [rep_] "r" (&rep_), [v] "r" (v)
+        : "memory"
+        );
+  }
+  inline void* NoBarrier_Load() const { return rep_; }
+  inline void NoBarrier_Store(void* v) { rep_ = v; }
+};
+
 // We have neither MemoryBarrier(), nor 
 #else
 #error Please implement AtomicPointer for this platform.
@@ -145,6 +216,7 @@ class AtomicPointer {
 #undef LEVELDB_HAVE_MEMORY_BARRIER
 #undef ARCH_CPU_X86_FAMILY
 #undef ARCH_CPU_ARM_FAMILY
+#undef ARCH_CPU_PPC_FAMILY
 
 }  // namespace port
 }  // namespace leveldb
diff --git a/port/port_posix.h b/port/port_posix.h
index 654a4b9..6ca352e 100644
--- a/port/port_posix.h
+++ b/port/port_posix.h
@@ -21,13 +21,20 @@
   #else
     #define PLATFORM_IS_LITTLE_ENDIAN false
   #endif
-#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) ||\
+#elif defined(OS_FREEBSD)
+  #include 
+  #include 
+  #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN)
+#elif defined(OS_OPENBSD) || defined(OS_NETBSD) ||\
       defined(OS_DRAGONFLYBSD) || defined(OS_ANDROID)
   #include 
   #include 
+#elif defined(OS_HPUX)
+  #define PLATFORM_IS_LITTLE_ENDIAN false
 #else
   #include 
 #endif
+
 #include 
 #ifdef SNAPPY
 #include 
@@ -42,7 +49,7 @@
 
 #if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\
     defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\
-    defined(OS_ANDROID)
+    defined(OS_ANDROID) || defined(OS_HPUX)
 // Use fread/fwrite/fflush on platforms without _unlocked variants
 #define fread_unlocked fread
 #define fwrite_unlocked fwrite
diff --git a/port/thread_annotations.h b/port/thread_annotations.h
new file mode 100644
index 0000000..6f9b6a7
--- /dev/null
+++ b/port/thread_annotations.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H
+
+// Some environments provide custom macros to aid in static thread-safety
+// analysis.  Provide empty definitions of such macros unless they are already
+// defined.
+
+#ifndef EXCLUSIVE_LOCKS_REQUIRED
+#define EXCLUSIVE_LOCKS_REQUIRED(...)
+#endif
+
+#ifndef SHARED_LOCKS_REQUIRED
+#define SHARED_LOCKS_REQUIRED(...)
+#endif
+
+#ifndef LOCKS_EXCLUDED
+#define LOCKS_EXCLUDED(...)
+#endif
+
+#ifndef LOCK_RETURNED
+#define LOCK_RETURNED(x)
+#endif
+
+#ifndef LOCKABLE
+#define LOCKABLE
+#endif
+
+#ifndef SCOPED_LOCKABLE
+#define SCOPED_LOCKABLE
+#endif
+
+#ifndef EXCLUSIVE_LOCK_FUNCTION
+#define EXCLUSIVE_LOCK_FUNCTION(...)
+#endif
+
+#ifndef SHARED_LOCK_FUNCTION
+#define SHARED_LOCK_FUNCTION(...)
+#endif
+
+#ifndef EXCLUSIVE_TRYLOCK_FUNCTION
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
+#endif
+
+#ifndef SHARED_TRYLOCK_FUNCTION
+#define SHARED_TRYLOCK_FUNCTION(...)
+#endif
+
+#ifndef UNLOCK_FUNCTION
+#define UNLOCK_FUNCTION(...)
+#endif
+
+#ifndef NO_THREAD_SAFETY_ANALYSIS
+#define NO_THREAD_SAFETY_ANALYSIS
+#endif
+
+#endif  // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H
diff --git a/table/block.cc b/table/block.cc
index 199d453..ab83c11 100644
--- a/table/block.cc
+++ b/table/block.cc
@@ -162,8 +162,8 @@ class Block::Iter : public Iterator {
   }
 
   virtual void Seek(const Slice& target) {
-    // Binary search in restart array to find the first restart point
-    // with a key >= target
+    // Binary search in restart array to find the last restart point
+    // with a key < target
     uint32_t left = 0;
     uint32_t right = num_restarts_ - 1;
     while (left < right) {
diff --git a/util/bloom_test.cc b/util/bloom_test.cc
index 4a6ea1b..0bf8e8d 100644
--- a/util/bloom_test.cc
+++ b/util/bloom_test.cc
@@ -4,6 +4,7 @@
 
 #include "leveldb/filter_policy.h"
 
+#include "util/coding.h"
 #include "util/logging.h"
 #include "util/testharness.h"
 #include "util/testutil.h"
@@ -13,8 +14,8 @@ namespace leveldb {
 static const int kVerbose = 1;
 
 static Slice Key(int i, char* buffer) {
-  memcpy(buffer, &i, sizeof(i));
-  return Slice(buffer, sizeof(i));
+  EncodeFixed32(buffer, i);
+  return Slice(buffer, sizeof(uint32_t));
 }
 
 class BloomTest {
diff --git a/util/coding.cc b/util/coding.cc
index dbd7a65..21e3186 100644
--- a/util/coding.cc
+++ b/util/coding.cc
@@ -7,29 +7,29 @@
 namespace leveldb {
 
 void EncodeFixed32(char* buf, uint32_t value) {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  memcpy(buf, &value, sizeof(value));
-#else
-  buf[0] = value & 0xff;
-  buf[1] = (value >> 8) & 0xff;
-  buf[2] = (value >> 16) & 0xff;
-  buf[3] = (value >> 24) & 0xff;
-#endif
+  if (port::kLittleEndian) {
+    memcpy(buf, &value, sizeof(value));
+  } else {
+    buf[0] = value & 0xff;
+    buf[1] = (value >> 8) & 0xff;
+    buf[2] = (value >> 16) & 0xff;
+    buf[3] = (value >> 24) & 0xff;
+  }
 }
 
 void EncodeFixed64(char* buf, uint64_t value) {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  memcpy(buf, &value, sizeof(value));
-#else
-  buf[0] = value & 0xff;
-  buf[1] = (value >> 8) & 0xff;
-  buf[2] = (value >> 16) & 0xff;
-  buf[3] = (value >> 24) & 0xff;
-  buf[4] = (value >> 32) & 0xff;
-  buf[5] = (value >> 40) & 0xff;
-  buf[6] = (value >> 48) & 0xff;
-  buf[7] = (value >> 56) & 0xff;
-#endif
+  if (port::kLittleEndian) {
+    memcpy(buf, &value, sizeof(value));
+  } else {
+    buf[0] = value & 0xff;
+    buf[1] = (value >> 8) & 0xff;
+    buf[2] = (value >> 16) & 0xff;
+    buf[3] = (value >> 24) & 0xff;
+    buf[4] = (value >> 32) & 0xff;
+    buf[5] = (value >> 40) & 0xff;
+    buf[6] = (value >> 48) & 0xff;
+    buf[7] = (value >> 56) & 0xff;
+  }
 }
 
 void PutFixed32(std::string* dst, uint32_t value) {
diff --git a/util/env_posix.cc b/util/env_posix.cc
index cb1f6fc..78e09c9 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -3,6 +3,7 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -23,6 +24,7 @@
 #include "leveldb/slice.h"
 #include "port/port.h"
 #include "util/logging.h"
+#include "util/mutexlock.h"
 #include "util/posix_logger.h"
 
 namespace leveldb {
@@ -90,18 +92,75 @@ class PosixRandomAccessFile: public RandomAccessFile {
   }
 };
 
+// Helper class to limit mmap file usage so that we do not end up
+// running out virtual memory or running into kernel performance
+// problems for very large databases.
+class MmapLimiter {
+ public:
+  // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
+  MmapLimiter() {
+    SetAllowed(sizeof(void*) >= 8 ? 1000 : 0);
+  }
+
+  // If another mmap slot is available, acquire it and return true.
+  // Else return false.
+  bool Acquire() {
+    if (GetAllowed() <= 0) {
+      return false;
+    }
+    MutexLock l(&mu_);
+    intptr_t x = GetAllowed();
+    if (x <= 0) {
+      return false;
+    } else {
+      SetAllowed(x - 1);
+      return true;
+    }
+  }
+
+  // Release a slot acquired by a previous call to Acquire() that returned true.
+  void Release() {
+    MutexLock l(&mu_);
+    SetAllowed(GetAllowed() + 1);
+  }
+
+ private:
+  port::Mutex mu_;
+  port::AtomicPointer allowed_;
+
+  intptr_t GetAllowed() const {
+    return reinterpret_cast(allowed_.Acquire_Load());
+  }
+
+  // REQUIRES: mu_ must be held
+  void SetAllowed(intptr_t v) {
+    allowed_.Release_Store(reinterpret_cast(v));
+  }
+
+  MmapLimiter(const MmapLimiter&);
+  void operator=(const MmapLimiter&);
+};
+
 // mmap() based random-access
 class PosixMmapReadableFile: public RandomAccessFile {
  private:
   std::string filename_;
   void* mmapped_region_;
   size_t length_;
+  MmapLimiter* limiter_;
 
  public:
   // base[0,length-1] contains the mmapped contents of the file.
-  PosixMmapReadableFile(const std::string& fname, void* base, size_t length)
-      : filename_(fname), mmapped_region_(base), length_(length) { }
-  virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); }
+  PosixMmapReadableFile(const std::string& fname, void* base, size_t length,
+                        MmapLimiter* limiter)
+      : filename_(fname), mmapped_region_(base), length_(length),
+        limiter_(limiter) {
+  }
+
+  virtual ~PosixMmapReadableFile() {
+    munmap(mmapped_region_, length_);
+    limiter_->Release();
+  }
 
   virtual Status Read(uint64_t offset, size_t n, Slice* result,
                       char* scratch) const {
@@ -300,6 +359,25 @@ static int LockOrUnlock(int fd, bool lock) {
 class PosixFileLock : public FileLock {
  public:
   int fd_;
+  std::string name_;
+};
+
+// Set of locked files.  We keep a separate set instead of just
+// relying on fcntrl(F_SETLK) since fcntl(F_SETLK) does not provide
+// any protection against multiple uses from the same process.
+class PosixLockTable {
+ private:
+  port::Mutex mu_;
+  std::set locked_files_;
+ public:
+  bool Insert(const std::string& fname) {
+    MutexLock l(&mu_);
+    return locked_files_.insert(fname).second;
+  }
+  void Remove(const std::string& fname) {
+    MutexLock l(&mu_);
+    locked_files_.erase(fname);
+  }
 };
 
 class PosixEnv : public Env {
@@ -329,19 +407,21 @@ class PosixEnv : public Env {
     int fd = open(fname.c_str(), O_RDONLY);
     if (fd < 0) {
       s = IOError(fname, errno);
-    } else if (sizeof(void*) >= 8) {
-      // Use mmap when virtual address-space is plentiful.
+    } else if (mmap_limit_.Acquire()) {
       uint64_t size;
       s = GetFileSize(fname, &size);
       if (s.ok()) {
         void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
         if (base != MAP_FAILED) {
-          *result = new PosixMmapReadableFile(fname, base, size);
+          *result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_);
         } else {
           s = IOError(fname, errno);
         }
       }
       close(fd);
+      if (!s.ok()) {
+        mmap_limit_.Release();
+      }
     } else {
       *result = new PosixRandomAccessFile(fname, fd);
     }
@@ -430,12 +510,17 @@ class PosixEnv : public Env {
     int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
     if (fd < 0) {
       result = IOError(fname, errno);
+    } else if (!locks_.Insert(fname)) {
+      close(fd);
+      result = Status::IOError("lock " + fname, "already held by process");
     } else if (LockOrUnlock(fd, true) == -1) {
       result = IOError("lock " + fname, errno);
       close(fd);
+      locks_.Remove(fname);
     } else {
       PosixFileLock* my_lock = new PosixFileLock;
       my_lock->fd_ = fd;
+      my_lock->name_ = fname;
       *lock = my_lock;
     }
     return result;
@@ -447,6 +532,7 @@ class PosixEnv : public Env {
     if (LockOrUnlock(my_lock->fd_, false) == -1) {
       result = IOError("unlock", errno);
     }
+    locks_.Remove(my_lock->name_);
     close(my_lock->fd_);
     delete my_lock;
     return result;
@@ -523,6 +609,9 @@ class PosixEnv : public Env {
   struct BGItem { void* arg; void (*function)(void*); };
   typedef std::deque BGQueue;
   BGQueue queue_;
+
+  PosixLockTable locks_;
+  MmapLimiter mmap_limit_;
 };
 
 PosixEnv::PosixEnv() : page_size_(getpagesize()),
diff --git a/util/mutexlock.h b/util/mutexlock.h
index c3f3306..1ff5a9e 100644
--- a/util/mutexlock.h
+++ b/util/mutexlock.h
@@ -6,6 +6,7 @@
 #define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_
 
 #include "port/port.h"
+#include "port/thread_annotations.h"
 
 namespace leveldb {
 
@@ -19,12 +20,13 @@ namespace leveldb {
 //     ... some complex code, possibly with multiple return paths ...
 //   }
 
-class MutexLock {
+class SCOPED_LOCKABLE MutexLock {
  public:
-  explicit MutexLock(port::Mutex *mu) : mu_(mu) {
+  explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu)
+      : mu_(mu)  {
     this->mu_->Lock();
   }
-  ~MutexLock() { this->mu_->Unlock(); }
+  ~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); }
 
  private:
   port::Mutex *const mu_;