@ -3,6 +3,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// found in the LICENSE file. See the AUTHORS file for names of contributors.
# include <deque>
# include <deque>
# include <set>
# include <dirent.h>
# include <dirent.h>
# include <errno.h>
# include <errno.h>
# include <fcntl.h>
# include <fcntl.h>
@ -23,6 +24,7 @@
# include "leveldb/slice.h"
# include "leveldb/slice.h"
# include "port/port.h"
# include "port/port.h"
# include "util/logging.h"
# include "util/logging.h"
# include "util/mutexlock.h"
# include "util/posix_logger.h"
# include "util/posix_logger.h"
namespace leveldb {
namespace leveldb {
@ -90,18 +92,75 @@ class PosixRandomAccessFile: public RandomAccessFile {
}
}
} ;
} ;
// Helper class to limit mmap file usage so that we do not end up
// running out of virtual memory or running into kernel performance
// problems for very large databases.
class MmapLimiter {
 public:
  // Starts with a budget of 1000 mmap slots when pointers are at least
  // 8 bytes wide, and with an empty budget for smaller pointer sizes.
  MmapLimiter() {
    const intptr_t initial_slots = (sizeof(void*) >= 8) ? 1000 : 0;
    SetAllowed(initial_slots);
  }

  // Tries to reserve one mmap slot.  Returns true when a slot was taken
  // and false when the budget is exhausted.
  bool Acquire() {
    // Early exit without taking mu_ when nothing is left.
    if (GetAllowed() <= 0) {
      return false;
    }

    MutexLock lock(&mu_);
    // Read the count again now that mu_ is held; the value seen above
    // was obtained without the lock.
    const intptr_t remaining = GetAllowed();
    if (remaining > 0) {
      SetAllowed(remaining - 1);
      return true;
    }
    return false;
  }

  // Gives back a slot obtained from an earlier Acquire() that returned true.
  void Release() {
    MutexLock lock(&mu_);
    const intptr_t remaining = GetAllowed();
    SetAllowed(remaining + 1);
  }

 private:
  port::Mutex mu_;
  // Remaining slot count, stored as a pointer-sized integer.
  port::AtomicPointer allowed_;

  // Returns the current slot count held in allowed_.
  intptr_t GetAllowed() const {
    void* const raw = allowed_.Acquire_Load();
    return reinterpret_cast<intptr_t>(raw);
  }

  // Stores v as the new slot count.
  // REQUIRES: mu_ must be held
  void SetAllowed(intptr_t v) {
    void* const raw = reinterpret_cast<void*>(v);
    allowed_.Release_Store(raw);
  }

  // Copy construction and assignment are private and have no body here.
  MmapLimiter(const MmapLimiter&);
  void operator=(const MmapLimiter&);
};
// mmap() based random-access
// mmap() based random-access
class PosixMmapReadableFile : public RandomAccessFile {
class PosixMmapReadableFile : public RandomAccessFile {
private :
private :
std : : string filename_ ;
std : : string filename_ ;
void * mmapped_region_ ;
void * mmapped_region_ ;
size_t length_ ;
size_t length_ ;
MmapLimiter * limiter_ ;
public :
public :
// base[0,length-1] contains the mmapped contents of the file.
// base[0,length-1] contains the mmapped contents of the file.
PosixMmapReadableFile ( const std : : string & fname , void * base , size_t length )
: filename_ ( fname ) , mmapped_region_ ( base ) , length_ ( length ) { }
virtual ~ PosixMmapReadableFile ( ) { munmap ( mmapped_region_ , length_ ) ; }
PosixMmapReadableFile ( const std : : string & fname , void * base , size_t length ,
MmapLimiter * limiter )
: filename_ ( fname ) , mmapped_region_ ( base ) , length_ ( length ) ,
limiter_ ( limiter ) {
}
virtual ~ PosixMmapReadableFile ( ) {
munmap ( mmapped_region_ , length_ ) ;
limiter_ - > Release ( ) ;
}
virtual Status Read ( uint64_t offset , size_t n , Slice * result ,
virtual Status Read ( uint64_t offset , size_t n , Slice * result ,
char * scratch ) const {
char * scratch ) const {
@ -300,6 +359,25 @@ static int LockOrUnlock(int fd, bool lock) {
// Lock handle handed back through the `lock` out-parameter once a file has
// been locked; it carries what is needed to undo the lock later.
class PosixFileLock : public FileLock {
class PosixFileLock : public FileLock {
public :
public :
// Descriptor of the opened lock file; unlocked and closed on release.
int fd_ ;
int fd_ ;
// Name of the locked file; used to remove the entry from the lock table
// when the lock is released.
std : : string name_ ;
} ;
// Set of locked files. We keep a separate set instead of just
// relying on fcntl(F_SETLK) since fcntl(F_SETLK) does not provide
// any protection against multiple uses from the same process.
class PosixLockTable {
 public:
  // Adds fname to the set of locked files.  Returns true if the name was
  // newly inserted and false if it was already in the set.
  bool Insert(const std::string& fname) {
    MutexLock guard(&mu_);
    const std::pair<std::set<std::string>::iterator, bool> outcome =
        locked_files_.insert(fname);
    return outcome.second;
  }

  // Drops fname from the set of locked files; does nothing if it is absent.
  void Remove(const std::string& fname) {
    MutexLock guard(&mu_);
    locked_files_.erase(fname);
  }

 private:
  port::Mutex mu_;  // held for every access to locked_files_
  std::set<std::string> locked_files_;
};
} ;
class PosixEnv : public Env {
class PosixEnv : public Env {
@ -329,19 +407,21 @@ class PosixEnv : public Env {
int fd = open ( fname . c_str ( ) , O_RDONLY ) ;
int fd = open ( fname . c_str ( ) , O_RDONLY ) ;
if ( fd < 0 ) {
if ( fd < 0 ) {
s = IOError ( fname , errno ) ;
s = IOError ( fname , errno ) ;
} else if ( sizeof ( void * ) > = 8 ) {
// Use mmap when virtual address-space is plentiful.
} else if ( mmap_limit_ . Acquire ( ) ) {
uint64_t size ;
uint64_t size ;
s = GetFileSize ( fname , & size ) ;
s = GetFileSize ( fname , & size ) ;
if ( s . ok ( ) ) {
if ( s . ok ( ) ) {
void * base = mmap ( NULL , size , PROT_READ , MAP_SHARED , fd , 0 ) ;
void * base = mmap ( NULL , size , PROT_READ , MAP_SHARED , fd , 0 ) ;
if ( base ! = MAP_FAILED ) {
if ( base ! = MAP_FAILED ) {
* result = new PosixMmapReadableFile ( fname , base , size ) ;
* result = new PosixMmapReadableFile ( fname , base , size , & mmap_limit_ );
} else {
} else {
s = IOError ( fname , errno ) ;
s = IOError ( fname , errno ) ;
}
}
}
}
close ( fd ) ;
close ( fd ) ;
if ( ! s . ok ( ) ) {
mmap_limit_ . Release ( ) ;
}
} else {
} else {
* result = new PosixRandomAccessFile ( fname , fd ) ;
* result = new PosixRandomAccessFile ( fname , fd ) ;
}
}
@ -430,12 +510,17 @@ class PosixEnv : public Env {
int fd = open ( fname . c_str ( ) , O_RDWR | O_CREAT , 0644 ) ;
int fd = open ( fname . c_str ( ) , O_RDWR | O_CREAT , 0644 ) ;
if ( fd < 0 ) {
if ( fd < 0 ) {
result = IOError ( fname , errno ) ;
result = IOError ( fname , errno ) ;
} else if ( ! locks_ . Insert ( fname ) ) {
close ( fd ) ;
result = Status : : IOError ( " lock " + fname , " already held by process " ) ;
} else if ( LockOrUnlock ( fd , true ) = = - 1 ) {
} else if ( LockOrUnlock ( fd , true ) = = - 1 ) {
result = IOError ( " lock " + fname , errno ) ;
result = IOError ( " lock " + fname , errno ) ;
close ( fd ) ;
close ( fd ) ;
locks_ . Remove ( fname ) ;
} else {
} else {
PosixFileLock * my_lock = new PosixFileLock ;
PosixFileLock * my_lock = new PosixFileLock ;
my_lock - > fd_ = fd ;
my_lock - > fd_ = fd ;
my_lock - > name_ = fname ;
* lock = my_lock ;
* lock = my_lock ;
}
}
return result ;
return result ;
@ -447,6 +532,7 @@ class PosixEnv : public Env {
if ( LockOrUnlock ( my_lock - > fd_ , false ) = = - 1 ) {
if ( LockOrUnlock ( my_lock - > fd_ , false ) = = - 1 ) {
result = IOError ( " unlock " , errno ) ;
result = IOError ( " unlock " , errno ) ;
}
}
locks_ . Remove ( my_lock - > name_ ) ;
close ( my_lock - > fd_ ) ;
close ( my_lock - > fd_ ) ;
delete my_lock ;
delete my_lock ;
return result ;
return result ;
@ -523,6 +609,9 @@ class PosixEnv : public Env {
struct BGItem { void * arg ; void ( * function ) ( void * ) ; } ;
struct BGItem { void * arg ; void ( * function ) ( void * ) ; } ;
typedef std : : deque < BGItem > BGQueue ;
typedef std : : deque < BGItem > BGQueue ;
BGQueue queue_ ;
BGQueue queue_ ;
PosixLockTable locks_ ;
MmapLimiter mmap_limit_ ;
} ;
} ;
PosixEnv : : PosixEnv ( ) : page_size_ ( getpagesize ( ) ) ,
PosixEnv : : PosixEnv ( ) : page_size_ ( getpagesize ( ) ) ,