@ -10,6 +10,7 @@
# include <stdlib.h>
# include <stdlib.h>
# include <string.h>
# include <string.h>
# include <sys/mman.h>
# include <sys/mman.h>
# include <sys/resource.h>
# include <sys/stat.h>
# include <sys/stat.h>
# include <sys/time.h>
# include <sys/time.h>
# include <sys/types.h>
# include <sys/types.h>
@ -23,15 +24,70 @@
# include "util/logging.h"
# include "util/logging.h"
# include "util/mutexlock.h"
# include "util/mutexlock.h"
# include "util/posix_logger.h"
# include "util/posix_logger.h"
# include "util/env_posix_test_helper.h"
namespace leveldb {
namespace leveldb {
namespace {
namespace {
static int open_read_only_file_limit = - 1 ;
static int mmap_limit = - 1 ;
// Builds an I/O-error Status from an errno value: |context| (callers in this
// file pass the file name) is paired with the strerror() text for
// |err_number|.
static Status IOError ( const std : : string & context , int err_number ) {
static Status IOError ( const std : : string & context , int err_number ) {
return Status : : IOError ( context , strerror ( err_number ) ) ;
return Status : : IOError ( context , strerror ( err_number ) ) ;
}
}
// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not end up running out of file descriptors, virtual memory,
// or running into kernel performance problems for very large databases.
//
// The remaining count is stored in an AtomicPointer so that Acquire() can
// reject cheaply without taking the mutex; every modification after
// construction happens with mu_ held.
class Limiter {
 public:
  // Limit maximum number of resources to |n|.
  // While the remaining count is <= 0, Acquire() returns false.
  // Declared explicit so that an integer never converts to a Limiter
  // implicitly.
  explicit Limiter(intptr_t n) {
    SetAllowed(n);
  }

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Fast path: nothing left, so skip the lock entirely.
    if (GetAllowed() <= 0) {
      return false;
    }
    MutexLock l(&mu_);
    // Re-read under the lock; the count may have changed since the
    // unlocked check above.
    intptr_t x = GetAllowed();
    if (x <= 0) {
      return false;
    } else {
      SetAllowed(x - 1);
      return true;
    }
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() {
    MutexLock l(&mu_);
    SetAllowed(GetAllowed() + 1);
  }

 private:
  port::Mutex mu_;
  // Remaining resource count, stored as an intptr_t cast to void*.
  port::AtomicPointer allowed_;

  // Returns the remaining count; may be called without holding mu_.
  intptr_t GetAllowed() const {
    return reinterpret_cast<intptr_t>(allowed_.Acquire_Load());
  }

  // REQUIRES: mu_ must be held
  // (the constructor is the one caller that stores without the lock).
  void SetAllowed(intptr_t v) {
    allowed_.Release_Store(reinterpret_cast<void*>(v));
  }

  // No copying allowed: declared but intentionally left undefined.
  Limiter(const Limiter&);
  void operator=(const Limiter&);
};
class PosixSequentialFile : public SequentialFile {
class PosixSequentialFile : public SequentialFile {
private :
private :
std : : string filename_ ;
std : : string filename_ ;
@ -69,73 +125,51 @@ class PosixSequentialFile: public SequentialFile {
// Random-access file that reads with pread().
// When the Limiter passed to the three-argument constructor has no free
// slot, the object keeps no descriptor open (fd_ is -1) and Read() opens
// the file by name for each call.
class PosixRandomAccessFile : public RandomAccessFile {
class PosixRandomAccessFile : public RandomAccessFile {
private :
private :
std : : string filename_ ;
std : : string filename_ ;
bool temporary_fd_ ; // If true, fd_ is -1 and we open on every read.
int fd_ ;
int fd_ ;
// Limiter queried in the constructor; Release() is called on it in the
// destructor only when a slot was actually acquired.
Limiter * limiter_ ;
public :
public :
PosixRandomAccessFile ( const std : : string & fname , int fd )
: filename_ ( fname ) , fd_ ( fd ) { }
virtual ~ PosixRandomAccessFile ( ) { close ( fd_ ) ; }
// Stores |fd| and tries to acquire a slot from |limiter|. If no slot is
// available, |fd| is closed right away and the object switches to
// open-per-read mode (temporary_fd_ == true, fd_ == -1).
PosixRandomAccessFile ( const std : : string & fname , int fd , Limiter * limiter )
: filename_ ( fname ) , fd_ ( fd ) , limiter_ ( limiter ) {
temporary_fd_ = ! limiter - > Acquire ( ) ;
if ( temporary_fd_ ) {
// Open file on every access.
close ( fd_ ) ;
fd_ = - 1 ;
}
}
// Closes the descriptor and returns the limiter slot, but only when a slot
// was acquired in the constructor; in open-per-read mode there is nothing
// to close or release.
virtual ~ PosixRandomAccessFile ( ) {
if ( ! temporary_fd_ ) {
close ( fd_ ) ;
limiter_ - > Release ( ) ;
}
}
// Reads up to |n| bytes starting at |offset| into |scratch| and points
// *result at the bytes read (an empty slice if pread() fails).
// Returns an IOError status built from errno when open() or pread() fails.
virtual Status Read ( uint64_t offset , size_t n , Slice * result ,
virtual Status Read ( uint64_t offset , size_t n , Slice * result ,
char * scratch ) const {
char * scratch ) const {
// Use the stored descriptor unless we are in open-per-read mode, in which
// case open the file just for this call.
int fd = fd_ ;
if ( temporary_fd_ ) {
fd = open ( filename_ . c_str ( ) , O_RDONLY ) ;
if ( fd < 0 ) {
return IOError ( filename_ , errno ) ;
}
}
Status s ;
Status s ;
ssize_t r = pread ( fd_ , scratch , n , static_cast < off_t > ( offset ) ) ;
ssize_t r = pread ( fd , scratch , n , static_cast < off_t > ( offset ) ) ;
* result = Slice ( scratch , ( r < 0 ) ? 0 : r ) ;
* result = Slice ( scratch , ( r < 0 ) ? 0 : r ) ;
if ( r < 0 ) {
if ( r < 0 ) {
// An error: return a non-ok status
// An error: return a non-ok status
s = IOError ( filename_ , errno ) ;
s = IOError ( filename_ , errno ) ;
}
}
return s ;
}
} ;
// Helper class to limit mmap file usage so that we do not end up
// running out of virtual memory or running into kernel performance
// problems for very large databases.
class MmapLimiter {
public :
// Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
MmapLimiter ( ) {
SetAllowed ( sizeof ( void * ) > = 8 ? 1000 : 0 ) ;
}
// If another mmap slot is available, acquire it and return true.
// Else return false.
bool Acquire ( ) {
if ( GetAllowed ( ) < = 0 ) {
return false ;
}
MutexLock l ( & mu_ ) ;
intptr_t x = GetAllowed ( ) ;
if ( x < = 0 ) {
return false ;
} else {
SetAllowed ( x - 1 ) ;
return true ;
if ( temporary_fd_ ) {
// Close the temporary file descriptor opened earlier.
close ( fd ) ;
}
}
return s ;
}
}
// Release a slot acquired by a previous call to Acquire() that returned true.
void Release ( ) {
MutexLock l ( & mu_ ) ;
SetAllowed ( GetAllowed ( ) + 1 ) ;
}
private :
port : : Mutex mu_ ;
port : : AtomicPointer allowed_ ;
intptr_t GetAllowed ( ) const {
return reinterpret_cast < intptr_t > ( allowed_ . Acquire_Load ( ) ) ;
}
// REQUIRES: mu_ must be held
void SetAllowed ( intptr_t v ) {
allowed_ . Release_Store ( reinterpret_cast < void * > ( v ) ) ;
}
MmapLimiter ( const MmapLimiter & ) ;
void operator = ( const MmapLimiter & ) ;
} ;
} ;
// mmap() based random-access
// mmap() based random-access
@ -144,12 +178,12 @@ class PosixMmapReadableFile: public RandomAccessFile {
std : : string filename_ ;
std : : string filename_ ;
void * mmapped_region_ ;
void * mmapped_region_ ;
size_t length_ ;
size_t length_ ;
Mmap Limiter* limiter_ ;
Limiter * limiter_ ;
public :
public :
// base[0,length-1] contains the mmapped contents of the file.
// base[0,length-1] contains the mmapped contents of the file.
PosixMmapReadableFile ( const std : : string & fname , void * base , size_t length ,
PosixMmapReadableFile ( const std : : string & fname , void * base , size_t length ,
Mmap Limiter* limiter )
Limiter * limiter )
: filename_ ( fname ) , mmapped_region_ ( base ) , length_ ( length ) ,
: filename_ ( fname ) , mmapped_region_ ( base ) , length_ ( length ) ,
limiter_ ( limiter ) {
limiter_ ( limiter ) {
}
}
@ -332,7 +366,7 @@ class PosixEnv : public Env {
mmap_limit_ . Release ( ) ;
mmap_limit_ . Release ( ) ;
}
}
} else {
} else {
* result = new PosixRandomAccessFile ( fname , fd ) ;
* result = new PosixRandomAccessFile ( fname , fd , & fd_limit_ );
}
}
return s ;
return s ;
}
}
@ -532,10 +566,42 @@ class PosixEnv : public Env {
BGQueue queue_ ;
BGQueue queue_ ;
PosixLockTable locks_ ;
PosixLockTable locks_ ;
MmapLimiter mmap_limit_ ;
Limiter mmap_limit_ ;
Limiter fd_limit_ ;
} ;
} ;
PosixEnv : : PosixEnv ( ) : started_bgthread_ ( false ) {
// Return the maximum number of concurrent mmaps.
// The value is computed on the first call and cached in |mmap_limit|; a
// non-negative value already stored there is returned unchanged.
static int MaxMmaps() {
  if (mmap_limit < 0) {
    // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
    const bool wide_pointers = sizeof(void*) >= 8;
    mmap_limit = wide_pointers ? 1000 : 0;
  }
  return mmap_limit;
}
// Return the maximum number of read-only files to keep open.
// The value is computed on the first call and cached in
// |open_read_only_file_limit|; a non-negative value already stored there is
// returned unchanged.
static intptr_t MaxOpenFiles() {
  if (open_read_only_file_limit >= 0) {
    return open_read_only_file_limit;
  }
  const int kIntMax = std::numeric_limits<int>::max();
  struct rlimit rlim;
  if (getrlimit(RLIMIT_NOFILE, &rlim)) {
    // getrlimit failed, fallback to hard-coded default.
    open_read_only_file_limit = 50;
  } else if (rlim.rlim_cur == RLIM_INFINITY) {
    open_read_only_file_limit = kIntMax;
  } else {
    // Allow use of 20% of available file descriptors for read-only files.
    // rlim_t can be wider than int, so clamp before narrowing: a very large
    // finite soft limit must not truncate (possibly to a negative value,
    // which would leave the limiter permanently exhausted).
    const rlim_t share = rlim.rlim_cur / 5;
    const rlim_t cap = static_cast<rlim_t>(kIntMax);
    open_read_only_file_limit = static_cast<int>(share < cap ? share : cap);
  }
  return open_read_only_file_limit;
}
// Constructs the environment: no background thread has been started yet,
// the mmap and read-only file descriptor limiters are sized from MaxMmaps()
// and MaxOpenFiles(), and mu_ / bgsignal_ are initialized through
// PthreadCall.
PosixEnv : : PosixEnv ( )
: started_bgthread_ ( false ) ,
mmap_limit_ ( MaxMmaps ( ) ) ,
fd_limit_ ( MaxOpenFiles ( ) ) {
PthreadCall ( " mutex_init " , pthread_mutex_init ( & mu_ , NULL ) ) ;
PthreadCall ( " mutex_init " , pthread_mutex_init ( & mu_ , NULL ) ) ;
PthreadCall ( " cvar_init " , pthread_cond_init ( & bgsignal_ , NULL ) ) ;
PthreadCall ( " cvar_init " , pthread_cond_init ( & bgsignal_ , NULL ) ) ;
}
}
@ -610,6 +676,16 @@ static pthread_once_t once = PTHREAD_ONCE_INIT;
static Env * default_env ;
static Env * default_env ;
// Callback handed to pthread_once() by Env::Default(): allocates the PosixEnv
// that default_env points to.
static void InitDefaultEnv ( ) { default_env = new PosixEnv ; }
static void InitDefaultEnv ( ) { default_env = new PosixEnv ; }
void EnvPosixTestHelper : : SetReadOnlyFDLimit ( int limit ) {
assert ( default_env = = NULL ) ;
open_read_only_file_limit = limit ;
}
void EnvPosixTestHelper : : SetReadOnlyMMapLimit ( int limit ) {
assert ( default_env = = NULL ) ;
mmap_limit = limit ;
}
Env * Env : : Default ( ) {
Env * Env : : Default ( ) {
pthread_once ( & once , InitDefaultEnv ) ;
pthread_once ( & once , InitDefaultEnv ) ;
return default_env ;
return default_env ;