Joshua — an open-source statistical, hierarchical, phrase-based machine translation system.
#ifndef UTIL_MMAP_H
#define UTIL_MMAP_H
// Utilities for mmaped files.

#include <cstddef>
#include <limits>

#include <stdint.h>
#include <sys/types.h>

namespace util {

class scoped_fd;

// Returns the operating system's virtual memory page size.
std::size_t SizePage();

// RAII owner of an mmapped region.
// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
class scoped_mmap {
  public:
    scoped_mmap() : data_((void*)-1), size_(0) {}
    scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}
    // Unmaps the owned region; defined out of line to keep mmap headers out of
    // this header.
    ~scoped_mmap();

    void *get() const { return data_; }

    const uint8_t *begin() const { return reinterpret_cast<uint8_t*>(data_); }
    const uint8_t *end() const { return reinterpret_cast<uint8_t*>(data_) + size_; }
    std::size_t size() const { return size_; }

    // Take ownership of a new mapping, releasing any previously owned one.
    void reset(void *data, std::size_t size) {
      // The temporary unmaps the old region when it goes out of scope.
      scoped_mmap other(data_, size_);
      data_ = data;
      size_ = size;
    }

    void reset() {
      reset((void*)-1, 0);
    }

    // Relinquish ownership without unmapping; the caller becomes responsible
    // for the region.
    void *steal() {
      void *ret = data_;
      data_ = (void*)-1;
      size_ = 0;
      return ret;
    }

  private:
    void *data_;
    std::size_t size_;

    // Non-copyable (pre-C++11 idiom: declared private, never defined).
    scoped_mmap(const scoped_mmap &);
    scoped_mmap &operator=(const scoped_mmap &);
};

/* For when the memory might come from mmap, new char[], or malloc.  Uses NULL
 * and 0 for blanks even though mmap signals errors with (void*)-1).  The reset
 * function checks that blank for mmap.
 */
class scoped_memory {
  public:
    // How the memory was obtained, which determines how it is released.
    typedef enum {
      MMAP_ROUND_UP_ALLOCATED, // The size was rounded up to a multiple of page size.  Do the same before munmap.
      MMAP_ALLOCATED, // munmap
      MALLOC_ALLOCATED, // free
      NONE_ALLOCATED // nothing here!
    } Alloc;

    scoped_memory(void *data, std::size_t size, Alloc source)
      : data_(data), size_(size), source_(source) {}

    scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}

    // Calls HugeMalloc
    scoped_memory(std::size_t to, bool zero_new);

    ~scoped_memory() { reset(); }

    void *get() const { return data_; }
    const char *begin() const { return reinterpret_cast<char*>(data_); }
    const char *end() const { return reinterpret_cast<char*>(data_) + size_; }
    std::size_t size() const { return size_; }

    Alloc source() const { return source_; }

    void reset() { reset(NULL, 0, NONE_ALLOCATED); }

    // Release any held memory (per source_), then take ownership of the new
    // region.  Defined out of line.
    void reset(void *data, std::size_t size, Alloc from);

    // Relinquish ownership without freeing; the caller becomes responsible.
    void *steal() {
      void *ret = data_;
      data_ = NULL;
      size_ = 0;
      source_ = NONE_ALLOCATED;
      return ret;
    }

  private:
    void *data_;
    std::size_t size_;

    Alloc source_;

    // Non-copyable (pre-C++11 idiom).
    scoped_memory(const scoped_memory &);
    scoped_memory &operator=(const scoped_memory &);
};

extern const int kFileFlags;

// Cross-platform, error-checking wrapper for mmap().
void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);

// msync wrapper
void SyncOrThrow(void *start, size_t length);

// Cross-platform, error-checking wrapper for munmap().
void UnmapOrThrow(void *start, size_t length);

// Allocate memory, promising that all/vast majority of it will be used.  Tries
// hard to use huge pages on Linux.
// If you want zeroed memory, pass zeroed = true.
void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to);

// Reallocates memory ala realloc but with option to zero the new memory.
// On Linux, the memory can come from anonymous mmap or malloc/calloc.
// On non-Linux, only malloc/calloc is supported.
//
// To summarize, any memory from HugeMalloc or HugeRealloc can be resized with
// this.
void HugeRealloc(std::size_t size, bool new_zeroed, scoped_memory &mem);

typedef enum {
  // mmap with no prepopulate
  LAZY,
  // On linux, pass MAP_POPULATE to mmap.
  POPULATE_OR_LAZY,
  // Populate on Linux.  malloc and read on non-Linux.
  POPULATE_OR_READ,
  // malloc and read.
  READ,
  // malloc and read in parallel (recommended for Lustre)
  PARALLEL_READ,
} LoadMethod;

// Load [offset, offset + size) of fd into out using the requested method.
void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);

// Open file name with mmap of size bytes, all of which are initially zero.
void *MapZeroedWrite(int fd, std::size_t size);
void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);

// Forward rolling memory map with no overlap.
class Rolling {
  public:
    // NOTE(review): members are left uninitialized here; callers are expected
    // to call Init() or use another constructor before any other member.
    Rolling() {}

    explicit Rolling(void *data) { Init(data); }

    Rolling(const Rolling &copy_from, uint64_t increase = 0);
    Rolling &operator=(const Rolling &copy_from);

    // For an actual rolling mmap.
    explicit Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount);

    // For a static mapping
    void Init(void *data) {
      ptr_ = data;
      current_end_ = std::numeric_limits<uint64_t>::max();
      current_begin_ = 0;
      // Mark as a pass-through.
      fd_ = -1;
    }

    // Shift the window's file origin forward by `by` bytes.  Invalidating
    // current_end_ forces a Roll on the next checked access.
    void IncreaseBase(uint64_t by) {
      file_begin_ += by;
      ptr_ = static_cast<uint8_t*>(ptr_) + by;
      if (!IsPassthrough()) current_end_ = 0;
    }

    // Shift the window's file origin backward by `by` bytes.
    void DecreaseBase(uint64_t by) {
      file_begin_ -= by;
      ptr_ = static_cast<uint8_t*>(ptr_) - by;
      if (!IsPassthrough()) current_end_ = 0;
    }

    void *ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size);

    // Returns base pointer
    void *get() const { return ptr_; }

    // Returns base pointer.
    void *CheckedBase(uint64_t index) {
      if (index >= current_end_ || index < current_begin_) {
        Roll(index);
      }
      return ptr_;
    }

    // Returns indexed pointer.
    void *CheckedIndex(uint64_t index) {
      return static_cast<uint8_t*>(CheckedBase(index)) + index;
    }

  private:
    void Roll(uint64_t index);

    // True if this is just a thin wrapper on a pointer.
    bool IsPassthrough() const { return fd_ == -1; }

    void *ptr_;
    uint64_t current_begin_;
    uint64_t current_end_;

    scoped_memory mem_;

    int fd_;
    uint64_t file_begin_;
    uint64_t file_end_;

    bool for_write_;
    std::size_t block_;
    std::size_t read_bound_;
};

} // namespace util

#endif // UTIL_MMAP_H