Joshua
An open-source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/kenlm/util/mmap.hh
00001 #ifndef UTIL_MMAP_H
00002 #define UTIL_MMAP_H
// Utilities for mmapped files.
00004 
00005 #include <cstddef>
00006 #include <limits>
00007 
00008 #include <stdint.h>
00009 #include <sys/types.h>
00010 
00011 namespace util {
00012 
00013 class scoped_fd;
00014 
00015 std::size_t SizePage();
00016 
// MAP_FAILED is (void*)-1; the raw value is spelled out so this header does
// not have to pull in the system mmap header.
class scoped_mmap {
  public:
    // Empty wrapper holds the mmap failure sentinel, not NULL.
    scoped_mmap() : data_(reinterpret_cast<void*>(-1)), size_(0) {}
    // Adopt an existing mapping of size bytes starting at data.
    scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}
    // Unmaps the held region; defined out of line to keep mmap out of here.
    ~scoped_mmap();

    void *get() const { return data_; }

    // Byte-wise view of the mapped region.
    const uint8_t *begin() const { return reinterpret_cast<uint8_t*>(data_); }
    const uint8_t *end() const { return begin() + size_; }
    std::size_t size() const { return size_; }

    // Adopt a new region.  The previously held one is released when the
    // temporary holding it is destructed at the end of this function.
    void reset(void *data, std::size_t size) {
      scoped_mmap previous(data_, size_);
      data_ = data;
      size_ = size;
    }

    // Unmap anything held and return to the empty (sentinel) state.
    void reset() {
      reset(reinterpret_cast<void*>(-1), 0);
    }

    // Relinquish ownership without unmapping; the caller takes over.
    void *steal() {
      void *released = data_;
      data_ = reinterpret_cast<void*>(-1);
      size_ = 0;
      return released;
    }

  private:
    void *data_;
    std::size_t size_;

    // Non-copyable: declared, never defined.
    scoped_mmap(const scoped_mmap &);
    scoped_mmap &operator=(const scoped_mmap &);
};
00054 
/* Owner for memory that may come from mmap, new char[], or malloc.  The
 * blank state is NULL and 0 even though mmap signals errors with (void*)-1.
 * The out-of-line reset accounts for the mmap blank value.
 */
class scoped_memory {
  public:
    enum Alloc {
      MMAP_ROUND_UP_ALLOCATED, // Size was rounded up to a page-size multiple; do the same before munmap.
      MMAP_ALLOCATED,          // release with munmap
      MALLOC_ALLOCATED,        // release with free
      NONE_ALLOCATED           // nothing held
    };

    // Adopt existing memory along with a record of how it was allocated.
    scoped_memory(void *data, std::size_t size, Alloc source)
      : data_(data), size_(size), source_(source) {}

    scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}

    // Allocates through HugeMalloc (defined out of line).
    scoped_memory(std::size_t to, bool zero_new);

    ~scoped_memory() { reset(); }

    void *get() const { return data_; }
    // Byte-wise view of the held region.
    const char *begin() const { return reinterpret_cast<char*>(data_); }
    const char *end() const { return begin() + size_; }
    std::size_t size() const { return size_; }

    Alloc source() const { return source_; }

    // Release anything held and return to the blank state.
    void reset() { reset(NULL, 0, NONE_ALLOCATED); }

    // Adopt new memory; the old memory is released according to its source.
    void reset(void *data, std::size_t size, Alloc from);

    // Relinquish ownership without releasing; the caller takes over.
    void *steal() {
      void *released = data_;
      data_ = NULL;
      size_ = 0;
      source_ = NONE_ALLOCATED;
      return released;
    }

  private:
    void *data_;
    std::size_t size_;

    Alloc source_;

    // Non-copyable: declared, never defined.
    scoped_memory(const scoped_memory &);
    scoped_memory &operator=(const scoped_memory &);
};
00106 
00107 extern const int kFileFlags;
00108 
00109 // Cross-platform, error-checking wrapper for mmap().
00110 void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);
00111 
00112 // msync wrapper
00113 void SyncOrThrow(void *start, size_t length);
00114 
00115 // Cross-platform, error-checking wrapper for munmap().
00116 void UnmapOrThrow(void *start, size_t length);
00117 
00118 // Allocate memory, promising that all/vast majority of it will be used.  Tries
00119 // hard to use huge pages on Linux.
00120 // If you want zeroed memory, pass zeroed = true.
00121 void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to);
00122 
00123 // Reallocates memory ala realloc but with option to zero the new memory.
00124 // On Linux, the memory can come from anonymous mmap or malloc/calloc.
00125 // On non-Linux, only malloc/calloc is supported.
00126 //
00127 // To summarize, any memory from HugeMalloc or HugeRealloc can be resized with
00128 // this.
00129 void HugeRealloc(std::size_t size, bool new_zeroed, scoped_memory &mem);
00130 
// Strategies for getting file contents into memory.
typedef enum {
  // Plain mmap: pages fault in on first touch.
  LAZY,
  // mmap, passing MAP_POPULATE on Linux; plain mmap elsewhere.
  POPULATE_OR_LAZY,
  // Prepopulated mmap on Linux; malloc and read on non-Linux.
  POPULATE_OR_READ,
  // Always malloc and read the file.
  READ,
  // malloc and read in parallel (recommended for Lustre).
  PARALLEL_READ
} LoadMethod;
00143 
00144 void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);
00145 
// Create a writable mapping of size bytes, all initially zero, over an
// already-open fd or over a file created/opened by name (second overload).
00147 void *MapZeroedWrite(int fd, std::size_t size);
00148 void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);
00149 
00150 // Forward rolling memory map with no overlap.
00151 class Rolling {
00152   public:
00153     Rolling() {}
00154 
00155     explicit Rolling(void *data) { Init(data); }
00156 
00157     Rolling(const Rolling &copy_from, uint64_t increase = 0);
00158     Rolling &operator=(const Rolling &copy_from);
00159 
00160     // For an actual rolling mmap.
00161     explicit Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount);
00162 
00163     // For a static mapping
00164     void Init(void *data) {
00165       ptr_ = data;
00166       current_end_ = std::numeric_limits<uint64_t>::max();
00167       current_begin_ = 0;
00168       // Mark as a pass-through.
00169       fd_ = -1;
00170     }
00171 
00172     void IncreaseBase(uint64_t by) {
00173       file_begin_ += by;
00174       ptr_ = static_cast<uint8_t*>(ptr_) + by;
00175       if (!IsPassthrough()) current_end_ = 0;
00176     }
00177 
00178     void DecreaseBase(uint64_t by) {
00179       file_begin_ -= by;
00180       ptr_ = static_cast<uint8_t*>(ptr_) - by;
00181       if (!IsPassthrough()) current_end_ = 0;
00182     }
00183 
00184     void *ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size);
00185 
00186     // Returns base pointer
00187     void *get() const { return ptr_; }
00188 
00189     // Returns base pointer.
00190     void *CheckedBase(uint64_t index) {
00191       if (index >= current_end_ || index < current_begin_) {
00192         Roll(index);
00193       }
00194       return ptr_;
00195     }
00196 
00197     // Returns indexed pointer.
00198     void *CheckedIndex(uint64_t index) {
00199       return static_cast<uint8_t*>(CheckedBase(index)) + index;
00200     }
00201 
00202   private:
00203     void Roll(uint64_t index);
00204 
00205     // True if this is just a thin wrapper on a pointer.
00206     bool IsPassthrough() const { return fd_ == -1; }
00207 
00208     void *ptr_;
00209     uint64_t current_begin_;
00210     uint64_t current_end_;
00211 
00212     scoped_memory mem_;
00213 
00214     int fd_;
00215     uint64_t file_begin_;
00216     uint64_t file_end_;
00217 
00218     bool for_write_;
00219     std::size_t block_;
00220     std::size_t read_bound_;
00221 };
00222 
00223 } // namespace util
00224 
00225 #endif // UTIL_MMAP_H