Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/joshua/decoder/ff/lm/kenlm/util/mmap.hh
00001 #ifndef UTIL_MMAP__
00002 #define UTIL_MMAP__
00003 // Utilities for mmaped files.  
00004 
00005 #include "util/scoped.hh"
00006 
00007 #include <cstddef>
00008 
00009 #include <inttypes.h>
00010 #include <sys/types.h>
00011 
00012 namespace util {
00013 
/* Holds a mapped region as a base pointer plus a byte count.
 *
 * The blank state is (void*)-1 with size 0.  (void*)-1 is the value of
 * MAP_FAILED; it is written out literally so that this header does not have
 * to include the mmap header.  The destructor is defined out of line;
 * NOTE(review): it presumably unmaps any non-blank region -- confirm in the
 * implementation file.  Copy construction and copy assignment are declared
 * private and left undefined, so instances cannot be copied.
 */
class scoped_mmap {
  public:
    // Begin in the blank state.
    scoped_mmap() : data_(reinterpret_cast<void*>(-1)), size_(0) {}

    // Adopt an existing region of size bytes starting at data.
    scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}

    ~scoped_mmap();

    // Base address of the region, or (void*)-1 when blank.
    void *get() const { return data_; }

    // Byte-wise view of the region: [begin(), end()).
    const uint8_t *begin() const { return static_cast<const uint8_t*>(data_); }
    const uint8_t *end() const { return begin() + size_; }

    // Length of the region in bytes.
    std::size_t size() const { return size_; }

    // Swap in a new region.  The old one is handed to a temporary whose
    // destructor runs when this function returns.
    void reset(void *data, std::size_t size) {
      scoped_mmap previous(data_, size_);
      size_ = size;
      data_ = data;
    }

    // Return to the blank state, handing off whatever was held before.
    void reset() {
      reset(reinterpret_cast<void*>(-1), 0);
    }

  private:
    void *data_;
    std::size_t size_;

    // Not copyable: declared but intentionally never defined.
    scoped_mmap(const scoped_mmap &);
    scoped_mmap &operator=(const scoped_mmap &);
};
00044 
/* Holder for memory that might come from mmap, new char[], or malloc; the
 * origin is tracked in an Alloc tag.
 *
 * The blank state is NULL with size 0 and NONE_ALLOCATED, even though mmap
 * itself signals errors with (void*)-1.  The three-argument reset is defined
 * out of line; according to the original note it checks for that mmap blank.
 * Copy construction and copy assignment are declared private and left
 * undefined, so instances cannot be copied.
 */
class scoped_memory {
  public:
    // Origin of the held memory.
    enum Alloc {
      MMAP_ALLOCATED,
      ARRAY_ALLOCATED,
      MALLOC_ALLOCATED,
      NONE_ALLOCATED
    };

    // Begin in the blank state.
    scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}

    // Destruction goes through the same path as an explicit reset().
    ~scoped_memory() { reset(); }

    // Base address of the held memory, or NULL when blank.
    void *get() const { return data_; }

    // Character view of the held memory: [begin(), end()).
    const char *begin() const { return static_cast<const char*>(data_); }
    const char *end() const { return begin() + size_; }

    // Length of the held memory in bytes.
    std::size_t size() const { return size_; }

    // Tag recorded for the held memory.
    Alloc source() const { return source_; }

    // Return to the blank state.
    void reset() { reset(NULL, 0, NONE_ALLOCATED); }

    // Replace the held memory with data/size tagged as from.  Defined out of
    // line.
    void reset(void *data, std::size_t size, Alloc from);

    // realloc lets the current data escape, hence the need for this call.
    // If realloc fails, the original is destroyed too and get() returns NULL.
    void call_realloc(std::size_t to);

  private:
    void *data_;
    std::size_t size_;

    Alloc source_;

    // Not copyable: declared but intentionally never defined.
    scoped_memory(const scoped_memory &);
    scoped_memory &operator=(const scoped_memory &);
};
00082 
// Ways of loading file contents, as selected by callers of MapRead.
enum LoadMethod {
  // mmap without prepopulating.
  LAZY,
  // On Linux, pass MAP_POPULATE to mmap.
  POPULATE_OR_LAZY,
  // Populate on Linux; malloc and read on non-Linux.
  POPULATE_OR_READ,
  // malloc and read.
  READ
};
00093 
// Integer constant defined in another translation unit.
// NOTE(review): presumably the base flag set for file-backed mmap calls --
// confirm at the definition.
extern const int kFileFlags;
00095 
// Wraps mmap: checks that the call worked and hides some platform macros
// from callers.  offset defaults to 0.
// NOTE(review): the name implies failure is reported by throwing -- confirm
// the exception type in the implementation file.
void *MapOrThrow(
    std::size_t size,
    bool for_write,
    int flags,
    bool prefault,
    int fd,
    off_t offset = 0);
00098 
00099 void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out);
00100 
// Returns a pointer for a request of size bytes.
// NOTE(review): presumably an anonymous (not file-backed) mapping, going by
// the name -- confirm in the implementation file.
void *MapAnonymous(std::size_t size);
00102 
00103 // Open file name with mmap of size bytes, all of which are initially zero.  
00104 void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);
00105 
00106 } // namespace util
00107 
00108 #endif // UTIL_MMAP__