Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/kenlm/lm/common/ngram.hh
00001 #ifndef LM_COMMON_NGRAM_H
00002 #define LM_COMMON_NGRAM_H
00003 
00004 #include "lm/weights.hh"
00005 #include "lm/word_index.hh"
00006 
00007 #include <cstddef>
00008 #include <cassert>
00009 #include <stdint.h>
00010 #include <cstring>
00011 
00012 namespace lm {
00013 
00014 class NGramHeader {
00015   public:
00016     NGramHeader(void *begin, std::size_t order)
00017       : begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
00018 
00019     NGramHeader() : begin_(NULL), end_(NULL) {}
00020 
00021     const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
00022     uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); }
00023 
00024     void ReBase(void *to) {
00025       std::size_t difference = end_ - begin_;
00026       begin_ = reinterpret_cast<WordIndex*>(to);
00027       end_ = begin_ + difference;
00028     }
00029 
00030     // These are for the vocab index.
00031     // Lower-case in deference to STL.
00032     const WordIndex *begin() const { return begin_; }
00033     WordIndex *begin() { return begin_; }
00034     const WordIndex *end() const { return end_; }
00035     WordIndex *end() { return end_; }
00036 
00037     std::size_t size() const { return end_ - begin_; }
00038     std::size_t Order() const { return end_ - begin_; }
00039 
00040   private:
00041     WordIndex *begin_, *end_;
00042 };
00043 
00044 template <class PayloadT> class NGram : public NGramHeader {
00045   public:
00046     typedef PayloadT Payload;
00047 
00048     NGram() : NGramHeader(NULL, 0) {}
00049 
00050     NGram(void *begin, std::size_t order) : NGramHeader(begin, order) {}
00051 
00052     // Would do operator++ but that can get confusing for a stream.
00053     void NextInMemory() {
00054       ReBase(&Value() + 1);
00055     }
00056 
00057     static std::size_t TotalSize(std::size_t order) {
00058       return order * sizeof(WordIndex) + sizeof(Payload);
00059     }
00060     std::size_t TotalSize() const {
00061       // Compiler should optimize this.
00062       return TotalSize(Order());
00063     }
00064 
00065     static std::size_t OrderFromSize(std::size_t size) {
00066       std::size_t ret = (size - sizeof(Payload)) / sizeof(WordIndex);
00067       assert(size == TotalSize(ret));
00068       return ret;
00069     }
00070 
00071     const Payload &Value() const { return *reinterpret_cast<const Payload *>(end()); }
00072     Payload &Value() { return *reinterpret_cast<Payload *>(end()); }
00073 };
00074 
00075 } // namespace lm
00076 
00077 #endif // LM_COMMON_NGRAM_H