Joshua
open source statistical hierarchical phrase-based machine translation system
|
00001 #ifndef LM_COMMON_NGRAM_H 00002 #define LM_COMMON_NGRAM_H 00003 00004 #include "lm/weights.hh" 00005 #include "lm/word_index.hh" 00006 00007 #include <cstddef> 00008 #include <cassert> 00009 #include <stdint.h> 00010 #include <cstring> 00011 00012 namespace lm { 00013 00014 class NGramHeader { 00015 public: 00016 NGramHeader(void *begin, std::size_t order) 00017 : begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {} 00018 00019 NGramHeader() : begin_(NULL), end_(NULL) {} 00020 00021 const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); } 00022 uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); } 00023 00024 void ReBase(void *to) { 00025 std::size_t difference = end_ - begin_; 00026 begin_ = reinterpret_cast<WordIndex*>(to); 00027 end_ = begin_ + difference; 00028 } 00029 00030 // These are for the vocab index. 00031 // Lower-case in deference to STL. 00032 const WordIndex *begin() const { return begin_; } 00033 WordIndex *begin() { return begin_; } 00034 const WordIndex *end() const { return end_; } 00035 WordIndex *end() { return end_; } 00036 00037 std::size_t size() const { return end_ - begin_; } 00038 std::size_t Order() const { return end_ - begin_; } 00039 00040 private: 00041 WordIndex *begin_, *end_; 00042 }; 00043 00044 template <class PayloadT> class NGram : public NGramHeader { 00045 public: 00046 typedef PayloadT Payload; 00047 00048 NGram() : NGramHeader(NULL, 0) {} 00049 00050 NGram(void *begin, std::size_t order) : NGramHeader(begin, order) {} 00051 00052 // Would do operator++ but that can get confusing for a stream. 00053 void NextInMemory() { 00054 ReBase(&Value() + 1); 00055 } 00056 00057 static std::size_t TotalSize(std::size_t order) { 00058 return order * sizeof(WordIndex) + sizeof(Payload); 00059 } 00060 std::size_t TotalSize() const { 00061 // Compiler should optimize this. 00062 return TotalSize(Order()); 00063 } 00064 00065 static std::size_t OrderFromSize(std::size_t size) { 00066 std::size_t ret = (size - sizeof(Payload)) / sizeof(WordIndex); 00067 assert(size == TotalSize(ret)); 00068 return ret; 00069 } 00070 00071 const Payload &Value() const { return *reinterpret_cast<const Payload *>(end()); } 00072 Payload &Value() { return *reinterpret_cast<Payload *>(end()); } 00073 }; 00074 00075 } // namespace lm 00076 00077 #endif // LM_COMMON_NGRAM_H