Joshua
open source statistical hierarchical phrase-based machine translation system
|
00001 #ifndef LM_BUILDER_INTERPOLATE_H 00002 #define LM_BUILDER_INTERPOLATE_H 00003 00004 #include "lm/common/special.hh" 00005 #include "lm/word_index.hh" 00006 #include "util/stream/multi_stream.hh" 00007 00008 #include <vector> 00009 00010 #include <stdint.h> 00011 00012 namespace lm { namespace builder { 00013 00014 /* Interpolate step. 00015 * Input: suffix sorted n-grams with (p_uninterpolated, gamma) from 00016 * InitialProbabilities. 00017 * Output: suffix sorted n-grams with complete probability 00018 */ 00019 class Interpolate { 00020 public: 00021 // Normally vocab_size is the unigram count-1 (since p(<s>) = 0) but might 00022 // be larger when the user specifies a consistent vocabulary size. 00023 explicit Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab, bool output_q, const SpecialVocab &specials); 00024 00025 void Run(const util::stream::ChainPositions &positions); 00026 00027 private: 00028 float uniform_prob_; 00029 util::stream::ChainPositions backoffs_; 00030 const std::vector<uint64_t> prune_thresholds_; 00031 bool prune_vocab_; 00032 bool output_q_; 00033 const SpecialVocab specials_; 00034 }; 00035 00036 }} // namespaces 00037 #endif // LM_BUILDER_INTERPOLATE_H