Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/kenlm/lm/builder/interpolate.hh
00001 #ifndef LM_BUILDER_INTERPOLATE_H
00002 #define LM_BUILDER_INTERPOLATE_H
00003 
00004 #include "lm/common/special.hh"
00005 #include "lm/word_index.hh"
00006 #include "util/stream/multi_stream.hh"
00007 
00008 #include <vector>
00009 
00010 #include <stdint.h>
00011 
00012 namespace lm { namespace builder {
00013 
00014 /* Interpolate step.
00015  * Input: suffix-sorted n-grams with (p_uninterpolated, gamma) from
00016  * InitialProbabilities.
00017  * Output: suffix-sorted n-grams with complete probability.
00018  */
00019 class Interpolate {
00020   public:
00021     // Normally vocab_size is the unigram count - 1 (since p(<s>) = 0), but it
00022     // might be larger when the user specifies a consistent vocabulary size.
00023     explicit Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab, bool output_q, const SpecialVocab &specials);
00024 
00025     void Run(const util::stream::ChainPositions &positions); // NOTE(review): presumably executes the step on the n-gram streams at these positions; body is not in this header -- confirm in interpolate.cc.
00026 
00027   private:
00028     float uniform_prob_; // NOTE(review): presumably derived from vocab_size; the computation is not visible here -- confirm.
00029     util::stream::ChainPositions backoffs_; // Held by value (not a reference member).
00030     const std::vector<uint64_t> prune_thresholds_; // Const by-value member; presumably a copy of the constructor's prune_thresholds -- confirm.
00031     bool prune_vocab_;
00032     bool output_q_;
00033     const SpecialVocab specials_; // Const by-value member; presumably a copy of the constructor's specials -- confirm.
00034 };
00035 
00036 }} // namespaces
00037 #endif // LM_BUILDER_INTERPOLATE_H