Joshua
open source statistical hierarchical phrase-based machine translation system
|
00001 #ifndef LM_BUILDER_INITIAL_PROBABILITIES_H 00002 #define LM_BUILDER_INITIAL_PROBABILITIES_H 00003 00004 #include "lm/builder/discount.hh" 00005 #include "lm/word_index.hh" 00006 #include "util/stream/config.hh" 00007 00008 #include <vector> 00009 00010 namespace util { namespace stream { class Chains; } } 00011 00012 namespace lm { 00013 class SpecialVocab; 00014 namespace builder { 00015 00016 struct InitialProbabilitiesConfig { 00017 // These should be small buffers to keep the adder from getting too far ahead 00018 util::stream::ChainConfig adder_in; 00019 util::stream::ChainConfig adder_out; 00020 // SRILM doesn't normally interpolate unigrams. 00021 bool interpolate_unigrams; 00022 }; 00023 00024 /* Compute initial (uninterpolated) probabilities 00025 * primary: the normal chain of n-grams. Incoming is context sorted adjusted 00026 * counts. Outgoing has uninterpolated probabilities for use by Interpolate. 00027 * second_in: a second copy of the primary input. Discard the output. 00028 * gamma_out: Computed gamma values are output on these chains in suffix order. 00029 * The values are bare floats and should be buffered for interpolation to 00030 * use. 00031 */ 00032 void InitialProbabilities( 00033 const InitialProbabilitiesConfig &config, 00034 const std::vector<Discount> &discounts, 00035 util::stream::Chains &primary, 00036 util::stream::Chains &second_in, 00037 util::stream::Chains &gamma_out, 00038 const std::vector<uint64_t> &prune_thresholds, 00039 bool prune_vocab, 00040 const SpecialVocab &vocab); 00041 00042 } // namespace builder 00043 } // namespace lm 00044 00045 #endif // LM_BUILDER_INITIAL_PROBABILITIES_H