Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/kenlm/lm/builder/initial_probabilities.hh
00001 #ifndef LM_BUILDER_INITIAL_PROBABILITIES_H
00002 #define LM_BUILDER_INITIAL_PROBABILITIES_H
00003 
00004 #include "lm/builder/discount.hh"
00005 #include "lm/word_index.hh"
00006 #include "util/stream/config.hh"
00007 
00008 #include <vector>
00009 
00010 namespace util { namespace stream { class Chains; } }
00011 
00012 namespace lm {
00013 class SpecialVocab;
00014 namespace builder {
00015 
00016 struct InitialProbabilitiesConfig {  // Settings passed to InitialProbabilities().
00017   // Chain configurations for the adder's input and output.  These should be small buffers to keep the adder from getting too far ahead.
00018   util::stream::ChainConfig adder_in;
00019   util::stream::ChainConfig adder_out;
00020   // Flag for interpolating unigrams; SRILM doesn't normally interpolate unigrams.  No default is given here, so callers must set it explicitly.
00021   bool interpolate_unigrams;
00022 };
00023 
00024 /* Compute initial (uninterpolated) probabilities.
00025  * primary: the normal chain of n-grams.  Incoming is context-sorted adjusted
00026  *   counts.  Outgoing has uninterpolated probabilities for use by Interpolate.
00027  * second_in: a second copy of the primary input.  Discard the output.
00028  * gamma_out: computed gamma values are written to these chains in suffix
00029  *   order.  The values are bare floats and should be buffered so that
00030  *   interpolation can use them.
00031  */
00032 void InitialProbabilities(
00033     const InitialProbabilitiesConfig &config,  // adder chain configs and the interpolate_unigrams flag
00034     const std::vector<Discount> &discounts,  // presumably one Discount per n-gram order -- TODO confirm
00035     util::stream::Chains &primary,  // in/out: see "primary" in the comment above
00036     util::stream::Chains &second_in,  // see "second_in" in the comment above
00037     util::stream::Chains &gamma_out,  // out: see "gamma_out" in the comment above
00038     const std::vector<uint64_t> &prune_thresholds,  // presumably per-order count thresholds for pruning -- TODO confirm
00039     bool prune_vocab,  // NOTE(review): pruning-related switch; exact semantics are not visible in this header
00040     const SpecialVocab &vocab);  // forward-declared lm::SpecialVocab; its role is not visible in this header
00041 
00042 } // namespace builder
00043 } // namespace lm
00044 
00045 #endif // LM_BUILDER_INITIAL_PROBABILITIES_H