Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/kenlm/lm/common/joint_order.hh
00001 #ifndef LM_COMMON_JOINT_ORDER_H
00002 #define LM_COMMON_JOINT_ORDER_H
00003 
00004 #include "lm/common/ngram_stream.hh"
00005 #include "lm/lm_exception.hh"
00006 
00007 #ifdef DEBUG
00008 #include "util/fixed_array.hh"
00009 #include <iostream>
00010 #endif
00011 
00012 #include <cstring>
00013 
00014 namespace lm {
00015 
00016 template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) {
00017   // Allow matching to reference streams[-1].
00018   util::FixedArray<ProxyStream<NGramHeader> > streams_with_dummy(positions.size() + 1);
00019   // A bogus stream for [-1].
00020   streams_with_dummy.push_back();
00021   for (std::size_t i = 0; i < positions.size(); ++i) {
00022     streams_with_dummy.push_back(positions[i], NGramHeader(NULL, i + 1));
00023   }
00024   ProxyStream<NGramHeader> *streams = streams_with_dummy.begin() + 1;
00025 
00026   std::size_t order;
00027   for (order = 0; order < positions.size() && streams[order]; ++order) {}
00028   assert(order); // should always have <unk>.
00029 
00030   // Debugging only: call comparison function to sanity check order.
00031 #ifdef DEBUG
00032   util::FixedArray<Compare> less_compare(order);
00033   for (unsigned i = 0; i < order; ++i)
00034     less_compare.push_back(i + 1);
00035 #endif // DEBUG
00036 
00037   std::size_t current = 0;
00038   while (true) {
00039     // Does the context match the lower one?
00040     if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
00041       callback.Enter(current, streams[current].Get());
00042       // Transition to looking for extensions.
00043       if (++current < order) continue;
00044     }
00045 #ifdef DEBUG
00046     // match_check[current - 1] matches current-grams
00047     // The lower-order stream (which skips fewer current-grams) should always be <= the higher order-stream (which can skip current-grams).
00048     else if (!less_compare[current - 1](streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset)) {
00049       std::cerr << "Stream out of order detected" << std::endl;
00050       abort();
00051     }
00052 #endif // DEBUG
00053     // No extension left.
00054     while(true) {
00055       assert(current > 0);
00056       --current;
00057       callback.Exit(current, streams[current].Get());
00058 
00059       if (++streams[current]) break;
00060 
00061       UTIL_THROW_IF(order != current + 1, FormatLoadException, "Detected n-gram without matching suffix");
00062 
00063       order = current;
00064       if (!order) return;
00065     }
00066   }
00067 }
00068 
00069 } // namespaces
00070 
00071 #endif // LM_COMMON_JOINT_ORDER_H