Joshua
open source statistical hierarchical phrase-based machine translation system
|
00001 #ifndef LM_COMMON_JOINT_ORDER_H 00002 #define LM_COMMON_JOINT_ORDER_H 00003 00004 #include "lm/common/ngram_stream.hh" 00005 #include "lm/lm_exception.hh" 00006 00007 #ifdef DEBUG 00008 #include "util/fixed_array.hh" 00009 #include <iostream> 00010 #endif 00011 00012 #include <cstring> 00013 00014 namespace lm { 00015 00016 template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) { 00017 // Allow matching to reference streams[-1]. 00018 util::FixedArray<ProxyStream<NGramHeader> > streams_with_dummy(positions.size() + 1); 00019 // A bogus stream for [-1]. 00020 streams_with_dummy.push_back(); 00021 for (std::size_t i = 0; i < positions.size(); ++i) { 00022 streams_with_dummy.push_back(positions[i], NGramHeader(NULL, i + 1)); 00023 } 00024 ProxyStream<NGramHeader> *streams = streams_with_dummy.begin() + 1; 00025 00026 std::size_t order; 00027 for (order = 0; order < positions.size() && streams[order]; ++order) {} 00028 assert(order); // should always have <unk>. 00029 00030 // Debugging only: call comparison function to sanity check order. 00031 #ifdef DEBUG 00032 util::FixedArray<Compare> less_compare(order); 00033 for (unsigned i = 0; i < order; ++i) 00034 less_compare.push_back(i + 1); 00035 #endif // DEBUG 00036 00037 std::size_t current = 0; 00038 while (true) { 00039 // Does the context match the lower one? 00040 if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) { 00041 callback.Enter(current, streams[current].Get()); 00042 // Transition to looking for extensions. 00043 if (++current < order) continue; 00044 } 00045 #ifdef DEBUG 00046 // match_check[current - 1] matches current-grams 00047 // The lower-order stream (which skips fewer current-grams) should always be <= the higher order-stream (which can skip current-grams). 00048 else if (!less_compare[current - 1](streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset)) { 00049 std::cerr << "Stream out of order detected" << std::endl; 00050 abort(); 00051 } 00052 #endif // DEBUG 00053 // No extension left. 00054 while(true) { 00055 assert(current > 0); 00056 --current; 00057 callback.Exit(current, streams[current].Get()); 00058 00059 if (++streams[current]) break; 00060 00061 UTIL_THROW_IF(order != current + 1, FormatLoadException, "Detected n-gram without matching suffix"); 00062 00063 order = current; 00064 if (!order) return; 00065 } 00066 } 00067 } 00068 00069 } // namespaces 00070 00071 #endif // LM_COMMON_JOINT_ORDER_H