Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/kenlm/util/multi_intersection.hh
00001 #ifndef UTIL_MULTI_INTERSECTION_H
00002 #define UTIL_MULTI_INTERSECTION_H
00003 
00004 #include <boost/optional.hpp>
00005 #include <boost/range/iterator_range.hpp>
00006 
00007 #include <algorithm>
00008 #include <functional>
00009 #include <vector>
00010 
00011 namespace util {
00012 
00013 namespace detail {
00014 template <class Range> struct RangeLessBySize : public std::binary_function<const Range &, const Range &, bool> {
00015   bool operator()(const Range &left, const Range &right) const {
00016     return left.size() < right.size();
00017   }
00018 };
00019 
00020 /* Takes sets specified by their iterators and a boost::optional containing
00021  * the lowest intersection if any.  Each set must be sorted in increasing
00022  * order.  sets is changed to truncate the beginning of each sequence to the
00023  * location of the match or an empty set.  Precondition: sets is not empty
00024  * since the intersection over null is the universe and this function does not
00025  * know the universe.
00026  */
00027 template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
00028   typedef std::vector<boost::iterator_range<Iterator> > Sets;
00029   typedef typename std::iterator_traits<Iterator>::value_type Value;
00030 
00031   assert(!sets.empty());
00032 
00033   if (sets.front().empty()) return boost::optional<Value>();
00034   // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
00035   Value highest(sets.front().front());
00036   for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
00037     i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
00038     if (i->empty()) return boost::optional<Value>();
00039     if (less(highest, i->front())) {
00040       highest = i->front();
00041       // start over
00042       i = sets.begin();
00043     } else {
00044       ++i;
00045     }
00046   }
00047   return boost::optional<Value>(highest);
00048 }
00049 
00050 } // namespace detail
00051 
00052 template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
00053   assert(!sets.empty());
00054 
00055   std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
00056   return detail::FirstIntersectionSorted(sets, less);
00057 }
00058 
00059 template <class Iterator> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
00060   return FirstIntersection(sets, std::less<typename std::iterator_traits<Iterator>::value_type>());
00061 }
00062 
00063 template <class Iterator, class Output, class Less> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
00064   typedef typename std::iterator_traits<Iterator>::value_type Value;
00065   assert(!sets.empty());
00066 
00067   std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
00068   boost::optional<Value> ret;
00069   for (boost::optional<Value> ret; (ret = detail::FirstIntersectionSorted(sets, less)); sets.front().advance_begin(1)) {
00070     out(*ret);
00071   }
00072 }
00073 
00074 template <class Iterator, class Output> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
00075   AllIntersection(sets, out, std::less<typename std::iterator_traits<Iterator>::value_type>());
00076 }
00077 
00078 } // namespace util
00079 
00080 #endif // UTIL_MULTI_INTERSECTION_H