Joshua
open source statistical hierarchical phrase-based machine translation system
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
src/kenlm/util/double-conversion/utils.h
00001 // Copyright 2010 the V8 project authors. All rights reserved.
00002 // Redistribution and use in source and binary forms, with or without
00003 // modification, are permitted provided that the following conditions are
00004 // met:
00005 //
00006 //     * Redistributions of source code must retain the above copyright
00007 //       notice, this list of conditions and the following disclaimer.
00008 //     * Redistributions in binary form must reproduce the above
00009 //       copyright notice, this list of conditions and the following
00010 //       disclaimer in the documentation and/or other materials provided
00011 //       with the distribution.
00012 //     * Neither the name of Google Inc. nor the names of its
00013 //       contributors may be used to endorse or promote products derived
00014 //       from this software without specific prior written permission.
00015 //
00016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00019 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00020 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00021 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00022 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00023 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00024 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00025 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00026 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00027 
00028 #ifndef DOUBLE_CONVERSION_UTILS_H_
00029 #define DOUBLE_CONVERSION_UTILS_H_
00030 
00031 #include <stdlib.h>
00032 #include <string.h>
00033 
00034 #include <assert.h>
// Debug-check and "must never get here" hooks. Each macro is defined only
// when the including project has not already provided its own version.

// ASSERT forwards its condition to the standard assert().
#if !defined(ASSERT)
#  define ASSERT(condition) (assert(condition))
#endif

// UNIMPLEMENTED terminates the program through abort().
#if !defined(UNIMPLEMENTED)
#  define UNIMPLEMENTED() (abort())
#endif

// UNREACHABLE terminates the program through abort().
#if !defined(UNREACHABLE)
#  define UNREACHABLE() (abort())
#endif
00044 
// Double operations detection based on target architecture.
// Linux uses an 80-bit wide floating point stack on x86. This induces double
// rounding, which in turn leads to wrong results.
// An easy way to test if the floating-point operations are correct is to
// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
// the result is equal to 89255e-22.
// The best way to test this is to create a division function and to compare
// the output of the division with the expected result. (Inlining must be
// disabled.)
// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
//
// DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS is defined to 1 on targets
// whose double arithmetic is performed in 64 bits, left undefined on
// non-Windows 32-bit x86, and compilation stops with an #error on targets
// that are not listed at all. The list covers 64-bit ARM (__aarch64__,
// _M_ARM64), big-endian and MSVC ARM, the __ppc__/__ppc64__ spellings of
// PowerPC and RISC-V in addition to the historical entries.
#if defined(_M_X64) || defined(__x86_64__) || \
    defined(__ARMEL__) || defined(__ARMEB__) || \
    defined(_M_ARM) || defined(_M_ARM64) || \
    defined(__aarch64__) || defined(__AARCH64EL__) || \
    defined(__AARCH64EB__) || defined(__avr32__) || \
    defined(__hppa__) || defined(__ia64__) || \
    defined(__mips__) || defined(__powerpc__) || \
    defined(__ppc__) || defined(__ppc64__) || \
    defined(__riscv) || \
    defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
    defined(__SH4__) || defined(__alpha__) || \
    defined(_MIPS_ARCH_MIPS32R2)
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#if defined(_WIN32)
// Windows uses a 64bit wide floating point stack.
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#else
// 32-bit x86 outside Windows: the 80-bit stack may round twice, so the
// "correct operations" flag must stay undefined.
#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
#endif  // _WIN32
#else
#error Target architecture was not detected as supported by Double-Conversion.
#endif
00073 
00074 
// Fixed-width integer types. Everything except a non-MinGW Windows build
// takes them from <stdint.h>; the remaining Windows builds get hand-written
// typedefs instead.
#if !defined(_WIN32) || defined(__MINGW32__)

#include <stdint.h>

#else

// Signed types.
typedef signed char int8_t;
typedef short int16_t;  // NOLINT
typedef int int32_t;
typedef __int64 int64_t;
// Unsigned types.
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;  // NOLINT
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// intptr_t and friends are defined in crtdefs.h through stdio.h.

#endif
00092 
// Builds a 64-bit unsigned constant from two 32-bit hexadecimal halves; this
// works on both 32 and 64-bit platforms. `a` is the upper half written as an
// ordinary hex literal, `b` is the lower half written as bare hex digits (the
// macro pastes the 0x prefix and the u suffix around it).
// Usage: instead of writing 0x1234567890123456
//      write UINT64_2PART_C(0x12345678, 90123456)
#define UINT64_2PART_C(a, b) ((static_cast<uint64_t>(a) << 32) + 0x##b##u)
00097 
00098 
// ARRAY_SIZE(a) is a compile-time constant of type size_t holding the number
// of elements in the array `a`. Use it on statically allocated arrays only.
// The divisor is 1 when sizeof(a) is an exact multiple of the element size
// and 0 otherwise, so a size mismatch divides by zero rather than yielding
// a bogus count.
#if !defined(ARRAY_SIZE)
#define ARRAY_SIZE(a)                                        \
  ((sizeof(a) / sizeof(*(a))) /                              \
   static_cast<size_t>((sizeof(a) % sizeof(*(a))) == 0))
#endif
00108 
// Declares, without defining, the copy constructor and the copy-assignment
// operator of TypeName. Place it in the private: section of a class so that
// copying the class from outside code is rejected.
#if !defined(DISALLOW_COPY_AND_ASSIGN)
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);               \
  void operator=(const TypeName&)
#endif
00116 
// Declares, without defining, every implicitly generated constructor of
// TypeName: the default constructor plus, through DISALLOW_COPY_AND_ASSIGN,
// the copy constructor and operator=.
//
// Place it in the private: section of a class that nobody should be able to
// instantiate, which is mostly useful for classes made only of static
// methods.
#if !defined(DISALLOW_IMPLICIT_CONSTRUCTORS)
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName();                                    \
  DISALLOW_COPY_AND_ASSIGN(TypeName)
#endif
00128 
00129 namespace double_conversion {
00130 
// Size of one char in bytes, kept as an int so it can scale int counts.
static const int kCharSize = static_cast<int>(sizeof(char));
00132 
// Yields the larger of the two arguments. When neither argument compares
// less than the other, `a` is the one returned.
template <typename T>
static T Max(T a, T b) {
  if (a < b) {
    return b;
  }
  return a;
}
00138 
00139 
// Yields the smaller of the two arguments. When neither argument compares
// less than the other, `b` is the one returned.
template <typename T>
static T Min(T a, T b) {
  if (a < b) {
    return a;
  }
  return b;
}
00145 
00146 
00147 inline int StrLength(const char* string) {
00148   size_t length = strlen(string);
00149   ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
00150   return static_cast<int>(length);
00151 }
00152 
00153 // This is a simplified version of V8's Vector class.
00154 template <typename T>
00155 class Vector {
00156  public:
00157   Vector() : start_(NULL), length_(0) {}
00158   Vector(T* data, int length) : start_(data), length_(length) {
00159     ASSERT(length == 0 || (length > 0 && data != NULL));
00160   }
00161 
00162   // Returns a vector using the same backing storage as this one,
00163   // spanning from and including 'from', to but not including 'to'.
00164   Vector<T> SubVector(int from, int to) {
00165     ASSERT(to <= length_);
00166     ASSERT(from < to);
00167     ASSERT(0 <= from);
00168     return Vector<T>(start() + from, to - from);
00169   }
00170 
00171   // Returns the length of the vector.
00172   int length() const { return length_; }
00173 
00174   // Returns whether or not the vector is empty.
00175   bool is_empty() const { return length_ == 0; }
00176 
00177   // Returns the pointer to the start of the data in the vector.
00178   T* start() const { return start_; }
00179 
00180   // Access individual vector elements - checks bounds in debug mode.
00181   T& operator[](int index) const {
00182     ASSERT(0 <= index && index < length_);
00183     return start_[index];
00184   }
00185 
00186   T& first() { return start_[0]; }
00187 
00188   T& last() { return start_[length_ - 1]; }
00189 
00190  private:
00191   T* start_;
00192   int length_;
00193 };
00194 
00195 
00196 // Helper class for building result strings in a character buffer. The
00197 // purpose of the class is to use safe operations that checks the
00198 // buffer bounds on all operations in debug mode.
00199 class StringBuilder {
00200  public:
00201   StringBuilder(char* buffer, int size)
00202       : buffer_(buffer, size), position_(0) { }
00203 
00204   ~StringBuilder() { if (!is_finalized()) Finalize(); }
00205 
00206   int size() const { return buffer_.length(); }
00207 
00208   // Get the current position in the builder.
00209   int position() const {
00210     ASSERT(!is_finalized());
00211     return position_;
00212   }
00213 
00214   // Reset the position.
00215   void Reset() { position_ = 0; }
00216 
00217   // Add a single character to the builder. It is not allowed to add
00218   // 0-characters; use the Finalize() method to terminate the string
00219   // instead.
00220   void AddCharacter(char c) {
00221     // I just extract raw data not a cstr so null is fine.
00222     //ASSERT(c != '\0');
00223     ASSERT(!is_finalized() && position_ < buffer_.length());
00224     buffer_[position_++] = c;
00225   }
00226 
00227   // Add an entire string to the builder. Uses strlen() internally to
00228   // compute the length of the input string.
00229   void AddString(const char* s) {
00230     AddSubstring(s, StrLength(s));
00231   }
00232 
00233   // Add the first 'n' characters of the given string 's' to the
00234   // builder. The input string must have enough characters.
00235   void AddSubstring(const char* s, int n) {
00236     ASSERT(!is_finalized() && position_ + n < buffer_.length());
00237     // I just extract raw data not a cstr so null is fine.
00238     //ASSERT(static_cast<size_t>(n) <= strlen(s));
00239     memmove(&buffer_[position_], s, n * kCharSize);
00240     position_ += n;
00241   }
00242 
00243 
00244   // Add character padding to the builder. If count is non-positive,
00245   // nothing is added to the builder.
00246   void AddPadding(char c, int count) {
00247     for (int i = 0; i < count; i++) {
00248       AddCharacter(c);
00249     }
00250   }
00251 
00252   // Finalize the string by 0-terminating it and returning the buffer.
00253   char* Finalize() {
00254     ASSERT(!is_finalized() && position_ < buffer_.length());
00255     buffer_[position_] = '\0';
00256     // Make sure nobody managed to add a 0-character to the
00257     // buffer while building the string.
00258     // I just extract raw data not a cstr so null is fine.
00259     //ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
00260     position_ = -1;
00261     ASSERT(is_finalized());
00262     return buffer_.start();
00263   }
00264 
00265  private:
00266   Vector<char> buffer_;
00267   int position_;
00268 
00269   bool is_finalized() const { return position_ < 0; }
00270 
00271   DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
00272 };
00273 
// The type-based aliasing rule allows the compiler to assume that pointers of
// different types (for some definition of different) never alias each other.
// Thus the following code does not work:
//
// float f = foo();
// int fbits = *(int*)(&f);
//
// The compiler 'knows' that the int pointer can't refer to f since the types
// don't match, so the compiler may cache f in a register, leaving random data
// in fbits.  Using C++ style casts makes no difference, however a pointer to
// char data is assumed to alias any other pointer.  This is the 'memcpy
// exception'.
//
// BitCast uses the memcpy exception to move the bits from a variable of one
// type to a variable of another type.  Of course the end result is likely to
// be implementation dependent.  Most compilers (gcc-4.2 and MSVC 2005)
// will completely optimize BitCast away.
//
// There is an additional use for BitCast.
// Recent gccs will warn when they see casts that may result in breakage due to
// the type-based aliasing rule.  If you have checked that there is no breakage
// you can use BitCast to cast one pointer type to another.  This confuses gcc
// enough that it can no longer see that you have cast one pointer type to
// another thus avoiding the warning.
//
// Returns a Dest whose object representation is a byte-for-byte copy of
// `source`. Dest and Source must have the same size.
template <class Dest, class Source>
inline Dest BitCast(const Source& source) {
  // Compile time assertion: sizeof(Dest) == sizeof(Source)
  // A compile error here means your Dest and Source have different sizes.
  // The typedef is never referenced, so it is marked unused on the compilers
  // that warn about unused local typedefs: GCC 4.8 or newer, and Clang
  // (which reports itself as GCC 4.2 and therefore needs its own test).
  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
#if defined(__clang__) || \
    (defined(__GNUC__) && \
     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
      __attribute__((unused))
#endif
      ;

  Dest dest;
  memmove(&dest, &source, sizeof(dest));
  return dest;
}

// Pointer flavour: converts the address held in `source` to uintptr_t and
// bit-casts that integer to Dest, so Dest must be as large as uintptr_t.
template <class Dest, class Source>
inline Dest BitCast(Source* source) {
  return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
}
00317 
00318 }  // namespace double_conversion
00319 
00320 #endif  // DOUBLE_CONVERSION_UTILS_H_