Joshua
open source statistical hierarchical phrase-based machine translation system
|
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef DOUBLE_CONVERSION_UTILS_H_
#define DOUBLE_CONVERSION_UTILS_H_

#include <stdlib.h>
#include <string.h>

#include <assert.h>

// Debug-time checks. Each macro can be pre-defined by the including project;
// the defaults map onto <assert.h> and abort().
#ifndef ASSERT
#define ASSERT(condition)      (assert(condition))
#endif
#ifndef UNIMPLEMENTED
#define UNIMPLEMENTED() (abort())
#endif
#ifndef UNREACHABLE
#define UNREACHABLE()   (abort())
#endif

// Double operations detection based on target architecture.
// Linux uses a 80bit wide floating point stack on x86. This induces double
// rounding, which in turn leads to wrong results.
// An easy way to test if the floating-point operations are correct is to
// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
// the result is equal to 89255e-22.
// The best way to test this, is to create a division-function and to compare
// the output of the division with the expected result. (Inlining must be
// disabled.)
// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
//
// The list below also accepts AArch64/ARM64, big-endian ARM, 64-bit PowerPC
// and RISC-V, which perform double arithmetic in 64-bit registers; without
// these entries such targets fall through to the #error branch.
#if defined(_M_X64) || defined(__x86_64__) || \
    defined(__ARMEL__) || defined(__ARMEB__) || defined(__avr32__) || \
    defined(_M_ARM) || defined(_M_ARM64) || \
    defined(__aarch64__) || defined(__AARCH64EL__) || \
    defined(__AARCH64EB__) || \
    defined(__hppa__) || defined(__ia64__) || \
    defined(__mips__) || defined(__powerpc__) || defined(__ppc64__) || \
    defined(__riscv) || \
    defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
    defined(__SH4__) || defined(__alpha__) || \
    defined(_MIPS_ARCH_MIPS32R2)
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#if defined(_WIN32)
// Windows uses a 64bit wide floating point stack.
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#else
#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
#endif  // _WIN32
#else
#error Target architecture was not detected as supported by Double-Conversion.
#endif


#if defined(_WIN32) && !defined(__MINGW32__)

typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;  // NOLINT
typedef unsigned short uint16_t;  // NOLINT
typedef int int32_t;
typedef unsigned int uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
// intptr_t and friends are defined in crtdefs.h through stdio.h.

#else

#include <stdint.h>

#endif

// The following macro works on both 32 and 64-bit platforms.
// Usage: instead of writing 0x1234567890123456
//      write UINT64_2PART_C(0x12345678,90123456);
#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))


// The expression ARRAY_SIZE(a) is a compile-time constant of type
// size_t which represents the number of elements of the given
// array. You should only use ARRAY_SIZE on statically allocated
// arrays.
// The second factor divides by zero (a compile error) whenever sizeof(a) is
// not a whole multiple of the element size.
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a)                                   \
  ((sizeof(a) / sizeof(*(a))) /                         \
  static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
#endif

// A macro to disallow the evil copy constructor and operator= functions
// This should be used in the private: declarations for a class
#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
  TypeName(const TypeName&);                    \
  void operator=(const TypeName&)
#endif

// A macro to disallow all the implicit constructors, namely the
// default constructor, copy constructor and operator= functions.
//
// This should be used in the private: declarations for a class
// that wants to prevent anyone from instantiating it. This is
// especially useful for classes containing only static methods.
#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName();                                    \
  DISALLOW_COPY_AND_ASSIGN(TypeName)
#endif

namespace double_conversion {

// Size of one character in bytes; used to scale character counts into byte
// counts for memmove().
static const int kCharSize = sizeof(char);

// Returns the maximum of the two parameters.
template <typename T>
static T Max(T a, T b) {
  return a < b ? b : a;
}


// Returns the minimum of the two parameters.
template <typename T>
static T Min(T a, T b) {
  return a < b ? a : b;
}


// Returns strlen(string) as an int. In debug builds, asserts that the length
// survives the round trip through int unchanged.
inline int StrLength(const char* string) {
  size_t length = strlen(string);
  ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
  return static_cast<int>(length);
}

// This is a simplified version of V8's Vector class.
// It is a (pointer, length) pair over storage it does not allocate or free.
template <typename T>
class Vector {
 public:
  // Creates an empty vector with no backing storage.
  Vector() : start_(NULL), length_(0) {}

  // Wraps 'length' elements starting at 'data'. A NULL 'data' is accepted
  // only together with a zero length.
  Vector(T* data, int length) : start_(data), length_(length) {
    ASSERT(length == 0 || (length > 0 && data != NULL));
  }

  // Returns a vector using the same backing storage as this one,
  // spanning from and including 'from', to but not including 'to'.
  Vector<T> SubVector(int from, int to) {
    ASSERT(to <= length_);
    ASSERT(from < to);
    ASSERT(0 <= from);
    return Vector<T>(start() + from, to - from);
  }

  // Returns the length of the vector.
  int length() const { return length_; }

  // Returns whether or not the vector is empty.
  bool is_empty() const { return length_ == 0; }

  // Returns the pointer to the start of the data in the vector.
  T* start() const { return start_; }

  // Access individual vector elements - checks bounds in debug mode.
  T& operator[](int index) const {
    ASSERT(0 <= index && index < length_);
    return start_[index];
  }

  // Returns the first element. Not bounds-checked.
  T& first() { return start_[0]; }

  // Returns the last element. Not bounds-checked.
  T& last() { return start_[length_ - 1]; }

 private:
  T* start_;
  int length_;
};


// Helper class for building result strings in a character buffer. The
// purpose of the class is to use safe operations that checks the
// buffer bounds on all operations in debug mode.
//
// A negative position_ marks the builder as finalized.
class StringBuilder {
 public:
  // Builds into the caller-provided 'buffer' of 'size' characters.
  StringBuilder(char* buffer, int size)
      : buffer_(buffer, size), position_(0) { }

  // Terminates the string if the caller did not call Finalize() explicitly.
  ~StringBuilder() { if (!is_finalized()) Finalize(); }

  // Returns the capacity of the underlying buffer.
  int size() const { return buffer_.length(); }

  // Get the current position in the builder.
  int position() const {
    ASSERT(!is_finalized());
    return position_;
  }

  // Reset the position.
  void Reset() { position_ = 0; }

  // Add a single character to the builder. This copy of the class is used
  // to extract raw data rather than C strings, so adding a 0-character is
  // permitted (the corresponding assertion is intentionally disabled).
  void AddCharacter(char c) {
    // I just extract raw data not a cstr so null is fine.
    // ASSERT(c != '\0');
    ASSERT(!is_finalized() && position_ < buffer_.length());
    buffer_[position_++] = c;
  }

  // Add an entire string to the builder. Uses strlen() internally to
  // compute the length of the input string.
  void AddString(const char* s) {
    AddSubstring(s, StrLength(s));
  }

  // Add the first 'n' characters of the given string 's' to the
  // builder. The input string must have enough characters. 'n' must not be
  // negative, and one slot must remain free after the copy (the assertion
  // uses a strict '<').
  void AddSubstring(const char* s, int n) {
    // A negative count would be converted to a huge size_t by memmove().
    ASSERT(n >= 0);
    ASSERT(!is_finalized() && position_ + n < buffer_.length());
    // I just extract raw data not a cstr so null is fine.
    // ASSERT(static_cast<size_t>(n) <= strlen(s));
    memmove(&buffer_[position_], s, n * kCharSize);
    position_ += n;
  }


  // Add character padding to the builder. If count is non-positive,
  // nothing is added to the builder.
  void AddPadding(char c, int count) {
    for (int i = 0; i < count; i++) {
      AddCharacter(c);
    }
  }

  // Finalize the string by 0-terminating it and returning the buffer.
  // Afterwards the builder is marked finalized (position_ becomes -1).
  char* Finalize() {
    ASSERT(!is_finalized() && position_ < buffer_.length());
    buffer_[position_] = '\0';
    // The upstream check that no 0-character was added while building is
    // intentionally disabled in this copy:
    // I just extract raw data not a cstr so null is fine.
    // ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
    position_ = -1;
    ASSERT(is_finalized());
    return buffer_.start();
  }

 private:
  Vector<char> buffer_;
  int position_;

  bool is_finalized() const { return position_ < 0; }

  DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
};

// The type-based aliasing rule allows the compiler to assume that pointers of
// different types (for some definition of different) never alias each other.
// Thus the following code does not work:
//
// float f = foo();
// int fbits = *(int*)(&f);
//
// The compiler 'knows' that the int pointer can't refer to f since the types
// don't match, so the compiler may cache f in a register, leaving random data
// in fbits.  Using C++ style casts makes no difference, however a pointer to
// char data is assumed to alias any other pointer.  This is the 'memcpy
// exception'.
//
// BitCast uses the memcpy exception to move the bits from a variable of one
// type to a variable of another type.  Of course the end result is likely to
// be implementation dependent.  Most compilers (gcc-4.2 and MSVC 2005)
// will completely optimize BitCast away.
//
// There is an additional use for BitCast.
// Recent gccs will warn when they see casts that may result in breakage due to
// the type-based aliasing rule.  If you have checked that there is no breakage
// you can use BitCast to cast one pointer type to another.  This confuses gcc
// enough that it can no longer see that you have cast one pointer type to
// another thus avoiding the warning.
template <class Dest, class Source>
inline Dest BitCast(const Source& source) {
  // Compile time assertion: sizeof(Dest) == sizeof(Source)
  // A compile error here means your Dest and Source have different sizes.
  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
#if defined(__clang__) || \
    (defined(__GNUC__) && \
     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
      // The typedef exists only for its compile-time check; silence the
      // unused-local-typedef warning on compilers that emit it.
      __attribute__((unused))
#endif
      ;

  Dest dest;
  memmove(&dest, &source, sizeof(dest));
  return dest;
}

// Pointer overload: converts the pointer to uintptr_t and bit-casts that
// integer to Dest.
template <class Dest, class Source>
inline Dest BitCast(Source* source) {
  return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
}

}  // namespace double_conversion

#endif  // DOUBLE_CONVERSION_UTILS_H_