90 #ifndef LLVM_SUPPORT_CONVERTUTF_H 91 #define LLVM_SUPPORT_CONVERTUTF_H 101 typedef unsigned int UTF32;
102 typedef unsigned short UTF16;
103 typedef unsigned char UTF8;
104 typedef bool Boolean;
107 #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD 108 #define UNI_MAX_BMP (UTF32)0x0000FFFF 109 #define UNI_MAX_UTF16 (UTF32)0x0010FFFF 110 #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF 111 #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF 113 #define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4 115 #define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF 116 #define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE 126 strictConversion = 0,
135 ConversionResult ConvertUTF8toUTF16 (
136 const UTF8** sourceStart,
const UTF8* sourceEnd,
137 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
143 ConversionResult ConvertUTF8toUTF32Partial(
144 const UTF8** sourceStart,
const UTF8* sourceEnd,
145 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
151 ConversionResult ConvertUTF8toUTF32(
152 const UTF8** sourceStart,
const UTF8* sourceEnd,
153 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
155 ConversionResult ConvertUTF16toUTF8 (
156 const UTF16** sourceStart,
const UTF16* sourceEnd,
157 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
159 ConversionResult ConvertUTF32toUTF8 (
160 const UTF32** sourceStart,
const UTF32* sourceEnd,
161 UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
163 ConversionResult ConvertUTF16toUTF32 (
164 const UTF16** sourceStart,
const UTF16* sourceEnd,
165 UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
167 ConversionResult ConvertUTF32toUTF16 (
168 const UTF32** sourceStart,
const UTF32* sourceEnd,
169 UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
171 Boolean isLegalUTF8Sequence(
const UTF8 *source,
const UTF8 *sourceEnd);
173 Boolean isLegalUTF8String(
const UTF8 **source,
const UTF8 *sourceEnd);
175 unsigned getNumBytesForUTF8(UTF8 firstByte);
183 #include "llvm/ArrayRef.h" 184 #include "llvm/StringRef.h" 198 bool ConvertCodePointToUTF8(
unsigned Source,
char *&ResultPtr);
215 static inline ConversionResult convertUTF8Sequence(
const UTF8 **source,
216 const UTF8 *sourceEnd,
218 ConversionFlags flags) {
219 if (*source == sourceEnd)
220 return sourceExhausted;
221 unsigned size = getNumBytesForUTF8(**source);
222 if ((ptrdiff_t)size > sourceEnd - *source)
223 return sourceExhausted;
224 return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
231 bool hasUTF16ByteOrderMark(ArrayRef<char> SrcBytes);
238 bool convertUTF16ToUTF8String(ArrayRef<UTF16> SrcUTF16,
239 SmallVectorImpl<char> &DstUTF8);
246 bool convertUTF8ToUTF16String(StringRef SrcUTF8,
247 SmallVectorImpl<UTF16> &DstUTF16);