/*
 * Convert a run of UTF-8 bytes into UTF-16 code units.
 *
 * sourceStart  in/out: on entry points at the first byte to convert; on
 *              return points at the first byte that was NOT consumed.
 * sourceEnd    one past the last readable source byte.
 * targetStart  in/out: on entry points at the first free output slot; on
 *              return points one past the last code unit written.
 * targetEnd    one past the last writable output slot.
 * flags        strictConversion rejects encoded surrogates and values above
 *              UNI_MAX_UTF16; any other value substitutes
 *              UNI_REPLACEMENT_CHAR for them.
 *
 * Returns conversionOK when the whole input was consumed, sourceExhausted
 * when the input ends in the middle of a sequence, targetExhausted when the
 * output has no room for the next character, or sourceIllegal when a
 * malformed sequence (or, in strict mode, an illegal value) is met.  In every
 * failure case *sourceStart is left at the start of the offending sequence.
 */
ConversionResult ConvertUTF8toUTF16 (
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
    UTF16* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch = 0;
        /* Number of bytes that follow the lead byte, looked up from it. */
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        /*
         * Compare lengths rather than forming source + extraBytesToRead,
         * which could point beyond one-past-the-end of the buffer.
         */
        if (extraBytesToRead >= sourceEnd - source) {
            result = sourceExhausted; break;
        }
        /* Do this check whether lenient or strict */
        if (! isLegalUTF8(source, extraBytesToRead+1)) {
            result = sourceIllegal;
            break;
        }
        /*
         * Accumulate the sequence six bits at a time.  Every case falls
         * through on purpose, so entering at case N consumes N+1 bytes.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                /* fallthrough */
            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                /* fallthrough */
            case 3: ch += *source++; ch <<= 6;
                /* fallthrough */
            case 2: ch += *source++; ch <<= 6;
                /* fallthrough */
            case 1: ch += *source++; ch <<= 6;
                /* fallthrough */
            case 0: ch += *source++;
        }
        /*
         * NOTE(review): offsetsFromUTF8 is defined elsewhere; presumably it
         * cancels the lead/continuation marker bits summed in above.
         */
        ch -= offsetsFromUTF8[extraBytesToRead];

        if (target >= targetEnd) {
            source -= (extraBytesToRead+1); /* Back up source pointer! */
            result = targetExhausted; break;
        }
        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = (UTF16)ch; /* normal case */
            }
        } else if (ch > UNI_MAX_UTF16) {
            if (flags == strictConversion) {
                result = sourceIllegal;
                source -= (extraBytesToRead+1); /* return to the start */
                break; /* Bail out; shouldn't continue */
            } else {
                *target++ = UNI_REPLACEMENT_CHAR;
            }
        } else {
            /* Target needs a surrogate pair, i.e. two output slots. */
            if (target + 1 >= targetEnd) {
                source -= (extraBytesToRead+1); /* Back up source pointer! */
                result = targetExhausted; break;
            }
            ch -= halfBase;
            *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
            *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
/* --------------------------------------------------------------------- */
/*
 * Convert a run of UTF-32 code points into UTF-8 bytes.
 *
 * sourceStart  in/out: on entry points at the first code point to convert;
 *              on return points at the first one that was NOT consumed.
 * sourceEnd    one past the last readable source element.
 * targetStart  in/out: on entry points at the first free output byte; on
 *              return points one past the last byte written.
 * targetEnd    one past the last writable output byte.
 * flags        strictConversion stops with sourceIllegal on a surrogate
 *              value; otherwise surrogates are encoded like any other
 *              value below 0x10000.
 *
 * Values above UNI_MAX_LEGAL_UTF32 are written as UNI_REPLACEMENT_CHAR and
 * set the result to sourceIllegal, but conversion continues past them.
 * Returns targetExhausted (with the source pointer backed up to the
 * unconverted code point) when the output has no room for the next
 * character; otherwise returns conversionOK or the sourceIllegal recorded
 * along the way.
 */
ConversionResult ConvertUTF32toUTF8 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF32* source = *sourceStart;
    UTF8* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        /* (ch | byteMark) & byteMask keeps the low six bits and sets the
         * top two bits to 10, i.e. forms a continuation byte. */
        const UTF32 byteMask = 0xBF;
        const UTF32 byteMark = 0x80;
        ch = *source++;
        if (flags == strictConversion ) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }
        /*
         * Figure out how many bytes the result will require. Turn any
         * illegally large UTF-32 values (above UNI_MAX_LEGAL_UTF32) into
         * replacement characters.
         */
        if (ch < (UTF32)0x80) {
            bytesToWrite = 1;
        } else if (ch < (UTF32)0x800) {
            bytesToWrite = 2;
        } else if (ch < (UTF32)0x10000) {
            bytesToWrite = 3;
        } else if (ch <= UNI_MAX_LEGAL_UTF32) {
            bytesToWrite = 4;
        } else {
            bytesToWrite = 3;
            ch = UNI_REPLACEMENT_CHAR;
            result = sourceIllegal;
        }

        /*
         * Check the remaining room before moving the pointer, so target
         * never points beyond targetEnd.
         */
        if (bytesToWrite > targetEnd - target) {
            --source; /* Back up source pointer! */
            result = targetExhausted; break;
        }
        /* Bytes are emitted last-to-first, so start from the far end. */
        target += bytesToWrite;
        switch (bytesToWrite) { /* note: everything falls through. */
            case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                /* fallthrough */
            case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                /* fallthrough */
            case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                /* fallthrough */
            case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
        }
        /* The switch walked target back to the first byte; skip past it. */
        target += bytesToWrite;
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF32 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF32* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
if (source + extraBytesToRead >= sourceEnd) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */
if (! isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
break;
}
/*