C++中各种string的相互转化 - c++编程基础

　　ConversionResult ConvertUTF8toUTF16 （

　　const UTF8** sourceStart, const UTF8* sourceEnd,

　　UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags） {

　　ConversionResult result = conversionOK;

　　const UTF8* source = *sourceStart;

　　UTF16* target = *targetStart;

　　while （source < sourceEnd） {

　　UTF32 ch = 0;

　　if （source + extraBytesToRead >= sourceEnd） {

　　result = sourceExhausted; break;

　　}

　　/* Do this check whether lenient or strict */

　　if （！ isLegalUTF8（source, extraBytesToRead+1）） {

　　result = sourceIllegal;

　　break;

　　}

　　* The cases all fall through. See "Note A" below.

　　switch （extraBytesToRead） {

　　case 5: ch += *source++; ch 《= 6; /* remember, illegal UTF-8 */

　　case 4: ch += *source++; ch 《= 6; /* remember, illegal UTF-8 */

　　case 3: ch += *source++; ch 《= 6;

　　case 2: ch += *source++; ch 《= 6;

　　case 1: ch += *source++; ch 《= 6;

　　case 0: ch += *source++;

　　}

　　ch -= offsetsFromUTF8[extraBytesToRead];

　　if （target >= targetEnd） {

　　source -= （extraBytesToRead+1）； /* Back up source pointer! */

　　result = targetExhausted; break;

　　}

　　if （ch <= UNI_MAX_BMP） { /* Target is a character <= 0xFFFF */

　　/* UTF-16 surrogate values are illegal in UTF-32 */

　　if （ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END） {

　　if （flags == strictConversion） {

　　source -= （extraBytesToRead+1）； /* return to the illegal value itself */

　　result = sourceIllegal;

　　break;

　　} else {

　　*target++ = UNI_REPLACEMENT_CHAR;

　　}

　　} else {

　　*target++ = （UTF16）ch; /* normal case */

　　}

　　} else if （ch > UNI_MAX_UTF16） {

　　if （flags == strictConversion） {

　　result = sourceIllegal;

　　source -= （extraBytesToRead+1）； /* return to the start */

　　break; /* Bail out; shouldn't continue */

　　} else {

　　*target++ = UNI_REPLACEMENT_CHAR;

　　}

　　} else {

　　/* target is a character in range 0xFFFF - 0x10FFFF. */

　　if （target + 1 >= targetEnd） {

　　source -= （extraBytesToRead+1）； /* Back up source pointer! */

　　result = targetExhausted; break;

　　}

　　ch -= halfBase;

　　*target++ = （UTF16）（（ch 》 halfShift） + UNI_SUR_HIGH_START）；

　　*target++ = （UTF16）（（ch & halfMask） + UNI_SUR_LOW_START）；

　　}

　　*sourceStart = source;

　　*targetStart = target;

　　return result;

　　}

　　/* --------------------------------------------------------------------- */

　　ConversionResult ConvertUTF32toUTF8 （

　　const UTF32** sourceStart, const UTF32* sourceEnd,

　　UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags） {

　　ConversionResult result = conversionOK;

　　const UTF32* source = *sourceStart;

　　UTF8* target = *targetStart;

　　while （source < sourceEnd） {

　　UTF32 ch;

　　unsigned short bytesToWrite = 0;

　　const UTF32 byteMask = 0xBF;

　　const UTF32 byteMark = 0x80;

　　ch = *source++;

　　if （flags == strictConversion ） {

　　/* UTF-16 surrogate values are illegal in UTF-32 */

　　if （ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END） {

　　--source; /* return to the illegal value itself */

　　result = sourceIllegal;

　　break;

　　}

　　* Figure out how many bytes the result will require. Turn any

　　* illegally large UTF32 things （> Plane 17） into replacement chars.

　　if （ch < （UTF32）0x80） { bytesToWrite = 1;

　　} else if （ch < （UTF32）0x800） { bytesToWrite = 2;

　　} else if （ch < （UTF32）0x10000） { bytesToWrite = 3;

　　} else if （ch <= UNI_MAX_LEGAL_UTF32） { bytesToWrite = 4;

　　} else { bytesToWrite = 3;

　　ch = UNI_REPLACEMENT_CHAR;

　　result = sourceIllegal;

　　}

　　target += bytesToWrite;

　　if （target > targetEnd） {

　　--source; /* Back up source pointer! */

　　target -= bytesToWrite; result = targetExhausted; break;

　　}

　　switch （bytesToWrite） { /* note: everything falls through. */

　　case 4: *--target = （UTF8）（（ch | byteMark） & byteMask）； ch 》= 6;

　　case 3: *--target = （UTF8）（（ch | byteMark） & byteMask）； ch 》= 6;

　　case 2: *--target = （UTF8）（（ch | byteMark） & byteMask）； ch 》= 6;

　　case 1: *--target = （UTF8）（ch | firstByteMark[bytesToWrite]）；

　　}

　　target += bytesToWrite;

　　}

　　*sourceStart = source;

　　*targetStart = target;

　　return result;

　　}

　　/* --------------------------------------------------------------------- */

　　ConversionResult ConvertUTF8toUTF32 （

　　const UTF8** sourceStart, const UTF8* sourceEnd,

　　UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags） {

　　ConversionResult result = conversionOK;

　　const UTF8* source = *sourceStart;

　　UTF32* target = *targetStart;

　　while （source < sourceEnd） {

　　UTF32 ch = 0;

　　unsigned short extraBytesToRead = trailingBytesForUTF8[*source];

　　if （source + extraBytesToRead >= sourceEnd） {

　　result = sourceExhausted; break;

　　}

　　/* Do this check whether lenient or strict */

　　if （！ isLegalUTF8（source, extraBytesToRead+1）） {

　　result = sourceIllegal;

　　break;

　　}

C++中各种string的相互转化(五)