设为首页 加入收藏

TOP

C++中各种string的相互转化(五)
2013-10-17 09:03:49 来源: 作者: 【 】 浏览:597
Tags:各种 string 相互 转化

 

  /*
   * Convert UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code units
   * written to [*targetStart, targetEnd).
   *
   * On return *sourceStart and *targetStart are advanced past the input
   * consumed and the output produced.  Whenever conversion stops early the
   * source pointer is left at the first byte of the sequence that was not
   * converted.
   *
   * Returns:
   *   conversionOK     - all input was converted.
   *   sourceExhausted  - the last sequence is cut off by sourceEnd.
   *   sourceIllegal    - isLegalUTF8() rejected a sequence, or (only when
   *                      flags == strictConversion) the decoded value is a
   *                      surrogate or exceeds UNI_MAX_UTF16.
   *   targetExhausted  - not enough room left in the target buffer.
   *
   * When flags != strictConversion, surrogate and out-of-range values are
   * replaced by UNI_REPLACEMENT_CHAR instead of stopping the conversion.
   */
  ConversionResult ConvertUTF8toUTF16 (
          const UTF8** sourceStart, const UTF8* sourceEnd,
          UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
      ConversionResult result = conversionOK;
      const UTF8* source = *sourceStart;
      UTF16* target = *targetStart;
      while (source < sourceEnd) {
          UTF32 ch = 0;
          /* Number of bytes that follow the lead byte, looked up by lead byte. */
          unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
          /*
           * Compare against the remaining length rather than computing
           * source + extraBytesToRead, which could point past the end of
           * the buffer.
           */
          if (extraBytesToRead >= sourceEnd - source) {
              result = sourceExhausted; break;
          }
          /* Do this check whether lenient or strict */
          if (! isLegalUTF8(source, extraBytesToRead+1)) {
              result = sourceIllegal;
              break;
          }
          /*
           * The cases all fall through. See "Note A" below.
           */
          switch (extraBytesToRead) {
              case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                  /* fall through */
              case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                  /* fall through */
              case 3: ch += *source++; ch <<= 6;
                  /* fall through */
              case 2: ch += *source++; ch <<= 6;
                  /* fall through */
              case 1: ch += *source++; ch <<= 6;
                  /* fall through */
              case 0: ch += *source++;
          }
          /* Remove the accumulated lead/continuation marker bits. */
          ch -= offsetsFromUTF8[extraBytesToRead];

          if (target >= targetEnd) {
              source -= (extraBytesToRead+1); /* Back up source pointer! */
              result = targetExhausted; break;
          }
          if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
              /* UTF-16 surrogate values are illegal in UTF-32 */
              if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                  if (flags == strictConversion) {
                      source -= (extraBytesToRead+1); /* return to the illegal value itself */
                      result = sourceIllegal;
                      break;
                  } else {
                      *target++ = UNI_REPLACEMENT_CHAR;
                  }
              } else {
                  *target++ = (UTF16)ch; /* normal case */
              }
          } else if (ch > UNI_MAX_UTF16) {
              if (flags == strictConversion) {
                  result = sourceIllegal;
                  source -= (extraBytesToRead+1); /* return to the start */
                  break; /* Bail out; shouldn't continue */
              } else {
                  *target++ = UNI_REPLACEMENT_CHAR;
              }
          } else {
              /* target is a character in range 0xFFFF - 0x10FFFF. */
              /* A surrogate pair needs two free code units. */
              if (target + 1 >= targetEnd) {
                  source -= (extraBytesToRead+1); /* Back up source pointer! */
                  result = targetExhausted; break;
              }
              ch -= halfBase;
              *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
              *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
          }
      }
      *sourceStart = source;
      *targetStart = target;
      return result;
  }

  /* --------------------------------------------------------------------- */

  /*
   * Convert UTF-32 values in [*sourceStart, sourceEnd) to UTF-8 bytes
   * written to [*targetStart, targetEnd).
   *
   * On return *sourceStart and *targetStart are advanced past the input
   * consumed and the output produced.
   *
   * Returns:
   *   conversionOK     - all input was converted.
   *   sourceIllegal    - with flags == strictConversion, a surrogate value
   *                      was found (conversion stops with the source
   *                      pointer on that value); or a value above
   *                      UNI_MAX_LEGAL_UTF32 was found (it is written as
   *                      UNI_REPLACEMENT_CHAR and conversion continues, so
   *                      a later stop condition may overwrite this result).
   *   targetExhausted  - not enough room left in the target buffer; the
   *                      source pointer is backed up to the value that did
   *                      not fit.
   */
  ConversionResult ConvertUTF32toUTF8 (
          const UTF32** sourceStart, const UTF32* sourceEnd,
          UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
      ConversionResult result = conversionOK;
      const UTF32* source = *sourceStart;
      UTF8* target = *targetStart;
      while (source < sourceEnd) {
          UTF32 ch;
          unsigned short bytesToWrite = 0;
          const UTF32 byteMask = 0xBF;
          const UTF32 byteMark = 0x80;
          ch = *source++;
          if (flags == strictConversion ) {
              /* UTF-16 surrogate values are illegal in UTF-32 */
              if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                  --source; /* return to the illegal value itself */
                  result = sourceIllegal;
                  break;
              }
          }
          /*
           * Figure out how many bytes the result will require. Turn any
           * illegally large UTF32 things (> Plane 17) into replacement chars.
           */
          if (ch < (UTF32)0x80) {
              bytesToWrite = 1;
          } else if (ch < (UTF32)0x800) {
              bytesToWrite = 2;
          } else if (ch < (UTF32)0x10000) {
              bytesToWrite = 3;
          } else if (ch <= UNI_MAX_LEGAL_UTF32) {
              bytesToWrite = 4;
          } else {
              bytesToWrite = 3;
              ch = UNI_REPLACEMENT_CHAR;
              result = sourceIllegal;
          }

          /*
           * Check the remaining space before moving target, so the pointer
           * never leaves the target buffer.
           */
          if (bytesToWrite > targetEnd - target) {
              --source; /* Back up source pointer! */
              result = targetExhausted; break;
          }
          /* Bytes are emitted last-to-first, so start from the end. */
          target += bytesToWrite;
          switch (bytesToWrite) { /* note: everything falls through. */
              case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                  /* fall through */
              case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                  /* fall through */
              case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                  /* fall through */
              case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
          }
          /* The switch walked target back to the first byte; skip past the sequence. */
          target += bytesToWrite;
      }
      *sourceStart = source;
      *targetStart = target;
      return result;
  }

  /* --------------------------------------------------------------------- */

  ConversionResult ConvertUTF8toUTF32 (

  const UTF8** sourceStart, const UTF8* sourceEnd,

  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {

  ConversionResult result = conversionOK;

  const UTF8* source = *sourceStart;

  UTF32* target = *targetStart;

  while (source < sourceEnd) {

  UTF32 ch = 0;

  unsigned short extraBytesToRead = trailingBytesForUTF8[*source];

  if (source + extraBytesToRead >= sourceEnd) {

  result = sourceExhausted; break;

  }

  /* Do this check whether lenient or strict */

  if (! isLegalUTF8(source, extraBytesToRead+1)) {

  result = sourceIllegal;

  break;

  }

  /*

          

首页 上一页 2 3 4 5 6 下一页 尾页 5/6/6
】【打印繁体】【投稿】【收藏】 【推荐】【举报】【评论】 【关闭】 【返回顶部
分享到: 
上一篇C++接口与实现分离的2种方法 下一篇转盘旋转算法

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容: