break;
case '\n':outBuffer.append('\\'); outBuffer.append('n');
break;
case '\r':outBuffer.append('\\'); outBuffer.append('r');
break;
case '\f':outBuffer.append('\\'); outBuffer.append('f');
break;
case '=': // Fall through
case ':': // Fall through
case '#': // Fall through
case '!':
outBuffer.append('\\'); outBuffer.append(aChar);
break;
default:
if ((aChar < 0x0020) || (aChar > 0x007e)) {
outBuffer.append('\\');
outBuffer.append('u');
outBuffer.append(toHex((aChar >> 12) & 0xF));
outBuffer.append(toHex((aChar >> & 0xF));
outBuffer.append(toHex((aChar >> 4) & 0xF));
outBuffer.append(toHex( aChar & 0xF));
} else {
outBuffer.append(aChar);
}
}
}
return outBuffer.toString();
}
/**
 * Decodes the escape sequences contained in {@code str}.
 *
 * <p>Convenience overload: the whole string is copied into a char array and handed to
 * {@code fromUnicode(char[], int, int, char[])} together with a freshly allocated
 * 1024-char scratch buffer.
 *
 * @param str the escaped text; a {@code null} argument fails with a NullPointerException
 * @return whatever the char-array overload returns for the full contents of {@code str}
 */
public static String fromUnicode(String str) {
    final char[] source = str.toCharArray();
    final char[] scratch = new char[1024];
    return fromUnicode(source, 0, source.length, scratch);
}
/*
* Converts encoded \uxxxx to unicode chars
* and changes special saved chars to their original forms
*/
public static String fromUnicode(char[] in, int off, int len, char[] convtBuf) {
if (convtBuf.length < len) {
int newLen = len * 2;
if (newLen < 0) {
newLen = Integer.MAX_VALUE;
}
convtBuf = new char[newLen];
}
char aChar;
char[] out = convtBuf;
int outLen = 0;
int end = off + len;
while (off < end) {
aChar = in[off++];
if (aChar == '\\') {
aChar = in[off++];
if (aChar == 'u') {
// Read the xxxx
int value = 0;
for (int i = 0; i < 4; i++) {
aChar = in[off++];
switch (aChar) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
value = (value << 4) + aChar - '0';
break;
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
value = (value << 4) + 10 + aChar - 'a';
break;
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
value = (value << 4) + 10 + aChar - 'A';
break;
default:
throw new IllegalArgumentException(
"Malformed \\uxxxx encoding.");
}
}
out[outLen++] = (char) value;
} else {
if (aChar == 't') {
aChar = '\t';
} else if (aChar == 'r') {
aChar = '\r';
} else if (aChar == 'n') {
aChar = '\n';
} else if (aChar == 'f') {
aChar = '\f';
}
out[outLen++] = aChar;
}
} else {
out[outLen++] = (char) aChar;
}
}
return new String(out, 0, outLen);
}
}
五、附加
下面的例子说明:无论一个字符原先用哪种本地字符集表示,它在 Unicode 字符集中的编码都是相同的。换言之,Unicode 字符集与语言的种类无关。
public class ch {
public static void main(String[] args) {
// TODO Auto-generated method stub
try {
String str = "中";
String CHARSET = "GB2312";
// String CHARSET = "SHIFT-JIS";
char nativeChars[] = str.toCharArray();
java.nio.charset.Charset nativeCharset = java.nio.charset.Charset.forName(CHARSET);
java.nio.CharBuffer nativeCharBuffer = java.nio.CharBuffer.wrap(nativeChars);
java.nio.charset.CharsetEncoder encoder = nativeCharset.newEncoder();
java.nio.ByteBuffer nativeBytebuffer = encoder.encode(nativeCharBuffer);
byte[] nativeBytes = nativeBytebuffer.array();
System.out.println("\n#----- " + CHARSET + " encoding output -----#");
for (int i = 0; i < nativeBytes.length; i++) {
System.out.print(Integer.toHexString('\u00FF' & nativeBytes[i]).toUpperCase());
}
java.nio.charset.CharsetDecoder unicodeDecoder = nativeCharset.newDecoder();
java.nio.CharBuffer unicodeCharbuffer = unicodeDecoder.decode(nativeBytebuffer);
char unicodeChars[] = unicodeCharbuffer.array();
System.out.println("\n#----- Unicode encoding output -----#");
for (int i = 0; i < unicodeChars.length; i++) {
System.out.print(In