Thrift 中以GBK传输中文字符和分词服务搭建(一)

2014-11-24 09:07:07 · 作者: · 浏览: 7
项目中需要将分词做成线上服务的形式:服务后端用 C++ 实现,客户端用 Java 实现,来调用分词的服务。由于分词程序默认以 GBK 编码为准,而 Thrift 的 Java 库在读写字符串时固定使用 UTF-8 编码,因此需要在数据传输的时候改用 GBK 编码方式传输。Thrift 的 Java 库不支持以 GBK 方式传输字符串,而且预期不会增加此功能(参见“Support non-UTF-8 in Java and C#”),原因参考“Support non-UTF-8 in Java”;C/C++ 中传输的字符串是字节序列,不存在编码的问题。通过阅读 TProtocol 相关代码可知,只需要重写 readString 和 writeString 中的字符串读写编码方式即可。重写的类的完整实现如下:
[java]
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import org.apache.thrift.ShortStack;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.*;
import org.apache.thrift.transport.TTransport;
public class GBKCompactProtocol extends TProtocol {
// Shared placeholder struct / field instances. Neither is referenced in the part of the
// class visible here; presumably the read-side methods return them -- confirm.
private static final TStruct ANONYMOUS_STRUCT = new TStruct("");
private static final TField TSTOP = new TField("", (byte) 0, (short) 0);
// 16-entry lookup table. Presumably maps a Thrift type id to its compact wire type and is
// filled in elsewhere in the class (see getCompactType usage) -- confirm.
private static final byte[] ttypeToCompactType = new byte[16];
// -126 is 0x82 as a signed byte; writeMessageBegin writes this same value as the first byte.
private static final byte PROTOCOL_ID = -126;
// Version / type packing constants. writeMessageBegin hard-codes the equivalent literals
// (0x1, << 5, 0xFFFFFFE0) instead of referencing these.
private static final byte VERSION = 1;
// 31 == 0x1F: the low five bits.
private static final byte VERSION_MASK = 31;
// -32 == 0xE0 as a signed byte: the high three bits.
private static final byte TYPE_MASK = -32;
private static final int TYPE_SHIFT_AMOUNT = 5;
// Stack of saved field ids: writeStructBegin pushes lastFieldId_, writeStructEnd pops it back.
// NOTE(review): the 15 looks like an initial capacity -- confirm against ShortStack.
private ShortStack lastField_ = new ShortStack(15);
// Id of the field most recently written in the current struct; writeFieldBeginInternal
// uses it to decide whether the next field id can be written as a small delta.
private short lastFieldId_ = 0;
// Set by writeFieldBegin when field.type == 2, in which case no header is written at that point.
private TField booleanField_ = null;
// Not used in the visible part of the class; presumably the read-side counterpart of
// booleanField_ -- confirm.
private Boolean boolValue_ = null;
// Scratch buffers. Judging by the names they back the varint / single-byte helpers defined
// elsewhere in the class -- confirm. Note that three of them are package-private.
byte[] i32buf = new byte[5];
byte[] varint64out = new byte[10];
private byte[] byteDirectBuffer = new byte[1];
byte[] byteRawBuf = new byte[1];
/**
 * Creates a protocol instance bound to the given transport.
 *
 * @param transport the transport handed to the {@code TProtocol} superclass constructor
 */
public GBKCompactProtocol(TTransport transport) {
    super(transport);
}
/**
 * Discards the field-id tracking state: zeroes the last written field id and
 * empties the stack of saved field ids.
 */
public void reset() {
    // The two resets are independent of each other.
    lastFieldId_ = 0;
    lastField_.clear();
}
/**
 * Writes a message header: the protocol id byte, a combined version/type byte,
 * the sequence id as a varint, and finally the message name.
 *
 * @param message the message whose {@code type}, {@code seqid} and {@code name} are written
 * @throws TException if one of the underlying write helpers fails
 */
public void writeMessageBegin(TMessage message) throws TException {
    // Same value as the PROTOCOL_ID constant declared on this class (0x82 as a signed byte).
    writeByteDirect((byte) -126);

    // Message type goes in the bits above the low five; the low bits carry version 1.
    final int shiftedType = (message.type << 5) & 0xFFFFFFE0;
    writeByteDirect(0x1 | shiftedType);

    writeVarint32(message.seqid);
    writeString(message.name);
}
/**
 * Begins a struct. Nothing is written; the current last-field id is saved on the
 * stack and field-id tracking restarts from zero for the new struct.
 *
 * @param struct the struct being started (not inspected here)
 * @throws TException declared by the method signature; this body does not throw it itself
 */
public void writeStructBegin(TStruct struct) throws TException {
    // Save first, then reset: the order matters.
    lastField_.push(lastFieldId_);
    lastFieldId_ = 0;
}
/**
 * Ends a struct by restoring the last-field id that {@code writeStructBegin} saved.
 *
 * @throws TException declared by the method signature; this body does not throw it itself
 */
public void writeStructEnd() throws TException {
    lastFieldId_ = lastField_.pop();
}
/**
 * Begins a field. For type code 2 the field is only remembered in
 * {@code booleanField_} and no header is written at this point; for every other
 * type the header is written immediately with no type override.
 *
 * @param field the field being started
 * @throws TException if writing the field header fails
 */
public void writeFieldBegin(TField field) throws TException {
    // Type code 2 is presumably the boolean type, given the booleanField_ name -- confirm.
    if (field.type != 2) {
        // -1 tells writeFieldBeginInternal to derive the compact type from the field itself.
        writeFieldBeginInternal(field, (byte) -1);
        return;
    }
    booleanField_ = field;
}
private void writeFieldBeginInternal(TField field, byte typeOverride)
throws TException {
byte typeToWrite = typeOverride == -1 getCompactType(field.type) : typeOverride;
if ((field.id > this.lastFieldId_) && (field.id - this.lastFieldId_ <= 15)) {
writeByteDirect(field.id - this.lastFieldId_ << 4 | typeToWrite);
} else {
writeByteDirect(typeToWrite);
writeI16(field.id);
}
this.lastFieldId_ = field.id;
}
/**
 * Writes the field-stop marker, a single zero byte.
 *
 * @throws TException if the underlying byte write fails
 */
public void writeFieldStop() throws TException {
    final byte stopMarker = (byte) 0;
    writeByteDirect(stopMarker);
}
public void writeMapBegin(TMap map)
throws TException {
if (map.size == 0) {
writeByteDirect(0);
} else {
writeVarint32(map.size);
writ