利用Windows的API,MultiByteToWideChar将其转为Unicode,当然了,如果文件就是Unicode(LE)的话,处理掉BOM就可以直接读取了,如果是Unicode(BE)的话,得倒一下字节序。
下面给出我实现的类的代码。
这是头文件TxtReader.h:
#pragma once
#include <windows.h>
#include <stdio.h>
// Tags identifying the encoding of a text file. The values are consecutive
// integers starting at zero.
enum
{
    TXT_TYPE_NONE       = 0,  // no encoding selected
    TXT_TYPE_ANSI       = 1,  // ANSI (multi-byte) text
    TXT_TYPE_UNICODE_LE = 2,  // Unicode, little-endian byte order
    TXT_TYPE_UNICODE_BE = 3,  // Unicode, big-endian byte order
    TXT_TYPE_UTF8       = 4   // UTF-8
};
// Text-file reader that exposes file contents as wide characters (WCHAR).
//
// NOTE(review): only the constructor and destructor definitions are visible
// alongside this declaration; the per-method notes below are inferred from
// the signatures and should be confirmed against the implementation.
class CTxtReader
{
public:
    // Sets up a reader with no file attached and caches the ANSI code page
    // information used for lead-byte checks.
    CTxtReader();

    // Calls Close().
    ~CTxtReader();

    // Opens the file named by pFileName; presumably returns TRUE on success.
    BOOL Open(WCHAR* pFileName);

    // Presumably releases the currently open file, if any.
    void Close();

    // Presumably reads up to dwToRead wide characters into pBuff and stores
    // the number actually produced in dwRead -- TODO confirm the units.
    BOOL Read(WCHAR* pBuff, DWORD dwToRead, DWORD& dwRead);

    // Presumably reports the current position within the file.
    LONG Tell();

protected:
    // Presumably tells whether byFirstByte is a lead byte of the ANSI code
    // page, i.e. whether one more byte is needed to complete the character.
    BOOL NeedNextByte(BYTE byFirstByte);

    FILE*  m_pFile;                // underlying stream; NULL after construction
    INT    m_iType;                // one of the TXT_TYPE_* values
    CPINFO m_codepage;             // filled by GetCPInfo(CP_ACP, ...) in the constructor
    INT    m_iMaxLeadBytePairNum;  // number of lead-byte ranges found in m_codepage.LeadByte
};
这是CPP文件TxtReader.cpp:
#include "TxtReader.h"
// Constructs a reader with no file attached and caches the lead-byte layout
// of the system ANSI code page (CP_ACP).
//
// After construction m_iMaxLeadBytePairNum holds the number of
// (first, last) lead-byte ranges stored at the front of m_codepage.LeadByte.
CTxtReader::CTxtReader(void)
{
    m_pFile = NULL;
    m_iType = TXT_TYPE_NONE;
    m_iMaxLeadBytePairNum = 0;

    int i;

    // GetCPInfo can fail, in which case it leaves the structure untouched.
    // Fall back to a single-byte code page description (no lead bytes) so
    // the scan below never reads indeterminate memory.
    if(!GetCPInfo(CP_ACP, &m_codepage))
    {
        m_codepage.MaxCharSize = 1;
        for(i=0; i<static_cast<int>(sizeof(m_codepage.DefaultChar)); i++)
            m_codepage.DefaultChar[i] = 0;
        for(i=0; i<static_cast<int>(sizeof(m_codepage.LeadByte)); i++)
            m_codepage.LeadByte[i] = 0;
    }

    // LeadByte holds (first, last) byte pairs followed by a 0,0 terminator
    // pair, so at most one pair fewer than the array can hold is a real range.
    const int iMaxPairs = static_cast<int>(sizeof(m_codepage.LeadByte) / 2) - 1;
    for(i=0; i<iMaxPairs; i++)
    {
        // A 0,0 pair marks the end of the range list.
        if(m_codepage.LeadByte[i*2]==0 && m_codepage.LeadByte[i*2+1]==0)
            break;
        ++m_iMaxLeadBytePairNum;
    }
}
// Destructor: hands all cleanup over to Close().
CTxtReader::~CTxtReader()
{
    this->Close();
}
BOOL CTxtReader::Open(WCHAR* pFileName)
{