一个简单的 CFile 读取文本文件, 却是乱码, 向大家请教

应该是你UNICODE 没有处理好

操作系统是中文的, 如果文本文件是Unicode编码,就没问题, 但是一般我们新建的文本文件,默认是ANSI, 用户也不可能去另存为unicode, 所以请问怎样读取ANSI编码的文本文件呢?

你先判断下字符集类型, 然后转换下就OK了

1,IsTextUnicode
The IsTextUnicode function determines whether a buffer is likely to contain a form of Unicode text. The function uses various statistical and deterministic methods to make its determination, under the control of flags passed via lpi. When the function returns, the results of such tests are reported via lpi.
2,MultiByteToWideChar
The MultiByteToWideChar function maps a character string to a wide-character (Unicode) string. The character string mapped by this function is not necessarily from a multibyte character set. 要转换

Ansi转Unicode
介绍2种方法
// Demonstrates two ways to convert an ANSI (CP_ACP) string to UTF-16:
//   1. MultiByteToWideChar; the result is shown in a message box and written
//      to "1.txt" preceded by a UTF-16 little-endian BOM.
//   2. swprintf with the "%S" format, which relies on the current C locale.
void CConvertDlg::OnBnClickedButtonAnsiToUnicode()
{
    // ANSI to Unicode.
    // A string literal is const, so it is bound to a pointer-to-const.
    const char* szAnsi = "abcd1234你我他";
    const int ansiLen = static_cast<int>(strlen(szAnsi));

    // ---- Method 1: MultiByteToWideChar ----
    // Sizing pass: with no output buffer the call returns the number of wide
    // characters required. The flags argument is a DWORD, hence 0 (not NULL).
    const int wcsLen = ::MultiByteToWideChar(CP_ACP, 0, szAnsi, ansiLen, NULL, 0);
    // Reserve one extra element for '\0'; because an explicit source length is
    // passed, MultiByteToWideChar does not emit a terminator itself.
    wchar_t* wszString = new wchar_t[wcsLen + 1];
    // Actual conversion.
    ::MultiByteToWideChar(CP_ACP, 0, szAnsi, ansiLen, wszString, wcsLen);
    // Terminate the string manually.
    wszString[wcsLen] = L'\0';
    // Wide-character version of the MessageBox API.
    ::MessageBoxW(GetSafeHwnd(), wszString, wszString, MB_OK);

    // Write the text to a file. The file starts with the BOM U+FEFF stored
    // low byte (0xFF) first, i.e. UTF-16 little-endian.
    CFile cFile;
    // Only write when the file was actually opened; Open reports failure
    // through its return value.
    if (cFile.Open(_T("1.txt"), CFile::modeWrite | CFile::modeCreate))
    {
        // Start of file.
        cFile.SeekToBegin();
        cFile.Write("\xff\xfe", 2);
        // Payload: the converted characters, without the terminator.
        cFile.Write(wszString, static_cast<UINT>(wcsLen * sizeof(wchar_t)));
        cFile.Flush();
        cFile.Close();
    }
    delete[] wszString;
    wszString = NULL;

    // ---- Method 2: swprintf ----
    // The locale must be set first, otherwise the Chinese characters are not
    // converted correctly by this method. Requires #include <locale.h>.
    setlocale(LC_CTYPE, "chs");
    wchar_t wcsStr[100];
    // Upper-case %S in a wide format string means "the argument is a narrow
    // (ANSI) string". swprintf is the wide counterpart of sprintf, and the L
    // prefix makes the format string itself wide. The element count is passed
    // so the call cannot write past the end of wcsStr.
    swprintf(wcsStr, sizeof(wcsStr) / sizeof(wcsStr[0]), L"%S", szAnsi);
    ::MessageBoxW(GetSafeHwnd(), wcsStr, wcsStr, MB_OK);
}
Unicode转Ansi
也是2种方法
// Demonstrates two ways to convert a UTF-16 string to ANSI (CP_ACP):
//   1. WideCharToMultiByte; the result is shown in a message box and written
//      to "1.txt" without any BOM.
//   2. sprintf with the "%S" format, which relies on the current C locale.
void CConvertDlg::OnBnClickedButtonUnicodeToAnsi()
{
    // Unicode to ANSI.
    // A wide string literal is const, so it is bound to a pointer-to-const.
    const wchar_t* wszString = L"abcd1234你我他";
    // wcslen is the wide-character counterpart of strlen.
    const int wcsLen = static_cast<int>(wcslen(wszString));

    // ---- Method 1: WideCharToMultiByte ----
    // Sizing pass: with no output buffer the call returns the number of bytes
    // required. The flags argument is a DWORD, hence 0 (not NULL).
    const int ansiLen = ::WideCharToMultiByte(CP_ACP, 0, wszString, wcsLen, NULL, 0, NULL, NULL);
    // As in the other direction, reserve one extra byte for '\0'.
    char* szAnsi = new char[ansiLen + 1];
    // Actual conversion.
    ::WideCharToMultiByte(CP_ACP, 0, wszString, wcsLen, szAnsi, ansiLen, NULL, NULL);
    // Terminate the string manually.
    szAnsi[ansiLen] = '\0';
    // ANSI version of the MessageBox API.
    ::MessageBoxA(GetSafeHwnd(), szAnsi, szAnsi, MB_OK);

    // Write the text to a file; an ANSI text file has no BOM.
    CFile cFile;
    // Only write when the file was actually opened; Open reports failure
    // through its return value.
    if (cFile.Open(_T("1.txt"), CFile::modeWrite | CFile::modeCreate))
    {
        // Start of file.
        cFile.SeekToBegin();
        // Payload: the converted bytes, without the terminator.
        cFile.Write(szAnsi, static_cast<UINT>(ansiLen * sizeof(char)));
        cFile.Flush();
        cFile.Close();
    }
    delete[] szAnsi;
    szAnsi = NULL;

    // ---- Method 2: sprintf ----
    // Same idea as for the ANSI-to-Unicode direction: set the locale first.
    setlocale(LC_CTYPE, "chs");
    char szStr[100];
    // Upper-case %S in a narrow format string means "the argument is a wide
    // (Unicode) string".
    sprintf(szStr, "%S", wszString);
    ::MessageBoxA(GetSafeHwnd(), szStr, szStr, MB_OK);
}
Unicode转UTF8
// Converts a fixed UTF-16 string to UTF-8 with WideCharToMultiByte and writes
// it to "1.txt" preceded by the UTF-8 BOM.
void CConvertDlg::OnBnClickedButtonUnicodeToU8()
{
    // Unicode to UTF-8.
    // A wide string literal is const, so it is bound to a pointer-to-const.
    const wchar_t* wszString = L"abcd1234你我他";
    // wcslen is the wide-character counterpart of strlen.
    const int wcsLen = static_cast<int>(wcslen(wszString));

    // Sizing pass: with no output buffer the call returns the number of bytes
    // required. The flags argument is a DWORD, hence 0 (not NULL).
    const int u8Len = ::WideCharToMultiByte(CP_UTF8, 0, wszString, wcsLen, NULL, 0, NULL, NULL);
    // Reserve one extra byte for '\0'. UTF-8 is a multi-byte encoding of
    // Unicode, so it can be stored in a plain char buffer.
    char* szU8 = new char[u8Len + 1];
    // Actual conversion.
    ::WideCharToMultiByte(CP_UTF8, 0, wszString, wcsLen, szU8, u8Len, NULL, NULL);
    // Terminate the string manually.
    szU8[u8Len] = '\0';

    // MessageBox cannot display UTF-8, so the result is only written to a file.
    CFile cFile;
    // Only write when the file was actually opened; Open reports failure
    // through its return value.
    if (cFile.Open(_T("1.txt"), CFile::modeWrite | CFile::modeCreate))
    {
        // Start of file.
        cFile.SeekToBegin();
        // The UTF-8 BOM is the fixed byte sequence EF BB BF. UTF-8 has no
        // byte-order variants, so the bytes are written exactly in this order.
        cFile.Write("\xef\xbb\xbf", 3);
        // Payload: the converted bytes, without the terminator.
        cFile.Write(szU8, static_cast<UINT>(u8Len * sizeof(char)));
        cFile.Flush();
        cFile.Close();
    }
    delete[] szU8;
    szU8 = NULL;
}
UTF8转UNICODE
// Converts a fixed UTF-8 byte string to UTF-16 with MultiByteToWideChar and
// shows the result in a message box.
void CConvertDlg::OnBnClickedButtonU8ToUnicode()
{
    // UTF-8 to Unicode.
    // The bytes are spelled as hex escapes because Chinese text pasted
    // directly into the source may be garbled and can fail to compile.
    const char* szU8 = "abcd1234\xe4\xbd\xa0\xe6\x88\x91\xe4\xbb\x96\x00";
    const int u8Len = static_cast<int>(strlen(szU8));

    // Sizing pass: with no output buffer the call returns the number of wide
    // characters required. The flags argument is a DWORD, hence 0 (not NULL).
    const int wcsLen = ::MultiByteToWideChar(CP_UTF8, 0, szU8, u8Len, NULL, 0);
    // Reserve one extra element for '\0'; because an explicit source length is
    // passed, MultiByteToWideChar does not emit a terminator itself.
    wchar_t* wszString = new wchar_t[wcsLen + 1];
    // Actual conversion.
    ::MultiByteToWideChar(CP_UTF8, 0, szU8, u8Len, wszString, wcsLen);
    // Terminate the string manually.
    wszString[wcsLen] = L'\0';
    // Wide-character version of the MessageBox API.
    ::MessageBoxW(GetSafeHwnd(), wszString, wszString, MB_OK);
    // Writing the text to a file works the same way as in the ANSI-to-Unicode
    // handler.

    // Release the conversion buffer so it is not leaked on every click.
    delete[] wszString;
    wszString = NULL;
}

ANSI编码的文本文件读取:
1.工程非unicode，直接读取就没有什么问题吧~
2.Unicode工程，W2A（）宏，上msdn上查查，很详细的例子~数据多的话用WideCharToMultiByte()，好像是这个函数~可能拼的不大对~

CString str;
CFile f;
f.Open(_T("c:\\a.txt"),CFile::modeRead);
f.Read(str.GetBuffer(f.GetLength()),f.GetLength());
f.Close();
AfxMessageBox(str) ;// vc6测试没有问题呀，可能跟你的编码方式和运行平台有关

是VS2005, 项目是Unicode编码 , 文本文件是ANSI编码.. 问题还没有解决 ....大家再帮忙看看

datoucaicai

(大头真菜！) 等　级： *
还是个星等级
人家是 UNICODE 读 ANSI 不转换没有问题么？真不知道这里现在什么样子了还有楼主方法都给你说了很清楚了

你的代码中 char* szAnsi = "abcd1234你我他"; 可我的文本是从文本文件里面取的啊

哦我说呢可能刚才说话激动了。。过激了。。
HOHO~~

// Reads the whole of c:\a.txt (ANSI text) through a char buffer and assigns it
// to a CString, then shows it in a message box.
CString str;
CFile f;
// Only read when the file was actually opened; Open reports failure through
// its return value.
if (f.Open(_T("c:\\a.txt"), CFile::modeRead))
{
    //////////////////////////////////////////////////////////////////////////
    // Allocate a char buffer for the raw file bytes (plus a terminator).
    const UINT nCount = static_cast<UINT>(f.GetLength());
    char* ptchBuffer = new char[nCount + 1];
    // Read returns the number of bytes actually transferred; terminate there
    // so no uninitialised bytes end up in the string.
    const UINT nRead = f.Read(ptchBuffer, nCount);
    f.Close();
    ptchBuffer[nRead] = '\0';
    // Assigning a char* lets CString convert to its own character type.
    str = ptchBuffer;
    // Release the buffer.
    delete[] ptchBuffer;
    ptchBuffer = NULL;
}
AfxMessageBox(str);
//////////////////////////////////////////////////////////////////////////
// VC2005, Unicode build

谢谢 datoucaicai.和wysbk002 ...原来要用 char*

调试易

一个简单的 CFile 读取文本文件, 却是乱码, 向大家请教

解决方案 »