// Converts the text held in strGBK to UTF-8 and stores the result back into
// strGBK.
//
// W2A only accepts a wide string, so this function compiles only when CString
// is a wide (UNICODE) string.
//
// NOTE(review): the final assignment stores a narrow UTF-8 buffer into a wide
// CString, so CString has to widen the bytes again - presumably with the ANSI
// code page; confirm. If so, the UTF-8 bytes are re-read as GBK double-byte
// pairs, and a string with an odd number of UTF-8 bytes (for example a single
// 3-byte Chinese character) leaves an unpaired byte that cannot survive the
// round trip. Carrying the UTF-8 bytes in a narrow string type instead of
// CString would avoid this, but that changes the signature.
void CChartsetManager::convertGBKToUtf8(CString& strGBK)
{
    USES_CONVERSION;

    // Convert once and reuse the pointer: every W2A expansion allocates another
    // copy of the string on the stack, and those copies are only released when
    // the function returns.
    LPCSTR szSource = W2A(static_cast<LPCTSTR>(strGBK));

    // ANSI -> UTF-16. With a source length of -1 the reported size includes
    // the terminating NUL.
    int nLen = MultiByteToWideChar(CP_ACP, 0, szSource, -1, NULL, 0);
    wchar_t *wszUtf8 = new wchar_t[nLen + 1];
    memset(wszUtf8, 0, (nLen + 1) * sizeof(wchar_t));
    MultiByteToWideChar(CP_ACP, 0, szSource, -1, wszUtf8, nLen);

    // UTF-16 -> UTF-8.
    nLen = WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, NULL, 0, NULL, NULL);
    char *szUtf8 = new char[nLen + 1];
    memset(szUtf8, 0, nLen + 1);
    WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, szUtf8, nLen, NULL, NULL);

    strGBK = szUtf8;

    delete[] szUtf8;
    delete[] wszUtf8;
}

// Treats the text held in strUtf8 as UTF-8, converts it to the system ANSI
// code page (CP_ACP) and stores the result back into strUtf8.
//
// W2A only accepts a wide string, so this function compiles only when CString
// is a wide (UNICODE) string.
//
// NOTE(review): W2A narrows the wide CString before the bytes are decoded as
// UTF-8. The decoded bytes are the original UTF-8 bytes only if the widening
// that produced strUtf8 was lossless - confirm against the caller.
void CChartsetManager::convertUtf8ToGBK(CString& strUtf8)
{
    USES_CONVERSION;

    // Convert once and reuse the pointer: every W2A expansion allocates another
    // copy of the string on the stack.
    LPCSTR szSource = W2A(static_cast<LPCTSTR>(strUtf8));

    // UTF-8 -> UTF-16. With a source length of -1 the reported size includes
    // the terminating NUL.
    int nLen = MultiByteToWideChar(CP_UTF8, 0, szSource, -1, NULL, 0);
    wchar_t *wszGBK = new wchar_t[nLen + 1];
    memset(wszGBK, 0, (nLen + 1) * sizeof(wchar_t));
    MultiByteToWideChar(CP_UTF8, 0, szSource, -1, wszGBK, nLen);

    // UTF-16 -> ANSI.
    nLen = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL);
    char *szGBK = new char[nLen + 1];
    memset(szGBK, 0, nLen + 1);
    WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, szGBK, nLen, NULL, NULL);

    strUtf8 = szGBK;

    delete[] szGBK;
    delete[] wszGBK;
}
这个是两个我gbk和utf8转换的函数,gbk转utf8能转换正确。但utf8转换gbk当汉字为奇数时候,utf8码转换为gbk,会少掉一个字。一个gbk为2个字节,而utf8为3个字节,当转换的utf8为奇数字节时候,如何才能正确转换为gbk码?
// Quoted copy of two conversion routines. The signature line of the first
// routine is missing from this copy, so the text starts directly at its body;
// that body reads and overwrites a variable named strGBK.
{
USES_CONVERSION;
// Size query for the ANSI -> UTF-16 step, then allocation of the wide buffer.
// W2A is expanded here and again two lines below.
int nLen = MultiByteToWideChar(CP_ACP, 0, W2A((LPCTSTR)strGBK), -1, NULL,0); wchar_t *wszUtf8 = new wchar_t[nLen+1];
memset(wszUtf8, 0, (nLen+1)*sizeof(wchar_t));
// Actual ANSI -> UTF-16 conversion, size query for UTF-16 -> UTF-8, and
// allocation of the narrow output buffer.
MultiByteToWideChar(CP_ACP, 0, W2A((LPCTSTR)strGBK), -1, wszUtf8, nLen); nLen = WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, NULL, 0, NULL, NULL); char *szUtf8=new char[nLen+1];
memset(szUtf8, 0, nLen+1);
// UTF-16 -> UTF-8, then the narrow result is assigned back to the CString.
// NOTE(review): W2A requires a wide string, so CString is wide here and this
// assignment presumably re-widens the UTF-8 bytes via the ANSI code page -
// confirm; that would explain the lost character for odd byte counts.
WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, szUtf8, nLen, NULL,NULL); strGBK = szUtf8;
delete[] szUtf8;
delete[] wszUtf8;
// The closing brace of the first routine shares the next line with the
// signature of the second routine (UTF-8 -> ANSI, result written to strUtf8).
}void CChartsetManager::convertUtf8ToGBK(CString& strUtf8)
{
USES_CONVERSION;
// Size query for the UTF-8 -> UTF-16 step; the CString is narrowed with W2A
// before its bytes are decoded as UTF-8.
int nLen = MultiByteToWideChar(CP_UTF8, 0, W2A((LPCTSTR)strUtf8), -1, NULL,0);
wchar_t *wszGBK = new wchar_t[nLen+1];
memset(wszGBK, 0, (nLen+1)*sizeof(wchar_t));
// Actual UTF-8 -> UTF-16 conversion, then size query for UTF-16 -> ANSI.
MultiByteToWideChar(CP_UTF8, 0, W2A((LPCTSTR)strUtf8), -1, wszGBK, nLen); nLen = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL);
char *szGBK=new char[nLen+1];
memset(szGBK, 0, nLen+1);
// UTF-16 -> ANSI, then the narrow result is assigned back to the CString.
WideCharToMultiByte (CP_ACP, 0, wszGBK, -1, szGBK, nLen, NULL,NULL); strUtf8 = szGBK;
delete[] szGBK;
delete[] wszGBK;
}这个是两个我gbk和utf8转换的函数,gbk转utf8能转换正确。但utf8转换gbk当汉字为奇数时候,utf8码转换为gbk,会少掉一个字。一个gbk为2个字节,而utf8为3个字节,当转换的utf8为奇数字节时候,如何才能正确转换为gbk码?
http://hi.baidu.com/%B9%B7%B9%B7%B5%C4%CA%C0%BD%E7/blog/item/46400cfa18d22e9658ee90f9.html 试试这个,然后再检查你自己的代码,找出出错原因。
// Body of a UTF-8 -> ANSI (CP_ACP) conversion routine; its signature line is
// missing from this excerpt. It reads strUTF8 through c_str() and returns the
// converted text as a std::string.
{
// Size query for UTF-8 -> UTF-16; with a source length of -1 the reported
// size includes the terminating NUL.
int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
// NOTE(review): the wide buffer is unsigned short rather than wchar_t, and the
// memset below assumes 2 bytes per element - confirm this matches the
// compiler's wchar_t setting.
unsigned short * wszGBK = new unsigned short[len + 1];
memset(wszGBK, 0, len * 2 + 2);
// Actual UTF-8 -> UTF-16 conversion, followed on the same line by the size
// query for UTF-16 -> ANSI.
// NOTE(review): the LPCTSTR cast only matches the narrow-string parameter when
// TCHAR is char (non-UNICODE build) - confirm.
MultiByteToWideChar(CP_UTF8, 0, (LPCTSTR)strUTF8.c_str(), -1, wszGBK, len);len = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL);
char *szGBK = new char[len + 1];
memset(szGBK, 0, len + 1);
// UTF-16 -> ANSI into the zero-filled output buffer.
WideCharToMultiByte(CP_ACP,0, wszGBK, -1, szGBK, len, NULL, NULL);
//strUTF8 = szGBK;
// Copy the result into a std::string before both buffers are released.
std::string strTemp(szGBK);
delete[]szGBK;
delete[]wszGBK;
return strTemp;
}
// Body of an ANSI (CP_ACP) -> UTF-8 conversion routine; its signature line is
// missing from this excerpt. It reads strGBK and overwrites it with the
// converted text.
{
// Size query for ANSI -> UTF-16; with a source length of -1 the reported size
// includes the terminating NUL.
// NOTE(review): the LPCTSTR cast only matches the narrow-string parameter when
// TCHAR is char (non-UNICODE build) - confirm.
int len=MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, NULL,0);
// NOTE(review): the wide buffer is unsigned short rather than wchar_t, and the
// memset below assumes 2 bytes per element - confirm this matches the
// compiler's wchar_t setting.
unsigned short * wszUtf8 = new unsigned short[len+1];
memset(wszUtf8, 0, len * 2 + 2);
// Actual ANSI -> UTF-16 conversion.
MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, wszUtf8, len);
// Size query for UTF-16 -> UTF-8, then the conversion into a zero-filled
// buffer.
len = WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, NULL, 0, NULL, NULL);
char *szUtf8=new char[len + 1];
memset(szUtf8, 0, len + 1);
WideCharToMultiByte (CP_UTF8, 0, wszUtf8, -1, szUtf8, len, NULL,NULL);
// Store the UTF-8 bytes back into the caller's string, then free the buffers.
strGBK = szUtf8;
delete[] szUtf8;
delete[] wszUtf8;
}
// Converts the UTF-8 bytes held in strUtf8 to the system ANSI code page
// (CP_ACP) and stores the result back into strUtf8.
//
// The CString buffer is passed to MultiByteToWideChar as a narrow string, so
// this function is only usable where CString is a narrow (non-UNICODE) string.
// If a size query returns 0, the buffers stay zero-filled and strUtf8 ends up
// empty.
void ConvertUtf8ToGBK(CString& strUtf8)
{
    // UTF-8 -> UTF-16. With a source length of -1 the reported size includes
    // the terminating NUL.
    int len = MultiByteToWideChar(CP_UTF8, 0, static_cast<LPCTSTR>(strUtf8), -1, NULL, 0);

    // The wide buffer must be wchar_t: MultiByteToWideChar takes an LPWSTR,
    // and unsigned short* does not convert to it when wchar_t is a native type.
    wchar_t* wszGBK = new wchar_t[len + 1];
    memset(wszGBK, 0, (len + 1) * sizeof(wchar_t));
    MultiByteToWideChar(CP_UTF8, 0, static_cast<LPCTSTR>(strUtf8), -1, wszGBK, len);

    // UTF-16 -> ANSI.
    len = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL);
    char* szGBK = new char[len + 1];
    memset(szGBK, 0, len + 1);
    WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, szGBK, len, NULL, NULL);

    strUtf8 = szGBK;

    delete[] szGBK;
    delete[] wszGBK;
}
我的问题是:一个汉字转换成 utf8 码之后,再转换回去就转不了了,你的可以转换成功吗?
就是当 utf8 码为奇数个字节的时候,不知道是否要补上一个字节或者做别的处理,才能转换正确?ps:我是在 vs2005 下测试的。
转换长度这个我知道,只是望你在看下我的题目,thanks!
http://www.vckbase.com/document/viewdoc/?id=1397
它的转换有点问题。
// Snippet: decode a NUL-terminated UTF-8 buffer to UTF-16, re-encode it in the
// system ANSI code page (CP_ACP) and print it.
// The initializer below is a placeholder for the UTF-8 input, not real code.
const char* pUtf8Buf = ......utf 8编码;
// Size query; with a source length of -1 the count includes the terminating
// NUL.
DWORD UniCodeLen=MultiByteToWideChar(CP_UTF8, 0, pUtf8Buf, -1, NULL,0 );
// NOTE(review): if the query returns 0 this vector is empty and &vWCH[0]
// below is not valid - no check is made here.
std::vector<wchar_t> vWCH(UniCodeLen);
MultiByteToWideChar(CP_UTF8, 0, pUtf8Buf, -1, &vWCH[0] , UniCodeLen );
// Size query for UTF-16 -> ANSI. The explicit length UniCodeLen covers the
// terminator, so the terminator is converted as well.
DWORD dwASCIILen=WideCharToMultiByte(CP_ACP, 0, &vWCH[0], UniCodeLen , NULL ,NULL , NULL, NULL );
std::vector<char> vAscii( dwASCIILen );
WideCharToMultiByte(CP_ACP, 0, &vWCH[0], UniCodeLen , &vAscii[0], dwASCIILen, NULL, NULL );
// Prints the buffer as a C string; relies on the converted terminator above.
printf("%s\n",&vAscii[0]);
可以把你的gbk转utf8也贴出来吗?
thanks
#include<windows.h>
#include<vector>
#include<iostream>
using namespace std;

// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) to
// UTF-8.
//
// pASCIIBuf: NUL-terminated source string.
// pUtf8Buf:  destination buffer, or NULL to only query the required size. No
//            capacity is passed in, so the caller must provide at least the
//            number of bytes reported by a size query.
// Returns the number of UTF-8 bytes required or written, terminating NUL
// included, or 0 if a conversion fails.
int AToUtf8(LPCSTR pASCIIBuf, LPSTR pUtf8Buf = NULL)
{
    // With a source length of -1 the count includes the terminating NUL.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, pASCIIBuf, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;  // failed; also keeps &wide[0] away from an empty vector

    std::vector<wchar_t> wide(wideLen);
    if (MultiByteToWideChar(CP_ACP, 0, pASCIIBuf, -1, &wide[0], wideLen) == 0)
        return 0;

    // The explicit length covers the terminator, so the terminator is
    // converted and counted as well.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], wideLen, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
        return 0;
    if (NULL == pUtf8Buf)
        return utf8Len;

    return WideCharToMultiByte(CP_UTF8, 0, &wide[0], wideLen, pUtf8Buf, utf8Len, NULL, NULL);
}
// Converts a NUL-terminated UTF-8 string to the system ANSI code page
// (CP_ACP).
//
// pUtf8Buf:  NUL-terminated UTF-8 source string.
// pASCIIBuf: destination buffer, or NULL to only query the required size. No
//            capacity is passed in, so the caller must provide at least the
//            number of bytes reported by a size query.
// Returns the number of ANSI bytes required or written, terminating NUL
// included, or 0 if a conversion fails.
int Utf8ToA(LPCSTR pUtf8Buf, LPSTR pASCIIBuf = NULL)
{
    // With a source length of -1 the count includes the terminating NUL.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, pUtf8Buf, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;  // failed; also keeps &wide[0] away from an empty vector

    std::vector<wchar_t> wide(wideLen);
    if (MultiByteToWideChar(CP_UTF8, 0, pUtf8Buf, -1, &wide[0], wideLen) == 0)
        return 0;

    // The explicit length covers the terminator, so the terminator is
    // converted and counted as well.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, &wide[0], wideLen, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
        return 0;
    if (NULL == pASCIIBuf)
        return ansiLen;

    return WideCharToMultiByte(CP_ACP, 0, &wide[0], wideLen, pASCIIBuf, ansiLen, NULL, NULL);
}

// Round-trip demo: converts one Chinese character from ANSI to UTF-8 and back,
// printing both encodings. Returns 1 if either conversion fails.
int main(int, char*[])
{
    // NOTE(review): the bytes of this literal depend on the source file and
    // compiler encoding; the demo assumes they are in the ANSI code page.
    char sz[] = "中";

    // Query the size first, then convert into a buffer of exactly that size.
    // std::vector releases the storage itself; the earlier new[] buffers were
    // freed with scalar delete, which is undefined behaviour.
    const int x = AToUtf8(sz);
    if (x <= 0)
        return 1;
    std::vector<char> utf8(x, 0);
    AToUtf8(sz, &utf8[0]);
    std::cout << &utf8[0] << std::endl;

    const int y = Utf8ToA(&utf8[0]);
    if (y <= 0)
        return 1;
    std::vector<char> ascii(y, 0);
    Utf8ToA(&utf8[0], &ascii[0]);
    std::cout << &ascii[0] << std::endl;

    return 0;
}
有问题我可以发消息给你?
谢了!
[in] Specifies the code page to be used to perform the conversion. This parameter can be given the value of any code page that is installed or available in the system. You can also specify one of the values shown in the following table.
Value Meaning
CP_ACP ANSI code page
CP_MACCP Macintosh code page
CP_OEMCP OEM code page
CP_SYMBOL Windows 2000/XP: Symbol code page (42)
CP_THREAD_ACP Windows 2000/XP: The current thread's ANSI code page
CP_UTF7 Windows 98/Me, Windows NT 4.0 and later: Translate using UTF-7
CP_UTF8 Windows 98/Me, Windows NT 4.0 and later: Translate using UTF-8.
CP_ACP 是 ANSI 代码页,不是 GBK。
To:unsigned(僵哥(发站内消息,请附上链接或问题说明,否则不予回)
那是不是我的代码页如果是gbk的话,还要将用户输入的再先转成ansi?