有一个网站用的utf-8的URL编码,用中文的话就会转成这种编码,比如说汉字“中国”就会转成“%E4%B8%AD%E5%9B%BD”,每三个位对应一个汉字,比如说%E4%B8%AD对应汉字的“中”,“%E5%9B%BD”对应中文汉字的“国”,请问一下这是怎么转的呢?在网上找了很久都是GB2312的URLEncode(比如中文的汉字“中国”就会转成“%D6%D0%B9%FA”每两位对应一个汉字),没有看到UTF-8的URLEncode,有的都是用ASP或是PHP写的,没有看到用C++写的,请高手看一下,在线等,急
就能转换成utf-8编码了
{
char* uchar = (char *)pOut;
uchar[1] = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F);
uchar[0] = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F);
}
void UnicodeToGB2312(char* pOut,unsigned short uData)
{
WideCharToMultiByte(CP_ACP,NULL,(LPCWSTR)&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
}
void UTF8ToGB2312(CString &pOut, char *pText, int pLen)
{
char * newBuf = new char[pLen];
char Ctemp[4];
memset(Ctemp,0,4);
int i =0;
int j = 0;
while(i < pLen)
{
if(pText[i] > 0)
{
newBuf[j++] = pText[i++];
}
else
{
WCHAR Wtemp;
UTF8ToUnicode(&Wtemp,pText + i);
UnicodeToGB2312(Ctemp,Wtemp);
newBuf[j] = Ctemp[0];
newBuf[j + 1] = Ctemp[1];
i += 3;
j += 2;
}
}
newBuf[j] = '\0';
pOut = newBuf;
delete []newBuf;
}
void GB2312ToUnicode(WCHAR* pOut,char *gbBuffer)
{
::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,pOut,1);
}void UnicodeToUTF8(char* pOut,WCHAR* pText)
{
char* pchar = (char *)pText;
pOut[0] = (0xE0 | ((pchar[1] & 0xF0) >> 4));
pOut[1] = (0x80 | ((pchar[1] & 0x0F) << 2)) + ((pchar[1] & 0xC0) >> 6);
pOut[2] = (0x80 | (pchar[0] & 0x3F));
}
void GB2312ToUTF8(CString& pOut,char *pText,int pLen)
{
char buf[3];
int len=pLen+(pLen>>2)+2;
char* rst=new char[len];
memset(buf,0,4);
memset(rst,0,len); int i=0,j=0;
while(i<pLen)
{
if(*(pText+i)>=0)
rst[j++]=pText[i++];
else
{
WCHAR pBuf;
GB2312ToUnicode(&pBuf,pText+i);
UnicodeToUTF8(buf,&pBuf);
unsigned short int temp=0;
temp=rst[j]=buf[0];
temp=rst[j+1]=buf[1];
temp=rst[j+2]=buf[2]; j+=3;
i+=2;
}
}
rst[j]='\0';
pOut=rst;
// delete[] rst;
}这种转换不对,但不知道是那里错了,请高手帮忙看看!
CString strName("中国");
int len=strName.GetLength();
char *pText=new char[len];
memset(pText,0,len);
pText=strName.GetBuffer(len);
CString strRet="";
GB2312ToUTF8(strRet,pText,len);
//AfxMessageBox(strRet);
strRet=URLEncode(strRet);
AfxMessageBox(strRet);
strName.ReleaseBuffer();MessageBox弹出的结果是”%E4%B9%AD%E5%99%BD“和正确结果“%E4%B8%AD%E5%9B%BD”有出处,不知道哪里错了,请高手看看这是那里 的问题??
HTTP请求/响应头:ANSI
utf-8:受控的文件内容
BTW.
很多URL跟随的参数最好用Base64重新编/解码。
#include<stdio.h>int main(int argc, char* argv[])
{
char* pAnsi ="中国";
wchar_t unicode[32]={0};
MultiByteToWideChar( CP_ACP,0,pAnsi,strlen(pAnsi),unicode,31); unsigned char sz[32]={0};
WideCharToMultiByte(CP_UTF8,0,unicode,wcslen(unicode),(LPSTR)sz,31,0,0); char out[64]={0};
sprintf(out,"%%%X%%%X%%%X%%%X%%%X%%%X",sz[0],sz[1],sz[2],sz[3],sz[4],sz[5]);
MessageBox(0,out,0,0);
return 0;
}
pOut[1] = (0x80 ¦ ((pchar[1] & 0x0F) < < 2)) + ((pchar[1] & 0xC0) >> 6);
//这里错了,改成pOut[1] = (0x80 ¦ ((pchar[1] & 0x0F) < < 2)) + ((pchar[0] & 0xC0) >> 6);
pOut[2] = (0x80 ¦ (pchar[0] & 0x3F));