在我的一个程序中，需要用socket发送POST报文。这两天查了下资料，POST中发送给服务器的数据编码是urlencode，我用了两个函数来实现：首先用GBToUTF8()函数将GB2312编码转换为Unicode，再转为UTF8编码格式；接着用urlEncoding()函数对转换后的UTF8编码进行转换；最后再用经urlEncoding()函数编码后的数据进行POST，但却发现无法成功POST。通过将HttpWatch抓取的报文数据和我的程序发送的报文数据进行对比，发现同样的内容，我的程序生成的结果却与HttpWatch抓取的内容不一样。比如“test 文章标题 123 test”，用HttpWatch抓取的内容为“test+%E6%96%87%E7%AB%A0%E6%AD%A3%E6%96%87+123+test”，而我的程序生成的内容为“test+%E6%96%87%E7%AB%A0%E6%A0%87%E9%A2%98+123+test”，其中“标题”两个字的编码对不上号。我不知道是不是GBToUTF8()函数和urlEncoding()函数哪个地方错了，把代码贴上来让各位兄弟给看看，不知是我实现的思路有问题还是实现的代码有问题。
#include <string>
#include <iostream>
#include <windows.h>using namespace std;
typedef unsigned char BYTE;

// Converts a nibble value (0..15) to its uppercase hexadecimal ASCII digit.
inline BYTE toHex(const BYTE &x)
{
    return static_cast<BYTE>(x > 9 ? x - 10 + 'A' : x + '0');
}

// Encodes a byte string in application/x-www-form-urlencoded style.
//
// ASCII letters and digits are copied unchanged, a space becomes '+', and
// every other byte (including all bytes of multi-byte UTF-8 sequences) is
// written as "%XX" with uppercase hex digits.
//
// Only the space character maps to '+'. Tabs, newlines and other whitespace
// are percent-encoded so that a decoder can restore them exactly.
//
// The alphanumeric test is done on explicit ASCII ranges rather than with
// isalnum(), so the result does not depend on the current C locale.
//
// @param sIn  Bytes to encode (expected to already be UTF-8).
// @return     The encoded text.
std::string urlEncoding(const std::string &sIn)
{
    std::string sOut;
    sOut.reserve(sIn.size() * 3); // worst case: every byte becomes "%XX"

    for (std::string::size_type ix = 0; ix < sIn.size(); ++ix)
    {
        const BYTE ch = static_cast<BYTE>(sIn[ix]);
        const bool isAsciiAlnum = (ch >= '0' && ch <= '9') ||
                                  (ch >= 'A' && ch <= 'Z') ||
                                  (ch >= 'a' && ch <= 'z');
        if (isAsciiAlnum)
        {
            sOut += static_cast<char>(ch);
        }
        else if (ch == ' ')
        {
            sOut += '+';
        }
        else
        {
            sOut += '%';
            sOut += static_cast<char>(toHex(static_cast<BYTE>(ch >> 4)));
            sOut += static_cast<char>(toHex(static_cast<BYTE>(ch & 0x0F)));
        }
    }
    return sOut;
}

// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) to
// UTF-8, going through UTF-16 as the intermediate form.
//
// NOTE(review): CP_ACP is only GB2312/GBK on systems whose ANSI code page is
// 936 - confirm this matches the deployment environment.
//
// @param str  NUL-terminated input; NULL is treated as an empty string.
// @return     The UTF-8 text, or an empty string if either conversion fails.
std::string GBToUTF8(const char *str)
{
    if (str == NULL)
    {
        return std::string();
    }

    // Size query: number of WCHARs needed, terminator included (length -1).
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, str, -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Size query: number of UTF-8 bytes needed, terminator included.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
    {
        return std::string();
    }
    std::string result(static_cast<std::string::size_type>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &result[0], utf8Len, NULL, NULL) <= 0)
    {
        return std::string();
    }

    // The API wrote a trailing NUL; std::string keeps its own terminator.
    result.resize(static_cast<std::string::size_type>(utf8Len - 1));
    return result;
}

// Demo: converts a sample string to UTF-8, URL-encodes it and prints it.
int main()
{
    const std::string InStr = "test 文章标题 123 test";
    const std::string OutStr = urlEncoding(GBToUTF8(InStr.c_str()));
    std::cout << OutStr << std::endl;
    system("PAUSE");
    return 0;
}
#include <string>
#include <iostream>
#include <windows.h>using namespace std;
typedef unsigned char BYTE;

// Converts a nibble value (0..15) to its uppercase hexadecimal ASCII digit.
inline BYTE toHex(const BYTE &x)
{
    return static_cast<BYTE>(x > 9 ? x - 10 + 'A' : x + '0');
}

// Encodes a byte string in application/x-www-form-urlencoded style.
//
// ASCII letters and digits are copied unchanged, a space becomes '+', and
// every other byte (including all bytes of multi-byte UTF-8 sequences) is
// written as "%XX" with uppercase hex digits.
//
// Only the space character maps to '+'. Tabs, newlines and other whitespace
// are percent-encoded so that a decoder can restore them exactly.
//
// The alphanumeric test is done on explicit ASCII ranges rather than with
// isalnum(), so the result does not depend on the current C locale.
//
// @param sIn  Bytes to encode (expected to already be UTF-8).
// @return     The encoded text.
std::string urlEncoding(const std::string &sIn)
{
    std::string sOut;
    sOut.reserve(sIn.size() * 3); // worst case: every byte becomes "%XX"

    for (std::string::size_type ix = 0; ix < sIn.size(); ++ix)
    {
        const BYTE ch = static_cast<BYTE>(sIn[ix]);
        const bool isAsciiAlnum = (ch >= '0' && ch <= '9') ||
                                  (ch >= 'A' && ch <= 'Z') ||
                                  (ch >= 'a' && ch <= 'z');
        if (isAsciiAlnum)
        {
            sOut += static_cast<char>(ch);
        }
        else if (ch == ' ')
        {
            sOut += '+';
        }
        else
        {
            sOut += '%';
            sOut += static_cast<char>(toHex(static_cast<BYTE>(ch >> 4)));
            sOut += static_cast<char>(toHex(static_cast<BYTE>(ch & 0x0F)));
        }
    }
    return sOut;
}

// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) to
// UTF-8, going through UTF-16 as the intermediate form.
//
// NOTE(review): CP_ACP is only GB2312/GBK on systems whose ANSI code page is
// 936 - confirm this matches the deployment environment.
//
// @param str  NUL-terminated input; NULL is treated as an empty string.
// @return     The UTF-8 text, or an empty string if either conversion fails.
std::string GBToUTF8(const char *str)
{
    if (str == NULL)
    {
        return std::string();
    }

    // Size query: number of WCHARs needed, terminator included (length -1).
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, str, -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Size query: number of UTF-8 bytes needed, terminator included.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
    {
        return std::string();
    }
    std::string result(static_cast<std::string::size_type>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &result[0], utf8Len, NULL, NULL) <= 0)
    {
        return std::string();
    }

    // The API wrote a trailing NUL; std::string keeps its own terminator.
    result.resize(static_cast<std::string::size_type>(utf8Len - 1));
    return result;
}

// Demo: converts a sample string to UTF-8, URL-encodes it and prints it.
int main()
{
    const std::string InStr = "test 文章标题 123 test";
    const std::string OutStr = urlEncoding(GBToUTF8(InStr.c_str()));
    std::cout << OutStr << std::endl;
    system("PAUSE");
    return 0;
}
void CUtility::ConvertGBKToUtf8(string& strGBK)
{// int len=MultiByteToWideChar(CP_ACP, 0, strGBK.c_str() , -1, NULL,0);
int len = strGBK.length();
WCHAR * wszUtf8 = new WCHAR[len+1];
if( wszUtf8 == NULL)
{
return;
}
// memset(wszUtf8, 0, len * 2 + 2);
len = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str() , -1, wszUtf8, len*2+2); //len = WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, NULL, 0, NULL, NULL);
len = len*3;
char *szUtf8=new char[len + 1];
if( szUtf8 == NULL)
{
delete[] wszUtf8;
return;
}
// memset(szUtf8, 0, len + 1);
WideCharToMultiByte (CP_UTF8, 0, wszUtf8, -1, szUtf8, len, NULL,NULL); strGBK = szUtf8;
delete[] szUtf8;
delete[] wszUtf8;
}void CUtility::ConvertUtf8ToGBK(CSafeMemBuf& safeMem)
{
int nLen = MultiByteToWideChar( CP_UTF8, 0, safeMem.GetBuf(), -1, NULL, NULL );//得到UTF8编码的字符串长度
LPWSTR lpwsz = new WCHAR[nLen];
if( lpwsz == NULL)
{
return;
}
MultiByteToWideChar( CP_UTF8, 0, safeMem.GetBuf(), -1, lpwsz, nLen );//转换的结果是UCS2格式 int nLen1 = WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, NULL, NULL, NULL, NULL ); safeMem.SetBufSize(nLen1+1);
// LPSTR lpsz = new CHAR[nLen1];
//WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, lpsz, nLen1, NULL, NULL );//转换完毕
WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, safeMem.GetBuf(), nLen1, NULL, NULL );//转换完毕 delete [] lpwsz;
//delete [] lpsz; *(safeMem.GetBuf() + nLen1) = '\0';}int CUtility::ConvertUtf8ToGBK(char* pChangeStr,int nLength)
{
//int nLen = MultiByteToWideChar( CP_UTF8, 0, pChangeStr, -1, NULL, NULL );//得到UTF8编码的字符串长度
int nLen = nLength*2; LPWSTR lpwsz = new WCHAR[nLen];
if( lpwsz == NULL)
{
return 0;
}
MultiByteToWideChar( CP_UTF8, 0, pChangeStr, -1, lpwsz, nLen );//转换的结果是UCS2格式// int nLen1 = WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, NULL, NULL, NULL, NULL ); // LPSTR lpsz = new CHAR[nLen1];
//WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, lpsz, nLen1, NULL, NULL );//转换完毕
int nLen1 = WideCharToMultiByte( CP_ACP, 0, lpwsz, -1, pChangeStr, nLen, NULL, NULL );//转换完毕 delete [] lpwsz;
return nLen1;
}
// Returns the numeric value (0..15) of an ASCII hexadecimal digit, or -1 if
// `c` is not one. Both upper- and lower-case letters are accepted.
static int UrlHexDigitValue(unsigned char c)
{
    if (c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f')
    {
        return c - 'a' + 0x0A;
    }
    if (c >= 'A' && c <= 'F')
    {
        return c - 'A' + 0x0A;
    }
    return -1;
}

// Decodes "%XX" escapes in `pRowData` and appends the result to `strOut`.
//
// - Text outside escapes is copied unchanged ('+' is NOT turned into a space).
// - "%XX" with two hex digits (either case) becomes the byte 0xXX.
// - A '%' followed by something other than two hex digits is copied literally
//   and decoding resumes with the character right after it.
// - A '%' cut off by the end of the string is copied literally together with
//   the one character that follows it, if any.
//
// The input is never modified, and `strOut` is appended to, not cleared.
//
// NOTE(review): the body treats LPCTSTR as const char*, i.e. it presumes a
// non-UNICODE (MBCS) build.
//
// @param pRowData  NUL-terminated, percent-encoded input.
// @param strOut    Receives the decoded bytes (appended).
// @return          TRUE on completion; FALSE if `pRowData` is NULL.
BOOL CUtility::URLDecode(LPCTSTR pRowData,CString& strOut)
{
    if (pRowData == NULL)
    {
        return FALSE;
    }

    const char* pIndex = pRowData;
    while (*pIndex)
    {
        const char* pfind = strchr(pIndex, '%');
        if (pfind == NULL)
        {
            // No more escapes: the rest is literal text.
            strOut += pIndex;
            break;
        }

        // Copy the literal run before the '%' without touching the input.
        if (pfind != pIndex)
        {
            strOut += CString(pIndex, static_cast<int>(pfind - pIndex));
        }

        const unsigned char cHigh = *(pfind + 1);
        if (cHigh == 0)
        {
            strOut += '%';
            break;
        }
        const unsigned char cLow = *(pfind + 2);
        if (cLow == 0)
        {
            strOut += '%';
            strOut += cHigh;
            break;
        }

        const int nHigh = UrlHexDigitValue(cHigh);
        const int nLow = UrlHexDigitValue(cLow);
        if (nHigh >= 0 && nLow >= 0)
        {
            const unsigned char cChar = static_cast<unsigned char>((nHigh << 4) + nLow);
            strOut += cChar;
            pIndex = pfind + 3;
        }
        else
        {
            // Not a valid escape: keep the '%' and rescan from the next char.
            strOut += '%';
            pIndex = pfind + 1;
        }
    }
    return TRUE;
}
TNND,搞了我一整天