网页源文件获取乱码问题! 本帖最后由 VisualEleven 于 2011-01-10 14:49:01 编辑 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 需要根据网友获取到的编码charset进行编码转换 你查找一下 gbk 转 gb2312 的算法或者函数,在你获得页面内容之后,转换一下文字编码即可 符合你的要求,可以直接复制#include <windows.h>#include "Wininet.h"#include <iostream>#include <string>#pragma comment(lib, "Wininet.lib")using namespace std;enum Html_Type{ CODE_UTF8, //UTF8编码的网站 CODE_GB2312, //GB2312编码的网站};string DownHtmlContent(const string &strUrl, Html_Type type);int main(){ string strHtml(""); string strUrl = "http://news.sohu.com/20101230/n278604307.shtml"; strHtml = DownHtmlContent(strUrl, CODE_GB2312); cout << strHtml.c_str() << endl;}string DownHtmlContent(const string &strUrl, Html_Type type){ string strContent; string strRooK = "RookIE/1.0"; string strUrlTmp = strUrl; HINTERNET hSession = InternetOpen("RookIE/1.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0); if (hSession != NULL) { HINTERNET handle2 = InternetOpenUrl(hSession, strUrl.c_str(), NULL, 0, INTERNET_FLAG_DONT_CACHE, 0); if (handle2 != NULL) { char *Temp = new char[1024*1024]; ZeroMemory(Temp, 1024*1024); DWORD Number = 1; DWORD Total = 0; BOOL bRes = FALSE; while ( Number > 0 ) { if( InternetReadFile(handle2, Temp+Total, 2048, &Number) ) { Total += Number; } } if ( type == CODE_GB2312 ) { strContent = Temp; } else if ( type == CODE_UTF8 ) { wchar_t *pUnicode = new wchar_t[1024*1024]; char *pAnsi = new char[1024*1024]; ZeroMemory( pAnsi, 1024*1024 ); ZeroMemory( pUnicode, 1024*1024*2 ); if( MultiByteToWideChar( CP_UTF8, 0, Temp, Total, pUnicode, 1024*1024 ) != 0 ) { WideCharToMultiByte( CP_ACP, 0, pUnicode, wcslen( pUnicode ), pAnsi, 1024*1024, "", NULL ); strContent = pAnsi; } delete []pAnsi; delete []pUnicode; } delete []Temp; InternetCloseHandle(handle2); handle2 = NULL; } InternetCloseHandle(hSession); hSession = NULL; } return strContent;} 有个P乱码就GZIP解压出来就可以了不信你自己下载用UE把HTTP头去掉,就可以用winrar打开了 to beyond0824你的代码测试过吗?还是不行的需要gzip来解压的 tabview中的视图得不到m_pDocument? 58分求小问题::::::为何下拉菜单项是灰色不可用的?代码: 请问VC有开三次方的函数吗? 
怎么从一个无限循环里面跳不出来啊??? 想用vc在ie中画个图表。 我在子线程里,如何向单文档窗口中发送欲显示的字符串? 分割窗口的问题 如何动态的切换CFormView类 怎么觉得那么乱? 关于VC的POSITION数据类型,有一事不明,有请C++高手 如何隐藏按钮控件上的位图 P2P视频点播系统 Create()函数调用问题
#include <windows.h>
#include "Wininet.h"
#include <iostream>
#include <string>

// Ask the MSVC linker to pull in the WinINet import library.
#pragma comment(lib, "Wininet.lib")

using namespace std;

// Character encoding the caller expects the downloaded page to use.
enum Html_Type
{
    CODE_UTF8,   // Site encoded as UTF-8
    CODE_GB2312, // Site encoded as GB2312
};

string DownHtmlContent(const string &strUrl, Html_Type type);

// Demo entry point: downloads one fixed page, treating it as GB2312, and
// writes the returned text to standard output.
int main()
{
    const string strUrl = "http://news.sohu.com/20101230/n278604307.shtml";
    const string strHtml = DownHtmlContent(strUrl, CODE_GB2312);
    cout << strHtml.c_str() << endl;
    return 0;
}
string DownHtmlContent(const string &strUrl, Html_Type type)
{
string strContent;
string strRooK = "RookIE/1.0";
string strUrlTmp = strUrl; HINTERNET hSession = InternetOpen("RookIE/1.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
if (hSession != NULL)
{
HINTERNET handle2 = InternetOpenUrl(hSession, strUrl.c_str(), NULL, 0, INTERNET_FLAG_DONT_CACHE, 0);
if (handle2 != NULL)
{
char *Temp = new char[1024*1024];
ZeroMemory(Temp, 1024*1024);
DWORD Number = 1;
DWORD Total = 0;
BOOL bRes = FALSE;
while ( Number > 0 )
{
if( InternetReadFile(handle2, Temp+Total, 2048, &Number) )
{
Total += Number;
}
}
if ( type == CODE_GB2312 )
{
strContent = Temp;
}
else if ( type == CODE_UTF8 )
{
wchar_t *pUnicode = new wchar_t[1024*1024];
char *pAnsi = new char[1024*1024];
ZeroMemory( pAnsi, 1024*1024 );
ZeroMemory( pUnicode, 1024*1024*2 );
if( MultiByteToWideChar( CP_UTF8, 0, Temp, Total, pUnicode, 1024*1024 ) != 0 )
{
WideCharToMultiByte( CP_ACP, 0, pUnicode, wcslen( pUnicode ), pAnsi, 1024*1024, "", NULL );
strContent = pAnsi;
}
delete []pAnsi;
delete []pUnicode;
}
delete []Temp;
InternetCloseHandle(handle2);
handle2 = NULL;
}
InternetCloseHandle(hSession);
hSession = NULL;
} return strContent;
}
用 GZIP 解压出来就可以了,不信你自己下载下来,
用 UE 把 HTTP 头去掉,就可以用 WinRAR 打开了。
你的代码测试过吗?还是不行,
需要用 gzip 解压才行。