利用网络爬虫抓取数据的时候,被屏蔽掉了,有什么好的解决办法吗 利用网络爬虫抓取数据的时候,被屏蔽掉了,有什么好的解决办法吗?请各位大侠指点 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 /// <summary> /// 获取指定页面的源代码 /// </summary> /// <param name="PageURL"></param> /// <returns></returns> public String GetPageCode(string PageURL) { string Charset = "gb2312"; try { //存放目标网页的html String strHtml = ""; //连接到目标网页 HttpWebRequest wreq = (HttpWebRequest)WebRequest.Create(PageURL); wreq.Headers.Add("X_FORWARDED_FOR", "101.0.0.11"); //发送X_FORWARDED_FOR头(若是用取源IP的方式,可以用这个来造假IP,对日志的记录无效) wreq.Method = "Get"; wreq.KeepAlive = true; wreq.ContentType = "application/x-www-form-urlencoded"; wreq.AllowAutoRedirect = true; wreq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*"; wreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)"; CookieContainer cookieCon = new CookieContainer(); wreq.CookieContainer = cookieCon; HttpWebResponse wresp = (HttpWebResponse)wreq.GetResponse(); //采用流读取,并确定编码方式 Stream s = wresp.GetResponseStream(); StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding(Charset)); string strLine = ""; //读取 while (strLine != null) { strLine = objReader.ReadLine(); if (strLine != null) { strHtml += strLine.Trim(); } } strHtml = strHtml.Replace("<br />", "\r\n"); return strHtml; } catch (Exception n) //遇到错误,打印错误 { return n.Message; } }你可以试试 但不保证成功 C#绘制若干段线的问题讨论 短信猫的问题..知道的请进..谢谢... 怎么设计每天登录增加积分的系统 开发方案-想写一个学习的复习记忆提醒的小软件 数据包的问题 C#中Color问题,在线等 你好,帮帮忙! devTreeList绑定多张表 有重写界面的高手不,请教下如何重写界面。 c#能用Delphi写的ActiveX控件吗?(分不够可以再加) 关于.net常用技术点的文档实例。 c# winform如何限制每天使用次数?
/// <summary>
/// Downloads the page at <paramref name="PageURL"/> and returns its source text.
/// </summary>
/// <remarks>
/// The response body is always decoded as gb2312, whatever charset the server declares.
/// Each line read from the response is trimmed and the lines are joined without any
/// separator; afterwards every literal "&lt;br /&gt;" is replaced with "\r\n".
/// </remarks>
/// <param name="PageURL">URL of the page to fetch.</param>
/// <returns>
/// The processed page source. If any exception is thrown (bad URL, network failure,
/// unsupported encoding, ...) the exception's message is returned instead, so callers
/// cannot tell an error apart from page content by type alone.
/// </returns>
public String GetPageCode(string PageURL)
{
    string Charset = "gb2312";
    try
    {
        // Build the request for the target page.
        HttpWebRequest wreq = (HttpWebRequest)WebRequest.Create(PageURL);

        // Sends an X_FORWARDED_FOR header with a fixed address. Per the original author's
        // note this only matters to servers that read the client address from this header;
        // it has no effect on what the server's own access log records.
        wreq.Headers.Add("X_FORWARDED_FOR", "101.0.0.11");

        // In the original this assignment was swallowed by a trailing comment; GET is the
        // default verb, so stating it explicitly does not change behaviour.
        wreq.Method = "GET";
        wreq.KeepAlive = true;
        wreq.ContentType = "application/x-www-form-urlencoded";
        wreq.AllowAutoRedirect = true;
        wreq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
        wreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)";
        wreq.CookieContainer = new CookieContainer();

        // Dispose the response, its stream and the reader even when reading fails part-way;
        // otherwise the underlying connection stays checked out of the connection pool.
        using (HttpWebResponse wresp = (HttpWebResponse)wreq.GetResponse())
        using (Stream s = wresp.GetResponseStream())
        using (StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding(Charset)))
        {
            // StringBuilder avoids re-copying the whole page for every appended line.
            System.Text.StringBuilder html = new System.Text.StringBuilder();
            string strLine;
            while ((strLine = objReader.ReadLine()) != null)
            {
                html.Append(strLine.Trim());
            }

            return html.ToString().Replace("<br />", "\r\n");
        }
    }
    catch (Exception n)
    {
        // Best-effort contract kept from the original: report the failure as the return value.
        return n.Message;
    }
}
// Note from the original forum answer: you can try this, but it is not guaranteed to work.