C# 代理 数据采集 我要采集一个页面里面的数据但是那个页面不能直接在IE的地址栏里面打开而要通过一个代理页面,在此代理页面的搜索框里面输入地址才可以进去可以通过什么方法进行采集呢? 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 即下面步骤“打开在线网站:https://www.zxproxy.com/输入http://www.bclc.com/app/DidYouWin/WinningNumbers/Keno.asp点"GO"即可然后我要采集的就是http://www.bclc.com/app/DidYouWin/WinningNumbers/Keno.asp里面的数据 HttpWebRequest模拟提交我也知道是要这样,可是网上找了好多,都不行呐。打开的那个网页地址栏还加锁了,我郁闷了,还会过时。有没实例呀,小弟急 https://www.zxproxy.com/这个是代理页面http://www.bclc.com/app/DidYouWin/WinningNumbers/Keno.asp这个是我要访问采集的页面 using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.Net;using System.IO;using System.IO.Compression;using System.Text.RegularExpressions;namespace WikiPageCreater.Common{ public class PageHelper { /// <summary> /// 根据 url 获取网页编码 /// </summary> /// <param name="url"></param> /// <returns></returns> public static string GetEncoding(string url) { HttpWebRequest request = null; HttpWebResponse response = null; StreamReader reader = null; try { request = (HttpWebRequest)WebRequest.Create(url); request.Timeout = 20000; request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse(); if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024) { if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress)); else reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII); string html = reader.ReadToEnd(); Regex reg_charset = new Regex(@"charset\b\s*=\s*(?<charset>[^""]*)"); if (reg_charset.IsMatch(html)) { return reg_charset.Match(html).Groups["charset"].Value; } else if (response.CharacterSet != string.Empty) { return response.CharacterSet; } else return Encoding.Default.BodyName; } } catch { } finally { if (response != null) { response.Close(); response = null; } if (reader != null) reader.Close(); if (request != null) 
request = null; } return Encoding.Default.BodyName; } /// <summary> /// 根据 url 和 encoding 获取当前url页面的 html 源代码 /// </summary> /// <param name="url"></param> /// <param name="encoding"></param> /// <returns></returns> public static string GetHtml(string url, Encoding encoding) { HttpWebRequest request = null; HttpWebResponse response = null; StreamReader reader = null; try { request = (HttpWebRequest)WebRequest.Create(url); request.Timeout = 20000; request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse(); if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024) { if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding); else reader = new StreamReader(response.GetResponseStream(), encoding); string html = reader.ReadToEnd(); return html; } } catch { } finally { if (response != null) { response.Close(); response = null; } if (reader != null) reader.Close(); if (request != null) request = null; } return string.Empty; } }} HttpWebRequestSystem.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(""); request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)"; System.Net.WebResponse response = request.GetResponse(); System.IO.Stream resStream = response.GetResponseStream(); System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding); string content=sr.ReadToEnd(); resStream.Close(); sr.Close();webrequest,WebClient System.Net.WebClient wc = new System.Net.WebClient(); wc.Credentials = System.Net.CredentialCache.DefaultCredentials; Byte[] pageData = wc.DownloadData(""); string content= System.Text.Encoding.Default.GetString(pageData); 
这个是被gtw封了的网站,不一定要用在线代理网站上,这个程序控制起来太麻烦。其实只需要找一个国外的代理,然后在程序里设置为用这个代理去下载网站,就可以了。 大神.. 你是专业在 CSDN上回答问题的吗? web 的TreeView怎么实现点击文字选中checkBox和点击子节点选中父节点? 怎么让存储过程返回表,万分火急! 公司的烦心事-------你们都烦什么事呢? 关于文件下载,很简单的问题 asp.net中用什么控件可以在粘贴时支持word文件复制格式 如何实现将word和ppt文件自动转换成html网页格式呢?急! IE中使用客户端的打印功能 即时通信软件的内容是通过什么传送的? 这一句不明白 关闭窗口的问题? 全选,局部全选,想用JavaScript实现,求思路 JS中如何获取asp:label呢?
http://www.bclc.com/app/DidYouWin/WinningNumbers/Keno.asp 这个是我要访问并采集的页面。
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;namespace WikiPageCreater.Common
{
/// <summary>
/// Best-effort helpers for downloading a web page over HTTP and for
/// detecting the character encoding the page declares.
/// Neither method throws: every failure is traced and a fallback value is returned.
/// </summary>
public class PageHelper
{
    /// <summary>Timeout applied to each request, in milliseconds.</summary>
    private const int RequestTimeoutMilliseconds = 20000;

    /// <summary>
    /// Responses whose declared Content-Length is this many bytes or more are ignored.
    /// A response with no declared length (ContentLength == -1) still passes this check.
    /// </summary>
    private const long MaxContentLength = 1024 * 1024;

    /// <summary>
    /// Finds a <c>charset=...</c> declaration in markup. An optional opening quote is
    /// skipped so that both <c>content="text/html; charset=utf-8"</c> and
    /// <c>&lt;meta charset="utf-8"&gt;</c> yield a non-empty name, and the capture is
    /// limited to characters that can appear in a charset name.
    /// </summary>
    private static readonly Regex CharsetPattern = new Regex(
        @"charset\b\s*=\s*[""']?(?<charset>[\w.:-]+)",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Determines the character encoding name of the page at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Absolute HTTP or HTTPS address of the page.</param>
    /// <returns>
    /// The charset declared inside the page body if one is found; otherwise the
    /// charset reported by the response; otherwise (including on any failure,
    /// a non-200 status, or an oversized response) <c>Encoding.Default.BodyName</c>.
    /// </returns>
    public static string GetEncoding(string url)
    {
        try
        {
            using (HttpWebResponse response = OpenResponse(url))
            {
                if (IsAcceptable(response))
                {
                    // Charset names are plain ASCII, so an ASCII decode is enough to find them.
                    using (StreamReader reader = CreateReader(response, Encoding.ASCII))
                    {
                        Match declared = CharsetPattern.Match(reader.ReadToEnd());
                        if (declared.Success)
                        {
                            return declared.Groups["charset"].Value;
                        }

                        // CharacterSet is read while the response is still open.
                        string headerCharset = response.CharacterSet;
                        if (!string.IsNullOrEmpty(headerCharset))
                        {
                            return headerCharset;
                        }
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Deliberate best-effort: callers get the default encoding instead of an exception.
            System.Diagnostics.Trace.TraceWarning("PageHelper.GetEncoding failed for '{0}': {1}", url, ex.Message);
        }

        return Encoding.Default.BodyName;
    }

    /// <summary>
    /// Downloads the HTML source of the page at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Absolute HTTP or HTTPS address of the page.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>
    /// The decoded page source, or <c>string.Empty</c> on any failure, a non-200
    /// status (redirects are not followed), or an oversized response.
    /// </returns>
    public static string GetHtml(string url, Encoding encoding)
    {
        try
        {
            using (HttpWebResponse response = OpenResponse(url))
            {
                if (IsAcceptable(response))
                {
                    using (StreamReader reader = CreateReader(response, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Deliberate best-effort: callers get an empty string instead of an exception.
            System.Diagnostics.Trace.TraceWarning("PageHelper.GetHtml failed for '{0}': {1}", url, ex.Message);
        }

        return string.Empty;
    }

    /// <summary>Sends a GET request with the shared timeout and redirect settings.</summary>
    private static HttpWebResponse OpenResponse(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = RequestTimeoutMilliseconds;
        request.AllowAutoRedirect = false;
        return (HttpWebResponse)request.GetResponse();
    }

    /// <summary>Returns true for a 200 OK response below the declared size limit.</summary>
    private static bool IsAcceptable(HttpWebResponse response)
    {
        return response.StatusCode == HttpStatusCode.OK
            && response.ContentLength < MaxContentLength;
    }

    /// <summary>
    /// Wraps the response body in a reader, decompressing it first when the
    /// response's Content-Encoding is gzip.
    /// </summary>
    private static StreamReader CreateReader(HttpWebResponse response, Encoding encoding)
    {
        Stream body = response.GetResponseStream();
        string contentEncoding = response.ContentEncoding;

        // "gzip" is a protocol token, so compare ordinally rather than by culture.
        if (contentEncoding != null && contentEncoding.Equals("gzip", StringComparison.OrdinalIgnoreCase))
        {
            body = new GZipStream(body, CompressionMode.Decompress);
        }

        return new StreamReader(body, encoding);
    }
}
}
// Downloads a page with HttpWebRequest while presenting a browser User-Agent.
// NOTE(review): the URL passed to Create is left empty and `encoding` is not
// declared in this snippet; supply both before using it.
System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("");
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
string content;
// The using blocks release the response, stream and reader in the right order,
// including when ReadToEnd throws.
using (System.Net.WebResponse response = request.GetResponse())
using (System.IO.Stream resStream = response.GetResponseStream())
using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
{
    content = sr.ReadToEnd();
}
// webrequest, WebClient: the lines that follow are a separate snippet using WebClient.
// Downloads a page with WebClient using the current user's default credentials
// and decodes the bytes with the system default encoding.
// NOTE(review): the URL passed to DownloadData is left empty; supply it before use.
string content;
// WebClient is IDisposable, so it is released as soon as the download finishes.
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
    Byte[] pageData = wc.DownloadData("");
    content = System.Text.Encoding.Default.GetString(pageData);
}
其实只需要找一个国外的代理,然后在程序里设置为用这个代理去下载网站,就可以了。