抓取网页数据这件事我是第一次做。需要抓取的网页是登录之后才能查询并显示信息的页面,现在我该如何实现模拟登录?登录之后又该如何抓取数据?这个我还真是没太大把握~
所以请大家帮忙~~~
所以请大家帮忙~~~
解决方案 »
- asp.net如何判断一个IP的端口是否打开
- Asp.net,弹出信息提示框问题(ClientScript.RegisterStartupScript)????
- 急~~ 如何将EXCEL数据导入到SQL中 ~在线等
- ASP.NET2.0中创建数据库连接实例与命令对象
- 按钮无效了
- 求Winform正计时代码
- 身份验证:用户登陆得到属于哪个组的信息后.............
- treeview 对齐问题?
- 请问如何控制crystal report输出的chart的清晰程度?
- 如何在一次会话中保持数据库连接不断开?
- 请问machine.config的MemoryLimit问题
- 数据库里面关于日期问题?
// Download the raw bytes of the page. WebClient is IDisposable, so it is
// wrapped in a using statement to release its resources promptly.
using (WebClient w = new WebClient())
{
    byte[] b = w.DownloadData("http://www.baidu.com");

    // Encoding.Default is the machine's ANSI code page.
    // NOTE(review): if the text comes out garbled, decode with the charset
    // the page actually declares instead.
    Response.Write(Encoding.Default.GetString(b));
}
获取 HTML 源码后,再从源码中解析、提取所需的数据
/// <summary>
/// Downloads the resource at <paramref name="URL"/> into the local file
/// <paramref name="Filename"/>, advancing <paramref name="Prog"/> as data arrives.
/// </summary>
/// <param name="URL">Address of the resource to download (must be an HTTP/HTTPS URL).</param>
/// <param name="Filename">Path of the file to create; an existing file is overwritten.</param>
/// <param name="Prog">Progress bar to update; may be null to download without progress reporting.</param>
public void DownData( string URL, string Filename, ProgressBar Prog )
{
    System.Net.HttpWebRequest Myrq = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(URL);

    // The using statements guarantee that the response, the network stream and
    // the output file are closed even if the transfer fails part-way.
    using (System.Net.HttpWebResponse myrp = (System.Net.HttpWebResponse)Myrq.GetResponse())
    using (System.IO.Stream st = myrp.GetResponseStream())
    using (System.IO.Stream so = new System.IO.FileStream(Filename, System.IO.FileMode.Create))
    {
        // ContentLength is -1 when the server does not announce a length, and it
        // may exceed the int range of ProgressBar; report progress only when usable.
        long totalBytes = myrp.ContentLength;
        bool canReport = Prog != null && totalBytes > 0 && totalBytes <= int.MaxValue;
        if (canReport)
        {
            Prog.Maximum = (int)totalBytes;
        }

        long totalDownloadedByte = 0;
        byte[] by = new byte[1024];
        int osize;

        // Read returns 0 at end of stream; copy chunk by chunk until then.
        while ((osize = st.Read(by, 0, by.Length)) > 0)
        {
            so.Write(by, 0, osize);
            totalDownloadedByte += osize;

            if (canReport)
            {
                // Clamp in case the server sends more than it announced.
                Prog.Value = (int)System.Math.Min(totalDownloadedByte, totalBytes);
            }

            // Keep the UI responsive while downloading on the UI thread.
            Application.DoEvents();
        }
    }
}
// Load the "baojia.aspx" control and attach it to the holder's control tree.
m_pageHolder.Controls.Add((baojia)m_pageHolder.LoadControl("baojia.aspx"));

// Execute the holder server-side, writing its output into `output`
// instead of the current response (third argument: preserveForm = false).
StringWriter output = new StringWriter();
HttpContext.Current.Server.Execute(m_pageHolder, output, false);
/// <summary>
/// Cookie store attached to the login request; it collects the cookies the
/// server sets during login.
/// </summary>
public static CookieContainer cc = new CookieContainer();

/// <summary>
/// Performs a login by POSTing a form to <paramref name="targetURL"/> and saves
/// the resulting cookie header in application state under "cookieHeader".
/// </summary>
/// <param name="targetURL">
/// URL that processes the login form (the form's action URL), not merely the
/// page that displays the form.
/// </param>
/// <param name="param">
/// Form fields to post (e.g. user name and password), keyed by field name.
/// Pass raw values; they are URL-encoded here.
/// </param>
/// <returns>The literal string "OK"; the response body itself is not returned.</returns>
/// <exception cref="ArgumentNullException"><paramref name="param"/> is null.</exception>
public static string PostAndGetHTML(string targetURL, Hashtable param)
{
    if (param == null)
    {
        throw new ArgumentNullException("param");
    }

    // Build the application/x-www-form-urlencoded body. Keys and values are
    // URL-encoded so that '&', '=', spaces and non-ASCII text survive intact.
    StringBuilder formData = new StringBuilder();
    foreach (DictionaryEntry de in param)
    {
        if (formData.Length > 0)
        {
            formData.Append('&');
        }

        string value = de.Value == null ? "" : de.Value.ToString();
        formData.Append(HttpUtility.UrlEncode(de.Key.ToString()));
        formData.Append('=');
        formData.Append(HttpUtility.UrlEncode(value));
    }

    // After URL-encoding the body contains ASCII characters only.
    byte[] data = Encoding.ASCII.GetBytes(formData.ToString());

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = data.Length;
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";
    request.AllowAutoRedirect = true;
    request.KeepAlive = true;

    // Attach the cookie container BEFORE any data is sent, so that the login
    // request and any redirects it triggers record and replay cookies.
    request.CookieContainer = cc;

    using (Stream newStream = request.GetRequestStream())
    {
        newStream.Write(data, 0, data.Length);
    }

    string cookieHeader;

    // Dispose the response so the underlying connection is released.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // Collect the cookies that apply to the URL we actually logged in to.
        cookieHeader = cc.GetCookieHeader(request.RequestUri);
    }

    // Save the cookie header in application state for later requests.
    HttpContext.Current.Application.Lock();
    try
    {
        HttpContext.Current.Application["cookieHeader"] = cookieHeader;
    }
    finally
    {
        HttpContext.Current.Application.UnLock();
    }

    return "OK";
}
/// <summary>
/// Fetches another page with a GET request, sending the cookie header stored
/// in application state under "cookieHeader" when one is present.
/// </summary>
/// <param name="strUrl">URL of the page to fetch.</param>
/// <returns>The response body decoded as UTF-8.</returns>
public static string ReGetHtml(string strUrl)
{
    HttpWebRequest request1 = (HttpWebRequest)WebRequest.Create(strUrl);
    request1.Method = "GET";
    request1.KeepAlive = true;
    request1.AllowAutoRedirect = true;

    // The cookie header may be absent from application state; in that case
    // send the request without cookies instead of failing on a null reference.
    object savedHeader = HttpContext.Current.Application["cookieHeader"];
    string cookhead = savedHeader == null ? null : savedHeader.ToString();
    if (!string.IsNullOrEmpty(cookhead))
    {
        request1.Headers.Add("Cookie", cookhead);
    }

    // Dispose response, stream and reader so the connection is released.
    using (HttpWebResponse response1 = (HttpWebResponse)request1.GetResponse())
    using (Stream stream2 = response1.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream2, System.Text.Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
你的这个url是什么地址?
我用你的方法好像行不通。登录时除了用户名和密码以外还有验证码,我该如何得到那个验证码?
string cookieHeader = request.CookieContainer.GetCookieHeader(new Uri("http://login.xiaonei.com/Login.do"));这个是什么的url
???
方法里的两个参数我不知道是什么意思:
一个是登录的 URL,另一个是什么?
如果你要提交登录名和密码,提交的内容大概是
"userid=aaa&password=bbb"
但是在有验证码的情况下,我也不清楚验证码是不是也一起放在 cookie 里面