自动获取网页内的信息 我想实现的是给定一个URL,这个指定的页面链接中可能有新的链接,自动获取新的链接并抓取页面中指定的信息比如邮箱,联系方式等!希望有详细代码,我是新手想学习,有源码的可以直接发邮箱[email protected] 谢谢! 分不多 有多少给多少! 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 Stream resStream = mWebClient.OpenRead("http://www.ceshi.com/ShowNewsX.aspx); //以流的形式打开URL Encoding enc = Encoding.GetEncoding("utf-8"); // 如果是乱码就改成 utf-8 / GB2312 StreamReader sr = new StreamReader(resStream, enc); //以指定的编码方式读取数据流 string pageHtml = sr.ReadToEnd(); 读取到网页的内容,然后通过正则表达式或IndexOf找到联系方式和邮箱的具体位置 并用Substring读出。多个的话可以用循环。具体的自己可以想一下 如下,能返回整个HTML页面,至于你想要什么,你就操作STRING 就行了。 public string OpenReadWithHttps(string URL, string strPostdata, string strEncoding) { string strS = string.Empty; try { Encoding encoding = Encoding.Default; HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL); request.Method = "post"; request.Accept = "text/html, application/xhtml+xml, */*"; request.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"; request.ContentType = "application/x-www-form-urlencoded"; request.Timeout = 10000000; string paraUrlCoded = System.Web.HttpUtility.UrlEncode("username"); paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("[email protected] "); paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("password"); paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("123456"); paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("list"); paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("1"); paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("submit.x"); paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("71"); paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("submit.y"); paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("16"); byte[] buffer; //将URL编码后的字符串转化为字节 buffer = System.Text.Encoding.UTF8.GetBytes(paraUrlCoded); //设置请求的ContentLength //byte[] buffer = encoding.GetBytes(strPostdata); request.ContentLength = buffer.Length; request.GetRequestStream().Write(buffer, 0, buffer.Length); HttpWebResponse response = 
(HttpWebResponse)request.GetResponse(); using (StreamReader reader = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding(strEncoding))) { strS = reader.ReadToEnd(); } //Response.Write(strS); } catch (Exception) { } return strS; }//调用OpenReadWithHttps("http://new.cnzz.com/user/login.php","", "gb2312"); WebClient mWebClient = new WebClient(); mWebClient.Credentials = CredentialCache.DefaultCredentials;//获取或设置用于向Internet资源的请求进行身份验证的网络凭据 mWebClient.Credentials = CredentialCache.DefaultNetworkCredentials;上面少了一段代码 能不能做个demo啊!这样看我有点容易乱,你做个demo发到我邮箱 我可以在VS里面调试 修改学习 谢谢! WebClient webClient = new WebClient(); string url = this.TxtUrl.Text.Trim();//网页地址 if (string.IsNullOrEmpty(url)) { return; } //异步下载网页的信息 webClient.DownloadStringCompleted += new DownloadStringCompletedEventHandler(webClient_DownloadStringCompleted); webClient.Encoding = Encoding.Default; webClient.DownloadStringAsync(new Uri(url)); void webClient_DownloadStringCompleted(object sender, DownloadStringCompletedEventArgs e) { webClient.DownloadStringCompleted -= new DownloadStringCompletedEventHandler(webClient_DownloadStringCompleted); string result = e.Result; result = result.Replace(" ", ""); int i = 0; StringBuilder sb = new StringBuilder(); foreach (var a in result) { if (a == '@') { if (i > 10) { string temp = result.Substring(i - 10, 25);//这里比较难计算,邮箱长度都不一样,大概的估计 sb.Append(temp);//sb就是获取的邮箱! sb.Append("\n"); } else { //暂时未处理 } } i++; } } 1楼忘记了乱码当中也有 //GBK js单线程异步 c#winform实现类似快速监视的功能,请帮忙,在线等,谢谢 求Sqlserver 2005 开发版下载地址 有c#高手吗?在线等,急!!!!! c# 多线程与WINFORM 控件问题 lock(x)?X是啥意思??? 求数组中最大元素 (急)求一正则表达式 这个问题!!!!!!!!高手看看 登录出现问题,不管输入正确用户还是错误用户,都提示错误 求助:windows ce程序如何仿真调试 给定起始日期判断本周是单周还是双周
// Download the page and read its whole body into pageHtml.
// The original string literal was missing its closing quote, and neither the
// network stream nor the reader was ever closed; the using blocks release both
// even if reading throws.
string pageHtml;
Encoding enc = Encoding.GetEncoding("utf-8"); // if the text comes out garbled, try "GB2312" instead
using (Stream resStream = mWebClient.OpenRead("http://www.ceshi.com/ShowNewsX.aspx")) // open the URL as a stream
using (StreamReader sr = new StreamReader(resStream, enc)) // decode the stream with the chosen encoding
{
    pageHtml = sr.ReadToEnd();
}
读取到网页的内容后，通过正则表达式或 IndexOf 找到联系方式和邮箱的具体位置，并用 Substring 读出。有多个的话可以用循环。具体实现可以自己想一下。
/// <summary>
/// Sends a form-encoded HTTP POST to <paramref name="URL"/> and returns the response body as text.
/// </summary>
/// <param name="URL">Address to post to.</param>
/// <param name="strPostdata">
/// Already URL-encoded form body (for example "a=1&amp;b=2"). When null or empty,
/// the built-in sample login form is sent instead.
/// </param>
/// <param name="strEncoding">Name of the encoding used to decode the response, e.g. "gb2312" or "utf-8".</param>
/// <returns>The response body, or an empty string if anything goes wrong.</returns>
public string OpenReadWithHttps(string URL, string strPostdata, string strEncoding)
{
    string strS = string.Empty;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
        request.Method = "POST"; // HTTP method tokens are case-sensitive; use the canonical form
        request.Accept = "text/html, application/xhtml+xml, */*";
        request.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
        request.ContentType = "application/x-www-form-urlencoded";
        request.Timeout = 10000000; // milliseconds

        // Send the caller's form body when one is supplied; otherwise fall back to the sample form.
        string formBody = string.IsNullOrEmpty(strPostdata) ? BuildSampleLoginForm() : strPostdata;

        // Convert the URL-encoded string to bytes and declare the body length up front.
        byte[] buffer = System.Text.Encoding.UTF8.GetBytes(formBody);
        request.ContentLength = buffer.Length;

        // Dispose the request stream so the body is flushed and the connection is not leaked.
        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(buffer, 0, buffer.Length);
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding(strEncoding)))
        {
            strS = reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best-effort by design: callers get an empty string on failure,
        // but the reason is recorded instead of being silently dropped.
        System.Diagnostics.Trace.TraceError("OpenReadWithHttps failed for {0}: {1}", URL, ex);
    }
    return strS;
}

// Builds the hard-coded sample login form (username/password/list/submit.x/submit.y) as a URL-encoded body.
private static string BuildSampleLoginForm()
{
    string paraUrlCoded = System.Web.HttpUtility.UrlEncode("username");
    paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("[email protected] ");
    paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("password");
    paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("123456");
    paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("list");
    paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("1");
    paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("submit.x");
    paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("71");
    paraUrlCoded += "&" + System.Web.HttpUtility.UrlEncode("submit.y");
    paraUrlCoded += "=" + System.Web.HttpUtility.UrlEncode("16");
    return paraUrlCoded;
}
// Usage example: the second argument (post data) is passed as an empty string,
// and the third argument names the encoding used to decode the response.
OpenReadWithHttps("http://new.cnzz.com/user/login.php","", "gb2312");
// Client setup that was missing from the snippet calling mWebClient.OpenRead(...);
// it must run before that call.
WebClient mWebClient = new WebClient();
// Network credentials used to authenticate requests to Internet resources.
// Only this assignment is kept: the earlier DefaultCredentials assignment was
// immediately overwritten by it and had no effect.
mWebClient.Credentials = CredentialCache.DefaultNetworkCredentials;
string url = this.TxtUrl.Text.Trim(); // page address entered by the user
Uri pageUri;
// Nothing to do for an empty or malformed address; new Uri(url) would throw on bad input.
if (string.IsNullOrEmpty(url) || !Uri.TryCreate(url, UriKind.Absolute, out pageUri))
{
    return;
}
// Download the page asynchronously; webClient_DownloadStringCompleted runs when it finishes.
webClient.DownloadStringCompleted += new DownloadStringCompletedEventHandler(webClient_DownloadStringCompleted);
webClient.Encoding = Encoding.Default;
webClient.DownloadStringAsync(pageUri);

/// <summary>
/// Completion handler for the asynchronous download: collects every e-mail
/// address found in the downloaded page, one per line.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Download outcome; its Result is the page text on success.</param>
void webClient_DownloadStringCompleted(object sender, DownloadStringCompletedEventArgs e)
{
    // Unsubscribe so repeated downloads do not stack up handlers.
    webClient.DownloadStringCompleted -= new DownloadStringCompletedEventHandler(webClient_DownloadStringCompleted);

    // Reading e.Result throws when the download failed or was cancelled, so check first.
    if (e.Cancelled || e.Error != null)
    {
        return;
    }

    // Match whole addresses instead of cutting a fixed 25-character window around each '@':
    // the window returned surrounding junk, skipped addresses near the start of the page,
    // and threw ArgumentOutOfRangeException when '@' was close to the end.
    // Spaces are no longer stripped beforehand, since that glued neighbouring text onto addresses.
    StringBuilder sb = new StringBuilder();
    foreach (System.Text.RegularExpressions.Match hit in
             System.Text.RegularExpressions.Regex.Matches(e.Result, @"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}"))
    {
        sb.Append(hit.Value);
        sb.Append("\n");
    }
    // sb now holds the extracted e-mail addresses, one per line.
}