一软件 怎么从别人网站抓取数据 怎么抓取.. 有代码 强烈要求代码. 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 通过httpwebrequest抓取,再使用正则格式化System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url); request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)"; System.Net.WebResponse response = request.GetResponse(); System.IO.Stream resStream = response.GetResponseStream(); System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding); string html = (sr.ReadToEnd()); resStream.Close(); sr.Close(); System.Net.WebClient wc = new System.Net.WebClient(); wc.Credentials = System.Net.CredentialCache.DefaultCredentials; Byte[] pageData = wc.DownloadData(PageUrl); string Content= System.Text.Encoding.Default.GetString(pageData); http://www.cocoachina.com/bbs/read.php?tid-6924-fpage-7-page-1.html http://www.cnblogs.com/DonePuzzle/archive/2007/10/27/939486.html有讲解,有源码。嫌工程太大可搜索"c# 爬虫",有相关教程。 源文件下载:http://d.download.csdn.net/down/2563713/yemeixiuzhuyan自己下载研究吧,很有学习价值。 采集原理:命名空间:using System.Net;WebClient Wc=new WebClient();string Url="你要采集的网站地址";byte [] Bt=Wc.DownloadData(Url);string StrHtml=Encoding.Default.GetString(Bt);return StrHtml;//返回你要采集网站的源文件再用Regex.Match()//来提取你所要的信息的正则表达是//(Regex.Match(StrHtml,"(?<=<h1 style=\"margin-top:0px; line-height:25px;\">).*?(?=<span class=\"pubHotels_Choice_gradestar01\">)",RegexOptions.Singleline).ToString()//Regex.Replace(Regex.Match(StrHtml,"(?<=<h1 style=\"margin-top:0px; line-height:25px;\">).*?(?=<span class=\"pubHotels_Choice_gradestar01\">)",RegexOptions.Singleline).ToString(),@"<(.[^>]*)>| ","",RegexOptions.IgnoreCase);这是去除HTML代码的例子。MatchCollection Ma=Regex.Matches(hotcity.ToString(),"(?<=<td>).*?(?=</td>)",RegexOptions.Singleline); //获取所有与正则表达式匹配的信息内容 然后在循环便利出来。 foreach(Match Matcs in Ma) { string Strurl=Regex.Replace(Regex.Match(Matcs.ToString(),"(?<=<a 
href=\").*?(?=1.html)",RegexOptions.Singleline).ToString(),@"<(.[^>]*)>| ","",RegexOptions.IgnoreCase).ToString(); string CityName=Regex.Replace(Regex.Match(Matcs.ToString(),"(?<=.html\">).*?(?=预订)",RegexOptions.Singleline).ToString(),@"<(.[^>]*)>| ","",RegexOptions.IgnoreCase).ToString(); } c# visio技术开发 C# 新手求教一个菜鸟问题 winform中文字自动换行 怎么动态显示遍历文件名 关于构建主键的问题 请教大家了,送你们分!!!!!!!!!!! 鼠标悬停时触发OnMouseHover事件,在这个事件里面怎么获得鼠标的位置? 在js脚本调用button的click事件,程序会执行,但是dropdownlist在程序添加的Item不显示 跪求: 学习 (c#)winform开发最适合的图书?!!! 请教:用空格或Tab分割字符串,但忽略双引号内的内容 怎么给工具栏中的按钮设置快捷键? C# 用一个tcp单线程传输文件问题
// Download the page at `url` with HttpWebRequest and decode the response body
// using `encoding`. Neither `url` nor `encoding` is declared in this snippet;
// both must be supplied by the surrounding code.
System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
// Send an Internet Explorer 8 User-Agent string with the request.
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
string html;
// The using statements dispose the response, the stream and the reader in
// reverse order, even when GetResponseStream() or ReadToEnd() throws, so the
// underlying connection is always released.
using (System.Net.WebResponse response = request.GetResponse())
using (System.IO.Stream resStream = response.GetResponseStream())
using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
{
    // Read the whole response body into a single string.
    html = sr.ReadToEnd();
}
// Download the raw bytes of `PageUrl` with WebClient and decode them into a string.
// `PageUrl` is not declared in this snippet; it must be supplied by the surrounding code.
byte[] pageData;
// WebClient is IDisposable; the using block releases it even if the download throws.
using (System.Net.WebClient wc = new System.Net.WebClient())
{
    // Authenticate with the default credentials from the credential cache.
    wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
    pageData = wc.DownloadData(PageUrl);
}
// Decode with Encoding.Default.
// NOTE(review): if the target page uses a different charset the text will be garbled — confirm the page encoding.
string Content = System.Text.Encoding.Default.GetString(pageData);
有讲解,有源码。
嫌工程太大可搜索"c# 爬虫",有相关教程。
自己下载研究吧,很有学习价值。
using System.Net;
// Scraping principle: download the raw bytes of the target page and decode
// them into a string holding the page's HTML source.
string Url="你要采集的网站地址";
string StrHtml;
// WebClient is IDisposable; the using block releases it even if the download throws.
using (WebClient Wc = new WebClient())
{
    byte[] Bt = Wc.DownloadData(Url);
    // Decode with Encoding.Default.
    // NOTE(review): a page served in another charset will be garbled — confirm the page encoding.
    StrHtml = Encoding.Default.GetString(Bt);
}
return StrHtml; // Returns the HTML source of the site being scraped.
// Next, use Regex.Match() to extract the information you need from StrHtml.
// Example — take the text between the <h1 ...> tag and the following <span ...> tag:
//   Regex.Match(StrHtml,"(?<=<h1 style=\"margin-top:0px; line-height:25px;\">).*?(?=<span class=\"pubHotels_Choice_gradestar01\">)",RegexOptions.Singleline).ToString()
// Example — the same match with the HTML markup stripped out of the result:
//   Regex.Replace(Regex.Match(StrHtml,"(?<=<h1 style=\"margin-top:0px; line-height:25px;\">).*?(?=<span class=\"pubHotels_Choice_gradestar01\">)",RegexOptions.Singleline).ToString(),@"<(.[^>]*)>| ","",RegexOptions.IgnoreCase);
// Collect the inner content of every <td>...</td> cell found in `hotcity`.
// `hotcity` is not declared in this snippet; it must be supplied by the surrounding code.
MatchCollection Ma = Regex.Matches(hotcity.ToString(), "(?<=<td>).*?(?=</td>)", RegexOptions.Singleline);
// Pattern used to strip tag-like "<...>" sequences and space characters from an extracted fragment.
// NOTE(review): the character after '|' may originally have been "&nbsp;" — confirm against the real page.
string stripPattern = @"<(.[^>]*)>| ";
// Walk over every matched cell and pull the link prefix and the city name out of it.
foreach (Match Matcs in Ma)
{
    string cell = Matcs.Value;

    // Text between `<a href="` and the literal "1.html". The dot is escaped so it
    // only matches a real '.', not any character.
    string Strurl = Regex.Replace(
        Regex.Match(cell, "(?<=<a href=\").*?(?=1\\.html)", RegexOptions.Singleline).Value,
        stripPattern, "", RegexOptions.IgnoreCase);

    // Text between the literal `.html">` and the word "预订".
    string CityName = Regex.Replace(
        Regex.Match(cell, "(?<=\\.html\">).*?(?=预订)", RegexOptions.Singleline).Value,
        stripPattern, "", RegexOptions.IgnoreCase);

    // NOTE(review): Strurl and CityName are computed but not used in this snippet;
    // consume them here (e.g. add them to a list) before the next iteration.
}