<div class="tabContentDiv selected">
<table width="100%" cellspacing="0" cellpadding="0" border="0">
<tbody>
<tr>
<td class="border2 borderB pad5 padRL" align="left" colspan="2">
<strong>Product</strong>
</td>
<td align="center" class="border2 borderB">
<strong>Rate</strong>
</td>
<td align="center" class="border2 borderB">
<strong>Last week</strong>
</td>
</tr>
<tr>
<td height="18" align="left" class="bgListBlue pad5 padRL">
<a href="http://www.bankrate.com/funnel/mortgages/?prods=1" class="oarates_box_data">
10</a>
</td>
<td align="center" class="bgListBlue pad5 padRL">
<a href="http://www.bankrate.com/funnel/graph/?&cat=2&state=zz&d=1825&t=Line&ids=1">
<div class="graph-oa">
</div>
</a>
</td>
<td align="center" class="bgListBlue pad5 padRL">
我要抓取的数据1
</td>
<td width="15" align="center" class="bgListBlue pad5 padRL">
<img width="13" height="10" src="http://www.bankrate.com/images_MRA/news_advice_averages/arrow-dwn-gold-13x10.gif">
</td>
<td align="center" class="bgListBlue pad5 padRL">
3.96%
</td>
</tr>
<tr>
<td height="18" align="left" class="pad5 padRL">
<a href="http://www.bankrate.com/funnel/mortgages/?prods=2" class="oarates_box_data">
15 yr fixed mtg</a>
</td>
<td align="center" class="pad5 padRL">
<a href="http://www.bankrate.com/funnel/graph/?&cat=2&state=zz&d=1825&t=Line&ids=10">
<div class="graph-oa">
</div>
</a>
</td>
<td align="center" class="pad5 padRL">
我要抓取的数据2
</td>
<td width="15" align="center" class="pad5 padRL">
<img width="13" height="10" src="http://www.bankrate.com/images_MRA/news_advice_averages/arrow-dwn-gold-13x10.gif">
</td>
<td align="center" class="pad5 padRL">
3.30%
</td>
</tr>
<tr>
<td height="18" align="left" class="bgListBlue pad5 padRL">
<a href="http://www.bankrate.com/funnel/mortgages/?prods=1" class="oarates_box_data">
aa</a>
</td>
<td align="center" class="bgListBlue pad5 padRL">
<a href="http://www.bankrate.com/funnel/graph/?&cat=2&state=zz&d=1825&t=Line&ids=1">
<div class="graph-oa">
</div>
</a>
</td>
<td align="center" class="bgListBlue pad5 padRL">
我要抓取的数据3
</td>
<td width="15" align="center" class="bgListBlue pad5 padRL">
<img width="13" height="10" src="http://www.bankrate.com/images_MRA/news_advice_averages/arrow-dwn-gold-13x10.gif">
</td>
<td align="center" class="pad5 padRL">
3.30%
</td>
</tr>
</tbody>
</table>
</div>
WebRequest webr = WebRequest.Create(url);//一个url请求
Stream rc = webr.GetResponse().GetResponseStream();//响应请求并返回数据
StreamReader read = new StreamReader(rc, System.Text.Encoding.UTF8);//指定返回的数据流的编码
StringBuilder RegHtml = new StringBuilder();
RegHtml.Append("(?is)<div class=\"tabContentDiv selected\">.*?");
RegHtml.Append("<table width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\">.*?<tbody>.*?"); RegHtml.Append("<tr>.*?<td class=\"border2 borderB pad5 padRL\" align=\"left\" colspan=\"2\">.*?<strong>Product</strong>.*?</td>.*?");
RegHtml.Append("<td align=\"center\" class=\"border2 borderB\">.*?<strong>Rate</strong>.*?</td>.*?");
RegHtml.Append("<td align=\"center\" class=\"border2 borderB\">.*?<strong>Last week</strong>.*?</td>.*?</tr>.*?"); RegHtml.Append("<tr>.*?<td height=\"18\" align=\"left\" class=\"bgListBlue pad5 padRL\">.*?<a href=\"http://www\\.bankrate\\.com/funnel/mortgages/\\?prods=[0-9]+\" class=\"oarates_box_data\">.*</a>.*?</td>.*?"); RegHtml.Append("<td align=\"center\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append("<a href=\"http://www\\.bankrate\\.com/funnel/graph/\\?&cat=[0-9]+&state=[\\w]+&d=[0-9]+&t=[\\w]+&ids=[0-9]+\">.*?");
RegHtml.Append("<div class=\"graph-oa\">.*?</div>.*?</a>.*?</td>.*?"); RegHtml.Append("<td align=\"center\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append("(?<value1>.*).*?</td>.*?");//数据1 RegHtml.Append("<td width=\"15\" align=\"center\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append("<img width=\"13\" height=\"10\" src=\"http://www.bankrate.com/images_MRA/news_advice_averages/arrow-dwn-gold-13x10\\.gif\">.*?</td>.*?"); RegHtml.Append("<td align=\"center\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append(".*.*?</td>.*?"); RegHtml.Append("</tr>.*?");
//1
RegHtml.Append("<tr>.*?");
RegHtml.Append("<td height=\"18\" align=\"left\" class=\"pad5 padRL\">.*?");
RegHtml.Append("<a href=\"http://www\\.bankrate\\.com/funnel/mortgages/\\?prods=[0-9]+\" class=\"oarates_box_data\">");
RegHtml.Append(".*</a>.*?");
RegHtml.Append("</td>.*?");
RegHtml.Append("<td align=\"center\" class=\"pad5 padRL\">.*?");
RegHtml.Append("<a href=\"http://www\\.bankrate\\.com/funnel/graph/\\?&cat=[0-9]+&state=[\\w]+&d=[0-9]+&t=[\\w]+&ids=[0-9]+\">.*?");
//这里的.号不用转义也可以
RegHtml.Append("<div class=\"graph-oa\">.*?</div>.*?</a>.*?</td>.*?");
RegHtml.Append("<td align=\"center\" class=\"pad5 padRL\">.*?(?<value2>.*).*?</td>.*?");//数据2
RegHtml.Append("<td width=\"15\" align=\"center\" class=\"pad5 padRL\">.*?");
RegHtml.Append("<img width=\"13\" height=\"10\" src=\"http://www.bankrate\\.com/images_MRA/news_advice_averages/arrow-dwn-gold-13x10\\.gif\">.*?</td>.*?");
RegHtml.Append("<td align=\"center\" class=\"pad5 padRL\">.*?(.*).*?</td>.*?");
RegHtml.Append("</tr>.*?"); RegHtml.Append("<tr>.*?");
RegHtml.Append("<td height=\"18\" align=\"left\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append(" <a href=\"http://www\\.bankrate\\.com/funnel/mortgages/\\?prods=[0-9]+\" class=\"oarates_box_data\">");
RegHtml.Append("(.*).*?</td>.*?");
RegHtml.Append("<td align=\"center\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append("<a href=\"http://www\\.bankrate\\.com/funnel/graph/\\?&cat=[0-9]+&state=[\\w]+&d=[0-9]+&t=[\\w]+&ids=[0-9]+\">.*?");
RegHtml.Append("<div class=\"graph-oa\">.*?</div>.*?</a>.*?"); RegHtml.Append("</td>.*?"); RegHtml.Append("<td align=\"center\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append("(?<value3>.*).*?</td>.*?");//数据三 RegHtml.Append("<td width=\"15\" align=\"center\" class=\"bgListBlue pad5 padRL\">.*?");
RegHtml.Append("<img width=\"13\" height=\"10\" src=\"http://www\\.bankrate\\.com/images_MRA/news_advice_averages/arrow-dwn-gold-13x10\\.gif\">.*?</td>.*?"); RegHtml.Append("<td align=\"center\" class=\"pad5 padRL\">.*?");
RegHtml.Append("(.*).*?</td>.*?"); RegHtml.Append("</tr>.*?");
RegHtml.Append("</tbody>.*?</table>.*?</div>"); string content = read.ReadToEnd();//输出网页的源码代码
Regex r = new Regex(RegHtml.ToString(), RegexOptions.IgnoreCase);
MatchCollection mc = r.Matches(content);//在源代码中,用正则表达式进行搜索,返回为一个集合
string sMsg = "";
foreach (Match m in mc)//不能进行循环?
{
sMsg += m.Groups["value1"].Value;
sMsg += m.Groups["value2"].Value;
sMsg += m.Groups["value3"].Value;
}
Response.Write(sMsg);
}
这样是可以抓取出来,但是加载的速度非常慢。。有时出现超时的提示?第一次做这东西,麻烦哪位指点,谢谢
string s = File.ReadAllText(Server.MapPath("~/test.txt"));
HtmlDocument xmlDoc = new HtmlDocument();
xmlDoc.LoadHtml(s);
HtmlNodeCollection nodes = xmlDoc.DocumentNode.SelectNodes(@"//div[@class='tabContentDiv selected']//tr[position()>1]/td[3]");
foreach (HtmlNode node in nodes)
Response.Write(node.InnerText + "<br/>");
输出:
我要抓取的数据1
我要抓取的数据2
我要抓取的数据3