请教页面抓取的方法，谢谢 :)

通过 HttpWebRequest 抓取页面，再使用正则表达式提取所需内容：
// Download the page at `url` and decode the response body with `encoding`
// (both values come from the surrounding code). The decoded text ends up in `html`.
System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
// Send an Internet Explorer 8 User-Agent string with the request.
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
string html;
// `using` releases the response, stream and reader even when reading throws;
// manual Close() calls would be skipped on an exception.
using (System.Net.WebResponse response = request.GetResponse())
using (System.IO.Stream resStream = response.GetResponseStream())
using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
{
    html = sr.ReadToEnd();
}

  // Alternative: fetch the raw bytes of `PageUrl` with WebClient, authenticating with the
  // current default credentials, then decode them into `Content`.
  Byte[] pageData;
  // WebClient is IDisposable; dispose it as soon as the download has finished.
  using (System.Net.WebClient wc = new System.Net.WebClient())
  {
      wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
      pageData = wc.DownloadData(PageUrl);
  }
  // NOTE(review): Encoding.Default depends on the machine's configuration; if the page
  // declares a specific charset, decoding with that encoding would be more reliable - confirm.
  string Content = System.Text.Encoding.Default.GetString(pageData);

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

用正则表达式：
// Pattern for one table row of the downloaded page (`html` comes from the download step).
// Named groups: dm and jhs capture digits, xx captures the text of the second link,
// dz and dh capture the text of the two left-aligned cells.
// NOTE(review): presumably code / school / plan count / address / phone - confirm against the page.
string s=@"<tr bgcolor= [^>]+>\s+<td align=""center"">\s+<a href=""[^""]+"" target=""_blank"">(?<dm>\d+)</a></td>\s+<td align=""center""> <a href=""[^""]+"" target=""_blank"">(?<xx>[^<]+)</a></td>\s+<td align=""center""> (?<jhs>\d+)</td>\s+<td align=""left"">(?<dz>[^<]+)</td>\s+<td align=""left"">(?<dh>[^<]+)</td>\s+<td align=""center""><a href=""[^""]+""><font color=""green"">查询专业<font color=""""></font></a></td>\s+</tr>";
// Regex.Matches (not "Matchs") returns every non-overlapping match of the pattern.
MatchCollection mc = Regex.Matches(html, s);
foreach (Match m in mc)
{
    // Print each captured field of the row on its own line.
    Console.WriteLine(m.Groups["dm"].Value);
    Console.WriteLine(m.Groups["xx"].Value);
    Console.WriteLine(m.Groups["jhs"].Value);
    Console.WriteLine(m.Groups["dz"].Value);
    Console.WriteLine(m.Groups["dh"].Value);
}
/// <summary>
/// Reads the local file "test.html", extracts one <see cref="HtmlResult"/> per matching
/// table row, prints every result to the console and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string html = File.ReadAllText("test.html");

    // (?isn): ignore case, let '.' match newlines, and capture only the named groups.
    MatchCollection sections = Regex.Matches(html, @"(?isn)<tr bgcolor= #FFFFFF>.+?<a href=""(?<id_url>[^""]+)[^>]+>(?<id>\d+)([^>]+>){3}.+?<a href=""(?<school_url>[^""]+)[^>]*>(?<school>[^<]+)([^>]+>){3}\D*(?<plan>\d+)([^>]+>){2}\W*(?<address>[^<]+)([^>]+>){2}\W+(?<tel>[\d-]*).+?<a href=""(?<search>[^""]+)");

    List<HtmlResult> result = new List<HtmlResult>();
    foreach (Match section in sections)
    {
        // Copy each named capture of the row into the matching property.
        HtmlResult item = new HtmlResult();
        item.ID = section.Groups["id"].Value;
        item.IdUrl = section.Groups["id_url"].Value;
        item.School = section.Groups["school"].Value;
        item.SchoolUrl = section.Groups["school_url"].Value;
        item.PlanCount = section.Groups["plan"].Value;
        item.Address = section.Groups["address"].Value;
        item.Tel = section.Groups["tel"].Value;
        item.SearchUrl = section.Groups["search"].Value;
        result.Add(item);
    }

    // result now holds the parsed rows.
    foreach (HtmlResult item in result)
    {
        Console.WriteLine(item.ToString());
    }
    Console.ReadKey();
}

/// <summary>
/// The fields captured from one table row by the regular expression in Main.
/// All values are kept as the raw captured strings.
/// </summary>
public class HtmlResult
{
    /// <summary>Value of the "id" group (digits).</summary>
    public string ID { get; set; }
    /// <summary>Value of the "id_url" group (href of the first link).</summary>
    public string IdUrl { get; set; }
    /// <summary>Value of the "school" group (text of the second link).</summary>
    public string School { get; set; }
    /// <summary>Value of the "school_url" group (href of the second link).</summary>
    public string SchoolUrl { get; set; }
    /// <summary>Value of the "plan" group (digits).</summary>
    public string PlanCount { get; set; }
    /// <summary>Value of the "address" group.</summary>
    public string Address { get; set; }
    /// <summary>Value of the "tel" group (digits and hyphens; may be empty).</summary>
    public string Tel { get; set; }
    /// <summary>Value of the "search" group (href of the last link).</summary>
    public string SearchUrl { get; set; }

    /// <summary>
    /// Returns the eight fields joined by commas, in the order
    /// ID, IdUrl, School, SchoolUrl, PlanCount, Address, Tel, SearchUrl.
    /// </summary>
    public override string ToString()
    {
        return string.Format("{0},{1},{2},{3},{4},{5},{6},{7}", ID, IdUrl, School, SchoolUrl, PlanCount, Address, Tel, SearchUrl);
    }
}
哦，对，忘记了，我之前是用本地页面测试的，下面加上下载页面的代码。
// Complete example: downloads the page instead of reading the local test file.

/// <summary>
/// Downloads the page, extracts one <see cref="HtmlResult"/> per matching table row
/// and prints every result to the console.
/// </summary>
private static void TestRegex01()
{
    string html;
    // WebClient is IDisposable; dispose it once the bytes have been downloaded.
    using (WebClient client = new WebClient())
    {
        // The bytes are decoded as UTF-8. While testing, the HTML was read with
        // File.ReadAllText("test.html") instead.
        html = Encoding.UTF8.GetString(client.DownloadData("http://zxks.jseea.cn:8081/czweb/school/zsjhcx.jsp"));
    }

    // (?isn): ignore case, let '.' match newlines, and capture only the named groups.
    MatchCollection sections = Regex.Matches(html, @"(?isn)<tr bgcolor= #FFFFFF>.+?<a href=""(?<id_url>[^""]+)[^>]+>(?<id>\d+)([^>]+>){3}.+?<a href=""(?<school_url>[^""]+)[^>]*>(?<school>[^<]+)([^>]+>){3}\D*(?<plan>\d+)([^>]+>){2}\W*(?<address>[^<]+)([^>]+>){2}\W+(?<tel>[\d-]*).+?<a href=""(?<search>[^""]+)");

    List<HtmlResult> result = new List<HtmlResult>();
    foreach (Match section in sections)
    {
        // Copy each named capture of the row into the matching property.
        HtmlResult item = new HtmlResult();
        item.ID = section.Groups["id"].Value;
        item.IdUrl = section.Groups["id_url"].Value;
        item.School = section.Groups["school"].Value;
        item.SchoolUrl = section.Groups["school_url"].Value;
        item.PlanCount = section.Groups["plan"].Value;
        item.Address = section.Groups["address"].Value;
        item.Tel = section.Groups["tel"].Value;
        item.SearchUrl = section.Groups["search"].Value;
        result.Add(item);
    }

    // result now holds the parsed rows.
    foreach (HtmlResult item in result)
    {
        Console.WriteLine(item.ToString());
    }
}

/// <summary>
/// The fields captured from one table row by the regular expression in TestRegex01.
/// All values are kept as the raw captured strings.
/// </summary>
public class HtmlResult
{
    /// <summary>Value of the "id" group (digits).</summary>
    public string ID { get; set; }
    /// <summary>Value of the "id_url" group (href of the first link).</summary>
    public string IdUrl { get; set; }
    /// <summary>Value of the "school" group (text of the second link).</summary>
    public string School { get; set; }
    /// <summary>Value of the "school_url" group (href of the second link).</summary>
    public string SchoolUrl { get; set; }
    /// <summary>Value of the "plan" group (digits).</summary>
    public string PlanCount { get; set; }
    /// <summary>Value of the "address" group.</summary>
    public string Address { get; set; }
    /// <summary>Value of the "tel" group (digits and hyphens; may be empty).</summary>
    public string Tel { get; set; }
    /// <summary>Value of the "search" group (href of the last link).</summary>
    public string SearchUrl { get; set; }

    /// <summary>
    /// Returns the eight fields joined by commas, in the order
    /// ID, IdUrl, School, SchoolUrl, PlanCount, Address, Tel, SearchUrl.
    /// </summary>
    public override string ToString()
    {
        return string.Format("{0},{1},{2},{3},{4},{5},{6},{7}", ID, IdUrl, School, SchoolUrl, PlanCount, Address, Tel, SearchUrl);
    }
}
谢谢逍遥和2L的 wuyq11
如果你会jQuery的话那很简单了，你可以考虑一下用这个
这里有一段我的源码，我想你要做的工作跟我以前做的非常相似，
如果你能读懂下面的内容，那你实现所要的功能就非常简单了...var pars='';
$("tbody tr").each(function(i, tr) {//获取tbody下的所有tr
        Catalog = $(tr).find("td").eq(0).text();//获取tr下面的第0个td的值，下面类似
        size = $(tr).find("td").eq(1).text();
        price = $(tr).find("td").eq(2).text();        if (Catalog != '' && Catalog != 'custom') {
            pars = pars + "subCat=" + Catalog + "&size=" + size + "&price="+ price+"|";
        }
    });    try {
        if (pars == '')
            return;
//下面是通过json的方式进异步操作，如：操作数据库之类的
        $.getJSON('../../Handler.ashx', { param: pars, CatNo: getRequest("Catalog") },
                    function(json) {
                        window.open('', '_top');
                        window.top.close();                    });
    }
    catch (e) {
        alert(e.Message);
    }
另外，我这里还有抓取网页源码的程序，有兴趣的可以去参考一下：http://zengxin2008.download.csdn.net/