// Matches an opening <a ...> tag and captures the value of its href attribute
// in the named group "href" (read it with match.Groups["href"].Value).
//
//   (?is)          inline options: ignore case, single-line mode
//   <a\b           the tag name must be exactly "a" (not <abbr>, <area>, <address>, ...)
//   \shref\s*=\s*  href must be its own attribute (not data-href=); spaces around '=' are allowed
//   (['""])...\1   quoted value: the closing quote must be the same character as the opening one
//   [^'""\s>]*     unquoted value: stops at whitespace or at the end of the tag
Regex reg = new Regex(@"(?is)<a\b[^>]*?\shref\s*=\s*(?:(['""])(?<href>[^'""\s]*)\1|(?<href>[^'""\s>]*))[^>]*?>");

解决方案 »

  1.   

    http://blog.csdn.net/flying881114/article/details/6609546
      

  2.   

    为什么一定要用正则呢?xpath简单明了
    下面的演示代码,使用了开源类库Html Agility Pack:

    using System;
    using HtmlAgilityPack;

    namespace com58
    {
    /// <summary>
    /// Console demo: parses a hard-coded HTML fragment with Html Agility Pack and
    /// prints the href attribute of every node selected by an XPath query.
    /// </summary>
    class Program
    {
        // XPath evaluated against the parsed fragment: the link inside the first
        // cell of each table row.
        // NOTE(review): the expression ends in an attribute step, but the href is
        // still read from each returned node via GetAttributeValue - confirm how
        // Html Agility Pack treats "/@href" before reusing this query elsewhere.
        const string path = "//tr/td[1]/a/@href";

        // Sample markup (two table rows) used as the input document.
        const string html = @"<tr logr=""p_0_23219080775431_18974112972167_0"">
                        <td class=""img""><a target=""_blank"" href=""http://ny.58.com/ershoufang/18974112972167x.shtml"">
                            <img alt="""" src=""http://pic7.58cdn.com.cn/p1/tiny/n_s02391407514652158114.jpg"" /></a>
                         </td>
                        <td class=""t"">
                            <a href=""/ershoufang/?key=邓州"" target=""_blank"" class=""t1"">[二手房]</a><span class=""hg"">-</span>
                            <a href=""http://ny.58.com/ershoufang/18974112972167x.shtml"" target=""_blank"" class=""t"" title=""性价比极高的个人房源出售"">性价比极高的个人房源出售
                            </a>
                            <span class='ico biz'>(个人)</span>                    <span class='ico ntu' title=''>[8图]</span>
                            <span name=""zaixian_23219080775431""></span>
                            <br />
                            <p class=""sub"">个人房源。…就医上学…升值空间大,户型设计合理,…看房请提前预约.</p>
                            <p class=""abt"">
                                <a href=""/dengzhou/ershoufang/"">邓州</a><span> - </span>
                                <!-- 房产信息中二手房和租房在商圈后增加显示小区 -->
                                <a href=""/ershoufang/jh_超高性价比的"">超高性价比的</a><span> - </span>
                                今天
                            </p>
                        </td>
                        <td class=""pd"" style=""width: 15%"">2室(94)
                        </td>
                        <td class=""pd01"" style=""width: 15%"">
                            <b class='pri'>25</b> 万
                        </td>
                    </tr>                <tr logr=""p_1_18191348468486_19099111543554_0"">
                        <td class=""img""><a target=""_blank"" href=""http://ny.58.com/ershoufang/19099111543554x.shtml"">
                            <img alt="""" src=""http://pic8.58cdn.com.cn/p1/tiny/n_s12407409337043876114.jpg"" /></a></td>
                        <td class=""t"">
                            <a href=""/ershoufang/?key=邓州"" target=""_blank"" class=""t1"">[二手房]</a><span class=""hg"">-</span>
                            <a href=""http://ny.58.com/ershoufang/19099111543554x.shtml"" target=""_blank"" class=""t"" title=""盛世龙源39万 84平米 2房 毛坯 西南户,罕点房源"">盛世龙源39万 84平米 2房
                            </a>
                            <span class='ico ntu' title=''>[4图]</span>
                            <span name=""zaixian_18191348468486""></span>
                            <br />
                            <p class=""sub"">       4 非常罕点 ,有需求的尽快联系,出手极快,目前仍有不同楼层其它…
                            </p>
                            <p class=""abt"">
                                <a href=""/dengzhou/ershoufang/"">邓州</a><span> - </span>
                                <!-- 房产信息中二手房和租房在商圈后增加显示小区 -->
                                <a href=""/ershoufang/jh_盛世龙源"">盛世龙源</a><span> - </span>
                                今天
                            </p>
                        </td>
                        <td class=""pd"" style=""width: 15%"">2室(84)
                        </td>
                        <td class=""pd01"" style=""width: 15%"">
                            <b class='pri'>39</b> 万
                        </td>
                    </tr>";

        /// <summary>
        /// Loads the sample markup, runs the XPath query and writes one
        /// "href: ..." line to the console per selected node.
        /// </summary>
        public static void Extract()
        {
            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(html);

            HtmlNodeCollection matches = document.DocumentNode.SelectNodes(path);

            // Nothing is printed when the query yields no collection.
            if (matches == null)
            {
                return;
            }

            foreach (HtmlNode match in matches)
            {
                Console.WriteLine("href: {0}", match.GetAttributeValue("href", ""));
            }
        }

        /// <summary>
        /// Entry point: runs the extraction, then waits for a key press so the
        /// console output stays visible.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            Extract();
            Console.Write("Press any key to continue . . . ");
            Console.ReadKey(true);
        }
    }
    }
      

  3.   

    这个很简单,几句代码就可以了,楼主看下是这样吗?
    使用一个正则表达式就可以直接提取出所有结果了,不需要其他处理代码,这边已经写好完整的代码了,复制粘贴就可以使用:
    http://www.hellocsharp.com/ask/24.aspx
      

  4.   


    结完贴感觉有点不对,这个是抓取所有的 <a href="http://..."> 链接了。
    不过虽然有点粗心,但是还是很感谢你!