代码如下。string test = "<tr class=\"thread_alt\" tid=\"926981281\">                                    <td nowrap>                                    15                                    </td>                                    <td nowrap>      1                                    </td>                                    <td class=\"thread_title\">                             <a href=\"/f?kz=链接A\" target=\"_blank\">标题A</a>                                    </td>                                    <td nowrap>                                        <a href=\"/i/sys/jump?un=%BF%DD%CA%F7%CF%C2%B5%C4%C3%A8\" target=\"_blank\">作者A</a>                                    </td>       <td nowrap>                                        19:48&nbsp;&nbsp;<a href=\"/i/sys/jump?un=jasonwen0107\" target=\"_blank\">jasonwen0107</a>                                    </td>                                </tr><tr class=\"thread_alt\" tid=\"926981281\">                                    <td nowrap>                                    15                                    </td>                                    <td nowrap>      1                                    </td>                                    <td class=\"thread_title\">                             <a href=\"/f?kz=链接B\" target=\"_blank\">标题B</a>                                    </td>                                    <td nowrap>                                        <a href=\"/i/sys/jump?un=%BF%DD%CA%F7%CF%C2%B5%C4%C3%A8\" target=\"_blank\">作者B</a>                                    </td>       <td nowrap>                                        09:48&nbsp;&nbsp;<a href=\"/i/sys/jump?un=jasonwen0107\" target=\"_blank\">jasonwen0107</a>                                    </td>                                </tr>";
            pattern = "<tr(.*)? tid=\".*?\">\\s*<td nowrap>.*</td>\\s*<td nowrap>.*</td>.*<td.*>\\s*<a href=\"(?<Href>.+?)\" .*>(?<Title>.+?)</a>\\s*</td>\\s*<td nowrap>.*</td>\\s*<td nowrap>\\s*(?<Time>.+?)<a.*>.*</a>\\s*</td>.*</tr>";            Regex rr = new Regex(pattern, RegexOptions.IgnoreCase);
            MatchCollection matches = rr.Matches(test);
            // Print the captured link address and time for every row match, followed by a separator line.
            // The commented-out lines are disabled outputs; the groups "Click", "HuiFu" and "ZuoZhe"
            // they read are not defined by the pattern, so they would print empty values if re-enabled.
            foreach (Match match in matches)
            {
                //Console.WriteLine("点击次数 " + match.Groups["Click"].Value);
                //Console.WriteLine("回复次数 " + match.Groups["HuiFu"].Value);
                Console.WriteLine("链接地址 " + match.Groups["Href"].Value);
                //Console.WriteLine("标题 " + match.Groups["Title"].Value);
                //Console.WriteLine("作者 " + match.Groups["ZuoZhe"].Value);
                // The time capture may still contain "&nbsp;" entities; strip them before printing.
                Console.WriteLine("时间 " + match.Groups["Time"].Value.Replace("&nbsp;", ""));
                Console.WriteLine("------------------------------------");
            }这个代码只能匹配到第二个TR里面的内容,或者单独一个TR的话是都能匹配到的,但是如果有多个TR,就只能匹配到最后一个了。上面test的空格是需要的。请大家帮忙看一下。谢谢!

解决方案 »

  1.   

    虽然效率不高,但是可以用string test = "<tr class=\"thread_alt\" tid=\"926981281\">                                    <td nowrap>                                    15                                    </td>                                    <td nowrap>      1                                    </td>                                    <td class=\"thread_title\">                             <a href=\"/f?kz=链接A\" target=\"_blank\">标题A</a>                                    </td>                                    <td nowrap>                                        <a href=\"/i/sys/jump?un=%BF%DD%CA%F7%CF%C2%B5%C4%C3%A8\" target=\"_blank\">作者A</a>                                    </td>       <td nowrap>                                        19:48&nbsp;&nbsp;<a href=\"/i/sys/jump?un=jasonwen0107\" target=\"_blank\">jasonwen0107</a>                                    </td>                                </tr><tr class=\"thread_alt\" tid=\"926981281\">                                    <td nowrap>                                    15                                    </td>                                    <td nowrap>      1                                    </td>                                    <td class=\"thread_title\">                             <a href=\"/f?kz=链接B\" target=\"_blank\">标题B</a>                                    </td>                                    <td nowrap>                                        <a href=\"/i/sys/jump?un=%BF%DD%CA%F7%CF%C2%B5%C4%C3%A8\" target=\"_blank\">作者B</a>                                    </td>       <td nowrap>                                        09:48&nbsp;&nbsp;<a href=\"/i/sys/jump?un=jasonwen0107\" target=\"_blank\">jasonwen0107</a>                                    </td>                                </tr>";

    // Run each expression over the whole input on its own and print every raw match.
    // The first expression finds href="..." attributes, the second finds two-digit:two-digit tokens.
    string[] expressions =
    {
        @"href=""(?<href>[^""]*)""",
        @"[0-9]{2}:[0-9]{2}"
    };
    foreach (string expression in expressions)
    {
        foreach (Match hit in Regex.Matches(test, expression, RegexOptions.IgnoreCase))
        {
            Console.WriteLine(hit.Value);
        }
    }
      

  2.   

    本帖最后由 lxcnn 于 2010-11-08 13:20:04 编辑
      

  3.   

    谢谢您的回复。
    您的正则把所有的链接都提取出来了,但 test 里面的只是一小部分内容。我现在只需要提取上面 <tr></tr> 里面的两个东西:一个是 <a href=\"/f?kz=链接B\" target=\"_blank\">标题B</a> 里面的链接地址,另一个是 <td nowrap>                                        09:48&nbsp;&nbsp;<a href=\"/i/sys/jump?un=jasonwen0107\" target=\"_blank\">jasonwen0107</a>                                    </td> 里面的时间。这个时间不一定是小时和分钟,也可能是年月日;后面跟的 <a> 链接可能有,也可能没有。