现有源文件:
[code=HTML] <table id="topicListTable" border="0" cellpadding="1" cellspacing="1" width="100%">
                            <colgroup>
                                <!--col class="icon" /-->
                                <col class="caption" style="padding-left: 5px" />
                                <col class="point" />
                                <col class="author" />
                                <col class="replyCount" />
                                <col class="lastReply" />
                                <col class="function" />
                            </colgroup>
                            <tr>
                                <!--th class="icon">
                                        &nbsp;</th-->
                                <th class="caption">
                                    
                                </th>
                                <th class="point">
                                    
                                </th>
                                <th class="author">
                                    
                                </th>
                                <th class="replyCount">
                                    ?
                                </th>
                                <th class="lastReply">
                                    
                                </th>
                                <th class="function">
                                    
                                </th>
                            </tr>
                    
                        <tr class="light">
                            <!--td>
                                    <input type="checkbox" name="selectid" value="74f9d13e-6094-4670-b2d1-3ffdf81ae794" />
                                </td-->
                            <td class="caption" style="word-break: break-all">
                                <strong><font color="green"></font></strong>
                                <strong style="color:#993300">[<a  style="color:#993300" target="_blank"  href="http://hi.csdn.net/wufeng4552 ">wufeng4552 </a>]</strong>
                                
                                <a target="_blank" title="?????" href="http://topic.csdn.net/u/20090527/11/74f9d13e-6094-4670-b2d1-3ffdf81ae794.html" >?????</a>
                                
                                
                                
                            </td>
                            <td>
                                100
                            </td>
                            <td>
                                <a target="_blank" href="http://hi.csdn.net/tanwei1002"
                                    style="font-family: ,Arial;">
                                    tanwei1002
                                </a>
                                <br />
                                05-27 11:52
                            </td>
                            <td>
                                35
                            </td>
                            <td>
                                <a target="_blank" href="http://hi.csdn.net/sdhylj"
                                    style="font-family: ,Arial;">
                                    sdhylj
                                </a>
                                <br />
                                05-27 14:31
                            </td>
                            <td>
                                <a href="http://forum.csdn.net/PointForum/Manage/TopicManageView.aspx?forumID=292a9ca6-1a8e-49bb-a408-6f79df2268ef&topicID=74f9d13e-6094-4670-b2d1-3ffdf81ae794&date=2009-5-27+11%3a52%3a41"
                                    target="_blank"></a>
                            </td>
                        </tr>
                    
                        <tr class="dark">
                            <!--td>
                                    <input type="checkbox" name="selectid" value="84a85168-ca4b-4039-b068-7469f25c455f" />
                                </td-->
                            <td class="caption" style="word-break: break-all">
                                <strong><font color="green"></font></strong>
                                <strong style="color:#993300">[<a  style="color:#993300"  href="http://hi.csdn.net/ALL">ALL</a>]</strong>
                                
                                <a target="_blank" title="?,???" href="http://topic.csdn.net/u/20090527/10/84a85168-ca4b-4039-b068-7469f25c455f.html" >?,???</a>
                                
                                
                                
                            </td>
                            <td>
                                300
                            </td>
                            <td>
                                <a target="_blank" href="http://hi.csdn.net/wufeng4552"
                                    style="font-family: ,Arial;">
                                    wufeng4552
                                </a>
                                <br />
                                05-27 10:29
                            </td>
                            <td>
                                44
                            </td>
                            <td>
                                <a target="_blank" href="http://hi.csdn.net/mdjzhihong"
                                    style="font-family: ,Arial;">
                                    mdjzhihong
                                </a>
                                <br />
                                05-27 14:31
                            </td>
                            <td>
                                <a href="http://forum.csdn.net/PointForum/Manage/TopicManageView.aspx?forumID=292a9ca6-1a8e-49bb-a408-6f79df2268ef&topicID=84a85168-ca4b-4039-b068-7469f25c455f&date=2009-5-27+10%3a29%3a25"
                                    target="_blank"></a>
                            </td>
                        </tr>
                    
                        <tr class="light">
                            <!--td>
                                    <input type="checkbox" name="selectid" value="d30288be-cb7a-4a5c-ac70-43fc745d9591" />
                                </td-->
                            <td class="caption" style="word-break: break-all">
                                <strong><font color="green"></font></strong>
                                
                                
                                <a target="_blank" title="VS???" href="http://topic.csdn.net/u/20090526/12/d30288be-cb7a-4a5c-ac70-43fc745d9591.html" >VS???</a>
                                
                                
                                
                            </td>
               ...code]
问题1:如何取得每个TD中的第二个a的HREF属性
问题2:求这个HTML中有多少个TR。
问题3:如何取得每个TD中的第二个a的HREF属性(TD中不一定只有两个A,有可能超过好几个,如果求第三个A对应的HREF呢)

解决方案 »

  1.   

    <table id="topicListTable" border="0" cellpadding="1" cellspacing="1" width="100%"> 
                                <colgroup> 
                                    <!--col class="icon" /--> 
                                    <col class="caption" style="padding-left: 5px" /> 
                                    <col class="point" /> 
                                    <col class="author" /> 
                                    <col class="replyCount" /> 
                                    <col class="lastReply" /> 
                                    <col class="function" /> 
                                </colgroup> 
                                <tr> 
                                    <!--th class="icon"> 
                                            &nbsp; </th--> 
                                    <th class="caption"> 
                                        
                                    </th> 
                                    <th class="point"> 
                                        
                                    </th> 
                                    <th class="author"> 
                                        
                                    </th> 
                                    <th class="replyCount"> 
                                        ? 
                                    </th> 
                                    <th class="lastReply"> 
                                        
                                    </th> 
                                    <th class="function"> 
                                        
                                    </th> 
                                </tr> 
                        
                            <tr class="light"> 
                                <!--td> 
                                        <input type="checkbox" name="selectid" value="74f9d13e-6094-4670-b2d1-3ffdf81ae794" /> 
                                    </td--> 
                                <td class="caption" style="word-break: break-all"> 
                                    <strong> <font color="green"> </font> </strong> 
                                    <strong style="color:#993300">[ <a  style="color:#993300" target="_blank"  href="http://hi.csdn.net/wufeng4552 ">wufeng4552 </a>] </strong> 
                                    
                                    <a target="_blank" title="?????" href="http://topic.csdn.net/u/20090527/11/74f9d13e-6094-4670-b2d1-3ffdf81ae794.html" >????? </a> 
                                    
                                    
                                    
                                </td> 
                                <td> 
                                    100 
                                </td> 
                                <td> 
                                    <a target="_blank" href="http://hi.csdn.net/tanwei1002" 
                                        style="font-family: ,Arial;"> 
                                        tanwei1002 
                                    </a> 
                                    <br /> 
                                    05-27 11:52 
                                </td> 
                                <td> 
                                    35 
                                </td> 
                                <td> 
                                    <a target="_blank" href="http://hi.csdn.net/sdhylj" 
                                        style="font-family: ,Arial;"> 
                                        sdhylj 
                                    </a> 
                                    <br /> 
                                    05-27 14:31 
                                </td> 
                                <td> 
                                    <a href="http://forum.csdn.net/PointForum/Manage/TopicManageView.aspx?forumID=292a9ca6-1a8e-49bb-a408-6f79df2268ef&topicID=74f9d13e-6094-4670-b2d1-3ffdf81ae794&date=2009-5-27+11%3a52%3a41" 
                                        target="_blank"> </a> 
                                </td> 
                            </tr> 
                        
                            <tr class="dark"> 
                                <!--td> 
                                        <input type="checkbox" name="selectid" value="84a85168-ca4b-4039-b068-7469f25c455f" /> 
                                    </td--> 
                                <td class="caption" style="word-break: break-all"> 
                                    <strong> <font color="green"> </font> </strong> 
                                    <strong style="color:#993300">[ <a  style="color:#993300"  href="http://hi.csdn.net/ALL">ALL </a>] </strong> 
                                    
                                    <a target="_blank" title="?,???" href="http://topic.csdn.net/u/20090527/10/84a85168-ca4b-4039-b068-7469f25c455f.html" >?,??? </a> 
                                    
                                    
                                    
                                </td> 
                                <td> 
                                    300 
                                </td> 
                                <td> 
                                    <a target="_blank" href="http://hi.csdn.net/wufeng4552" 
                                        style="font-family: ,Arial;"> 
                                        wufeng4552 
                                    </a> 
                                    <br /> 
                                    05-27 10:29 
                                </td> 
                                <td> 
                                    44 
                                </td> 
                                <td> 
                                    <a target="_blank" href="http://hi.csdn.net/mdjzhihong" 
                                        style="font-family: ,Arial;"> 
                                        mdjzhihong 
                                    </a> 
                                    <br /> 
                                    05-27 14:31 
                                </td> 
                                <td> 
                                    <a href="http://forum.csdn.net/PointForum/Manage/TopicManageView.aspx?forumID=292a9ca6-1a8e-49bb-a408-6f79df2268ef&topicID=84a85168-ca4b-4039-b068-7469f25c455f&date=2009-5-27+10%3a29%3a25" 
                                        target="_blank"> </a> 
                                </td> 
                            </tr> 
                        
                            <tr class="light"> 
                                <!--td> 
                                        <input type="checkbox" name="selectid" value="d30288be-cb7a-4a5c-ac70-43fc745d9591" /> 
                                    </td--> 
                                <td class="caption" style="word-break: break-all"> 
                                    <strong> <font color="green"> </font> </strong> 
                                    
                                    
                                    <a target="_blank" title="VS???" href="http://topic.csdn.net/u/20090526/12/d30288be-cb7a-4a5c-ac70-43fc745d9591.html" >VS??? </a> 
                                    
                                    
                                    
                                </td>
    由于时间紧促,所以也不能立马写出来,呵呵
    (?<=\</a\>)\<a\>[^>]*?href=([^>]*?)(?=\>/a\>)...这个我估计是错的,还有语法不熟,能否写全到输出字符串环节,语法太不熟了,呵呵
      

  2.   

    哦,看我笨的,管它第几个,
    匹配每个<a>都匹配,然后在对matchcollection进行foreach迭代,取第N个就行了。呵呵
    也不要正向零预...和负向零预...那么复杂了,
    不过,语法不熟,有谁会写?
      

  3.   

    嗯,还是想简单了,还是要用到(?<=td   这样的,期待高人出现.
      

  4.   

    问题1:如何取得每个TD中的第二个a的HREF属性
    // Question 1: output the href of the SECOND <a> found inside every <td>.
    //
    // regTD matches one complete <td>...</td> element. The balancing group "o" is pushed for
    // every nested <td ...> and popped for every </td>; (?(o)(?!)) rejects a candidate in which
    // a nested cell is still open, so a nested </td> does not end the outer cell early.
    Regex regTD = new Regex(@"<td[^>]*>((?>(?<o>)<td[^>]*>|(?<-o>)</td>|(?!</?td).)*(?(o)(?!)))</td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // regA matches only the href value. Its look-behind requires an earlier <a ...> opening tag,
    // then text that contains no further "<a", then the second "<a ... href=". Group 1 captures
    // the optional opening quote so that (?=\1) demands the same character after the value.
    // "\b" after "<a" keeps tags such as <abbr>, <address> or <area> from being taken for anchors.
    Regex regA = new Regex(@"(?<=<a\b[^>]*>(?:(?!<a\b).)*<a\b(?:(?!href=).)*href=(['""]?))[^'""\s>]+(?=\1)", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    MatchCollection mc = regTD.Matches(yourStr);
    foreach (Match mTD in mc)
    {
        // Search within this cell only; the first hit is the second anchor's href.
        Match mA = regA.Match(mTD.Value);
        if (mA.Success)
        {
            richTextBox2.Text += mA.Value + "\n";
        }
    }
      

  5.   

    问题2:求这个HTML中有多少个TR。这里认为只要有<tr ...>就算是一个tr了
    // Every opening "<tr" tag counts as one row; "\b" stops longer tag names
    // that merely start with "tr" (e.g. <track>) from being counted.
    Regex trOpenTag = new Regex(@"<tr\b", RegexOptions.IgnoreCase);
    int num = trOpenTag.Matches(yourStr).Count;
      

  6.   

    NB人的问题看来只有等NB的人才能回答~~~
      

  7.   

    问题3:如何取得每个TD中的第二个a的HREF属性(TD中不一定只有两个A,有可能超过好几个,如果求第三个A对应的HREF呢)
    int n;   //取td中第n个a的href属性
    // Question 3: output the href of the n-th <a> (1-based) inside every <td>.
    // Int32.TryParse sets n to 0 when the text is not a valid integer, so the range check
    // below rejects unparsable input as well.
    Int32.TryParse(textBox1.Text, out n);
    if (n < 1)
    {
        // Do not continue: with n < 1 the "{n - 1}" repeat count built below is meaningless.
        MessageBox.Show("输入的n不合法");
    }
    else
    {
        // One complete <td>...</td>; the balancing group "o" tracks nested <td> elements and
        // (?(o)(?!)) rejects a candidate in which a nested cell is still open.
        Regex regTD = new Regex(@"<td[^>]*>((?>(?<o>)<td[^>]*>|(?<-o>)</td>|(?!</?td).)*(?(o)(?!)))</td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        // Look-behind: (n - 1) complete <a>...</a> elements, each followed by any amount of
        // non-anchor content, then the n-th "<a ... href=" and its optional opening quote.
        // The gap sits inside the repeated group so earlier anchors need not be adjacent;
        // "\b" keeps <abbr>, <address>, <area> from counting as anchors.
        Regex regA = new Regex(@"(?<=(?:<a\b[^>]*>(?:(?!</a>).)*</a>(?:(?!<a\b).)*){" + (n - 1) + @"}<a\b(?:(?!href=).)*href=(['""]?))[^'""\s>]+(?=\1)", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        MatchCollection mc = regTD.Matches(yourStr);
        foreach (Match mTD in mc)
        {
            // Search within this cell only; the first hit is the n-th anchor's href.
            Match mA = regA.Match(mTD.Value);
            if (mA.Success)
            {
                richTextBox2.Text += mA.Value + "\n";
            }
        }
    }
      

  8.   

    稍稍优化完善了下
    int n;
    // Output the href of the n-th <a> (1-based) inside every <td>.
    // Int32.TryParse sets n to 0 when the text is not a valid integer, so the range check
    // below rejects unparsable input as well.
    Int32.TryParse(textBox1.Text, out n);
    if (n < 1)
    {
        // Do not continue: with n < 1 the "{n - 1}" repeat count built below is meaningless.
        MessageBox.Show("输入的n不合法");
    }
    else
    {
        // One complete <td>...</td>. The atomic (?>[^>]*) groups stop the engine from
        // backtracking into tag attributes; the balancing group "o" tracks nested <td>
        // elements and (?(o)(?!)) rejects a candidate in which a nested cell is still open.
        Regex regTD = new Regex(@"<td(?>[^>]*)>((?>(?<o>)<td(?>[^>]*)>|(?<-o>)</td>|(?!</?td).)*(?(o)(?!)))</td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        // Look-behind: (n - 1) complete <a>...</a> elements, each followed by any amount of
        // non-anchor content, then the n-th "<a ... href=" and its optional opening quote.
        // The gap sits inside the repeated group so earlier anchors need not be adjacent.
        Regex regA = new Regex(@"(?<=(?:<a\b[^>]*>(?:(?!</a>).)*</a>(?:(?!<a\b).)*){" + (n - 1) + @"}<a\b(?:(?!href=).)*href=(['""]?))[^'""\s>]+(?=\1)", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        MatchCollection mc = regTD.Matches(yourStr);
        foreach (Match mTD in mc)
        {
            // Search within this cell only; the first hit is the n-th anchor's href.
            Match mA = regA.Match(mTD.Value);
            if (mA.Success)
            {
                richTextBox2.Text += mA.Value + "\n";
            }
        }
    }
    第一个需求也可以用这个来实现,所以就不更新了既然是取每个TD中的第n个A标签的href属性,那么思路就是先取出每个TD标签,因为TD可能有嵌套的,所以用了平衡组,如果可以保证没有嵌套,那么去掉平衡组,可以提高匹配效率
      

  9.   

    还有另一种思路,就是先取出含有<a...>标签的TD标签,然后再进行下一轮匹配
    至于跟上面哪一种效率高,那要看源字符串的复杂度了
    int n;
    // Variant: pre-filter to <td> cells that contain at least one <a>, then pick the n-th
    // anchor's href (1-based) in each of them.
    // Int32.TryParse sets n to 0 when the text is not a valid integer, so the range check
    // below rejects unparsable input as well.
    Int32.TryParse(textBox1.Text, out n);
    if (n < 1)
    {
        MessageBox.Show("输入的n不合法");
    }
    else
    {
        // One complete <td>...</td>. Group "p" is pushed each time "<a" is seen and
        // (?(p)|(?!)) fails cells in which it was never pushed, i.e. cells without an anchor.
        // Group "o" balances nested <td> elements; (?(o)(?!)) rejects unclosed nesting.
        Regex regTD = new Regex(@"<td(?>[^>]*)>((?>(?<p>)<a\b|(?<o>)<td(?>[^>]*)>|(?<-o>)</td>|(?!</?td).)*(?(o)(?!))(?(p)|(?!)))</td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        // Look-behind: (n - 1) complete <a>...</a> elements, each followed by any amount of
        // non-anchor content, then the n-th "<a ... href=" and its optional opening quote.
        // The gap sits inside the repeated group so earlier anchors need not be adjacent.
        Regex regA = new Regex(@"(?<=(?:<a\b[^>]*>(?:(?!</a>).)*</a>(?:(?!<a\b).)*){" + (n - 1) + @"}<a\b(?:(?!href=).)*href=(['""]?))[^'""\s>]+(?=\1)", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        MatchCollection mc = regTD.Matches(yourStr);
        foreach (Match mTD in mc)
        {
            // Search within this cell only; the first hit is the n-th anchor's href.
            Match mA = regA.Match(mTD.Value);
            if (mA.Success)
            {
                richTextBox2.Text += mA.Value + "\n";
            }
        }
    }