在做某个 WinForm 程序、获取网页指定位置的内容时,要用到正则表达式,无奈对正则不精通。发个模板,求大哥把正则表达式写一下,最好再给个 C# 代码,嘻嘻。
<tr>
                           <td class="number">
<div class="consumeBizNo">
   2012102800001000310009864627 </div>
   </td>
                           <td class="time">2012-10-28 20:20:16</td>
                           <td class="info">
<ul>
<li class="name">
支付-139-8215-0805 四川移动50元自动快充 自动充值 移动话...
</li>
                                </ul>
                           </td>
    <td class="amount">&nbsp;</td>
<td class="amount outlay">- 49.80</td>
       
                           <td class="balance">20.00</td>
                           <td class="from"><ul>
                                               <li class="name">支付宝</li>
                      </ul>
                            </td>
                           <td class="detail">
          <a href="https://lab.alipay.com/consume/queryTradeDetail.htm?tradeNo=2012102800001000310009864627" target="_blank" seed="detail-link" smartracker="on">查看</a> 
        </td>
                         </tr>
                        <tr class="split">
                           <td class="number">
<div class="consumeBizNo">
   2012102800000000008046766***09 </div>
   </td>
                           <td class="time">2012-10-28 20:20:16</td>
                           <td class="info">
<ul>
<li class="name">
充值
</li>
<li class="name-no"><span class="ft-gray">交易成功</span></li>                                 </ul>
                           </td>
    <td class="amount income">49.80</td>
<td class="amount">&nbsp;</td>
       
                           <td class="balance">69.80</td>
                           <td class="from"><ul>
                                        <li class="name">中国农业银行</li>
        </ul>
                            </td>
                           <td class="detail">
          &nbsp;
        </td>
                         </tr>
                        <tr>
                           <td class="number">
<div class="consumeBizNo">
   2012102000001000310008079726 </div>
   </td>
                           <td class="time">2012-10-20 10:16:36</td>
                           <td class="info">
<ul>
<li class="name">
支付-182-0038-1757 全国移动50元 JYT自动充值 快速到账...
</li>
                                </ul>
                           </td>
    <td class="amount">&nbsp;</td>
<td class="amount outlay">- 49.55</td>
       
                           <td class="balance">20.00</td>
                           <td class="from"><ul>
                                               <li class="name">支付宝</li>
                      </ul>
                            </td>
                           <td class="detail">
          <a href="https://lab.alipay.com/consume/queryTradeDetail.htm?tradeNo=2012102000001000310008079726" target="_blank" seed="detail-linkT1" smartracker="on">查看</a> 
        </td>
                         </tr>
将里面的数据提取出来:
2012102800001000310009864627
2012-10-28 20:20:16
支付-139-8215-0805 四川移动50元自动快充 自动充值 移动话...
  - 49.80 20.00
支付宝
查看
2012102800000000008046766***09
2012-10-28 20:20:16
充值
交易成功
49.80   69.80
中国农业银行
 求大哥帮助,,,

解决方案 »

  1.   

    (?is)<td[^>]*?>\s*(<div[^>]*?>)?(.*?)(</div>)?\s*</td>
      

  2.   

    不只有 <div class="consumeBizNo">2012102800001000310009864627 </div> </td> 这一个啊,还有其他的呢,其他的怎么提取啊?其他的结构又不一样。
      

  3.   


     string html = System.IO.File.ReadAllText(@"C:\1.txt", Encoding.GetEncoding("GB2312"));
                string pattern1 = @"(?is)(?<=<td\s*class=""[^""]*?"">).*?(?=</td>)";
                string pattern2 = @"(?is)<(div|ul)[^>]*?>\s*(<li[^>]*?>)?(.*?)(</li>\s*)?</\1>";
                List<string> list1 = new List<string>();
                List<string> list2 = new List<string>();
                foreach (System.Text.RegularExpressions.Match m in System.Text.RegularExpressions.Regex.Matches(html, pattern1))
                {
                    list1.Add(m.Value);
                }
                string ss = "";
                foreach (string l in list1)
                {                if (Regex.IsMatch(l, pattern2))
                    {
                        ss = Regex.Match(l, pattern2).Groups[3].Value;
                    }
                    else
                        ss = l;
                    list2.Add(ss);
                }
                list2.ForEach(x => Console.WriteLine(x)); 其余的自行处理
      

  4.   

    // Here are two ready-made helper methods you can try. Every framework type is fully qualified,
    // so they can be pasted in and called directly without adding any using directives.

    /// <summary>
    /// Removes script blocks, tags and comment markers from an HTML fragment, decodes a few common
    /// entities, drops any angle brackets that are left, and returns the HTML-encoded, trimmed text.
    /// </summary>
    /// <param name="Htmlstring">HTML fragment to clean. Null or empty input yields an empty string.</param>
    /// <returns>The cleaned text, HTML-encoded and trimmed.</returns>
    public static string NoHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        const System.Text.RegularExpressions.RegexOptions ignoreCase =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase;

        // Remove scripts. Singleline lets '.' cross line breaks so multi-line script bodies go too.
        string text = System.Text.RegularExpressions.Regex.Replace(
            Htmlstring,
            @"<script[^>]*?>.*?</script>",
            "",
            ignoreCase | System.Text.RegularExpressions.RegexOptions.Singleline);

        // Pattern/replacement pairs, applied strictly in this order.
        string[][] rules =
        {
            new[] { @"<(.[^>]*)>", "" },          // remove HTML tags
            new[] { @"([\r\n])[\s]+", "" },       // collapse line breaks followed by whitespace
            new[] { @"-->", "" },
            new[] { @"<!--.*", "" },
            new[] { @"&(quot|#34);", "\"" },
            new[] { @"&(amp|#38);", "&" },
            new[] { @"&(lt|#60);", "<" },
            new[] { @"&(gt|#62);", ">" },
            new[] { @"&(nbsp|#160);", " " },
            new[] { @"&(iexcl|#161);", "\u00A1" },
            new[] { @"&(cent|#162);", "\u00A2" },
            new[] { @"&(pound|#163);", "\u00A3" },
            new[] { @"&(copy|#169);", "\u00A9" },
            new[] { @"&#(\d+);", "" }             // drop any other numeric entity
        };

        foreach (string[] rule in rules)
        {
            text = System.Text.RegularExpressions.Regex.Replace(text, rule[0], rule[1], ignoreCase);
        }

        // string.Replace returns a new string; the result must be assigned or the call has no effect.
        text = text.Replace("<", "").Replace(">", "").Replace("\r\n", "");

        // WebUtility.HtmlEncode needs no HttpContext, so this also works outside an ASP.NET request
        // (for example in a WinForms application, where HttpContext.Current is null).
        return System.Net.WebUtility.HtmlEncode(text).Trim();
    }

    /// <summary>
    /// Strips script blocks, tags and comments from an HTML fragment and decodes a few common
    /// entities, returning plain text with angle brackets and CR/LF pairs removed.
    /// </summary>
    /// <param name="strHtml">HTML fragment to clean. Null or empty input yields an empty string.</param>
    /// <returns>The plain-text content (not HTML-encoded).</returns>
    public string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        // aryReg[i] is replaced by aryRep[i]; the two arrays must stay the same length and order.
        string[] aryReg =
        {
            // (?s) lets '.' cross line breaks so multi-line script bodies are removed too.
            @"(?s)<script[^>]*?>.*?</script>",
            // Opening/closing tag with optional attributes; \7 refers back to the opening quote character.
            // NOTE(review): the nested quantifiers can backtrack heavily on malformed tags - consider a
            // match timeout if the input is untrusted.
            @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
            @"([\r\n])[\s]+",
            @"&(quot|#34);",
            @"&(amp|#38);",
            @"&(lt|#60);",
            @"&(gt|#62);",
            @"&(nbsp|#160);",
            @"&(iexcl|#161);",
            @"&(cent|#162);",
            @"&(pound|#163);",
            @"&(copy|#169);",
            @"&#(\d+);",
            @"-->",
            @"<!--.*\n"
        };
        string[] aryRep =
        {
            "",
            "",
            "",
            "\"",
            "&",
            "<",
            ">",
            " ",
            "\u00A1", // chr(161)
            "\u00A2", // chr(162)
            "\u00A3", // chr(163)
            "\u00A9", // chr(169)
            "",
            "\r\n",   // turn the comment terminator into a line end so the next rule can cut the comment
            ""
        };

        string strOutput = strHtml;
        for (int i = 0; i < aryReg.Length; i++)
        {
            strOutput = System.Text.RegularExpressions.Regex.Replace(
                strOutput,
                aryReg[i],
                aryRep[i],
                System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        }

        // string.Replace returns a new string; the result must be assigned or the call has no effect.
        strOutput = strOutput.Replace("<", "").Replace(">", "").Replace("\r\n", "");
        return strOutput;
    }