在做一个 WinForm 程序获取网页指定位置的内容时,需要用到正则表达式,无奈我对正则不精通。下面贴出网页模板,求大哥把正则表达式写一下,最好附上 C# 代码,嘻嘻。
<tr>
<td class="number">
<div class="consumeBizNo">
2012102800001000310009864627 </div>
</td>
<td class="time">2012-10-28 20:20:16</td>
<td class="info">
<ul>
<li class="name">
支付-139-8215-0805 四川移动50元自动快充 自动充值 移动话...
</li>
</ul>
</td>
<td class="amount"> </td>
<td class="amount outlay">- 49.80</td>
<td class="balance">20.00</td>
<td class="from"><ul>
<li class="name">支付宝</li>
</ul>
</td>
<td class="detail">
<a href="https://lab.alipay.com/consume/queryTradeDetail.htm?tradeNo=2012102800001000310009864627" target="_blank" seed="detail-link" smartracker="on">查看</a>
</td>
</tr>
<tr class="split">
<td class="number">
<div class="consumeBizNo">
2012102800000000008046766***09 </div>
</td>
<td class="time">2012-10-28 20:20:16</td>
<td class="info">
<ul>
<li class="name">
充值
</li>
<li class="name-no"><span class="ft-gray">交易成功</span></li> </ul>
</td>
<td class="amount income">49.80</td>
<td class="amount"> </td>
<td class="balance">69.80</td>
<td class="from"><ul>
<li class="name">中国农业银行</li>
</ul>
</td>
<td class="detail">
</td>
</tr>
<tr>
<td class="number">
<div class="consumeBizNo">
2012102000001000310008079726 </div>
</td>
<td class="time">2012-10-20 10:16:36</td>
<td class="info">
<ul>
<li class="name">
支付-182-0038-1757 全国移动50元 JYT自动充值 快速到账...
</li>
</ul>
</td>
<td class="amount"> </td>
<td class="amount outlay">- 49.55</td>
<td class="balance">20.00</td>
<td class="from"><ul>
<li class="name">支付宝</li>
</ul>
</td>
<td class="detail">
<a href="https://lab.alipay.com/consume/queryTradeDetail.htm?tradeNo=2012102000001000310008079726" target="_blank" seed="detail-linkT1" smartracker="on">查看</a>
</td>
</tr>
将里面的数据提取出来:
2012102800001000310009864627
2012-10-28 20:20:16
支付-139-8215-0805 四川移动50元自动快充 自动充值 移动话...
- 49.80 20.00
支付宝
查看
2012102800000000008046766***09
2012-10-28 20:20:16
充值
交易成功
49.80 69.80
中国农业银行
求大哥帮助。
<tr>
<td class="number">
<div class="consumeBizNo">
2012102800001000310009864627 </div>
</td>
<td class="time">2012-10-28 20:20:16</td>
<td class="info">
<ul>
<li class="name">
支付-139-8215-0805 四川移动50元自动快充 自动充值 移动话...
</li>
</ul>
</td>
<td class="amount"> </td>
<td class="amount outlay">- 49.80</td>
<td class="balance">20.00</td>
<td class="from"><ul>
<li class="name">支付宝</li>
</ul>
</td>
<td class="detail">
<a href="https://lab.alipay.com/consume/queryTradeDetail.htm?tradeNo=2012102800001000310009864627" target="_blank" seed="detail-link" smartracker="on">查看</a>
</td>
</tr>
<tr class="split">
<td class="number">
<div class="consumeBizNo">
2012102800000000008046766***09 </div>
</td>
<td class="time">2012-10-28 20:20:16</td>
<td class="info">
<ul>
<li class="name">
充值
</li>
<li class="name-no"><span class="ft-gray">交易成功</span></li> </ul>
</td>
<td class="amount income">49.80</td>
<td class="amount"> </td>
<td class="balance">69.80</td>
<td class="from"><ul>
<li class="name">中国农业银行</li>
</ul>
</td>
<td class="detail">
</td>
</tr>
<tr>
<td class="number">
<div class="consumeBizNo">
2012102000001000310008079726 </div>
</td>
<td class="time">2012-10-20 10:16:36</td>
<td class="info">
<ul>
<li class="name">
支付-182-0038-1757 全国移动50元 JYT自动充值 快速到账...
</li>
</ul>
</td>
<td class="amount"> </td>
<td class="amount outlay">- 49.55</td>
<td class="balance">20.00</td>
<td class="from"><ul>
<li class="name">支付宝</li>
</ul>
</td>
<td class="detail">
<a href="https://lab.alipay.com/consume/queryTradeDetail.htm?tradeNo=2012102000001000310008079726" target="_blank" seed="detail-linkT1" smartracker="on">查看</a>
</td>
</tr>
将里面的数据提取出来:
2012102800001000310009864627
2012-10-28 20:20:16
支付-139-8215-0805 四川移动50元自动快充 自动充值 移动话...
- 49.80 20.00
支付宝
查看
2012102800000000008046766***09
2012-10-28 20:20:16
充值
交易成功
49.80 69.80
中国农业银行
求大哥帮助。
解决方案 »
- C#下一页按钮
- VB.net程序运行正常,移植到C#上就出现问题了!求VB和C#都通的高手解答!!万分感激!!!
- 关于时间的计算问题(C# VB 均可以)
- objGraphics.Dispose(); 有必要么?
- 请高手指点(数据交换)
- 在.NET中怎样把一个EXE文件分成几个文件,在线等待
- 在c#里面怎样动态的画一个矩形???????????
- 如何在 Webbrowser 控件中获取ASP.NET_SessionId 的值
- DataGrid的ColumnCaption怎么无法设置?
- 为何C#老是改变我自己写的类的引用路径(C#的bug???)
- c#.net,做个进度条,怎么实现?在线等
- C# 怎么将一个方法标识为已过时啊
// Extracts the inner content of every <td class="..."> cell from a saved copy of the page
// and prints one cell per line. All type names are fully qualified so the snippet can be
// pasted into a method body without depending on the surrounding using directives.
// NOTE: the input path and the GB2312 encoding are hard-coded for the sample file.
string html = System.IO.File.ReadAllText(@"C:\1.txt", System.Text.Encoding.GetEncoding("GB2312"));
// Everything between an opening <td class="..."> and the next </td>; the lookbehind and
// lookahead keep the <td> tags themselves out of the match.
string pattern1 = @"(?is)(?<=<td\s*class=""[^""]*?"">).*?(?=</td>)";
// A <div> or <ul> wrapper, optionally followed by one <li>; group 3 captures the wrapped content.
string pattern2 = @"(?is)<(div|ul)[^>]*?>\s*(<li[^>]*?>)?(.*?)(</li>\s*)?</\1>";
System.Collections.Generic.List<string> list2 = new System.Collections.Generic.List<string>();
foreach (System.Text.RegularExpressions.Match cell in System.Text.RegularExpressions.Regex.Matches(html, pattern1))
{
    // Run pattern2 once and reuse the result instead of IsMatch followed by a second Match.
    System.Text.RegularExpressions.Match wrapped = System.Text.RegularExpressions.Regex.Match(cell.Value, pattern2);
    // Cells without a <div>/<ul> wrapper are kept as they are.
    string text = wrapped.Success ? wrapped.Groups[3].Value : cell.Value;
    // The markup puts values on their own lines, so drop the surrounding whitespace.
    list2.Add(text.Trim());
}
list2.ForEach(x => System.Console.WriteLine(x));
// Any remaining clean-up (for example markup nested inside a cell) is left to the reader.
{ //删除脚本 Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase); //删除HTML Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"-->", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<!--.*", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(amp|#38);", "&", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(lt|#60);", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(gt|#62);", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = 
System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&#(\d+);", "", System.Text.RegularExpressions.RegexOptions.IgnoreCase); Htmlstring.Replace("<", ""); Htmlstring.Replace(">", ""); Htmlstring.Replace("\r\n", ""); Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim(); return Htmlstring; }
/// <summary>
/// Removes script blocks, tags and comments from an HTML fragment and decodes a small,
/// fixed set of character entities, returning the remaining text.
/// </summary>
/// <param name="strHtml">The HTML text to clean. Must not be null.</param>
/// <returns>The input with markup removed and the listed entities decoded.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="strHtml"/> is null.</exception>
public string StripHTML(string strHtml)
{
    if (strHtml == null)
    {
        throw new System.ArgumentNullException("strHtml");
    }

    // Each row is { pattern, replacement }. Keeping the pair on one row prevents the
    // pattern list and the replacement list from drifting out of step. Rows are applied
    // in order, and the order matters (see the comment rows at the end).
    string[,] rules =
    {
        // Script blocks, including their content. (?s) lets '.' cross line breaks so
        // multi-line scripts are removed as a whole instead of leaking their code.
        { @"(?s)<script[^>]*?>.*?</script>", "" },
        // Any opening/closing tag, with optional namespace prefix and attributes whose
        // values may be quoted or bare. The posted version of this pattern had its
        // backslashes mangled (e.g. "\s" -> "/s", "\\" -> "file://") and therefore
        // could not match tags that carry attributes.
        { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
        // A line break together with the whitespace that follows it.
        { @"([\r\n])[\s]+", "" },
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\xa1" },
        { @"&(cent|#162);", "\xa2" },
        { @"&(pound|#163);", "\xa3" },
        { @"&(copy|#169);", "\xa9" },
        // Any other numeric entity is dropped.
        { @"&#(\d+);", "" },
        // Comment handling is two-step: the terminator becomes a line break ...
        { @"-->", "\r\n" },
        // ... so that the next rule can delete from the comment opener to that line break.
        { @"<!--.*\n", "" }
    };

    string strOutput = strHtml;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        strOutput = System.Text.RegularExpressions.Regex.Replace(
            strOutput,
            rules[i, 0],
            rules[i, 1],
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    }

    // The earlier version ended with strOutput.Replace("<", ""), Replace(">", "") and
    // Replace("\r\n", "") whose results were discarded, so they never had any effect.
    // They are removed rather than activated: stripping '<' and '>' here would also
    // delete the characters just produced by decoding &lt; and &gt;.
    return strOutput;
}