<table border="0" width="556" cellpadding="0" bgcolor="#FFFFFF">
<tr><td width="130" valign="top"><p align="center"><img border="0" src="face/m02.gif"><br><Br>第<font color="#FF0000">1068</font>条留言</p></td>
<td width="425" valign="top">
<table border="0" width="100%" cellspacing="0" cellpadding="0">
<tbody><tr><td width="43" colspan="2" rowspan="2" height="29">
<img border="0" src="/img/1_r2_c2.gif" width="43" height="29"></td>
<td height="10" background="/img/1_r2_c41.gif"></td>
<td width="37" colspan="2" rowspan="2" height="29">
<img border="0" src="/img/1_r2_c6.gif" width="37" height="29"></td></tr><tr><td height="19"><table border="0" width="100%" cellspacing="0" cellpadding="0"><tr><td width="60%"><div style="width:120;color:0099FF;filter:glow(color=00FF66, strength=1)">天之痕</div></td><td width="40%" align="right"></td></tr></table></td></tr><tr><td width="10" background="/img/1_r4_c2.gif"></td><td width="27"></td><td width="100%" height="50" style="table-layout:fixed; WORD-WRAP:break-word; word-break:break-all">来看看
</td><td width="22"></td><td width="15" background="/img/1_r4_c2.gif"></td></tr><tr><td width="43" colspan="2" rowspan="2" height="26"><img border="0" src="/img/1_r6_c2.gif" width="43" height="27"></td><td height="16" align="right"><img border="0" src="/img/ip.gif" width="16" height="15" title="116.19.*.*"><font color=#336600>[2009-6-25 16:41:41]</font></td><td width="43" colspan="2" rowspan="2" height="26"><img border="0" src="/img/1_r6_c6.gif" width="37" height="27"></td></tr><tr><td height="9" background="/img/1_r2_c4.gif"></td></tr></tbody></table></td></tr>
<tr><td width="640" colspan="2">
<hr SIZE="1" color=#F0F0F0></td></tr></table>
留言本要搬家,希望把以前的数据保存下来。
这是别人的程序,拿不到数据库,所以只能用正则提取;一个页面有 10 个像上面这样的 table。
希望提取出下面(原帖中标红)的数据:
1068 条数
天之痕 发帖人
来看看 内容 (可能包含 空格 回车 和UBB code)
2009-6-25 16:41:41 时间
解决方案 »
- 怎么样做出漂亮的C#界面呀
- 请大家提供一种思路
- 引用一个播放MP3的控件出现异常,怎么解决
- 紧急求救. sql server2005. 分离.附加等等..
- vs2005,开发windows程序,所有的windows组件都不能放到窗体上,有谁遇到过此类问题
- 用C#编写QQ快捷键怎么写
- 一个关于.net自定义权限类的问题
- 孟子大哥等高手可以帮帮我吗(OWC图形问题)?
- 求,X.400邮件控制程序。
- 怎样统一时间VB程序只能运行一个?
- 右击datagridview如何判断是否点到某单元格并获取其位置显示菜单
- 现在的.net类库,与mfc有关系吗?是 mfc的进一步封装呢?还是与mfc平行的另外一种类库?
<tr><td width="130" valign="top"><p align="center"><img border="0" src="face/m02.gif">
<br><Br>第<font color="#FF0000">1068</font>条留言</p></td>
<td width="425" valign="top">
<table border="0" width="100%" cellspacing="0" cellpadding="0">
<tbody><tr><td width="43" colspan="2" rowspan="2" height="29">
<img border="0" src="/img/1_r2_c2.gif" width="43" height="29"></td>
<td height="10" background="/img/1_r2_c41.gif"></td>
<td width="37" colspan="2" rowspan="2" height="29">
<img border="0" src="/img/1_r2_c6.gif" width="37" height="29"></td></tr><tr><td height="19">
<table border="0" width="100%" cellspacing="0" cellpadding="0"><tr><td width="60%">
<div style="width:120;color:0099FF;filter:glow(color=00FF66, strength=1)">天之痕奔波</div></td>
<td width="40%" align="right"></td></tr></table>
</td></tr><tr><td width="10" background="/img/1_r4_c2.gif">
</td><td width="27"></td>
<td width="100%" height="50" style="table-layout:fixed; WORD-WRAP:break-word; word-break:break-all">来看看 还是一片枯叶飘过休~~~~呜~呼呼~
</td><td width="22"></td><td width="15" background="/img/1_r4_c2.gif"></td></tr><tr>
<td width="43" colspan="2" rowspan="2" height="26">
<img border="0" src="/img/1_r6_c2.gif" width="43" height="27">
</td><td height="16" align="right"><img border="0" src="/img/ip.gif" width="16" height="15" title="116.19.*.*">
<font color=#336600>[2009-6-25 16:41:41]</font></td>
<td width="43" colspan="2" rowspan="2" height="26">
<img border="0" src="/img/1_r6_c6.gif" width="37" height="27">
</td></tr><tr><td height="9" background="/img/1_r2_c4.gif">
</td></tr></tbody></table></td></tr>
<tr><td width="640" colspan="2">
<hr SIZE="1" color=#F0F0F0></td></tr></table>
// Sample guestbook entry used to exercise the extraction regex below.
// (Verbatim string: every "" is a single literal double quote.)
string s = @"<table border=""0"" width=""556"" cellpadding=""0"" bgcolor=""#FFFFFF"">
<tr><td width=""130"" valign=""top""><p align=""center""><img border=""0"" src=""face/m02.gif"">
<br><Br>第<font color=""#FF0000"">1068</font>条留言</p></td>
<td width=""425"" valign=""top"">
<table border=""0"" width=""100%"" cellspacing=""0"" cellpadding=""0"">
<tbody><tr><td width=""43"" colspan=""2"" rowspan=""2"" height=""29"">
<img border=""0"" src=""/img/1_r2_c2.gif"" width=""43"" height=""29""></td>
<td height=""10"" background=""/img/1_r2_c41.gif""></td>
<td width=""37"" colspan=""2"" rowspan=""2"" height=""29"">
<img border=""0"" src=""/img/1_r2_c6.gif"" width=""37"" height=""29""></td></tr><tr><td height=""19"">
<table border=""0"" width=""100%"" cellspacing=""0"" cellpadding=""0""><tr><td width=""60%"">
<div style=""width:120;color:0099FF;filter:glow(color=00FF66, strength=1)"">天之痕奔波</div></td>
<td width=""40%"" align=""right""></td></tr></table>
</td></tr><tr><td width=""10"" background=""/img/1_r4_c2.gif"">
</td><td width=""27""></td>
<td width=""100%"" height=""50"" style=""table-layout:fixed; WORD-WRAP:break-word; word-break:break-all"">来看看 还是一片枯叶飘过休~~~~呜~呼呼~
</td><td width=""22""></td><td width=""15"" background=""/img/1_r4_c2.gif""></td></tr><tr>
<td width=""43"" colspan=""2"" rowspan=""2"" height=""26"">
<img border=""0"" src=""/img/1_r6_c2.gif"" width=""43"" height=""27"">
</td><td height=""16"" align=""right""><img border=""0"" src=""/img/ip.gif"" width=""16"" height=""15"" title=""116.19.*.*"">
<font color=#336600>[2009-6-25 16:41:41]</font></td>
<td width=""43"" colspan=""2"" rowspan=""2"" height=""26"">
<img border=""0"" src=""/img/1_r6_c6.gif"" width=""37"" height=""27"">
</td></tr><tr><td height=""9"" background=""/img/1_r2_c4.gif"">
</td></tr></tbody></table></td></tr>
<tr><td width=""640"" colspan=""2"">
<hr SIZE=""1"" color=#F0F0F0></td></tr></table>";

// One match per guestbook entry. Named groups:
//   count   - entry number inside <font> between "第" and "条留言"
//   actor   - poster name, the text of the first <div> after the count
//   content - message body, everything inside the <td width="100%"> cell
//   date    - timestamp between [ and ] inside the trailing <font>
// Every gap uses a lazy ".*?" under RegexOptions.Singleline so that:
//   * the pattern does not depend on where the HTML happens to break lines;
//   * a match stops at the end of its own entry instead of swallowing the
//     rest of the page, so a page with several entries yields several matches.
Regex reg = new Regex(
    @"第\s*<font[^>]*>(?<count>\d+)</font>\s*条留言" +
    @".*?<div[^>]*>(?<actor>.*?)</div>" +
    @".*?<td\s+width=""100%""[^>]*>(?<content>.*?)</td>" +
    @".*?<font[^>]*>\[(?<date>[^\]]+)\]</font>",
    RegexOptions.IgnoreCase | RegexOptions.Singleline);
MatchCollection mc = reg.Matches(s);
foreach (Match m in mc)
{
    // Trim() drops the stray CR/LF that surrounds the text inside the cells.
    Console.WriteLine("条数:" + m.Groups["count"].Value + "\r\n"
        + "发帖人:" + m.Groups["actor"].Value.Trim() + "\r\n"
        + "内容:" + m.Groups["content"].Value.Trim() + "\r\n"
        + "时间:" + m.Groups["date"].Value.Trim());
}
/* ***************************** Output *********************
条数:1068
发帖人:天之痕奔波
内容:来看看 还是一片枯叶飘过休~~~~呜~呼呼~
时间:2009-6-25 16:41:41 */
<tr><td width=""130"" valign=""top""><p align=""center""><img border=""0"" src=""face/m02.gif"">
<br><Br>第<font color=""#FF0000"">1068</font>条留言</p></td>
<td width=""425"" valign=""top"">
<table border=""0"" width=""100%"" cellspacing=""0"" cellpadding=""0"">
<tbody><tr><td width=""43"" colspan=""2"" rowspan=""2"" height=""29"">
<img border=""0"" src=""/img/1_r2_c2.gif"" width=""43"" height=""29""></td>
<td height=""10"" background=""/img/1_r2_c41.gif""></td>
<td width=""37"" colspan=""2"" rowspan=""2"" height=""29"">
<img border=""0"" src=""/img/1_r2_c6.gif"" width=""37"" height=""29""></td></tr><tr><td height=""19"">
<table border=""0"" width=""100%"" cellspacing=""0"" cellpadding=""0""><tr><td width=""60%"">
<div style=""width:120;color:0099FF;filter:glow(color=00FF66, strength=1)"">天之痕奔波</div></td>
<td width=""40%"" align=""right""></td></tr></table>
</td></tr><tr><td width=""10"" background=""/img/1_r4_c2.gif"">
</td><td width=""27""></td>
<td width=""100%"" height=""50"" style=""table-layout:fixed; WORD-WRAP:break-word; word-break:break-all"">来看看 还是一片枯叶飘过休~~~~呜~呼呼~
</td><td width=""22""></td><td width=""15"" background=""/img/1_r4_c2.gif""></td></tr><tr>
<td width=""43"" colspan=""2"" rowspan=""2"" height=""26"">
<img border=""0"" src=""/img/1_r6_c2.gif"" width=""43"" height=""27"">
</td><td height=""16"" align=""right""><img border=""0"" src=""/img/ip.gif"" width=""16"" height=""15"" title=""116.19.*.*"">
<font color=#336600>[2009-6-25 16:41:41]</font></td>
<td width=""43"" colspan=""2"" rowspan=""2"" height=""26"">
<img border=""0"" src=""/img/1_r6_c6.gif"" width=""37"" height=""27"">
</td></tr><tr><td height=""9"" background=""/img/1_r2_c4.gif"">
</td></tr></tbody></table></td></tr>
<tr><td width=""640"" colspan=""2"">
<hr SIZE=""1"" color=#F0F0F0></td></tr></table>";
// Strip every HTML tag from s. String.Replace only substitutes literal text and
// returns a new string, so a pattern needs Regex.Replace and the result must be
// assigned back.
s = Regex.Replace(s, @"<[^<>]+>", "");
// Split the page into one chunk per guestbook entry: each chunk runs from the
// opening outer <table ...> tag through the closing <hr> row of that entry.
Regex reg_tb = new Regex(@"<table border=""0"" width=""556"" cellpadding=""0"" bgcolor=""#FFFFFF"">.+?<hr SIZE=""1"" color=#F0F0F0></td></tr></table>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
for (Match mat = reg_tb.Match(html); mat.Success; mat = mat.NextMatch())
{
    // Text of the current entry's table.
    string table = mat.Value;
    // Run the field-extraction code from reply #2 on `table` here.
}
不过 2 楼的代码有点问题:很多地方 m.Groups["actor"] 和 m.Groups["content"] 取到的是 "\r\r"。
用记事本打开查看 就显示的是一个黑方块 内容的第一个字符