html格式如下,要求可以同时匹配下面3种情况,这样我可以通过循环来获得所有td的内容了
1. <td> Approaching </td>
2. <td> </td>
3. <td> <a href="asdfsadf">Dakhla </a> </td>
谢谢了。最后一个 td 比较特殊,需要的内容在 <a> 的 title 属性上,如果可以的话,也麻烦写一个匹配它的正则表达式。
<tr >
<td>
Approaching
</td>
<td> </td>
<td>
<a
href="/lmiu/places/sightings.htm?placeID=5304">Dakhla </a>
</td>
<td>75.2 </td>
<td>15 Sep 2008 18:46 </td>
<td>15 Sep 2008 18:46 </td>
<td>
<a
href="/lmiu/places/sightings.htm?placeID=3903">BONNY.NIGERIA </a>
</td>
<td>27 Sep 2008 </td>
<td> <a id="0_ais__tt_link" class="tooltip_link"
href="#0_ais__tt_link"
title="GMT From: 15 Sep 2008 18:46 To: 15 Sep 2008 18:46
<br/>COG: 208 SOG: 8.3 Draft:
<br/>Updates #: 1
<br/>Latitude: 24?8'14''N
<br/>Longitude: 17?'0''W">details </a> </td>
</tr>
1. <td> Approaching </td>
2. <td> </td>
3. <td> <a href="asdfsadf">Dakhla </a> </td>
谢谢了。最后一个 td 比较特殊,需要的内容在 <a> 的 title 属性上,如果可以的话,也麻烦写一个匹配它的正则表达式。
<tr >
<td>
Approaching
</td>
<td> </td>
<td>
<a
href="/lmiu/places/sightings.htm?placeID=5304">Dakhla </a>
</td>
<td>75.2 </td>
<td>15 Sep 2008 18:46 </td>
<td>15 Sep 2008 18:46 </td>
<td>
<a
href="/lmiu/places/sightings.htm?placeID=3903">BONNY.NIGERIA </a>
</td>
<td>27 Sep 2008 </td>
<td> <a id="0_ais__tt_link" class="tooltip_link"
href="#0_ais__tt_link"
title="GMT From: 15 Sep 2008 18:46 To: 15 Sep 2008 18:46
<br/>COG: 208 SOG: 8.3 Draft:
<br/>Updates #: 1
<br/>Latitude: 24?8'14''N
<br/>Longitude: 17?'0''W">details </a> </td>
</tr>
/// <summary>
/// Extracts the inner content of every <c>&lt;td&gt;</c> element found in
/// <paramref name="body"/>.
/// </summary>
/// <param name="body">HTML fragment to scan; may be null or empty.</param>
/// <returns>
/// The distinct, non-empty cell contents in order of first appearance, with
/// leading and trailing whitespace removed. Any markup nested inside a cell
/// (for example an <c>&lt;a&gt;</c> tag) is returned as-is. An empty array is
/// returned when <paramref name="body"/> is null, empty, or has no cells.
/// </returns>
public static string[] GetRegBeteenString(string body)
{
    if (string.IsNullOrEmpty(body))
    {
        return new string[0];
    }

    // Singleline lets '.' cross line breaks so cells spread over several lines
    // still match; the lazy '.*?' stops at the first closing tag so adjacent
    // cells are not merged into a single match. '[^>]*' tolerates attributes.
    Regex re = new Regex(
        @"<td\b[^>]*>(?<Text>.*?)</td>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    ArrayList slist = new ArrayList();
    foreach (Match m in re.Matches(body))
    {
        // Read the named group directly: the previous "${Value}" substitution
        // referred to a group that does not exist and so produced the literal
        // text "${Value}" for every match.
        string result = m.Groups["Text"].Value.Trim();

        // Whitespace-only cells are treated as empty and skipped.
        if (result.Length == 0)
        {
            continue;
        }

        if (!slist.Contains(result))
        {
            slist.Add(result);
        }
    }

    return (string[])slist.ToArray(typeof(string));
}
<td>\s*<a\b[^>]*?\btitle="(?<title>[^"]*)"[^>]*>.*?</a>\s*</td>