存成.asp文件,执行,你用ASPHTTP抓内容的时候用这个很爽,当然自己要改进一下了
<%
Option Explicit

' Removes HTML tags from strHTML and returns the remaining text.
'   strHTML - the text that may contain HTML markup.
' Returns the text with every <...> tag removed; any stray "<" or ">" left
' behind is converted to &lt; / &gt; so it cannot be read as markup.
Function stripHTML(strHTML)
    Dim objRegExp, strOutput

    Set objRegExp = New RegExp
    objRegExp.IgnoreCase = True
    objRegExp.Global = True
    ' [^>] also matches line breaks, so a tag that spans several lines is
    ' removed as well ("." never matches a newline in VBScript).
    objRegExp.Pattern = "<[^>]+>"

    ' Replace all HTML tag matches with the empty string
    strOutput = objRegExp.Replace(strHTML, "")

    ' Replace all remaining < and > with &lt; and &gt;
    strOutput = Replace(strOutput, "<", "&lt;")
    strOutput = Replace(strOutput, ">", "&gt;")

    stripHTML = strOutput    ' Return the value of strOutput

    Set objRegExp = Nothing
End Function
%>
<form method="post" id=form1 name=form1>
<b>Enter an HTML String:</b><br>
<% ' HTML-encode the echoed value so submitted markup cannot break out of the field %>
<textarea name="txtHTML" cols="50" rows="8" wrap="virtual"><%=Server.HTMLEncode(Request("txtHTML") & "")%></textarea>
<p>
<input type="submit" value="Strip HTML Tags!" id=submit1 name=submit1>
</form>
<% if Len(Request("txtHTML")) > 0 then %>
<p><hr><p>
<b><u>View of string <i>with no</i> HTML stripping:</u></b><br>
<% ' Show the raw input as text: encoded inside <pre> instead of the obsolete <xmp>, which a "</xmp>" in the input could escape %>
<pre>
<%=Server.HTMLEncode(Request("txtHTML") & "")%>
</pre><p>
<b><u>View of string <i>with</i> HTML stripping:</u></b><br>
<pre>
<%=StripHTML(Request("txtHTML"))%>
</pre>
<% End If %>
' Sample HTML fragment containing several news links to extract.
Dim Titlehtml As String = "<tr><td width=5% height=19 align=center>·</td><td class=title14 height=20><a href= http://news1.jrj.com.cn/news/2005-01-19/000000999424.html target=_blank><font color=black ><u>项怀诚称社保基金境外投资首选港 暂无时间表</u></font></a></td><td align=right class=title14><font color=#777777>1月19日 15:43</font></td></tr><tr><td width=5% height=19 align=center>·</td><td class=title14 height=20><a href= http://news1.jrj.com.cn/news/2005-01-19/000000999413.html target=_blank><font color=black ><u>外汇储备高企 年内有望注资工农二行启动股改</u></font></a><FONT color=#ff0000 style='FONT-SIZE: 12px'>★</FONT></td><td align=right class=title14><font color=#777777>1月19日 15:37</font></td></tr><tr><td width=5% height=19 align=center>·</td><td class=title14 height=20><a href= http://news1.jrj.com.cn/news/2005-01-19/000000999383.html target=_blank><font color=black ><u>证监会李青原表示:希望年内解决股权分置问题</u></font></a></td><td align=right class=title14><font color=#777777>1月19日 15:16</font></td></tr><tr><td width=5% height=19 align=center>·</td><td class=title14 height=20><a href= http://news1.jrj.com.cn/news/2005-01-19/000000999105.html target=_blank><font color=black ><u>四大保障基金渐行渐近 投资人须树立风险意识</u></font></a></td><td align=right class=title14><font color=#777777>1月19日 09:46</font></td></tr><tr><td width=5% height=19 align=center>·</td><td class=title14 height=20>"

' Captures the link target (without the http:// prefix) in "url" and the
' underlined link text in "title".
' The option flags are combined with Or: And-ing two distinct flag values
' yields RegexOptions.None, which silently drops both options.
Dim re As System.Text.RegularExpressions.Regex = New System.Text.RegularExpressions.Regex("<a href=\s*http://(?<url>[^\s]*?)\s+target=_blank>[\s\S]*?<u>(?<title>[\s\S]*?)</u>[\s\S]*?</a>", System.Text.RegularExpressions.RegexOptions.IgnoreCase Or System.Text.RegularExpressions.RegexOptions.Multiline)

' Print every match as numbered a<n>=url / b<n>=title pairs.
Dim i As Integer = 1
For Each mm As System.Text.RegularExpressions.Match In re.Matches(Titlehtml)
    Console.WriteLine("a" & i.ToString & "=" & mm.Groups("url").Value)
    Console.WriteLine("b" & i.ToString & "=" & mm.Groups("title").Value)
    i += 1
Next
结果:
a1=news1.jrj.com.cn/news/2005-01-19/000000999424.html
b1=项怀诚称社保基金境外投资首选港 暂无时间表
a2=news1.jrj.com.cn/news/2005-01-19/000000999413.html b2=外汇储备高企 年内有望注资工农二行启动股改 a3=news1.jrj.com.cn/news/2005-01-19/000000999383.html b3=证监会李青原表示:希望年内解决股权分置问题 a4=news1.jrj.com.cn/news/2005-01-19/000000999105.html b4=四大保障基金渐行渐近 投资人须树立风险意识
// try:
// Extracts the href value and the underlined link text from each <a> element.
string s = ".........";

// The href value may be single-quoted, double-quoted or unquoted. The unquoted
// form stops at whitespace or '>' so it can never run past the end of the tag
// and swallow the following markup.
Regex re = new Regex(@"<a\s+href\s*=\s*('(?<href>[^']*)'|""(?<href>[^""]*)""|(?<href>[^\s>]*))[^>]*>.*?<u>(?<link>[^<]+)</u>.*?</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

foreach (Match m in re.Matches(s))
{
    Console.WriteLine("**{0}-----{1}***", m.Groups["href"].Value, m.Groups["link"].Value);
}
<%
Option Explicit

' Removes HTML tags from strHTML and returns the remaining text.
'   strHTML - the text that may contain HTML markup.
' Returns the text with every <...> tag removed; any stray "<" or ">" left
' behind is converted to &lt; / &gt; so it cannot be read as markup.
Function stripHTML(strHTML)
    Dim objRegExp, strOutput

    Set objRegExp = New RegExp
    objRegExp.IgnoreCase = True
    objRegExp.Global = True
    ' [^>] also matches line breaks, so a tag that spans several lines is
    ' removed as well ("." never matches a newline in VBScript).
    objRegExp.Pattern = "<[^>]+>"

    ' Replace all HTML tag matches with the empty string
    strOutput = objRegExp.Replace(strHTML, "")

    ' Replace all remaining < and > with &lt; and &gt;
    strOutput = Replace(strOutput, "<", "&lt;")
    strOutput = Replace(strOutput, ">", "&gt;")

    stripHTML = strOutput    ' Return the value of strOutput

    Set objRegExp = Nothing
End Function
%><form method="post" id=form1 name=form1>
<b>Enter an HTML String:</b><br>
<% ' HTML-encode the echoed value so submitted markup cannot break out of the field %>
<textarea name="txtHTML" cols="50" rows="8" wrap="virtual"><%=Server.HTMLEncode(Request("txtHTML") & "")%></textarea>
<p>
<input type="submit" value="Strip HTML Tags!" id=submit1 name=submit1>
</form><% if Len(Request("txtHTML")) > 0 then %>
<p><hr><p>
<b><u>View of string <i>with no</i> HTML stripping:</u></b><br>
<% ' Show the raw input as text: encoded inside <pre> instead of the obsolete <xmp>, which a "</xmp>" in the input could escape %>
<pre>
<%=Server.HTMLEncode(Request("txtHTML") & "")%>
</pre><p>
<b><u>View of string <i>with</i> HTML stripping:</u></b><br>
<pre>
<%=StripHTML(Request("txtHTML"))%>
</pre>
<% End If %>
' Print every match of re in Titlehtml as numbered a<n>=url / b<n>=title pairs.
Dim i As Integer = 1
For Each mm As System.Text.RegularExpressions.Match In re.Matches(Titlehtml)
    ' The loop body must start on its own line; a statement fused onto the
    ' For Each header does not compile.
    Console.WriteLine("a" & i.ToString & "=" & mm.Groups("url").Value)
    Console.WriteLine("b" & i.ToString & "=" & mm.Groups("title").Value)
    i += 1
Next
结果:
a1=news1.jrj.com.cn/news/2005-01-19/000000999424.html
b1=项怀诚称社保基金境外投资首选港 暂无时间表
a2=news1.jrj.com.cn/news/2005-01-19/000000999413.html
b2=外汇储备高企 年内有望注资工农二行启动股改
a3=news1.jrj.com.cn/news/2005-01-19/000000999383.html
b3=证监会李青原表示:希望年内解决股权分置问题
a4=news1.jrj.com.cn/news/2005-01-19/000000999105.html
b4=四大保障基金渐行渐近 投资人须树立风险意识
<a href=\s*http://(?<url>[^\s]*?)\s+target=_blank>[\s\S]*?<u>(?<title>[\s\S]*?)</u>[\s\S]*?</a>
{
// Writes the "href" and "link" named-group captures of m in the form **href-----link***.
// NOTE(review): m is presumably the Match variable of an enclosing foreach whose header is not visible here — confirm.
Console.WriteLine("**{0}-----{1}***",m.Groups["href"].Value, m.Groups["link"].Value);
}