我要获取一些URL地址,写出了以下的正则表达式: <a[^>]+href=("(?<href>[^"#]*)"|'(?<href>[^'#]*)'|(?<href>[^\s\n>#]*))[^>]*>\s*((\d)*|下一页|尾页)\s*</a> 需要匹配以下的地址: <a href='#' class='on'>1</a> <a href='/topic/list_94_48_0_2.html'>2</a> <a href='/topic/list_94_48_0_3.html'>3</a> <a href='/topic/list_94_48_0_4.html'>4</a> <a href='/topic/list_94_48_0_5.html'>5</a> <a href='/topic/list_94_48_0_6.html'>6</a> <a href='/topic/list_94_48_0_7.html'>7</a> <a href='/topic/list_94_48_0_8.html'>8</a> <a href='/topic/list_94_48_0_9.html'>9</a> <a href='/topic/list_94_48_0_10.html'>10</a> <a href='/topic/list_94_48_0_2.html'>下一页</a> <a href='/topic/list_94_48_0_43.html'>尾页</a> 我用匹配器测试,结果显示所有地址都匹配。但是我现在不想匹配href='#'这个地址,可是匹配器仍然显示匹配了这个地址。哪位高人可以帮我改动一下这个正则表达式?改成不获取“#”而获取其它的正常地址。
// Prints the full text of every pager anchor whose link text is a page number,
// "下一页" or "尾页" and whose href holds a real URL. The current-page anchor
// (href='#') is not printed because the href capture rejects '#'.
void Main()
{
    const string pagerMarkup = @"<a href='#' class='on'>1</a> <a href='/topic/list_94_48_0_2.html'>2</a> <a href='/topic/list_94_48_0_3.html'>3</a> <a href='/topic/list_94_48_0_4.html'>4</a> <a href='/topic/list_94_48_0_5.html'>5</a> <a href='/topic/list_94_48_0_6.html'>6</a> <a href='/topic/list_94_48_0_7.html'>7</a> <a href='/topic/list_94_48_0_8.html'>8</a> <a href='/topic/list_94_48_0_9.html'>9</a> <a href='/topic/list_94_48_0_10.html'>10</a> <a href='/topic/list_94_48_0_2.html'>下一页</a> <a href='/topic/list_94_48_0_43.html'>尾页</a>";

    // Group 1 captures the optional opening delimiter (quote or whitespace) and
    // \1 demands the same delimiter after the URL. Group 2 is the URL itself;
    // its character class excludes '#', so href='#' can never match.
    const string anchorPattern = @"(?i)<a[^>]*?href=(['""\s]?)([^'""\s#]+)\1[^>]*>(?:\d+|下一页|尾页)</a>";

    MatchCollection anchors = Regex.Matches(pagerMarkup, anchorPattern);
    for (int index = 0; index < anchors.Count; index++)
    {
        Console.WriteLine(anchors[index].Value);
    }

    /* Output:
    <a href='/topic/list_94_48_0_2.html'>2</a>
    <a href='/topic/list_94_48_0_3.html'>3</a>
    <a href='/topic/list_94_48_0_4.html'>4</a>
    <a href='/topic/list_94_48_0_5.html'>5</a>
    <a href='/topic/list_94_48_0_6.html'>6</a>
    <a href='/topic/list_94_48_0_7.html'>7</a>
    <a href='/topic/list_94_48_0_8.html'>8</a>
    <a href='/topic/list_94_48_0_9.html'>9</a>
    <a href='/topic/list_94_48_0_10.html'>10</a>
    <a href='/topic/list_94_48_0_2.html'>下一页</a>
    <a href='/topic/list_94_48_0_43.html'>尾页</a>
    */
}
MatchCollection mc = Regex.Matches(_lstContent, this.ListPattern, RegexOptions.Singleline | RegexOptions.IgnoreCase); for (int i = 0; i < mc.Count; i++)
{
Match m = mc[i]; HtmlLinkLabel link = new HtmlLinkLabel(); link.Href = GetUrl(m.Groups["href"].Value);
// Prints only the URL (the named "href" group) of every pager anchor whose
// link text is a page number, "下一页" or "尾页". The current-page anchor
// (href='#') yields no output because the href capture rejects '#'.
void Main()
{
    string pagerMarkup = @"<a href='#' class='on'>1</a> <a href='/topic/list_94_48_0_2.html'>2</a> <a href='/topic/list_94_48_0_3.html'>3</a> <a href='/topic/list_94_48_0_4.html'>4</a> <a href='/topic/list_94_48_0_5.html'>5</a> <a href='/topic/list_94_48_0_6.html'>6</a> <a href='/topic/list_94_48_0_7.html'>7</a> <a href='/topic/list_94_48_0_8.html'>8</a> <a href='/topic/list_94_48_0_9.html'>9</a> <a href='/topic/list_94_48_0_10.html'>10</a> <a href='/topic/list_94_48_0_2.html'>下一页</a> <a href='/topic/list_94_48_0_43.html'>尾页</a>";

    // Group 1 is the optional opening delimiter, matched again by \1 after the URL.
    // The named group "href" is the URL; '#' is excluded from its character class.
    string linkPattern = @"(?i)<a[^>]*?href=(['""\s]?)(?<href>[^'""\s#]+)\1[^>]*>(?:\d+|下一页|尾页)</a>";

    // Walk the matches one at a time, left to right, until no further match is found.
    Match current = Regex.Match(pagerMarkup, linkPattern);
    while (current.Success)
    {
        string url = current.Groups["href"].Value;
        Console.WriteLine(url);
        current = current.NextMatch();
    }

    /* Output:
    /topic/list_94_48_0_2.html
    /topic/list_94_48_0_3.html
    /topic/list_94_48_0_4.html
    /topic/list_94_48_0_5.html
    /topic/list_94_48_0_6.html
    /topic/list_94_48_0_7.html
    /topic/list_94_48_0_8.html
    /topic/list_94_48_0_9.html
    /topic/list_94_48_0_10.html
    /topic/list_94_48_0_2.html
    /topic/list_94_48_0_43.html
    */
}