我要通过C#取所有超连接 jquery :$(":a") 可以取所有的。 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 jquery :$("a") 可以取所有的。 foreach(Macth m in Regex.Matches("html代码","<a href=""(?<href>[\s\S]*?)""[^>]*>[\s\S]*?</a>")){ //m.Groups["href"].value} jquery :$(":a") 可以取所有的。/^(src|href)=[\'\"]http:\/\/(.*)/is你也可以采用使用正则表达式的方法取~例如http://sports.sina.com.cn/global/ 需要把里面所有的连接的url拿出来并输出成列表大概就所有 a href = "http://xxxx.xxxx.xx" 中的http://xxxx.xxxx.xx拿出来然后输出成列表例如google.com的输出大概就会是 images.google.com/imghp?hl=en&tab=wi maps.google.com/maps?hl=en&tab=wl news.google.com/nwshp?hl=en&tab=wn www.google.com/prdhp?hl=en&tab=wf mail.google.com/mail/?hl=en&tab=wm www.google.com/intl/en/options/ video.google.com/?hl=en&tab=wv groups.google.com/grphp?hl=en&tab=wg books.google.com/bkshp?hl=en&tab=wp scholar.google.com/schhp?hl=en&tab=ws finance.google.com/finance?hl=en&tab=we blogsearch.google.com/?hl=en&tab=wb www.youtube.com/?hl=en&tab=w1 www.google.com/calendar/render?hl=en&tab=wc picasaweb.google.com/home?hl=en&tab=wq docs.google.com/?hl=en&tab=wo www.google.com/reader/view/?hl=en&tab=wy sites.google.com/?hl=en&tab=w3 www.google.com/intl/en/options/如果可以的话多给点分吧!!!!!!!!!!!!!!!!!!!!!!! 1 xmlhttp 拿到你要取的网页 2 saveas .html 3 iframe open .html 4 javascript: var alist=window.iframe1.document.getElementsByTagName("a"); 剩下就不用说了吧 try..Regex re = new Regex(@"http://(?<url>[^\u4e00-\u9fa5\s]*)|<a\s*href=""(?<url>[^""]*)""[^>]*>"); string s = @"http://zhidao.baidu.com/question/102440411.html里面的还其它信息,包手 <A等... 
http://zhidao.baidu.com/question/105773275.html <a href=""/question/102440411.html"">新闻 </a> <a href=""/question/102440412.html"">新闻 </a> <a href=""/question/102440413.html"">新闻 </a> <a href=""/question/102440414.html"">新闻 </a> "; Match m; for (m = re.Match(s); m.Success; m = m.NextMatch()) { Console.WriteLine(m.Groups["url"].ToString()); } 正则获取href string webDocContent=""; string strPattern=@"a[\s]+href=(?<Link>[^\s>]+)[^>]*>(?<Text>[^<]*)</a>"; MatchCollection Matches=Regex.Matches(webDocContent,strPattern,RegexOptions.IgnoreCase|RegexOptions.Compiled); foreach(Match NextMatch in Matches) { string URL=NextMatch.Groups["Link"].Value.ToString().Trim(); string URLText=NextMatch.Groups["Text"].Value.ToString().Trim(); Response.Write(URL); Response.Write(URLText); } string str = ""; Regex re = new Regex(@"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>(?<text>.*?)</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline); MatchCollection mc = re.Matches(str); Console.WriteLine(mc.Count); foreach (Match m in mc) { Console.WriteLine("{0}:{1}", m.Groups["href"].Value, m.Groups["text"].Value); } 优化了下 static void Main(string[] args) { Regex re = new Regex(@"(?<url>\bhttp://[^\u4e00-\u9fa5\s]*)|<a\s*href=""(?<url>[^""]*)""[^>]*>"); string s = @"http://zhidao.baidu.com/question/102440411.html里面的还其它信息,包手 <A等... http://zhidao.baidu.com/question/105773275.html <a href=""/question/102440411.html"">新闻 </a> <a href=""/question/102440412.html"">新闻 </a> <a href=""/question/102440413.html"">新闻 </a> <a href=""/question/102440414.html"">新闻 </a> "; Match m; for (m = re.Match(s); m.Success; m = m.NextMatch()) { Console.WriteLine(m.Groups["url"].ToString()); } } 请教 Repeater内的CheckBox问题 找不到数据库服务器 如何用C#修改注册表值 在aspx中调用ascx作为页面头,ascx有张图片,可是怎么也无法将图片顶到浏览器最上面,怎么解决 一个字符串如何概率随机显示出来 怎样复制整个文件夹 关于我国地区的IP分布 网上下了源码后怎么打开项目呀 页面刷新问题 我要通过C#取所有超连接 我要通过C#取所有超连接
// Walk every <a href="...">...</a> element found in the HTML text and expose
// its target through the named group "href". The literal "html代码" is the
// placeholder from the original post and stands for the page source to scan.
// The pattern must be a verbatim string (@"...") so that "" is an escaped
// quote and \s / \S reach the regex engine unchanged.
foreach (Match m in Regex.Matches("html代码", @"<a href=""(?<href>[\s\S]*?)""[^>]*>[\s\S]*?</a>"))
{
    // m.Groups["href"].Value holds the link target of the current anchor.
}
/^(src|href)=[\'\"]http:\/\/(.*)/is
你也可以采用正则表达式的方法来取~
例如 http://sports.sina.com.cn/global/ ,需要把页面里所有链接的 URL 取出来并输出成列表,也就是把所有 a href = "http://xxxx.xxxx.xx" 中的 http://xxxx.xxxx.xx 取出来,然后输出成列表。例如 google.com 的输出大概会是: images.google.com/imghp?hl=en&tab=wi maps.google.com/maps?hl=en&tab=wl news.google.com/nwshp?hl=en&tab=wn www.google.com/prdhp?hl=en&tab=wf mail.google.com/mail/?hl=en&tab=wm www.google.com/intl/en/options/ video.google.com/?hl=en&tab=wv groups.google.com/grphp?hl=en&tab=wg books.google.com/bkshp?hl=en&tab=wp scholar.google.com/schhp?hl=en&tab=ws finance.google.com/finance?hl=en&tab=we blogsearch.google.com/?hl=en&tab=wb www.youtube.com/?hl=en&tab=w1 www.google.com/calendar/render?hl=en&tab=wc picasaweb.google.com/home?hl=en&tab=wq docs.google.com/?hl=en&tab=wo www.google.com/reader/view/?hl=en&tab=wy sites.google.com/?hl=en&tab=w3 www.google.com/intl/en/options/
如果可以的话多给点分吧!!!!!!!!!!!!!!!!!!!!!!!
// Two alternatives, both capturing into the named group "url":
//   1. a bare "http://" URL in running text - the scheme is matched but left
//      out of the capture, and the URL ends at the first whitespace character
//      or character in the range U+4E00-U+9FA5;
//   2. an <a href="..."> tag - the double-quoted href value is captured as is.
Regex re = new Regex(@"http://(?<url>[^\u4e00-\u9fa5\s]*)|<a\s*href=""(?<url>[^""]*)""[^>]*>");

// Sample input: two bare URLs followed by four anchors with relative hrefs.
string s = @"http://zhidao.baidu.com/question/102440411.html里面的还其它信息,包手 <A等... 
http://zhidao.baidu.com/question/105773275.html
<a href=""/question/102440411.html"">新闻 </a>
<a href=""/question/102440412.html"">新闻 </a>
<a href=""/question/102440413.html"">新闻 </a>
<a href=""/question/102440414.html"">新闻 </a> ";

// Print each captured URL on its own line, in the order it occurs in the text.
foreach (Match hit in re.Matches(s))
{
    Console.WriteLine(hit.Groups["url"].Value);
}
// Extract the target ("Link") and the visible text ("Text") of every anchor in
// an HTML document and write both to the HTTP response.
// HTML to scan; empty placeholder - assign the downloaded page source here.
string webDocContent = "";

// Pattern notes:
//   * the leading '<' anchors the match at a real tag start, so the pattern
//     can no longer begin inside another tag name that ends in 'a'
//     (e.g. <area href=...>);
//   * the href value is matched as "double-quoted", 'single-quoted' or bare,
//     so the quote characters are not part of the captured link and quoted
//     URLs containing spaces are no longer cut at the first space;
//   * Text only matches anchors whose content contains no nested tags.
string strPattern = @"<a\s+href=(?:""(?<Link>[^""]*)""|'(?<Link>[^']*)'|(?<Link>[^\s>]+))[^>]*>(?<Text>[^<]*)</a>";
MatchCollection Matches = Regex.Matches(webDocContent, strPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
foreach (Match NextMatch in Matches)
{
    // Group.Value is already a string; only surrounding whitespace is trimmed.
    string URL = NextMatch.Groups["Link"].Value.Trim();
    string URLText = NextMatch.Groups["Text"].Value.Trim();

    // NOTE(review): both values are written to the response unencoded. If
    // webDocContent comes from an external page, consider HTML-encoding them.
    Response.Write(URL);
    Response.Write(URLText);
}
// HTML text to scan. The declaration was missing from this snippet, which left
// `str` undefined; empty placeholder - assign the real page source here.
string str = "";

// Matches <a ... href=...>text</a>, capturing:
//   href - the attribute value, whether single-quoted, double-quoted or bare;
//   text - everything up to the nearest closing </a>. Singleline lets '.' span
//          line breaks, and the lazy quantifier stops at the first </a>.
Regex re = new Regex(@"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>(?<text>.*?)</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
MatchCollection mc = re.Matches(str);

// Print the number of anchors found, then one "href:text" line per anchor.
Console.WriteLine(mc.Count);
foreach (Match m in mc)
{
    Console.WriteLine("{0}:{1}", m.Groups["href"].Value, m.Groups["text"].Value);
}
/// <summary>
/// Demo entry point: scans a fixed sample text for URLs and prints each one on
/// its own console line, in the order it occurs.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Sample input: two bare URLs followed by four anchors with relative hrefs.
    string sample = @"http://zhidao.baidu.com/question/102440411.html里面的还其它信息,包手 <A等... 
http://zhidao.baidu.com/question/105773275.html
<a href=""/question/102440411.html"">新闻 </a>
<a href=""/question/102440412.html"">新闻 </a>
<a href=""/question/102440413.html"">新闻 </a>
<a href=""/question/102440414.html"">新闻 </a> ";

    // Both alternatives capture into the named group "url":
    //   1. a bare URL starting with "http://" at a word boundary, ending at the
    //      first whitespace character or character in U+4E00-U+9FA5 (the
    //      scheme is part of the capture);
    //   2. the double-quoted href value of an <a href="..."> tag.
    Regex urlPattern = new Regex(@"(?<url>\bhttp://[^\u4e00-\u9fa5\s]*)|<a\s*href=""(?<url>[^""]*)""[^>]*>");

    foreach (Match hit in urlPattern.Matches(sample))
    {
        Console.WriteLine(hit.Groups["url"].Value);
    }
}