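// Question: I am collecting URLs but do not understand regular expressions, so I am asking for help.
// iL holds a collection of entries such as "v.do?x=http://www.xxx.com", "go.htm?u=http://www.xxx.com.cn", "/", "/4393489", "aa.htm"; I need to extract the URLs embedded in them, e.g. "http://www.xxx.com". Looking for a regex that does this. Thanks in advance.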
// Collects every absolute http/https URL embedded in the entries of iL,
// e.g. "v.do?x=http://www.xxx.com" contributes "http://www.xxx.com".
List<string> tmp = new List<string>();
// The scheme is mandatory so that plain file names such as "v.do" or "aa.htm"
// are not reported as URLs. The pattern has no trailing space (the earlier
// version required a space after every URL) and the path class has no space,
// so a match cannot run on into unrelated text that follows the URL.
Regex reg = new Regex(@"https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?", RegexOptions.IgnoreCase);
foreach (string s in iL)
{
    // Regex.Matches throws on null input; skip entries that cannot hold a URL.
    if (string.IsNullOrEmpty(s))
    {
        continue;
    }

    foreach (Match m in reg.Matches(s))
    {
        tmp.Add(m.Value);
    }
}
return tmp;
// Gathers the absolute http/https URLs found inside each entry of iL
// (for example, "go.htm?u=http://www.xxx.com.cn" yields "http://www.xxx.com.cn").
List<string> tmp = new List<string>();
// Requiring the scheme keeps bare names like "aa.htm" out of the result.
// The previous pattern ended with a literal space, so it only matched URLs
// followed by a space; it also allowed spaces inside the path. Both are removed.
Regex reg = new Regex(@"https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?", RegexOptions.IgnoreCase);
foreach (string s in iL)
{
    // A null entry would make Regex.Matches throw, and an empty one has nothing to match.
    if (string.IsNullOrEmpty(s))
    {
        continue;
    }

    foreach (Match m in reg.Matches(s))
    {
        tmp.Add(m.Value);
    }
}
return tmp;
// Note: this one requires the "www" part to be present, e.g. http://www.abc.com
{
    // Demo: print every http/https URL embedded in a sample string.
    string str =@"v.do?x=http://www.xxx.com"",go.htm?u=http://www.xxx.com.cn,/,/4393489,aa.htm";
    // The dot between host labels is escaped; unescaped it matches any character,
    // so labels would not have to be separated by real dots. The path class no
    // longer contains a space, so a match stops at the end of the URL.
    Regex re = new Regex(@"(?i)https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?");
    foreach (Match m in re.Matches(str))
    {
        Console.WriteLine(m.Value);
    }
}
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Linq.Expressions;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;

namespace ConsoleApplication11
{
    /// <summary>
    /// Console demo that prints every http/https URL embedded in a sample string.
    /// </summary>
    class Program
    {
        // Built once instead of on every call. Matches a scheme, a dotted host name
        // (hyphens allowed in labels) and an optional path/query made of
        // word characters and - . / ? % & =
        private static readonly Regex UrlPattern = new Regex(
            @"https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Writes each URL found in the sample text to standard output, one per line.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // The last URL is glued directly onto "aa.htm" to show that a match
            // does not need a separator in front of the scheme.
            string str = @"v.do?x=http://www.xxx.com"",go.htm?u=http://www.xxx.com.cn,/,/4393489,aa.htmhttp://www.a-b.com";
            foreach (Match m in UrlPattern.Matches(str))
            {
                Console.WriteLine(m.Value);
            }
        }
    }
}