请问如何使用正则表达式从多个网页的每一页中提取多个具有特定特征的链接？

我需要从多个网页的每一页中提取多个链接。目前使用 MatchCollection 提取页面中的所有链接，但发现程序占用的内存越来越多。有什么办法可以根据链接的特征只提取符合条件的链接？例如链接地址中含有“/Product/ProductInfo.asp”，或者链接文字是“[下一页]”。另外，是用正则表达式快，还是逐个字符分析快？有什么好方法吗？

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

/// <summary>
/// Downloads the feed at <paramref name="url"/> via <c>GetWebPage</c> and extracts every
/// <c>&lt;item&gt;</c> element's title, link, author and publication date using regular expressions.
/// </summary>
/// <param name="url">Address of the feed to download.</param>
/// <returns>
/// An <see cref="ArrayList"/> holding one <c>ServiceResponseInfoItem</c> per <c>&lt;item&gt;</c>
/// element found; empty when the page contains none.
/// </returns>
public static ArrayList CallRssService(string url)
{
    ArrayList list = new ArrayList();

    // Grab the page
    string rawResponse = GetWebPage(url);

    // NOTE: the outer <item> pattern is case-sensitive, while the per-field patterns below
    // are matched case-insensitively (kept exactly as in the original code).
    MatchCollection items = Regex.Matches(rawResponse, @"<item>(.*?)</item>", RegexOptions.Singleline);
    foreach (Match item in items)
    {
        // Read the captured item body once instead of re-evaluating item.Result("$1")
        // for every field.
        string itemXml = item.Groups[1].Value;
        ServiceResponseInfoItem responseInfoItem = new ServiceResponseInfoItem();

        string title = MatchFirstGroup(itemXml, @"<title>(.*?)</title>");
        if (title != null)
        {
            responseInfoItem.Title = title;
        }

        string link = MatchFirstGroup(itemXml, @"<link>(.*?)</link>");
        if (link != null)
        {
            responseInfoItem.Link = link;
        }

        string author = MatchFirstGroup(itemXml, @"<author>(.*?)</author>");
        if (author != null)
        {
            responseInfoItem.Author = author;
        }

        string pubDate = MatchFirstGroup(itemXml, @"<pubDate>(.*?)</pubDate>");
        if (pubDate != null)
        {
            responseInfoItem.PubDate = pubDate;
        }

        // Add new item to Response Info
        list.Add(responseInfoItem);
    }

    return list;
}

/// <summary>
/// Returns the text captured by group 1 of the first match of <paramref name="pattern"/>
/// in <paramref name="input"/>, or <c>null</c> when the pattern does not match.
/// </summary>
private static string MatchFirstGroup(string input, string pattern)
{
    Match match = Regex.Match(input, pattern, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    return match.Success ? match.Groups[1].Value : null;
}

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body decoded as UTF-8 text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The complete response body as a string.</returns>
/// <remarks>
/// Exceptions raised while creating the request or reading the response propagate to the
/// caller unchanged, with their original stack trace.
/// </remarks>
private static string GetWebPage(string url)
{
    WebRequest req = WebRequest.Create(url);
    // Timeout is expressed in milliseconds: 20 seconds.
    req.Timeout = 20000;

    // Dispose the response and its stream deterministically so connections and buffers
    // are released after every page instead of accumulating across many downloads.
    using (WebResponse result = req.GetResponse())
    using (Stream receiveStream = result.GetResponseStream())
    // Decode through a StreamReader rather than chunk-by-chunk: decoding fixed 512-byte
    // blocks independently corrupts multi-byte UTF-8 characters that straddle a block boundary.
    using (StreamReader reader = new StreamReader(receiveStream, Encoding.GetEncoding("utf-8")))
    {
        return reader.ReadToEnd();
    }
}
如果只是查找“/Product/ProductInfo.asp”这样的固定字符串，那么逐个字符地查找（直接做字符串匹配）会比正则表达式快。