// Single-line copy of the first scraping attempt (the leading text is the poster's question, not code).
// It downloads the index page with WebClient, then walks regex matches whose group 1 is the
// title attribute and group 2 is the href, building an absolute link for each match.
// The title/link locals are only assigned here; nothing consumes them in this snippet.
// NOTE(review): the pattern expects the attribute order title, href, target and non-empty,
// whitespace-free anchor text; the sample anchor quoted below uses target, href, title with
// empty text, so it would not match.
是要获取标题和链接吗? var httpClient = new WebClient(); var page = httpClient.DownloadString("http://www.lwxs.org/books/0/20/index.html"); const string pattern = "<a title=\"([^<]*)\" href=\"([^<]*)\" target=\"_blank\" >\\S+</a>"; var myRegex = new Regex(pattern, RegexOptions.IgnoreCase); if (!myRegex.IsMatch(page)) return;
var myMatch = myRegex.Match(page); while (myMatch.Success) { var title= myMatch.Groups[1].Value; var link = "http://www.lwxs.org/books/0/20/" + myMatch.Groups[2].Value; myMatch = myMatch.NextMatch(); }
示例标签:<a target="_blank" href="3530.html" title="第1章抓周上"></a>。发现把整个页面代码拿出来测试就不行了 0 0,求帮助。原网站:http://www.lwxs.org/books/0/20/index.html
// Downloads the chapter index page and walks every <a> tag that carries both a
// title and an href attribute, reading the title and building the absolute link.
// The two attributes are located with lookaheads, so their order inside the tag
// does not matter (e.g. <a target="_blank" href="3530.html" title="..."></a>
// matches), and the anchor text may be empty or contain spaces.
// NOTE(review): httpClient is expected to be declared before this snippet — confirm.
const string baseUrl = "http://www.lwxs.org/books/0/20/";
var page = httpClient.DownloadString(baseUrl + "index.html");
// Group 1 = title attribute value, group 2 = href attribute value.
// "<a\b" keeps tags such as <abbr> from being treated as anchors.
const string pattern = "<a\\b(?=[^>]*\\stitle\\s*=\\s*\"([^\"]*)\")(?=[^>]*\\shref\\s*=\\s*\"([^\"]*)\")[^>]*>";
var myRegex = new Regex(pattern, RegexOptions.IgnoreCase);
var myMatch = myRegex.Match(page);
// Check the first match directly instead of scanning the page twice with IsMatch + Match.
if (!myMatch.Success)
{
    return;
}
while (myMatch.Success)
{
    var title = myMatch.Groups[1].Value;
    // The href values are relative file names, so prefix them with the book's base URL.
    var link = baseUrl + myMatch.Groups[2].Value;
    myMatch = myMatch.NextMatch();
}
// Downloads the chapter index page, narrows it to the region that starts at the
// first "<DIV class=dccss>" marker and ends just before the last one, then prints
// the anchor text and absolute link of every <a> tag found in that region.
// NOTE(review): httpClient is expected to be declared before this snippet — confirm.
const string baseUrl = "http://www.lwxs.org/books/0/20/";
const string marker = "<DIV class=dccss>";
var page = httpClient.DownloadString(baseUrl + "index.html");
var firstIndex = page.IndexOf(marker, StringComparison.Ordinal);
var lastIndex = page.LastIndexOf(marker, StringComparison.Ordinal);
// Substring throws on a start index of -1, and a single marker would yield an
// empty slice. So: no marker -> scan the whole page; one marker -> scan from it
// to the end; two or more -> scan between the first and the last.
if (firstIndex >= 0)
{
    page = lastIndex > firstIndex
        ? page.Substring(firstIndex, lastIndex - firstIndex)
        : page.Substring(firstIndex);
}
// Group 1 = href value, group 2 = anchor text.
// The quote class is ['"] (a '|' inside a character class is a literal pipe, not
// alternation), and the href capture stops at a quote so it cannot run into a
// following attribute that is not separated by whitespace.
const string pattern = "<a[^>]*?href=['\"]([^'\"<\\s]*)['\"][^>]*?>([^<]*)</a>";
var myRegex = new Regex(pattern, RegexOptions.IgnoreCase);
for (var myMatch = myRegex.Match(page); myMatch.Success; myMatch = myMatch.NextMatch())
{
    // The href values are relative file names, so prefix them with the book's base URL.
    var link = baseUrl + myMatch.Groups[1].Value;
    var title = myMatch.Groups[2].Value;
    Console.WriteLine("标题:" + title);
    Console.WriteLine("链接:" + link);
    Console.WriteLine("----------------------------------");
}
Console.WriteLine("按任意键继续...");
// Keeps the console window open until input arrives.
Console.Read();