// For every matched result cell, print its title (as plain text) and its link URL.
foreach (Match cell in matches)
{
    Match titleMatch = Regex.Match(cell.Value, regexTitle);
    if (titleMatch.Success)
    {
        string rawTitle = titleMatch.Groups["title"].Value;
        string link = titleMatch.Groups["url"].Value;

        // Remove any markup tags embedded in the captured title before printing it.
        Console.WriteLine(Regex.Replace(rawTitle, "<[^>]+>", ""));
        Console.WriteLine(link);
    }

    // A blank line is written for every cell, whether or not a title was found.
    Console.WriteLine();
}
{
    // Download a Baidu search result page (the query string carries wd=csdn) and
    // print the title and link of each result cell found in it.
    string resultHtml = GetUrlHtml("http://www.baidu.com/s?ie=utf-8&bs=msdn&f=8&rsv_bp=1&wd=csdn&rsv_sug3=1&inputT=1065");

    // One match per <td class="c-default"> or <td class="f"> cell.
    // The remainder of the opening tag is consumed with [^>]* so it ends at that
    // tag's own '>'. The previous [^?]* could run past the tag up to the first '?'
    // in the document, letting a single match swallow several cells.
    string regexTD = "(?is)<td class=\"(?:c-default|f)\"[^>]*>.*?</td>";

    // Inside a cell: the <h3 class="t"> heading, capturing the anchor's href as
    // "url" and everything up to </h3> as "title" (may still contain markup).
    string regexTitle = "(?is)<h3 class=\"t\"><a[^>]*?href=\"(?<url>[^\"]*)\"[^>]*>(?<title>.*?)</h3>";

    var matches = Regex.Matches(resultHtml, regexTD);
    foreach (Match match in matches)
    {
        string tdHtml = match.Value;
        Match m = Regex.Match(tdHtml, regexTitle);
        if (m.Success)
        {
            // Strip any tags left inside the title so only plain text is printed.
            Console.WriteLine(Regex.Replace(m.Groups["title"].Value, "<[^>]+>", ""));
            Console.WriteLine(m.Groups["url"].Value);
        }

        // Blank separator line, emitted for every cell whether or not it had a title.
        Console.WriteLine();
    }

    // Wait for console input before returning (presumably to keep the window open).
    Console.Read();
}
/// <summary>
/// Issues an HTTP request for <paramref name="url"/> and returns the response body as text.
/// </summary>
/// <param name="url">Address to request; it must produce an <see cref="HttpWebRequest"/>.</param>
/// <returns>
/// The full response body, decoded with the character set reported by the response,
/// or with UTF-8 when the response reports none or one that is not recognised.
/// </returns>
private static string GetUrlHtml(string url)
{
    HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(url);

    // The response, its stream and the reader are all disposed here, including when
    // reading throws; previously only the reader was closed, and only on success.
    using (HttpWebResponse hwrs = (HttpWebResponse)hwr.GetResponse())
    using (Stream stream = hwrs.GetResponseStream())
    {
        Encoding encoding = Encoding.UTF8;
        string charset = hwrs.CharacterSet;
        if (!string.IsNullOrEmpty(charset))
        {
            try
            {
                encoding = Encoding.GetEncoding(charset);
            }
            catch (ArgumentException)
            {
                // The server sent a charset name this platform does not know;
                // keep the UTF-8 default instead of failing the whole download.
            }
        }

        using (StreamReader sr = new StreamReader(stream, encoding))
        {
            return sr.ReadToEnd();
        }
    }
}
总是这样衣来伸手、饭来张口,不太好吧。
自己看看吧,这里有 Demo:http://www.shuxiaolong.com/Project/0/5I7UFN38GEQ.aspx