匹配区域:<div id="hot">
html数据区域,该区域有子div嵌套
</div>
提取 id=hot 的 div 中所有超链接的地址和超链接的中文说明
提取网址:http://news.qq.com/
获取html函数:
/// <summary>
/// Downloads the page at <paramref name="url"/> with an HTTP GET and returns
/// capture group 1 of the first match of <paramref name="regStr"/> in the page text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="regStr">Regular expression pattern; its first capture group is the result.</param>
/// <returns>
/// The text captured by group 1 of the first match, or an empty string when the
/// pattern does not match the page at all.
/// </returns>
public static string GetContent(string url, string regStr)
{
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "GET";
    req.ContentType = "application/x-www-form-urlencoded";

    string value;
    // Dispose the response and its stream so the underlying connection is released.
    using (HttpWebResponse wsp = (HttpWebResponse)req.GetResponse())
    using (Stream raw = wsp.GetResponseStream())
    {
        Stream st = raw;
        string contentEncoding = wsp.ContentEncoding ?? string.Empty;
        if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            // The body is gzip-compressed; decompress it while reading.
            st = new GZipStream(raw, CompressionMode.Decompress);
        }

        // Decoded with the system default encoding, exactly as before.
        // NOTE(review): the charset declared by the page is not consulted - confirm
        // this matches the target site's encoding.
        using (StreamReader sr = new StreamReader(st, Encoding.Default))
        {
            value = sr.ReadToEnd();
        }
    }

    // Only the first match is needed; guard against "no match" instead of
    // indexing into an empty match collection.
    Match first = Regex.Match(value, regStr);
    return first.Success ? first.Groups[1].Value : string.Empty;
}
在线等 结贴加分
html数据区域,该区域有子div嵌套
</div>
提取 id=hot 的 div 中所有超链接的地址和超链接的中文说明
提取网址:http://news.qq.com/
获取html函数:
/// <summary>
/// Downloads the page at <paramref name="url"/> with an HTTP GET and returns
/// capture group 1 of the first match of <paramref name="regStr"/> in the page text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="regStr">Regular expression pattern; its first capture group is the result.</param>
/// <returns>
/// The text captured by group 1 of the first match, or an empty string when the
/// pattern does not match the page at all.
/// </returns>
public static string GetContent(string url, string regStr)
{
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "GET";
    req.ContentType = "application/x-www-form-urlencoded";

    string value;
    // Dispose the response and its stream so the underlying connection is released.
    using (HttpWebResponse wsp = (HttpWebResponse)req.GetResponse())
    using (Stream raw = wsp.GetResponseStream())
    {
        Stream st = raw;
        string contentEncoding = wsp.ContentEncoding ?? string.Empty;
        if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            // The body is gzip-compressed; decompress it while reading.
            st = new GZipStream(raw, CompressionMode.Decompress);
        }

        // Decoded with the system default encoding, exactly as before.
        // NOTE(review): the charset declared by the page is not consulted - confirm
        // this matches the target site's encoding.
        using (StreamReader sr = new StreamReader(st, Encoding.Default))
        {
            value = sr.ReadToEnd();
        }
    }

    // Only the first match is needed; guard against "no match" instead of
    // indexing into an empty match collection.
    Match first = Regex.Match(value, regStr);
    return first.Success ? first.Groups[1].Value : string.Empty;
}
在线等 结贴加分
// Parse the downloaded HTML and write out the address and text of every
// hyperlink found anywhere inside <div id="hot">.
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml("采集到的string");
// "//a" under the div selects anchors at any depth, so nested child divs are covered.
HtmlNodeCollection anchors = htmlDoc.DocumentNode.SelectNodes(@"//div[@id='hot']//a");
// HtmlAgilityPack's SelectNodes is documented to return null (not an empty
// collection) when nothing matches, so guard before iterating.
if (anchors != null)
{
    foreach (HtmlNode anchor in anchors)
    {
        var href = anchor.Attributes["href"];
        if (href == null)
        {
            // An <a> without an href (e.g. a named anchor) is not a hyperlink; skip it.
            continue;
        }

        // NOTE(review): values are written to the response without HTML encoding,
        // as in the original - confirm the scraped page is trusted or encode here.
        Response.Write(href.Value + "<br/>");
        Response.Write(anchor.InnerText + "<br/><br/>");
    }
}
// Demo: extract a <div> with a given id, including its nested child divs, using
// .NET balancing groups. Enter an id per line; enter "G" (any case) to quit.
string input = @"<div id=""a"">AAA<div id=""b"">BB<div id=""c"">CCC</div> B</div> </div> ";
string id = Console.ReadLine(); // id of the div to extract
// ReadLine returns null at end of input, so stop then as well as on "G".
while (id != null && !string.Equals(id.Trim(), "G", StringComparison.OrdinalIgnoreCase))
{
    // Regex.Escape keeps regex metacharacters in the typed id from altering the pattern.
    // 'Open' is pushed for each nested <div ...> and popped for each </div>;
    // (?(Open)(?!)) fails the match if any nested div is left unclosed.
    string pattern = @"<div id=""" + Regex.Escape(id) + @""">[^<>]*(((?'Open'<div[^>]*>)[^<>]*)+((?'-Open'</div>)[^<>]*)+)*(?(Open)(?!))</div>";
    Console.WriteLine(Regex.Match(input, pattern));
    id = Console.ReadLine();
}
请问 HtmlNodeCollection 类能否提供一下下载地址?
HtmlDocument 这个类的下载地址 请提供一下 马上结贴了 谢谢
HtmlDocument 这个类的下载地址 请提供一下 马上结贴了 谢谢