哪位高手使用C#做过抓取网页栏目的链接和名称的 哪位高手使用C#做过抓取网页中栏目的链接和名称的?请指导下,有什么方法可以定位到网页中的栏目位置? 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 //读取 网页public string ReadHtml(string uri) { string retVal = null; try { HttpWebRequest request = WebRequest.Create(uri) as HttpWebRequest; request.Method = "GET"; request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)"; request.AllowAutoRedirect = false; HttpWebResponse response = request.GetResponse() as HttpWebResponse; if (response.StatusCode == HttpStatusCode.OK) { Console.Write("正在读取 {0} 页面 ", uri); StreamReader sReader = new StreamReader(response.GetResponseStream(), Encoding.Default); retVal = sReader.ReadToEnd(); Console.WriteLine(response.StatusDescription); } } catch (WebException ex) { if (ex.Status == WebExceptionStatus.ProtocolError) { HttpWebResponse res = ex.Response as HttpWebResponse; if (res.StatusCode == HttpStatusCode.NotFound) retVal = ""; else throw ex; } else { throw ex; } } return retVal; }//然后正规则 如获取 <a href="www.csdn.com">csdn</a> string pattrn =@"<a\s+href=\"([^\"]+)\">([^<]+)</a>"; Match match = Regex.Match(text,pattrn); string href = match.Groups[1].Value; string text = match.Groups[2].Value; WebBrowser 不会的到C#-Home群里问 TextBox怎么取数据库中的数据? 关于在timer中碰到的timeout的解决方法 请问,怎么来处理交错数组的赋值和提取~ 请教怎样写一个集合类? 关于java中代码移植到C#的一个问题! 如何清空指定程序的文本框? 茫然 窗体的height怎么不超过780?? 用chart 控件绘图怎么获取曲线颜色 使用Remoting做类似于MSN的p2p项目中遇到的一些问题? 如何提高C#插入或操作数据库的速度??? 问个窗体外绘制问题
/// <summary>
/// Reads a web page: issues an HTTP GET for <paramref name="uri"/> and returns the body as text.
/// </summary>
/// <param name="uri">Absolute HTTP/HTTPS address of the page to read.</param>
/// <returns>
/// The response body when the server answers 200 OK; an empty string when the server
/// answers 404 Not Found; <c>null</c> when the request completes without an exception
/// but with any other status (redirects are not followed, see AllowAutoRedirect below).
/// </returns>
/// <exception cref="WebException">
/// Rethrown unchanged for every failure other than a 404 protocol error.
/// </exception>
public string ReadHtml(string uri)
{
    string retVal = null;
    try
    {
        // Direct cast instead of `as`: a non-HTTP URI fails here with a clear
        // InvalidCastException rather than a NullReferenceException on the next line.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
        request.Method = "GET";
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)";
        request.AllowAutoRedirect = false;

        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                Console.Write("正在读取 {0} 页面 ", uri);
                // NOTE(review): Encoding.Default is the machine's ANSI code page and ignores
                // the charset declared by the server — confirm this matches the target pages.
                using (StreamReader sReader = new StreamReader(response.GetResponseStream(), Encoding.Default))
                {
                    retVal = sReader.ReadToEnd();
                    Console.WriteLine(response.StatusDescription);
                }
            }
        }
    }
    catch (WebException ex)
    {
        // ex.Response may be null or not an HTTP response; only a genuine HTTP 404 is
        // translated to an empty string.
        HttpWebResponse res = ex.Response as HttpWebResponse;
        bool notFound = ex.Status == WebExceptionStatus.ProtocolError
            && res != null
            && res.StatusCode == HttpStatusCode.NotFound;

        if (!notFound)
        {
            // `throw;` (not `throw ex;`) keeps the original stack trace.
            throw;
        }

        res.Close();
        retVal = "";
    }

    return retVal;
}
// Then apply a regular expression, e.g. to extract the link and caption from
// <a href="www.csdn.com">csdn</a>.
// `text` is the HTML to search; it must be defined by the surrounding code
// (presumably the string returned by ReadHtml — confirm at the call site).
// Inside a verbatim (@"...") string a double quote is written as "", not \".
string pattern = @"<a\s+href=""([^""]+)"">([^<]+)</a>";
Match match = Regex.Match(text, pattern);
// Groups[n].Value is an empty string when the pattern did not match.
string href = match.Groups[1].Value;
// Named linkText so it does not clash with the input variable `text`.
string linkText = match.Groups[2].Value;