.NET 抓取网页内容后,怎么用正则表达式匹配出需要的部分?内容已经抓回来了,我想用正则把其中的内容匹配出来,求大神指点。主要代码如下:
try
{
    // Build the HTTP request for the address held in Capture and send a
    // "Pragma: no-cache" header with it.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Capture);
    request.Headers.Set("Pragma", "no-cache");

    // The response body is decoded as GB2312.
    // NOTE(review): on .NET Core / .NET 5+ this code page is only available after
    // registering the code-pages encoding provider - confirm the target runtime.
    Encoding encoding = Encoding.GetEncoding("GB2312");

    // The response, its stream and the reader all hold resources; the using
    // statements release them even when reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream streamReceive = response.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
    {
        // strResult receives the complete page source; this is the text that
        // still needs to be matched.
        strResult = streamReader.ReadToEnd();
    }
}
catch (System.Exception ex)
{
    // The fetch stays best-effort (no exception leaves this block), but the
    // failure is traced instead of being silently discarded.
    System.Diagnostics.Trace.TraceError("Fetching the page failed: " + ex);
}
return;
try
{
    // Build the HTTP request for the address held in Capture and send a
    // "Pragma: no-cache" header with it.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Capture);
    request.Headers.Set("Pragma", "no-cache");

    // The response body is decoded as GB2312.
    // NOTE(review): on .NET Core / .NET 5+ this code page is only available after
    // registering the code-pages encoding provider - confirm the target runtime.
    Encoding encoding = Encoding.GetEncoding("GB2312");

    // The response, its stream and the reader all hold resources; the using
    // statements release them even when reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream streamReceive = response.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
    {
        // strResult receives the complete page source; this is the text that
        // still needs to be matched.
        strResult = streamReader.ReadToEnd();
    }
}
catch (System.Exception ex)
{
    // The fetch stays best-effort (no exception leaves this block), but the
    // failure is traced instead of being silently discarded.
    System.Diagnostics.Trace.TraceError("Fetching the page failed: " + ex);
}
return;
// Regular expression with two named captures:
//   (?is)    - inline options: ignore case, and let '.' match newlines too
//   title    - the text inside the <h1 id="artibodyTitle" ...> element
//   content  - the text that starts after the first '>' following "<!-- publish"
//              and ends just before "<!-- publish_helper_end"
string regStr = "(?is)<h1 id=\"artibodyTitle\"[^>]*?>(?<title>.*?)</h1>.*?<!-- publish[^>]*?>(?<content>.*?)<!-- publish_helper_end";
可以试下~
没用过正则吗?
string regStr = "(?is)<h1 id=\"artibodyTitle\"[^>]*?>(?<title>.*?)</h1>.*?<!-- publish[^>]*?>(?<content>.*?)<!-- publish_helper_end";
// Apply the pattern in regStr to the fetched page source in strResult.
Match mc = Regex.Match(strResult, regStr);
// A Match exposes its captures through the Groups collection (it has no
// "Group" member). If nothing matched, each group's Value is an empty string,
// so the two lines below print blank lines rather than throwing.
Console.WriteLine(mc.Groups["title"].Value);
Console.WriteLine(mc.Groups["content"].Value);
// Alternative pattern that anchors on element ids:
//   title - the text directly inside <h1 ... id="artibodyTitle" ...>, up to the next '<'
//   body  - the inner markup of <div ... id="artibody" ...>; the balancing group "g"
//           is pushed on every nested "<div" and popped on every "</div>", and
//           (?(g)(?!)) rejects the capture while any nested div is still open, so
//           the capture ends at the artibody div's own closing tag.
string articlePattern = @"(?is)<h1[^>]*?id=""artibodyTitle""[^>]*>(?<title>[^<]+).*?<div[^>]*?id=""artibody""[^>]*>(?<body>((?<g><div)|(?<-g></div>)|(?!</?div)[\s\S])*(?(g)(?!)))</div>";
Match articleMatch = Regex.Match(str, articlePattern);

// Print the captured title.
string articleTitle = articleMatch.Groups["title"].Value;
Console.WriteLine(articleTitle);

// Print the body with every "<...>" tag removed.
string bodyMarkup = articleMatch.Groups["body"].Value;
string bodyText = Regex.Replace(bodyMarkup, "<[^>]*>", "");
Console.WriteLine(bodyText);

// Block until a line is read from standard input.
Console.ReadLine();
使用 Regex 需要先在文件开头添加 using System.Text.RegularExpressions;