1.获取远程页面的内容:
........其它html略........
<DIV class="news_rbox mart10 border1" id=ShowPhoto>
<DIV class=news_rtit1>
<DIV class=news_rnav5><a>中国</a><img src='' /></DIV>
</DIV>
</DIV>
........其它html略...........
2.要求结果:
<DIV class=news_rtit1>
<DIV class=news_rnav5><a>中国</a><img src='' /></DIV>
</DIV>
3.问题:如何提取标签为DIV且id=ShowPhoto的内容?
........其它html略........
<DIV class="news_rbox mart10 border1" id=ShowPhoto>
<DIV class=news_rtit1>
<DIV class=news_rnav5><a>中国</a><img src='' /></DIV>
</DIV>
</DIV>
........其它html略...........
2.要求结果:
<DIV class=news_rtit1>
<DIV class=news_rnav5><a>中国</a><img src='' /></DIV>
</DIV>
3.问题:如何提取标签为DIV且id=ShowPhoto的内容?
// Capture the inner HTML of the <div> whose id is ShowPhoto (bare or quoted id).
// A lazy "(.|\n)*?</div>" stops at the FIRST closing tag, which truncates the
// result as soon as the target contains nested <div>s. The balancing group
// "open" counts nested <div> / </div> pairs so the match only ends at the
// closing tag that belongs to the ShowPhoto element.
//   Groups[1] = inner HTML of the ShowPhoto div.
Regex expression = new Regex(
    @"<div\b[^>]*?\bid=[""']?ShowPhoto\b[""']?[^>]*>" +
    @"((?><div\b[^>]*>(?<open>)|</div>(?<-open>)|(?:(?!</?div\b).)*)*(?(open)(?!)))</div>",
    RegexOptions.IgnoreCase | RegexOptions.Singleline);
// Use the first match. Indexing Matches(text)[1] would read the second match and
// throw ArgumentOutOfRangeException when the page contains only one. When there is
// no match at all, Groups[1] is an unsuccessful Group whose Value is "".
Group g = expression.Match(text).Groups[1];
Regex reg = new Regex(@"(?is)(?<=<div\b(?:(?!id=).)*id=ShowPhoto[^>]*>)(?><div[^>]*>(?<o>)|</div>(?<-o>)|(?:(?!</?div\b).)*)*(?(o)(?!))(?=</div>)");
参考:【分享】正则平衡组应用场景分析及性能优化
<div class="news_rbox mart10 border1" id="ShowPhoto">
<div class="news_rtit1">
<div class="news_rnav5"></div>
</div>
<div style="padding:5px" class="c333">
<div class="news_rboxx"><div style="height:100px; overflow:hidden"><a href="http://photos.jinti.com/Channel_15569" target="_blank"><img src=http://pic.jinti.com/member/459491/140_105_20098149456705.jpg border="0" /></div>
<a href="http://photos.jinti.com/Channel_15569" target="_blank">华丽的美人鱼</a></div>
<div class="news_rboxx"><div style="height:100px; overflow:hidden"><a href="http://photos.jinti.com/Channel_15568" target="_blank"><img src=http://pic.jinti.com/member/459491/140_105_200981485528705.jpg border="0" /></div>
<a href="http://photos.jinti.com/Channel_15568" target="_blank">碎花蓝的猜想</a></div>
<div class="news_rboxx"><div style="height:100px; overflow:hidden"><a href="http://photos.jinti.com/Channel_15567" target="_blank"><img src=http://pic.jinti.com/member/459491/140_105_20098138562705.jpg border="0" /></div>
<a href="http://photos.jinti.com/Channel_15567" target="_blank">黄奕柔美小乔造型曝光</a></div>
<div class="news_rboxx"><div style="height:100px; overflow:hidden"><a href="http://photos.jinti.com/Channel_15566" target="_blank"><img src=http://pic.jinti.com/member/459491/140_105_200981291112705.jpg border="0" /></div>
<a href="http://photos.jinti.com/Channel_15566" target="_blank">安室奈美惠代言变宫...</a></div>
</div>
</div>
// Extracts the inner HTML of the <div> whose id attribute is ShowPhoto.
//   (?is)                 - inline options: ignore case, and let '.' match newlines.
//   id=(['"]?)ShowPhoto\1 - the id may be unquoted or wrapped in matching quotes;
//                           the optional quote character is capture group 1.
//   (?<o>) / (?<-o>)      - balancing group: pushed on every nested <div>, popped
//                           on every </div>, so nested divs stay inside the match.
//   (?(o)(?!))            - fails the match if any nested <div> is still unclosed.
// Groups[2] holds the inner HTML; the final </div> is the target element's own
// closing tag and is not part of that group.
Regex reg = new Regex(@"(?is)<div\b(?:(?!id=).)*id=(['""]?)ShowPhoto\1[^>]*>((?><div[^>]*>(?<o>)|</div>(?<-o>)|(?:(?!</?div\b).)*)*(?(o)(?!)))</div>");
/// <summary>
/// Sends <paramref name="data"/> as the body of an HTTP POST to
/// <paramref name="url"/> and returns the response body decoded as UTF-8.
/// </summary>
/// <param name="url">Address to post to; must resolve to an HTTP(S) request.</param>
/// <param name="data">
/// Request body. It is written as-is (UTF-8 bytes) with content type
/// <c>application/x-www-form-urlencoded</c>, so the caller is responsible for
/// URL-encoding the form fields.
/// </param>
/// <returns>
/// The response text, or <see cref="string.Empty"/> when the request cannot be
/// created or any step of the exchange fails.
/// </returns>
public static string PostData(string url, string data)
{
    try
    {
        HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
        if (request == null)
        {
            // Not an HTTP(S) address (e.g. file:// or ftp://): nothing to post to.
            return string.Empty;
        }

        // A fresh container is attached so cookie handling is enabled for this request.
        request.CookieContainer = new CookieContainer();
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";

        // Encode before opening the request stream so bad input fails without
        // touching the network.
        byte[] payload = Encoding.UTF8.GetBytes(data);
        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(payload, 0, payload.Length);
        }

        // Dispose the response, its stream and the reader deterministically;
        // leaving them open holds the underlying connection.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream receiveStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(receiveStream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best-effort contract is kept (callers get an empty string), but the
        // failure is no longer silent.
        System.Diagnostics.Trace.TraceWarning("PostData to '{0}' failed: {1}", url, ex);
        return string.Empty;
    }
}
你的那个方法里data传的是什么?按你给的这个页面,里面根本就没有id="ShowPhoto"的div。还有,给一下你是如何使用那个正则的代码。
Regex reg = new Regex(@"(?is)<div\b(?:(?!id=).)*id=(['""]?)ShowPhoto\1[^>]*>((?><div[^>]*>(?<o>)|</div>(?<-o>)|(?:(?!</?div\b).)*)*(?(o)(?!)))</div>");
// Append capture group 2 of every match to the output box, one entry per line.
foreach (Match hit in reg.Matches(yourStr))
{
    richTextBox2.Text = richTextBox2.Text + hit.Groups[2].Value + "\n";
}