求一个正则表达式 如题<div class="MIB_feed_c">.......</div>这中间有无数html标记,包括div标签。我怎么样获取这中间的数据,包括这两个标签。另,网页中这样的标签不只出现一次。能一次都获取吗? 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 Regex reg = new Regex(@"(?is)<div[^>]*?class=""MIB_feed_c""[^>]*>(.*?)</div>");MatchCollection mc = reg.Matches(str);foreach (Match m in mc){ TextBox1.Text += m.Groups[1].Value + "\n";} 请问入门 str 是什么啊? str就是获取你网页的源代码。 /// <summary> /// 根据 url 获取网页编码 /// </summary> /// <param name="url"></param> /// <returns></returns> public static string GetEncoding(string url) { HttpWebRequest request = null; HttpWebResponse response = null; StreamReader reader = null; try { request = (HttpWebRequest)WebRequest.Create(url); request.Timeout = 20000; request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse(); if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024) { if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress)); else reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII); string html = reader.ReadToEnd(); Regex reg_charset = new Regex(@"charset\b\s*=\s*(?<charset>[^""]*)"); if (reg_charset.IsMatch(html)) { return reg_charset.Match(html).Groups["charset"].Value; } else if (response.CharacterSet != string.Empty) { return response.CharacterSet; } else return Encoding.Default.BodyName; } } catch { } finally { if (response != null) { response.Close(); response = null; } if (reader != null) reader.Close(); if (request != null) request = null; } return Encoding.Default.BodyName; }用这段代码 运行一下 抓取你的HTML信息然后赋给String str=..然后在用如梦的方法。 求助如梦:比如<div class="MIB_feed_c"><div>test</div></div>匹配出来的结果是<div>test我想得到的结果是<div>test</div>谢谢! 
[code C#] string str = "<div class=\"MIB_feed_c\">test1test1<div>test1</div>test1test1</div>" +"aaaa<ul><li>aaaa</li></ul>" + "<div class=\"MIB_feed_c\"><div>test2</div></div>" + "bbbb" + "<div class=\"MIB_feed_c\"><div>test3</div></div>"; Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(<div[^>]*?>.*?</div>)*.*?</div>"); MatchCollection match = reg.Matches(str); foreach (Match m in match) { Response.Write(m.Value + "<br/>"); }[/code] 楼主说了是有嵌套的,如果用正则处理,除非是嵌套层次固定,否则不用平衡组基本上就是在开玩笑 string test = @"<div class=""MIB_feed_c""><div>test</div></div>"; Regex reg = new Regex(@"(?is)<div\s+class=""MIB_feed_c"">((?><div(?<o>)|</div>(?<-o>)|(?:(?!</?div).)*)*)(?(o)(?!))</div>"); MatchCollection mc = reg.Matches(test); foreach (Match m in mc) { richTextBox2.Text += m.Groups[1].Value + "\n================\n"; } 强悍的过客兄,学习了,平衡组以前还真没用过网上查了资料 string str = "<div class=\"MIB_feed_c\">test1<ul><li>ulli</li></ul>test1<div>tes<ul><li title='ul li'>ul li</li></ul>t1</div>test1test1</div>" +"aaaa<ul><li>aaaa</li></ul>" + "<div class=\"MIB_feed_c\"><div>test2</div></div>" + "bbbb" + "<div class=\"MIB_feed_c\"><div>test3</div></div>"; //Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(<div[^>]*?>.*?</div>)*.*?</div>"); Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(((?'Open'<div[^>]*>)[^<>]*)+((?'-Open'</div>)[^<>]*)+)*(?(Open)(?!))</div>"); MatchCollection match = reg.Matches(str); foreach (Match m in match) { Response.Write(m.Value + "<br/>"); } //咦,我的UBB出来了 string str = "<div class=\"MIB_feed_c\">test1<ul><li>ulli</li></ul>test1<div>tes<ul><li title='ul li'>ul li</li></ul>t1</div>test1test1</div>" +"aaaa<ul><li>aaaa</li></ul>" + "<div class=\"MIB_feed_c\"><div>test2</div></div>" + "bbbb" + "<div class=\"MIB_feed_c\"><div>test3</div></div>"; Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(((?'Open'<div[^>]*>)[^<>]*)+((?'-Open'</div>)[^<>]*)+)*(?(Open)(?!))</div>"); 
MatchCollection match = reg.Matches(str); foreach (Match m in match) { Response.Write(m.Value + "<br/>"); } 会员登录界面增加模块 asp.net 数据库查询出错?? 关于网页内容抓取有问,用的是HtmlAgilityPack 关于autocomplete的问题 关于Sql注入问题?????? linq是啥? 在datagrid 中如何 用javascript改变样式? 纯html 的 带grid 的select 框,有什么办法做? 关于客户端可修改界面的问题 主机托管的管理 如何点安全退出就直接关掉整个框架页? C#调用WinRar压缩文件夹时,文件夹含有空格怎么处理
// Run the previously built pattern over the input text and append what
// capture group 1 holds for each hit to the text box, one entry per line.
MatchCollection found = reg.Matches(str);
for (int i = 0; i < found.Count; i++)
{
    Match hit = found[i];
    TextBox1.Text += hit.Groups[1].Value + "\n";
}
str就是获取你网页的源代码。 /// <summary>
/// Requests the page at <paramref name="url"/> and reports the character set it declares.
/// </summary>
/// <remarks>
/// The body is only inspected when the server answers 200 OK and the reported
/// Content-Length is below 1 MiB; redirects are not followed. A charset declared
/// inside the document takes precedence over the one from the response headers.
/// Every failure (malformed URL, network error, corrupt gzip data, ...) is treated
/// as "unknown" and produces the system default instead of an exception.
/// </remarks>
/// <param name="url">Address of the page to inspect.</param>
/// <returns>
/// The charset name found in the document; otherwise the response's
/// <c>CharacterSet</c>; otherwise <c>Encoding.Default.BodyName</c>.
/// </returns>
public static string GetEncoding(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 20000;
        request.AllowAutoRedirect = false;

        // "using" guarantees the response (and its connection) is released on every path.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE(review): HttpWebResponse reports a missing Content-Length as -1, which
            // also passes this size check; the read below is then unbounded - confirm
            // whether a hard cap is wanted.
            if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
            {
                bool gzipped = string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);

                string html;
                using (StreamReader reader = gzipped
                    ? new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress))
                    : new StreamReader(response.GetResponseStream(), Encoding.ASCII))
                {
                    html = reader.ReadToEnd();
                }

                // Accept both charset=utf-8 (inside a content="..." attribute) and
                // charset="utf-8" / charset='utf-8'. The opening quote is skipped and the
                // capture stops at the first character that cannot be part of a charset
                // name, so a quoted value no longer yields an empty string and an unquoted
                // one no longer swallows the rest of the tag.
                Match declared = Regex.Match(
                    html,
                    @"charset\b\s*=\s*[""']?(?<charset>[\w.:+-]+)",
                    RegexOptions.IgnoreCase);
                if (declared.Success)
                {
                    return declared.Groups["charset"].Value;
                }

                // Fall back to the charset from the response headers; guard against null
                // as well as empty so callers never receive a null name.
                if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    return response.CharacterSet;
                }
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: any failure while fetching or decoding the page
        // means the encoding is unknown, so fall through to the default below.
    }

    return Encoding.Default.BodyName;
}用这段代码 运行一下 抓取你的HTML信息然后赋给String str=..然后在用如梦的方法。
<div class="MIB_feed_c">
<div>test</div>
</div>
匹配出来的结果是 <div>test,而我想得到的结果是:
<div>test</div>
谢谢!
// Sample input: three MIB_feed_c containers separated by unrelated markup.
string firstContainer = "<div class=\"MIB_feed_c\">test1test1<div>test1</div>test1test1</div>";
string listFiller = "aaaa<ul><li>aaaa</li></ul>";
string secondContainer = "<div class=\"MIB_feed_c\"><div>test2</div></div>";
string thirdContainer = "<div class=\"MIB_feed_c\"><div>test3</div></div>";
string str = firstContainer + listFiller + secondContainer + "bbbb" + thirdContainer;

// (?is): ignore case, and let '.' cross line breaks.
// The class value may be wrapped in ', " or whitespace; \1 requires the same
// character to close it. After the opening tag the pattern allows repeated
// inner <div>...</div> pairs before the closing </div>.
Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(<div[^>]*?>.*?</div>)*.*?</div>");

// Write every complete match, followed by a line break tag.
MatchCollection hits = reg.Matches(str);
for (int i = 0; i < hits.Count; i++)
{
    Response.Write(hits[i].Value + "<br/>");
}
[/code]
// Sample input: one MIB_feed_c container holding a single nested <div>.
string test = @"<div class=""MIB_feed_c"">
<div>test</div>
</div>";

// Balancing-group pattern:
//   <div(?<o>)       pushes a marker onto group "o" for every opening div,
//   </div>(?<-o>)    pops one marker for every closing div,
//   (?(o)(?!))       fails the match if any marker is still on the stack,
// so the final </div> is the one that closes the MIB_feed_c container.
// (?is) makes the match case-insensitive and lets '.' cross line breaks.
Regex reg = new Regex(@"(?is)<div\s+class=""MIB_feed_c"">((?><div(?<o>)|</div>(?<-o>)|(?:(?!</?div).)*)*)(?(o)(?!))</div>");
MatchCollection mc = reg.Matches(test);
foreach (Match m in mc)
{
    // Group 1 is the markup between the container's opening tag and its closing </div>.
    richTextBox2.Text += m.Groups[1].Value + "\n================\n";
}
强悍的过客兄,学习了。平衡组以前还真没用过,网上查了资料。
// Sample input: the first container mixes <ul>/<li> markup with a nested <div>;
// two simpler containers follow, separated by unrelated text.
string firstContainer = "<div class=\"MIB_feed_c\">test1<ul><li>ulli</li></ul>test1<div>tes<ul><li title='ul li'>ul li</li></ul>t1</div>test1test1</div>";
string listFiller = "aaaa<ul><li>aaaa</li></ul>";
string secondContainer = "<div class=\"MIB_feed_c\"><div>test2</div></div>";
string thirdContainer = "<div class=\"MIB_feed_c\"><div>test3</div></div>";
string str = firstContainer + listFiller + secondContainer + "bbbb" + thirdContainer;

// Previous pattern, superseded by the balancing-group version below:
//Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(<div[^>]*?>.*?</div>)*.*?</div>");

// Each inner <div ...> is pushed onto the named group 'Open' and each </div>
// pops one entry; (?(Open)(?!)) rejects the match while anything is left on it.
Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(((?'Open'<div[^>]*>)[^<>]*)+((?'-Open'</div>)[^<>]*)+)*(?(Open)(?!))</div>");

// Write every complete match, followed by a line break tag.
MatchCollection hits = reg.Matches(str);
for (int i = 0; i < hits.Count; i++)
{
    Response.Write(hits[i].Value + "<br/>");
}
//咦,我的UBB出来了
// Sample input assembled from its parts: three MIB_feed_c containers with
// unrelated markup between them.
string[] parts =
{
    "<div class=\"MIB_feed_c\">test1<ul><li>ulli</li></ul>test1<div>tes<ul><li title='ul li'>ul li</li></ul>t1</div>test1test1</div>",
    "aaaa<ul><li>aaaa</li></ul>",
    "<div class=\"MIB_feed_c\"><div>test2</div></div>",
    "bbbb",
    "<div class=\"MIB_feed_c\"><div>test3</div></div>"
};
string str = string.Concat(parts);

// Named balancing group: 'Open' gains an entry per inner <div ...> and loses one
// per </div>; the conditional (?(Open)(?!)) fails while any entry remains.
Regex reg = new Regex(@"(?is)<div[^>]*?class=(['""\s]?)MIB_feed_c\1[^>]*?>.*?(((?'Open'<div[^>]*>)[^<>]*)+((?'-Open'</div>)[^<>]*)+)*(?(Open)(?!))</div>");

// Emit each whole match with a trailing line break tag.
MatchCollection match = reg.Matches(str);
int index = 0;
while (index < match.Count)
{
    Response.Write(match[index].Value + "<br/>");
    index++;
}