MatchCollection mc = Regex.Matches(html,@"(?is)(?<=<img src=(['""])?)(https?)?[^""]+(?:jpg|png|gif)(?=\1)"); foreach(Match m in mc) { //m.Value; }如果不限制扩展名。MatchCollection mc = Regex.Matches(html,@"(?is)(?<=<img src=(['""])?)(https?)?[^""]+(?=\1)"); foreach(Match m in mc) { //m.Value; }
Regex reg = new Regex(@"(?i)<img[^>]*?\ssrc\s*=\s*(['""]?)(?<src>[^'""\s>]+)\1[^>]*>"); MatchCollection mc = reg.Matches(yourStr); foreach (Match m in mc) { Console.Write(m.Groups["src"].Value + "\n"); } 替换就用Regex.Replace
提取 Regex reg = new Regex(@"(?is)<\w+[^>]*?(?:src|background)=(['""]?)(?<src>[^'""\s>]+)\1[^>]*>"); MatchCollection mc = reg.Matches(yourStr); foreach (Match m in mc) { richTextBox2.Text += m.Groups["src"].Value + "\n"; }如果是相对地址替换为绝对地址 Regex reg = new Regex(@"(?is)(<\w+[^>]*?(?:src|background)=(['""]?))([^'""\s>]+\2[^>]*>)"); string result = reg.Replace(yourStr, "$1http://www.test.com/$3");
楼上几位 背景填充方式<td background="xxx.jpg"> 这种方式怎么取得呢?
// Finds the values of background=... attributes in `html`.
// Regex.Matches (plural) is required: Regex.Match returns a single Match,
// which cannot be assigned to a MatchCollection.
// The pattern only applies when `background=` directly follows the tag name
// and one space, because the look-behind is `<\w+ background=`.
// NOTE(review): the quote group is optional as a whole, so an unquoted value
// leaves group 1 unset and the \1 look-ahead appears never to match.
MatchCollection mc = Regex.Matches(html, @"(?is)(?<=<\w+ background=(['""])?)(https?)?[^""]+(?=\1)");
foreach (Match m in mc)
{
    // m.Value is the matched attribute value.
}
楼上的 background不怎么好用呀。
// Demo: prints each background attribute value that the pattern finds in a
// hard-coded snippet.
private static void TestRegex11()
{
    // Sample input: a td tag whose first attribute is a double-quoted background.
    string html = @"楼上几位 背景填充方式<td background=""xxx.jpg""> 这种方式怎么取得呢?";

    // The look-behind anchors on the tag name followed by ` background=`.
    const string pattern = @"(?is)(?<=<\w+ background=(['""])?)(https?)?[^""]+(?=\1)";

    MatchCollection found = Regex.Matches(html, pattern);
    for (int i = 0; i < found.Count; i++)
    {
        Console.WriteLine(found[i].Value);
    }
}
另外这个方法MatchCollection mc = Regex.Matches(html,@"(?is)(?<=<img src=(['""])?)(https?)?[^""]+(?:jpg|png|gif)(?=\1)"); foreach(Match m in mc) { //m.Value; }也是没有引号就出不来。
foreach(Match m in mc)
{
//m.Value;
}如果不限制扩展名。MatchCollection mc = Regex.Matches(html,@"(?is)(?<=<img src=(['""])?)(https?)?[^""]+(?=\1)");
foreach(Match m in mc)
{
//m.Value;
}
// Run the previously built `reg` over `yourStr` and print the named group
// "src" of every match, one per line.
MatchCollection srcMatches = reg.Matches(yourStr);
for (int i = 0; i < srcMatches.Count; i++)
{
    string src = srcMatches[i].Groups["src"].Value;
    Console.Write(src + "\n");
}
替换就用Regex.Replace
// Collect the src/background attribute value of every tag in yourStr and
// append each one, newline-terminated, to richTextBox2.
Regex reg = new Regex(@"(?is)<\w+[^>]*?(?:src|background)=(['""]?)(?<src>[^'""\s>]+)\1[^>]*>");
MatchCollection mc = reg.Matches(yourStr);
for (int i = 0; i < mc.Count; i++)
{
    // The named group "src" excludes quotes, whitespace and '>'.
    string url = mc[i].Groups["src"].Value;
    richTextBox2.Text += url + "\n";
}如果是相对地址替换为绝对地址
// Inserts http://www.test.com/ in front of the src/background value of every
// matching tag in yourStr.
//   $1 = tag text up to and including the optional opening quote
//   $3 = attribute value, closing quote and the remainder of the tag
// NOTE(review): the pattern does not check whether the value is already an
// absolute URL, so such values would presumably be prefixed as well.
Regex reg = new Regex(@"(?is)(<\w+[^>]*?(?:src|background)=(['""]?))([^'""\s>]+\2[^>]*>)");
const string replacement = "$1http://www.test.com/$3";
string result = reg.Replace(yourStr, replacement);
这种方式怎么取得呢?
// Finds the values of background=... attributes in `html`.
// Regex.Matches (plural) is required: Regex.Match returns a single Match,
// which cannot be assigned to a MatchCollection.
// The look-behind is `<\w+ background=`, so only tags whose first attribute
// is background (after exactly one space) are considered.
MatchCollection mc = Regex.Matches(html, @"(?is)(?<=<\w+ background=(['""])?)(https?)?[^""]+(?=\1)");
foreach (Match m in mc)
{
    // m.Value is the matched attribute value.
}
{
    // Sample input: a td tag whose first attribute is a double-quoted
    // background; the literal deliberately spans two lines.
    string html = @"楼上几位 背景填充方式<td background=""xxx.jpg"">
这种方式怎么取得呢?";
    const string pattern = @"(?is)(?<=<\w+ background=(['""])?)(https?)?[^""]+(?=\1)";

    // Print every attribute value the pattern finds.
    MatchCollection found = Regex.Matches(html, pattern);
    for (int i = 0; i < found.Count; i++)
    {
        Console.WriteLine(found[i].Value);
    }
}
<td><td align="center" valign="middle" background="/images/1b3.jpg">
{
    // Sample input mixing quoted and unquoted background attributes.
    string html = @"<td colspan=""2"" rowspan=""2"" valign=""top"" background=""images/default_r7_c14_2.jpg"" height=480></td><td colspan=""2"" width=317 height=42 background=images/default_r6_c14.jpg></td>
<td><td align=""center"" valign=""middle"" background=""/images/1b3.jpg"">
";
    // `[\s\S]*?` lets background appear anywhere after the tag name.
    // NOTE(review): the quote group is optional as a whole, so for an
    // unquoted value group 1 is unset and the \1 look-ahead appears never
    // to match - confirm against the .NET backreference rules.
    const string pattern = @"(?is)(?<=<\w+[\s\S]*?background=(['""])?)(https?)?[^""]+(?=\1)";

    MatchCollection found = Regex.Matches(html, pattern);
    for (int i = 0; i < found.Count; i++)
    {
        Console.WriteLine(found[i].Value);
    }
}
foreach(Match m in mc)
{
//m.Value;
}也是没有引号就出不来。
{
    // Sample input mixing quoted and unquoted background attributes.
    string html = @"<td colspan=""2"" rowspan=""2"" valign=""top"" background=""images/default_r7_c14_2.jpg"" height=480></td><td colspan=""2"" width=317 height=42 background=images/default_r6_c14.jpg></td>
<td><td align=""center"" valign=""middle"" background=""/images/1b3.jpg"">
";
    // Group 1 always participates here (the `?` is inside the group), so \1
    // is either the opening quote or empty; '>' is excluded from the value.
    const string pattern = @"(?is)(?<=<\w+[\s\S]*?background=(['""]?))(https?)?[^"">]+(?=\1)";

    MatchCollection found = Regex.Matches(html, pattern);
    for (int i = 0; i < found.Count; i++)
    {
        Console.WriteLine(found[i].Value);
    }
}
{
    // Sample markup: two quoted background attributes and one unquoted.
    string html = @"<td colspan=""2"" rowspan=""2"" valign=""top"" background=""images/default_r7_c14_2.jpg"" height=480></td><td colspan=""2"" width=317 height=42 background=images/default_r6_c14.jpg></td>
<td><td align=""center"" valign=""middle"" background=""/images/1b3.jpg"">
";

    // The optional quote is captured inside group 1, and the value stops at
    // a double quote or '>'.
    Regex backgroundValue = new Regex(@"(?is)(?<=<\w+[\s\S]*?background=(['""]?))(https?)?[^"">]+(?=\1)");
    MatchCollection hits = backgroundValue.Matches(html);

    int index = 0;
    while (index < hits.Count)
    {
        Console.WriteLine(hits[index].Value);
        index++;
    }
}
都正常的啊不过我才发现。没看完整题目。
2个都匹配。这样 private static void TestRegex11()
{
    // Sample markup covering double-quoted, unquoted and single-quoted values
    // for both the background and src attributes.
    string html = @"<td colspan=""2"" rowspan=""2"" valign=""top"" background=""images/default_r7_c14_2.jpg"" height=480></td><td colspan=""2"" width=317 height=42 background=images/default_r6_c14.jpg></td>
<td><td align=""center"" valign=""middle"" background=""/images/1b3.jpg"">
<td align=""center"" valign=""middle"" background=/images/1b3.jpg>
<img src='xxx.jpg'/> ";

    // Group 1 captures the optional opening quote; the value excludes both
    // quote characters and '>', and must be followed by the same quote.
    const string pattern = @"(?is)(?<=<\w+[\s\S]*?(?:background|src)=(['""]?))(https?)?[^'"">]+(?=\1)";
    Regex regExp = new Regex(pattern);

    // Print every attribute value found, one per line.
    MatchCollection found = regExp.Matches(html);
    for (int i = 0; i < found.Count; i++)
    {
        Console.WriteLine(found[i].Value);
    }
}
(['""]?)[^'""]*\1就可以匹配"aa"
'ff'
xx
这类似的。
foreach (Match m in mc)
{
Console.WriteLine(m.Value);
}
不出来
background:url(http://www.jznews.com.cn/pic/huamen01.jpg);类似这样的又无法取到了。
如果不行就算了。目前的正则也可以满足我的作业要求了。谢谢。
(?:jpg|png|gif)
// Demo: prints every background/src attribute value in the sample markup
// that ends in jpg, png or gif (case-insensitive via the (?i) option).
private static void TestRegex11()
{
    // Sample markup covering double-quoted, unquoted and single-quoted values.
    string html = @"<td colspan=""2"" rowspan=""2"" valign=""top"" background=""images/default_r7_c14_2.jpg"" height=480></td><td colspan=""2"" width=317 height=42 background=images/default_r6_c14.jpg></td>
<td><td align=""center"" valign=""middle"" background=""/images/1b3.jpg"">
<td align=""center"" valign=""middle"" background=/images/1b3.jpg>
<img src='xxx.jpg'/> ";

    // The pattern must stay on a single line. Inside a verbatim string a line
    // break is part of the literal, and without the (?x) option the regex
    // would then require a newline between the extension and the closing
    // quote, so nothing in the sample would match.
    Regex regExp = new Regex(@"(?is)(?<=<\w+[\s\S]*?(?:background|src)=(['""]?))(https?)?[^'"">]+(?:jpg|png|gif)(?=\1)");
    MatchCollection mc = regExp.Matches(html);
    foreach (Match m in mc)
    {
        Console.WriteLine(m.Value);
    }
}