如下
有字符串如
str=<html><head><title> 333 < 444</title></head><h1> 4 < 6 </h1><h2> 55 < 66</h2><br/></html>
现要求匹配每个标签,如果标签后有内容,需把内容一起匹配
也就是说,要求匹配结果按如下方式分组:
1:<html>
2:<head>
3:<title> 333<444
4:</title>
5:</head>
6:<h1> 4 <6
........
我只能做到下面这一步,但这样就匹配不到内容中的“<”号以及它后面的字符了:
Matcher m =
Pattern.compile("(<\\w+>)[^<]*|(</\\w+>)|(<\\w+/>)").matcher(str);请高人不吝指教啊!!!!!!!!!!
有字符串如
str=<html><head><title> 333 < 444</title></head><h1> 4 < 6 </h1><h2> 55 < 66</h2><br/></html>
现要求匹配每个标签,如果标签后有内容,需把内容一起匹配
也就是说,要求匹配结果按如下方式分组:
1:<html>
2:<head>
3:<title> 333<444
4:</title>
5:</head>
6:<h1> 4 <6
........
我只能做到下面这一步,但这样就匹配不到内容中的“<”号以及它后面的字符了:
Matcher m =
Pattern.compile("(<\\w+>)[^<]*|(</\\w+>)|(<\\w+/>)").matcher(str);请高人不吝指教啊!!!!!!!!!!
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Flattens a small HTML-like string into a list of tags in document order.
 *
 * <p>An opening tag whose element contains no nested elements is emitted together with its
 * text content (for example {@code "<title> 333 < 444"}), so a literal {@code '<'} inside the
 * text is preserved. Only attribute-less tags of the forms {@code <name>}, {@code </name>} and
 * {@code <name/>} are recognised.
 */
public class Test {

    /**
     * Recognises the three supported tag shapes: group 1 is the name of an opening tag,
     * group 2 the name of a closing tag and group 3 the name of a self-closing tag.
     */
    private static final Pattern TAG = Pattern.compile("<(\\w+)>|</(\\w+)>|<(\\w+)/>");

    /** Prints the numbered pieces of a sample document to standard output. */
    public static void main(String[] args) {
        String test = "<html><head><title> 333 < 444</title></head><h1> 4 < 6 </h1><h2> 55 < 66</h2><br/></html>";
        List<String> list = foo(test);
        for (int i = 0; i < list.size(); i++) {
            System.out.println((i + 1) + ": " + list.get(i));
        }
    }

    /**
     * Splits {@code element} into its tags, recursing into nested elements.
     *
     * <p>For every element {@code <name>content</name>} the result contains the opening tag
     * (with {@code content} appended when the content holds no nested elements), then the
     * pieces of the nested elements, then the closing tag. Self-closing tags are emitted as
     * they are. Opening tags without a matching closing tag and stray closing tags are skipped.
     * Text that sits next to nested elements inside the same parent is not emitted.
     *
     * @param element the markup to split; may be {@code null}
     * @return the pieces in document order; empty when {@code element} is {@code null} or
     *         contains no recognised element
     */
    public static List<String> foo(String element) {
        List<String> pieces = new ArrayList<String>();
        if (element == null) {
            return pieces;
        }
        Matcher tag = TAG.matcher(element);
        int from = 0;
        while (tag.find(from)) {
            from = tag.end();
            if (tag.group(3) != null) {
                // Self-closing tag: emitted verbatim, nothing to recurse into.
                pieces.add(tag.group());
                continue;
            }
            String name = tag.group(1);
            if (name == null) {
                // Closing tag that no opening tag at this level claimed.
                continue;
            }
            int contentStart = tag.end();
            int closeStart = indexOfMatchingClose(element, name, contentStart);
            if (closeStart < 0) {
                // Unbalanced opening tag: ignore it and keep scanning right after it.
                continue;
            }
            String content = element.substring(contentStart, closeStart);
            String openTag = "<" + name + ">";
            String closeTag = "</" + name + ">";
            List<String> children = foo(content);
            pieces.add(children.isEmpty() ? openTag + content : openTag);
            pieces.addAll(children);
            pieces.add(closeTag);
            // Resume after this element's own closing tag so that a following sibling with
            // the same name is treated as a separate element.
            from = closeStart + closeTag.length();
        }
        return pieces;
    }

    /**
     * Finds the closing tag that balances an already consumed opening tag {@code <name>}.
     *
     * <p>Nested opening tags of the same name increase the depth, so the closing tag returned
     * is the one belonging to the outer element rather than simply the first or the last one.
     *
     * @param text the text being scanned
     * @param name the element name to balance
     * @param from index just past the opening tag
     * @return the start index of the matching {@code </name>}, or {@code -1} if there is none
     */
    private static int indexOfMatchingClose(String text, String name, int from) {
        Matcher tag = TAG.matcher(text);
        tag.region(from, text.length());
        int depth = 1;
        while (tag.find()) {
            if (name.equals(tag.group(1))) {
                depth++;
            } else if (name.equals(tag.group(2))) {
                depth--;
                if (depth == 0) {
                    return tag.start();
                }
            }
        }
        return -1;
    }
}
2: <head>
3: <title> 333 < 444
4: </title>
5: </head>
6: <h1> 4 < 6
7: </h1>
8: <h2> 55 < 66
9: </h2>
10: <br/>
11: </html>
<html><h1>aaaaaaaaaa</h1></html>
因此就不存在标签的尖括号和内容的尖括号的混淆问题了吧
2: <h1>aaaaaaaaaa
3: </h1>
4: </html>
看起来结果是正确的……
<html><h1> 4 < 6 </h1><h1> 4 </> 6 </h1></html>;输出:
1: <html>
2: <h1> 4 < 6 </h1><h1> 4 </> 6
3: </h1>
4: </html>
问题:第一个<h1>匹配到了第二个</h1>,因为正则中的 (.*) 是贪婪匹配,会一直匹配到最后一个同名的结束标签。
using System.Collections;

namespace Test
{
    /// <summary>
    /// Console demo that splits an HTML-like string into pieces, one per tag, where each
    /// piece runs from the start of a tag up to the start of the next tag.
    /// </summary>
    class Class1
    {
        /// <summary>
        /// Walks the sample string once and prints every piece on its own line, then waits
        /// for the user to press Enter.
        /// </summary>
        static void Main(string[] args)
        {
            string markup = "<html><head><title> 333 < 444</title></head><h1> 4 < 6 </h1><h2> 55 < 66</h2><br/></html>";
            int pieceStart = 0;      // start index of the piece waiting to be printed
            int candidateStart = 0;  // index of the most recent '<' (or just past the last '>')
            int lastIndex = markup.Length - 1;

            for (int pos = 0; pos < markup.Length; pos++)
            {
                char current = markup[pos];

                if (current == '<')
                {
                    // A later '<' overrides an earlier one, so a '<' inside text content
                    // does not end the piece.
                    candidateStart = pos;
                    continue;
                }

                if (current != '>')
                {
                    continue;
                }

                // A '>' closes the tag that began at candidateStart, so everything before
                // that tag is a finished piece.
                if (pieceStart != candidateStart)
                {
                    System.Console.WriteLine(markup.Substring(pieceStart, candidateStart - pieceStart));
                }

                pieceStart = candidateStart;
                candidateStart = pos + 1;

                // The final tag has no successor to trigger its output, so print it here.
                if (pos == lastIndex)
                {
                    System.Console.WriteLine(markup.Substring(pieceStart, candidateStart - pieceStart));
                }
            }

            System.Console.WriteLine();
            System.Console.WriteLine("按 Enter 键继续...");
            System.Console.ReadLine();
        }
    }
}