包含css/javascript里面的内容.<style>
.title0 {font-size:17px;font-family:arial;font-weight:bold;text-decoration:none;color:#FFFFFF;}
A.title2:link {COLOR: #000000; TEXT-DECORATION: none}
A.title2:visited {COLOR: #000000; TEXT-DECORATION: none}
A.title2:active {COLOR: #ff0000; TEXT-DECORATION: none}
A.title2:hover {COLOR: #ff0000; TEXT-DECORATION: none}
</style>如上,不单单取出<.*?>
.title0 {font-size:17px;font-family:arial;font-weight:bold;text-decoration:none;color:#FFFFFF;}
A.title2:link {COLOR: #000000; TEXT-DECORATION: none}
A.title2:visited {COLOR: #000000; TEXT-DECORATION: none}
A.title2:active {COLOR: #ff0000; TEXT-DECORATION: none}
A.title2:hover {COLOR: #ff0000; TEXT-DECORATION: none}
</style>如上,不单单取出<.*?>
A.title2:link {COLOR: #000000; TEXT-DECORATION: none}
A.title2:visited {COLOR: #000000; TEXT-DECORATION: none}
A.title2:active {COLOR: #ff0000; TEXT-DECORATION: none}
A.title2:hover {COLOR: #ff0000; TEXT-DECORATION: none}
* @param args
*/
public static void main(String[] args) {
    // Demo input: one complete <style> element, concatenated without line breaks:
    // <style>
    // .title0 {font-size:17px;font-family:arial;font-weight:bold;text-decoration:none;color:#FFFFFF;}
    // A.title2:link {COLOR: #000000; TEXT-DECORATION: none}
    // A.title2:visited {COLOR: #000000; TEXT-DECORATION: none}
    // A.title2:active {COLOR: #ff0000; TEXT-DECORATION: none}
    // A.title2:hover {COLOR: #ff0000; TEXT-DECORATION: none}
    // </style>
    StringBuilder sb = new StringBuilder();
    sb.append("<style>");
    sb.append(".title0 {font-size:17px;font-family:arial;font-weight:bold;text-decoration:none;color:#FFFFFF;}");
    sb.append("A.title2:link {COLOR: #000000; TEXT-DECORATION: none}");
    sb.append("A.title2:visited {COLOR: #000000; TEXT-DECORATION: none}");
    sb.append("A.title2:active {COLOR: #ff0000; TEXT-DECORATION: none}");
    sb.append("A.title2:hover {COLOR: #ff0000; TEXT-DECORATION: none}");
    sb.append("</style>");
    String str = sb.toString();
    System.out.println(str);

    // Step 1: remove every <style> element together with the CSS inside it.
    // (?i) ignores case (<STYLE>), (?s) lets '.' cross line breaks, and the
    // reluctant .*? stops at the first closing tag so that text between two
    // separate <style> elements is kept.
    String withoutStyle = str.replaceAll("(?is)<style[^>]*>.*?</style>", "");

    // Step 2: remove whatever tags are left. A single bracketed character
    // class must not be used here: it matches one character at a time and
    // would delete ordinary text as well as markup.
    String formatStr = withoutStyle.replaceAll("<[^>]*>", "");
    System.out.println(formatStr);
}
先把
<style[^>]*>.*</style>
替换为 "", 再把
<.*?>
替换为 ""。
推荐:
http://www.regexlab.com/
理解起来很麻烦啊。下面是我在项目中写的一个方法。
/**
* 过滤掉所有的标签.
*
* @param input 输入字符串.
* @return
*/
public static String omitTag(String input) {
String regex = "<[\\w\\\"\\s\\.:=/_]*>";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(input); return matcher.replaceAll("");
}<[\\w\\\"\\s\\.:=/_]*>
表示<...>的东西全部不要,
把可能会出现的字符全部写进方括号.
在 Java 字符串里, \\w 是正则 \w 的转义写法, \\s 是 \s 的转义写法, 等等。
\w 匹配字母、数字和下划线, \\ 表示反斜杠, \" 表示双引号, \s 匹配空白字符, 等等。