import java.util.regex.*;

/**
 * Extracts every "a ... book" phrase from a small sample text.
 *
 * <p>The snippet was originally collapsed onto a single line, where each
 * {@code //} comment swallowed the remainder of the program; it is laid out
 * one statement per line here so that it compiles.
 */
public class MyRegex2 {
    /**
     * Builds the sample text, normalises its spacing and prints each match
     * of the "a ... book" pattern on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "This a xml book.I like xml.\n";
        s += "This is a C# book.But I love Java.\n";
        s += " This is a SQL book.\n";
        s += "This is a C++ book.\n";
        // Collapse runs of two or more spaces into a single space.
        s = s.replaceAll(" {2,}", " ");
        // Strip leading spaces from every line; (?m) lets ^ match at each line start.
        s = s.replaceAll("(?m)^ +", "");
        // '.' does not cross line terminators, so each match stays within one line.
        Matcher m = Pattern.compile("(?m)\\ba(.*)book\\b").matcher(s);
        while (m.find()) {
            System.out.println(m.group());
        }
    }
}
// Revised version: a lookbehind keeps the leading " a " out of the match.
import java.util.regex.*;

/**
 * Extracts the text that follows " a " up to and including "book" from a
 * small sample text.
 */
public class MyRegex2 {
    /**
     * Builds the sample text, normalises its spacing and prints each match
     * on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "This a xml book.I like xml.\n";
        s += "This is a C# book.But I love Java.\n";
        s += " This is a SQL book.\n";
        s += "This is a C++ book.\n";
        // Collapse runs of two or more spaces into a single space.
        s = s.replaceAll(" {2,}", " ");
        // Strip leading spaces from every line; (?m) lets ^ match at each line start.
        s = s.replaceAll("(?m)^ +", "");
        // (?<= a ) requires " a " immediately before the match without consuming it.
        Matcher m = Pattern.compile("(?m)\\b(?<= a )(.*)book\\b").matcher(s);
        while (m.find()) {
            System.out.println(m.group());
        }
    }
}
// Poster's result:
//   xml book
//   C# book
//   SQL book
//   C++ book
// (The poster asks whether this approach is acceptable.)
谢谢您的回答。我的意思是:任意给定一篇文章 P,将其中没有实际意义的单词(停用词)去掉。 如:P={“An Instant and Accurate Estimation Method for Joins and Selection in a Retrieval-Intensive Environment”} 结果是 R={“Instant Accurate Estimation Method Joins Selection Retrieval Intensive Environment”}
public static void main(String...args) { String str = "An Instant and, Accurate Estimation Method for Joins and Selection in a Retrieval-Intensive Environment";
// Poster's proposal ("does this look OK?"): remove stop words from the sample text.
import java.util.regex.*;

/**
 * Removes a fixed list of stop words from a small sample text.
 */
public class MyRegex2 {
    /**
     * Builds the sample text, normalises its spacing, deletes every stop word
     * and prints the result.
     *
     * <p>Stop words are matched as whole words and case-insensitively, so a
     * capitalised word at the start of a line ("This") and two adjacent stop
     * words are removed as well; replacing {@code " word "} with a space
     * missed both cases.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] temp = {"a", "an", "this", "that", "which", "what"};
        String s = "This a xml book.I like xml.\n";
        s += "This is a C# book.But I love Java.\n";
        s += " This is an SQL book which I like.\n";
        s += "This is a C++ book that I like.\n";
        // Collapse runs of two or more spaces into a single space.
        s = s.replaceAll(" {2,}", " ");
        // Strip leading spaces from every line; (?m) lets ^ match at each line start.
        s = s.replaceAll("(?m)^ +", "");

        // Join the stop words into one alternation: a|an|this|...
        StringBuilder alternation = new StringBuilder();
        for (int i = 0; i < temp.length; i++) {
            if (i > 0) {
                alternation.append('|');
            }
            alternation.append(temp[i]);
        }
        // Trailing " *" removes only spaces, so line breaks are left intact.
        s = s.replaceAll("(?i)\\b(?:" + alternation + ")\\b *", "");
        System.out.println(s);
    }
}
public class MyRegex2 {
    /**
     * Prints every "a ... book" phrase found in a small sample text, one
     * phrase per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        StringBuilder sample = new StringBuilder();
        sample.append("This a xml book.I like xml.\n");
        sample.append("This is a C# book.But I love Java.\n");
        sample.append(" This is a SQL book.\n");
        sample.append("This is a C++ book.\n");

        String text = sample.toString()
                .replaceAll(" {2,}", " ")    // collapse repeated spaces into one
                .replaceAll("(?m)^ +", "");  // drop the leading spaces of each line

        Pattern phrase = Pattern.compile("(?m)\\ba(.*)book\\b");
        for (Matcher hit = phrase.matcher(text); hit.find(); ) {
            System.out.println(hit.group());
        }
    }
}
import java.util.regex.*;
public class MyRegex2 {
    /**
     * Prints, for a small sample text, the part of each line that follows
     * " a " up to and including "book".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] rawLines = {
            "This a xml book.I like xml.\n",
            "This is a C# book.But I love Java.\n",
            " This is a SQL book.\n",
            "This is a C++ book.\n"
        };
        String text = "";
        for (String line : rawLines) {
            text += line;
        }

        // Collapse repeated spaces into one.
        text = text.replaceAll(" {2,}", " ");
        // Drop the leading spaces of each line.
        text = text.replaceAll("(?m)^ +", "");

        Matcher finder = Pattern.compile("(?m)\\b(?<= a )(.*)book\\b").matcher(text);
        boolean found = finder.find();
        while (found) {
            System.out.println(finder.group());
            found = finder.find();
        }
    }
}
我的结果:
xml book
C# book
SQL book
C++ book
不知道这样行不行?
如:P={“An Instant and Accurate Estimation Method for Joins and Selection in a Retrieval-Intensive Environment”}
结果是R={“Instant Accurate Estimation Method Joins Selection Retrieval Intensive Environment”}
StopWords={a,an,this,that,which,what等}
import java.util.regex.Pattern;

/**
 * Strips stop words (articles, conjunctions, ...) from an English sentence.
 *
 * <p>Only immutable {@link Pattern} objects are kept as constants; a fresh
 * matcher is created per call. This needs no import beyond {@code Pattern}
 * and avoids sharing mutable matcher state between calls.
 */
public class Test {
    /** Words that are removed from the input. */
    private final static String[] stopWords = {
        "a", "an", "and", "for", "in", "the", "this", "that", "which", "what"
    };
    /**
     * Matches any stop word as a whole word, case-insensitively, together
     * with the whitespace that follows it.
     */
    private final static Pattern STOP_WORD_PATTERN = toRegex(stopWords);
    /**
     * Matches every character that is not an ASCII letter, whitespace or a hyphen.
     */
    private final static Pattern REMOVE_OTHER = Pattern.compile("[^a-zA-Z\\s-]");

    /**
     * Runs the stop-word removal on a sample title and prints the result
     * followed by the elapsed time in nanoseconds.
     *
     * @param args unused
     */
    public static void main(String...args) {
        String str = "An Instant and, Accurate Estimation Method for Joins and Selection in a Retrieval-Intensive Environment";
        long t0, t1;
        t0 = System.nanoTime();
        String s1 = processStopWord(str);
        t1 = System.nanoTime();
        System.out.println(s1);
        System.out.println(t1 - t0);
    }

    /**
     * Removes punctuation and other non-letter characters, then removes all stop words.
     *
     * @param statement the text to clean
     * @return the text without the filtered characters and without stop words
     */
    private static String processStopWord(String statement) {
        // Punctuation goes first so that "and," is seen as the stop word "and".
        String lettersOnly = REMOVE_OTHER.matcher(statement).replaceAll("");
        return STOP_WORD_PATTERN.matcher(lettersOnly).replaceAll("");
    }

    /**
     * Builds the stop-word regular expression.
     *
     * @param stopWords the words to match; they are inserted into the
     *                  expression verbatim, so they must not contain regex
     *                  metacharacters
     * @return the compiled pattern
     *
     * 2009-2-22 11:06:08 PM
     */
    private static Pattern toRegex(String[] stopWords) {
        StringBuilder sb = new StringBuilder("\\b(?i:");
        for (int i = 0; i < stopWords.length; i++) {
            if (i > 0) {
                sb.append("|");
            }
            sb.append(stopWords[i]);
        }
        sb.append(")\\b\\s*");
        // The assembled expression looks like this:
        // \b(?i:a|an|and|for|in|the|this|that|which|what)\b\s*
        // Factoring common prefixes would be considerably more efficient:
        // \b(?i:a(?:nd?)?|for|in|th(?:e|is|at)|wh(?:ich|at))\b\s*
        return Pattern.compile(sb.toString());
    }
}
import java.util.regex.*;
public class MyRegex2 {
    /**
     * Builds a sample text, normalises its spacing, deletes every stop word
     * and prints the result.
     *
     * <p>Stop words are matched as whole words and case-insensitively, so a
     * capitalised word at the start of a line ("This") and two adjacent stop
     * words are removed as well; replacing {@code " word "} with a space
     * missed both cases.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] temp = {"a", "an", "this", "that", "which", "what"};
        String s = "This a xml book.I like xml.\n";
        s += "This is a C# book.But I love Java.\n";
        s += " This is an SQL book which I like.\n";
        s += "This is a C++ book that I like.\n";
        // Collapse runs of two or more spaces into a single space.
        s = s.replaceAll(" {2,}", " ");
        // Strip leading spaces from every line; (?m) lets ^ match at each line start.
        s = s.replaceAll("(?m)^ +", "");

        // Join the stop words into one alternation: a|an|this|...
        StringBuilder alternation = new StringBuilder();
        for (int i = 0; i < temp.length; i++) {
            if (i > 0) {
                alternation.append('|');
            }
            alternation.append(temp[i]);
        }
        // Trailing " *" removes only spaces, so line breaks are left intact.
        s = s.replaceAll("(?i)\\b(?:" + alternation + ")\\b *", "");
        System.out.println(s);
    }
}