code import java.io.*; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.regex.Pattern; public class Test { public static void main(String[] args) { int b = 0; String s = "1"; int n = 0; String[] d = new String[50000]; Set<String> strings = new HashSet<String>(); try { BufferedReader br = new BufferedReader(new FileReader("d://test.txt")); try { while((s = br.readLine())!= null) {
int m = 0; char[] ch = s.toCharArray(); for(int i=0; i<s.length(); i++) {
import java.io.*;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Pattern;
public class Test {
public static void main(String[] args) {
int b = 0;
String s = "1";
int n = 0;
String[] d = new String[50000];
Set<String> strings = new HashSet<String>();
try {
BufferedReader br = new BufferedReader(new FileReader("d://test.txt"));
try {
while((s = br.readLine())!= null) {
int m = 0;
char[] ch = s.toCharArray();
for(int i=0; i<s.length(); i++) {
if(ch[i] == ' ' || ch[i] == ',' || ch[i] == '.') {
String string = "";
for(int j=m; j<i; j++) {
string = string + ch[j];
}
m = i + 1;
d[n] = string;
n++;
}
}
}
for(int i=0; i<d.length; i++) {
for(int j=0; j<d.length; j++) {
if((i != j) && d[i] == d[j]) {
strings.add(d[i]);
break;
} }
}
Iterator<String> it = strings.iterator();
while(it.hasNext()) {
System.out.println(it.next());
}
/*String[] d = null;
d = c.split(",",0);
for(int i=0; i<d.length; i++) {
System.out.print(d[i]);
}
System.out.println();*/
br.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
有点麻烦的是txt文本文件换行情况的处理,对-这样的连词符需要特殊处理一下拼接处理每一行的结尾与开头的单词如果对于汉语就没有单词这样一说了吧
for(int j=0; j<d.length; j++) {
if((i != j) && d[i] == d[j]) { //d[i] == d[j]有问题,这样即使得到所有的东西也不能判等。
//== 判断的是内存位置,换成d[i].equals(d[j])
strings.add(d[i]);
break;
}这里的判断是有问题的,讲d[i] == d[j]换成d[i].equals(d[j])