关于文件读写的小问题 如果有一个文件,里面有很多汉语词汇,用空格分开,怎么才能把它们存到一个string数组里呢(用来统计它们的词频并且存到treemap里排序) 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 import com.google.common.io.CharStreams; //来自 google guava import java.io.FileReader;String content = CharStreams.toString(new FileReader(file)); // 工具方法,将文件内容读到一个StringString[] words = content.split("\\s+"); 词频你最好用hashMap的类型吧。 import com.google.common.collect.TreeMultiset;TreeMultiset<String> wordset = TreeMultiSet.create();wordset.addAll(Arrays.asList(words));for(String word: wordset.elementSet()){ System.out.printf("%s: %d \n",word,wordset.count(word));} 如果原来文件词汇之间有空格,办法很多了import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import java.util.TreeMap;public class StatisticWordFrequency { public static String read(String filename) throws IOException { BufferedReader br = new BufferedReader(new FileReader(filename)); String s; StringBuffer sb = new StringBuffer(); while ((s = br.readLine()) != null) { sb.append(s + "\n"); } br.close(); return sb.toString(); } public static TreeMap getTreeMap(String[] words) { if (words == null) return null; TreeMap<String, Integer> treeMap = new TreeMap<String, Integer>(); for (int i = 0; i < words.length; i++) { if (!treeMap.containsKey(words[i])) treeMap.put(words[i], 1); else treeMap.put(words[i], treeMap.get(words[i]) + 1); } return treeMap; } public static TreeMap execute(String fileName) throws IOException { String sourceString = read(fileName);// 将这个文件用sourceString保存 String[] words = sourceString.split("\\s+"); return getTreeMap(words); } public static void main(String[] args) throws IOException { TreeMap treeMap = execute("c:/test.txt"); System.out.println(treeMap); }}/* 文件内容 HelloWorld Hello WorldHello World Hello World Hello WorldCSDN CSDNCSDNHello World测试结果:{CSDN=3, Hello=5, HelloWorld=1, World=5} */ treemap只key的排序(并且key必须可以比较大小),而,如果统计的数字作为key,不太合适,因为,有单词出现频率相同的情况,这时,相同key的两个值只能有一个保存到map里面。并且,选用String作key,也不合适,字符串比较大小,估计要自定义方法来实现了。我写了点代码,也能实现统计、排序功能,就是不知LZ能否接受。 
public static void main(){ String fileName = "";//文件名 HashMap<String,Integer> counter = new HashMap<String, Integer>();//hashmap Scanner scan = null; try { scan = new Scanner(new File(fileName)); while(scan.hasNextLine()){ String line = scan.nextLine(); String [] strArr = line.split("\\s"); for(String str :strArr){ if(counter.containsKey(str)){ counter.put(str, counter.get(str)+1); }else{ counter.put(str,new Integer(1)); } } } } catch (FileNotFoundException e) { e.printStackTrace();//异常处理 }finally{ if(scan!=null){ scan.close(); } } //排序 class Wrapper implements Comparable<Wrapper>{ String string; int count; Wrapper(String string,int count){ this.string=string;this.count=count; } public int compareTo(Wrapper o) { if(o==null)return -1; return count==o.count?1:(count<o.count?-1:1); } } ArrayList<Wrapper> list = new ArrayList<Wrapper>(counter.size()); for(Entry<String, Integer> e : counter.entrySet()){ list.add(new Wrapper(e.getKey(),e.getValue())); } Collections.sort(list); //打印结果 for(Wrapper wapper : list){ System.out.println(wapper.string+" = "+wapper.count); } } 请教一下做https代理的问题 请教java中几种关于对象引用的问题! 把string带星期的时间转换为Date Java如何实现委托(delegate)? 将图片以字节流的形式直接写到jsp文件中,高手来帮忙。 java拷贝与粘贴 Java运行时问题 大伙帮帮我吧!Java中哪个方法可以作曲线图啊? 高手请进!给最高分! 我用的VisualCafe不能显示中文,怎么回事呀? java编程boolean 各位截取到 一段数据包 请问怎么分析啊,各位有搞过的吗?
import com.google.common.collect.TreeMultiset;
import com.google.common.io.CharStreams;
import java.io.FileReader;
import java.util.Arrays;

// Read the whole file into one String. CharStreams.toString does not close
// the reader it is given, so it is closed explicitly here.
FileReader reader = new FileReader(file);
String content;
try {
    content = CharStreams.toString(reader);
} finally {
    reader.close();
}

// Split on runs of whitespace to get the individual words. This statement
// must sit on its own line: placed after a trailing "//" comment it becomes
// part of the comment and `words` is never declared.
String[] words = content.split("\\s+");

// A sorted multiset keeps one entry per distinct word and remembers how many
// times each word was added.
TreeMultiset<String> wordset = TreeMultiset.create();
wordset.addAll(Arrays.asList(words));
for (String word : wordset.elementSet()) {
    System.out.printf("%s: %d \n", word, wordset.count(word));
}
import java.io.FileReader;
import java.io.IOException;
import java.util.TreeMap;
import java.io.BufferedReader;

/**
 * Counts how often each whitespace-separated word occurs in a text file and
 * reports the counts ordered by word.
 */
public class StatisticWordFrequency {

    /**
     * Reads an entire text file into a string.
     *
     * <p>Every line is terminated with a single {@code "\n"} in the result,
     * whatever line separator the file uses. The file is decoded with the
     * platform default charset (that is what {@link FileReader} does).
     *
     * @param filename path of the file to read
     * @return the file content, one {@code "\n"} after each line
     * @throws IOException if the file cannot be opened or read
     */
    public static String read(String filename) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(filename));
        try {
            StringBuilder sb = new StringBuilder();
            String s;
            while ((s = br.readLine()) != null) {
                sb.append(s).append('\n');
            }
            return sb.toString();
        } finally {
            // Close even when readLine() throws, so the file handle is not leaked.
            br.close();
        }
    }

    /**
     * Builds a word-to-occurrence-count map, sorted by word.
     *
     * <p>{@code null} and empty entries are skipped: {@code split("\\s+")}
     * yields an empty first token for text that starts with whitespace (and
     * a single empty token for empty text), and such tokens are not words.
     *
     * @param words the tokens to count; may be {@code null}
     * @return the counts keyed by word in natural string order, or
     *         {@code null} when {@code words} is {@code null}
     */
    public static TreeMap<String, Integer> getTreeMap(String[] words) {
        if (words == null) {
            return null;
        }
        TreeMap<String, Integer> treeMap = new TreeMap<String, Integer>();
        for (String word : words) {
            if (word == null || word.length() == 0) {
                continue;
            }
            Integer seen = treeMap.get(word);
            treeMap.put(word, seen == null ? 1 : seen + 1);
        }
        return treeMap;
    }

    /**
     * Reads the given file and counts its whitespace-separated words.
     *
     * @param fileName path of the file to analyse
     * @return the word counts, sorted by word
     * @throws IOException if the file cannot be opened or read
     */
    public static TreeMap<String, Integer> execute(String fileName) throws IOException {
        String sourceString = read(fileName); // whole file content
        String[] words = sourceString.split("\\s+");
        return getTreeMap(words);
    }

    /**
     * Prints the word counts of {@code c:/test.txt} to standard output.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        TreeMap<String, Integer> treeMap = execute("c:/test.txt");
        System.out.println(treeMap);
    }
}
/*
文件内容
HelloWorld Hello World
Hello World
Hello World Hello World
CSDN CSDN
CSDN
Hello World

测试结果:
{CSDN=3, Hello=5, HelloWorld=1, World=5}
 */
而如果把统计出的次数作为 key,也不太合适,
因为存在多个单词出现频率相同的情况,这时相同 key 的两个值只能有一个保存到 map 里面。
并且,选用 String 作 key 也不合适:字符串比较大小,估计要自定义方法来实现了。我写了点代码,也能实现统计、排序功能,就是不知 LZ 能否接受。
/**
 * Counts the whitespace-separated words of the file named by
 * {@code fileName} and prints one {@code word = count} line per distinct
 * word, ordered by ascending count; words with the same count are ordered
 * alphabetically.
 *
 * <p>If the file does not exist, the stack trace is printed and no counts
 * are output.
 */
public static void main(){
    String fileName = "";// file name (placeholder; set it before running)
    HashMap<String,Integer> counter = new HashMap<String, Integer>();// word -> occurrences
    Scanner scan = null;
    try {
        scan = new Scanner(new File(fileName));
        while(scan.hasNextLine()){
            // Split on runs of whitespace so consecutive blanks do not
            // produce empty tokens between words.
            for(String str : scan.nextLine().split("\\s+")){
                if(str.length()==0){
                    // A blank line, or a line starting with whitespace, still
                    // yields one empty token; it is not a word.
                    continue;
                }
                Integer seen = counter.get(str);
                counter.put(str, seen==null ? 1 : seen+1);
            }
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();// report the missing file; nothing is counted
    }finally{
        if(scan!=null){
            scan.close();
        }
    }
    // Sorting: a map cannot be keyed by count because several words may
    // share a count, so each (word, count) pair is wrapped and a list sorted.
    class Wrapper implements Comparable<Wrapper>{
        final String string;
        final int count;
        Wrapper(String string,int count){
            this.string=string;
            this.count=count;
        }
        // Orders by count, then by word. Returning a fixed 1 for equal counts
        // would break the Comparable contract (a>b and b>a at the same time),
        // which Collections.sort is allowed to reject with an exception.
        public int compareTo(Wrapper o) {
            if(count!=o.count){
                return count<o.count ? -1 : 1;
            }
            return string.compareTo(o.string);
        }
    }
    ArrayList<Wrapper> list = new ArrayList<Wrapper>(counter.size());
    for(Entry<String, Integer> e : counter.entrySet()){
        list.add(new Wrapper(e.getKey(),e.getValue()));
    }
    Collections.sort(list);
    // Print the result.
    for(Wrapper wrapper : list){
        System.out.println(wrapper.string+" = "+wrapper.count);
    }
}