考虑节省内存和提高速度，遍历指定目录下所有文本文件中所有出现单词的出现频率。

这是我的实现代码import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;public class CheakFile {
protected boolean done = true;
/**
* 遍历指定目录中指定文件，将遍历到得文件放入堆栈中
*/
class Producer implements Runnable {
protected BlockingQueue queue; Producer(BlockingQueue theQueue) {
this.queue = theQueue;
} public void run() {
try { ArrayList documentlist = new ArrayList();
System.out.println("请输入数据：");
Scanner s = new Scanner(System.in);
// while(s.hasNext()){
// documentlist.add(s.next());
// }
// Scanner s2 = new Scanner(System.in);
String document = s.next();
String ftype = s.next();
System.out.println(document + ftype);
ArrayList list = (ArrayList) readDocument(document, ftype);
System.out.println(list);
for (int i = 0; i < list.size(); i++) {
queue.put(list.get(i));
} } catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} } public List readDocument(String DocumetPath, String filetype) {
ArrayList filelist = new ArrayList();
File dir = new File(DocumetPath);
File[] files = dir.listFiles(); if (files == null)
return null;
for (int i = 0; i < files.length; i++) {
if (files[i].isDirectory()) {
readDocument(files[i].getAbsolutePath(), filetype);
} else {
String strFileName = files[i].getAbsolutePath()
.toLowerCase();
String sufix = null;
StringTokenizer fx = new StringTokenizer(strFileName, ".");
int n = fx.countTokens();
while (fx.hasMoreTokens()) {
sufix = fx.nextToken();
}
if (sufix.equals(filetype)) {
filelist.add(files[i].getAbsolutePath());
} }
return filelist;
} return null;
}
} /**
* 从堆栈中循环取出文件读取，记录每个文件中单词出现的次数
*/
class Consumer implements Runnable { protected BlockingQueue queue; int countAll = 0; Consumer(BlockingQueue theQueue) {
this.queue = theQueue;
} public void run() {
/**
* 合并Map
*/
TreeMap<String, Integer>[] map = null;
Map<String, Integer> Omap = new HashMap<String, Integer>();
while (done){
try {
                    if(queue==null){
                     this.wait(100);
                     continue;
                    }
for (int j = 0; j < queue.size(); j++) {
String path = (String) queue.take();
map[j] = countTaget(path);
}
for (int i = 0; i < map.length - 1; i++) {
Omap = map[i + 1];
Set<String> set = Omap.keySet();
Iterator<String> it = set.iterator(); while (it.hasNext()) {
String key = it.next();
Integer ss = map[i].get(key);
if (ss != null) {
int res = Omap.get(key).intValue()
+ ss.intValue();
Omap.put(key, res);
}
}
}
System.out.println("目录下文本文件的单词出现频率为：" + Omap);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// if (done) {
//             return;
//           }
}
} public TreeMap countTaget(String filepath) {
BufferedReader buf;
TreeMap myTreeMap = null;
try {
buf = new BufferedReader(new FileReader(filepath));
StringBuffer sbuf = new StringBuffer();// 缓冲字符串
String line = null;
while ((line = buf.readLine()) != null) {
sbuf.append(line);// 追加到缓冲字符串中
}
buf.close();// 读取结束
Pattern expression = Pattern.compile("[a-zA-Z]+");// 定义正则表达式匹配单词
String string1 = sbuf.toString().toLowerCase();// 转换成小写
Matcher matcher = expression.matcher(string1);// 定义string1的匹配器
myTreeMap = new TreeMap();// 创建树映射存放键/值对
// int n=0;//文章中单词总数
Object word = null;// 文章中的单词
Object num = null;// 出现的次数
while (matcher.find()) {// 是否匹配单词
word = matcher.group();// 得到一个单词-树映射的键
// n++;//单词数加1
if (myTreeMap.containsKey(word)) {// 如果包含该键，单词出现过
num = myTreeMap.get(word);// 得到单词出现的次数
Integer count = (Integer) num;// 强制转化
myTreeMap.put(word, new Integer(count.intValue() + 1));
} else {
myTreeMap.put(word, new Integer(1));// 否则单词第一次出现，添加到映射中
} }
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return myTreeMap;
}
}
public void setup() throws InterruptedException {
BlockingQueue myQueue = new LinkedBlockingQueue();
boolean h=true;
Thread t1 = new Thread(new Producer(myQueue));
t1.start(); Thread t2 = new Thread(new Consumer(myQueue));
// try {
// t2.sleep(10*1000);
// } catch (InterruptedException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
t2.setDaemon(true);
t2.start();

} public static void main(String args[]) { CheakFile cf = new CheakFile();
try {
cf.setup();
} catch (InterruptedException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
// BlockingQueue myQueue = new LinkedBlockingQueue();
// Producer p = new Producer(myQueue);
// Consumer c = new Consumer(myQueue);
// Thread t1 = new Thread(p);
// t1.start();
// Thread t2 = new Thread(c);
// t2.start(); // Let the simulation run for, say, 10 seconds
try {
Thread.sleep(10 * 1000);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
cf.done = false;
}
}
一运行就报空指针：
java.lang.NullPointerException
at CheakFile$Consumer.run(CheakFile.java:118)
at java.lang.Thread.run(Thread.java:595)
高手请帮忙看下啊！！

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

CheakFile文件118行有错，看看再调试调试
而且都说了是空指针异常，找找看！
呵呵，不错。但可以试试Hadoop的MapReduce分布式计算模型。
----------------------------------------------------------
1.      package org.myorg;
2.
3. import java.io.IOException;
4. import java.util.*;
5.
6. import org.apache.hadoop.fs.Path;
7. import org.apache.hadoop.conf.*;
8. import org.apache.hadoop.io.*;
9. import org.apache.hadoop.mapred.*;
10. import org.apache.hadoop.util.*;
11.
12. public class WordCount {
13.
14.     public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
15.       private final static IntWritable one = new IntWritable(1);
16.       private Text word = new Text();
17.
18.       public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
19.         String line = value.toString();
20.         StringTokenizer tokenizer = new StringTokenizer(line);
21.         while (tokenizer.hasMoreTokens()) {
22.           word.set(tokenizer.nextToken());
23.           output.collect(word, one);
24.         }
25.       }
26.     }
27.
28.     public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
29.       public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
30.         int sum = 0;
31.         while (values.hasNext()) {
32.           sum += values.next().get();
33.         }
34.         output.collect(key, new IntWritable(sum));
35.       }
36.     }
37.
38.     public static void main(String[] args) throws Exception {
39.       JobConf conf = new JobConf(WordCount.class);
40.       conf.setJobName("wordcount");
41.
42.       conf.setOutputKeyClass(Text.class);
43.       conf.setOutputValueClass(IntWritable.class);
44.
45.       conf.setMapperClass(Map.class);
46.       conf.setCombinerClass(Reduce.class);
47.       conf.setReducerClass(Reduce.class);
48.
49.       conf.setInputFormat(TextInputFormat.class);
50.       conf.setOutputFormat(TextOutputFormat.class);
51.
52.       FileInputFormat.setInputPaths(conf, new Path(args[0]));
53.       FileOutputFormat.setOutputPath(conf, new Path(args[1]));
54.
55.       JobClient.runJob(conf);
57.     }
58. }
59. -----------------------http://hadoop.apache.org/common/docs/r0.20.2/mapred_tutorial.html