现在有一个文本文件叫做index.txt,里面有很多行,每一行都是一个文件名,每个文件名所指向的文件里都包含一些英文的段落(以空格隔开的单词)。现在想在读每一行的时候创建一个线程,该线程统计每个英文单词出现的次数,最后汇总所有单词的出现次数并按字母顺序打印,例如:
apple 3
beach 1
me 5
zero 5
本人已经完成了单词统计的部分(尚未完成重复词计数),突然觉得自己的想法可能满足不了实现这个程序的要求,所以散100分和高手们讨论求点思路。如果能有高手给几段代码是最好,但是也十分欢迎一起讨论你的想法。完成部分如下:
ZhengProject4.java
import edu.truman.cs260.Zheng.RunIndex;
import edu.truman.cs260.Zheng.Counter;
import edu.truman.cs260.Zheng.Word;import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
//import java.util.ArrayList;
import java.util.ArrayList;/**
* @author Tian
*
*/
public class ZhengProject4 { /**
* @param args
*/
public static void main(String[] args) {
Runnable counter = new RunIndex(null);
//ArrayList<String> names = new ArrayList<String>();
File file = new File("index.txt");
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(file));
String text = null; while ((text = reader.readLine()) != null)
{
//System.out.println(text);
counter = new RunIndex(text);
Thread t1 = new Thread(counter);
t1.start();
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (reader != null) reader.close();
} catch (IOException e) {
e.printStackTrace();
}
//ArrayList <Counter> counters;
//Counter counters = new Counter(null);
//for (Counter counter : counters) System.out.println (counters.getCounter()+ ": "+counters.getEvent());
}
}
}
RunIndex.java
/**
*
*/
package edu.truman.cs260.Zheng;import java.lang.Runnable;
import java.io.File;
import java.util.ArrayList;/**
* @author Tian
*
*/
/**
 * Thread task that reads one text file through a TextReader and prints each
 * returned word together with its count.
 */
public class RunIndex implements Runnable {
    private final String indexName;

    /**
     * @param aIndexName name of the text file this task processes
     */
    public RunIndex(String aIndexName) {
        indexName = aIndexName;
    }

    /**
     * Reads the file and prints one "word: count" line per returned entry.
     * Prints nothing when the reader reports a failure by returning null.
     */
    public void run() {
        TextReader parser = new TextReader(new File(indexName));
        ArrayList<Word> words = parser.readIn();
        if (words == null) {
            // readIn() has already printed the stack trace; without this
            // guard the loop below would throw a NullPointerException.
            return;
        }
        for (Word word : words) {
            System.out.println(word.getWord() + ": " + word.getEvent());
        }
    }
}
TextReader.java:
/**
*
*/
package edu.truman.cs260.Zheng;import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.StringTokenizer; /**
* @author Tian
*
*/
/**
 * Reads a text file and counts how often each whitespace-separated token
 * occurs in it.
 */
public class TextReader {
    private final File file;

    /**
     * @param aFile the file to read; checked with an assertion, so the check
     *              only runs when assertions are enabled
     */
    public TextReader(File aFile) {
        assert aFile.exists() && aFile.isFile();
        file = aFile;
    }

    /**
     * Tokenizes every line of the file and tallies the tokens.
     *
     * @return one Word per distinct token, sorted with Word's natural order;
     *         an empty list (never null) when the file cannot be read, in
     *         which case the stack trace is printed
     */
    public ArrayList<Word> readIn() {
        ArrayList<Word> words = new ArrayList<Word>();
        BufferedReader input = null;
        try {
            input = new BufferedReader(new FileReader(file));
            String line;
            while ((line = input.readLine()) != null) {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    String token = tokenizer.nextToken();
                    Word word = find(words, token);
                    if (word == null) {
                        word = new Word(token);
                        words.add(word);
                    }
                    word.increase();
                }
            }
            Collections.sort(words);
            return words;
        } catch (Exception e) {
            // Best effort: report the problem and hand back an empty result
            // so that callers can iterate without a null check.
            e.printStackTrace();
            return new ArrayList<Word>();
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Looks up the entry for a token by its text. The text is compared
     * directly instead of using List.indexOf, so the lookup does not depend
     * on how Word implements equals(Object).
     */
    private static Word find(ArrayList<Word> words, String token) {
        for (Word candidate : words) {
            if (token.equals(candidate.getWord())) {
                return candidate;
            }
        }
        return null;
    }
}
Word.java:
/**
*
*/
package edu.truman.cs260.Zheng;/**
* @author Tian
*
*/
/**
 * A word together with the number of times it has been counted.
 *
 * Two Word objects are equal when their text is equal; the count is ignored.
 * NOTE: the natural order (compareTo) is by count, so it is not consistent
 * with equals.
 */
public class Word implements Comparable<Word> {
    private String word;
    private int event;

    /**
     * @param aWord the text of the word; checked with an assertion only
     */
    public Word(String aWord) {
        assert aWord != null;
        word = aWord;
    }

    /** @return the text of this word */
    public String getWord() {
        return word;
    }

    /** @return how many times {@link #increase()} has been called */
    public int getEvent() {
        return event;
    }

    /** Adds one to the count. */
    public void increase() {
        event++;
    }

    /**
     * Compares by text. This overrides Object.equals so that collections
     * (List.indexOf, HashMap keys) can recognise equal words; an overload
     * taking a Word parameter is never called by them.
     */
    @Override
    public boolean equals(Object another) {
        if (this == another) {
            return true;
        }
        if (!(another instanceof Word)) {
            return false;
        }
        String otherText = ((Word) another).word;
        return word == null ? otherText == null : word.equals(otherText);
    }

    /** Hash of the text, matching {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return word == null ? 0 : word.hashCode();
    }

    /** @return the text of the word, so printing a Word shows the word itself */
    @Override
    public String toString() {
        return String.valueOf(word);
    }

    /**
     * Orders words by ascending count.
     *
     * @return -1, 0 or 1 when this count is lower than, equal to or higher
     *         than the count of aWord
     */
    public int compareTo(Word aWord) {
        int other = aWord.getEvent();
        if (event < other) {
            return -1;
        }
        return event == other ? 0 : 1;
    }
}
现在主要面临的困境有两个,一是在主类中我不同线程的创建是通过一个while循环完成的,所以我无法对它们具体控制。如果想让它们对一个全局计数变量或对象操作的话,是需要给线程加锁之类的吧?实在是初学,没有十分理解那部分,希望能有人指导一下怎么改写成带锁的多线程。二是每读一个单词,我会对Word类的一个对象进行操作,增加计数什么的,但是不同的线程是相互独立的,怎么才能让这个Word被共用呢?直接用static变量不太符合面向对象的思路,所以我不是很想用,有没有解决办法?或者有没有高手觉得根据我的要求,我程序的结构上有什么问题?欢迎大家指出!一百分求讨论。十分感谢!
apple 3
beach 1
me 5
zero 5
本人已经完成了单词统计的部分(尚未完成重复词计数),突然觉得自己的想法可能满足不了实现这个程序的要求,所以散100分和高手们讨论求点思路。如果能有高手给几段代码是最好,但是也十分欢迎一起讨论你的想法。完成部分如下:
ZhengProject4.java
import edu.truman.cs260.Zheng.RunIndex;
import edu.truman.cs260.Zheng.Counter;
import edu.truman.cs260.Zheng.Word;import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
//import java.util.ArrayList;
import java.util.ArrayList;/**
* @author Tian
*
*/
public class ZhengProject4 { /**
* @param args
*/
public static void main(String[] args) {
Runnable counter = new RunIndex(null);
//ArrayList<String> names = new ArrayList<String>();
File file = new File("index.txt");
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(file));
String text = null; while ((text = reader.readLine()) != null)
{
//System.out.println(text);
counter = new RunIndex(text);
Thread t1 = new Thread(counter);
t1.start();
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (reader != null) reader.close();
} catch (IOException e) {
e.printStackTrace();
}
//ArrayList <Counter> counters;
//Counter counters = new Counter(null);
//for (Counter counter : counters) System.out.println (counters.getCounter()+ ": "+counters.getEvent());
}
}
}
RunIndex.java
/**
*
*/
package edu.truman.cs260.Zheng;import java.lang.Runnable;
import java.io.File;
import java.util.ArrayList;/**
* @author Tian
*
*/
/**
 * Thread task that reads one text file through a TextReader and prints each
 * returned word together with its count.
 */
public class RunIndex implements Runnable {
    private final String indexName;

    /**
     * @param aIndexName name of the text file this task processes
     */
    public RunIndex(String aIndexName) {
        indexName = aIndexName;
    }

    /**
     * Reads the file and prints one "word: count" line per returned entry.
     * Prints nothing when the reader reports a failure by returning null.
     */
    public void run() {
        TextReader parser = new TextReader(new File(indexName));
        ArrayList<Word> words = parser.readIn();
        if (words == null) {
            // readIn() has already printed the stack trace; without this
            // guard the loop below would throw a NullPointerException.
            return;
        }
        for (Word word : words) {
            System.out.println(word.getWord() + ": " + word.getEvent());
        }
    }
}
TextReader.java:
/**
*
*/
package edu.truman.cs260.Zheng;import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.StringTokenizer; /**
* @author Tian
*
*/
/**
 * Reads a text file and counts how often each whitespace-separated token
 * occurs in it.
 */
public class TextReader {
    private final File file;

    /**
     * @param aFile the file to read; checked with an assertion, so the check
     *              only runs when assertions are enabled
     */
    public TextReader(File aFile) {
        assert aFile.exists() && aFile.isFile();
        file = aFile;
    }

    /**
     * Tokenizes every line of the file and tallies the tokens.
     *
     * @return one Word per distinct token, sorted with Word's natural order;
     *         an empty list (never null) when the file cannot be read, in
     *         which case the stack trace is printed
     */
    public ArrayList<Word> readIn() {
        ArrayList<Word> words = new ArrayList<Word>();
        BufferedReader input = null;
        try {
            input = new BufferedReader(new FileReader(file));
            String line;
            while ((line = input.readLine()) != null) {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    String token = tokenizer.nextToken();
                    Word word = find(words, token);
                    if (word == null) {
                        word = new Word(token);
                        words.add(word);
                    }
                    word.increase();
                }
            }
            Collections.sort(words);
            return words;
        } catch (Exception e) {
            // Best effort: report the problem and hand back an empty result
            // so that callers can iterate without a null check.
            e.printStackTrace();
            return new ArrayList<Word>();
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Looks up the entry for a token by its text. The text is compared
     * directly instead of using List.indexOf, so the lookup does not depend
     * on how Word implements equals(Object).
     */
    private static Word find(ArrayList<Word> words, String token) {
        for (Word candidate : words) {
            if (token.equals(candidate.getWord())) {
                return candidate;
            }
        }
        return null;
    }
}
Word.java:
/**
*
*/
package edu.truman.cs260.Zheng;/**
* @author Tian
*
*/
/**
 * A word together with the number of times it has been counted.
 *
 * Two Word objects are equal when their text is equal; the count is ignored.
 * NOTE: the natural order (compareTo) is by count, so it is not consistent
 * with equals.
 */
public class Word implements Comparable<Word> {
    private String word;
    private int event;

    /**
     * @param aWord the text of the word; checked with an assertion only
     */
    public Word(String aWord) {
        assert aWord != null;
        word = aWord;
    }

    /** @return the text of this word */
    public String getWord() {
        return word;
    }

    /** @return how many times {@link #increase()} has been called */
    public int getEvent() {
        return event;
    }

    /** Adds one to the count. */
    public void increase() {
        event++;
    }

    /**
     * Compares by text. This overrides Object.equals so that collections
     * (List.indexOf, HashMap keys) can recognise equal words; an overload
     * taking a Word parameter is never called by them.
     */
    @Override
    public boolean equals(Object another) {
        if (this == another) {
            return true;
        }
        if (!(another instanceof Word)) {
            return false;
        }
        String otherText = ((Word) another).word;
        return word == null ? otherText == null : word.equals(otherText);
    }

    /** Hash of the text, matching {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return word == null ? 0 : word.hashCode();
    }

    /** @return the text of the word, so printing a Word shows the word itself */
    @Override
    public String toString() {
        return String.valueOf(word);
    }

    /**
     * Orders words by ascending count.
     *
     * @return -1, 0 or 1 when this count is lower than, equal to or higher
     *         than the count of aWord
     */
    public int compareTo(Word aWord) {
        int other = aWord.getEvent();
        if (event < other) {
            return -1;
        }
        return event == other ? 0 : 1;
    }
}
现在主要面临的困境有两个,一是在主类中我不同线程的创建是通过一个while循环完成的,所以我无法对它们具体控制。如果想让它们对一个全局计数变量或对象操作的话,是需要给线程加锁之类的吧?实在是初学,没有十分理解那部分,希望能有人指导一下怎么改写成带锁的多线程。二是每读一个单词,我会对Word类的一个对象进行操作,增加计数什么的,但是不同的线程是相互独立的,怎么才能让这个Word被共用呢?直接用static变量不太符合面向对象的思路,所以我不是很想用,有没有解决办法?或者有没有高手觉得根据我的要求,我程序的结构上有什么问题?欢迎大家指出!一百分求讨论。十分感谢!
解决方案 »
- 小菜鸟写了个计算器程序,求大大指点还有没有问题
- java Iterator<E>的问题
- 一个算法问题
- java是用什么语言写的??
- 如何禁用Jtabbedpane上的其它面板,只设置其中的一个有效?
- 利用PreparedStatement的setString()时总是抛出SQLException异常
- 学习java的困惑
- 使用jdbc操作odbc数据源的问题!
- SCJP过了以后到底有多大前途???请给我点信心吧!!!来者有分
- 对java掌握到什么程度算真正的“了解”、“熟悉”、“精通”?
- java中的空接口的疑问
- java中String类型05-5月 -07 11.34.45.12346 下午 如何转成Timestamp
1、线程数量不要太多,你的CPU没有那么多核,磁盘IO阻塞也没有达到那么严重能让你复用CPU;除非你打算用分布式,也就是多台电脑协同完成;
2、每个线程一个自己的全局计数变量,这样就不需要加锁了,性能高;
3、线程处理完毕自己所负责的文件后,就把自己的计数器返回给主线程(负责分配任务的),主线程将其计数值合并到主线程所维护的总计数器中;
4、給空闲出来的线程分配下一行(新的待处理文件)。
/**
 * Thread task that counts the words of one file and adds the counts to a
 * map shared with the other tasks.
 */
public class RunIndex implements Runnable {
    private final String indexName;
    private final Map<Word, Integer> map;

    /**
     * @param aIndexName name of the text file this task processes
     * @param map        shared result map; every access is done while
     *                   holding the map's own monitor
     */
    public RunIndex(String aIndexName, Map<Word, Integer> map) {
        indexName = aIndexName;
        this.map = map;
    }

    /**
     * Reads the file and merges its word counts into the shared map.
     */
    public void run() {
        TextReader parser = new TextReader(new File(indexName));
        ArrayList<Word> words = parser.readIn();
        if (words == null) {
            // readIn() signals a failure with null; nothing to merge.
            return;
        }
        synchronized (map) { // lock the shared resource once per file
            for (Word word : words) {
                // The key is the single word, not the whole list, and the
                // amount added is the count the reader recorded for it.
                // NOTE(review): merging equal words from different files
                // relies on Word overriding equals(Object)/hashCode(), which
                // is not visible here - confirm.
                Integer soFar = map.get(word);
                int total = (soFar == null ? 0 : soFar.intValue()) + word.getEvent();
                map.put(word, Integer.valueOf(total));
            }
        }
    }
}

public class ZhengProject4 {
    /**
     * Starts one RunIndex thread per non-blank line of "index.txt", waits
     * for all of them and prints the combined counts in alphabetical order.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        File file = new File("index.txt");
        BufferedReader reader = null;
        Map<Word, Integer> map = new HashMap<Word, Integer>(); // shared resource
        List<Thread> list = new ArrayList<Thread>();
        try {
            reader = new BufferedReader(new FileReader(file));
            String text;
            while ((text = reader.readLine()) != null) {
                String fileName = text.trim();
                if (fileName.length() == 0) {
                    continue; // a blank line names no file
                }
                Thread worker = new Thread(new RunIndex(fileName, map));
                list.add(worker);
                worker.start();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        // Block until every worker has finished instead of spinning on
        // isAlive(); join() also makes the workers' map updates visible here.
        for (Thread worker : list) {
            try {
                worker.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return; // interrupted: the totals would be incomplete
            }
        }

        // Re-key by the word text: this gives alphabetical order and merges
        // entries that carry the same text.
        Map<String, Integer> sorted = new java.util.TreeMap<String, Integer>();
        synchronized (map) {
            for (Map.Entry<Word, Integer> e : map.entrySet()) {
                String key = e.getKey().getWord();
                Integer soFar = sorted.get(key);
                int total = (soFar == null ? 0 : soFar.intValue()) + e.getValue().intValue();
                sorted.put(key, Integer.valueOf(total));
            }
        }
        for (Map.Entry<String, Integer> e : sorted.entrySet()) {
            System.out.printf("%s, %s%n", e.getKey(), e.getValue());
        }
    }
}
谢谢三楼!还没有来得及在您的例子上调试,我这会儿是个学习用的小程序,目前不准备考虑效率,主要就是为了理解这个概念,但是现在卡住了,就理解不了了。我自己的index.txt是四行的,每个文本文件里就一句话,应该不会出现那种很多很多的极端情况。
我估计是你想复杂了,先重述下你的数据结构:
◎ 你有一个主文件 index.txt,里面列出了很多需要统计单词数量的子文件,相当于一个文件列表;
◎ 需要进行统计的子文件,内容都是英文的段落(以空格隔开的单词)。不清楚你单个子文件的大小,但我相信如果只是几十兆这个规模的话,是肯定不需要针对单个子文件使用多线程的。
那么方案其实挺简单,可以这么说,主体基本上就跟3楼阿宝的模型是类似的,但有两种不同:
1、我建议不要每行(每个子文件)启动一个线程,因为如果你index.txt行数太多的话,比如数万行,你这个代价太高了,资源都浪费到线程切换去了;如果你index.txt行数不多的话,比如才几十行,那无所谓;也就是这里建议引入线程池的概念而已了,限制运行线程的规模;
2、我建议线程各自独立计数器,处理完毕后再将线程的计数器合并到主计数器中;你可以选择在每个子文件处理完毕时进行合并,也可以选择在所有子文件全部处理完毕时再一次性合并;好处是,以后你如果想增加这种:暂停、恢复、容错等功能,就比较容易了。总的来说,如果index.txt规模不大,3楼阿宝的模型就能够非常好的完成任务了。
edu.truman.cs260.Zheng.Word@e76cbf7 1
edu.truman.cs260.Zheng.Word@17dfafd1 1
edu.truman.cs260.Zheng.Word@272d7a10 1
edu.truman.cs260.Zheng.Word@1aa8c488 1
edu.truman.cs260.Zheng.Word@2352544e 1
edu.truman.cs260.Zheng.Word@7ecec0c5 1
edu.truman.cs260.Zheng.Word@22998b08 1
edu.truman.cs260.Zheng.Word@457471e0 1
edu.truman.cs260.Zheng.Word@7a6d084b 1
edu.truman.cs260.Zheng.Word@5e8fce95 1
edu.truman.cs260.Zheng.Word@3dfeca64 1
edu.truman.cs260.Zheng.Word@1948cc8c 1
edu.truman.cs260.Zheng.Word@c3bb2b8 1
edu.truman.cs260.Zheng.Word@3343c8b3 1
edu.truman.cs260.Zheng.Word@5fe04cbf 1
原本应该前面是单词、后面是计数,结果前面的单词都变成了这种类似地址的东西。请问一下这个是Map的问题还是StringTokenizer的问题?谢谢!
已经解决了,不是Map的原因。在这个模型下我有一种多余的对象叫Word,这些乱七八糟的输出正是那些对象(Word没有重写toString(),打印出来的是默认的"类名@哈希码")。我把它删掉、全部改用String以后就全好了。谢谢!
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;import edu.truman.cs260.Zheng.RunIndex;
/**
* @author Tian
* CS260 Final Project
* Project 4 - Multithreaded Programming
* This is a program that makes a list of all the words in a collection of
* text files store in "index.txt", and the frequencies with which how many
* time one word appears. This program uses one separate thread for each file
* to count words in each file and total them into a map structure.
*/
public class ZhengProject4 { /**
* Main method
*/
public static void main(String[] args) {
Runnable counter = new RunIndex(null, null);
File file = new File("index.txt");
BufferedReader reader = null;
Map<String, Integer> map = new TreeMap<String, Integer>();
List<Thread> list = new ArrayList<Thread>();
// Read index.txt and for each line, create a thread
try {
reader = new BufferedReader(new FileReader(file));
String text = null; while ((text = reader.readLine()) != null)
{
counter = new RunIndex(text, map);
Thread t1 = new Thread(counter);
list.add(t1);
t1.start();
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (reader != null) reader.close();
} catch (IOException e) {
e.printStackTrace();
}
} // Check if all the threads are finished
while (true) {
boolean end = true;
for (Thread t : list) {
if (t.isAlive()) {
end = false;
break;
}
}
if (end) break;
Thread.yield();
} // Output result
for (Map.Entry<String, Integer> e : map.entrySet()) {
System.out.print(e.getKey());
System.out.print(" ");
System.out.println(e.getValue());
}
}
}
package edu.truman.cs260.Zheng;import java.io.File;
import java.util.ArrayList;
import java.util.Map;/**
* @author Tian
* This is the class that implements Runnable interface.
* It executes every time a thread is created.
*/
public class RunIndex implements Runnable {
    private final String indexName;
    private final Map<String, Integer> map;

    /**
     * Constructor of RunIndex object
     * @param aIndexName name of the text file that needs to be processed
     * @param map the shared map that receives the result; it is only
     *            accessed while holding its own monitor
     */
    public RunIndex(String aIndexName, Map<String, Integer> map) {
        indexName = aIndexName;
        this.map = map;
    }

    /**
     * The thread's task: reads all words of the file and adds one to the
     * shared count of each word read.
     */
    public void run() {
        TextReader parser = new TextReader(new File(indexName));
        ArrayList<String> words = parser.readIn();
        if (words == null) {
            // readIn() signals a failure with null; without this guard the
            // loop below would throw a NullPointerException.
            return;
        }
        // Take the lock once for the whole file rather than once per word.
        synchronized (map) {
            for (String word : words) {
                Integer soFar = map.get(word);
                map.put(word, soFar == null ? 1 : soFar + 1);
            }
        }
    }
}
package edu.truman.cs260.Zheng;import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer; /**
* @author Tian
* This is a class of text reader. It reads a text file and
* separate words in that file.
*/
public class TextReader {
    private final File file;

    /**
     * Constructor of TextReader object
     * @param aFile the file that needs to be processed
     */
    public TextReader(File aFile) {
        file = aFile;
    }

    /**
     * Reads the text file and splits every line into whitespace-separated
     * words.
     * @return the words in the order they appear in the file, repeats
     *         included; an empty list (never null) when the file cannot be
     *         read, in which case the stack trace is printed
     */
    public ArrayList<String> readIn() {
        ArrayList<String> words = new ArrayList<String>();
        BufferedReader input = null;
        try {
            input = new BufferedReader(new FileReader(file));
            String line;
            while ((line = input.readLine()) != null) {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    words.add(tokenizer.nextToken());
                }
            }
            return words;
        } catch (Exception e) {
            // Best effort: report the problem and hand back an empty result
            // so that callers can iterate without a null check.
            e.printStackTrace();
            return new ArrayList<String>();
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }
}