我有两个文件 一个叫id.txt
里面有很多行的数字
12
13
14
59
219
另一个叫list.txt
里面是这样的
12 [29,39,18,59]
13 [18.49,59,219]
14 [8,18,39]
59 [481]
..
希望输出的就是在list.txt里面 方括号中的信息在id.txt中也出现
比如id里面有59 那么12对应的输出应该是
12 [59]
13 [59,219]我现在也写出来一个程序 但是速度非常慢 因为我的list.txt 大约有300M左右 而 id.txt也有10M 希望求高手帮我改进一下,或者重新写一个!在线等!import java.io.*;
import java.util.*;
import java.lang.*;
public class trythis{
public static void main(String args[])throws IOException{
File id = new File("id.txt");
Scanner in = new Scanner(id);
File friendship = new File("list.txt");
Scanner inF= new Scanner(friendship);
ArrayList<String> idArray = new ArrayList();
//String [] userID;
//int size = 6;
while (in.hasNextLine()){
idArray.add(in.nextLine());
}
String [] userFriend;
Map<String, ArrayList> map_root = new HashMap<String, ArrayList>();
while (inF.hasNextLine())
{
userFriend=inF.nextLine().split("\t");
String friendList = userFriend[1].substring(userFriend[1].indexOf("[") + 1, userFriend[1].lastIndexOf("]"));
String[] friends = friendList.split(",");
ArrayList<String> als=new ArrayList();
for(int i=0;i<friends.length;i++){
als.add(friends[i]);
}
// System.out.println(als);
map_root.put(userFriend[0],als);
}
// System.out.println(map_root);
Map<String, ArrayList> map_rs = new HashMap<String, ArrayList>();
for (String key : map_root.keySet()){
ArrayList<String> resultList =new ArrayList();
for (Iterator i = idArray.iterator(); i.hasNext();) {
Object temp = i.next();
if (map_root.get(key).contains(temp)) {
resultList.add(temp.toString());
}
map_rs.put(key, resultList);
}
}
StringBuffer buf=new StringBuffer();
for(String key:map_rs.keySet()){
buf.append(key+" "+map_rs.get(key)+"\r\n");
}
FileOutputStream os=new FileOutputStream(new File("friends.txt"));
os.write(buf.toString().getBytes(), 0, buf.toString().getBytes().length);
}
}
里面有很多行的数字
12
13
14
59
219
另一个叫list.txt
里面是这样的
12 [29,39,18,59]
13 [18.49,59,219]
14 [8,18,39]
59 [481]
..
希望输出的就是在list.txt里面 方括号中的信息在id.txt中也出现
比如id里面有59 那么12对应的输出应该是
12 [59]
13 [59,219]我现在也写出来一个程序 但是速度非常慢 因为我的list.txt 大约有300M左右 而 id.txt也有10M 希望求高手帮我改进一下,或者重新写一个!在线等!import java.io.*;
import java.util.*;
import java.lang.*;
public class trythis{
public static void main(String args[])throws IOException{
File id = new File("id.txt");
Scanner in = new Scanner(id);
File friendship = new File("list.txt");
Scanner inF= new Scanner(friendship);
ArrayList<String> idArray = new ArrayList();
//String [] userID;
//int size = 6;
while (in.hasNextLine()){
idArray.add(in.nextLine());
}
String [] userFriend;
Map<String, ArrayList> map_root = new HashMap<String, ArrayList>();
while (inF.hasNextLine())
{
userFriend=inF.nextLine().split("\t");
String friendList = userFriend[1].substring(userFriend[1].indexOf("[") + 1, userFriend[1].lastIndexOf("]"));
String[] friends = friendList.split(",");
ArrayList<String> als=new ArrayList();
for(int i=0;i<friends.length;i++){
als.add(friends[i]);
}
// System.out.println(als);
map_root.put(userFriend[0],als);
}
// System.out.println(map_root);
Map<String, ArrayList> map_rs = new HashMap<String, ArrayList>();
for (String key : map_root.keySet()){
ArrayList<String> resultList =new ArrayList();
for (Iterator i = idArray.iterator(); i.hasNext();) {
Object temp = i.next();
if (map_root.get(key).contains(temp)) {
resultList.add(temp.toString());
}
map_rs.put(key, resultList);
}
}
StringBuffer buf=new StringBuffer();
for(String key:map_rs.keySet()){
buf.append(key+" "+map_rs.get(key)+"\r\n");
}
FileOutputStream os=new FileOutputStream(new File("friends.txt"));
os.write(buf.toString().getBytes(), 0, buf.toString().getBytes().length);
}
}
list.txt 中,每行的[]里面只保留那些 id.txt 有的数字,对不?你的核心问题是要消灭双循环:
for (String key : map_root.keySet()){
ArrayList<String> resultList =new ArrayList();
for (Iterator i = idArray.iterator(); i.hasNext();) {
否则你这个双循环就要老命了。当然你还有其它很多问题,主要是滥用List、对象等,会很消耗内存和CPU。
◎ 流式输入与流式输出,降低内存开销;
◎ 借助Hash提供快速检索,降低查找速度;
◎ 定期输出处理进度,避免盲目等待。
给点伪代码设计:
1、将id.txt中所有数字,读取入 HashSet ids 中
2、输出第一步的时间开销
3、准备一个复用的 StringBuilder sb
4、用 Scanner sc 打开 list.txt 文件
5、用 FileWriter fw 打开 friends.txt 文件
6、循环 sc.hasNextLine()
7、==拆解成字符数组friends(这里如果不拆解,直接按位处理性能更高);
8、==遍历字符数组->fid
9、----检查ids中是否存在该fid,存在则保存入sb
10、==检查sb长度是否>0
11、----将sb信息写入fw中
12、----将sb清空:sb.setLength(0)
13、==每1W笔处理,就输出一个时间
14、关闭fw,关闭sc
大神如果能把具体代码敲给我 磕头拜谢呜呜
但是老师就让用Java编 说不要数据库
8、==遍历字符数组->fid
9、----检查ids中是否存在该fid,存在则保存入sb
10、==检查sb长度是否>0
11、----将sb信息写入fw中
12、----将sb清空:sb.setLength(0)
13、==每1W笔处理,就输出一个时间
这几步不太明白
import java.util.*;
import java.lang.*;
import java.text.*;public class tryA{
public static void main(String args[])throws IOException{
FileInputStream fstream = new FileInputStream("X.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new InputStreamReader(in));
//output begining time
Date now1 = new Date();
DateFormat d1 = DateFormat.getDateTimeInstance();
String time1 = d1.format(now1);
System.out.println(time1);
String line=null;
Set ids=new HashSet();
while ((line=br.readLine())!=null)
{
ids.add(line);
}
//output read id time
Date now2 = new Date();
DateFormat d2 = DateFormat.getDateTimeInstance();
String time2 = d1.format(now2);
System.out.println(time2);
File friendship = new File("a.txt");
Scanner sc=new Scanner(friendship);
StringBuilder sb= new StringBuilder();
String[] strs;
FileWriter fw = new FileWriter("afriends.txt");
while (sc.hasNextLine())
{
strs=sc.nextLine().split(" ");
String friendList = strs[1].substring(strs[1].indexOf("[") + 1, strs[1].lastIndexOf("]"));
String[] friends = friendList.split(",");
for (int i=0;i<friends.length;i++)
{
if (ids.contains(friends[i]))
{
sb.append(friends[i]).append(",");
}
}
fw.write(strs[0]+" Friends["+sb.toString()+"]\r\n");
sb.setLength(0);
}
fw.close();
sc.close();
}
}
id.txt 7MB,用户数:951879
list.txt 200MB,朋友数随机按1~50个
跑了33秒,输出结果:
26MB,810729行import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.HashSet;
import java.util.Random;
import java.util.Scanner;public class FriendFilter { private static final int NUM_TOTAL = 10000000;
private static final int USER_SIZE = 1000000;
private static final int FIREND_SIZE = 50; private static final String FILE_ID = "id.txt";
private static final String FILE_LIST = "list.txt";
private static final String FILE_RESULT = "result.txt"; public static void main(String[] args) throws Exception {
//genUserId(); // Spend: 721ms
//genFriends(); // Spend: 17913ms // 1、将id.txt中所有数字,读取入 HashSet ids 中
long timer = System.currentTimeMillis();
HashSet<String> ids = new HashSet<String>();
BufferedReader brId = new BufferedReader(new InputStreamReader(FriendFilter.class.getResourceAsStream(FILE_ID)));
String idLine;
while (null != (idLine = brId.readLine())) {
ids.add(idLine);
} // 2、输出第一步的时间开销
timer = System.currentTimeMillis() - timer;
System.out.println("Read id done: " + ids.size() + "\tSpend: " + timer + "ms"); // 3、准备一个复用的 StringBuilder sb
StringBuilder sb = new StringBuilder(); // 4、用 BufferedReader brList 打开 list.txt 文件
BufferedReader brList = new BufferedReader(new InputStreamReader(
FriendFilter.class.getResourceAsStream(FILE_LIST))); // 5、用 FileWriter fw 打开 friends.txt 文件
FileWriter fw = new FileWriter(FriendFilter.class.getResource(".").getFile() + FILE_RESULT); // 6、循环 sc.hasNextLine()
int cnt = 0;
timer = System.currentTimeMillis();
String line;
while (null != (line = brList.readLine())) {
// 7、==拆解成字符数组friends(这里如果不拆解,直接按位处理性能更高);
String[] infos = line.split("\t");
String[] friends = infos[1].substring(infos[1].indexOf("[") + 1, infos[1].lastIndexOf("]")).split(",");
// 8、==遍历字符数组->fid
for (String ff : friends) {
// 9、----检查ids中是否存在该fid,存在则保存入sb
if (ids.contains(ff))
sb.append(',').append(ff);
}
// 10、==检查sb长度是否>0
if (sb.length() > 0) {
// 11、----将sb信息写入fw中
fw.write(infos[0]);
fw.write("\t[");
fw.write(sb.deleteCharAt(0).toString());
fw.write("]\n");
// 12、----将sb清空:sb.setLength(0)
sb.setLength(0);
}
// 13、==每1W笔处理,就输出一个时间
if (++cnt % 10000 == 0) {
long spend = System.currentTimeMillis() - timer;
int least = USER_SIZE - cnt;
System.out.println(cnt + "/" + least + "\tSpend: " + spend + "ms\tMay need: "
+ (least * spend / cnt / 1000) + "s");
}
} // 14、关闭fw,关闭sc
fw.close();
brList.close();
} public static void genUserId() throws IOException {
long timer = System.currentTimeMillis();
String file = FriendFilter.class.getResource(FILE_ID).getFile();
System.out.println("Generating " + file + " .....");
FileWriter fw = new FileWriter(file);
Random rand = new Random();
for (int i = 0; i < USER_SIZE; i++) {
fw.write(String.valueOf(rand.nextInt(NUM_TOTAL)));
fw.write("\n");
}
fw.close();
timer = System.currentTimeMillis() - timer;
System.out.println("Generation done: " + USER_SIZE + "\tSpend: " + timer + "ms");
} public static void genFriends() throws IOException {
long timer = System.currentTimeMillis();
String file = FriendFilter.class.getResource(FILE_LIST).getFile();
System.out.println("Generating " + file + " .....");
FileWriter fw = new FileWriter(file);
Random rand = new Random();
for (int i = 0; i < USER_SIZE; i++) {
fw.write(String.valueOf(rand.nextInt(NUM_TOTAL)));
fw.write("\t[");
for (int r = rand.nextInt(FIREND_SIZE); r >= 0; r--) {
fw.write(String.valueOf(rand.nextInt(NUM_TOTAL)));
if (r > 0)
fw.write(",");
}
fw.write("]\n");
}
fw.close();
timer = System.currentTimeMillis() - timer;
System.out.println("Generation done: " + USER_SIZE + "\tSpend: " + timer + "ms");
}
}
package net.csdn.bbs.mrsworf;import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashSet;public class FindFriends { interface FriendFinder{
void findFriends(File idFile, File listFile, File targetFile) throws IOException, IllegalAccessException;
}
static class EasyFriendFinder implements FriendFinder{
private boolean ignoreWrongFormat = true;//是否忽略List文件中数据的格式错误
private long wrongFormatCount = 0;//List文件中,数据格式错误的行数。
public void findFriends(File idFile, File listFile, File targetFile) throws IOException, IllegalAccessException {
// 由于ID文件大约10M,一般不会爆掉内存,所以,完全读取到内存中来
HashSet<String> idCache = new HashSet<String>();
BufferedReader reader = new BufferedReader(new FileReader(idFile));
String line = null;
try{
while((line=reader.readLine())!=null){
if(line.trim().length()<=0)continue;
idCache.add(line.trim());
}
}finally{
reader.close();
}
//读取List文件中的内容,同时,生成目标文件。
reader = new BufferedReader(new FileReader(listFile));
BufferedWriter writer = new BufferedWriter(new FileWriter(targetFile));
try{
while((line=reader.readLine())!=null){
//解析List文件中每一行的数据。 样例:12 [29,39,18,59]
int index = line.indexOf('[');//创建一个行级索引,辅助数据解析。
int index_ = line.lastIndexOf(']');//创建另一个行级索引,辅助数据解析。
if(index<0){
wrongFormatCount++;
if(ignoreWrongFormat)continue;//是否忽略List文件中数据的格式错误,如果不忽略则抛出异常,中断执行。
else throw new IllegalAccessException("数据格式不正确,没找到'['字符:"+line);
}
if(index_<0){
wrongFormatCount++;
if(ignoreWrongFormat)continue;//是否忽略List文件中数据的格式错误,如果不忽略则抛出异常,中断执行。
else throw new IllegalAccessException("数据格式不正确,没找到']'字符:"+line);
}
String id = line.substring(0,index).trim();
String allFriends = line.substring(index+1,index_);//所有的friend_id,以逗号分开。
String friends [] = allFriends.split("\\,|\\,");//以逗号为分隔符,拆分字符串
writer.write(id);writer.write(' ');writer.write('[');
for(int i=0;i<friends.length;i++){
if(idCache.contains(friends[i].trim())){
if(i>0)writer.write(',');
writer.write(friends[i]);
}
}
writer.write(']');writer.newLine();
}
}finally{
writer.close();
reader.close();//当writer.close()出现异常时,该语句将不会被执行,这是个BUG,楼主可以优化一下。
}
}
public boolean isIgnoreWrongFormat() {
return ignoreWrongFormat;
}
public void setIgnoreWrongFormat(boolean ignoreWrongFormat) {
this.ignoreWrongFormat = ignoreWrongFormat;
}
public long getWrongFormatCount() {
return wrongFormatCount;
}
}
/**
* 测试用例
*/
public static void main(String[] args) {
final String IdFile = "id.txt";
final String ListFile = "list.txt";
final String TargetFile = "friends.txt";
FriendFinder finder = new EasyFriendFinder();
try {
finder.findFriends(new File(IdFile),new File(ListFile),new File(TargetFile));
} catch (IOException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {
e.printStackTrace();
} }
}