JAVA如何去除2个文本相同的部分?急急急！

如题，有两个文本文件a.txt和b.txt；要删除b.txt中a.txt所包含的内容，生成c.txt。
如：a.txt内容如下:
12345
23456
78990
12342
...
b.txt内容如下:
12345
23456
78990
12342
34251
32587
78234
...
经过处理后，得到的c.txt内容如下：
34251
32587
78234
...a.txt的内容比b.txt要少很多，并且b.txt的内容在100万行左右！有没有什么高效的算法?谢谢！

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

都是5位的数字吗？
如果是的话就用位图排序，首先读取b.txt文件构造bitmap，然后遍历a.txt并把重复的位置为0，最后输出bitmap到c.txt。
那 a.txt 有多少行数据啊？
public class Test {
private static final int MAX = 100000;
private int[] bitmap = new int[MAX / 32 + 1]; public void filter() {
// read b.txt
File file = new File("c:\\b.txt");
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
while (line != null) {
int n = Integer.parseInt(line);
bitmap[n >>> 5] |= (1 << (n & 0X1F));
line = reader.readLine();
} } catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException ignoreWhenClose) {
}
}
} // read a.txt and iterate
file = new File("c:\\a.txt");
reader = null;
try {
reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
while (line != null) {
int n = Integer.parseInt(line);
if ((bitmap[n >>> 5] & (1 << (n & 0X1F))) != 0) {
bitmap[n >>> 5] &= ~(1 << (n & 0X1F));
line = reader.readLine();
}
} } catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException ignoreWhenClose) {
}
}
} //
file = new File("c:\\c.txt");
BufferedWriter writer = null;
try {
writer = new BufferedWriter(new FileWriter(file));
for (int i = 0; i < MAX; i++) {
if ((bitmap[i >>> 5] & (1 << (i & 0X1F))) != 0)
writer.append(String.valueOf(i)).append("\n");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
if (writer != null) {
try {
writer.close();
} catch (IOException ignoreWhenClose) {
}
}
}
} public static void main(String[] args) {
new Test().filter();
}
}
把A B两个文本的内容分别放进两个List中，用两个循环判断B中是否有A中的数据，如果有则把该数据从B对应的List中删除
具体的比较，删除，生成新的txt都不是问题，关键是文档太大的话，占用资源过大。想不出什么好办法解决
如果a不大的话，可以对a进行排序，然后打开b文件查找（二分查找）该元素是否在a中有，
如果没有就输入到c中。b数据量比较大的话就应该直接文件操作。
用HashMap类吧..利用Key_value 的方法..轻松解决
主要得看a.txt有多少行.如果不多可以全部读出作成一个字典,再读b.txt,读一行就查一行,字典里面没有就输出.
那应该是8位数字了，把MAX改一下就好了，用位图效率应该是最高的了。
如果两个文件不大的话用下面我给你的程序即可。如果文件很大，我建议用数据来来处理。import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.List;public class DiffentContent {
/**
* 获得b.txt文件中a.txt没有的部分
*/
private List<String> Diffent(File a, File b) {
List<String> list = new ArrayList<String>();
try {
list = this.readB(b);// 得到要比较的b.txt中的内容
BufferedReader reader = new BufferedReader(new FileReader(a));// 把a.txt读入到缓存
String oneLine = "";
while ((oneLine = reader.readLine()) != null) {// 一行一行的读取
oneLine = oneLine.trim();// 去掉前后空格
if (list.contains(oneLine))//如果包含就移除
list.remove(oneLine);
}
} catch (Exception e) {
e.printStackTrace();
}
return list;
}

/**
* 把B文件的没一行放入到List中
* @return
*/
private List<String> readB(File b) {
List<String> list = new ArrayList<String>();//要返回的b.txt文本中的内容
if(b != null) {
try {
BufferedReader reader = new BufferedReader(new FileReader(b));//把b.txt读入到缓存
String oneLine = "";
while((oneLine=reader.readLine()) != null){//一行一行的读取
list.add(oneLine.trim());//加入到List中
}
} catch (Exception e) {
e.printStackTrace();
}
}
return list;
}

/**
* 把不同的内容写入到c.txt文件中
*/
private void writeC(List<String> different, File c) {
try {
if(different != null) {
FileWriter writer = new FileWriter(c);
for(String str : different) {
writer.append(str).append("\r\n");
}
writer.flush();
writer.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}

//入口方法
public static void main(String[] args) {
File a = new File("c:/a.txt");//a.txt文件
File b = new File("c:/b.txt");//b.txt文件
File c = new File("c:/c.txt");//输出的c.txt文件
DiffentContent diffentContent = new DiffentContent();
List<String> diffent = diffentContent.Diffent(a, b);//获得b.txt文件中a.txt没有的部分
diffentContent.writeC(diffent, c);
}}
到底是几位并不是问题，是整型就可以了，如果长度不够无非是把int改成long而已，
“我建议用数据来来处理”这里想说的是数据库来处理。
如：
select num from b where b.num not in (select num from a);
这里的a表就是a.txt的内容，b就是b.txt的内容
补充一点，我的这个列表中的数字串不需要转换，直接作为String使用的。
目前这个不需要走数据库，因为ISP那边直接给的是文本格式的数据，我们只能这么处理，并且对速度要求大，不适合再走一次数据库啊！
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.List;public class DiffentContent {
    /**
     * 获得b.txt文件中a.txt没有的部分
     */
    private List<String> Diffent(File a, File b) {
        List<String> list = new ArrayList<String>();
        try {
            list = this.readB(b);// 得到要比较的b.txt中的内容
            BufferedReader reader = new BufferedReader(new FileReader(a));// 把a.txt读入到缓存
            String oneLine = "";
            while ((oneLine = reader.readLine()) != null) {// 一行一行的读取
                oneLine = oneLine.trim();// 去掉前后空格
                if (list.contains(oneLine))//如果包含就移除
                    list.remove(oneLine);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * 把B文件的没一行放入到List中
     * @return
     */
    private List<String> readB(File b) {
        List<String> list = new ArrayList<String>();//要返回的b.txt文本中的内容
        if(b != null) {
            try {
                BufferedReader reader = new BufferedReader(new FileReader(b));//把b.txt读入到缓存
                String oneLine = "";
                while((oneLine=reader.readLine()) != null){//一行一行的读取
                    list.add(oneLine.trim());//加入到List中
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return list;
    }

    /**
     * 把不同的内容写入到c.txt文件中
     */
    private void writeC(List<String> different, File c) {
        try {
            if(different != null) {
                FileWriter writer = new FileWriter(c);
                for(String str : different) {
                    writer.append(str).append("\r\n");
                }
                writer.flush();
                writer.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    //入口方法
    public static void main(String[] args) {
        File a = new File("c:/a.txt");//a.txt文件
        File b = new File("c:/b.txt");//b.txt文件
        File c = new File("c:/c.txt");//输出的c.txt文件
        DiffentContent diffentContent = new DiffentContent();
        List<String> diffent = diffentContent.Diffent(a, b);//获得b.txt文件中a.txt没有的部分
        diffentContent.writeC(diffent, c);
    }}
Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
恩。这个报错是在意料之中的。你加大些内存就可以了。你是在Eclipse中执行还是直接执行的
启动参数加上“-Xms128M -Xmx600M ”