有两个文本文件A和B,这两个文件里的内容类似(如AB两文件的数据大部分相同,只有少部分数据不同),要对这两个文件进
行比较,将这两个文件中的不同的数据找出来,然后存到一个新的文本文件里。注:以A为基准进行比较;即,A里有的而B里没有的就将其不同的数据存到新文件;B里有的而A里没有的就不对其进行存储。请教大侠们,要如何入手啊?小弟现在的想法是把两个文件都读入字符串,然后再进一步操作,不知道这样是否可行啊?此外,还有没有更好的方法啊?希望大侠们能提供下代码,最好是完整代码!谢谢啦!!!!!很急的啊!!!!!
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;

/**
 * Character-level comparison of two text files, using the first file as the baseline.
 *
 * <p>Lines are paired by position. For every pair, the characters of the baseline line that
 * differ from the character at the same index of the other line are collected, together with
 * any tail of the baseline line that extends past the end of the other line.
 */
public class FileComper {
    /** Not used inside this class. */
    String sPath;
    /** Path most recently passed to {@link #delFile(String)}. */
    String sFilePath;
    /** Not used inside this class. */
    String sContent;

    /**
     * Demo driver: writes two small sample files to {@code d:\} and compares them.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        FileComper test = new FileComper();
        String crlf = System.getProperty("line.separator");
        String str1 = "你好这是测试字段xx" + crlf + "abcdefgaigeiieriepty";
        String str2 = "你x这是测y字段yy" + crlf + "dbcdefgaigeiieriy";
        test.writeFilebyExistsOver(str1, "d:\\1.txt");
        test.writeFilebyExistsOver(str2, "d:\\2.txt");
        test.readFile("d:\\1.txt", "d:\\2.txt");
    }

    /**
     * Compares two files line by line and writes the differing characters of the first file
     * to {@code d:\3.txt}.
     *
     * <p>If the second file has fewer lines than the first, the surplus lines of the first
     * file are reported in full. Extra lines of the second file are ignored. The per-line
     * results are concatenated without separators.
     *
     * @param Filename  path of the baseline file (A)
     * @param Filename2 path of the file compared against the baseline (B)
     * @return the collected differences; whatever was collected so far if reading failed
     */
    public String readFile(String Filename, String Filename2) {
        File baseFile = new File(Filename);
        File otherFile = new File(Filename2);
        if (!baseFile.exists()) {
            System.err.println("Can't Find " + Filename);
        }
        if (!otherFile.exists()) {
            System.err.println("Can't Find " + Filename2);
        }
        StringBuilder diff = new StringBuilder();
        BufferedReader in = null;
        BufferedReader in2 = null;
        try {
            in = new BufferedReader(new FileReader(baseFile));
            in2 = new BufferedReader(new FileReader(otherFile));
            String lineA;
            while ((lineA = in.readLine()) != null) {
                // lineB is null once B is exhausted; f() treats that as an empty line.
                String lineB = in2.readLine();
                System.out.println("a:" + lineA);
                System.out.println("b:" + lineB);
                diff.append(f(lineA, lineB));
            }
            writeFilebyExistsOver(diff.toString(), "d:\\3.txt");
        } catch (IOException e) {
            // Report the failure instead of silently discarding it.
            e.printStackTrace();
        } finally {
            closeQuietly(in);
            closeQuietly(in2);
        }
        return diff.toString();
    }

    /**
     * Returns the characters of {@code aStrA} that differ from {@code aStrB} at the same index,
     * followed by the part of {@code aStrA} that lies beyond the end of {@code aStrB}.
     *
     * @param aStrA baseline text; {@code null} is treated as empty
     * @param aStrB text to compare against; {@code null} is treated as empty
     * @return the differing characters of {@code aStrA}, in order; never {@code null}
     */
    String f(String aStrA, String aStrB) {
        if (aStrA == null) {
            return "";
        }
        String other = (aStrB == null) ? "" : aStrB;
        int shared = Math.min(aStrA.length(), other.length());
        StringBuilder diff = new StringBuilder();
        for (int i = 0; i < shared; i++) {
            char c = aStrA.charAt(i);
            if (c != other.charAt(i)) {
                diff.append(c);
            }
        }
        // Everything in A past the end of B has no counterpart and counts as different.
        diff.append(aStrA, shared, aStrA.length());
        return diff.toString();
    }

    /**
     * Writes {@code datas} to {@code Filename}, replacing an existing file, and prints the
     * elapsed write time. Failures are printed to standard error and not rethrown.
     *
     * @param datas    text to write (encoded with the platform default charset)
     * @param Filename path of the file to (re)create
     */
    public void writeFilebyExistsOver(String datas, String Filename) {
        BufferedOutputStream buff = null;
        try {
            File file = new File(Filename);
            if (file.exists()) {
                delFile(Filename);
            }
            buff = new BufferedOutputStream(new FileOutputStream(file));
            long begin0 = System.currentTimeMillis();
            buff.write(datas.getBytes());
            buff.flush();
            long end0 = System.currentTimeMillis();
            System.out.println("BufferedOutputStream执行耗时:" + (end0 - begin0)
                    + " 豪秒");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Closed exactly once, also when the write fails.
            closeQuietly(buff);
        }
    }

    /**
     * Deletes the file at path {@code s} and remembers the path in {@link #sFilePath}.
     *
     * @param s path of the file to delete
     * @return {@code true} only if the file existed and was actually deleted
     */
    public boolean delFile(String s) {
        sFilePath = s;
        File dFile = new File(sFilePath);
        if (!dFile.exists()) {
            System.out.print("文件:" + s + "不存在!");
            return false;
        }
        try {
            // File.delete() signals failure through its return value, so pass it on.
            return dFile.delete();
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /** Closes {@code resource} if it is non-null, printing (not propagating) any failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
HLR SUBSCRIBER DATASUBSCRIBER IDENTITY
MSISDN IMSI STATE AUTHD
8613821087959 460001013233963 CONNECTED AVAILABLENAM
0PERMANENT SUBSCRIBER DATA
SUD
CAT-10 DBSG-1 TSMO-0 OBR-2
BS26-1 TS11-1 TS21-1 TS22-1
TS61-1 BS3G-1 TCSI-1 OCSI-1
REDMCH-1 OFA-1 PWD-0000 CFU-1
CFB-1 CFNRY-1 CFNRC-1 BAOC-1
BOIC-1 CAW-1 SOCFB-0 SOCFRY-0
SOCFRC-0 SOCFU-0 SOCB-0 SOCLIP-0
HOLD-1 CLIP-1AMSISDN BS BC
NONE SUPPLEMENTARY SERVICE DATA
BSG
TS10
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC ACTIVE-OP
CAW ACTIVE-OP
CFU NOT ACTIVE
CFB NOT ACTIVE
CFNRY NOT ACTIVE
CFNRC ACTIVE-OP 8613800220309 BSG
TS20
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC ACTIVE-OPBSG
TS60
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC NOT ACTIVE
CAW NOT ACTIVE
CFU NOT ACTIVE
CFB NOT ACTIVE
CFNRY NOT ACTIVE
CFNRC NOT ACTIVEBSG
BS20
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC NOT ACTIVE
CAW NOT ACTIVE
CFU NOT ACTIVE
CFB NOT ACTIVE
CFNRY NOT ACTIVE
CFNRC NOT ACTIVEBSG
BS30
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC NOT ACTIVE
CAW NOT ACTIVE
CFU NOT ACTIVE
CFB NOT ACTIVE
CFNRY NOT ACTIVE
CFNRC ACTIVE-OP 8613800220309 LOCATION DATA
VLR ADDRESS MSRN MSC NUMBER LMSID
4-8613440759 8613440759 SGSN NUMBER
4-8613740216
MS PURGED IN SGSNPACKET DATA PROTOCOL CONTEXT DATA
APNID PDPADD EQOSID VPAA PDPCH PDPTY PDPID
1235 365 NO IPV4 1
1234 365 NO IPV4 2END
<hgsdp:msisdn=8613820858133,all;
HLR SUBSCRIBER DATASUBSCRIBER IDENTITY
MSISDN IMSI STATE AUTHD
8613820858133 460000823232614 CONNECTED AVAILABLENAM
0PERMANENT SUBSCRIBER DATA
SUD
CAT-10 DBSG-1 TSMO-0 OBR-2
TS11-1 TS21-1 TS22-1 BS3G-1
TCSI-1 OCSI-1 REDMCH-1 OFA-1
PWD-0000 CFU-1 CFB-1 CFNRY-1
CFNRC-1 BAOC-1 BOIC-1 CAW-1
SOCFB-0 SOCFRY-0 SOCFRC-0 SOCFU-0
SOCB-0 SOCLIP-0 HOLD-1 CLIP-1AMSISDN BS BC
NONE SUPPLEMENTARY SERVICE DATA
BSG
TS10
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC ACTIVE-OP
CAW ACTIVE-OP
CFU NOT ACTIVE
CFB NOT ACTIVE
CFNRY NOT ACTIVE
CFNRC NOT ACTIVEBSG
TS20
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC ACTIVE-OPBSG
BS30
SS STATUS FNUM TIME
SADD
BAOC NOT ACTIVE
BOIC ACTIVE-OP
CAW ACTIVE-OP
CFU NOT ACTIVE
CFB NOT ACTIVE
CFNRY NOT ACTIVE
CFNRC NOT ACTIVELOCATION DATA
VLR ADDRESS MSRN MSC NUMBER LMSID
4-8613742202 8613742202 SGSN NUMBER
4-8613740204
MS PURGED IN SGSNPACKET DATA PROTOCOL CONTEXT DATA
APNID PDPADD EQOSID VPAA PDPCH PDPTY PDPID
1235 365 NO IPV4 1
1234 365 NO IPV4 2END<hgsdp:msisdn=8613622099586,all;
HLR SUBSCRIBER DATASUBSCRIBER IDENTITY
MSISDN IMSI STATE AUTHD
8613622099586 460002011213450 CONNECTED AVAILABLENAM
0PERMANENT SUBSCRIBER DATA
SUD
CAT-10 DBSG-1 TSMO-0 OBR-2
TS11-1 TS21-1 TS22-1 BS3G-1
TCSI-1 OCSI-1 REDMCH-1 OFA-1
PWD-0000 BOIC-1 SOCB-0 SOCLIP-0
CLIP-1AMSISDN BS BC
NONE SUPPLEMENTARY SERVICE DATA
BSG
TS10
SS STATUS FNUM TIME
SADD
BOIC NOT ACTIVEBSG
TS20
SS STATUS FNUM TIME
SADD
BOIC NOT ACTIVEBSG
BS30
SS STATUS FNUM TIME
SADD
BOIC NOT ACTIVELOCATION DATA
VLR ADDRESS MSRN MSC NUMBER LMSID
4-8613440756 8613440756 SGSN NUMBER
UNKNOWNPACKET DATA PROTOCOL CONTEXT DATA
APNID PDPADD EQOSID VPAA PDPCH PDPTY PDPID
1235 365 NO IPV4 1
1234 365 NO IPV4 2END
“TCSI”和“OCSI”的数据项的有无或数据项后边的数字参数的不同。然后把该号码及其不同的数据项找出来存入新文件里。
PilotEdit提供了强大的文件比较的功能,可以将第一个文件中存在而第二个文件中不存在的字符串拷贝到剪贴板。
具体怎样做可以参考手册 http://www.pilotedit.com/uploads/PilotEdit_2.6.pdf 第38页的例子。
// Sketch: open both files and walk them in lockstep, one line from each per iteration.
// Open file A for buffered, line-oriented reading.
File fileA = new File("文件A");
InputStream isA = new FileInputStream(fileA);
// The next line also declares fileB (two statements share one line).
BufferedReader brA = new BufferedReader(new InputStreamReader(isA)); File fileB = new File("文件B");
InputStream isB = new FileInputStream(fileB);
// The next line also starts the read loop.
BufferedReader brB = new BufferedReader(new InputStreamReader(isB)); while(true)
{
// NOTE(review): readLine() returns null at end of input and nothing here checks for it,
// so the loop has no exit and lineA.equals(...) throws once file A is exhausted.
String lineA = brA.readLine();
// Lines at the same position are compared; the branch below runs when they differ.
String lineB = brB.readLine(); if(!lineA.equals(lineB))
{
... ...
}
}
1.msisdn这个号,在同一文件中是否可能有重复?
2.比较的文件中,A文件有某个msisdn号,是否B文件中一定也有这个号?如果一定有,相对位置是不是一样的?3.你只是比较两个文件,还是用一个文件去和某个目录下的所有类似文件都做比较?
4.用来比较的文件的文件名的命名是否有一定的规则?5.比较结果的保存文件如何命名?这个题和两个文件的比较不太一样啊.
好,答完以上问题,大家可以试试着做一做了.
2.类似8613821087959这样的号码,A里有,B里可能没有;
3.只是比较两个文件
4.比较的文件的文件名的命名没有什么规则
5.保存文件可随意命名
import java.io.*;
import java.util.regex.*;
/**
 * Compares the SUD data items of every MSISDN found in file A against the same MSISDN in
 * file B and writes, per MSISDN, the items that A has and B lacks.
 *
 * <p>NOTE(review): this class uses java.util types (ArrayList, Arrays, Scanner) that the
 * import lines preceding it do not bring into scope; {@code import java.util.*;} is needed.
 */
public class UnknowFilesCompare{
    /**
     * Compares {@code a.txt} with {@code b.txt}; the result goes to {@code a_b_compared.txt}.
     */
    public static void main(String[] args) throws Exception{
        unknowFilesComparator("a.txt","b.txt");
    }

    /**
     * Writes the comparison of {@code fileA} (baseline) against {@code fileB} to
     * {@code <A>_<B>_compared.txt}, where {@code <A>} and {@code <B>} are the given names up to
     * their first dot.
     *
     * @param fileA baseline data file
     * @param fileB data file compared against the baseline
     * @throws IllegalArgumentException if either input file cannot be opened
     * @throws Exception if the result file cannot be written
     */
    static void unknowFilesComparator(String fileA,String fileB) throws Exception{
        ArrayList<String[]> ra=usefulData(fileA);
        ArrayList<String[]> rb=usefulData(fileB);
        // Result name is built from BOTH inputs (the second part used to repeat fileA).
        String f1=fileA.split("\\.")[0];
        String f2=fileB.split("\\.")[0];
        // Sort the data items of every B record once so they can be binary-searched below.
        for(String[] recordB:rb){
            Arrays.sort(recordB,1,recordB.length);
        }
        BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(f1+"_"+f2+"_compared.txt"),"UTF-8"));
        try{
            StringBuilder sb=new StringBuilder();
            for(String[] recordA:ra){
                boolean found=false; // set once B contains the same MSISDN
                sb.append("MSISDN: "+recordA[0]+"\r\n");
                for(String[] recordB:rb){
                    if(!recordA[0].equals(recordB[0])){
                        continue;
                    }
                    found=true;
                    boolean allEquals=true; // stays true while every item of A is also in B
                    for(int k=1;k<recordA.length;k++){
                        // A negative result means the item is absent from B's sorted items.
                        if(Arrays.binarySearch(recordB,1,recordB.length,recordA[k])<0){
                            allEquals=false;
                            sb.append(recordA[k]+" ");
                        }
                    }
                    if(allEquals){
                        sb.append("___Same___"+fileB);
                    }
                    sb.append("\r\n");
                }
                if(!found){
                    for(int j=1;j<recordA.length;j++){
                        sb.append(recordA[j]+" ");
                    }
                    sb.append("\r\n---Not found in "+fileB);
                }
                bw.write(sb.toString());
                bw.newLine();
                sb.setLength(0);
            }
            bw.flush();
        }finally{
            bw.close();
        }
    }

    /**
     * Extracts the data to compare. Each returned array holds one record: element 0 is the
     * MSISDN captured from a line matching {@code .+=(digits),.+}; the remaining elements are
     * the whitespace-separated items found between the line containing {@code SUD} and the
     * line containing {@code AMSISDN}.
     *
     * @param fileName path of the data file
     * @return one array per record, in file order
     * @throws IllegalArgumentException if the file cannot be opened
     */
    public static ArrayList<String[]> usefulData(String fileName){
        Scanner scan;
        try{
            scan=new Scanner(new File(fileName));
        }catch(FileNotFoundException ffe){
            // Fail with the file name instead of a NullPointerException further down.
            throw new IllegalArgumentException("Cannot open data file: "+fileName,ffe);
        }
        ArrayList<String> temp=new ArrayList<String>();
        ArrayList<String[]> result=new ArrayList<String[]>();
        try{
            while(scan.hasNext()){
                if(scan.findInLine(".+\\=(\\d+)\\,.+")!=null){ // line carrying the MSISDN
                    temp.add(scan.match().group(1));
                    scan.nextLine();
                    while(scan.findInLine("SUD")==null) scan.nextLine(); // advance to the SUD line
                    scan.nextLine();
                    while(scan.findInLine("AMSISDN.+")==null){ // until the AMSISDN line
                        String line=scan.nextLine().trim();
                        if(line.length()!=0){
                            temp.addAll(Arrays.asList(line.split("\\s+")));
                        }
                    }
                    result.add(temp.toArray(new String[temp.size()]));
                    temp.clear();
                }
                scan.nextLine();
            }
        }finally{
            scan.close();
        }
        return result;
    }

    /** Prints every record on its own line (debugging aid). */
    static void printList(ArrayList<String[]> li){
        for(String[] record:li){
            System.out.println(Arrays.toString(record));
        }
    }
}
// 结果:
MSISDN: 8613821087959
___Same___b.txtMSISDN: 8613820858133
CFU-1 CFB-1 CFNRY-1 CFNRC-1 BAOC-1 CAW-1 SOCFB-0 SOCFRY-0 SOCFRC-0 SOCFU-0 HOLD-1 MSISDN: 8613622099586
___Same___b.txt
以上结果是13楼的例样中第二个号与第三个号互换,而内容没有换,形成一个b.txt文件.原例样为a.txt的结果.
java.io.FileNotFoundException: a.txt (系统找不到指定的文件。)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(FileInputStream.java:106)
at java.util.Scanner.<init>(Scanner.java:636)
at UnknowFilesCompare.usefulData(UnknowFilesCompare.java:67)
at UnknowFilesCompare.unknowFilesComparator(UnknowFilesCompare.java:12)
at UnknowFilesCompare.main(UnknowFilesCompare.java:8)
Exception in thread "main" java.lang.NullPointerException
at UnknowFilesCompare.usefulData(UnknowFilesCompare.java:71)
at UnknowFilesCompare.unknowFilesComparator(UnknowFilesCompare.java:12)
at UnknowFilesCompare.main(UnknowFilesCompare.java:8)
上面的,a.txt,b.txt,是我机器上用来调试的文件。
换成你自己要比较的文件,你要比较的文件要和程序放在同一个目录。
结果文件的文件名: 第一个文件主文件名_第二个文件的主文件名_compared.txt
工具的名字叫 Beyond.Compare_2.4.3.243_SC-special.exe
你可以下载一下看看,好容易用的
这位兄弟,hasBag()这个方法在哪啊,我在API中没有找到,如果有这个方法,编程确实可以简单一些.
我试着改了下
/**
 * Extracts the data to compare, including the NAM value. Each returned array holds one
 * record: element 0 is the MSISDN, element 1 is {@code "nam-" + <NAM value>}, and the
 * remaining elements are the whitespace-separated items found between the line containing
 * {@code SUD} and the line containing {@code AMSISDN}.
 *
 * <p>The NAM value is taken from the line that follows the line containing {@code NAM}.
 * The arrays never contain {@code null}, so they can be sorted and binary-searched.
 *
 * @param fileName path of the data file
 * @return one array per record, in file order
 * @throws IllegalArgumentException if the file cannot be opened
 */
public static ArrayList<String[]> usefulData(String fileName){
    Scanner scan;
    try{
        scan=new Scanner(new File(fileName));
    }catch(FileNotFoundException ffe){
        // Fail with the file name instead of a NullPointerException further down.
        throw new IllegalArgumentException("Cannot open data file: "+fileName,ffe);
    }
    ArrayList<String> temp=new ArrayList<String>();
    ArrayList<String[]> result=new ArrayList<String[]>();
    try{
        while(scan.hasNext()){
            if(scan.findInLine(".+\\=(\\d+)\\,.+")!=null){ // line carrying the MSISDN
                temp.add(scan.match().group(1));
                scan.nextLine();
                while(scan.findInLine("NAM")==null) scan.nextLine(); // advance to the NAM line
                scan.nextLine();                   // drop the rest of the NAM heading line
                String nam=scan.nextLine().trim(); // the value sits on the following line
                // Stored once per record; padding the per-line arrays used to leave a null
                // slot that made the later sort/binary search throw NullPointerException.
                temp.add("nam-"+nam);
                while(scan.findInLine("SUD")==null) scan.nextLine(); // advance to the SUD line
                scan.nextLine();
                while(scan.findInLine("AMSISDN.+")==null){ // until the AMSISDN line
                    String line=scan.nextLine().trim();
                    if(line.length()!=0){
                        temp.addAll(Arrays.asList(line.split("\\s+")));
                    }
                }
                result.add(temp.toArray(new String[temp.size()]));
                temp.clear();
            }
            scan.nextLine();
        }
    }finally{
        scan.close();
    }
    return result;
}
for(int k=1;k<temp1.length;k++){
int index=Arrays.binarySearch(temp2,1,temp2.length,temp1[k]);
if(!(index>=1&&index<temp2.length&&temp2[index].equals(temp1[k]))){
allEqauls=false;
if(temp1[k].trim().startsWith("TCSI")||temp1[k].trim().startsWith("OCSI"))
sb.append("HGCMP:MSISDN = "+temp1[0]+","+temp1[k]+";\r\n");
else if(temp1[k].trim().startsWith("nam"))
sb.append("HGSnC:MSISDN = "+temp1[0]+",SUD = "+temp1[k].substring(temp1[k].length()-1)+";\r\n");
else
sb.append("HGSDC:MSISDN = "+temp1[0]+",SUD = "+temp1[k]+";\r\n");
}
}//比较数据
可是为何会报错和(空指针)异常啊?
你找到NAM一行后,没有提取数据.给我发两份实际的数据文件到,我晚上回家给你改一下.同时,再把你的要求再说一下,NAM下的数据怎么比较?
VLR ADDRESS下的数据又是怎么比较的?
import java.io.*;
import java.util.regex.*;
/**
 * Compares, per MSISDN, the NAM value, the SUD data items and the VLR address of file A
 * against file B, and counts how often each kind of SUD item differs.
 *
 * <p>NOTE(review): this class uses java.util types (ArrayList, Arrays, HashMap, Scanner)
 * that the import lines preceding it do not bring into scope; {@code import java.util.*;}
 * is needed.
 */
public class UnknowFilesCompare{
    /** Compares {@code f:/test/a.txt} with {@code f:/test/b.txt}. */
    public static void main(String[] args) throws Exception{
        unknowFilesComparator("f:/test/a.txt","f:/test/b.txt");
    }

    /**
     * Compares {@code fileA} (baseline) against {@code fileB}. Two files are written next to
     * {@code fileA}: {@code <A>_<B>_compared.txt} with the per-MSISDN report and
     * {@code <A>_<B>_count.txt} with the number of differences per SUD item name, where
     * {@code <A>} and {@code <B>} are the file names up to their first dot.
     * Example: f:/test/a.txt and f:/test/b.txt give f:/test/a_b_compared.txt and
     * f:/test/a_b_count.txt.
     *
     * @param fileA baseline data file
     * @param fileB data file compared against the baseline
     * @throws IllegalArgumentException if either input file cannot be opened
     * @throws Exception if a result file cannot be written
     */
    static void unknowFilesComparator(String fileA,String fileB) throws Exception{
        File fa=new File(fileA);
        File fb=new File(fileB);
        String faName=fa.getName();
        String fbName=fb.getName();
        // Directory of fileA, resolved by File itself so it works with any path separator.
        File outDir=fa.getAbsoluteFile().getParentFile();
        String stem=faName.split("\\.")[0]+"_"+fbName.split("\\.")[0];
        File fc=new File(outDir,stem+"_compared.txt");
        File fd=new File(outDir,stem+"_count.txt");
        ArrayList<String[]> ra=usefulData(fa);
        ArrayList<String[]> rb=usefulData(fb);
        // Sort the SUD items (index 2 .. length-2) of every B record once, up front.
        for(String[] recordB:rb){
            Arrays.sort(recordB,2,recordB.length-1);
        }
        HashMap<String,Integer> count=new HashMap<String,Integer>();
        BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fc),"UTF-8"));
        try{
            StringBuilder sb=new StringBuilder();
            for(String[] recordA:ra){
                boolean found=false; // set once B contains the same MSISDN
                sb.append("MSISDN: "+recordA[0]+"\r\n");
                for(String[] recordB:rb){
                    if(!recordA[0].equals(recordB[0])){
                        continue;
                    }
                    sb.append("NAM:\r\n"+faName+":"+recordA[1]+" "+fbName+":"+recordB[1]+"\r\n");
                    found=true;
                    boolean allEquals=true; // stays true while every SUD item of A is also in B
                    sb.append("SUD:\r\n");
                    for(int k=2;k<recordA.length-1;k++){
                        // Search exactly the sorted SUD range; including index 1 (the NAM
                        // value) would search an unsorted range and can miss present items.
                        if(Arrays.binarySearch(recordB,2,recordB.length-1,recordA[k])>=0){
                            continue;
                        }
                        allEquals=false;
                        sb.append(recordA[k]+" ");
                        String key=recordA[k].split("-")[0]; // item name without its value
                        Integer seen=count.get(key);
                        count.put(key,seen==null?1:seen+1);
                    }
                    if(allEquals){
                        sb.append("___Same___"+fileB);
                    }
                    sb.append("\r\n");
                    sb.append("VLR ADDRESS:\r\n"+faName+":"+recordA[recordA.length-1]+" "
                            +fbName+":"+recordB[recordB.length-1]+"\r\n");
                }
                if(!found){
                    for(int j=1;j<recordA.length;j++){
                        sb.append(recordA[j]+" ");
                    }
                    sb.append("\r\n---Not found in "+fileB);
                }
                bw.write(sb.toString());
                bw.newLine();
                sb.setLength(0);
            }
            bw.flush();
        }finally{
            bw.close();
        }
        BufferedWriter bw2=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fd),"UTF-8"));
        try{
            for(String k:count.keySet()){
                bw2.write(k+": "+count.get(k)+"\r\n");
            }
            bw2.flush();
        }finally{
            bw2.close();
        }
    }

    /**
     * Extracts the data to compare. Each returned array holds one record:
     * [0] MSISDN, [1] NAM value, [2 .. length-2] SUD items, [length-1] VLR address
     * (first whitespace-separated token of the line after the "VLR ADDRESS" heading).
     *
     * @param myFile the data file
     * @return one array per record, in file order
     * @throws IllegalArgumentException if the file cannot be opened
     */
    public static ArrayList<String[]> usefulData(File myFile){
        Scanner scan;
        try{
            scan=new Scanner(myFile);
        }catch(FileNotFoundException ffe){
            // Fail with the file name instead of a NullPointerException further down.
            throw new IllegalArgumentException("Cannot open data file: "+myFile,ffe);
        }
        ArrayList<String> temp=new ArrayList<String>();
        ArrayList<String[]> result=new ArrayList<String[]>();
        try{
            while(scan.hasNext()){
                if(scan.findInLine(".+\\=(\\d+)\\,.+")!=null){ // line carrying the MSISDN
                    temp.add(scan.match().group(1));
                    scan.nextLine();
                    while(scan.findInLine("NAM.*")==null) scan.nextLine(); // advance to the NAM line
                    scan.nextLine();
                    temp.add(scan.nextLine().trim()); // NAM value
                    while(scan.findInLine("SUD.*")==null) scan.nextLine(); // advance to the SUD line
                    scan.nextLine();
                    while(scan.findInLine("AMSISDN.*")==null){
                        String line=scan.nextLine().trim(); // one line of SUD items
                        if(line.length()!=0){
                            temp.addAll(Arrays.asList(line.split("\\s+")));
                        }
                    }
                    scan.nextLine();
                    while(scan.findInLine("VLR\\s+ADDRESS.+")==null) scan.nextLine(); // "VLR ADDRESS" heading
                    scan.nextLine();
                    String vlrLine=scan.nextLine().trim();
                    temp.add(vlrLine.split("\\s+")[0]);
                    result.add(temp.toArray(new String[temp.size()]));
                    temp.clear();
                }
                scan.nextLine();
            }
        }finally{
            scan.close();
        }
        return result;
    }

    /** Prints every record: the MSISDN on one line, the remaining elements on the next. */
    static void printList(ArrayList<String[]> li){
        for(String[] sts:li){
            System.out.println(sts[0]);
            for(int j=1;j<sts.length;j++){
                System.out.print(sts[j]+" ");
            }
            System.out.println("");
        }
    }
}
// 楼主有了新要求.按新要求改了一下程序.
import java.io.*;
import java.util.regex.*;
/**
 * Compares, per MSISDN, the NAM value, the SUD data items and the VLR address of file A
 * against file B, and counts how often each kind of SUD item differs.
 *
 * <p>NOTE(review): this class uses java.util types (ArrayList, Arrays, HashMap, Scanner)
 * that the import lines preceding it do not bring into scope; {@code import java.util.*;}
 * is needed.
 */
public class UnknowFilesCompare{
    /** Compares {@code f:/test/a.txt} with {@code f:/test/b.txt}. */
    public static void main(String[] args) throws Exception{
        unknowFilesComparator("f:/test/a.txt","f:/test/b.txt");
    }

    /**
     * Compares {@code fileA} (baseline) against {@code fileB}. Two files are written next to
     * {@code fileA}: {@code <A>_<B>_compared.txt} with the per-MSISDN report and
     * {@code <A>_<B>_count.txt} with the number of differences per SUD item name, where
     * {@code <A>} and {@code <B>} are the file names up to their first dot.
     * Example: f:/test/a.txt and f:/test/b.txt give f:/test/a_b_compared.txt and
     * f:/test/a_b_count.txt.
     *
     * @param fileA baseline data file
     * @param fileB data file compared against the baseline
     * @throws IllegalArgumentException if either input file cannot be opened
     * @throws Exception if a result file cannot be written
     */
    static void unknowFilesComparator(String fileA,String fileB) throws Exception{
        File fa=new File(fileA);
        File fb=new File(fileB);
        String faName=fa.getName();
        String fbName=fb.getName();
        // Directory of fileA, resolved by File itself so it works with any path separator.
        File outDir=fa.getAbsoluteFile().getParentFile();
        String stem=faName.split("\\.")[0]+"_"+fbName.split("\\.")[0];
        File fc=new File(outDir,stem+"_compared.txt");
        File fd=new File(outDir,stem+"_count.txt");
        ArrayList<String[]> ra=usefulData(fa);
        ArrayList<String[]> rb=usefulData(fb);
        // Sort the SUD items (index 2 .. length-2) of every B record once, up front.
        for(String[] recordB:rb){
            Arrays.sort(recordB,2,recordB.length-1);
        }
        HashMap<String,Integer> count=new HashMap<String,Integer>();
        BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fc),"UTF-8"));
        try{
            StringBuilder sb=new StringBuilder();
            for(String[] recordA:ra){
                boolean found=false; // set once B contains the same MSISDN
                sb.append("MSISDN: "+recordA[0]+"\r\n");
                for(String[] recordB:rb){
                    if(!recordA[0].equals(recordB[0])){
                        continue;
                    }
                    sb.append("NAM:\r\n"+faName+":"+recordA[1]+" "+fbName+":"+recordB[1]+"\r\n");
                    found=true;
                    boolean allEquals=true; // stays true while every SUD item of A is also in B
                    sb.append("SUD:\r\n");
                    for(int k=2;k<recordA.length-1;k++){
                        // Search exactly the sorted SUD range; including index 1 (the NAM
                        // value) would search an unsorted range and can miss present items.
                        if(Arrays.binarySearch(recordB,2,recordB.length-1,recordA[k])>=0){
                            continue;
                        }
                        allEquals=false;
                        sb.append(recordA[k]+" ");
                        String key=recordA[k].split("-")[0]; // item name without its value
                        Integer seen=count.get(key);
                        count.put(key,seen==null?1:seen+1);
                    }
                    if(allEquals){
                        sb.append("___Same___"+fileB);
                    }
                    sb.append("\r\n");
                    sb.append("VLR ADDRESS:\r\n"+faName+":"+recordA[recordA.length-1]+" "
                            +fbName+":"+recordB[recordB.length-1]+"\r\n");
                }
                if(!found){
                    for(int j=1;j<recordA.length;j++){
                        sb.append(recordA[j]+" ");
                    }
                    sb.append("\r\n---Not found in "+fileB);
                }
                bw.write(sb.toString());
                bw.newLine();
                sb.setLength(0);
            }
            bw.flush();
        }finally{
            bw.close();
        }
        BufferedWriter bw2=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fd),"UTF-8"));
        try{
            for(String k:count.keySet()){
                bw2.write(k+": "+count.get(k)+"\r\n");
            }
            bw2.flush();
        }finally{
            bw2.close();
        }
    }

    /**
     * Extracts the data to compare. Each returned array holds one record:
     * [0] MSISDN, [1] NAM value, [2 .. length-2] SUD items, [length-1] VLR address
     * (first whitespace-separated token of the line after the "VLR ADDRESS" heading).
     *
     * @param myFile the data file
     * @return one array per record, in file order
     * @throws IllegalArgumentException if the file cannot be opened
     */
    public static ArrayList<String[]> usefulData(File myFile){
        Scanner scan;
        try{
            scan=new Scanner(myFile);
        }catch(FileNotFoundException ffe){
            // Fail with the file name instead of a NullPointerException further down.
            throw new IllegalArgumentException("Cannot open data file: "+myFile,ffe);
        }
        ArrayList<String> temp=new ArrayList<String>();
        ArrayList<String[]> result=new ArrayList<String[]>();
        try{
            while(scan.hasNext()){
                if(scan.findInLine(".+\\=(\\d+)\\,.+")!=null){ // line carrying the MSISDN
                    temp.add(scan.match().group(1));
                    scan.nextLine();
                    while(scan.findInLine("NAM.*")==null) scan.nextLine(); // advance to the NAM line
                    scan.nextLine();
                    temp.add(scan.nextLine().trim()); // NAM value
                    while(scan.findInLine("SUD.*")==null) scan.nextLine(); // advance to the SUD line
                    scan.nextLine();
                    while(scan.findInLine("AMSISDN.*")==null){
                        String line=scan.nextLine().trim(); // one line of SUD items
                        if(line.length()!=0){
                            temp.addAll(Arrays.asList(line.split("\\s+")));
                        }
                    }
                    scan.nextLine();
                    while(scan.findInLine("VLR\\s+ADDRESS.+")==null) scan.nextLine(); // "VLR ADDRESS" heading
                    scan.nextLine();
                    String vlrLine=scan.nextLine().trim();
                    temp.add(vlrLine.split("\\s+")[0]);
                    result.add(temp.toArray(new String[temp.size()]));
                    temp.clear();
                }
                scan.nextLine();
            }
        }finally{
            scan.close();
        }
        return result;
    }

    /** Prints every record: the MSISDN on one line, the remaining elements on the next. */
    static void printList(ArrayList<String[]> li){
        for(String[] sts:li){
            System.out.println(sts[0]);
            for(int j=1;j<sts.length;j++){
                System.out.print(sts[j]+" ");
            }
            System.out.println("");
        }
    }
}
细节上,没有理解楼主的意思,不过,楼主可以根据我的代码稍作更改即可。
如果我理解的正确,就不用更改了。
楼主把以下代码拷贝到一个类里面,运行一下即可。
// In-memory image of one information block (one subscriber) of a data file.
static class Entry{
// Matches a block's first line: "<hgsdp:msisdn=" followed by 13 digits (captured as group 1) and at least one more character.
static final Pattern NumberPattern = Pattern.compile("<hgsdp:msisdn=(\\d{13}).+");
// Marker texts that parseFile looks for in the (trimmed) lines of a block.
static final String SUD = "SUD";
static final String AMSISDN = "AMSISDN";
static final String END = "END";
String msisdn;// phone number
int sud;// position of the SUD line (index into contents)
int amsisdn;// position of the AMSISDN line (index into contents)
Map<String,String> info = new LinkedHashMap<String,String>();// the items to compare, in insertion order
List<String> contents = new ArrayList<String>();// raw lines of the block; kept for extending the program
}
/**
 * Compares every subscriber that occurs in both files and saves the differences.
 * Subscribers that occur in only one of the files are not reported.
 *
 * @param src  baseline file (A)
 * @param des  file to compare against the baseline (B)
 * @param diff file that receives the differences
 * @throws IOException if a file cannot be read or written
 */
public static void compare(File src,File des,File diff) throws IOException{
    final Map<String,Entry> baseline = parseFile(src);
    final Map<String,Entry> candidate = parseFile(des);
    final Set<Entry> changed = new LinkedHashSet<Entry>();
    // Walk B's subscribers in file order and keep only those that A also contains.
    for(Map.Entry<String,Entry> slot:candidate.entrySet()){
        final String number = slot.getKey();
        if(!baseline.containsKey(number)){
            continue;
        }
        final Entry delta = new Entry();
        delta.msisdn = number;
        compare(baseline.get(number),slot.getValue(),delta);
        if(!delta.info.isEmpty()){
            changed.add(delta);
        }
    }
    save(changed,diff);
}
/**
 * Writes the differences to {@code diff}: for each entry, one line with the MSISDN followed
 * by one line with its {@code key-value} items, each item terminated by a tab.
 *
 * @param cache entries to write, in iteration order
 * @param diff  output file
 * @throws IOException if the file cannot be written
 */
private static void save(Set<Entry> cache, File diff) throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter(diff));
    try{
        for(Entry e:cache){
            bw.append(e.msisdn);
            bw.newLine();
            for(Map.Entry<String, String> info:e.info.entrySet()){
                bw.append(info.getKey()).append('-').append(info.getValue()).append('\t');
            }
            // End the item row so the next MSISDN starts on its own line.
            bw.newLine();
        }
        bw.flush();
    }finally{
        bw.close();
    }
}

/**
 * Collects into {@code e.info} the differences between one subscriber's items in the two
 * files: first the items present in both with different values (the DES value is stored),
 * then the items present only in SRC. Neither input entry is modified.
 *
 * @param srcE entry from the baseline file
 * @param desE entry from the compared file
 * @param e    receives the differences
 */
private static void compare(Entry srcE, Entry desE, Entry e) {
    // Read-only passes: keySet() is a live view, so retainAll/removeAll on it would
    // delete items from the parsed entries themselves.
    for(Map.Entry<String,String> item:desE.info.entrySet()){
        String key = item.getKey();
        if(!srcE.info.containsKey(key))continue;
        if(item.getValue().equals(srcE.info.get(key)))continue;
        e.info.put(key, item.getValue());
    }
    for(Map.Entry<String,String> item:srcE.info.entrySet()){
        if(desE.info.containsKey(item.getKey()))continue;
        e.info.put(item.getKey(), item.getValue());
    }
}

/**
 * Parses a data file into entries keyed by MSISDN, in file order. A block starts at a line
 * matching {@code Entry.NumberPattern} and is stored when a line whose trimmed text equals
 * {@code Entry.END} is read. Its {@code info} map receives the {@code NAME-VALUE} tokens of
 * the lines between the SUD line and the AMSISDN line.
 *
 * @param file the data file
 * @return the parsed entries
 * @throws IOException if the file cannot be opened
 */
private static Map<String, Entry> parseFile(File file) throws IOException {
    Map<String,Entry> fileImage = new LinkedHashMap<String,Entry>();
    Entry e = null;
    Scanner scan = new Scanner(file);
    try{
        while(scan.hasNextLine()){
            String line = scan.nextLine();
            Matcher m = Entry.NumberPattern.matcher(line);// first line of a block?
            if(m.matches()){
                e= new Entry();
                e.msisdn = m.group(1);
            }
            if(e==null)continue;// nothing is recorded before the first block starts
            if(line.trim().equals(Entry.SUD))e.sud=e.contents.size();
            if(line.trim().startsWith(Entry.AMSISDN))e.amsisdn=e.contents.size();
            e.contents.add(line);
            if(line.trim().equals(Entry.END)){// last line of a block
                for(int index = e.sud+1;index<e.amsisdn;index++){// collect the items to compare
                    if(e.contents.get(index).trim().length()<=0)continue;
                    String array [] = e.contents.get(index).split("\\s");
                    for(String info :array){
                        String tmp [] = info.split("\\-");
                        if(tmp==null || tmp.length<2)continue;
                        e.info.put(tmp[0], tmp[1]);
                    }
                }
                fileImage.put(e.msisdn, e);
            }
        }
    }finally{
        scan.close();
    }
    return fileImage;
}
/**
 * Test driver: compares D:\A.txt (baseline) with D:\B.txt and writes the differences to
 * D:\C.txt.
 */
public static void main(String[] args) throws IOException {
    final File baseline = new File("D:\\A.txt");
    final File candidate = new File("D:\\B.txt");
    final File report = new File("D:\\C.txt");
    compare(baseline,candidate,report);
}