private static void CollectWordPair(String artical, String Char8xxFile){ int i, j, k; int l = 0; int j1, j2, j3; int flag = 0; char Char8xx[] = new char[1000];
int NumberOfRead = -1; File InputFile = new File(artical); FileReader InWord = null; File CharFile = new File(Char8xxFile); FileReader InChar = null; char Line[] = new char[8192]; String Sentence = new String(); StringBuffer Sen_NoString = new StringBuffer();
StringBuffer Sen = new StringBuffer(); String[] phrase = new String[8192]; String CharNoString = new String();
//read in char file try { InChar = new FileReader(CharFile); InChar.read(Char8xx); //System.out.println("read char file success");
} catch(FileNotFoundException e){ System.out.println("Char File Not Found"); System.exit(1); } catch(Exception e){ System.out.println("error during read char file"); System.exit(1); }
int a = 0; //read in passage line by line to check, till it is over! do{ //read in a line try{ //System.out.println("read artical file success"); NumberOfRead = InWord.read(Line); } catch(Exception e){ System.out.println("error during read char file"); System.exit(1); }
存放结果?
方式2:改变算法
// Table filled by ConstructTable() from the Probability_c query: the key is column 3 of a row,
// the value is a Vector holding {column 1 (String), column 2 (String), column 4 (Integer), Integer 0}.
private static Hashtable P_table = new Hashtable();
/**
 * Fills the in-memory lookup tables from the MySQL database "wordbase".
 *
 * W_table receives one entry per row of "worddatas": key = column 3 (String),
 * value = column 4 (Integer). P_table receives one entry per row of
 * "Probability_c": key = column 3 (String), value = a Vector holding
 * {column 1, column 2, Integer(column 4), Integer(0)}.
 *
 * Any failure (driver missing, connection refused, SQL error, null key or value)
 * is printed and terminates the JVM with exit status 1.
 *
 * NOTE(review): columns are addressed by position on "SELECT *", so the result
 * depends on the table column order; confirm against the schema.
 * NOTE(review): the database credentials are hard-coded.
 */
private static void ConstructTable(){
    final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
    final String DATABASE_URL = "jdbc:mysql://localhost/wordbase";
    Connection con1 = null;
    Statement stm1 = null;
    ResultSet rs = null;
    // Never incremented; kept only because its value is still printed below.
    int c = 0;
    try{
        Class.forName(JDBC_DRIVER);
        con1 = DriverManager.getConnection(DATABASE_URL, "root", "root");
        stm1 = con1.createStatement();

        //construct w_table
        // A fresh ResultSet is already positioned before its first row; calling
        // beforeFirst() on a forward-only result set is not permitted by JDBC.
        rs = stm1.executeQuery("SELECT * FROM worddatas");
        while(rs.next()){
            W_table.put(rs.getString(3), new Integer(rs.getInt(4)));
        }
        rs.close();
        System.out.println(c);
        System.out.println(W_table.size());

        //construct p_table
        rs = stm1.executeQuery("SELECT * FROM Probability_c");
        while(rs.next()){
            Vector vc = new Vector();
            vc.add(rs.getString(1));
            vc.add(rs.getString(2));
            vc.add(new Integer(rs.getInt(4)));
            vc.add(new Integer(0));
            P_table.put(rs.getString(3), vc);
        }
    }
    catch(Exception e){
        System.out.println(e+"in table construct");
        // System.exit does not return, so the finally block below only runs on success.
        System.exit(1);
    }
    finally{
        // Release JDBC resources in reverse order of acquisition; a failure while
        // closing cannot affect the tables that were already loaded, so it is ignored.
        if(rs != null){
            try{ rs.close(); } catch(Exception ignored){ }
        }
        if(stm1 != null){
            try{ stm1.close(); } catch(Exception ignored){ }
        }
        if(con1 != null){
            try{ con1.close(); } catch(Exception ignored){ }
        }
    }
}
// Body of CollectWordPair(artical, Char8xxFile); the method header lies outside this span.
//
// Loads up to 1000 characters from Char8xxFile into a lookup table, then reads the
// article file in 8192-char chunks. Each line is encoded character by character:
// a character found at table index j becomes the zero-padded three-digit index
// followed by '$'; any other character becomes "###$". Every maximal run of encoded
// table characters between "###$" markers is passed to doCollectProcess(), and a
// running line counter is printed after each line.
//
// NOTE(review): FileReader decodes with the platform default charset; confirm that
// this matches the encoding of both input files.
int i, j, k;
char Char8xx[] = new char[1000];
int NumberOfRead = -1;
File InputFile = new File(artical);
FileReader InWord = null;
File CharFile = new File(Char8xxFile);
FileReader InChar = null;
char Line[] = new char[8192];
StringBuffer Sen_NoString = new StringBuffer();
StringBuffer Sen = new StringBuffer();

//read in char file
try {
    InChar = new FileReader(CharFile);
    // A single read() may return fewer characters than requested, so keep
    // reading until the table is full or the file ends.
    int filled = 0;
    while(filled < Char8xx.length){
        int got = InChar.read(Char8xx, filled, Char8xx.length - filled);
        if(got < 0){
            break;
        }
        filled += got;
    }
}
catch(FileNotFoundException e){
    System.out.println("Char File Not Found");
    System.exit(1);
}
catch(Exception e){
    System.out.println("error during read char file");
    System.exit(1);
}
finally{
    if(InChar != null){
        try{ InChar.close(); } catch(Exception ignored){ }
    }
}

try
{
    InWord = new FileReader(InputFile);
}
catch(FileNotFoundException e){
    System.out.println(e);
    // Without the article there is nothing to process; stop here instead of
    // failing later on a null reader with a misleading message.
    System.exit(1);
}

int a = 0;
boolean endOfInput = false;
//read in passage chunk by chunk to check, till it is over!
do{
    try{
        NumberOfRead = InWord.read(Line);
    }
    catch(Exception e){
        System.out.println("error during read char file");
        System.exit(1);
    }
    // read() returns -1 only at end of stream; a short read is not end of file.
    endOfInput = NumberOfRead < 0;
    // Scan only the characters this read delivered. At end of input run one extra
    // step so that a last line without a trailing '\n' is still processed.
    int steps = endOfInput ? 1 : NumberOfRead;
    for(i=0; i<steps; i++){
        if(!endOfInput && Line[i] != '\n'){
            Sen.append(Line[i]);
            continue;
        }
        if(endOfInput && Sen.length() == 0){
            break;
        }

        //change the sentence char to no_string for the ease of sql process
        for(k=0; k<Sen.length(); k++){
            char ch = Sen.charAt(k);
            int code = -1;
            for(j=0; j<Char8xx.length; j++){
                if(ch == Char8xx[j]){
                    code = j;
                    break;
                }
            }
            if(code < 0){
                Sen_NoString.append("###$");
            }
            else{
                // three decimal digits, most significant first
                Sen_NoString.append(code / 100);
                Sen_NoString.append((code / 10) % 10);
                Sen_NoString.append(code % 10);
                Sen_NoString.append('$');
            }
        }

        //Start collect word-pairs: split the encoded line at "###$" markers
        int encodedLength = Sen_NoString.length();
        k = 0;
        while(k < encodedLength){
            if(Sen_NoString.substring(k, k+4).equals("###$")){
                k = k + 4;
                continue;
            }
            // Advance j to the next marker, or to the end of the line when the run
            // is not followed by one, so k always moves forward and the trailing
            // run is not lost.
            j = k + 4;
            while(j < encodedLength && !Sen_NoString.substring(j, j+4).equals("###$")){
                j = j + 4;
            }
            doCollectProcess(Sen_NoString.substring(k, j));
            k = j + 4;
        }

        Sen.setLength(0);
        Sen_NoString.setLength(0);
        System.out.println(a++);
    }
}while(!endOfInput);

try{ InWord.close(); } catch(Exception ignored){ }
}
可以考虑加个算法,比如 Hashtable 里先放 1000 行表数据;
可选的算法就多了,比如最近最多使用、最近使用等缓存替换策略。