20101020061010|808080123100001|1488015462605121|0001|000000001000|1001|20101020|4007|156|20101020|745147|20101020060855|13776413305|A73E0784555C454927D809A3776952114EFCACA0276C6F8B79FDF55558D316021867784F5E08A8143D6B91ACDFD23AC3975A52C52EFB5CA5FD4744227AA4999513324B043D4521F134A787EBD8C67E5023EC63A7CD2C1920A4E04733C4470F2DE757EE9DEE7F34BAA414901BFBB4419E79787CFFD3254B00A9E6985471C0D3ED
20101020064016|808080123100001|1488015462897029|0001|000000001000|1001|20101020|4007|156|20101020|745412|20101020063917|13776413305|8C28C569D19DF848216A8FE00EE5E029F3EC13E5018BDBD0F1CA1C1B80975EE1318E07851516583A894F4D07CF8696FF7E1098ADBBC85E8083E2C99361D476693986F4031A3C9FC9010CF0A34C2CD15551A46D1409DCE365B4FAAFD24DA6073C842865F3B4BBA8DA4F2A0C106F7B618DC9CC5117CAE69099801B226535363C6A
20101020071538|808080123100001|1488015463780929|0001|000000001000|1001|20101020|1023|156|20101020|746973|20101020071458|13705192654|8DFFCA3E5BA75A2A5D02DB69DEF6EF944DE14719B9967D0CF438104A959A3AB8BD8B44FB60D302B0CA56D311BA80951C487D42BBCC2AFD13D783CAD44021DAC3A5E59D28CFD00AB9983D50F71293B56B7894EED16F580378348962C7B14DAB002FE6FEC6EB93F6B8842F4DEC7A9512882DBF358EE2E73425485EBA88E9DCE6E2
一个目录下有多个这样的文本文件,文件中存的是上面那样的数据,一条一条的,每行对应数据库中的一条记录。现在要把这些数据写入到库中,请问各位有什么高招可以提高效率?
20101020064016|808080123100001|1488015462897029|0001|000000001000|1001|20101020|4007|156|20101020|745412|20101020063917|13776413305|8C28C569D19DF848216A8FE00EE5E029F3EC13E5018BDBD0F1CA1C1B80975EE1318E07851516583A894F4D07CF8696FF7E1098ADBBC85E8083E2C99361D476693986F4031A3C9FC9010CF0A34C2CD15551A46D1409DCE365B4FAAFD24DA6073C842865F3B4BBA8DA4F2A0C106F7B618DC9CC5117CAE69099801B226535363C6A
20101020071538|808080123100001|1488015463780929|0001|000000001000|1001|20101020|1023|156|20101020|746973|20101020071458|13705192654|8DFFCA3E5BA75A2A5D02DB69DEF6EF944DE14719B9967D0CF438104A959A3AB8BD8B44FB60D302B0CA56D311BA80951C487D42BBCC2AFD13D783CAD44021DAC3A5E59D28CFD00AB9983D50F71293B56B7894EED16F580378348962C7B14DAB002FE6FEC6EB93F6B8842F4DEC7A9512882DBF358EE2E73425485EBA88E9DCE6E2
。。一个目录下是多个这文本文件,文件中存的是 上面那样的数据,一条条的,对应着数据库中的一条记录,现在要把数据写入到库中,请问众神有什么高招,提高效率。
public void loadFile() {
    // Bulk-loads one '|'-delimited reconciliation file into account_of_check.
    //
    // Fixes vs. the original version:
    //  1. The final partial batch was executed but never committed while
    //     autoCommit was off — up to (batchSize - 1) rows per file were lost.
    //  2. count was tested (count % 100 == 0) BEFORE being incremented, so the
    //     very first record was flushed as a batch of one.
    //  3. RandomAccessFile was never closed, and Connection/PreparedStatement
    //     leaked on the exception path; all resources are now released in finally.
    //  4. BufferedReader replaces RandomAccessFile.readLine(), which does an
    //     unbuffered read per byte — the dominant cost of the reported 8s run.
    //     (Record lines are pure ASCII, so the default charset is safe here —
    //     NOTE(review): confirm if files may ever contain non-ASCII bytes.)
    //  5. Scroll-sensitive/read-only result-set hints are meaningless for an
    //     INSERT and can add overhead on some drivers; plain prepareStatement.
    final int batchSize = 100; // rows per executeBatch/commit cycle
    String sql = "INSERT INTO account_of_check" +
        "(accountchid,Chinapaydate,MerId,Ordid,TransType,Transamt,Transstat," +
        "Transdate,GateId,Curid,cpDate,Cpseqid,longname1," +
        "telno,checkValue)" +
        " VALUES(SEQ_account_of_check.nextVal,?,?,?,?,?,?," +
        "?,?,?,?,?,?," +
        "?,?)";
    Connection conn = null;
    PreparedStatement pstmt = null;
    BufferedReader reader = null;
    int count = 0; // number of valid (14-field) records loaded
    try {
        DataBaseConnection dbc = new DataBaseConnection();
        conn = dbc.getConnection();
        conn.setAutoCommit(false); // commit manually, once per batch
        pstmt = conn.prepareStatement(sql);
        reader = new BufferedReader(new FileReader(path));
        String line;
        while ((line = reader.readLine()) != null) {
            // Split each line on the field delimiter ('|').
            String[] fields = line.split(fieldLimitChar);
            if (fields.length != fieldAllCount) {
                continue; // not a detail record (header/footer/garbage) — skip
            }
            // Bind the 14 data columns; parameter 1..14 follow the sequence
            // value already embedded in the SQL. Assumes fieldAllCount == 14,
            // matching the column list above — TODO confirm.
            for (int i = 0; i < fieldAllCount; i++) {
                pstmt.setString(i + 1, tranStr(fields[i]));
            }
            pstmt.addBatch();
            count++; // increment BEFORE the modulo test so batches are full-sized
            if (count % batchSize == 0) {
                pstmt.executeBatch();
                conn.commit();
            }
        }
        // Flush AND COMMIT the trailing partial batch — the original executed
        // it but never committed, silently dropping those rows.
        pstmt.executeBatch();
        conn.commit();
        System.out.println("共有合法的记录" + count + "条");
    } catch (Exception e) {
        // Undo the uncommitted tail of the current batch before reporting.
        if (conn != null) {
            try { conn.rollback(); } catch (SQLException ignore) {}
        }
        e.printStackTrace();
    } finally {
        // Release everything regardless of success or failure.
        if (reader != null) { try { reader.close(); } catch (IOException ignore) {} }
        if (pstmt != null)  { try { pstmt.close();  } catch (SQLException ignore) {} }
        if (conn != null)   { try { conn.close();   } catch (SQLException ignore) {} }
    }
}
上面是代码现在处理记录5928条,程序运行时间: 8077ms当前目录下有多个文件,每个文件都有 近6000条数据 怎么处理???
既然数据来自于自有的格式化数据,不用担心恶意Sql注入问题,可以不用prepareStatement,而是直接拼出最终的sql语句。
一次读入一个大数据块,自己分行,这样可以减少文件IO的次数,尤其你还是远程读文件
例如用begin 50个INSERT的SQL end;
这种批量提交执行的SQL来提升 入库的效率
-- MySQL bulk load: streams a client-side file straight into the table,
-- far faster than row-by-row INSERTs. LOCAL reads the file from the client
-- machine and requires local_infile to be enabled on server and client.
LOAD DATA LOCAL INFILE 'C:/abc.txt'
-- NOTE(review): "table" here is a placeholder — it is a reserved word and
-- must be replaced with (or backtick-quoted as) the real table name.
INTO TABLE table
-- Field separator matching the '|'-delimited record files above.
COLUMNS TERMINATED BY '|'
oracle中是使用sqlldr(SQL*LOADER)实现
demo.ctl如下
load data infile datadome.dat "str X'7C'"
into table ***
(column1)
其中str属性指定行结束符
是16进制格式的select utl_raw.cast_to_raw('|') from dual--查看管道符(|)d的16进制串
比下面的
20101020061010|808080123100001|1488015462605121|0001|000000001000|1001|20101020|4007|156|20101020|745147|20101020060855|13776413305|A73E0784555C454927D809A3776952114EFCACA0276C6F8B79FDF55558D316021867784F5E08A8143D6B91ACDFD23AC3975A52C52EFB5CA5FD4744227AA4999513324B043D4521F134A787EBD8C67E5023EC63A7CD2C1920A4E04733C4470F2DE757EE9DEE7F34BAA414901BFBB4419E79787CFFD3254B00A9E6985471C0D3ED少好几列,上面的的是记录总条数,总金额
-- SQL*Loader control file for the '|'-delimited reconciliation files.
-- skip=1 ignores the first line of each data file (the summary record that
-- carries total count / total amount rather than a detail row — see the
-- discussion above).
OPTIONS (skip=1)
load data
-- Absolute path to one data file; add more INFILE lines (or run sqlldr once
-- per file) to load a whole directory.
INFILE 'G:\workspase2\Test\src\resource\test\808080123100001_20101020_20101021042834.txt'
-- NOTE(review): no APPEND/REPLACE keyword, so the default INSERT mode is
-- used, which fails unless the target table is empty. Add APPEND after
-- "into table" to load into a table that already contains rows — this is
-- likely why the attempt at L105 "这样用不行".
into table account_of_check
(
-- Each column is read as character data up to the next '|' delimiter;
-- declaring the terminator per column is equivalent to a single
-- FIELDS TERMINATED BY '|' clause.
accountchid char terminated by '|',
Chinapaydate char terminated by '|',
MerId char terminated by '|',
Ordid char terminated by '|',
TransType char terminated by '|',
Transamt char terminated by '|',
Transstat char terminated by '|',
Transdate char terminated by '|',
GateId char terminated by '|',
Curid char terminated by '|',
cpDate char terminated by '|',
Cpseqid char terminated by '|',
longname1 char terminated by '|',
telno char terminated by '|',
checkValue char terminated by '|')
sqlldr xwmall/[email protected]/oradb control =E:\Noname1.ctl
这个oracle 的load 这样用不行啊
可以把oracle的给个简单例子不
load data
infile 'F:/文件.txt'
append into table 表名
fields terminated by '|'
TRAILING NULLCOLS
(字段名, 字段名, 默认值字段名 CONSTANT "值")
默认值字段名 CONSTANT "值" 可以直接把 "值" 录入数据表,不会去取文件中的数据,但该列要放在字段列表的最后。注意必须使用半角引号并加 CONSTANT 关键字,全角引号 “值” 会导致 SQL*Loader 解析控制文件失败。
将所有文件名放入一个线程安全的队列中(Vector 的方法本身就是 synchronized 的,也可用 BlockingQueue)
开多个线程并发地从队列取文件、各自读取并入库,即可并行处理多个文件
你这个文件才5000多条,实在是太小了,也不适合文件分段读取之类的,听你的问题是有很这样的文件,怕量大了慢,你就写个线程池,多弄几个线程吧,每个线程读取几个文件,那样会节省点总体的时间。上面的程序包含创建数据库连接,文件连接的内容,而且读取文件的流也不是最优的,所以要8秒多,在多个文件的情况下,是可以共享这些时间的,而且SPLIT效率很慢,需替换成别的。
你这个文件才5000多条,实在是太小了,也不适合文件分段读取之类的,听你的问题是有很这样的文件,怕量大了慢,你就写个线程池,多弄几个线程吧,每个线程读取几个文件,那样会节省点总体的时间。
上面的程序包含创建数据库连接,文件连接的内容,而且读取文件的流也不是最优的,所以要8秒多,在多个文件的情况下,是可以共享这些时间的,而且SPLIT效率很慢,需替换成别的。