merge into A
using B
on(条件)
。
A表是7千万的大表(分区表,每天这么多数据量),B表数据量在400万左右,on条件为
A的分区字段=一个值以及3个主键的关联关系
现在每天执行一下要2个多小时,请问有没有好的办法该进下?
B表由于每天数据都要truncate和重新insert,所以不好建索引
请哪位大拿帮帮忙,thanks
using B
on(条件)
。
A表是7千万的大表(分区表,每天这么多数据量),B表数据量在400万左右,on条件为
A的分区字段=一个值以及3个主键的关联关系
现在每天执行一下要2个多小时,请问有没有好的办法该进下?
B表由于每天数据都要truncate和重新insert,所以不好建索引
请哪位大拿帮帮忙,thanks
B表也要建,现在是对A表全表扫描,找与B表对应的数据,所以b表也要建索引。如果能把A表的数据缩小一个范围就好了,7千万可不是个小数量
不用merge的思路,有没有其他好的思路比它效率高?
cursor c is select * from B;
type t_type is table of pk_col%type;
t_array t_type;open c;
loop;
fetch c bulk collect into t_array limit 3000;
forall i in t_array.first .. t_array.last
merge into A
using B...
on (... and B.pk=t_array(i) )
...
exit when c%notfound;
end loop;
close c;
type t_type is table of pk_col%type;
t_array t_type;
t_num number;begin
t_num := 0;
open c;
loop
fetch c bulk collect into t_array limit 3000;
t_num := t_num + 1;
forall i in t_array.first .. t_array.last
merge into A
using B...
on (... and A.pk=t_array(i) ) //A.pk=t_array(i) 还是 B.pk=t_array(i) 具体那个比较快,可以找一个具体的值测试一下
...
exit when c%notfound;
end loop;
close c;
commit;
exception
when others then
rollback;
dbms_output.put_line(sqlerrm);
dbms_output.put_line('From '|| t_num * 3000 || ' records failed!!');
end;
V_ROWCOUNT := 0;
V_SQL := 'ALTER TABLE DWD_DQ_USER_SVC TRUNCATE PARTITION PART' || V_DAY;
EXECUTE IMMEDIATE V_SQL; --老数据的插入
INSERT /*+APPEND*/
INTO DWD_DQ_USER_SVC NOLOGGING
SELECT /*+PARALLEL(A,4)*/
V_ACCT_DAY,
A.USER_NO,
A.SERVICE_ID,
A.MAIN_FLAG,
A.START_DATE,
A.END_DATE,
A.ITEM_ID,
A.PACKAGE_ID,
A.USER_NO_A,
V_DAY
FROM DWD_DQ_USER_SVC A
WHERE PART_ID =
SUBSTR(TO_CHAR(TO_DATE(V_ACCT_DAY, 'YYYYMMDD') - 1, 'YYYYMMDD'),
7,
2)
AND NVL(END_DATE, V_ACCT_DAY) >= V_ACCT_DAY;
V_ROWCOUNT := SQL%ROWCOUNT;
COMMIT;
V_SQL := 'TRUNCATE TABLE TEMP_DQ_USER_SVC';
EXECUTE IMMEDIATE V_SQL;
INSERT /*+APPEND*/
INTO TEMP_DQ_USER_SVC NOLOGGING
SELECT /*+PARALLEL(A,4)*/
USER_ID,
SERVICE_ID,
MAIN_TAG,
START_DATE,
END_DATE,
ITEM_ID,
PACKAGE_ID,
USER_ID_A
FROM STAGE.TF_F_USER_SVC_INC A
WHERE DAY_ID = V_ACCT_DAY
AND ROWID = (SELECT MAX(ROWID)
FROM STAGE.TF_F_USER_SVC_INC B
WHERE B.USER_ID = A.USER_ID
AND B.SERVICE_ID = A.SERVICE_ID
AND B.START_DATE = A.START_DATE);
COMMIT;
--插入增量数据
MERGE /*+parallel(a,4) parallel(b,4)*/INTO DWD_DQ_USER_SVC A
USING TEMP_DQ_USER_SVC B
ON (A.USER_NO = B.USER_ID AND A.SERVICE_ID = B.SERVICE_ID AND A.START_DATE = B.START_DATE AND A.PART_ID = V_DAY)
WHEN MATCHED THEN
UPDATE SET A.END_DATE = B.END_DATE
WHEN NOT MATCHED THEN
INSERT
(A.ACCT_DAY,
A.USER_NO,
A.SERVICE_ID,
A.MAIN_FLAG,
A.START_DATE,
A.END_DATE,
A.ITEM_ID,
A.PACKAGE_ID,
A.USER_NO_A,
A.PART_ID)
VALUES
(V_ACCT_DAY,
B.USER_ID,
B.SERVICE_ID,
B.MAIN_TAG,
B.START_DATE,
B.END_DATE,
B.ITEM_ID,
B.PACKAGE_ID,
B.USER_ID_A,
SUBSTR(V_ACCT_DAY, 7, 2)); --计算插入数据条数
V_ROWCOUNT := V_ROWCOUNT + SQL%ROWCOUNT;
COMMIT; V_SQL := 'TRUNCATE TABLE TEMP_DQ_USER_SVC';
EXECUTE IMMEDIATE V_SQL;
这个是我现在用的sql,昨天晚上跑了9000秒,A表和B表都是没有索引的表,A表是list分区表,按月循环建的,楼上的写法我也写了一点,但好像forall和merge一起写不行,不知道为什么?能帮我看下现在的代码有啥新思路或好办法优化一下,或者我的sql是不是写得有问题?
v_flag out varchar2) is
type pktype is record(
v_userno test_xww_temp_user_svc.user_id%type,
v_serviceid test_xww_temp_user_svc.service_id%type,
v_startdate test_xww_temp_user_svc.start_date%type);
type t_array is table of pktype;
v_pk t_array;
cursor c is
select user_id,service_id,start_date from test_xww_temp_user_svc;begin
open c;
loop
fetch c bulk collect
into v_pk limit 3000;
for i in v_pk.first .. v_pk.last loop
merge into test_xww_user_svc A using
(select * from test_xww_temp_USER_SVC where user_id=v_pk(i).v_userno
and service_id=v_pk(i).v_serviceid and start_date=v_pk(i).v_startdate) B
on (A.USER_NO = B.USER_ID AND A.SERVICE_ID = B.SERVICE_ID AND A.START_DATE = B.START_DATE AND A.PART_ID = '12')
WHEN MATCHED THEN UPDATE SET A.END_DATE = B.END_DATE
WHEN NOT MATCHED THEN
INSERT(A.ACCT_DAY, A.USER_NO, A.SERVICE_ID, A.MAIN_FLAG, A.START_DATE, A.END_DATE, A.ITEM_ID, A.PACKAGE_ID, A.USER_NO_A, A.PART_ID)
VALUES('20090312', B.USER_ID, B.SERVICE_ID, B.MAIN_TAG, B.START_DATE, B.END_DATE, B.ITEM_ID, B.PACKAGE_ID, B.USER_ID_A, SUBSTR('20090312', 7, 2));
exit when c%notfound;
end loop;
end loop;
close c;
end;
我这样写不报错,但是如果把for i in v_pk.first .. v_pk.last loop这一段用
forall i in v_pk.first .. v_pk.last
就不行,
Error: PLS-00436: implementation restriction: cannot reference fields of BULK In-BIND table of records
Line: 20
Text: and service_id=v_pk(i).v_serviceid and start_date=v_pk(i).v_startdate) BError: PLS-00382: expression is of wrong type
Line: 20
Text: and service_id=v_pk(i).v_serviceid and start_date=v_pk(i).v_startdate) BError: PLS-00436: implementation restriction: cannot reference fields of BULK In-BIND table of records
Line: 20
Text: and service_id=v_pk(i).v_serviceid and start_date=v_pk(i).v_startdate) BError: PLS-00382: expression is of wrong type
Line: 20
Text: and service_id=v_pk(i).v_serviceid and start_date=v_pk(i).v_startdate) BError: PLS-00436: implementation restriction: cannot reference fields of BULK In-BIND table of records
Line: 19
Text: (select * from test_xww_temp_USER_SVC where user_id=v_pk(i).v_usernoError: PLS-00382: expression is of wrong type
Line: 19
Text: (select * from test_xww_temp_USER_SVC where user_id=v_pk(i).v_usernoError: PL/SQL: ORA-22806: not an object or REF
Line: 20
Text: and service_id=v_pk(i).v_serviceid and start_date=v_pk(i).v_startdate) B
2.清空表TEMP_DQ_USER_SVC数据,重新生成表TEMP_DQ_USER_SVC的数据
3.merge into 表A楼主尝试一下在开始的时候计算时间到底耗费大概是怎么样的
v_date1 number;
v_date2 number;
v_date3 number;v_date1 := dbms_utility.get_time ;
dbms_output.put_line('---------------Begin Time: ' || v_date1 || ' ------------------') ;
1.按月分区,然后先清空当前这个月分区内所有数据,根据上个月的数据先插入数据
v_date2 := dbms_utility.get_time ;
dbms_output.put_line('---------------First Step: ' || (v_date2 - v_date1) || ' ------------------') ;2.清空表TEMP_DQ_USER_SVC数据,重新生成表TEMP_DQ_USER_SVC的数据
v_date3 := dbms_utility.get_time ;
dbms_output.put_line('---------------Second Step: ' || (v_date3 - v_date2) || ' ------------------') ;3.merge into 表A
dbms_output.put_line('---------------Third Step: ' || (dbms_utility.get_time - v_date3) || ' ------------------') ;统计一下时间耗费状况对楼主的业务逻辑非常困惑,每次都需要把整个分区的数据清空,每次都要处理总数据1/12的数据
不是merge into
时间消耗早前已测试,是merge into的时间消耗
这样做了以后,发现有个疑问,原来我merge目标表时,on条件是加了A表的分区判断,但从速度比较来看,貌似分区判断没起作用,能请高手帮忙看看原来的过程,是不是写
得有问题?
1.不用将A的数据复制到临时表
2.在表A建立USER_NO,SERVICE_ID,START_DATE,PART_ID的局部(LOCAL)复合索引
3.将MERGE /*+parallel(a,4) parallel(b,4)*/改为MERGE /*+leading(b) use_nl(b a)*/
我前段时间也遇到过一个这样的问题,但是数据量在 1000万左右.
但是由于比对的过程比较复杂,所以就没有用merge语句.
采用的是内存数据库,在内存中进行分析,让后将分析的结果保存到历史库中.