创建数据库:create table DATA_MODEL_FINAL
(
OBJID NUMBER not null,
OBJUSERID VARCHAR2(20),
OBJELEVATION FLOAT,
GROUPID VARCHAR2(20) default 0
);
create bitmap index GROUPID_model_index on data_model_final(GROUPID);
create unique index objuserid_index on data_model_final(objuserid);
create table DATA_PIPE_FINAL
(
OBJID NUMBER not null,
STARPOINTELEVATION FLOAT,
GROUPID VARCHAR2(20) default 0,
START_POINT VARCHAR2(20));
create index START_POINT_index on data_pipe_final(START_POINT);
create bitmap index GROUPID_pipe_index on data_pipe_final(GROUPID);insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910615,'AJ31123',0,'0201');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910616,'AJ31124',0,'0202');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910617,'AJ31125',0,'0203');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910618,'AJ31126',0,'0204');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910619,'AJ31127',0,'0204');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910620,'AJ31128',0,'0204');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201101,'AJ31126',1.5,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201102,'B000001',1.8,'0101');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201103,'AJ31126',2.2,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201104,'AJ31126',1.5,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201105,'AJ31127',1.5,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201106,'AJ31127',1.9,'0104');
update语句: UPDATE DATA_MODEL_FINAL A SET OBJELEVATION=(SELECT MAX(B.STARPOINTELEVATION) FROM DATA_PIPE_FINAL B
WHERE B.GROUPID='0104' AND B.START_POINT=A.OBJUSERID
) WHERE A.GROUPID='0204';以上数据库是我精简了很多字段和数据的,实际上这2个表的数据都超过50万条了,使用update很影响效率,因此我想把以上update语句改为merge into以提高效率,各位有什么好方法否? merge into DATA_MODEL_FINAL A
using DATA_PIPE_FINAL B
on (B.GROUPID='0104' AND A.GROUPID='0204' AND B.START_POINT=A.OBJUSERID)
when matched then
update set A.OBJELEVATION=MAX(B.STARPOINTELEVATION)
(
OBJID NUMBER not null,
OBJUSERID VARCHAR2(20),
OBJELEVATION FLOAT,
GROUPID VARCHAR2(20) default 0
);
create bitmap index GROUPID_model_index on data_model_final(GROUPID);
create unique index objuserid_index on data_model_final(objuserid);
create table DATA_PIPE_FINAL
(
OBJID NUMBER not null,
STARPOINTELEVATION FLOAT,
GROUPID VARCHAR2(20) default 0,
START_POINT VARCHAR2(20));
create index START_POINT_index on data_pipe_final(START_POINT);
create bitmap index GROUPID_pipe_index on data_pipe_final(GROUPID);insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910615,'AJ31123',0,'0201');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910616,'AJ31124',0,'0202');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910617,'AJ31125',0,'0203');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910618,'AJ31126',0,'0204');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910619,'AJ31127',0,'0204');
insert into DATA_MODEL_FINAL (OBJID,OBJUSERID,OBJELEVATION,GROUPID) values (1910620,'AJ31128',0,'0204');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201101,'AJ31126',1.5,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201102,'B000001',1.8,'0101');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201103,'AJ31126',2.2,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201104,'AJ31126',1.5,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201105,'AJ31127',1.5,'0104');
insert into data_pipe_final (OBJID,START_POINT,STARPOINTELEVATION,GROUPID) values (201106,'AJ31127',1.9,'0104');
update语句: UPDATE DATA_MODEL_FINAL A SET OBJELEVATION=(SELECT MAX(B.STARPOINTELEVATION) FROM DATA_PIPE_FINAL B
WHERE B.GROUPID='0104' AND B.START_POINT=A.OBJUSERID
) WHERE A.GROUPID='0204';以上数据库是我精简了很多字段和数据的,实际上这2个表的数据都超过50万条了,使用update很影响效率,因此我想把以上update语句改为merge into以提高效率,各位有什么好方法否? merge into DATA_MODEL_FINAL A
using DATA_PIPE_FINAL B
on (B.GROUPID='0104' AND A.GROUPID='0204' AND B.START_POINT=A.OBJUSERID)
when matched then
update set A.OBJELEVATION=MAX(B.STARPOINTELEVATION)
如果想问其他的话,应该没多少方法了其他比较快的,就是把 update 改为 insert
using (SELECT MAX(STARPOINTELEVATION) as STARPOINTELEVATION, START_POINT FROM DATA_PIPE_FINAL
WHERE GROUPID='0104'
group by START_POINT) b
on (A.GROUPID='0204' AND B.START_POINT=A.OBJUSERID)
when matched then
update set A.OBJELEVATION=MAX(B.STARPOINTELEVATION)
直接
update set A.OBJELEVATION=B.STARPOINTELEVATION
ORACLE UPDATE 语句语法和性能分析的一点看法
• 为了方便起见,建立了以下简单模型,和构造了部分测试数据:
在某个业务受理子系统BSS中,
--客户资料表
create table customers
(
customer_id number(8) not null, -- 客户标示
city_name varchar2(10) not null, -- 所在城市
customer_type char(2) not null, -- 客户类型
...
)
create unique index PK_customers on customers (customer_id)
由于某些原因,客户所在城市这个信息并不怎么准确,不过在
客户服务部的CRM子系统中,通过主动服务获取了部分客户20%的所在
城市等准确信息,于是你将该部分信息提取至一张临时表中:
-- Staging table holding the verified city / customer type for the subset of
-- customers collected by the CRM subsystem.
create table tmp_cust_city
(
customer_id number(8) not null,
city_name varchar2(10) not null, -- must be city_name: the update examples read b.city_name
customer_type char(2) not null
)
1) 最简单的形式
--经确认customers表中所有customer_id小于1000均为’北京’
--1000以内的均是公司走向全国之前的本城市的老客户:)
-- Simplest form: single-table update with a literal value.
-- String literals must use plain ASCII single quotes.
update customers
set city_name='北京'
where customer_id<1000
2) 两表(多表)关联update -- 仅在where子句中的连接
--这次提取的数据都是VIP,且包括新增的,所以顺便更新客户类别
-- Marks every customer that appears in tmp_cust_city as VIP.
update customers a -- table alias
set customer_type='01' -- '01' = VIP, '00' = regular
where exists (select 1
from tmp_cust_city b
where b.customer_id=a.customer_id
)
3) 两表(多表)关联update -- 被修改值由另一个表运算而来
update customers a -- 使用别名
set city_name=(select b.city_name from tmp_cust_city b where b.customer_id=a.customer_id)
where exists (select 1
from tmp_cust_city b
where b.customer_id=a.customer_id
)
-- update 超过2个值
update customers a -- 使用别名
set (city_name,customer_type)=(select b.city_name,b.customer_type
from tmp_cust_city b
where b.customer_id=a.customer_id)
where exists (select 1
from tmp_cust_city b
where b.customer_id=a.customer_id
)
注意在这个语句中,
=(select b.city_name,b.customer_type
from tmp_cust_city b
where b.customer_id=a.customer_id
)
和
(select 1
from tmp_cust_city b
where b.customer_id=a.customer_id
)
是两个独立的子查询,查看执行计划可知,对b表/索引扫描了2遍;
如果舍弃where条件,则默认对A表进行全表
更新,但由于(select b.city_name from tmp_cust_city b where b.customer_id=a.customer_id)
有可能不能提供"足够多"值,因为tmp_cust_city只是一部分客户的信息,
所以报错(如果指定的列--city_name能为NULL则另当别论):
01407, 00000, "cannot update (%s) to NULL"
// *Cause:
// *Action:
一个替代的方法能采用:
update customers a -- 使用别名
set city_name=nvl((select b.city_name from tmp_cust_city b where b.customer_id=a.customer_id),a.city_name)
或
set city_name=nvl((select b.city_name from tmp_cust_city b where b.customer_id=a.customer_id),'未知')
-- (of course, this fallback value does not match the business logic)
4) 上述3)在一些情况下,因为B表的记录只有A表的20-30%的记录数,
考虑A表使用INDEX的情况,使用cursor也许会比关联update带来更好的性能:
-- Row-by-row alternative to the correlated update: walk tmp_cust_city in
-- customer_id order and update the matching customers row for each entry.
set serveroutput on
declare
-- Source rows: one (customer_id, city_name) pair per staging row.
cursor city_cur is
select customer_id,city_name
from tmp_cust_city
order by customer_id;
begin
for my_cur in city_cur loop
-- Single-row update keyed on customer_id.
update customers
set city_name=my_cur.city_name
where customer_id=my_cur.customer_id;
/** Rows could also be committed one at a time or in batches here, to avoid table-locking situations **/
-- Disabled example: commit after every 10000 cursor rows.
-- if mod(city_cur%rowcount,10000)=0 then
-- dbms_output.put_line('----');
-- commit;
-- end if;
end loop;
end;
5) 关联update的一个特例及性能再探讨
在oracle的update语句语法中,除了能update表之外,也能是视图,所以有以下1个特例:
update (select a.city_name,b.city_name as new_name
from customers a,
tmp_cust_city b
where b.customer_id=a.customer_id
)
set city_name=new_name
这样能避免对B表或其索引的2次扫描,但前提是 A(customer_id) b(customer_id)必须是unique index
或primary key。否则报错:
01779, 00000, "cannot modify a column which maps to a non key-preserved table"
// *Cause: An attempt was made to insert or update columns of a join view which
// map to a non-key-preserved table.
// *Action: Modify the underlying base tables directly.
6)oracle另一个常见错误
回到3)情况,由于某些原因,tmp_cust_city customer_id 不是唯一index/primary key
update customers a -- 使用别名
set city_name=(select b.city_name from tmp_cust_city b where b.customer_id=a.customer_id)
where exists (select 1
from tmp_cust_city b
where b.customer_id=a.customer_id
)
当对于一个给定的a.customer_id
(select b.city_name from tmp_cust_city b where b.customer_id=a.customer_id)
返回多于1条的情况,则会报如下错误:
01427, 00000, "single-row subquery returns more than one row"
// *Cause:
// *Action:
一个比较简单近似于不负责任的做法是
-- Quick workaround for ORA-01427: rownum=1 keeps a single, arbitrary matching
-- row per customer, so the scalar subquery can no longer return several rows.
-- It hides the duplicates in tmp_cust_city rather than resolving them.
update customers a -- table alias
set city_name=(select b.city_name from tmp_cust_city b where b.customer_id=a.customer_id and rownum=1)
-- Restrict to customers that have a staging row; otherwise unmatched rows
-- would be set to NULL.
where exists (select 1
from tmp_cust_city b
where b.customer_id=a.customer_id
)
怎么理解 01427 错误,在一个非常复杂的多表连接update的语句,经常因考虑不周,出现这个错误,
仍以上述例子来描述,一个比较简便的方法就是将A表代入 值表达式 中,使用group by 和
having 子句查看重复的记录
-- Lists every customer_id that matches more than one staging row, i.e. the
-- keys for which the scalar subquery would raise ORA-01427.
-- Grouping is on customer_id only: two staging rows with different city
-- names for the same customer are duplicates too.
select b.customer_id,count(*)
from tmp_cust_city b,customers a
where b.customer_id=a.customer_id
group by b.customer_id
having count(*)>=2
UPDATE DATA_MODEL_FINAL A SET OBJELEVATION=(SELECT MAX(B.STARPOINTELEVATION) FROM DATA_PIPE_FINAL B
WHERE B.GROUPID='0104' AND B.START_POINT=A.OBJUSERID
) WHERE A.GROUPID='0204';
建议改成下面的:
UPDATE DATA_MODEL_FINAL A
SET OBJELEVATION=(SELECT MAX(B.STARPOINTELEVATION) FROM DATA_PIPE_FINAL B WHERE B.GROUPID='0104' AND B.START_POINT=A.OBJUSERID)
WHERE EXISTS(SELECT 1 FROM DATA_PIPE_FINAL B WHERE B.GROUPID='0104' AND B.START_POINT=A.OBJUSERID)
AND A.GROUPID='0204';你自己测试一下看看上下这两条语句哪个效率高吧,我觉得用exists的高
-- MERGE form of the elevation update: the pipe rows of group '0104' are
-- aggregated once per START_POINT, then joined to the '0204' model rows.
-- Unlike the plain correlated UPDATE, model rows without a matching pipe row
-- are left unchanged instead of being set to NULL.
merge into DATA_MODEL_FINAL A
using (SELECT MAX(STARPOINTELEVATION) as STARPOINTELEVATION, START_POINT FROM DATA_PIPE_FINAL
WHERE GROUPID='0104'
group by START_POINT) b
on (A.GROUPID='0204' AND B.START_POINT=A.OBJUSERID)
when matched then
-- The source is already aggregated; an aggregate call is not allowed here.
update set A.OBJELEVATION=B.STARPOINTELEVATION;
例如:UPDATE Table2
SET Table2.ColB = Table2.ColB + Table1.ColB
FROM Table2
INNER JOIN Table1
ON (Table2.ColA = Table1.ColA);实际更新的操作是在要更新的表上进行的,而不是在from子句所形成的新的结果集上进行的。
Oracle没有update from语法,可以通过两种写法实现同样的功能:
1:子查询UPDATE A SET A.NAME=(SELECT B.NAME FROM B WHERE B.ID=A.ID),本查询要根据具体情况看看是否变通成如下
(1)单列
UPDATE A
SET A.NAME=(SELECT B.NAME FROM B WHERE B.ID=A.ID)
WHERE A.ID IN (SELECT ID FROM B);(2)多列
-- Multi-column update from a single aggregate subquery.
-- The ANSI date literal avoids depending on the session NLS date settings.
UPDATE order_rollup
SET(qty,price)=(SELECT SUM(qty),SUM(price) FROM order_lines WHERE customer_id='KOHL' )
WHERE cust_id='KOHL' AND order_period=DATE '2000-10-01'
2:利用视图来做
UPDATE (SELECT A.NAME ANAME,B.NAME BNAME FROM A,B WHERE A.ID=B.ID)
SET ANAME=BNAME;例如:UPDATE tablea a
SET a.fieldforupdate = (SELECT b.fieldsource FROM tableb b WHERE a.keyfield = b.keyfield)
WHERE EXISTS (SELECT b.fieldsource FROM tableb b WHERE a.keyfield = b.keyfield)有三点需要注意:
1. 对于一个给定的a.keyfield的值,SELECT b.fieldsource FROM tableb b WHERE a.keyfield = b.keyfield 的值只能是一个唯一值,不能是多值。
2. 在绝大多数情况下,最后面的where EXISTS子句是重要的,否则将得到错误的结果。
3. 对于视图更新的限制:
如果视图基于多个表的连接,那么用户更新(update)视图记录的能力将受到限制。除非update只涉及一个表且视图列中包含了被更新的表的整个主键,否则不能更新视图的基表。
发现的比较晚,8楼的UPDATE 很有意思哈