--查出所有重复记录 select name,sex,age,id from( select name,sex,age,id from ttt where exists (select name,sex from (select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a where ttt.name=a.name and ttt.sex=a.sex))a union ( select name,sex,age,id from ttt where exists (select name,age from (select name,age,count(name) from ttt group by name,age having count(name)>=2)a where ttt.name=a.name and ttt.age=a.age))
--查询重复条数 select count(*) from ( select name,sex,age,id from( select name,sex,age,id from ttt where exists (select name,sex from (select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a where ttt.name=a.name and ttt.sex=a.sex))a union ( select name,sex,age,id from ttt where exists (select name,age from (select name,age,count(name) from ttt group by name,age having count(name)>=2)a where ttt.name=a.name and ttt.age=a.age)) )
--执行计划 SELECT STATEMENT, GOAL = ALL_ROWS Cost=14 Cardinality=2 Bytes=78 SORT UNIQUE Cost=14 Cardinality=2 Bytes=78 UNION-ALL FILTER TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=39 FILTER SORT GROUP BY NOSORT Cost=3 Cardinality=1 Bytes=13 TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=13 FILTER TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=39 FILTER SORT GROUP BY NOSORT Cost=3 Cardinality=1 Bytes=23 TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=23 4次全表,貌似效率..... 方法1是3次全表.
--查询重复里的不重复数据 select max(ttt.id) as id,ttt.name,ttt.sex,ttt.age from ttt where exists( select name,sex,age from ( select a.name,a.sex,b.age from( select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a,
( select name,age,count(name) from ttt group by name,age having count(name)>=2)b where a.name=b.name ) c where ttt.name=c.name and ttt.sex=c.sex and ttt.age=c.age) group by ttt.name,ttt.sex,ttt.age
Have a try. with a as (select id, count(*) over(partition by t.name, t.sex) count from ttt t), b as (select id, count(*) over(partition by t.name, t.age) count from ttt t) select count(id) from (select a.id from a where a.count >= 2 union select b.id from b where b.count >= 2)
在原来基础上还可以继续优化,代码如下: select count(*) from (select id, count(*) over(partition by t.name, t.sex) count1, count(*) over(partition by t.name, t.age) count2 from ttt t) a where a.count1 > 1 or a.count2 > 1
不重复的居多,总数据在650W左右,重复的大概60W到70W左右....
over partition by倒是没想到过,多谢提醒 附上一个在itpub上问到的答案,大家共同参考,呵呵 SELECT * FROM ttt a WHERE EXISTS (SELECT 1 FROM ttt b WHERE a.NAME=b.NAME AND (a.sex=b.sex OR a.age=b.age) AND a.ROWID <> b.ROWID);
执行计划挺好,只扫一次索引,但是,实际执行结果.... 因为实绩表比较大... 目前表现最好的就是SELECT * FROM ttt a WHERE EXISTS (SELECT 1 FROM ttt b WHERE a.NAME=b.NAME AND (a.sex=b.sex OR a.age=b.age) AND a.ROWID <> b.ROWID);
这个案例不错,优化要根据具体环境来做的 set autotrace on set timing on 把执行计划和统计信息share一下吧
SELECT * FROM ttt a WHERE EXISTS (SELECT 1 FROM ttt b WHERE a.NAME=b.NAME AND (a.sex=b.sex OR a.age=b.age) AND a.ROWID <> b.ROWID);很给力![code] delete from ttt a WHERE EXISTS (SELECT 1 FROM ttt b WHERE a.NAME=b.NAME AND (a.sex=b.sex OR a.age=b.age) AND a.ROWID <> b.ROWID);[/code]
select name,sex,age,id from(
select name,sex,age,id from ttt where exists (select name,sex from
(select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a
where ttt.name=a.name and ttt.sex=a.sex))a
union
( select name,sex,age,id from ttt where exists (select name,age from
(select name,age,count(name) from ttt group by name,age having count(name)>=2)a
where ttt.name=a.name and ttt.age=a.age))
select count(*) from (
select name,sex,age,id from(
select name,sex,age,id from ttt where exists (select name,sex from
(select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a
where ttt.name=a.name and ttt.sex=a.sex))a
union
( select name,sex,age,id from ttt where exists (select name,age from
(select name,age,count(name) from ttt group by name,age having count(name)>=2)a
where ttt.name=a.name and ttt.age=a.age))
)
SELECT STATEMENT, GOAL = ALL_ROWS Cost=14 Cardinality=2 Bytes=78
SORT UNIQUE Cost=14 Cardinality=2 Bytes=78
UNION-ALL
FILTER
TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=39
FILTER
SORT GROUP BY NOSORT Cost=3 Cardinality=1 Bytes=13
TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=13
FILTER
TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=39
FILTER
SORT GROUP BY NOSORT Cost=3 Cardinality=1 Bytes=23
TABLE ACCESS FULL Object owner=VACCGD Object name=TTT Cost=3 Cardinality=1 Bytes=23
4次全表,貌似效率.....
方法1是3次全表.
select max(ttt.id) as id,ttt.name,ttt.sex,ttt.age from ttt where exists(
select name,sex,age from
(
select a.name,a.sex,b.age from(
select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a,
( select name,age,count(name) from ttt group by name,age having count(name)>=2)b
where a.name=b.name
)
c where ttt.name=c.name and ttt.sex=c.sex and ttt.age=c.age)
group by ttt.name,ttt.sex,ttt.age
with a as
(select id, count(*) over(partition by t.name, t.sex) count from ttt t),
b as
(select id, count(*) over(partition by t.name, t.age) count from ttt t)
select count(id)
from (select a.id
from a
where a.count >= 2
union
select b.id from b where b.count >= 2)
select count(*)
from (select id,
count(*) over(partition by t.name, t.sex) count1,
count(*) over(partition by t.name, t.age) count2
from ttt t) a
where a.count1 > 1
or a.count2 > 1
附上一个在itpub上问到的答案,大家共同参考,呵呵
SELECT * FROM ttt a WHERE EXISTS
(SELECT 1 FROM ttt b WHERE a.NAME=b.NAME
AND (a.sex=b.sex OR a.age=b.age)
AND a.ROWID <> b.ROWID);
因为实绩表比较大...
目前表现最好的就是SELECT * FROM ttt a WHERE EXISTS
(SELECT 1 FROM ttt b WHERE a.NAME=b.NAME
AND (a.sex=b.sex OR a.age=b.age)
AND a.ROWID <> b.ROWID);
set autotrace on
set timing on
把执行计划和统计信息share一下吧
没太明白,这里面有两条 ‘b’,‘女’,‘4’ 这个是算2条还是算1条呢?如果是算1条,正确答案就不是7吧?
(SELECT 1 FROM ttt b WHERE a.NAME=b.NAME
AND (a.sex=b.sex OR a.age=b.age)
AND a.ROWID <> b.ROWID);很给力![code]
delete from ttt a WHERE EXISTS
(SELECT 1 FROM ttt b WHERE a.NAME=b.NAME
AND (a.sex=b.sex OR a.age=b.age)
AND a.ROWID <> b.ROWID);[/code]
因为是两条记录所以算2条
而且AND a.ROWID <> b.ROWID 可以改成AND a.ID <> b.ID ID也是主键。