如何剔除重复的记录

如何剔除重复的记录背景，有一个表，A（日期型），B，C为复合主键。
另外还有三个字段D（日期型），E，F现在要根据D，E，F判断表中相同的记录，然后相同的记录中取A最大的那条记录。效果：
     A             |      B      |          C        |       D              |    E       |      F
---------------------------------------------------------------------------------------
20081015    |   123     |         123     |    20081015   |     aaa  |      aaa
20081014    |   123     |        321      |    20081015   |     aaa   |     aaa根据上面的数据，选出的结果是第一条记录。※主要还要考虑效率问题，总共有20万条记录，选出来的数据要全部Insert到另一张表中查了以前的帖子，没有找到适合的，请大家指教，先行谢过了。

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

try it:create table tableB as (select max(A),b,c,d,e from tableA order by c,d,e group by c,d,e);
-- TRY IT ..
INSERT INTO TABLE_NAME
SELECT *
  FROM TABLE_A T1
WHERE EXISTS (SELECT 1
          FROM TABLE_A T2
         WHERE T1.D = T2.D
           AND T1.E = T2.E
           AND T1.F = T2.F
           AND T1.A > T2.A);
select * from
(select * from test ) ta,
(select * from test ) tb
where ta.b = tb.b
and ta.a = tb.a
and ta.a = (
select max(ta.a) from
(select * from test ) ta,
(select * from test ) tb
where ta.b = tb.b
and ta.a = tb.a
)
-- 哈哈，没看清还有除去重复记录，用下面改后的SQL:
INSERT INTO TABLE_NAME
SELECT *
  FROM TEST_A T1
WHERE ROWID < (SELECT MIN(ROWID)
                  FROM TEST_A T2
                 WHERE T1.D = T2.D
                   AND T1.E = T2.E
                   AND T1.F = T2.F
                   AND T1.A > T2.A);
再试一下：insert into tableB as
   (select tab1.a,tab1.b,tab1.c,tab1.d,tab1.e,tab1.f from
       (select a,b,c,d,e,f,count(*) from tableA group by d,e,f having count(*)>1) tab1,
       (select max(t1.a) ta
               from (select * from tableA ) t1,(select * from tableA ) t2
               where t1.d = t2.d and t1.e = t2.e and t1.f=t2.f) tab2
       where tab1.a=tab2.ta
   )
俺总结的，自已参考修改--以下是删除重复数据的3种方案
--3种方案都是最优化的
--object_name, object_id. 这2个字段用来判断重复--1.
delete
  from x
where rowid in
   (
select rd
  from (
select rowid rd
,row_number() over(partition by object_name, object_id order by rowid) rn
  from x
   ) x
where rn > 1
   )--2.
delete
  from x
where rowid not in (
select max(rowid)
  from x
group by object_name, object_id
)

--3.
create table tmp_x
AS
select x1.(字段列表..略)
  from
   (
select x.*, row_number() over(partition by object_name, object_id order by rowid) rn
   from x
   ) x1
where rn = 1;

truncate table x;insert into x
select *
  from tmp_x; drop table tmp_x;
看我的.
insert into other_tb
  select A, B, C, D, E, F
    from old_table
   where rowid in (select rd
                     from (select rowid rd,
                                  row_number() over(partition by D, E, F order by A desc) rn
                             from old_table
                            where rn = 1))
http://topic.csdn.net/u/20081016/01/223a8629-0ba6-4659-a1cf-d7b4ca5cbdad.html
这种删除重复记录的，一般就用ROWID来搞定。8楼的是可行的。
INSERT INTO Tbl_NewData(A,B,C,D,E,F)
SELECT MAX(A) AS MAX_A
          ,B
          ,C
          ,D
          ,E
          ,F
FROM Tbl_Data
GROUP BY B,C,D,E,F
INSERT INTO Tbl_NewData(A,B,C,D,E,F)
SELECT A,B,C,D,E,F
  FROM Tbl_Data TA
      ,(SELECT MAX(A) AS MAX_A
             , D      AS GROUP_D
             , E      AS GROUP_E
             , F      AS GROUP_F
          FROM Tbl_Data
         GROUP BY D, E, F) TB
WHERE TA.A = TB.MAX_A
   AND TA.D = TB.GROUP_D
   AND TA.E = TB.GROUP_E
   AND TA.F = TB.GROUP_F
delete
  from taba
where (a,d,e,f) not in (
             select max(a) a,d,e,f
               from x
              group by d, e,f)
不清楚LZ，是只把重复记录中选A最大的一条记录，还是没有重复的也要选出来？
如果是后者，那8楼的符合要求，
select * from a where a in (select max(a) from a group by d,e,f);
select * from a where a in (select max(a) from a group by d,e,f);
刚测试过楼主的实例。有一个问题当a,d列两条记录的内容分别相同。。那上面的语句就不成立还是楼主贴上实这、际的数据。。如A列的日期为20080101 D列的日期为20081010这样