删除重复数据

一、具有主键的情况
a. 具有唯一性的字段 id(为唯一主键)
-- Remove duplicate rows, keeping only the row with the largest id within
-- each group of rows that share the same values in the group by columns.
-- `table` and `col1,col2,col3...` are placeholders for the real names.
delete from table
where id not in (
    select max(id)
    from table
    group by col1,col2,col3...
)
group by 子句后跟的字段就是你用来判断重复的条件,如只有 col1,
那么只要 col1 字段内容相同即表示记录相同。

b. 具有联合主键
假设 col1+','+col2+','...col5 为联合主键
-- List the rows flagged as duplicates when the key is the composite
-- col1+','+col2+','...col5. The subquery groups on the columns used to
-- decide "same record" and keeps only the groups holding more than one row.
-- `table` and the `...` parts are placeholders for the real names.
select *
from table
where col1+','+col2+','...col5 in (
    select max(col1+','+col2+','...col5)
    from table
    group by col1,col2,col3,col4
    -- having filters groups, so it has to come after group by
    having count(*)>1
)
group by 子句后跟的字段就是你用来判断重复的条件,
如只有 col1,那么只要 col1 字段内容相同即表示记录相同。

c:判断所有的字段
-- Collapse fully identical rows: copy one row per distinct combination into
-- the temp table #aa, empty the original table, then load the unique rows
-- back. `table` and `id1,id2,....` are placeholders; list every column.
select *
into #aa
from table
group by id1,id2,....

-- intentionally no where clause: every row is removed before the reload
delete from table

insert into table
select *
from #aa

二、没有主键的情况
a:用临时表实现
-- Deduplicate a table that has no primary key by way of a temp table:
-- copy the rows into #temp together with a generated identity column,
-- delete all but one row per duplicate group there, then reload ta.
-- `ta`, `col1,col2,col3...`, `(...)` and `.....` are placeholders.
select identity(int,1,1) as id,*
into #temp
from ta

-- keep only the row with the largest generated id in each duplicate group;
-- the subquery must read from #temp, the table the ids were generated in
delete from #temp
where id not in (
    select max(id)
    from #temp
    group by col1,col2,col3...
)

-- intentionally no where clause: ta is emptied before the reload
delete from ta

-- list the real columns on both sides, leaving out the generated id column
insert into ta(...)
select .....
from #temp

b:用改变表结构(加一个唯一字段)来实现
-- Deduplicate by temporarily adding a unique identity column, keeping the
-- row with the smallest newfield value in each duplicate group, and then
-- dropping the helper column again.
-- `表` stands for the table name and `除newfield外的所有字段` for the list
-- of all columns other than newfield.
alter table 表 add newfield int identity(1,1)
-- Start a new batch: SQL Server resolves the column names of an existing
-- table when the batch is compiled, so newfield cannot be referenced in the
-- same batch that adds it. (`go` is the SSMS/sqlcmd batch separator.)
go

delete from 表
where newfield not in (
    select min(newfield)
    from 表
    group by 除newfield外的所有字段
)

alter table 表 drop column newfield
-- NOTE(review): statement fragment -- the opening `delete ... from ...`
-- line is not present here. The same four conditions appear in the complete
-- delete statement of the @t demo script that follows; TODO confirm the
-- intended statement head.
where a.userid=b.userid
and a.mobile=b.mobile
and a.content=b.content
and a.id>b.id
-- Self-contained demo: fill a table variable with sample rows (one of them
-- duplicated), delete the duplicates, and show what remains.
declare @t table (
    id      int identity,
    userid  varchar(10),
    mobile  varchar(10),
    content varchar(10)
)

-- id is an identity column, so only the other three values are supplied
insert into @t (userid, mobile, content)
          select 'aa','112','aa'
union all select 'bb','112','aa'
union all select 'bb','112','aa' -- duplicate
union all select 'aa','113','aa'
union all select 'cc','112','aa'
union all select 'dd','112','aa'
union all select 'bb','112','ab'

-- delete step: among rows with identical userid/mobile/content, remove
-- every row for which a row with a smaller id exists
delete a
from @t as a, @t as b
where a.id>b.id
  and a.userid=b.userid
  and a.mobile=b.mobile
  and a.content=b.content

-- show the result
select * from @t

/*-- test result
id          userid     mobile     content
----------- ---------- ---------- ----------
1           aa         112        aa
2           bb         112        aa
4           aa         113        aa
5           cc         112        aa
6           dd         112        aa
7           bb         112        ab

(6 row(s) affected)
--*/
-- Sample rows for t1; the pair (1,2) occurs three times in the first two
-- values, so there are duplicates on c1/c2 to remove.
-- NOTE(review): t1's definition is not shown here -- presumably three int
-- columns beginning with c1, c2; confirm and add an explicit column list.
-- union all: these are literal test rows to load as written, so the
-- duplicate-removal pass that plain union performs is not needed.
insert into t1
          select 1,2,3
union all select 22,3,6
union all select 1,2,6
union all select 26,58,32
union all select 1,2,5
union all select 2,9,3

-- temporary unique key so that rows with equal c1/c2 can be told apart
alter table t1 add key_col int not null identity
-- Start a new batch: SQL Server resolves the column names of an existing
-- table when the batch is compiled, so key_col cannot be referenced in the
-- same batch that adds it. (`go` is the SSMS/sqlcmd batch separator.)
go

-- delete every row for which another row with the same c1/c2 and a larger
-- key_col exists, i.e. keep the row with the largest key_col per c1/c2
delete from t1
where exists (
    select *
    from t1 as t2
    where t2.c1=t1.c1
      and t2.c2=t1.c2
      and t2.key_col>t1.key_col
)

-- remove the helper column again
alter table t1 drop column key_col
在 SQL Server 里面执行不到一秒就完成,到 MySQL 里面就死了,半天无反应。
同样的连接语句,两个数据库系统速度差这么大吗??晕
-- NOTE(review): the two groups that follow are statement fragments (an
-- `on ...` join-condition tail and a `where ...` tail carrying the same
-- four conditions); the `delete ... from ...` lines they belong to are not
-- present here. TODO confirm against the original statements.
on a.userid=b.userid
and a.mobile=b.mobile
and a.content=b.content
and a.id>b.id
where a.userid=b.userid
and a.mobile=b.mobile
and a.content=b.content
and a.id>b.id
到mysql里面就死了?
不会这么差吧?
-- MySQL can't really be that slow, can it? How about an explicit join?
-- Deletes every row of 表 for which another row with the same
-- userid/mobile/content and a smaller id exists (表 is a placeholder for
-- the table name).
-- The statement starts on its own line so that it is not read as part of
-- the `--` comment.
delete a
from 表 a
inner join 表 b
    on a.userid=b.userid
   and a.mobile=b.mobile
   and a.content=b.content
   and a.id>b.id
==========================================================
改为真正的 join 也是如此,好长时间无反应,只好强行结束程序,MySQL 也死了,也得重启。
总共数据量在 20 万左右,所以我又加了条件
where a.senttime>'2004-08-29' and a.senttime<'2004-08-30'
或者 and a.senttime>'2004-08-29' and a.senttime<'2004-08-30'
不是由于这个条件而死的吧?
没办法,只好做个小工具,在程序里用 strsql="select .... where id in("+strid+")";
这种方法解决了。