1.查询重复记录
-- 1. Find duplicated records: urls appearing more than once.
-- Fixes: the original last line was fused with the next step's heading
-- ("having count(url)>12.过滤…" is really "> 1" + the heading); the
-- no-op filler `where 1=1` is dropped and the statement is terminated.
SELECT url
FROM FD_LOAN_INFO_DATA
GROUP BY url
HAVING COUNT(url) > 1;
-- 2. Save the de-duplicated records into a temporary table:
-- Drop a leftover temp table first so the CREATE cannot fail.
DROP TABLE IF EXISTS FD_LOAN_INFO_DATA_b;
-- BUG in the original: `SELECT *, count(distinct url) ... GROUP BY url`
-- relies on MySQL's non-ONLY_FULL_GROUP_BY mode and keeps an arbitrary
-- (in practice the first) row per url, not the latest one — see the
-- complaint about rows 1/2/3 further down. Keep the row with the
-- greatest ZQSJ per url instead. The constant `1` column mirrors the
-- original count(distinct url) output, which is always 1 when grouping
-- by url (see the sample FD_LOAN_INFO_DATA_b data below).
CREATE TABLE FD_LOAN_INFO_DATA_b AS
SELECT a.*, 1 AS `count(distinct url)`
FROM FD_LOAN_INFO_DATA a
WHERE NOT EXISTS (
    SELECT 1
    FROM FD_LOAN_INFO_DATA b
    WHERE b.url = a.url
      AND b.ZQSJ > a.ZQSJ
);
-- NOTE: if two rows share the same maximum ZQSJ for one url, both are
-- kept — deduplicate those separately by a unique id, as noted below.
-- Drop the backup table left over from a previous run.
DROP TABLE IF EXISTS FD_LOAN_INFO_DATA_tttt;
3.源数据表改名为备份表(并非真正删除)
-- Fixes: the first line was fused with the step-4 heading; statements
-- are now terminated with semicolons.
ALTER TABLE FD_LOAN_INFO_DATA RENAME TO FD_LOAN_INFO_DATA_tttt;
-- 4. Rename the temporary table back to the source table's name.
ALTER TABLE FD_LOAN_INFO_DATA_b RENAME TO FD_LOAN_INFO_DATA;
-- Remove the helper aggregate column the temp table carried.
ALTER TABLE FD_LOAN_INFO_DATA DROP COLUMN `count(distinct url)`;
这个
-- 1. Find duplicated records: urls appearing more than once.
-- Fixes: the original last line was fused with the next step's heading
-- ("having count(url)>12.过滤…" is really "> 1" + the heading); the
-- no-op filler `where 1=1` is dropped and the statement is terminated.
SELECT url
FROM FD_LOAN_INFO_DATA
GROUP BY url
HAVING COUNT(url) > 1;
-- 2. Save the de-duplicated records into a temporary table:
-- Drop a leftover temp table first so the CREATE cannot fail.
DROP TABLE IF EXISTS FD_LOAN_INFO_DATA_b;
-- BUG in the original: `SELECT *, count(distinct url) ... GROUP BY url`
-- relies on MySQL's non-ONLY_FULL_GROUP_BY mode and keeps an arbitrary
-- (in practice the first) row per url, not the latest one — see the
-- complaint about rows 1/2/3 further down. Keep the row with the
-- greatest ZQSJ per url instead. The constant `1` column mirrors the
-- original count(distinct url) output, which is always 1 when grouping
-- by url (see the sample FD_LOAN_INFO_DATA_b data below).
CREATE TABLE FD_LOAN_INFO_DATA_b AS
SELECT a.*, 1 AS `count(distinct url)`
FROM FD_LOAN_INFO_DATA a
WHERE NOT EXISTS (
    SELECT 1
    FROM FD_LOAN_INFO_DATA b
    WHERE b.url = a.url
      AND b.ZQSJ > a.ZQSJ
);
-- NOTE: if two rows share the same maximum ZQSJ for one url, both are
-- kept — deduplicate those separately by a unique id, as noted below.
-- Drop the backup table left over from a previous run.
DROP TABLE IF EXISTS FD_LOAN_INFO_DATA_tttt;
3.源数据表改名为备份表(并非真正删除)
-- Fixes: the first line was fused with the step-4 heading; statements
-- are now terminated with semicolons.
ALTER TABLE FD_LOAN_INFO_DATA RENAME TO FD_LOAN_INFO_DATA_tttt;
-- 4. Rename the temporary table back to the source table's name.
ALTER TABLE FD_LOAN_INFO_DATA_b RENAME TO FD_LOAN_INFO_DATA;
-- Remove the helper aggregate column the temp table carried.
ALTER TABLE FD_LOAN_INFO_DATA DROP COLUMN `count(distinct url)`;
这个
-- The original fragment started at `inner join` with no SELECT head and
-- used the unqualified alias `ma`. Completed as a full query that lists,
-- for each url, every row that is NOT the latest (max ZQSJ) one:
SELECT a.*
FROM FD_LOAN_INFO_DATA a
INNER JOIN (
    SELECT url, MAX(ZQSJ) AS ma
    FROM FD_LOAN_INFO_DATA
    GROUP BY url
) b
    ON a.url = b.url
   AND a.ZQSJ < b.ma
你可以找几条数据试试看:
如以下数据FD_LOAN_INFO_DATA表:
"1" "1" "http://163.com" "2012-01-01"
"2" "1" "http://163.com" "2012-01-03"
"3" "1" "http://163.com" "2012-01-02"
"4" "2" "http://baidu.com" "2012-01-03"
"5" "2" "http://baidu.com" "2012-01-03"
"6" "2" "http://baidu.com" "2012-01-01"
"7" "3" "http://sina.com" "2012-01-01"
"8" "4" "http://csdn.com" "2012-01-01"FD_LOAN_INFO_DATA_b表的数据:
"1" "1" "http://163.com" "2012-01-01" "1"
"4" "2" "http://baidu.com" "2012-01-03" "1"
"8" "4" "http://csdn.com" "2012-01-01" "1"
"7" "3" "http://sina.com" "2012-01-01" "1"
明显相同url 的1,2,3数据应该保留的是2,而你保留的是1
---------------------------------------------------------------------------
-- Keep, for each URL, only the row(s) with the greatest ZQSJ: a row
-- qualifies when no other row with the same URL has a later ZQSJ.
-- (Rows whose ZQSJ ties for the maximum are all kept — see note below.)
SELECT *
FROM FD_LOAN_INFO_DATA a
WHERE NOT EXISTS (
    SELECT 1
    FROM FD_LOAN_INFO_DATA b
    WHERE b.URL = a.URL
      AND b.ZQSJ > a.ZQSJ
);
此sql是提取相同url的最大时间的数据,不排除时间相同的数据,如有时间相同的还要额外再处理删除之
楼主没有给出数据,如果有此情况,需加入唯一标识的字段,如ID,再进行处理