大家好,求一句SQL,求连续记录的最大个数
create table #t
(_id varchar(3), _date int) insert #t select '001',20100101
union all select '001',20100102
union all select '002',20100103
union all select '002',20100101
union all select '002',20100102
union all select '002',20100104
union all select '001',20100105
union all select '001',20100106
union all select '001',20100107
union all select '002',20100105
union all select '002',20100107
union all select '002',20100108
union all select '003',20100101
union all select '003',20100102
union all select '003',20100104
union all select '003',20100105
union all select '003',20100106
union all select '003',20100108
union all select '003',20100109
union all select '003',20100111要求: 求出每个ID 的最大连续记录的个数。记录有几百万, 效率要好一点。 谢谢!
结果为:001最大连续为从20100105到20100107,数量为3,
002最大连续为从20100101到20100104,数量为4,
003最大连续为从20100104到20100106,数量为3
===================
001 3
002 4
003 3
===================
如果不影响效率的话,最好把时间段也带上:
=================================
001 3 20100105 20100107
002 4 20100101 20100104
003 3 20100104 20100106
=================================
要能加条件, 比如查询连续数量大于3的,结果只有一条
======================================
002 4 20100101 20100104
create table #t
(_id varchar(3), _date int) insert #t select '001',20100101
union all select '001',20100102
union all select '002',20100103
union all select '002',20100101
union all select '002',20100102
union all select '002',20100104
union all select '001',20100105
union all select '001',20100106
union all select '001',20100107
union all select '002',20100105
union all select '002',20100107
union all select '002',20100108
union all select '003',20100101
union all select '003',20100102
union all select '003',20100104
union all select '003',20100105
union all select '003',20100106
union all select '003',20100108
union all select '003',20100109
union all select '003',20100111要求: 求出每个ID 的最大连续记录的个数。记录有几百万, 效率要好一点。 谢谢!
结果为:001最大连续为从20100105到20100107,数量为3,
002最大连续为从20100101到20100104,数量为4,
003最大连续为从20100104到20100106,数量为3
===================
001 3
002 4
003 3
===================
如果不影响效率的话,最好把时间段也带上:
=================================
001 3 20100105 20100107
002 4 20100101 20100104
003 3 20100104 20100106
=================================
要能加条件, 比如查询连续数量大于3的,结果只有一条
======================================
002 4 20100101 20100104
go
create table #t
(id varchar(3), date int) insert #t select '001',20100101
union all select '001',20100102
union all select '002',20100103
union all select '002',20100101
union all select '002',20100102
union all select '002',20100104
union all select '001',20100105
union all select '001',20100106
union all select '001',20100107
union all select '002',20100105
union all select '002',20100107
union all select '002',20100108
union all select '003',20100101
union all select '003',20100102
union all select '003',20100104
union all select '003',20100105
union all select '003',20100106
union all select '003',20100108
union all select '003',20100109
union all select '003',20100111--1.
alter table #t add d int
go
declare @i int, @j int,@k int
update #t set
d = @j,
@j = case when @i = id then @j else isnull(@j,0)+1 end,
@i = id
--2.if OBJECT_ID('tempdb..#') is not null drop table #
go
select *,0 c into # from #t order by d,date
go
declare @i int, @j int,@k int
update # set
c = @j,
@j = case when @i = id and @k=date-1 then @j +1 else 1 end,
@i = id,
@k=date
select ID,MAX(c) 最大连续数量 from # group by ID/*
ID 最大连续数量
---- -----------
001 3
002 4
003 3(3 行受影响)
*/先来一个,效率不知怎么样,还行的话再封装为存储过程.
from(
select _id,min_date,max_date,num,max(num) over(partition by _id) as max_num
from(
select *
,min(_date) over(partition by _id,rn) as min_date
,max(_date) over(partition by _id,rn) as max_date
,counT(*) over(partition by _id,rn) as num
from (
select *
,dateadd(day,(row_number() over(partition by _id order by cast(ltrim(_date) as datetime)))*-1,cast(ltrim(_date) as datetime)) as rn
from #t
) a
) a
) a where num=max_num
group by _id,max_num,min_date,max_date,max_num_id max_num min_date max_date max_num
---- ----------- ----------- ----------- -----------
001 3 20100105 20100107 3
002 5 20100101 20100105 5
003 3 20100104 20100106 3(3 行受影响)为什么002是5个
;with ta as
(select _id, _date1 = _date - row_number() over (partition by _id order by _date)
from #t),
tb as
(select _id, cnt = count(*)
from ta
group by _id, _date1)
select _id, maxcnt = max(cnt)
from tb
group by _id
go
create table #t(_id varchar(3), _date int ,d int)
insert #t(_id,_date) select '001',20100101
union all select '001',20100102
union all select '002',20100103
union all select '002',20100101
union all select '002',20100102
union all select '002',20100104
union all select '001',20100105
union all select '001',20100106
union all select '001',20100107
union all select '002',20100105
union all select '002',20100107
union all select '002',20100108
union all select '003',20100101
union all select '003',20100102
union all select '003',20100104
union all select '003',20100105
union all select '003',20100106
union all select '003',20100108
union all select '003',20100109
union all select '003',20100111
--1.更新分区段的行
declare @i int, @j int,@k int
update #t set d = @j,@j = case when @i = _id then @j else isnull(@j,0)+1 end,@i = _id
--2.查询
;with t1 as
(
select _id,d, _date1 = _date - row_number() over (partition by d order by _date),_date
from #t
)
,t2 as
(
select _id, d, cnt = count(1),mindate=MIN(_date),maxdate= MAX(_date)
from t1
group by _id, d, _date1
)
select _id, maxcnt = max(cnt),
mindate=(select mindate from t2 where cnt=max(t.cnt) and _id=t._id),
maxdate=(select maxdate from t2 where cnt=max(t.cnt) and _id=t._id)
from t2 t
group by _id/*
_id maxcnt mindate maxdate
---- ----------- ----------- -----------
001 3 20100105 20100107
002 4 20100101 20100104
003 3 20100104 20100106(3 行受影响)
*/
Select * from #t where _id='001' and _date between 20100105 and 20100107
union all
Select * from #t where _id='002' and _date between 20100101 and 20100104
union all
Select * from #t where _id='003' and _date between 20100104 and 20100106
union all
Select * from #t where _id not in('001','002','003')
) t
Group by t._id
这样也能达到要求,数据量大了效果不知道
go
create table #t(_id varchar(3), _date int)
insert #t(_id,_date) select '001',20100101
union all select '001',20100102
union all select '002',20100103
union all select '002',20100101
union all select '002',20100102
union all select '002',20100104
union all select '001',20100105
union all select '001',20100106
union all select '001',20100107
union all select '002',20100105
union all select '002',20100107
union all select '002',20100108
union all select '003',20100101
union all select '003',20100102
union all select '003',20100104
union all select '003',20100105
union all select '003',20100106
union all select '003',20100108
union all select '003',20100109
union all select '003',20100111
SELECT RN=IDENTITY(INT),* INTO # FROM #T ORDER BY _ID,_DATESELECT _ID,MAX(CNT) MAXCNT
FROM (
SELECT _ID,COUNT(1) CNT
FROM # T
WHERE EXISTS(SELECT 1 FROM # WHERE _ID=T._ID AND (_DATE=T._DATE-1 OR T._DATE=_DATE-1))
GROUP BY _ID,_DATE-RN
) AS T
GROUP BY _IDDROP TABLE #
/*
_id maxcnt
---- -----------
001 3
002 5
003 3(3 行受影响)
*/
GROUP BY _ID,_DATE-RN你这里是减号吗?
报错:
服务器: 消息 207,级别 16,状态 3,行 1
列名 'RN' 无效。