select [id],avg(DataItem) from
(
select top 3 *
from TestTable
where [id]= 1
order by CreateDate
union all
select top 3 *
from TestTable
where [id]= 2
order by CreateDate
)
group by [id]
(
select top 3 *
from TestTable
where [id]= 1
order by CreateDate
union all
select top 3 *
from TestTable
where [id]= 2
order by CreateDate
)
group by [id]
if object_id('[TestTable]') is not null drop table [TestTable]
create table [TestTable] (id int,DataItem decimal(19,4),CreateDate smalldatetime)
insert into [TestTable]
select 1,1.1,'2001-1-1' union all
select 1,1.2,'2001-1-2' union all
select 1,1.3,'2001-1-3' union all
select 1,1.4,'2001-1-4' union all
select 1,1.5,'2001-1-5' union all
select 2,2.1,'2003-2-2' union all
select 2,2.2,'2003-2-3' union all
select 2,2.3,'2003-2-4' union all
select 2,null,'2003-2-5'
select identity(int,1,1) as nid,* into # from [TestTable] order by id,CreateDate
select id,case when count(*)=count(dataitem) then avg(dataitem) else null end as DataItem
from # as a where nid in (select top 3 nid from # where id=a.id order by CreateDate desc)
group by id
/*
1 1.400000
2 NULL
*/
一條為null,結果都為null
INSERT INTO TestTable values(1,1.1,'2001-1-1')
INSERT INTO TestTable values(1,1.2,'2001-1-2')
INSERT INTO TestTable values(1,1.3,'2001-1-3')
INSERT INTO TestTable values(1,1.4,'2001-1-4')
INSERT INTO TestTable values(1,1.5,'2001-1-5')
INSERT INTO TestTable values(2,2.1,'2003-2-2')
INSERT INTO TestTable values(2,2.2,'2003-2-3')
INSERT INTO TestTable values(2,2.3,'2003-2-4')
INSERT INTO TestTable values(2,NULL,'2003-2-5')go
with t
as
(
select *,
row_number()over(partition by ID order by CreateDate desc)row
from
TestTable)
select
a.ID,(a.DataItem+b.DataItem+c.DataItem)/3 DataItem
from
T a
join
T b on a.ID=b.ID and a.row=b.row-1
join
T c on a.ID=c.ID and b.row=c.row-1
where
a.row=1
ID DataItem
---------- ---------------------------------------
1 1.400000
2 NULL(2 個資料列受到影響)
sql 2005可以这样:
;with t as
(select row_number()over(partition by ID order by CreateDate desc) as nid,* from TestTable)
select id,case when count(*)=count(dataitem) then avg(dataitem) else null end as DataItem
from t as a where nid in (select top 3 nid from t where id=a.id order by CreateDate desc)
group by id
with t
as
(
select *,
row_number()over(partition by ID order by CreateDate desc)row
from
TestTable)
select
a.ID,(a.DataItem+b.DataItem+c.DataItem+d.DataItem)/4 DataItem
from
T a
join
T b on a.ID=b.ID and a.row=b.row-1
join
T c on a.ID=c.ID and b.row=c.row-1
join
T d on a.ID=d.ID and c.row=d.row-1
where
a.row=1
呢?
看上去这个里面做了很多join操作,效率跟上面那个比那个会好点呢?多谢大家了
百W级做这种统计,本身就会很慢,如果不用临时表,sql 2005可以用我5楼的。
希望哪位高人能回答下
用perfectaction 的代码,7千多条数据5分钟没出结果
用roy_88 的代码,同样的环境,一秒多钟出结果没有任何其他意思,只是想知道这两个SQL的性能差距在哪里,多谢高人指点
create index ix_01 on TestTable(id,CreateDate desc)
(
id int,
DataItem decimal(19,4),
CreateDate smalldatetime
)INSERT INTO TestTable values(1,1.1,'2001-1-1')
INSERT INTO TestTable values(1,1.2,'2001-1-2')
INSERT INTO TestTable values(1,1.3,'2001-1-3')
INSERT INTO TestTable values(1,1.4,'2001-1-4')
INSERT INTO TestTable values(1,1.5,'2001-1-5')
INSERT INTO TestTable values(2,2.1,'2003-2-2')
INSERT INTO TestTable values(2,2.2,'2003-2-3')
INSERT INTO TestTable values(2,2.3,'2003-2-4')
INSERT INTO TestTable values(2,NULL,'2003-2-5')
/*---
select id,
sum(DataItem) as DataItem
from (
select a.* from TestTable a
where a.CreateDate in (select top 3 CreateDate from TestTable b where a.id=b.id order by CreateDate desc))a
group by id
*/--借助临时表select a.* into #tmp from TestTable a
where a.CreateDate in (select top 3 CreateDate from TestTable b where a.id=b.id order by CreateDate desc)
--更新DataItem值为null值的所有记录
update a
set DataItem=null
from #tmp a ,#tmp b where a.Id=b.Id and b.DataItem is null
select id,
sum(DataItem) as DataItem
from #tmp
group by id
avg(DataItem) as DataItem
from #tmp
group by id
因此这个索引已经存在了因为
select *,
row_number()over(partition by ID order by CreateDate desc)row
from
TestTable
这个结果集比较小,只有几百条数据
我看了roy_88的执行计划,就是这个结果集在不断的join,因此这可能是他的速度比较快的原因
如果这个结果集比较大了,我估计也不会有这么好的效果
from t as a where nid in (select top 3 nid from t where id=a.id order by CreateDate desc)
是肯定会很耗时的。
select top 3 nid from t where id=a.id order by CreateDate desc
会要对表进行一个全表扫描吧,因为没有对id建索引,建的是(id,CreateDate)的联合索引,所以没办法是用索引是吧
然后前面是where条件连接的,应该是前面那个结果集有多少条记录这个语句就会要执行多少次吧?哎,我始终搞不清楚sql语句执行的规则,有没有什么好书推荐看下啊,谢谢了
如果某条记录只有2次的结果,那用他的语句会把这个记录忽略掉
比如如果上面表再加两条测试数据
id DataItem CreateDate
3 3.1 2002-2-2
3 3.2 2002-2-3
那用roy_88的语句统计结果中不会出现
3 null
但是现在想要出现
3 null这个统计结果,该怎么做呢?
作者: 胡百敬
http://www.douban.com/subject/1239520/听说要出2005版了,现在大陆好像还没上市。
如果某条记录只有2次的结果,那用他的语句会把这个记录忽略掉
比如如果上面表再加两条测试数据
id DataItem CreateDate
3 3.1 2002-2-2
3 3.2 2002-2-3
那用roy_88的语句统计结果中不会出现
3 null
但是现在想要出现
3 null这个统计结果,该怎么做呢?自己再顶下
这样出来的结果就会有 3 null 的值了
结贴