紧急求助各位专家，取最近的几条数据做统计的难题

select [id],avg(DataItem) from
(
select top 3 *
from TestTable
where [id]= 1
order by CreateDate
union all
select top 3 *
from TestTable
where [id]= 2
order by CreateDate
)
group by [id]

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

--> 测试数据: [TestTable]
if object_id('[TestTable]') is not null drop table [TestTable]
create table [TestTable] (id int,DataItem decimal(19,4),CreateDate smalldatetime)
insert into [TestTable]
select 1,1.1,'2001-1-1' union all
select 1,1.2,'2001-1-2' union all
select 1,1.3,'2001-1-3' union all
select 1,1.4,'2001-1-4' union all
select 1,1.5,'2001-1-5' union all
select 2,2.1,'2003-2-2' union all
select 2,2.2,'2003-2-3' union all
select 2,2.3,'2003-2-4' union all
select 2,null,'2003-2-5'
select identity(int,1,1) as nid,* into # from [TestTable] order by id,CreateDate
select id,case when count(*)=count(dataitem) then avg(dataitem) else null end as DataItem
from # as a where nid in (select top 3 nid from # where id=a.id order by CreateDate desc)
group by id
/*
1 1.400000
2 NULL
*/
2    null?
一條為null,結果都為null
CREATE TABLE TestTable ( id char(10), DataItem decimal(19,4), CreateDate smalldatetime )
INSERT INTO TestTable values(1,1.1,'2001-1-1')
INSERT INTO TestTable values(1,1.2,'2001-1-2')
INSERT INTO TestTable values(1,1.3,'2001-1-3')
INSERT INTO TestTable values(1,1.4,'2001-1-4')
INSERT INTO TestTable values(1,1.5,'2001-1-5')
INSERT INTO TestTable values(2,2.1,'2003-2-2')
INSERT INTO TestTable values(2,2.2,'2003-2-3')
INSERT INTO TestTable values(2,2.3,'2003-2-4')
INSERT INTO TestTable values(2,NULL,'2003-2-5')go
with t
as
(
select *,
row_number()over(partition by ID order by CreateDate desc)row
from
TestTable)
select
a.ID,(a.DataItem+b.DataItem+c.DataItem)/3 DataItem
from
T a
join
T b on a.ID=b.ID and a.row=b.row-1
join
T c on a.ID=c.ID and b.row=c.row-1
where
a.row=1
ID         DataItem
---------- ---------------------------------------
1          1.400000
2          NULL(2 個資料列受到影響)
sql 2005可以这样：
;with t as
(select row_number()over(partition by ID order by CreateDate desc) as nid,* from TestTable)
select id,case when count(*)=count(dataitem) then avg(dataitem) else null end as DataItem
from t as a where nid in (select top 3 nid from t where id=a.id order by CreateDate desc)
group by id
这个比较容易看懂，不过使用了临时表，如果表数据量比较大（百万级别），使用临时表是不是会很影响效率呢？这个比较难看懂，如果要扩展成4条数据的，是不是写成
with t
as
(
select *,
    row_number()over(partition by ID order by CreateDate desc)row
from
    TestTable)
select
    a.ID,(a.DataItem+b.DataItem+c.DataItem+d.DataItem)/4 DataItem
from
    T a
join
    T b on a.ID=b.ID and a.row=b.row-1
join
    T c on a.ID=c.ID and b.row=c.row-1
join
    T d on a.ID=d.ID and c.row=d.row-1
where
    a.row=1
呢?
看上去这个里面做了很多join操作，效率跟上面那个比那个会好点呢？多谢大家了
百W级做这种统计，本身就会很慢，如果不用临时表，sql 2005可以用我5楼的。
为什么性能上会有这么大的差距呢
希望哪位高人能回答下
用perfectaction 的代码，7千多条数据5分钟没出结果
用roy_88 的代码，同样的环境，一秒多钟出结果没有任何其他意思，只是想知道这两个SQL的性能差距在哪里,多谢高人指点
加索引：
create index ix_01 on TestTable(id,CreateDate desc)
CREATE TABLE TestTable
(
    id int,
    DataItem decimal(19,4),
    CreateDate smalldatetime
)INSERT INTO TestTable values(1,1.1,'2001-1-1')
INSERT INTO TestTable values(1,1.2,'2001-1-2')
INSERT INTO TestTable values(1,1.3,'2001-1-3')
INSERT INTO TestTable values(1,1.4,'2001-1-4')
INSERT INTO TestTable values(1,1.5,'2001-1-5')
INSERT INTO TestTable values(2,2.1,'2003-2-2')
INSERT INTO TestTable values(2,2.2,'2003-2-3')
INSERT INTO TestTable values(2,2.3,'2003-2-4')
INSERT INTO TestTable values(2,NULL,'2003-2-5')
/*---
select id,
sum(DataItem) as DataItem
from (
select a.* from TestTable a
where a.CreateDate in (select top 3 CreateDate  from TestTable b where a.id=b.id order by CreateDate desc))a
group by id
*/--借助临时表select a.* into #tmp from TestTable a
where a.CreateDate in (select top 3 CreateDate  from TestTable b where a.id=b.id order by CreateDate desc)
--更新DataItem值为null值的所有记录
update a
set DataItem=null
from #tmp a ,#tmp b where a.Id=b.Id and b.DataItem is null
select id,
sum(DataItem) as DataItem
from #tmp
group by id
最后一句应该为select id,
avg(DataItem) as DataItem
from #tmp
group by id
这个组合索引是存在的，因为表示以id,CreateDate作为主键的
因此这个索引已经存在了因为
select *,
    row_number()over(partition by ID order by CreateDate desc)row
from
    TestTable
这个结果集比较小，只有几百条数据
我看了roy_88的执行计划，就是这个结果集在不断的join，因此这可能是他的速度比较快的原因
如果这个结果集比较大了，我估计也不会有这么好的效果
我那个:
from t as a where nid in (select top 3 nid from t where id=a.id order by CreateDate desc)
是肯定会很耗时的。
哦，是了，这里的
select top 3 nid from t where id=a.id order by CreateDate desc
会要对表进行一个全表扫描吧，因为没有对id建索引，建的是(id,CreateDate)的联合索引，所以没办法是用索引是吧
然后前面是where条件连接的，应该是前面那个结果集有多少条记录这个语句就会要执行多少次吧？哎，我始终搞不清楚sql语句执行的规则，有没有什么好书推荐看下啊，谢谢了
在使用roy_88 的语句的时候发现一个问题：
如果某条记录只有2次的结果，那用他的语句会把这个记录忽略掉
比如如果上面表再加两条测试数据
id  DataItem  CreateDate
3    3.1        2002-2-2
3    3.2        2002-2-3
那用roy_88的语句统计结果中不会出现
3  null
但是现在想要出现
3  null这个统计结果，该怎么做呢?
Microsoft SQL Server性能调校
作者: 胡百敬
http://www.douban.com/subject/1239520/听说要出2005版了，现在大陆好像还没上市。
在使用roy_88 的语句的时候发现一个问题：
如果某条记录只有2次的结果，那用他的语句会把这个记录忽略掉
比如如果上面表再加两条测试数据
id  DataItem  CreateDate
3    3.1        2002-2-2
3    3.2        2002-2-3
那用roy_88的语句统计结果中不会出现
3  null
但是现在想要出现
3  null这个统计结果，该怎么做呢?自己再顶下
只有两条记录，inner join 一下肯定没了。
感谢大家，我找到方法来，把roy_88的语句中join改成left join就好了
这样出来的结果就会有 3 null 的值了
结贴