select top 10 * from (
select
a.id,GsID,Qymc,Cpmc,picurl,ROW_NUMBER() over (partition by gsid order by newid()) as tempRowID
from Spzs a left join qyml b
on a.GsID=b.id
where a.flag=1 and b.flag=0 and b.enddate>getdate() and picurl is not null and picurl<>''
and gsid not in (0,209696,209696,60893,56128,54278,48227,48227,6476,58777,58397,56125,54943,54088,52846,322,206271,207286,62609,61985,57718,56238,16702,1619,610,191,178,211452,211132,205393,170022,70667,63266,62782,61816,61688,61203,59499,59014,58474,56629,56398,56134,54887,54194,1393,209)
and (contains((cpmc,jysm,xxsm),'白板') or contains((cpmc,jysm,xxsm),'信息化'))
) a
where tempRowID=1 order by NEWID()指令太长了,从新粘贴一下
select
a.id,GsID,Qymc,Cpmc,picurl,ROW_NUMBER() over (partition by gsid order by newid()) as tempRowID
from Spzs a left join qyml b
on a.GsID=b.id
where a.flag=1 and b.flag=0 and b.enddate>getdate() and picurl is not null and picurl<>''
and gsid not in (0,209696,209696,60893,56128,54278,48227,48227,6476,58777,58397,56125,54943,54088,52846,322,206271,207286,62609,61985,57718,56238,16702,1619,610,191,178,211452,211132,205393,170022,70667,63266,62782,61816,61688,61203,59499,59014,58474,56629,56398,56134,54887,54194,1393,209)
and (contains((cpmc,jysm,xxsm),'白板') or contains((cpmc,jysm,xxsm),'信息化'))
) a
where tempRowID=1 order by NEWID()指令太长了,从新粘贴一下
-->这个可以这样
contains((cpmc,jysm,xxsm),'"白板" OR "信息化"') 许多字段都没标记前缀,根本不只到那个表的字段,怎么做优化?看你的意思是每次20个这样的取,应该考虑用临时表:
先将各自符合条件的记录选入 #a、#b,#a 中的 NEWID() 一次性生成;
删除不符合关联条件的记录;
#a 中取 NEWID() 的前20个,再到 b# 中与这20个关联的记录进行 NEWID() 选取,返回数据;
把这20个相关的数据都删除。
insert into @tb select a.ID,GsID,Qymc,Cpmc,picurl from Spzs a left join Qyml b on a.GsID=b.ID where a.flag=1 and picurl is not null and picurl<>'' and b.flag=0 and b.enddate>getdate() and gsid not in (0,209696,209696,60893,56128,54278,48227,48227,6476,58777,58397,56125,54943,54088,52846,322,206271,207286,62609,61985,57718,56238,16702,1619,610,191,178,211452,211132,205393,170022,70667,63266,62782,61816,61688,61203,59499,59014,58474,56629,56398,56134,54887,54194,1393,209) and (contains((cpmc,jysm,xxsm),'白板') or contains((cpmc,jysm,xxsm),'信息化'))
select top 10 * from (select *,ROW_NUMBER() over (partition by cid order by newid()) as tempRowID from @tb) a where tempRowID=1 order by NEWID()
我给弄成这样,效率还是不太满意,基本在9到13秒之间
1)contains 用组合条件,查一遍总比查两遍快。
2)不要用 NOT IN!
你的 NOT IN 范围会越来越大,过滤的越多性能反而更低。
不要把多次取数作为独立需求处理,要把一串取数作为整体需求处理。
除了第一次需要生成大结果比较慢,每次只需要取20条并从临时表中删除,绝对能秒取。
3)第一次需要生成大结果时就决定了企业“随机”的次序,每次只需要按照这个次序取前20条即可。
4)关于每企业“随机”选一个产品
a.可以在第一次需要生成大结果时就“随机”选一个产品;
b.也可以在每次取20条企业时再分别“随机”选一个产品。
你需要实测一下哪个方案快。
select
a.id,GsID,Qymc,Cpmc,picurl,ROW_NUMBER() over (partition by gsid order by newid()) as tempRowID
from Spzs a left join qyml b
on a.GsID=b.id
where a.flag=1 and b.flag=0 and b.enddate>getdate() and picurl is not null and picurl<>''
and gsid not in (0,209696,209696,60893,56128,54278,48227,48227,6476,58777,58397,56125,54943,54088,52846,322,206271,207286,62609,61985,57718,56238,16702,1619,610,191,178,211452,211132,205393,170022,70667,63266,62782,61816,61688,61203,59499,59014,58474,56629,56398,56134,54887,54194,1393,209)
and (contains((cpmc,jysm,xxsm),'白板') or contains((cpmc,jysm,xxsm),'信息化'))
) a
where tempRowID=1 order by NEWID()
第一个问题:为什么随机取数据要在数据库中来做?
可不可以:1、程序实现 2、通过生成10个随机ID,然后再根据ID去锁定10条数据?
第二个问题:not in里的内容太多,为何不换个写法?
使用一个 contains 用 or 判断多个关键字,效率提高两秒左右
第一个问题,数据量比较大,总产品数100多万条记录,通过程序获取完整的数据不现实
第二个问题,我说的条件里少说了一条,如果该公司信息出现过,则不能重复出现。
4)随机到的产品必须符合包含关键字,也就是说必须使用 contains,因为一个公司多个产品,只有部分符合条件,不能完全随机获取
(
pid int ,
cid int ,
qymc nvarchar(100) ,
cpmc nvarchar(100) ,
picurl nvarchar(200)
)
insert into @tb select a.ID,GsID,Qymc,Cpmc,picurl
from Spzs a
left join Qyml b on a.GsID = b.ID
where a.flag = 1
and picurl is not null
and picurl <> ''
and b.flag = 0
and b.enddate > getdate()
and gsid not in ( 0,209696,209696,60893,56128,54278,48227,48227,6476,58777,58397,56125,54943,54088,52846,322,206271,207286,62609,
61985,57718,56238,16702,1619,610,191,178,211452,211132,205393,170022,70667,63266,62782,61816,61688,61203,59499,
59014,58474,56629,56398,56134,54887,54194,1393,209 )
and ( contains (( cpmc,jysm,xxsm),'白板' )
or contains (( cpmc,jysm,xxsm),'信息化' )
)
select top 10 *
from ( select *,row_number () over (partition by cid order by newid ()) as tempRowID from @tb
) a
where tempRowID = 1
order by newid()第1段:把這部份。contains (( cpmc,jysm,xxsm),'白板' ) or contains (( cpmc,jysm,xxsm),'信息化' )
得的結果先插入臨時表,第2段:再在臨時表上結合其他條件進行2次篩選。還慢,那麼你先分段測試,先測第1段耗時,再測第2段耗時。這樣你基本知道問題在哪裡了,繼續優化。
有些企业没符合的产品、删除,某些产品没符合的企业、删除。
#b同一个企业下的产品只随机保留一条,其余的删除。这不“随机的企业、随机的产品”不就达成了。
declare @re table(pid int,cid int,qymc nvarchar(100),cpmc nvarchar(100),picurl nvarchar(200),cflag int,etime datetime,rankid int)
insert into @tb select id,gsid,Cpmc,Flag,picurl from spzs where and contains((cpmc,jysm),'"白板" or "信息化"')
delete from @tb where picurl is null or picurl='' or flag=0
delete from @tb where cid in (0,209696,209696,60893,56128,54278,48227,48227,6476,58777,58397,56125,54943,54088,52846,322,206271,207286,62609,61985,57718,56238,16702,1619,610,191,178,211452,211132,205393,170022,70667,63266,62782,61816,61688,61203,59499,59014,58474,56629,56398,56134,54887,54194,1393,209)
insert into @re select pid,cid,Qymc,cpmc,picurl,b.flag,EndDate,QymlRankID from @tb a left join qyml b on a.cid=b.ID where b.flag=0 and b.enddate>getdate()
select top 10 * from (select *,ROW_NUMBER() over (partition by cid order by newid()) as tempRowID from @re) a where tempRowID=1 order by rankid desc
最后搞成这样了,运行时间约2秒左右,感谢各位