最近做一个统计分析相关的功能,需求是分析统计出某一个时间段(时间段不定,比如2017-03-20 08:00~2017-03-27 08:00)出租车的前十的起点小区总发车数,还有统计出这十个小区分别发往各个地方的前9的发车数。问题点:现查询统计一个星期的数据(约55w)时间在18s左右,响应时间太长。
现排查到的原因是数据量较大进行多次分组排序导致查询太慢,个人刚入门菜鸟一个,感觉sql语句没办法在进行优化。所以想请教一下各位有没有好一点的解决方案,控制在5s内响应。sql优化或者其他实现方案。
采用oracle数据库,主表TSRT_TAXI_hourdemand 表结构如下,数据量约6千万,每日增长约8w,TIME有索引
从表TSRT_TAXI_TRAFFICZONE ,结构如下,数据300多
采用分组查询,sql如下,
with t as
 (select *
    from (select a.*, b.AREA_ID SGRID_ZONE, c.AREA_ID EGRID_ZONE
            from (select a.*,
                         CASE
                           WHEN TO_CHAR(a.TIME, 'HH24:MI') BETWEEN '08:00' and
                                '10:00' THEN
                            1
                           WHEN TO_CHAR(a.TIME, 'HH24:MI') BETWEEN '17:00' and
                                '20:00' THEN
                            2
                           ELSE
                            0
                         END as unit
                    from TSRT_TAXI_hourdemand a
                   where a.TIME >=
                         to_date('2017-03-26 00', 'yyyy-mm-dd HH24:mi')
                     and a.TIME <=
                         to_date('2017-03-27 23', 'yyyy-mm-dd HH24:mi')) a
            left join TSRT_TAXI_TRAFFICZONE b
              on b.GRID_ID = a.SGRID_ID
            left join TSRT_TAXI_TRAFFICZONE c
              on c.GRID_ID = a.EGRID_ID)
   WHERE SGRID_ZONE IS NOT NULL
     AND EGRID_ZONE IS NOT NULL
     AND SGRID_ZONE != EGRID_ZONE
     )
select b.AREA_NAME SGRID_NAME,
       b.LONGITUDE SGRID_LON,
       b.LATITUDE  SGRID_LAT,
       c.AREA_NAME EGRID_NAME,
       c.LONGITUDE EGRID_LON,
       c.LATITUDE  EGRID_LAT,
       FLOW_NO,
       rn
  from (select b.SGRID_ZONE, b.EGRID_ZONE, b.FLOW_NO, b.rn as rn
          from (select a.*,
                       row_number() over(partition by a.SGRID_ZONE order by a.FLOW_NO DESC) rn
                  from (select t.SGRID_ZONE,
                               t.EGRID_ZONE,
                               sum(t.FLOW_NO) as FLOW_NO
                          from t
                         where t.SGRID_ZONE in
                               (select b.SGRID_ZONE
                                  from (SELECT t.SGRID_ZONE, SUM(T.FLOW_NO)
                                          FROM t
                                         group by SGRID_ZONE
                                         ORDER BY SUM(T.FLOW_NO) DESC) b
                                 where ROWNUM <= 10) 
                         group by t.SGRID_ZONE, t.EGRID_ZONE) a) b
         where b.rn < 10) a
  left join ttcb_trans_area b
    on b.ID = a.SGRID_ZONE
  left join ttcb_trans_area c
    on c.ID = a.EGRID_ZONE;