有一个表 tab 字段有date_id,type_id,user_phone,数据如下
日期 商品分类 手机号
20110701 T恤 13400000000
20110701 T恤 13400000001
20110701 短裤 13400000001
20110701 短裤 13400000005
20110702 T恤 13400000000
20110702 T恤 13400000002
20110702 短裤 13400000002
20110702 短裤 13400000003
20110703 T恤 13400000003
20110703 T恤 13400000004
20110703 短裤 13400000003
20110703 短裤 13400000005求新增客户和累计新增客户
20110703计算方法如下:
20110703新增客户 = 去重(20110703的手机号 MINUS (20110701+20110702的手机号))
20110703累计新增客户 = 去重((20110703+20110702)的手机号 MINUS 20110701的手机号))查询20110703的结果要求为:
商品分类 新增用户数 累计新增用户数
T恤 2 3
短裤 0 2
上面的描述不知道说清楚了没?要求效率要尽量的快,能不能只扫描表tab一次。
日期 商品分类 手机号
20110701 T恤 13400000000
20110701 T恤 13400000001
20110701 短裤 13400000001
20110701 短裤 13400000005
20110702 T恤 13400000000
20110702 T恤 13400000002
20110702 短裤 13400000002
20110702 短裤 13400000003
20110703 T恤 13400000003
20110703 T恤 13400000004
20110703 短裤 13400000003
20110703 短裤 13400000005求新增客户和累计新增客户
20110703计算方法如下:
20110703新增客户 = 去重(20110703的手机号 MINUS (20110701+20110702的手机号))
20110703累计新增客户 = 去重((20110703+20110702)的手机号 MINUS 20110701的手机号))查询20110703的结果要求为:
商品分类 新增用户数 累计新增用户数
T恤 2 3
短裤 0 2
上面的描述不知道说清楚了没?要求效率要尽量的快,能不能只扫描表tab一次。
20110703新增客户:
select distinct 手机号 from tab where 日期 = '20110703'
minus
select distinct 手机号 from tab where 日期 >= '20110701' and 日期 <= '20110702'20110703累计新增客户:
select distinct 手机号 from tab where 日期 >= '20110702' and 日期 <= '20110703'
minus
select distinct 手机号 from tab where 日期 = '20110701'
SQL> select * from tab;
DAET_ID TYPE_ID USER_PHONE
-------- ---------- -----------
20110701 T恤 13400000000
20110701 T恤 13400000001
20110701 短裤 13400000001
20110701 短裤 13400000005
20110702 T恤 13400000000
20110702 T恤 13400000002
20110702 短裤 13400000002
20110702 短裤 13400000003
20110703 T恤 13400000003
20110703 T恤 13400000004
20110703 短裤 13400000003
20110703 短裤 13400000005
12 rows selected
select type_id, sum(add_num) add_num, sum(total_num) total_num
from (select a.type_id, count(*) as add_num, 0 total_num
from (select t.type_id, t.user_phone
from tab t
where t.daet_id = '20110703'
minus
select t.type_id, t.user_phone
from tab t
where t.daet_id >= '20110701'
and t.daet_id <= '20110702') a
group by a.type_id
union all
select a.type_id, 0 add_num, count(*) as total_num
from (select t.type_id, t.user_phone
from tab t
where t.daet_id >= '20110702'
and t.daet_id <= '20110703'
minus
select t.type_id, t.user_phone
from tab t
where t.daet_id = '20110701') a
group by a.type_id
)
group by type_id;TYPE_ID ADD_NUM TOTAL_NUM
---------- ---------- ----------
短裤 0 2
T恤 2 3
20110701 T恤 13400000000
20110701 T恤 13400000001
20110701 短裤 13400000001
20110701 短裤 13400000005
20110702 T恤 13400000000
20110702 T恤 13400000002
20110702 短裤 13400000002
20110702 短裤 13400000003
20110703 T恤 13400000003
20110703 T恤 13400000004
20110703 短裤 13400000003
20110703 短裤 13400000005
20110704 T恤 13400000003
20110704 T恤 13400000004
20110704 短裤 13400000003
20110704 短裤 13400000005
有点没理解,如果统计 20110704的数据20110704新增客户 = 去重(20110704的手机号 MINUS (20110701+20110702+20110703的手机号))
20110704累计新增客户 = 去重((20110704+20110703+20110702)的手机号 MINUS 20110701的手机号))
是这样吗????
20110704累计新增客户 = 去重((20110704+20110703+20110702)的手机号 MINUS 20110701的手机号))是这样的,
SQL> select * from tab;DATE_ID TYPE_ID USER_PHONE
-------------------- -------------------- --------------------
20110701 T恤 13400000000
20110701 T恤 13400000001
20110701 短裤 13400000001
20110701 短裤 13400000005
20110702 T恤 13400000000
20110702 T恤 13400000002
20110702 短裤 13400000002
20110702 短裤 13400000003
20110703 T恤 13400000003
20110703 T恤 13400000004
20110703 短裤 13400000003
20110703 短裤 1340000000512 rows selectedSQL> SELECT m.type_id,
2 SUM(CASE WHEN (m.rn = 1 AND m.date_id = '20110703') THEN 1 ELSE 0 END) new_num,
3 SUM(CASE WHEN (m.rn = 1 AND m.date_id > m.min_date) THEN 1 ELSE 0 END) all_new_num
4 FROM (
5 SELECT t.date_id,
6 t.type_id,
7 ROW_NUMBER() OVER(PARTITION BY t.type_id,t.user_phone ORDER BY t.date_id) rn,
8 MIN(t.date_id) over(PARTITION BY t.type_id ORDER BY t.date_id) min_date
9 FROM tab t
10 ) m GROUP BY m.type_id
11 ;TYPE_ID NEW_NUM ALL_NEW_NUM
-------------------- ---------- -----------
T恤 2 3
短裤 0 2
SUM(CASE WHEN (m.rn = 1 AND m.date_id = '20110703') THEN 1 ELSE 0 END) new_num,
SUM(CASE WHEN (m.rn = 1 AND m.date_id > m.min_date AND m.date_id <= '20110703') THEN 1 ELSE 0 END) all_new_num
FROM (
SELECT t.date_id,
t.type_id,
ROW_NUMBER() OVER(PARTITION BY t.type_id,t.user_phone ORDER BY t.date_id) rn,
MIN(t.date_id) over(PARTITION BY t.type_id ORDER BY t.date_id) min_date
FROM tab t
) m GROUP BY m.type_id