现有一表document,有一字段label用于标识标签的字段,label之间用","分隔,表中现有的记录如下:
id label
1 中国,人民,1
2 中国,2,3
3 中国,1现在需要用一句sql查询出document表中label出现的次数排序,结果要如下这样:
label num
中国 3
1 2
人民 1
2 1
3 1
id label
1 中国,人民,1
2 中国,2,3
3 中国,1现在需要用一句sql查询出document表中label出现的次数排序,结果要如下这样:
label num
中国 3
1 2
人民 1
2 1
3 1
如果不是,写不出来.
你把数据分拆写到临时表,然后统计各个LABLE的值
再返回结果集
union
select '1',count(*) from T_0 where instr(lable,'1')>0
union
select '人民',count(*) from T_0 where instr(lable,'人民')>0
union
select '2',count(*) from T_0 where instr(lable,'2')>0
union
select '3',count(*) from T_0 where instr(lable,'3')>0
SQL> select * from document; LID LLABEL
---------- ------------------------------
1 china,people,1
2 china,2,3
3 china,1SQL>
SQL> select new_label, count(1) counts
2 from (select substr(',' || llabel || ',',
3 instr(',' || llabel || ',', ',', 1, rn) + 1,
4 instr(',' || llabel || ',', ',', 1, rn + 1) -
5 instr(',' || llabel || ',', ',', 1, rn) - 1) as new_label
6 from document do,
7 (select rownum rn from all_objects where rownum <= 10) ao
8 where instr(',' || llabel, ',', 1, rn) > 0) t1
9 group by new_label
10 order by 2 desc;NEW_LABEL COUNTS
-------------------------------- ----------
china 3
1 2
2 1
people 1
3 1
有多少个逗号是个数<=9,万一多于9呢
all_object 万一数量还没有','多呢
可惜拿不到分数了--建表
create table tmp_hzf_test1
(id number ,
label varchar2(100)) ;
--数值
1 中国,人民,1
2 中国,2,3
3 中国,1--语句
select element_name,count(*) from (
select distinct id,level,
substrb(label,
decode(level,1,1,instrb(label,',',1,level-1)+1),
decode(instrb(label,',',1,level),0,lengthb(label)+1,instrb(label,',',1,level))
-decode(level,1,1,instrb(label,',',1,level-1)+1)) element_name
from tmp_hzf_test1
connect by level<= Lengthb(label)-lengthb(replace(label,',',''))+1
)
group by element_name
--结果
1 2
2 1
3 1
人民 1
中国 3
SQL> set timing on
SQL> set serveroutput on
SQL> select * from document; LID LLABEL
---------- --------------------------------------------------------------------------------
4 china,4,1,china,4,1
5 china,4,1,china,4,1,china,21,3,country,29,3,china,28,3,china,30,3
6 china,21,3,china,29,3,country,4,1,china,4,1,country,21,3,china,29,3,country,21,3
7 china,43,12,china,41,1
8 people
9 people,4,1,country,4,1
10 country,4,1,good,4,1
1 china,people,1
2 china,2,3
3 china,110 rows selectedExecuted in 0.953 secondsSQL>
SQL> select element_name, count(*)
2 from (select distinct lid,
3 level,
4 substrb(llabel,
5 decode(level,
6 1,
7 1,
8 instrb(llabel, ',', 1, level - 1) + 1),
9 decode(instrb(llabel, ',', 1, level),
10 0,
11 lengthb(llabel) + 1,
12 instrb(llabel, ',', 1, level)) -
13 decode(level,
14 1,
15 1,
16 instrb(llabel, ',', 1, level - 1) + 1)) element_name
17 from document
18 connect by level <=
19 Lengthb(llabel) - lengthb(replace(llabel, ',', '')) + 1)
20 group by element_name;select element_name, count(*)
from (select distinct lid,
level,
substrb(llabel,
decode(level,
1,
1,
instrb(llabel, ',', 1, level - 1) + 1),
decode(instrb(llabel, ',', 1, level),
0,
lengthb(llabel) + 1,
instrb(llabel, ',', 1, level)) -
decode(level,
1,
1,
instrb(llabel, ',', 1, level - 1) + 1)) element_name
from document
connect by level <=
Lengthb(llabel) - lengthb(replace(llabel, ',', '')) + 1)
group by element_nameORA-01013: user requested cancel of current operation
ORA-01013: user requested cancel of current operation -- 用户取消操作的原因是运行了用connect by实现的方法已经有10 mins了还没出结果。
SQL>
SQL> select new_label, count(1) counts
2 from (select substr(',' || llabel || ',',
3 instr(',' || llabel || ',', ',', 1, rn) + 1,
4 instr(',' || llabel || ',', ',', 1, rn + 1) -
5 instr(',' || llabel || ',', ',', 1, rn) - 1) as new_label
6 from document do,
7 (select rownum rn
8 from all_objects
9 where rownum <= (select max(length(llabel) -
10 length(replace(llabel, ',')) + 1)
11 from document)) ao
12 where instr(',' || llabel, ',', 1, rn) > 0) t1
13 group by new_label;NEW_LABEL COUNTS
-------------------------------------------------------------------------------- ----------
1 13
12 1
2 1
21 4
28 1
29 4
3 11
30 1
4 10
41 1
43 1
china 17
country 6
good 1
people 315 rows selectedExecuted in 1 secondsSQL> Summary: 总共10条数据测试,connect by 方法10 mins 内出不了结果,all_objects方法用了1 secs.
关于2个疑问:
1: 有多少个逗号是个数 <=9,万一多于9呢。 -- 根据兄弟方法的提示,已经用max。。修正了,会自动取逗号最多的一条。TQ!
2: "all_objects 万一数量还没有','多呢" -- 我相信逗号的最多数不会比all_objects表总记录条数多的。
select distinct id,rn,
substrb(label,
decode(rn,1,1,instrb(label,',',1,rn-1)+1),
decode(instrb(label,',',1,rn),0,lengthb(label)+1,instrb(label,',',1,rn))
-decode(rn,1,1,instrb(label,',',1,rn-1)+1)) element_name
from tmp_hzf_test1 a ,(select level rn from
(select max(Lengthb(label)-lengthb(replace(label,',',''))+1) mrn from tmp_hzf_test1 )
connect by
level<=mrn ) b
where Lengthb(label)-lengthb(replace(label,',',''))+1 >= b.rn )
group by element_name;
我自己建了个 datatable把数据填进去
在用空间 绑定 datatable
过几天要做 统计
估计 会用到这个帖子 了