有一个表,里面有一列是 NVARCHAR (200)里面存了一些空格分隔的词语,例如 : 11 22 33
22 33 44
11 44 55
33 66 99注意以上数据仅是一列(以空格分隔的),不是3列我现在想统计出这一列记录中出现频率最高的几个词,例如上面例子的结果就是:33 - 出现 3次
22 - 出现 2次
11 - 出现 2次
44 - 出现 2次另外:split函数我已经有了。解决了问题就马上给分!
22 33 44
11 44 55
33 66 99注意以上数据仅是一列(以空格分隔的),不是3列我现在想统计出这一列记录中出现频率最高的几个词,例如上面例子的结果就是:33 - 出现 3次
22 - 出现 2次
11 - 出现 2次
44 - 出现 2次另外:split函数我已经有了。解决了问题就马上给分!
go
create function f_split
(
@s varchar(8000), --待分拆的字符串
@split varchar(10) --数据分隔符
)returns table
as
return
(
select substring(@s,number,charindex(@split,@s+@split,number)-number)as col
from master..spt_values
where type='p' and number<=len(@s+'a')
and charindex(@split,@split+@s,number)=number
)
goselect col,count(1) as 次数 from dbo.f_split(col)
你已经有了SPLIT 函数了。那就将SPLIT出的结果 SUM...GROUP BY
--示例数据
CREATE TABLE tb(ID int,col varchar(50),num int)
INSERT tb SELECT 1,'aa,bb,cc',10
UNION ALL SELECT 2,'aa,aa,bb',20
UNION ALL SELECT 3,'aa,aa,bb',20
UNION ALL SELECT 4,'dd,ccc,c',30
UNION ALL SELECT 5,'ddaa,ccc',40
UNION ALL SELECT 6,'eee,ee,c',50
GO--1. 记录数统计示例
--分拆处理需要的辅助表(由于是直接处理,所以根据col1列中最大的数据长度来创建)
DECLARE @len int
SELECT TOP 1 @len=LEN(col)+1 FROM tb ORDER BY LEN(col) DESC
IF ISNULL(@len,1)=1 RETURN
SET ROWCOUNT @len
SELECT ID=IDENTITY(int,1,1) INTO # FROM syscolumns a,syscolumns b
ALTER TABLE # ADD PRIMARY KEY(ID)
SET ROWCOUNT 0select * from #
--统计处理
SELECT data=SUBSTRING(a.col,b.ID,CHARINDEX(',',a.col+',',b.ID)-b.ID),
[COUNT]=COUNT(DISTINCT a.ID),
Numbers=COUNT(*)
FROM tb a,# b
WHERE b.ID<=LEN(a.col)
AND SUBSTRING(','+a.col,b.ID,1)=','
GROUP BY SUBSTRING(a.col,b.ID,CHARINDEX(',',a.col+',',b.ID)-b.ID)
DROP TABLE #
GO-----------------------------
data COUNT Numbers
aa 3 5
bb 3 3
c 2 2
cc 1 1
ccc 2 2
dd 1 1
ddaa 1 1
ee 1 1
eee 1 1
看不懂,前面你创建了一个split函数,后面这一句
select col,count(1) as 次数 from dbo.f_split(col) 是干嘛??
go
create table [tb] (col varchar(10))
insert into [tb]
select '11 22 33' union all
select '22 33 44' union all
select '11 44 55' union all
select '33 66 99'
select col,count(*)
from(
select parsename(replace(col,' ','.'),1)col from [tb]
union all
select parsename(replace(col,' ','.'),2) from [tb]
union all
select parsename(replace(col,' ','.'),3) from [tb]
)T group by col
go
insert tb
select '11 22 33' union all
select '22 33 44' union all
select '11 44 55' union all
select '33 66 99'
godeclare @sql varchar(8000)
select
@sql=isnull(@sql+' union all ','')+' select '+replace(col,' ',' as col union all select ')
from
tbset @sql='select col,count(1) as cnt from ('+@sql+') t group by col'exec (@sql)/**
col cnt
----------- -----------
11 2
22 2
33 3
44 2
55 1
66 1
99 1
**/
-------------------------------------------------------------------------------------------------------------------------------- -----------
11 2
22 2
33 3
44 2
55 1
66 1
99 1(7 個資料列受到影響)
--> 测试数据:@table
if object_id('[tb]') is not null drop table [tb]
create table [tb](code varchar(30))
insert [tb]
select '22 33 44' union all
select '11 44 55' union all
select '33 66 99'
--存储过程
create proc dbo.proc_getcode
as
begin
declare @code varchar(30),@pos int, @ret varchar(20)
declare @table table(code varchar(30))
DECLARE cur_getcode Cursor For
select code from [tb]
OPEN cur_getcode
FETCH cur_getcode Into @code
while @@fetch_status = 0
begin
select @code = @code + ' '
select @pos = charindex(' ',@code) while @pos > 0
begin
select @ret = substring(@code,1,@pos-1)
insert into @table select @ret
select @code = stuff(@code,1,@pos,'')
select @pos = charindex(' ',@code)
end
FETCH cur_getcode Into @code
end
Close cur_getcode
Deallocate cur_getcode
select code,count(1) as 次数 from @table
group by codeend
--调用
exec proc_getcode--结果
--------------------------
11 1
22 1
33 2
44 2
55 1
66 1
99 1
[COUNT]=COUNT(DISTINCT a.ID), --- 我的表里没有这一ID列的,哪来的distinct
alter proc dbo.proc_getcode
as
begin
declare @code varchar(30),@pos int, @ret varchar(20)
declare @table table(code varchar(30))
DECLARE cur_getcode Cursor For
select code from [tb]
OPEN cur_getcode
FETCH cur_getcode Into @code
while @@fetch_status = 0
begin
select @code = @code + ' '
select @pos = charindex(' ',@code) while @pos > 0
begin
select @ret = substring(@code,1,@pos-1)
insert into @table select @ret
select @code = stuff(@code,1,@pos,'')
select @pos = charindex(' ',@code)
end
FETCH cur_getcode Into @code
end
Close cur_getcode
Deallocate cur_getcode
select top 3 code,count(1) as 次数 from @table
group by code
order by 次数 descend
--------------------
33 3
44 2
22 2
create table tb(col varchar(200))
go
insert tb
select '11 22 33' union all
select '22 33 44' union all
select '11 44 55' union all
select '33 66 99'
godeclare @sql varchar(8000)
select
@sql=isnull(@sql+' union all ','')+' select '+replace(col,' ',' as col union all select ')
from
tbset @sql='select top 3 col,count(1) as cnt from ('+@sql+') t group by col order by cnt desc'exec (@sql)/**
col cnt
----------- -----------
33 3
44 2
22 2
**/