本来这个功能我已经用程序编码实现了的,无奈老大要求功能都要用SQL脚本实现简单的说就是我有一个查询结果,其结果可以看成是多个组如:
declare @temp1 table (group int, name varchar(10),[C1] varchar(10), [C2] varchar(10))
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'结果是
1,A,qq,this is qq
1,B,qb,this is qb
1,C,tm,this is tm 2,A,icq,this is icq
2,D,msn,this is msn
2,E,skype,this is skype以上数据按group列可看成两组
我有另一个查询结果,结果也可以看成多个组declare @temp2 table (group int, name varchar(10),[C1] varchar(10), [Order] varchar(10),[DESC] varchar(10))
insert into @temp2
select 1,'C','tm',1,'this is tm', union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq'结果是
1,C,tm,1,this is tm
1,A,qq,2,this is qq
1,B,qb,3,this is qb 2,A,icq,1,this is icq
2,D,msn,1, this is msn以上数据按group列也可看成两组
问题是用高效的SQL方法比较@temp1里哪些组完全与@temp2里哪些组相同,并输出@temp1里能够匹配相同的组比较条件是
1. @temp1 的[C1] 与@temp2 的[C1] 列对应相等
2. @temp1 的[C2] 与@temp2 的[DESC] 列对应相等(比较时忽略字符串中的空格字符)比如:
"this is icq" 与"this is icq" 是完全一样的注意:
@temp1和@temp2里相同的组所拥有的记录个数肯定是相同的,如果记录个数不相同那么肯定组就不相同
组里的数据出现的顺序可以是任意的,也就是说不能认为两组结果都是完全一致的顺序,必须注意@temp2里的[order]烈对分组里数据出现顺序的影响,需要比较@temp1里的每个记录是否在@temp2的某个组下面是否确切都能满足忽略重复匹配的组,假设每个组在两个比较烈上的情况是肯定都不一致的所以上面测试数据比较后的结果应该是只有group为1的组完全一致最后输出
@temp1里的
1,A,qq,this is qq
1,B,qb,this is qb
1,C,tm,this is tm
由于实际的数据记录会比上面的事例多的多,大概@temp1和@temp2都会是出现几千个分组,因此最后的SQL必须是高效的
declare @temp1 table (group int, name varchar(10),[C1] varchar(10), [C2] varchar(10))
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'结果是
1,A,qq,this is qq
1,B,qb,this is qb
1,C,tm,this is tm 2,A,icq,this is icq
2,D,msn,this is msn
2,E,skype,this is skype以上数据按group列可看成两组
我有另一个查询结果,结果也可以看成多个组declare @temp2 table (group int, name varchar(10),[C1] varchar(10), [Order] varchar(10),[DESC] varchar(10))
insert into @temp2
select 1,'C','tm',1,'this is tm', union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq'结果是
1,C,tm,1,this is tm
1,A,qq,2,this is qq
1,B,qb,3,this is qb 2,A,icq,1,this is icq
2,D,msn,1, this is msn以上数据按group列也可看成两组
问题是用高效的SQL方法比较@temp1里哪些组完全与@temp2里哪些组相同,并输出@temp1里能够匹配相同的组比较条件是
1. @temp1 的[C1] 与@temp2 的[C1] 列对应相等
2. @temp1 的[C2] 与@temp2 的[DESC] 列对应相等(比较时忽略字符串中的空格字符)比如:
"this is icq" 与"this is icq" 是完全一样的注意:
@temp1和@temp2里相同的组所拥有的记录个数肯定是相同的,如果记录个数不相同那么肯定组就不相同
组里的数据出现的顺序可以是任意的,也就是说不能认为两组结果都是完全一致的顺序,必须注意@temp2里的[order]烈对分组里数据出现顺序的影响,需要比较@temp1里的每个记录是否在@temp2的某个组下面是否确切都能满足忽略重复匹配的组,假设每个组在两个比较烈上的情况是肯定都不一致的所以上面测试数据比较后的结果应该是只有group为1的组完全一致最后输出
@temp1里的
1,A,qq,this is qq
1,B,qb,this is qb
1,C,tm,this is tm
由于实际的数据记录会比上面的事例多的多,大概@temp1和@temp2都会是出现几千个分组,因此最后的SQL必须是高效的
declare @temp1 table ([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(50))
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'
declare @temp2 table ([group] int, name varchar(10),[C1] varchar(10), [Order] varchar(10),[DESC] varchar(50))
insert into @temp2
select 1,'C','tm',1,'this is tm'union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq'
select *
from (
select *,COUNT(*) over(partition by [group]) as gcnt1
from (select *,COUNT(*) over(partition by [group]) as gcnt from @temp1 ) a
where exists(select 1
from (select *,COUNT(*) over(partition by [group]) as gcnt from @temp2) b
where a.c1=b.C1 and a.c2=b.[DESC] and A.gcnt=b.gcnt )
) a
where gcnt=gcnt1(6 行受影响)(5 行受影响)
group name C1 C2 gcnt gcnt1
----------- ---------- ---------- -------------------------------------------------- ----------- -----------
1 A qq this is qq 3 3
1 B qb this is qb 3 3
1 C tm this is tm 3 3(3 行受影响)
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'
declare @temp2 table ([group] int, name varchar(10),[C1] varchar(10), [Order] varchar(10),[DESC] varchar(50))
insert into @temp2
select 1,'C','tm',1,'this is tm'union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq'
;with cte1 as(select*,COUNT(*) over(partition by [group]) as gcnt
,rtrim(ltrim(replace(replace(REPLACE([C2],' ','</r><r>'),'<r></r>',''),'</r><r>',','))) as [New_C2]
from @temp1
)
,cte2 as(select*,COUNT(*) over(partition by [group]) as gcnt
,rtrim(ltrim(replace(replace(REPLACE([DESC],' ','</r><r>'),'<r></r>',''),'</r><r>',','))) as [New_DESC]
from @temp2
)
select *
from (
select *,COUNT(*) over(partition by [group]) as gcnt1
from cte1 a
where exists(select 1
from cte2 b
where a.c1=b.C1 and a.[New_C2]=b.[New_DESC] and A.gcnt=b.gcnt )
) a
where gcnt=gcnt1
(6 行受影响)(5 行受影响)
group name C1 C2 gcnt New_C2 gcnt1
----------- ---------- ---------- -------------------------------------------------- ----------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -----------
1 A qq this is qq 3 this,is,qq 3
1 B qb this is qb 3 this,is,qb 3
1 C tm this is tm 3 this,is,tm 3(3 行受影响)
declare @temp1 table
([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(20));
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype';declare @temp2 table
([group] int, name varchar(10),[C1] varchar(10), [Order] int,[DESC] varchar(20));
insert into @temp2
select 1,'C','tm',1,'this is tm' union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq';-- SQL 2005
;with t as(
select [group],checksum_agg(checksum([c1],replace([c2],' ',''))) chksum
from @temp1 group by [group]
intersect
select [group],checksum_agg(checksum([c1],replace([desc],' ','')))
from @temp2 group by [group])
select * from @temp1 where [group] in (select [group] from t);
/*
1 A qq this is qq
1 B qb this is qb
1 C tm this is tm
*/
create table #temp1
(
[group] int,
[name] varchar(10),
[C1] varchar(40),
[C2] varchar(40)
)
insert into #temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'create table #temp2
(
[group] int,
[name] varchar(10),
[C1] varchar(40),
[Order] varchar(40),
[DESC] varchar(40)
)
insert into #temp2
select 1,'C','tm',1,'this is tm' union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq' union all
select 3,'A','cq',1,'this is cq'select * from #temp1 m where
not exists
(
select isnull(a.[group],0) + isnull(b.[group],0) g from
#temp1 a full join #temp2 b
on a.[group] = b.[group] and a.c1 = b.c1 and a.c2 = b.[desc]
where
(a.[group] is null or b.[group] is null) and
( m.[group] = a.[group] or m.[group] = b.[group])
)group name C1 C2
----------- ---------- ---------------------------------------- ----------------------------------------
1 A qq this is qq
1 B qb this is qb
1 C tm this is tm(3 行受影响)
@temp1和@temp2里相同的组所拥有的记录个数肯定是相同的,如果记录个数不相同那么肯定组就不相同
组里的数据出现的顺序可以是任意的,也就是说不能认为两组结果都是完全一致的顺序,必须注意@temp2里的[order]烈对分组里数据出现顺序的影响,需要比较@temp1里的每个记录是否在@temp2的某个组下面是否确切都能满足嘛意思捏?
还要考虑@temp2里的[order]
这个脚本有明显的问题分组的group 绝大多数都是对不上的declare @temp1 table
([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(20));
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this i s qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype';declare @temp2 table
([group] int, name varchar(10),[C1] varchar(10), [Order] int,[DESC] varchar(20));
insert into @temp2
select 3,'C','tm',1,'this is tm' union all -- 修改 group 为3
select 3,'A','qq',2,'this is qq' union all
select 3,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq';-- SQL 2005
;with t as(
select [group],checksum_agg(checksum([c1],replace([c2],' ',''))) chksum
from @temp1 group by [group]
intersect
select [group],checksum_agg(checksum([c1],replace([desc],' ','')))
from @temp2 group by [group])
select * from @temp1 where [group] in (select [group] from t);查不出结果
declare @temp1 table
([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(20));
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype';declare @temp2 table
([group] int, name varchar(10),[C1] varchar(10), [Order] int,[DESC] varchar(20));
insert into @temp2
select 3,'C','tm',1,'this is tm' union all
select 3,'A','qq',2,'this is qq' union all
select 3,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq';-- SQL 2005
;with t1 as(
select [group],checksum_agg(checksum([c1],replace([c2],' ',''))) chksum
from @temp1 group by [group]
),
t2 as(
select [group],checksum_agg(checksum([c1],replace([desc],' ',''))) chksum
from @temp2 group by [group])
select * from @temp1 where [group] in
(select [group] from t1 where chksum in (select chksum from t2));
/*
1 A qq this is qq
1 B qb this is qb
1 C tm this is tm
*/
比较欣赏checksum 可以生成哈希索引
就是"this is qq"和"this is qq"应该是相等的。
"this is q q"和"this is qq"应该是不相等的是这样的情况吗。
完全忽略空格,不做分词考虑
"this is q q"和"this is qq"应该是不相等的是这样的情况吗。我已经去掉“,”可以测试通过空格的问题