多组数据之间比较找出完全一致的

本来这个功能我已经用程序编码实现了的,无奈老大要求功能都要用SQL脚本实现简单的说就是我有一个查询结果,其结果可以看成是多个组如:
declare @temp1 table (group int, name varchar(10),[C1] varchar(10), [C2] varchar(10))
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq'  union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'结果是
1,A,qq,this is qq
1,B,qb,this is qb
1,C,tm,this is tm 2,A,icq,this is icq
2,D,msn,this is msn
2,E,skype,this is skype以上数据按group列可看成两组
我有另一个查询结果,结果也可以看成多个组declare @temp2 table (group int, name varchar(10),[C1] varchar(10), [Order] varchar(10),[DESC] varchar(10))
insert into @temp2
select 1,'C','tm',1,'this is tm', union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is    msn' union all
select 2,'A','icq',1,'this is    icq'结果是
1,C,tm,1,this is tm
1,A,qq,2,this is qq
1,B,qb,3,this is qb 2,A,icq,1,this is    icq
2,D,msn,1, this is    msn以上数据按group列也可看成两组
问题是用高效的SQL方法比较@temp1里哪些组完全与@temp2里哪些组相同,并输出@temp1里能够匹配相同的组比较条件是
1. @temp1 的[C1] 与@temp2 的[C1] 列对应相等
2. @temp1 的[C2] 与@temp2 的[DESC] 列对应相等(比较时忽略字符串中的空格字符)比如:
"this is    icq" 与"this is icq" 是完全一样的注意:
@temp1和@temp2里相同的组所拥有的记录个数肯定是相同的,如果记录个数不相同那么肯定组就不相同
组里的数据出现的顺序可以是任意的,也就是说不能认为两组结果都是完全一致的顺序,必须注意@temp2里的[order]烈对分组里数据出现顺序的影响,需要比较@temp1里的每个记录是否在@temp2的某个组下面是否确切都能满足忽略重复匹配的组,假设每个组在两个比较烈上的情况是肯定都不一致的所以上面测试数据比较后的结果应该是只有group为1的组完全一致最后输出
@temp1里的
1,A,qq,this is qq
1,B,qb,this is qb
1,C,tm,this is tm
由于实际的数据记录会比上面的事例多的多,大概@temp1和@temp2都会是出现几千个分组,因此最后的SQL必须是高效的

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

我本来写的程序都满足了以上的要求,可无奈我的程序里定义了很多参数,而且嵌套和比较跳跃比较多,实在难以短时间内直接改写成SQL完成任务,所以请大家能否用SQL脚本的思维方式给个实现的参考
declare @temp1 table ([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(50))
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'
declare @temp2 table ([group] int, name varchar(10),[C1] varchar(10), [Order] varchar(10),[DESC] varchar(50))
insert into @temp2
select 1,'C','tm',1,'this is tm'union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq'
select *
from (
select *,COUNT(*) over(partition  by [group]) as gcnt1
from (select *,COUNT(*) over(partition  by [group]) as gcnt from @temp1 ) a
where exists(select 1
from (select *,COUNT(*) over(partition  by [group]) as gcnt  from @temp2) b
where a.c1=b.C1 and a.c2=b.[DESC] and A.gcnt=b.gcnt )
) a
where gcnt=gcnt1(6 行受影响)(5 行受影响)
group       name       C1         C2                                                 gcnt        gcnt1
----------- ---------- ---------- -------------------------------------------------- ----------- -----------
1           A          qq         this is qq                                         3           3
1           B          qb         this is qb                                         3           3
1           C          tm         this is tm                                         3           3(3 行受影响)
忘了忽略空格了declare @temp1 table ([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(50))
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'
declare @temp2 table ([group] int, name varchar(10),[C1] varchar(10), [Order] varchar(10),[DESC] varchar(50))
insert into @temp2
select 1,'C','tm',1,'this is      tm'union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq'
;with cte1 as(select*,COUNT(*) over(partition  by [group]) as gcnt
,rtrim(ltrim(replace(replace(REPLACE([C2],' ','</r><r>'),'<r></r>',''),'</r><r>',','))) as [New_C2]
from @temp1
  )
  ,cte2 as(select*,COUNT(*) over(partition  by [group]) as gcnt
,rtrim(ltrim(replace(replace(REPLACE([DESC],' ','</r><r>'),'<r></r>',''),'</r><r>',','))) as [New_DESC]
from @temp2
)
select *
from (
select *,COUNT(*) over(partition  by [group]) as gcnt1
from cte1 a
where exists(select 1
from cte2 b
where a.c1=b.C1 and a.[New_C2]=b.[New_DESC] and A.gcnt=b.gcnt )
) a
where gcnt=gcnt1
(6 行受影响)(5 行受影响)
group       name       C1         C2                                                 gcnt        New_C2                                                                                                                                                                                                                                                           gcnt1
----------- ---------- ---------- -------------------------------------------------- ----------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -----------
1           A          qq         this is qq                                         3           this,is,qq                                                                                                                                                                                                                                                       3
1           B          qb         this is qb                                         3           this,is,qb                                                                                                                                                                                                                                                       3
1           C          tm         this is tm                                         3           this,is,tm                                                                                                                                                                                                                                                       3(3 行受影响)
declare @temp1 table
([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(20));
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype';declare @temp2 table
([group] int, name varchar(10),[C1] varchar(10), [Order] int,[DESC] varchar(20));
insert into @temp2
select 1,'C','tm',1,'this is tm' union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq';-- SQL 2005
;with t as(
select [group],checksum_agg(checksum([c1],replace([c2],' ',''))) chksum
from @temp1 group by [group]
intersect
select [group],checksum_agg(checksum([c1],replace([desc],' ','')))
from @temp2 group by [group])
select * from @temp1 where [group] in (select [group] from t);
/*
1 A qq this is qq
1 B qb this is qb
1 C tm this is tm
*/
create table #temp1
(
[group] int,
[name] varchar(10),
[C1] varchar(40),
[C2] varchar(40)
)
insert into #temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype'create  table #temp2
(
[group] int,
[name] varchar(10),
[C1] varchar(40),
[Order] varchar(40),
[DESC] varchar(40)
)
insert into #temp2
select 1,'C','tm',1,'this is tm' union all
select 1,'A','qq',2,'this is qq' union all
select 1,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq' union all
select 3,'A','cq',1,'this is cq'select * from #temp1 m where
not exists
(
select isnull(a.[group],0) + isnull(b.[group],0) g from
#temp1 a full join #temp2 b
on a.[group] = b.[group] and a.c1 = b.c1 and a.c2 = b.[desc]
where
(a.[group] is null or b.[group] is null) and
( m.[group] = a.[group] or m.[group] = b.[group])
)group       name       C1                                       C2
----------- ---------- ---------------------------------------- ----------------------------------------
1           A          qq                                       this is qq
1           B          qb                                       this is qb
1           C          tm                                       this is tm(3 行受影响)
注意:
@temp1和@temp2里相同的组所拥有的记录个数肯定是相同的,如果记录个数不相同那么肯定组就不相同
组里的数据出现的顺序可以是任意的,也就是说不能认为两组结果都是完全一致的顺序,必须注意@temp2里的[order]烈对分组里数据出现顺序的影响,需要比较@temp1里的每个记录是否在@temp2的某个组下面是否确切都能满足嘛意思捏？
还要考虑@temp2里的[order]
如果需要考虑顺序，那么 @temp1 中记录的顺序又如何确定？根据插入的顺序？这个是不可靠的。
这个脚本有明显的问题分组的group 绝大多数都是对不上的declare @temp1 table
([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(20));
insert into @temp1
select 1,'A','qq','this is   qq' union all
select 1,'B','qb','this i  s qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype';declare @temp2 table
([group] int, name varchar(10),[C1] varchar(10), [Order] int,[DESC] varchar(20));
insert into @temp2
select 3,'C','tm',1,'this is tm' union all  -- 修改 group 为3
select 3,'A','qq',2,'this is qq' union all
select 3,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq';-- SQL 2005
;with t as(
select [group],checksum_agg(checksum([c1],replace([c2],' ',''))) chksum
from @temp1 group by [group]
intersect
select [group],checksum_agg(checksum([c1],replace([desc],' ','')))
from @temp2 group by [group])
select * from @temp1 where [group] in (select [group] from t);查不出结果
declare @temp1 table
([group] int, name varchar(10),[C1] varchar(10), [C2] varchar(20));
insert into @temp1
select 1,'A','qq','this is qq' union all
select 1,'B','qb','this is qb' union all
select 1,'C','tm','this is tm' union all
select 2,'A','icq','this is icq' union all
select 2,'D','msn','this is msn' union all
select 2,'E','skype','this is skype';declare @temp2 table
([group] int, name varchar(10),[C1] varchar(10), [Order] int,[DESC] varchar(20));
insert into @temp2
select 3,'C','tm',1,'this is tm' union all
select 3,'A','qq',2,'this is qq' union all
select 3,'B','qb',3,'this is qb' union all
select 2,'D','msn',2,'this is msn' union all
select 2,'A','icq',1,'this is icq';-- SQL 2005
;with t1 as(
select [group],checksum_agg(checksum([c1],replace([c2],' ',''))) chksum
from @temp1 group by [group]
),
t2 as(
select [group],checksum_agg(checksum([c1],replace([desc],' ',''))) chksum
from @temp2 group by [group])
select * from @temp1 where [group] in
(select [group] from t1 where chksum in (select chksum from t2));
/*
1 A qq this is qq
1 B qb this is qb
1 C tm this is tm
*/
比较欣赏checksum  可以生成哈希索引
7#的不是忽略了空格了吗。
就是"this is qq"和"this is     qq"应该是相等的。
"this is q    q"和"this is     qq"应该是不相等的是这样的情况吗。
完全忽略空格，不做分词考虑
"this is q q"和"this is qq"应该是不相等的是这样的情况吗。我已经去掉“，”可以测试通过空格的问题