表1
name start end fate_name
gene 19643 23759 t00005
mRNA 19643 23759 ID=m42816;Parent=t00005
5 19643 20059 ID=utr5p_of_m42816;Parent=m42816
exon 19643 20083 ID=e727005;Parent=m42816
CDS 20060 20083 ID=cds_of_m42816;Parent=m42816
exon 20417 20649 ID=e727006;Parent=m42816
CDS 20417 20649 ID=cds_of_m42816;Parent=m42816
exon 20764 20835 ID=e727007;Parent=m42816
CDS 20764 20835 ID=cds_of_m42816;Parent=m42816
exon 21294 21379 ID=e727008;Parent=m42816
CDS 21294 21379 ID=cds_of_m42816;Parent=m42816
exon 22247 22321 ID=e727009;Parent=m42816
CDS 22247 22321 ID=cds_of_m42816;Parent=m42816
exon 22685 23759 ID=e727010;Parent=m42816
CDS 22685 23193 ID=cds_of_m42816;Parent=m42816
3 23194 23759 ID=utr3p_of_m42816;Parent=m42816想要的结果是
表2
name start end fate_name
gene t00005
mRNA 1 4117 ID=m42816;Parent=t00005
5 1 417 ID=utr5p_of_m42816;Parent=m42816
exon1 1 441 ID=e727009;Parent=m42816
CDS1 418 441 ID=cds_of_m42816;Parent=m42816
exon2 775 1007 ID=e727009;Parent=m42816
CDS2 775 1007 ID=cds_of_m42816;Parent=m42816
exon3 1122 1193 ID=e727009;Parent=m42816
CDS3 1122 1193 ID=cds_of_m42816;Parent=m42816
exon4 1652 1737 ID=e727009;Parent=m42816
CDS4 1652 1737 ID=cds_of_m42816;Parent=m42816
exon5 2605 2679 ID=e727009;Parent=m42816
CDS5 2605 2679 ID=cds_of_m42816;Parent=m42816
exon6 3043 4117 ID=e727010;Parent=m42816
CDS6 3043 3551 ID=cds_of_m42816;Parent=m42816
3 3552 4117 ID=utr3p_of_m42816;Parent=m42816具体的要求是
1 将CDS,exon,3和5根据fate_name分成一组后按照start的大小排序
2 表2中mRNA的start=表1中的mRNA的start-表1中gene的start+1 例如19643-19643+1得到了表2中mRNA的start值,同理,表2中的end=表1中的mRNA的end-表1中gene的start+1 如23759-19643+1得到
3 表2其他的exon,CDS,3 和5中每项的start=表1中的对应的start-表1中mRNA的start+1,同样 表2的end==表1中的各自对应的end-表1中mRNA的start+1
name start end fate_name
gene 19643 23759 t00005
mRNA 19643 23759 ID=m42816;Parent=t00005
5 19643 20059 ID=utr5p_of_m42816;Parent=m42816
exon 19643 20083 ID=e727005;Parent=m42816
CDS 20060 20083 ID=cds_of_m42816;Parent=m42816
exon 20417 20649 ID=e727006;Parent=m42816
CDS 20417 20649 ID=cds_of_m42816;Parent=m42816
exon 20764 20835 ID=e727007;Parent=m42816
CDS 20764 20835 ID=cds_of_m42816;Parent=m42816
exon 21294 21379 ID=e727008;Parent=m42816
CDS 21294 21379 ID=cds_of_m42816;Parent=m42816
exon 22247 22321 ID=e727009;Parent=m42816
CDS 22247 22321 ID=cds_of_m42816;Parent=m42816
exon 22685 23759 ID=e727010;Parent=m42816
CDS 22685 23193 ID=cds_of_m42816;Parent=m42816
3 23194 23759 ID=utr3p_of_m42816;Parent=m42816想要的结果是
表2
name start end fate_name
gene t00005
mRNA 1 4117 ID=m42816;Parent=t00005
5 1 417 ID=utr5p_of_m42816;Parent=m42816
exon1 1 441 ID=e727009;Parent=m42816
CDS1 418 441 ID=cds_of_m42816;Parent=m42816
exon2 775 1007 ID=e727009;Parent=m42816
CDS2 775 1007 ID=cds_of_m42816;Parent=m42816
exon3 1122 1193 ID=e727009;Parent=m42816
CDS3 1122 1193 ID=cds_of_m42816;Parent=m42816
exon4 1652 1737 ID=e727009;Parent=m42816
CDS4 1652 1737 ID=cds_of_m42816;Parent=m42816
exon5 2605 2679 ID=e727009;Parent=m42816
CDS5 2605 2679 ID=cds_of_m42816;Parent=m42816
exon6 3043 4117 ID=e727010;Parent=m42816
CDS6 3043 3551 ID=cds_of_m42816;Parent=m42816
3 3552 4117 ID=utr3p_of_m42816;Parent=m42816具体的要求是
1 将CDS,exon,3和5根据fate_name分成一组后按照start的大小排序
2 表2中mRNA的start=表1中的mRNA的start-表1中gene的start+1 例如19643-19643+1得到了表2中mRNA的start值,同理,表2中的end=表1中的mRNA的end-表1中gene的start+1 如23759-19643+1得到
3 表2其他的exon,CDS,3 和5中每项的start=表1中的对应的start-表1中mRNA的start+1,同样 表2的end==表1中的各自对应的end-表1中mRNA的start+1
name start end fate_name
gene 19643 23759 t00005
mRNA 19643 23759 ID=m42816;Parent=t00005
5 19643 20059 ID=utr5p_of_m42816;Parent=m42816
exon 19643 20083 ID=e727005;Parent=m42816
CDS 20060 20083 ID=cds_of_m42816;Parent=m42816
exon 20417 20649 ID=e727006;Parent=m42816
CDS 20417 20649 ID=cds_of_m42816;Parent=m42816 要求就是fate—name这列中相同的,将 name列中 5,3 和 CDS exon 中对应的start都减去mRNA的start,end也都减去mRNA行中的end,而mRNA行中的start则减去gene的start, end 也是减去gene中的end
INSERT tb SELECT 1,2
UNION ALL ALL SELECT 6,2
UNION ALL SELECT 7,1
UNION ALL SELECT 8,5
UNION ALL SELECT 9,1
GO--查询的存储过程
CREATE PROC p_Qry
@group VARCHAR(1000)
AS
SET NOCOUNT ON
IF @group LIKE '%[^0-9,]%'
BEGIN
RAISERROR(N'"%s" 中包含非数字数据',1,16,@group)
RETURN
END
--将字符串分拆为分组表
DECLARE @t TABLE(ID int IDENTITY,Groups varchar(10),a int,b int)
DECLARE @i int,@pid varchar(10)
SELECT @i=CHARINDEX(',',@group+',')
,@pid=LEFT(@group,@i-1)
,@group=STUFF(@group,1,@i,'')+','
,@i=CHARINDEX(',',@group)
INSERT @t SELECT 'ID<='+@pid,NULL,@pid
WHILE @i>1
BEGIN
INSERT @t SELECT @pid+'<ID<='+LEFT(@group,@i-1),@pid,LEFT(@group,@i-1)
SELECT @pid=LEFT(@group,@i-1)
,@group=STUFF(@group,1,@i,'')
,@i=CHARINDEX(',',@group)
END
INSERT @t SELECT 'ID>'+@pid,@pid,NULL--根据分组表统计
SELECT b.Groups,Num=ISNULL(SUM(a.Num),0)
FROM tb a RIGHT JOIN @t b
ON (a.ID<=b.b OR b.b IS NULL)
AND(a.ID>b.a OR b.a IS NULL)
GROUP BY b.ID,b.Groups
ORDER BY b.ID
GO--调用存储过程进行查询
EXEC p_Qry '2,3,6'
/*--测试结果
Groups Num
---------- -----------
ID<=2 5
2<ID<=3 2
3<ID<=6 16
ID>6 7
--*/
SQL好象在这些方面不怎么强悍