有3个关系模式:
user(userId,userName) article(articleId,userId,title,content) vote(articleId,score)
用SQL完成此查询:
查询发表文章数大于5,文章平均得票数大于100的用户名,按平均得票数倒序排列。
当用户数超过1000万,文章数超过一亿时,如何考虑存储及性能的改进和优化。
user(userId,userName) article(articleId,userId,title,content) vote(articleId,score)
用SQL完成此查询:
查询发表文章数大于5,文章平均得票数大于100的用户名,按平均得票数倒序排列。
当用户数超过1000万,文章数超过一亿时,如何考虑存储及性能的改进和优化。
from (select username as 用户名,
avg(score) over(partition by userid) as 平均得票数,
rank() over(partition by userid order by score) as 文章序号
from (select t1.articleid, t1.userid, t2.score, t3.username
from article t1, vote t2, user_1 t3
where t1.articleid = t2.articleid
and t1.userid = t3.userid)) t
where t.平均得票数 > 100
and t.文章序号 >= 5
order by 平均得票数 desc;存储的话:user表按userid建成hash分区,分区数为4,article表按articleid(主分区键)和userid(子分区键)建为范围/散列组合分区(假设articleid有一定规律),在服务器上运行的话,user表和article表都执行并行查询。如果结果仅供查询的话,可以考虑建立物化视图。
另外,user表的userid,article表的articleid,vote表的articleid列都建为主键,article表的userid建立外键与user表关联,并且在该列建普通B树索引。在vote表上按article列建位图索引。
select tb_user.username,avg(score) from tb_user join tb_article on
tb_user.userid=tb_article.userid join tb_vote on
tb_article.articleid=tb_vote.articleid group by tb_user.username
having count(tb_article.articleid)>=5 and avg(score)>=100 order by avg(score) desc;
user(userId,userName,articleCount,avgVoteCount),按userId进行范围分区(百万数据一个分区)
article(articleId,userId,title,content,voteCount),按articleId进行范围分区(百万数据一个分区)二、设计索引:
user(userId) local、user(articleCount) global、user(articleCount) global
article(articleId) local、article(userId) local、article(voteCount) global三、物理存储
尽量将分区分配到不同的物理磁盘或者使用磁盘阵列RAID四、数据操作
1、发表文章时更新user表中articleCount
2、投票时更新article中的voteCount
3、根据userId关联计算user表的avgVoteCount五、统计
select userName from user where articleCount>5
and avgVoteCount>100 order by avgVoteCount desc;
userid number(3),
username varchar2(10)
);
create table article(
articleid number(3),
userid number(3),
title varchar2(20),
content varchar2(100)
);
create table vote(
articleid number(3),
score number(6)
); begin
insert into users values(1,'zhangsan');
insert into users values(2,'lisi');
insert into article values(1,1,'t1','t1');
insert into article values(2,1,'t1','t1');
insert into article values(3,1,'t1','t1');
insert into article values(4,1,'t1','t1');
insert into article values(5,1,'t1','t1');
insert into article values(6,1,'t1','t1');
insert into article values(7,2,'t1','t1');
insert into vote values(1,100);
insert into vote values(2,120);
insert into vote values(3,80);
insert into vote values(4,200);
insert into vote values(5,10);
insert into vote values(6,110);
insert into vote values(7,300);
end;select username from users where userid in (
select users.userid from users join article on users.userid=article.userid
group by users.userid having count(articleid)>5
) and userid in
(
select userid from article a join vote v on a.articleid = v.articleid
group by a.userid,a.articleid having avg(score)>100
);
没有进行优化
选择用户的发表的文章和得票数
select userId,score,article.articleid
from article
left join vote
on vote.articleId = article.articleId2
查询发表文章数大于5,文章平均得票数大于100的用户及其平均得票数
select userid,avg(score) score
from
(
select userId,score,article.articleid
from article
left join vote
on vote.articleId = article.articleId
)
group by userid
having count(articleid)>5
and avg(score)>1003
取得按平均得票数倒序排列的用户名
select username from
(
select userid,avg(score) score
from
(
select userId,score,article.articleid
from article
left join vote
on vote.articleId = article.articleId
)
group by userid
having count(articleid)>5
and avg(score)>100
) a
left join users
on a.userid=users.userid
order by
score desc完