Hive SQL50道练习题

Stella981
• 阅读 725

建表

-- Student master data. Every column, including s_birth, is stored as a string.
create table student (
    s_id    string,
    s_name  string,
    s_birth string,
    s_sex   string
)
row format delimited fields terminated by '\t';

-- Courses; t_id refers to the teacher giving the course.
create table course (
    c_id   string,
    c_name string,
    t_id   string
)
row format delimited fields terminated by '\t';

-- Teachers.
create table teacher (
    t_id   string,
    t_name string
)
row format delimited fields terminated by '\t';

-- One row per (student, course) with an integer score.
create table score (
    s_id    string,
    c_id    string,
    s_score int
)
row format delimited fields terminated by '\t';

生成数据

vi /export/data/hivedatas/student.csv

01	赵雷	1990-01-01	男
02	钱电	1990-12-21	男
03	孙风	1990-05-20	男
04	李云	1990-08-06	男
05	周梅	1991-12-01	女
06	吴兰	1992-03-01	女
07	郑竹	1989-07-01	女
08	王菊	1990-01-20	女

vi /export/data/hivedatas/course.csv

01	语文	02
02	数学	01
03	英语	03

vi /export/data/hivedatas/teacher.csv

01	张三
02	李四
03	王五

vi /export/data/hivedatas/score.csv

01	01	80
01	02	90
01	03	99
02	01	70
02	02	60
02	03	80
03	01	80
03	02	80
03	03	80
04	01	50
04	02	30
04	03	20
05	01	76
05	02	87
06	01	31
06	03	34
07	02	89
07	03	98

导数据到hive

-- Load each local tab-delimited file into the table of the same name.
load data local inpath '/export/data/hivedatas/student.csv'
    into table student;

load data local inpath '/export/data/hivedatas/course.csv'
    into table course;

load data local inpath '/export/data/hivedatas/teacher.csv'
    into table teacher;

load data local inpath '/export/data/hivedatas/score.csv'
    into table score;

–注:–hive查询语法

SELECT [ALL | DISTINCT] select_expr, select_expr, ...
    FROM table_reference
    [WHERE where_condition]
    [GROUP BY col_list [HAVING condition]]
    [CLUSTER BY col_list
      | [DISTRIBUTE BY col_list] [SORT BY | ORDER BY col_list]
    ]
    [LIMIT number]

– 1、查询"01"课程比"02"课程成绩高的学生的信息及课程分数:

-- Students whose course '01' score is higher than their course '02' score,
-- together with both scores.
-- The comparison in the where clause can only be true when a '02' row exists,
-- so the join to b is written as an inner join (a left join here would be
-- filtered down to the same rows anyway).
-- Aliases that start with a digit are backtick-quoted.
select
    student.*,
    a.s_score as `01_score`,
    b.s_score as `02_score`
from student
join score a
    on student.s_id = a.s_id
    and a.c_id = '01'
join score b
    on student.s_id = b.s_id
    and b.c_id = '02'
where a.s_score > b.s_score;

–答案2

-- Alternative form: join each score alias on the student key and select the
-- two courses in the where clause.
-- The student key lives in the ON clauses so the joins are equi-joins rather
-- than cartesian products filtered afterwards.
select
    student.*,
    a.s_score as `01_score`,
    b.s_score as `02_score`
from student
join score a
    on a.s_id = student.s_id
join score b
    on b.s_id = student.s_id
where a.c_id = '01'
    and b.c_id = '02'
    and a.s_score > b.s_score;

– 2、查询"01"课程比"02"课程成绩低的学生的信息及课程分数:

-- Students whose course '01' score is lower than their course '02' score,
-- together with both scores.
-- The comparison requires a '02' row to exist, so an inner join is used for b.
-- Aliases that start with a digit are backtick-quoted.
select
    student.*,
    a.s_score as `01_score`,
    b.s_score as `02_score`
from student
join score a
    on student.s_id = a.s_id
    and a.c_id = '01'
join score b
    on student.s_id = b.s_id
    and b.c_id = '02'
where a.s_score < b.s_score;

–答案2

-- Alternative form: join each score alias on the student key and select the
-- two courses in the where clause.
-- The student key lives in the ON clauses so the joins are equi-joins rather
-- than cartesian products filtered afterwards.
select
    student.*,
    a.s_score as `01_score`,
    b.s_score as `02_score`
from student
join score a
    on a.s_id = student.s_id
join score b
    on b.s_id = student.s_id
where a.c_id = '01'
    and b.c_id = '02'
    and a.s_score < b.s_score;

– 3、查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:

-- Id, name and average score of every student whose average is at least 60.
-- The threshold is applied to the unrounded average inside the subquery;
-- filtering on the rounded value would let e.g. 59.96 pass as 60.0.
-- The non-ASCII alias is backtick-quoted.
select
    student.s_id,
    student.s_name,
    tmp.`平均成绩`
from student
join (
    select
        score.s_id,
        round(avg(score.s_score), 1) as `平均成绩`
    from score
    group by score.s_id
    having avg(score.s_score) >= 60
) tmp
    on student.s_id = tmp.s_id;

–答案2

-- Same result without a subquery: aggregate per student and keep groups whose
-- unrounded average is at least 60.
-- The non-ASCII alias is backtick-quoted.
select
    student.s_id,
    student.s_name,
    round(avg(score.s_score), 1) as `平均成绩`
from student
join score
    on student.s_id = score.s_id
group by student.s_id, student.s_name
having avg(score.s_score) >= 60;

– 4、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:
– (包括有成绩的和无成绩的)

-- Students whose average score is below 60, including students with no score
-- rows at all (reported with an average of 0).
-- Branch 1: students with scores; the threshold is applied to the unrounded
-- average so a value such as 59.96 is not rounded out of the result.
select
    student.s_id,
    student.s_name,
    tmp.avgScore
from student
join (
    select
        score.s_id,
        round(avg(score.s_score), 1) as avgScore
    from score
    group by score.s_id
    having avg(score.s_score) < 60
) tmp
    on student.s_id = tmp.s_id
union all
-- Branch 2: students without any score row (anti-join). The constant is cast
-- to double so both branches of the union have the same column type.
select
    s2.s_id,
    s2.s_name,
    cast(0 as double) as avgScore
from student s2
left join score sc2
    on s2.s_id = sc2.s_id
where sc2.s_id is null;

–答案2

-- Same result as the previous answer, aggregating directly on the join.
-- Branch 1: students with scores and an unrounded average below 60.
select
    score.s_id,
    student.s_name,
    round(avg(score.s_score), 1) as avgScore
from student
inner join score
    on student.s_id = score.s_id
group by score.s_id, student.s_name
having avg(score.s_score) < 60
union all
-- Branch 2: students without any score row (anti-join). The constant is cast
-- to double so both branches of the union have the same column type.
select
    s2.s_id,
    s2.s_name,
    cast(0 as double) as avgScore
from student s2
left join score sc2
    on s2.s_id = sc2.s_id
where sc2.s_id is null;

– 5、查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:

-- Every student with the number of courses taken and the total score.
-- The left join keeps students without score rows: count(score.c_id) is 0 for
-- them, and coalesce turns the otherwise NULL sum into 0.
select
    student.s_id,
    student.s_name,
    count(score.c_id) as total_count,
    coalesce(sum(score.s_score), 0) as total_score
from student
left join score
    on student.s_id = score.s_id
group by student.s_id, student.s_name;

– 6、查询"李"姓老师的数量:

-- Number of teachers whose name starts with '李'.
-- No group by: grouping by t_name would return one row per distinct name
-- instead of the single total the exercise asks for.
select count(*) as teacher_count
from teacher
where t_name like '李%';

– 7、查询学过"张三"老师授课的同学的信息:

-- Students who have a score row for a course given by teacher '张三'.
-- NOTE(review): a student appears once per matching course; with the sample
-- data each teacher gives one course, so no duplicates occur - confirm before
-- reusing on other data.
select student.*
from student
join score
    on student.s_id = score.s_id
join course
    on course.c_id = score.c_id
join teacher
    on course.t_id = teacher.t_id
    and t_name = '张三';

– 8、查询没学过"张三"老师授课的同学的信息:

-- Students with no score row for any course given by teacher '张三'.
-- Anti-join: left join to the set of students who did take such a course and
-- keep only the rows that found no match.
select student.*
from student
left join (
    select s_id
    from score
    join course
        on course.c_id = score.c_id
    join teacher
        on course.t_id = teacher.t_id
        and t_name = '张三'
) tmp
    on student.s_id = tmp.s_id
where tmp.s_id is null;

– 9、查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:

-- Students who have score rows for both course '01' and course '02'.
-- c_id is a string column, so it is compared with string literals instead of
-- relying on an implicit numeric cast; student.* avoids returning the s_id
-- column three times.
select student.*
from student
join (
    select s_id from score where c_id = '01'
) tmp1
    on student.s_id = tmp1.s_id
join (
    select s_id from score where c_id = '02'
) tmp2
    on student.s_id = tmp2.s_id;

– 10、查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:

-- Students who have a score row for course '01' but none for course '02'.
-- c_id is a string column, so it is compared with string literals.
-- The left join plus "is null" keeps only students without a '02' row.
select student.*
from student
join (
    select s_id from score where c_id = '01'
) tmp1
    on student.s_id = tmp1.s_id
left join (
    select s_id from score where c_id = '02'
) tmp2
    on student.s_id = tmp2.s_id
where tmp2.s_id is null;

– 11、查询没有学全所有课程的同学的信息:
–先查询出课程的总数量

   -- Step 1: total number of courses (used as the constant in the next query).
   select count(*)
   from course;

–再查询所需结果

-- Step 2: students who have NOT taken every course.
-- The constant 3 is the course count obtained from the preceding
-- "select count(1) from course" and must be updated if courses change.
-- Anti-join against the students who did take 3 courses.
select student.*
from student
left join (
    select s_id
    from score
    group by s_id
    having count(c_id) = 3
) tmp
    on student.s_id = tmp.s_id
where tmp.s_id is null;

–方法二(一步到位):

-- Students who have NOT taken every course, in a single statement.
-- tmp1 is a one-row relation holding the total number of courses; it is
-- attached to every student with an explicit cross join.
-- tmp2 holds the number of courses per student; a student matches only when
-- that number equals the total, and unmatched students are the result.
select student.*
from student
cross join (
    select count(c_id) as num1
    from course
) tmp1
left join (
    select
        s_id,
        count(c_id) as num2
    from score
    group by s_id
) tmp2
    on student.s_id = tmp2.s_id
    and tmp1.num1 = tmp2.num2
where tmp2.s_id is null;

– 12、查询至少有一门课与学号为"01"的同学所学相同的同学的信息:

-- Students other than '01' who share at least one course with student '01'.
-- s_id is a string column, so it is compared with the string literal '01'.
-- Grouping by every selected column collapses the one-row-per-shared-course
-- output of the joins into one row per student.
select
    student.s_id,
    student.s_name,
    student.s_birth,
    student.s_sex
from student
join (
    select s_id, c_id
    from score
) tmp2
    on student.s_id = tmp2.s_id
join (
    select c_id
    from score
    where score.s_id = '01'
) tmp1
    on tmp1.c_id = tmp2.c_id
where student.s_id <> '01'
group by
    student.s_id,
    student.s_name,
    student.s_birth,
    student.s_sex;

– 13、查询和"01"号的同学学习的课程完全相同的其他同学的信息:
–备注:hive不支持group_concat方法,可用 concat_ws('|', collect_set(str)) 实现

-- Other students whose set of courses is exactly the set taken by student '01'.
-- Each student's courses are folded into one '|'-separated string and the
-- strings are compared. collect_set does not guarantee element order, so the
-- array is passed through sort_array first; otherwise identical sets could
-- produce different strings.
-- s_id is a string column, so it is compared with the string literal '01'.
select
    student.*,
    tmp1.course_id
from student
join (
    select
        s_id,
        concat_ws('|', sort_array(collect_set(c_id))) as course_id
    from score
    where s_id <> '01'
    group by s_id
) tmp1
    on student.s_id = tmp1.s_id
join (
    select concat_ws('|', sort_array(collect_set(c_id))) as course_id2
    from score
    where s_id = '01'
) tmp2
    on tmp1.course_id = tmp2.course_id2;

– 14、查询没学过"张三"老师讲授的任一门课程的学生姓名:

-- Students with no score row in any course given by teacher '张三'.
-- "taught" lists the students who do have such a row; the outer left join
-- keeps only students that are absent from it.
select student.*
from student
left join (
    select score.s_id
    from score
    join course
        on score.c_id = course.c_id
    join teacher
        on course.t_id = teacher.t_id
        and teacher.t_name = '张三'
) taught
    on student.s_id = taught.s_id
where taught.s_id is null;

– 15、查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:

-- Id, name and average score of students with two or more failed courses
-- (score below 60).
-- tmp2: students with more than one failing score row.
-- tmp : average over all of a student's scores, rounded to a whole number.
select
    student.s_id,
    student.s_name,
    tmp.avg_score
from student
inner join (
    select s_id
    from score
    where s_score < 60
    group by score.s_id
    having count(s_id) > 1
) tmp2
    on student.s_id = tmp2.s_id
left join (
    select
        s_id,
        round(avg(score.s_score)) as avg_score
    from score
    group by s_id
) tmp
    on tmp.s_id = student.s_id;

– 16、检索"01"课程分数小于60,按分数降序排列的学生信息:

-- Students whose course '01' score is below 60, highest score first.
-- Written with an explicit join instead of a comma join so the join key
-- cannot be dropped by accident.
select
    student.*,
    s_score
from student
join score
    on student.s_id = score.s_id
where s_score < 60
    and c_id = '01'
order by s_score desc;

– 17、按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:

-- Per-student scores for courses '01', '02' and '03' (labelled chinese, math
-- and english) plus the average over all of the student's score rows,
-- highest average first.
-- The per-course scores are constant within a student, so adding them to the
-- group by still yields one row per student.
-- NOTE(review): the query is driven from score, so students without any score
-- row do not appear.
select
    a.s_id,
    tmp1.s_score as chinese,
    tmp2.s_score as math,
    tmp3.s_score as english,
    round(avg(a.s_score), 2) as avgScore
from score a
left join (
    select s_id, s_score from score s1 where c_id = '01'
) tmp1
    on tmp1.s_id = a.s_id
left join (
    select s_id, s_score from score s2 where c_id = '02'
) tmp2
    on tmp2.s_id = a.s_id
left join (
    select s_id, s_score from score s3 where c_id = '03'
) tmp3
    on tmp3.s_id = a.s_id
group by
    a.s_id,
    tmp1.s_score,
    tmp2.s_score,
    tmp3.s_score
order by avgScore desc;

– 18.查询各科成绩最高分、最低分和平均分:以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率,中等率,优良率,优秀率:
–及格为>=60,中等为:70-80,优良为:80-90,优秀为:>=90

-- Per course: highest, lowest and average score plus the share of scores in
-- each band, following the bands stated in the exercise:
--   passRate       : score >= 60
--   moderate       : 70 <= score < 80
--   goodRate       : 80 <= score < 90
--   excellentRates : score >= 90
-- Each rate is the number of matching score rows divided by the number of
-- score rows of the course, rounded to two decimals.
select
    course.c_id,
    course.c_name,
    tmp.maxScore,
    tmp.minScore,
    tmp.avgScore,
    tmp.passRate,
    tmp.moderate,
    tmp.goodRate,
    tmp.excellentRates
from course
join (
    select
        c_id,
        max(s_score) as maxScore,
        min(s_score) as minScore,
        round(avg(s_score), 2) as avgScore,
        round(sum(case when s_score >= 60 then 1 else 0 end) / count(c_id), 2) as passRate,
        round(sum(case when s_score >= 70 and s_score < 80 then 1 else 0 end) / count(c_id), 2) as moderate,
        round(sum(case when s_score >= 80 and s_score < 90 then 1 else 0 end) / count(c_id), 2) as goodRate,
        round(sum(case when s_score >= 90 then 1 else 0 end) / count(c_id), 2) as excellentRates
    from score
    group by c_id
) tmp
    on tmp.c_id = course.c_id;

– 19、按各科成绩进行排序,并显示排名:
– row_number() over()分组排序功能(MySQL 8.0 之前的版本没有该方法)

-- Rank the scores inside each course, best score first.
-- "partition by c_id" restarts the numbering for every course, which replaces
-- the three per-course queries glued together with union all. Those branches
-- ordered by a column named noRanking that does not exist (the alias is
-- Ranking) and placed order by inside the union branches.
select
    s.*,
    row_number() over (partition by s.c_id order by s.s_score desc) as Ranking
from score s
order by
    s.c_id asc,
    Ranking asc;

– 20、查询学生的总成绩并进行排名:

-- Total score per student with its position when ordered from the highest
-- total to the lowest.
select
    score.s_id,
    s_name,
    sum(s_score) as sumscore,
    row_number() over (order by sum(s_score) desc) as Ranking
from score
join student
    on score.s_id = student.s_id
group by
    score.s_id,
    s_name
order by sumscore desc;

– 21、查询不同老师所教不同课程平均分从高到低显示:
– 方法1

select course.c_id,course.t_id,t_name,round(avg(s_score),2)as avgscore from course

– 方法2

select course.c_id,course.t_id,t_name,round(avg(s_score),2)as avgscore from course,teacher,score

– 22、查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:

select tmp1.* from

– 23、统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比

select c.c_id,c.c_name,tmp1.s0_60, tmp1.percentum,tmp2.s60_70, tmp2.percentum,tmp3.s70_85, tmp3.percentum,tmp4.s85_100, tmp4.percentum

– 24、查询学生平均成绩及其名次:

select tmp.*,row_number()over(order by tmp.avgScore desc) Ranking from

– 25、查询各科成绩前三名的记录

–课程id为01的前三名

select score.c_id,course.c_name,student.s_name,s_score from score

–课程id为02的前三名

select score.c_id,course.c_name,student.s_name,s_score 

–课程id为03的前三名

select score.c_id,course.c_name,student.s_name,s_score 

– 26、查询每门课程被选修的学生数:

select c.c_id,c.c_name,tmp.number from course c

– 27、查询出只有两门课程的全部学生的学号和姓名:

select st.s_id,st.s_name from student st

– 28、查询男生、女生人数:

select tmp1.man,tmp2.women from

– 29、查询名字中含有"风"字的学生信息:

-- Students whose name contains the character '风' anywhere.
select *
from student
where s_name like '%风%';

– 30、查询同名同性学生名单,并统计同名人数:

select s1.s_id,s1.s_name,s1.s_sex,count(*) as sameName

– 31、查询1990年出生的学生名单:

-- Students born in 1990. s_birth is a 'yyyy-MM-dd' string, so a prefix match
-- on the year is sufficient.
select *
from student
where s_birth like '1990%';

– 32、查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:

select score.c_id,c_name,round(avg(s_score),2) as avgScore from score

– 33、查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:

select score.s_id,s_name,round(avg(s_score),2)as avgScore from score

– 34、查询课程名称为"数学",且分数低于60的学生姓名和分数:

select s_name,s_score as mathScore from student

– 35、查询所有学生的课程及分数情况:

select a.s_name,

– 36、查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数:

select student.s_id,s_name,c_name,s_score from student

– 37、查询课程不及格的学生:

select s_name,c_name as courseName,tmp.s_score

–38、查询课程编号为01且课程成绩在80分以上的学生的学号和姓名:

select student.s_id,s_name,s_score as score_01

– 39、求每门课程的学生人数:

select course.c_id,course.c_name,count(1)as selectNum

– 40、查询选修"张三"老师所授课程的学生中,成绩最高的学生信息及其成绩:

select student.*,tmp3.c_name,tmp3.maxScore

– 41、查询不同课程成绩相同的学生的学生编号、课程编号、学生成绩:

select distinct a.s_id,a.c_id,a.s_score from score a,score b

– 42、查询每门课程成绩最好的前三名:

select tmp1.* from

– 43、统计每门课程的学生选修人数(超过5人的课程才统计):
– 要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同,按课程号升序排列

select distinct course.c_id,tmp.num from course

– 44、检索至少选修两门课程的学生学号:

select s_id,count(c_id) as totalCourse

– 45、查询选修了全部课程的学生信息:

select student.* 

–46、查询各学生的年龄(周岁):
– 按照出生日期来算,当前月日 < 出生年月的月日则,年龄减一
方法一

 select s_name,s_birth,

方法二:

select s_name,s_birth,

– 47、查询本周过生日的学生:
–方法1

-- Students whose birthday falls in the current week (Monday to Sunday).
-- Comparing weekofyear(current_date) + 1 with weekofyear(s_birth) selects
-- NEXT week, and compares week numbers taken from different years.
-- Instead, the month-day part of s_birth ('yyyy-MM-dd' string, characters 6+)
-- is matched against the month-day of each of the seven days of this week.
-- w.week_start is this week's Monday: the first Monday after "today - 7 days".
select student.*
from student
cross join (
    select next_day(date_sub(current_date, 7), 'MO') as week_start
) w
where substring(student.s_birth, 6) in (
    date_format(w.week_start, 'MM-dd'),
    date_format(date_add(w.week_start, 1), 'MM-dd'),
    date_format(date_add(w.week_start, 2), 'MM-dd'),
    date_format(date_add(w.week_start, 3), 'MM-dd'),
    date_format(date_add(w.week_start, 4), 'MM-dd'),
    date_format(date_add(w.week_start, 5), 'MM-dd'),
    date_format(date_add(w.week_start, 6), 'MM-dd')
);

–方法2

select s_name,s_sex,s_birth from student

– 48、查询下周过生日的学生:
–方法1

-- Students whose birthday falls in the next week (Monday to Sunday).
-- weekofyear(current_date) + 1 never matches in the last week of a year and
-- compares week numbers taken from different years.
-- Instead, the month-day part of s_birth ('yyyy-MM-dd' string, characters 6+)
-- is matched against the month-day of each of the seven days of next week.
-- w.week_start is next week's Monday: the first Monday strictly after today.
select student.*
from student
cross join (
    select next_day(current_date, 'MO') as week_start
) w
where substring(student.s_birth, 6) in (
    date_format(w.week_start, 'MM-dd'),
    date_format(date_add(w.week_start, 1), 'MM-dd'),
    date_format(date_add(w.week_start, 2), 'MM-dd'),
    date_format(date_add(w.week_start, 3), 'MM-dd'),
    date_format(date_add(w.week_start, 4), 'MM-dd'),
    date_format(date_add(w.week_start, 5), 'MM-dd'),
    date_format(date_add(w.week_start, 6), 'MM-dd')
);

–方法2

select s_name,s_sex,s_birth from student

– 49、查询本月过生日的学生:
–方法1

-- Students whose birth month equals the current calendar month.
select *
from student
where month(s_birth) = month(current_date);

–方法2

-- Students whose birth month equals the current calendar month, using the
-- month digits of the 'yyyy-MM-dd' string.
-- The month is taken from current_date rather than the hard-coded '10', which
-- was only correct when the query was run in October.
select
    s_name,
    s_sex,
    s_birth
from student
where substring(s_birth, 6, 2) = date_format(current_date, 'MM');

– 50、查询12月份过生日的学生:

-- Students born in December: characters 6-7 of the 'yyyy-MM-dd' birth string
-- hold the month.
select
    s_name,
    s_sex,
    s_birth
from student
where substring(s_birth, 6, 2) = '12';

欢迎点赞+收藏+转发朋友圈素质三连

Hive SQL50道练习题

文章不错?点个【在看】吧!** 👇**

本文分享自微信公众号 - 大数据技术与架构(import_bigdata)。
如有侵权,请联系 support@oschina.cn 删除。
本文参与“OSC源创计划”,欢迎正在阅读的你也加入,一起分享。

点赞
收藏
评论区
推荐文章
blmius blmius
3年前
MySQL:[Err] 1292 - Incorrect datetime value: ‘0000-00-00 00:00:00‘ for column ‘CREATE_TIME‘ at row 1
文章目录问题用navicat导入数据时,报错:原因这是因为当前的MySQL不支持datetime为0的情况。解决修改sql\mode:sql\mode:SQLMode定义了MySQL应支持的SQL语法、数据校验等,这样可以更容易地在不同的环境中使用MySQL。全局s
皕杰报表之UUID
​在我们用皕杰报表工具设计填报报表时,如何在新增行里自动增加id呢?能新增整数排序id吗?目前可以在新增行里自动增加id,但只能用uuid函数增加UUID编码,不能新增整数排序id。uuid函数说明:获取一个UUID,可以在填报表中用来创建数据ID语法:uuid()或uuid(sep)参数说明:sep布尔值,生成的uuid中是否包含分隔符'',缺省为
待兔 待兔
4个月前
手写Java HashMap源码
HashMap的使用教程HashMap的使用教程HashMap的使用教程HashMap的使用教程HashMap的使用教程22
Jacquelyn38 Jacquelyn38
3年前
2020年前端实用代码段,为你的工作保驾护航
有空的时候,自己总结了几个代码段,在开发中也经常使用,谢谢。1、使用解构获取json数据let jsonData  id: 1,status: "OK",data: 'a', 'b';let  id, status, data: number   jsonData;console.log(id, status, number )
Wesley13 Wesley13
3年前
mysql设置时区
mysql设置时区mysql\_query("SETtime\_zone'8:00'")ordie('时区设置失败,请联系管理员!');中国在东8区所以加8方法二:selectcount(user\_id)asdevice,CONVERT\_TZ(FROM\_UNIXTIME(reg\_time),'08:00','0
Stella981 Stella981
3年前
HIVE 时间操作函数
日期函数UNIX时间戳转日期函数: from\_unixtime语法:   from\_unixtime(bigint unixtime\, string format\)返回值: string说明: 转化UNIX时间戳(从19700101 00:00:00 UTC到指定时间的秒数)到当前时区的时间格式举例:hive   selec
Wesley13 Wesley13
3年前
00:Java简单了解
浅谈Java之概述Java是SUN(StanfordUniversityNetwork),斯坦福大学网络公司)1995年推出的一门高级编程语言。Java是一种面向Internet的编程语言。随着Java技术在web方面的不断成熟,已经成为Web应用程序的首选开发语言。Java是简单易学,完全面向对象,安全可靠,与平台无关的编程语言。
Stella981 Stella981
3年前
Django中Admin中的一些参数配置
设置在列表中显示的字段,id为django模型默认的主键list_display('id','name','sex','profession','email','qq','phone','status','create_time')设置在列表可编辑字段list_editable
Wesley13 Wesley13
3年前
MySQL部分从库上面因为大量的临时表tmp_table造成慢查询
背景描述Time:20190124T00:08:14.70572408:00User@Host:@Id:Schema:sentrymetaLast_errno:0Killed:0Query_time:0.315758Lock_
Python进阶者 Python进阶者
10个月前
Excel中这日期老是出来00:00:00,怎么用Pandas把这个去除
大家好,我是皮皮。一、前言前几天在Python白银交流群【上海新年人】问了一个Pandas数据筛选的问题。问题如下:这日期老是出来00:00:00,怎么把这个去除。二、实现过程后来【论草莓如何成为冻干莓】给了一个思路和代码如下:pd.toexcel之前把这