In [2]:
import os
import pandas as pd

MySQL


In [3]:
os.chdir('/Users/chweng/Desktop/SQLExerciseOnlineMusicPlayer')

Create an user

While creating an user, we'll see if to give user some privileges such as being able to select columns out of some tables:


In [ ]:
GRANT ALL PRIVILEGES ON *.* TO 'username'@'%' IDENTIFIED BY 'userpasswd' WITH GRANT OPTION;
  • with grant option: meaning that this user is able to grant privileges for some other people.
  • *.* means the user can access to anyDB.anyTable.
  • @'%' means that the user is came from any IP.

Create DB & tables:

commands:

  • create table mytable

  • load data infile filepath into table tablename


In [ ]:
CREATE DATABASE exercises

In [ ]:
CREATE TABLE userprofile (
    userid VARCHAR(20) NOT NULL,
    gender char(1),
    age int,
    country VARCHAR(20),
    registered VARCHAR(20),
    PRIMARY KEY (userid)
);

In [ ]:
LOAD DATA INFILE '/users/raemoen/BC3/userprofile.tsv' 
INTO TABLE userprofile 
FIELDS TERMINATED BY '\t' 
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
--IGNORE 1 ROWS;

In [ ]:
CREATE TABLE userdemand (
    userid VARCHAR(20) NOT NULL,
    time VARCHAR(50),
    artname VARCHAR(100),
    traname VARCHAR(100),
    FOREIGN KEY(userid) REFERENCES userprofile(userid)
);

In [ ]:
LOAD DATA INFILE '/users/raemoen/BC3/userdemand.tsv' 
INTO TABLE userdemand 
FIELDS TERMINATED BY '\t' 
ENCLOSED BY '"'
LINES TERMINATED BY '\n';
--IGNORE 1 ROWS;

In [ ]:
CREATE TABLE calendar (
    yMd VARCHAR(100),
    dofw VARCHAR(100),
    wnofy VARCHAR(100)
);

In [ ]:
LOAD DATA INFILE '/users/raemoen/BC3/calendar.csv' 
INTO TABLE calendar 
FIELDS TERMINATED BY ',' 
ENCLOSED BY '"'
LINES TERMINATED BY '\n';
#IGNORE 1 ROWS;

Now, the tables are created. We are ready to practice. Here are my solutions.

Q1: 男性與女性的會員數量與總點播次數


In [ ]:
select userprofile.gender as gender,count(distinct(userprofile.userid)) as numMembers,count(userdemand.time) as totalPlayTimes
from userprofile left outer join userdemand
on userprofile.userid=userdemand.userid
group by gender

In [4]:
df=pd.read_csv('q1.csv')

In [5]:
df


Out[5]:
gender numMembers totalPlayTimes
0 f 110 2808842
1 m 155 5411224

In [8]:
sum(df['totalPlayTimes']) # 總點播次數


Out[8]:
8220066

Q2: 會員數量排行顯示前五名的國家,和這些國家的總點播次數:


In [ ]:
select userprofile.country as country,count(distinct(userprofile.userid)) as numMembers,count(userdemand.time) as totalPlayTimes
from userprofile left outer join userdemand
on userprofile.userid=userdemand.userid
group by country
order by numMembers desc

In [ ]:


In [9]:
df=pd.read_csv('q2.csv')

In [16]:
df.head(5)


Out[16]:
country numMembers totalPlayTimes
0 United States 60 1956732
1 United Kingdom 35 768752
2 Poland 19 508740
3 Turkey 14 540688
4 Canada 12 435414

In [18]:
sum(df['totalPlayTimes'])  # 目的:驗證各國家點播次數相加是否為總點播次數。


Out[18]:
8220066

正確。各國家點播次數相加為總點播次數。

亦可用mysql指令驗證,如下:


In [ ]:
with temp as (select userprofile.country as country,count(distinct(userprofile.userid)) as numMembers,count(userdemand.time) as totalPlayTimes
from userprofile left outer join userdemand
on userprofile.userid=userdemand.userid
group by country
order by numMembers desc
)
select sum(totalPlayTimes) from temp;

Q3: 哪一天是會員透過我們的音樂網站聆聽音樂最多的一天?

解一:


In [ ]:
SELECT substring(time,1,10) as date,count(*) as nPlayTimes
FROM exercises.userdemand
group by date
order by nPlayTimes desc;

解二(此法不好,因建暫存table是不必要的):


In [ ]:
with temp as 
(select userid,substring(time,1,10) as time 
from userdemand
)
select time,count(*) as nPlayTimes from temp
group by time
order by nPlayTimes desc;

In [21]:
df=pd.read_csv('q3.csv')

In [23]:
df.head(3)


Out[23]:
date nPlayTimes
0 2008-09-24 14204
1 2009-02-08 12616
2 2009-02-01 12156

Q4: 會員年齡層分布

使用select (case when condition then xxx else xxx end)

解一 (用with as):


In [ ]:
with tmp as (SELECT userid,age,(case
when age>=0 and age<20 then '0-19'
when age>=20 and age<40 then '20-39'
when age>=40 and age<65 then '40-64'
when age>=65 then '65-'
end) as ageZone
FROM exercises.userprofile)
select ageZone,count(*) as numMembers from tmp group by ageZone

解二 (用subquery):


In [ ]:
select  t.ages,count(t.age)
from (
select age,
 CASE 
  WHEN age between 0 and 19 THEN "0-19"
  WHEN age between 20 and 39 THEN "20-39"
  WHEN age between 40 and 64 THEN "40-64"
  else "65--"
END ages
FROM exercises.userprofile) t
group by t.ages;

In [24]:
df=pd.read_csv('q4.csv')

In [25]:
df


Out[25]:
ageZone numMembers
0 0-19 35
1 20-39 223
2 40-64 6
3 65- 1

Q5: 從過去所有資料中,找出會員在假日最常聽的三首歌曲


In [ ]:
MariaDB [exercises]> select artname,traname,count(distinct(userid)) as listners
    -> from (select date(yMd) as yMd,dofw from calendar where dofw='Sa' or dofw='Su') as cal
    -> join 
    ->      (select userid,substring(time,1,10) as time,artname,traname from userdemand) as usrdemand
    -> on cal.yMd= usrdemand.time
    -> group by artname,traname
    -> order by listners desc, artname asc
    -> limit 5;
+--------------------+-------------------------+----------+
| artname            | traname                 | listners |
+--------------------+-------------------------+----------+
| Boy Division       | Love Will Tear Us Apart |       55 |
| Radiohead          | Creep                   |       51 |
| Massive Attack     | Teardrop                |       50 |
| The Postal Service | Such Great Heights      |       50 |
| Radiohead          | Paranoid Android        |       49 |
+--------------------+-------------------------+----------+
5 rows in set (20 min 10.04 sec)

T-SQL

e.g. 1: while loop, try& catch

Declare @x int,
        @s int

        set @x=1
        set @s=0

        while(@x<=10)
            begin
                set @s=@s+@x
                set @x=@x+1
            end
        select 'the answer= '+ convert(varchar(3),@s)  --use varchar to convert a variable to a string

begin try
    select 1/0
end try
begin catch -- question: which exception to catch? In this way, all kinds of exceptions will be catched?
    select 'Exception! The denominator is zero!'  
end catch

e.g.2 login via entrusted windows local account

C:\Users\Student>sqlcmd -E -dDB01
1> select * from student
2> go
id          name                 bdate            tel
----------- -------------------- ---------------- --------------------
        101 Geisel                     1980-08-08 012355
        102 Schlachter                 1980-08-01 012350

(2 個受影響的資料列)

e.g. 3 login via user name & password

C:\Users\Student>sqlcmd -Usa -PP@ssw0rd -dDB01
1> select * from student
2> go
id          name                 bdate            tel
----------- -------------------- ---------------- --------------------
        101 Geisel                     1980-08-08 012355
        102 Schlachter                 1980-08-01 012350

(2 個受影響的資料列)

e.g. 4 covers

I) login via user name &password: specify an SQL server

II) use select to select specific columns

C:\Users\Student>sqlcmd -Usa -PP@ssw0rd -dDB01 -s localhost:12321
1> select * from student
2> go
id         lname                lbdate           ltel
-----------l--------------------l----------------l--------------------
        101lGeisel              l      1980-08-08l012355
        102lSchlachter          l      1980-08-01l012350

(2 個受影響的資料列)
1>
2>
3> select [name][id] from student   --this is wrong. I should add a comma between [name] and [id]
4> go
id
--------------------
Geisel
Schlachter

(2 個受影響的資料列)
1> select [name],[id]  from student
2> go
name                lid
--------------------l-----------
Geisel              l        101
Schlachter          l        102

(2 個受影響的資料列)

e.g. 5 Here are several ways to join two or more tables:

--join two tables
SELECT
FROM    A  JOIN  B
ON     A.ID =  B.ID
--join three tables
SELECT
FROM    A  JOIN  B
ON     A.ID =  B.ID
JOIN  C
ON   B.LID =  C.LID

--another way to join two tables
SELECT
FROM    A  ,  B
WHERE   A.ID =  B.ID

--another way to join three tables
SELECT
FROM    A  ,  B  ,  C
WHERE    A.ID =  B.ID AND   B.LID =  C.LID

e.g. 6: some examples in Ch5 (join)

--計算 各部門(department_id) 平均 及 總合 salary,人數

  SELECT  d.DEPARTMENT_ID "department_id",avg(e.SALARY) "average",sum(e.SALARY) "sum",count(e.LAST_NAME)"count"
  FROM    EMPLOYEES e join DEPARTMENTS d
  on            e.DEPARTMENT_ID=d.DEPARTMENT_ID
  GROUP   BY  d.DEPARTMENT_ID;


--查詢 各員工姓名(last_name),薪資(salary),部門編號(department_id) 及所屬部門名稱(department_name)  

  SELECT  e.LAST_NAME,e.salary,d.DEPARTMENT_ID,d.DEPARTMENT_NAME
  FROM    EMPLOYEES e join DEPARTMENTS d
  on            e.DEPARTMENT_ID=d.DEPARTMENT_ID


--計算 各部門名稱(department_name) 平均 及 總合 salary ,人數

  SELECT  d.DEPARTMENT_ID,avg(e.salary) "ave",sum(e.salary)"sum",count(*)"headcount"
  FROM    EMPLOYEES e join DEPARTMENTS d
  on            e.DEPARTMENT_ID=d.DEPARTMENT_ID
  group by d.DEPARTMENT_ID


--計算 各部門名稱(department_id, department_name) 平均 及 總合 salary ,人數

  SELECT  d.DEPARTMENT_ID,   d.department_name,avg(e.salary) "ave",sum(e.salary)"sum",count(*)"headcount"
  FROM    EMPLOYEES e join DEPARTMENTS d
  on            e.DEPARTMENT_ID=d.DEPARTMENT_ID
  group by d.DEPARTMENT_ID,d.department_name

e.g. 7: some keywords: in/all/any/not in

select a.EMPLOYEE_ID,a.LAST_NAME,a.DEPARTMENT_ID, b.DEPARTMENT_ID,b.DEPARTMENT_NAME
from EMPLOYEES a right join DEPARTMENTS b
on  a.department_id=b.department_id
order by EMPLOYEE_ID


select e.LAST_NAME,e.SALARY
from EMPLOYEES e
where e.salary< (select avg(e.SALARY) from EMPLOYEES e)

select top 5 with ties e.SALARY, e.LAST_NAME
 from EMPLOYEES e
 order by e.SALARY desc

select e.LAST_NAME,e.SALARY
from EMPLOYEES e
where e.salary> any(select top 5 with ties e.SALARY
                                    from EMPLOYEES e
                                    order by e.SALARY desc)

select e.LAST_NAME,e.SALARY
from EMPLOYEES e
where e.salary> all(select top 5 with ties e.SALARY
                                    from EMPLOYEES e
                                    order by e.SALARY desc)

select e.LAST_NAME,e.SALARY
from EMPLOYEES e
where e.salary  in (select top 5 with ties e.SALARY
                                    from EMPLOYEES e
                                    order by e.SALARY desc)

select e.LAST_NAME,e.SALARY
from EMPLOYEES e
where e.salary not in (select top 5 with ties e.SALARY
                                    from EMPLOYEES e
                                    order by e.SALARY desc)

e.g. 8: some solutions in Ch6 (subquery)

--ch6
--查出學員任一科分數大於總平均的有那些 (含學號,個人分數)


select *
from  course
where  score  >  ( select avg(score)
                   from course
                  )


--依學號(id)分組, 計算學員總分最高者 (含學號,總分)

--method 1a
select top 1   id,  sum(score)
from  course
group by id
order by sum(score) desc


select top 1  sum(score)
from  course
group by id
order by sum(score) desc

select   id,  sum(score) sums
from  course
group by id
having sum(score)  = 262

 --method 1
select   id,  sum(score) sums
from  course
group by id
having sum(score)  =  (select top 1  sum(score)
                       from  course
                       group by id
                       order by sum(score) desc) 
--method 2
select   id,  sum(score) sums
INTO  totals
from  course
group by id

select * from totals
select  max(sums) from totals

select * from totals
where sums = (select  max(sums) from totals)

--method 3
select   id,  sum(score) sums
from  course
group by id
having sum(score) =  ( select  max(sums) 
                       from (select   id,  sum(score) sums
                             from  course
                             group by id) s
                       )

with s
as (select id, sum(score) sums
        from course
        group by id )
select * from s

e.x.1: some basic exercises (query, sub-query)

--ch3
--查出分數(score) 大於90分以上的學員(不管那一科)
select score,id
from course
where score>90

--查出課程名稱(name) 為 JAVA 且分數(score) 介於85分及90分之間的學員 

select id,score,name
from course
where name='java' and score<=90 and score>=85

--列出學員各科分數,從高到低排序(不管那一科)

select id,score,name
from course
order by score desc

--列出學員各科分數,先依課程名稱(name)排序, 相同名稱再依分數從高到低排列 

select id,score,name
from course
order by name, score desc

--ch4
--依各科(name)分組, 列出各科(name)最高 的分數是幾分

select name, max(score)
from course
group by name

--依學號(id)分組, 列出各人總分

select id, sum(score)
from course
group by id

--依學號(id)分組, 列出各人總分 大於 250 以上者  (列出學號,總分)

select id, sum(score)
from course
group by id
having sum(score)>250

--ch5
--列出學員姓名及各科成績
--列出學員姓名及各科成績 (未參加考試的學員也要列出) ????

select s.name,c.name, c.score
from course c, student s
where c.id=s.id
order by s.name

select s.name,c.name, isnull(c.score,0)
from student s  left outer join course c
on s.id=c.id
order by s.name

select id,score from course order by id
select id,name from student order by id

--ch6
--查出學員任一科分數大於總平均的有那些 (含學號,個人分數)
select id,score
from course
where score>(select avg(score)
from course )

--依學號(id)分組, 計算學員總分最高者 (含學號,總分)

select id, sum(score)
from course
group by id
having sum(score)=(select max(sums)
from (select id, sum(score) sums
from course
group by id ) s)

--進階
--ch45
--列出學員姓名及總分
select s.name, sum(c.score) "sum"
from student s join course c
on s.id=c.id
group by s.name

--列出學員學號,姓名及總分

select s.id,s.name, sum(c.score) "sum"
from student s join course c
on s.id=c.id
group by s.id,s.name

--查出學員總分數最高 (含學號,姓名,總分)

select s.id,s.name, sum(c.score) "sum"
from student s join course c
on s.id=c.id
group by s.id,s.name
having sum(c.score)=(select max(sums)
from (select id, sum(score) sums
from course
group by id ) s)

--ch456
--查出學員任一科分數大於總平均的有那些 (含姓名,個人分數) 

select s.id,s.name, c.name,c.score
from student s join course c
on s.id=c.id
where c.score> ( select (avg(ave_score))
                              from(select id, avg(score) ave_score
                              from course
                              group by id ) s) 
order by id

--查出學員各科分數大於該科平均的有那些 (含姓名,個人分數) 
select s.name,c.name,c.score 
from course c join student s
on c.id=s.id
where c.score> (select avg(cin.score) 
                            from  course cin
                            where cin.name=c.name) --內部相關欄位=外部相關欄位
order by s.name

--各科平均:
--JAVA                  82
--T-SQL              85
--電概                    83
--此結果是以下列query而得:
----select c.name,avg(c.score) 
----from course c join student s
----on c.id=s.id
----group by c.name

--ch8
--PIVOT 將學員各科成績轉成 PIVOT欄位 (含學號,科目,各科分數 3個欄位)

----select id,name,score
----from course

select *
from course
PIVOT ( sum(score) for name in ([JAVA],[電概],[T-SQL]) ) pvt

--ch8 adv Subquery
--列出學員任一科分數及總平均分數 (含學號,個人分數,平均分數)

select c.id,c.score,cnew.avgs
from course c join (select c.id, avg(c.score) avgs
                                from course c
                                 group by c.id) cnew
on c.id=cnew.id

--查出學員任一科分數大於總平均的有那些 (含學號,個人分數,平均分數)

select c0.id,c0.score
from course c0 join (select c.id,c.score,cnew.avgs
                     from course c join (select c.id, avg(c.score) avgs
                                         from course c
                                         group by c.id) cnew
                                         on c.id=cnew.id) couter
on c0.id=couter.id
where c0.score>couter.avgs

In the following we constructs a database that is for practice purpose

--construct the database which is used for the exercise

use DB01    --若 定序非 Chinese_Taiwan(damn, does Chinese Taiwan exist?) 則另建 DB

go

drop table course;
drop table student;
go

create table student
( id    int        PRIMARY KEY,   --學號
  name  nchar(20),                 --姓名
  bdate date,
  tel   char(20)
);

create table course
( id    int     ,   --學號
  name  char(20),   --課名
  score int,        --分數
  FOREIGN KEY(id) REFERENCES student(id) 
);
go

insert into student values ( 1, '徐上雯','1983-04-21','09205556781');
insert into student values ( 2, '曹廷'  ,'1983-09-23','09205556789');
insert into student values ( 3, '林玉婕','1983-08-25','09205556785');
insert into student values ( 4, '謝長恩','1983-07-27','09205556783');
insert into student values ( 5, '彭士軒','1983-05-29','09205556789');
insert into student values ( 6, '李思蓓','1983-06-22','09205556789');
insert into student values ( 7, '王易新','1983-03-24','09205556786');
insert into student values ( 8, '盧芝穎','1983-01-26','09205556789');
insert into student values ( 9, '韓奐宇','1983-02-28','09205556789');
insert into student values (10, '吳宗翰','1983-11-20','09205556782');
insert into student values (11, '胡仁恩','1983-10-21','09205556789');
insert into student values (12, '楊敬平','1983-04-23','09205556789');
insert into student values (13, '呂紹瑄','1983-05-25','09205556788');
insert into student values (14, '劉世勛','1983-07-23','09205556789');
insert into student values (15, '陳威宇','1983-02-27','09205556783');
insert into student values (16, '黃品真','1983-04-23','09205556789');
insert into student values (17, '翁啟閎','1983-06-29','09205556781');
insert into student values (18, '彭紀瑋','1983-04-22','09205556780');
insert into student values (19, '王臆詞','1983-08-24','09205556789');
insert into student values (20, '童麟凱','1983-04-26','09205556787');
insert into student values (21, '隋長志','1983-09-28','09205556789');
insert into student values (22, '吳子桓','1983-04-20','09205556789');
insert into student values (23, '王均'  ,'1983-05-25','09205556788');
go

insert into course values ( 1,'JAVA',85);
insert into course values ( 2,'JAVA',86);
insert into course values ( 3,'JAVA',80);
insert into course values ( 4,'JAVA',85);
insert into course values ( 5,'JAVA',75);
insert into course values ( 6,'JAVA',70);
insert into course values ( 7,'JAVA',92);
insert into course values ( 8,'JAVA',85);
insert into course values ( 9,'JAVA',83);

insert into course values (11,'JAVA',87);
insert into course values (12,'JAVA',85);
insert into course values (13,'JAVA',86);
insert into course values (14,'JAVA',85);
insert into course values (15,'JAVA',84);
insert into course values (16,'JAVA',75);
insert into course values (17,'JAVA',81);
insert into course values (18,'JAVA',85);
insert into course values (19,'JAVA',79);
insert into course values (20,'JAVA',85);
insert into course values (21,'JAVA',78);
insert into course values (22,'JAVA',85);

go

insert into course values ( 1,'電概',81);
insert into course values ( 2,'電概',80);
insert into course values ( 3,'電概',82);
insert into course values ( 4,'電概',84);
insert into course values ( 5,'電概',64);
insert into course values ( 6,'電概',83);
insert into course values ( 7,'電概',85);
insert into course values ( 8,'電概',87);
insert into course values ( 9,'電概',86);
insert into course values (10,'電概',85);
insert into course values (11,'電概',82);
insert into course values (12,'電概',81);
insert into course values (13,'電概',89);
insert into course values (14,'電概',85);
insert into course values (15,'電概',88);
insert into course values (16,'電概',87);
insert into course values (17,'電概',86);
insert into course values (18,'電概',85);
insert into course values (19,'電概',84);
insert into course values (20,'電概',82);
insert into course values (21,'電概',85);
insert into course values (22,'電概',81);
go

insert into course values ( 1,'T-SQL',85);
insert into course values ( 2,'T-SQL',85);
insert into course values ( 3,'T-SQL',80);
insert into course values ( 4,'T-SQL',85);
insert into course values ( 5,'T-SQL',85);
insert into course values ( 6,'T-SQL',83);
insert into course values ( 7,'T-SQL',85);
insert into course values ( 8,'T-SQL',82);
insert into course values ( 9,'T-SQL',91);
insert into course values (10,'T-SQL',85);
insert into course values (11,'T-SQL',85);
insert into course values (12,'T-SQL',87);
insert into course values (13,'T-SQL',85);
insert into course values (14,'T-SQL',85);
insert into course values (15,'T-SQL',86);
insert into course values (16,'T-SQL',85);
insert into course values (17,'T-SQL',85);
insert into course values (18,'T-SQL',89);
insert into course values (19,'T-SQL',85);
insert into course values (20,'T-SQL',88);
insert into course values (21,'T-SQL',85);
insert into course values (22,'T-SQL',84);
go

/* Unicode
insert into student values (24, N'曾珦煊','1983-06-22','09205556789');
insert into student values (25, N'黃頎晧','1983-04-23','09205556789');
*/