In [1]:
# Subquery Example with Pandas
In [2]:
import pandas as pd
import numpy as np
import MySQLdb
db = MySQLdb.connect(
"db.fastcamp.us", # DATABASE_HOST
"root", # DATABASE_USERNAME
"dkstncks", # DATABASE_PASSWORD
"sakila", # DATABASE_NAME
charset='utf8',
)
In [3]:
# Revenue Per Film
rental_df = pd.read_sql("SELECT * FROM rental;", db)
inventory_df = pd.read_sql("SELECT * FROM inventory;", db)
film_df = pd.read_sql("SELECT * FROM film;", db)
In [4]:
pd.merge(rental_df, inventory_df, on="inventory_id")
Out[4]:
rental_id
rental_date
inventory_id
customer_id
return_date
staff_id
last_update_x
film_id
store_id
last_update_y
0
1
2005-05-24 22:53:30
367
130
2005-05-26 22:04:30
1
2006-02-15 21:30:53
80
1
2006-02-15 05:09:17
1
1577
2005-06-16 04:03:28
367
327
2005-06-24 22:40:28
2
2006-02-15 21:30:53
80
1
2006-02-15 05:09:17
2
3584
2005-07-06 04:16:43
367
207
2005-07-13 07:08:43
1
2006-02-15 21:30:53
80
1
2006-02-15 05:09:17
3
10507
2005-08-01 11:22:20
367
45
2005-08-04 13:18:20
2
2006-02-15 21:30:53
80
1
2006-02-15 05:09:17
4
13641
2005-08-20 07:34:42
367
281
2005-08-26 05:18:42
1
2006-02-15 21:30:53
80
1
2006-02-15 05:09:17
5
2
2005-05-24 22:54:33
1525
459
2005-05-28 19:40:33
1
2006-02-15 21:30:53
333
2
2006-02-15 05:09:17
6
1449
2005-06-15 19:19:16
1525
471
2005-06-18 15:24:16
2
2006-02-15 21:30:53
333
2
2006-02-15 05:09:17
7
5499
2005-07-10 00:27:45
1525
127
2005-07-17 06:11:45
1
2006-02-15 21:30:53
333
2
2006-02-15 05:09:17
8
9711
2005-07-31 08:06:41
1525
231
2005-08-02 10:30:41
2
2006-02-15 21:30:53
333
2
2006-02-15 05:09:17
9
13031
2005-08-19 08:30:04
1525
567
2005-08-23 09:35:04
2
2006-02-15 21:30:53
333
2
2006-02-15 05:09:17
10
3
2005-05-24 23:03:39
1711
408
2005-06-01 22:12:39
1
2006-02-15 21:30:53
373
2
2006-02-15 05:09:17
11
2067
2005-06-17 16:11:08
1711
297
2005-06-22 13:01:08
2
2006-02-15 21:30:53
373
2
2006-02-15 05:09:17
12
3790
2005-07-06 14:13:45
1711
10
2005-07-14 13:35:45
1
2006-02-15 21:30:53
373
2
2006-02-15 05:09:17
13
7022
2005-07-27 02:31:15
1711
243
2005-07-29 02:52:15
1
2006-02-15 21:30:53
373
2
2006-02-15 05:09:17
14
11924
2005-08-17 16:22:05
1711
202
2005-08-26 12:34:05
1
2006-02-15 21:30:53
373
2
2006-02-15 05:09:17
15
4
2005-05-24 23:04:41
2452
333
2005-06-03 01:43:41
2
2006-02-15 21:30:53
535
1
2006-02-15 05:09:17
16
1533
2005-06-16 00:46:02
2452
116
2005-06-17 20:11:02
1
2006-02-15 21:30:53
535
1
2006-02-15 05:09:17
17
5639
2005-07-10 06:33:39
2452
249
2005-07-19 07:47:39
1
2006-02-15 21:30:53
535
1
2006-02-15 05:09:17
18
8185
2005-07-28 22:23:49
2452
568
2005-07-31 00:07:49
1
2006-02-15 21:30:53
535
1
2006-02-15 05:09:17
19
14099
2005-08-21 00:31:03
2452
224
2005-08-27 03:18:03
2
2006-02-15 21:30:53
535
1
2006-02-15 05:09:17
20
5
2005-05-24 23:05:21
2079
222
2005-06-02 04:33:21
1
2006-02-15 21:30:53
450
2
2006-02-15 05:09:17
21
2598
2005-06-19 05:59:57
2079
265
2005-06-24 11:44:57
2
2006-02-15 21:30:53
450
2
2006-02-15 05:09:17
22
4945
2005-07-08 22:45:02
2079
165
2005-07-11 23:59:02
2
2006-02-15 21:30:53
450
2
2006-02-15 05:09:17
23
7258
2005-07-27 11:05:54
2079
494
2005-08-02 11:36:54
1
2006-02-15 21:30:53
450
2
2006-02-15 05:09:17
24
13802
2005-08-20 12:44:53
2079
156
2005-08-22 09:18:53
2
2006-02-15 21:30:53
450
2
2006-02-15 05:09:17
25
6
2005-05-24 23:08:07
2792
549
2005-05-27 01:32:07
1
2006-02-15 21:30:53
613
1
2006-02-15 05:09:17
26
3360
2005-06-21 12:12:41
2792
498
2005-06-26 06:32:41
1
2006-02-15 21:30:53
613
1
2006-02-15 05:09:17
27
4673
2005-07-08 10:16:00
2792
91
2005-07-17 10:03:00
2
2006-02-15 21:30:53
613
1
2006-02-15 05:09:17
28
8445
2005-07-29 07:37:48
2792
384
2005-08-04 10:43:48
1
2006-02-15 21:30:53
613
1
2006-02-15 05:09:17
29
15883
2005-08-23 16:44:56
2792
550
2005-08-24 22:42:56
1
2006-02-15 21:30:53
613
1
2006-02-15 05:09:17
...
...
...
...
...
...
...
...
...
...
...
16014
11438
2005-08-02 20:21:08
1040
372
2005-08-10 22:12:08
1
2006-02-15 21:30:53
232
2
2006-02-15 05:09:17
16015
11804
2005-08-17 11:42:45
1040
556
2005-08-25 07:11:45
1
2006-02-15 21:30:53
232
2
2006-02-15 05:09:17
16016
11444
2005-08-02 20:32:55
1419
28
2005-08-08 23:21:55
2
2006-02-15 21:30:53
311
1
2006-02-15 05:09:17
16017
15051
2005-08-22 10:08:50
1419
300
2005-08-28 10:23:50
1
2006-02-15 21:30:53
311
1
2006-02-15 05:09:17
16018
11446
2005-08-02 20:33:37
748
342
2005-08-03 18:22:37
1
2006-02-15 21:30:53
164
1
2006-02-15 05:09:17
16019
15693
2005-08-23 10:00:24
748
94
2005-08-25 08:23:24
1
2006-02-15 21:30:53
164
1
2006-02-15 05:09:17
16020
11447
2005-08-02 20:36:25
3868
508
2005-08-07 18:52:25
1
2006-02-15 21:30:53
846
1
2006-02-15 05:09:17
16021
12003
2005-08-17 18:56:05
3868
70
2005-08-18 23:52:05
1
2006-02-15 21:30:53
846
1
2006-02-15 05:09:17
16022
11454
2005-08-02 21:04:39
2901
199
2005-08-05 19:03:39
1
2006-02-15 21:30:53
638
1
2006-02-15 05:09:17
16023
13982
2005-08-20 19:08:25
2901
448
2005-08-28 15:59:25
2
2006-02-15 21:30:53
638
1
2006-02-15 05:09:17
16024
11455
2005-08-02 21:07:06
2000
498
2005-08-12 01:21:06
1
2006-02-15 21:30:53
436
1
2006-02-15 05:09:17
16025
12425
2005-08-18 10:18:06
2000
405
2005-08-27 08:16:06
2
2006-02-15 21:30:53
436
1
2006-02-15 05:09:17
16026
11462
2005-08-02 21:36:46
167
268
2005-08-10 01:48:46
1
2006-02-15 21:30:53
37
1
2006-02-15 05:09:17
16027
12558
2005-08-18 14:52:35
167
289
2005-08-26 09:45:35
2
2006-02-15 21:30:53
37
1
2006-02-15 05:09:17
16028
11470
2005-08-02 21:48:28
2379
346
2005-08-05 23:58:28
2
2006-02-15 21:30:53
519
2
2006-02-15 05:09:17
16029
15589
2005-08-23 06:03:31
2379
22
2005-08-30 07:44:31
2
2006-02-15 21:30:53
519
2
2006-02-15 05:09:17
16030
11473
2005-08-02 21:52:03
620
536
2005-08-09 02:01:03
1
2006-02-15 21:30:53
135
1
2006-02-15 05:09:17
16031
12081
2005-08-17 22:10:46
620
597
2005-08-22 22:37:46
1
2006-02-15 21:30:53
135
1
2006-02-15 05:09:17
16032
11474
2005-08-02 21:53:08
574
214
2005-08-05 22:36:08
1
2006-02-15 21:30:53
124
2
2006-02-15 05:09:17
16033
12283
2005-08-18 04:54:25
574
240
2005-08-23 04:02:25
1
2006-02-15 21:30:53
124
2
2006-02-15 05:09:17
16034
11478
2005-08-02 22:09:05
3932
197
2005-08-04 18:02:05
1
2006-02-15 21:30:53
857
2
2006-02-15 05:09:17
16035
15988
2005-08-23 20:23:08
3932
400
2005-08-28 20:50:08
1
2006-02-15 21:30:53
857
2
2006-02-15 05:09:17
16036
11479
2005-08-02 22:18:13
4077
237
2005-08-12 00:43:13
1
2006-02-15 21:30:53
889
1
2006-02-15 05:09:17
16037
15849
2005-08-23 15:41:20
4077
482
2005-08-27 15:47:20
2
2006-02-15 21:30:53
889
1
2006-02-15 05:09:17
16038
11480
2005-08-02 22:18:24
4161
14
2005-08-04 21:22:24
2
2006-02-15 21:30:53
904
1
2006-02-15 05:09:17
16039
16030
2005-08-23 21:56:04
4161
137
2005-08-31 01:24:04
2
2006-02-15 21:30:53
904
1
2006-02-15 05:09:17
16040
11486
2005-08-02 22:34:06
1610
236
2005-08-09 00:46:06
2
2006-02-15 21:30:53
352
2
2006-02-15 05:09:17
16041
11986
2005-08-17 18:21:58
1610
352
2005-08-18 13:05:58
1
2006-02-15 21:30:53
352
2
2006-02-15 05:09:17
16042
11494
2005-08-02 22:51:23
3994
579
2005-08-09 01:52:23
1
2006-02-15 21:30:53
870
1
2006-02-15 05:09:17
16043
13733
2005-08-20 10:25:12
3994
85
2005-08-21 10:49:12
2
2006-02-15 21:30:53
870
1
2006-02-15 05:09:17
16044 rows × 10 columns
In [5]:
merged_df = pd.merge(
pd.merge(rental_df, inventory_df, on="inventory_id"),
film_df,
on="film_id",
)
In [6]:
revenue_per_film_df = \
merged_df.groupby("film_id").agg({"rental_rate": np.sum})
In [7]:
revenue_per_film_df["film_id"] = revenue_per_film_df.index
revenue_per_film_df.head()
Out[7]:
rental_rate
film_id
film_id
1
22.77
1
2
34.93
2
3
35.88
3
4
68.77
4
5
35.88
5
In [8]:
# Actor to Film Revenue
actor_df = pd.read_sql("SELECT * FROM actor;", db)
film_actor_df = pd.read_sql("SELECT * FROM film_actor;", db)
In [9]:
pd.merge(
actor_df,
film_actor_df,
on="actor_id",
)
Out[9]:
actor_id
first_name
last_name
last_update_x
film_id
last_update_y
0
1
PENELOPE
GUINESS
2006-02-15 04:34:33
1
2006-02-15 05:05:03
1
1
PENELOPE
GUINESS
2006-02-15 04:34:33
23
2006-02-15 05:05:03
2
1
PENELOPE
GUINESS
2006-02-15 04:34:33
25
2006-02-15 05:05:03
3
1
PENELOPE
GUINESS
2006-02-15 04:34:33
106
2006-02-15 05:05:03
4
1
PENELOPE
GUINESS
2006-02-15 04:34:33
140
2006-02-15 05:05:03
5
1
PENELOPE
GUINESS
2006-02-15 04:34:33
166
2006-02-15 05:05:03
6
1
PENELOPE
GUINESS
2006-02-15 04:34:33
277
2006-02-15 05:05:03
7
1
PENELOPE
GUINESS
2006-02-15 04:34:33
361
2006-02-15 05:05:03
8
1
PENELOPE
GUINESS
2006-02-15 04:34:33
438
2006-02-15 05:05:03
9
1
PENELOPE
GUINESS
2006-02-15 04:34:33
499
2006-02-15 05:05:03
10
1
PENELOPE
GUINESS
2006-02-15 04:34:33
506
2006-02-15 05:05:03
11
1
PENELOPE
GUINESS
2006-02-15 04:34:33
509
2006-02-15 05:05:03
12
1
PENELOPE
GUINESS
2006-02-15 04:34:33
605
2006-02-15 05:05:03
13
1
PENELOPE
GUINESS
2006-02-15 04:34:33
635
2006-02-15 05:05:03
14
1
PENELOPE
GUINESS
2006-02-15 04:34:33
749
2006-02-15 05:05:03
15
1
PENELOPE
GUINESS
2006-02-15 04:34:33
832
2006-02-15 05:05:03
16
1
PENELOPE
GUINESS
2006-02-15 04:34:33
939
2006-02-15 05:05:03
17
1
PENELOPE
GUINESS
2006-02-15 04:34:33
970
2006-02-15 05:05:03
18
1
PENELOPE
GUINESS
2006-02-15 04:34:33
980
2006-02-15 05:05:03
19
2
NICK
WAHLBERG
2006-02-15 04:34:33
3
2006-02-15 05:05:03
20
2
NICK
WAHLBERG
2006-02-15 04:34:33
31
2006-02-15 05:05:03
21
2
NICK
WAHLBERG
2006-02-15 04:34:33
47
2006-02-15 05:05:03
22
2
NICK
WAHLBERG
2006-02-15 04:34:33
105
2006-02-15 05:05:03
23
2
NICK
WAHLBERG
2006-02-15 04:34:33
132
2006-02-15 05:05:03
24
2
NICK
WAHLBERG
2006-02-15 04:34:33
145
2006-02-15 05:05:03
25
2
NICK
WAHLBERG
2006-02-15 04:34:33
226
2006-02-15 05:05:03
26
2
NICK
WAHLBERG
2006-02-15 04:34:33
249
2006-02-15 05:05:03
27
2
NICK
WAHLBERG
2006-02-15 04:34:33
314
2006-02-15 05:05:03
28
2
NICK
WAHLBERG
2006-02-15 04:34:33
321
2006-02-15 05:05:03
29
2
NICK
WAHLBERG
2006-02-15 04:34:33
357
2006-02-15 05:05:03
...
...
...
...
...
...
...
5432
199
JULIA
FAWCETT
2006-02-15 04:34:33
432
2006-02-15 05:05:03
5433
199
JULIA
FAWCETT
2006-02-15 04:34:33
541
2006-02-15 05:05:03
5434
199
JULIA
FAWCETT
2006-02-15 04:34:33
604
2006-02-15 05:05:03
5435
199
JULIA
FAWCETT
2006-02-15 04:34:33
640
2006-02-15 05:05:03
5436
199
JULIA
FAWCETT
2006-02-15 04:34:33
689
2006-02-15 05:05:03
5437
199
JULIA
FAWCETT
2006-02-15 04:34:33
730
2006-02-15 05:05:03
5438
199
JULIA
FAWCETT
2006-02-15 04:34:33
784
2006-02-15 05:05:03
5439
199
JULIA
FAWCETT
2006-02-15 04:34:33
785
2006-02-15 05:05:03
5440
199
JULIA
FAWCETT
2006-02-15 04:34:33
886
2006-02-15 05:05:03
5441
199
JULIA
FAWCETT
2006-02-15 04:34:33
953
2006-02-15 05:05:03
5442
200
THORA
TEMPLE
2006-02-15 04:34:33
5
2006-02-15 05:05:03
5443
200
THORA
TEMPLE
2006-02-15 04:34:33
49
2006-02-15 05:05:03
5444
200
THORA
TEMPLE
2006-02-15 04:34:33
80
2006-02-15 05:05:03
5445
200
THORA
TEMPLE
2006-02-15 04:34:33
116
2006-02-15 05:05:03
5446
200
THORA
TEMPLE
2006-02-15 04:34:33
121
2006-02-15 05:05:03
5447
200
THORA
TEMPLE
2006-02-15 04:34:33
149
2006-02-15 05:05:03
5448
200
THORA
TEMPLE
2006-02-15 04:34:33
346
2006-02-15 05:05:03
5449
200
THORA
TEMPLE
2006-02-15 04:34:33
419
2006-02-15 05:05:03
5450
200
THORA
TEMPLE
2006-02-15 04:34:33
462
2006-02-15 05:05:03
5451
200
THORA
TEMPLE
2006-02-15 04:34:33
465
2006-02-15 05:05:03
5452
200
THORA
TEMPLE
2006-02-15 04:34:33
474
2006-02-15 05:05:03
5453
200
THORA
TEMPLE
2006-02-15 04:34:33
537
2006-02-15 05:05:03
5454
200
THORA
TEMPLE
2006-02-15 04:34:33
538
2006-02-15 05:05:03
5455
200
THORA
TEMPLE
2006-02-15 04:34:33
544
2006-02-15 05:05:03
5456
200
THORA
TEMPLE
2006-02-15 04:34:33
714
2006-02-15 05:05:03
5457
200
THORA
TEMPLE
2006-02-15 04:34:33
879
2006-02-15 05:05:03
5458
200
THORA
TEMPLE
2006-02-15 04:34:33
912
2006-02-15 05:05:03
5459
200
THORA
TEMPLE
2006-02-15 04:34:33
945
2006-02-15 05:05:03
5460
200
THORA
TEMPLE
2006-02-15 04:34:33
958
2006-02-15 05:05:03
5461
200
THORA
TEMPLE
2006-02-15 04:34:33
993
2006-02-15 05:05:03
5462 rows × 6 columns
In [10]:
merged_df = pd.merge(
pd.merge(
actor_df,
film_actor_df,
on="actor_id",
),
revenue_per_film_df,
on="film_id",
)
merged_df.head()
Out[10]:
actor_id
first_name
last_name
last_update_x
film_id
last_update_y
rental_rate
0
1
PENELOPE
GUINESS
2006-02-15 04:34:33
1
2006-02-15 05:05:03
22.77
1
10
CHRISTIAN
GABLE
2006-02-15 04:34:33
1
2006-02-15 05:05:03
22.77
2
20
LUCILLE
TRACY
2006-02-15 04:34:33
1
2006-02-15 05:05:03
22.77
3
30
SANDRA
PECK
2006-02-15 04:34:33
1
2006-02-15 05:05:03
22.77
4
40
JOHNNY
CAGE
2006-02-15 04:34:33
1
2006-02-15 05:05:03
22.77
In [11]:
revenue_per_actor_df = merged_df.groupby("actor_id").agg({"rental_rate": np.sum})
revenue_per_actor_df.head()
Out[11]:
rental_rate
actor_id
1
813.95
2
1177.13
3
819.89
4
751.26
5
1401.04
In [12]:
revenue_per_actor_df["actor_id"] = revenue_per_actor_df.index
revenue_per_actor_df.head()
Out[12]:
rental_rate
actor_id
actor_id
1
813.95
1
2
1177.13
2
3
819.89
3
4
751.26
4
5
1401.04
5
In [13]:
pd.merge(
actor_df,
revenue_per_actor_df,
on="actor_id",
)
Out[13]:
actor_id
first_name
last_name
last_update
rental_rate
0
1
PENELOPE
GUINESS
2006-02-15 04:34:33
813.95
1
2
NICK
WAHLBERG
2006-02-15 04:34:33
1177.13
2
3
ED
CHASE
2006-02-15 04:34:33
819.89
3
4
JENNIFER
DAVIS
2006-02-15 04:34:33
751.26
4
5
JOHNNY
LOLLOBRIGIDA
2006-02-15 04:34:33
1401.04
5
6
BETTE
NICHOLSON
2006-02-15 04:34:33
1082.21
6
7
GRACE
MOSTEL
2006-02-15 04:34:33
1450.21
7
8
MATTHEW
JOHANSSON
2006-02-15 04:34:33
1183.83
8
9
JOE
SWANK
2006-02-15 04:34:33
831.19
9
10
CHRISTIAN
GABLE
2006-02-15 04:34:33
1080.38
10
11
ZERO
CAGE
2006-02-15 04:34:33
1060.02
11
12
KARL
BERRY
2006-02-15 04:34:33
1218.03
12
13
UMA
WOOD
2006-02-15 04:34:33
1647.63
13
14
VIVIEN
BERGEN
2006-02-15 04:34:33
1481.73
14
15
CUBA
OLIVIER
2006-02-15 04:34:33
1297.40
15
16
FRED
COSTNER
2006-02-15 04:34:33
1071.07
16
17
HELEN
VOIGHT
2006-02-15 04:34:33
1789.43
17
18
DAN
TORN
2006-02-15 04:34:33
1046.94
18
19
BOB
FAWCETT
2006-02-15 04:34:33
1110.66
19
20
LUCILLE
TRACY
2006-02-15 04:34:33
1126.84
20
21
KIRSTEN
PALTROW
2006-02-15 04:34:33
1663.48
21
22
ELVIS
MARX
2006-02-15 04:34:33
1098.83
22
23
SANDRA
KILMER
2006-02-15 04:34:33
1739.96
23
24
CAMERON
STREEP
2006-02-15 04:34:33
1012.82
24
25
KEVIN
BLOOM
2006-02-15 04:34:33
1074.40
25
26
RIP
CRAWFORD
2006-02-15 04:34:33
1519.87
26
27
JULIA
MCQUEEN
2006-02-15 04:34:33
1727.51
27
28
WOODY
HOFFMAN
2006-02-15 04:34:33
1670.40
28
29
ALEC
WAYNE
2006-02-15 04:34:33
1311.08
29
30
SANDRA
PECK
2006-02-15 04:34:33
704.13
...
...
...
...
...
...
170
171
OLYMPIA
PFEIFFER
2006-02-15 04:34:33
1322.25
171
172
GROUCHO
WILLIAMS
2006-02-15 04:34:33
1259.07
172
173
ALAN
DREYFUSS
2006-02-15 04:34:33
1234.33
173
174
MICHAEL
BENING
2006-02-15 04:34:33
950.08
174
175
WILLIAM
HACKMAN
2006-02-15 04:34:33
1240.93
175
176
JON
CHASE
2006-02-15 04:34:33
1256.65
176
177
GENE
MCKELLEN
2006-02-15 04:34:33
1486.49
177
178
LISA
MONROE
2006-02-15 04:34:33
812.32
178
179
ED
GUINESS
2006-02-15 04:34:33
1235.79
179
180
JEFF
SILVERSTONE
2006-02-15 04:34:33
873.75
180
181
MATTHEW
CARREY
2006-02-15 04:34:33
1935.22
181
182
DEBBIE
AKROYD
2006-02-15 04:34:33
1094.30
182
183
RUSSELL
CLOSE
2006-02-15 04:34:33
887.04
183
184
HUMPHREY
GARLAND
2006-02-15 04:34:33
1453.84
184
185
MICHAEL
BOLGER
2006-02-15 04:34:33
1721.47
185
186
JULIA
ZELLWEGER
2006-02-15 04:34:33
792.79
186
187
RENEE
BALL
2006-02-15 04:34:33
1392.01
187
188
ROCK
DUKAKIS
2006-02-15 04:34:33
1330.31
188
189
CUBA
BIRCH
2006-02-15 04:34:33
875.39
189
190
AUDREY
BAILEY
2006-02-15 04:34:33
987.67
190
191
GREGORY
GOODING
2006-02-15 04:34:33
1759.77
191
192
JOHN
SUVARI
2006-02-15 04:34:33
1537.42
192
193
BURT
TEMPLE
2006-02-15 04:34:33
1121.56
193
194
MERYL
ALLEN
2006-02-15 04:34:33
1176.20
194
195
JAYNE
SILVERSTONE
2006-02-15 04:34:33
1173.50
195
196
BELA
WALKEN
2006-02-15 04:34:33
1311.50
196
197
REESE
WEST
2006-02-15 04:34:33
1519.47
197
198
MARY
KEITEL
2006-02-15 04:34:33
1767.26
198
199
JULIA
FAWCETT
2006-02-15 04:34:33
808.45
199
200
THORA
TEMPLE
2006-02-15 04:34:33
1040.54
200 rows × 5 columns
Content source: dobestan/data-science-school
Similar notebooks: