In [1]:
# Subquery Example with Pandas

In [2]:
import pandas as pd
import numpy as np

import MySQLdb


db = MySQLdb.connect(
    "db.fastcamp.us",  # DATABASE_HOST
    "root",  # DATABASE_USERNAME
    "dkstncks",  # DATABASE_PASSWORD
    "sakila",  # DATABASE_NAME
    charset='utf8',
)

In [3]:
# Revenue Per Film

rental_df = pd.read_sql("SELECT * FROM rental;", db)
inventory_df = pd.read_sql("SELECT * FROM inventory;", db)
film_df = pd.read_sql("SELECT * FROM film;", db)

In [4]:
pd.merge(rental_df, inventory_df, on="inventory_id")


Out[4]:
rental_id rental_date inventory_id customer_id return_date staff_id last_update_x film_id store_id last_update_y
0 1 2005-05-24 22:53:30 367 130 2005-05-26 22:04:30 1 2006-02-15 21:30:53 80 1 2006-02-15 05:09:17
1 1577 2005-06-16 04:03:28 367 327 2005-06-24 22:40:28 2 2006-02-15 21:30:53 80 1 2006-02-15 05:09:17
2 3584 2005-07-06 04:16:43 367 207 2005-07-13 07:08:43 1 2006-02-15 21:30:53 80 1 2006-02-15 05:09:17
3 10507 2005-08-01 11:22:20 367 45 2005-08-04 13:18:20 2 2006-02-15 21:30:53 80 1 2006-02-15 05:09:17
4 13641 2005-08-20 07:34:42 367 281 2005-08-26 05:18:42 1 2006-02-15 21:30:53 80 1 2006-02-15 05:09:17
5 2 2005-05-24 22:54:33 1525 459 2005-05-28 19:40:33 1 2006-02-15 21:30:53 333 2 2006-02-15 05:09:17
6 1449 2005-06-15 19:19:16 1525 471 2005-06-18 15:24:16 2 2006-02-15 21:30:53 333 2 2006-02-15 05:09:17
7 5499 2005-07-10 00:27:45 1525 127 2005-07-17 06:11:45 1 2006-02-15 21:30:53 333 2 2006-02-15 05:09:17
8 9711 2005-07-31 08:06:41 1525 231 2005-08-02 10:30:41 2 2006-02-15 21:30:53 333 2 2006-02-15 05:09:17
9 13031 2005-08-19 08:30:04 1525 567 2005-08-23 09:35:04 2 2006-02-15 21:30:53 333 2 2006-02-15 05:09:17
10 3 2005-05-24 23:03:39 1711 408 2005-06-01 22:12:39 1 2006-02-15 21:30:53 373 2 2006-02-15 05:09:17
11 2067 2005-06-17 16:11:08 1711 297 2005-06-22 13:01:08 2 2006-02-15 21:30:53 373 2 2006-02-15 05:09:17
12 3790 2005-07-06 14:13:45 1711 10 2005-07-14 13:35:45 1 2006-02-15 21:30:53 373 2 2006-02-15 05:09:17
13 7022 2005-07-27 02:31:15 1711 243 2005-07-29 02:52:15 1 2006-02-15 21:30:53 373 2 2006-02-15 05:09:17
14 11924 2005-08-17 16:22:05 1711 202 2005-08-26 12:34:05 1 2006-02-15 21:30:53 373 2 2006-02-15 05:09:17
15 4 2005-05-24 23:04:41 2452 333 2005-06-03 01:43:41 2 2006-02-15 21:30:53 535 1 2006-02-15 05:09:17
16 1533 2005-06-16 00:46:02 2452 116 2005-06-17 20:11:02 1 2006-02-15 21:30:53 535 1 2006-02-15 05:09:17
17 5639 2005-07-10 06:33:39 2452 249 2005-07-19 07:47:39 1 2006-02-15 21:30:53 535 1 2006-02-15 05:09:17
18 8185 2005-07-28 22:23:49 2452 568 2005-07-31 00:07:49 1 2006-02-15 21:30:53 535 1 2006-02-15 05:09:17
19 14099 2005-08-21 00:31:03 2452 224 2005-08-27 03:18:03 2 2006-02-15 21:30:53 535 1 2006-02-15 05:09:17
20 5 2005-05-24 23:05:21 2079 222 2005-06-02 04:33:21 1 2006-02-15 21:30:53 450 2 2006-02-15 05:09:17
21 2598 2005-06-19 05:59:57 2079 265 2005-06-24 11:44:57 2 2006-02-15 21:30:53 450 2 2006-02-15 05:09:17
22 4945 2005-07-08 22:45:02 2079 165 2005-07-11 23:59:02 2 2006-02-15 21:30:53 450 2 2006-02-15 05:09:17
23 7258 2005-07-27 11:05:54 2079 494 2005-08-02 11:36:54 1 2006-02-15 21:30:53 450 2 2006-02-15 05:09:17
24 13802 2005-08-20 12:44:53 2079 156 2005-08-22 09:18:53 2 2006-02-15 21:30:53 450 2 2006-02-15 05:09:17
25 6 2005-05-24 23:08:07 2792 549 2005-05-27 01:32:07 1 2006-02-15 21:30:53 613 1 2006-02-15 05:09:17
26 3360 2005-06-21 12:12:41 2792 498 2005-06-26 06:32:41 1 2006-02-15 21:30:53 613 1 2006-02-15 05:09:17
27 4673 2005-07-08 10:16:00 2792 91 2005-07-17 10:03:00 2 2006-02-15 21:30:53 613 1 2006-02-15 05:09:17
28 8445 2005-07-29 07:37:48 2792 384 2005-08-04 10:43:48 1 2006-02-15 21:30:53 613 1 2006-02-15 05:09:17
29 15883 2005-08-23 16:44:56 2792 550 2005-08-24 22:42:56 1 2006-02-15 21:30:53 613 1 2006-02-15 05:09:17
... ... ... ... ... ... ... ... ... ... ...
16014 11438 2005-08-02 20:21:08 1040 372 2005-08-10 22:12:08 1 2006-02-15 21:30:53 232 2 2006-02-15 05:09:17
16015 11804 2005-08-17 11:42:45 1040 556 2005-08-25 07:11:45 1 2006-02-15 21:30:53 232 2 2006-02-15 05:09:17
16016 11444 2005-08-02 20:32:55 1419 28 2005-08-08 23:21:55 2 2006-02-15 21:30:53 311 1 2006-02-15 05:09:17
16017 15051 2005-08-22 10:08:50 1419 300 2005-08-28 10:23:50 1 2006-02-15 21:30:53 311 1 2006-02-15 05:09:17
16018 11446 2005-08-02 20:33:37 748 342 2005-08-03 18:22:37 1 2006-02-15 21:30:53 164 1 2006-02-15 05:09:17
16019 15693 2005-08-23 10:00:24 748 94 2005-08-25 08:23:24 1 2006-02-15 21:30:53 164 1 2006-02-15 05:09:17
16020 11447 2005-08-02 20:36:25 3868 508 2005-08-07 18:52:25 1 2006-02-15 21:30:53 846 1 2006-02-15 05:09:17
16021 12003 2005-08-17 18:56:05 3868 70 2005-08-18 23:52:05 1 2006-02-15 21:30:53 846 1 2006-02-15 05:09:17
16022 11454 2005-08-02 21:04:39 2901 199 2005-08-05 19:03:39 1 2006-02-15 21:30:53 638 1 2006-02-15 05:09:17
16023 13982 2005-08-20 19:08:25 2901 448 2005-08-28 15:59:25 2 2006-02-15 21:30:53 638 1 2006-02-15 05:09:17
16024 11455 2005-08-02 21:07:06 2000 498 2005-08-12 01:21:06 1 2006-02-15 21:30:53 436 1 2006-02-15 05:09:17
16025 12425 2005-08-18 10:18:06 2000 405 2005-08-27 08:16:06 2 2006-02-15 21:30:53 436 1 2006-02-15 05:09:17
16026 11462 2005-08-02 21:36:46 167 268 2005-08-10 01:48:46 1 2006-02-15 21:30:53 37 1 2006-02-15 05:09:17
16027 12558 2005-08-18 14:52:35 167 289 2005-08-26 09:45:35 2 2006-02-15 21:30:53 37 1 2006-02-15 05:09:17
16028 11470 2005-08-02 21:48:28 2379 346 2005-08-05 23:58:28 2 2006-02-15 21:30:53 519 2 2006-02-15 05:09:17
16029 15589 2005-08-23 06:03:31 2379 22 2005-08-30 07:44:31 2 2006-02-15 21:30:53 519 2 2006-02-15 05:09:17
16030 11473 2005-08-02 21:52:03 620 536 2005-08-09 02:01:03 1 2006-02-15 21:30:53 135 1 2006-02-15 05:09:17
16031 12081 2005-08-17 22:10:46 620 597 2005-08-22 22:37:46 1 2006-02-15 21:30:53 135 1 2006-02-15 05:09:17
16032 11474 2005-08-02 21:53:08 574 214 2005-08-05 22:36:08 1 2006-02-15 21:30:53 124 2 2006-02-15 05:09:17
16033 12283 2005-08-18 04:54:25 574 240 2005-08-23 04:02:25 1 2006-02-15 21:30:53 124 2 2006-02-15 05:09:17
16034 11478 2005-08-02 22:09:05 3932 197 2005-08-04 18:02:05 1 2006-02-15 21:30:53 857 2 2006-02-15 05:09:17
16035 15988 2005-08-23 20:23:08 3932 400 2005-08-28 20:50:08 1 2006-02-15 21:30:53 857 2 2006-02-15 05:09:17
16036 11479 2005-08-02 22:18:13 4077 237 2005-08-12 00:43:13 1 2006-02-15 21:30:53 889 1 2006-02-15 05:09:17
16037 15849 2005-08-23 15:41:20 4077 482 2005-08-27 15:47:20 2 2006-02-15 21:30:53 889 1 2006-02-15 05:09:17
16038 11480 2005-08-02 22:18:24 4161 14 2005-08-04 21:22:24 2 2006-02-15 21:30:53 904 1 2006-02-15 05:09:17
16039 16030 2005-08-23 21:56:04 4161 137 2005-08-31 01:24:04 2 2006-02-15 21:30:53 904 1 2006-02-15 05:09:17
16040 11486 2005-08-02 22:34:06 1610 236 2005-08-09 00:46:06 2 2006-02-15 21:30:53 352 2 2006-02-15 05:09:17
16041 11986 2005-08-17 18:21:58 1610 352 2005-08-18 13:05:58 1 2006-02-15 21:30:53 352 2 2006-02-15 05:09:17
16042 11494 2005-08-02 22:51:23 3994 579 2005-08-09 01:52:23 1 2006-02-15 21:30:53 870 1 2006-02-15 05:09:17
16043 13733 2005-08-20 10:25:12 3994 85 2005-08-21 10:49:12 2 2006-02-15 21:30:53 870 1 2006-02-15 05:09:17

16044 rows × 10 columns


In [5]:
merged_df = pd.merge(
    pd.merge(rental_df, inventory_df, on="inventory_id"),
    film_df,
    on="film_id",
)

In [6]:
revenue_per_film_df = \
merged_df.groupby("film_id").agg({"rental_rate": np.sum})

In [7]:
revenue_per_film_df["film_id"] = revenue_per_film_df.index
revenue_per_film_df.head()


Out[7]:
rental_rate film_id
film_id
1 22.77 1
2 34.93 2
3 35.88 3
4 68.77 4
5 35.88 5

In [8]:
# Actor to Film Revenue

actor_df = pd.read_sql("SELECT * FROM actor;", db)
film_actor_df = pd.read_sql("SELECT * FROM film_actor;", db)

In [9]:
pd.merge(
    actor_df,
    film_actor_df,
    on="actor_id",
)


Out[9]:
actor_id first_name last_name last_update_x film_id last_update_y
0 1 PENELOPE GUINESS 2006-02-15 04:34:33 1 2006-02-15 05:05:03
1 1 PENELOPE GUINESS 2006-02-15 04:34:33 23 2006-02-15 05:05:03
2 1 PENELOPE GUINESS 2006-02-15 04:34:33 25 2006-02-15 05:05:03
3 1 PENELOPE GUINESS 2006-02-15 04:34:33 106 2006-02-15 05:05:03
4 1 PENELOPE GUINESS 2006-02-15 04:34:33 140 2006-02-15 05:05:03
5 1 PENELOPE GUINESS 2006-02-15 04:34:33 166 2006-02-15 05:05:03
6 1 PENELOPE GUINESS 2006-02-15 04:34:33 277 2006-02-15 05:05:03
7 1 PENELOPE GUINESS 2006-02-15 04:34:33 361 2006-02-15 05:05:03
8 1 PENELOPE GUINESS 2006-02-15 04:34:33 438 2006-02-15 05:05:03
9 1 PENELOPE GUINESS 2006-02-15 04:34:33 499 2006-02-15 05:05:03
10 1 PENELOPE GUINESS 2006-02-15 04:34:33 506 2006-02-15 05:05:03
11 1 PENELOPE GUINESS 2006-02-15 04:34:33 509 2006-02-15 05:05:03
12 1 PENELOPE GUINESS 2006-02-15 04:34:33 605 2006-02-15 05:05:03
13 1 PENELOPE GUINESS 2006-02-15 04:34:33 635 2006-02-15 05:05:03
14 1 PENELOPE GUINESS 2006-02-15 04:34:33 749 2006-02-15 05:05:03
15 1 PENELOPE GUINESS 2006-02-15 04:34:33 832 2006-02-15 05:05:03
16 1 PENELOPE GUINESS 2006-02-15 04:34:33 939 2006-02-15 05:05:03
17 1 PENELOPE GUINESS 2006-02-15 04:34:33 970 2006-02-15 05:05:03
18 1 PENELOPE GUINESS 2006-02-15 04:34:33 980 2006-02-15 05:05:03
19 2 NICK WAHLBERG 2006-02-15 04:34:33 3 2006-02-15 05:05:03
20 2 NICK WAHLBERG 2006-02-15 04:34:33 31 2006-02-15 05:05:03
21 2 NICK WAHLBERG 2006-02-15 04:34:33 47 2006-02-15 05:05:03
22 2 NICK WAHLBERG 2006-02-15 04:34:33 105 2006-02-15 05:05:03
23 2 NICK WAHLBERG 2006-02-15 04:34:33 132 2006-02-15 05:05:03
24 2 NICK WAHLBERG 2006-02-15 04:34:33 145 2006-02-15 05:05:03
25 2 NICK WAHLBERG 2006-02-15 04:34:33 226 2006-02-15 05:05:03
26 2 NICK WAHLBERG 2006-02-15 04:34:33 249 2006-02-15 05:05:03
27 2 NICK WAHLBERG 2006-02-15 04:34:33 314 2006-02-15 05:05:03
28 2 NICK WAHLBERG 2006-02-15 04:34:33 321 2006-02-15 05:05:03
29 2 NICK WAHLBERG 2006-02-15 04:34:33 357 2006-02-15 05:05:03
... ... ... ... ... ... ...
5432 199 JULIA FAWCETT 2006-02-15 04:34:33 432 2006-02-15 05:05:03
5433 199 JULIA FAWCETT 2006-02-15 04:34:33 541 2006-02-15 05:05:03
5434 199 JULIA FAWCETT 2006-02-15 04:34:33 604 2006-02-15 05:05:03
5435 199 JULIA FAWCETT 2006-02-15 04:34:33 640 2006-02-15 05:05:03
5436 199 JULIA FAWCETT 2006-02-15 04:34:33 689 2006-02-15 05:05:03
5437 199 JULIA FAWCETT 2006-02-15 04:34:33 730 2006-02-15 05:05:03
5438 199 JULIA FAWCETT 2006-02-15 04:34:33 784 2006-02-15 05:05:03
5439 199 JULIA FAWCETT 2006-02-15 04:34:33 785 2006-02-15 05:05:03
5440 199 JULIA FAWCETT 2006-02-15 04:34:33 886 2006-02-15 05:05:03
5441 199 JULIA FAWCETT 2006-02-15 04:34:33 953 2006-02-15 05:05:03
5442 200 THORA TEMPLE 2006-02-15 04:34:33 5 2006-02-15 05:05:03
5443 200 THORA TEMPLE 2006-02-15 04:34:33 49 2006-02-15 05:05:03
5444 200 THORA TEMPLE 2006-02-15 04:34:33 80 2006-02-15 05:05:03
5445 200 THORA TEMPLE 2006-02-15 04:34:33 116 2006-02-15 05:05:03
5446 200 THORA TEMPLE 2006-02-15 04:34:33 121 2006-02-15 05:05:03
5447 200 THORA TEMPLE 2006-02-15 04:34:33 149 2006-02-15 05:05:03
5448 200 THORA TEMPLE 2006-02-15 04:34:33 346 2006-02-15 05:05:03
5449 200 THORA TEMPLE 2006-02-15 04:34:33 419 2006-02-15 05:05:03
5450 200 THORA TEMPLE 2006-02-15 04:34:33 462 2006-02-15 05:05:03
5451 200 THORA TEMPLE 2006-02-15 04:34:33 465 2006-02-15 05:05:03
5452 200 THORA TEMPLE 2006-02-15 04:34:33 474 2006-02-15 05:05:03
5453 200 THORA TEMPLE 2006-02-15 04:34:33 537 2006-02-15 05:05:03
5454 200 THORA TEMPLE 2006-02-15 04:34:33 538 2006-02-15 05:05:03
5455 200 THORA TEMPLE 2006-02-15 04:34:33 544 2006-02-15 05:05:03
5456 200 THORA TEMPLE 2006-02-15 04:34:33 714 2006-02-15 05:05:03
5457 200 THORA TEMPLE 2006-02-15 04:34:33 879 2006-02-15 05:05:03
5458 200 THORA TEMPLE 2006-02-15 04:34:33 912 2006-02-15 05:05:03
5459 200 THORA TEMPLE 2006-02-15 04:34:33 945 2006-02-15 05:05:03
5460 200 THORA TEMPLE 2006-02-15 04:34:33 958 2006-02-15 05:05:03
5461 200 THORA TEMPLE 2006-02-15 04:34:33 993 2006-02-15 05:05:03

5462 rows × 6 columns


In [10]:
merged_df = pd.merge(
    pd.merge(
        actor_df,
        film_actor_df,
        on="actor_id",
    ),
    revenue_per_film_df,
    on="film_id",
)
merged_df.head()


Out[10]:
actor_id first_name last_name last_update_x film_id last_update_y rental_rate
0 1 PENELOPE GUINESS 2006-02-15 04:34:33 1 2006-02-15 05:05:03 22.77
1 10 CHRISTIAN GABLE 2006-02-15 04:34:33 1 2006-02-15 05:05:03 22.77
2 20 LUCILLE TRACY 2006-02-15 04:34:33 1 2006-02-15 05:05:03 22.77
3 30 SANDRA PECK 2006-02-15 04:34:33 1 2006-02-15 05:05:03 22.77
4 40 JOHNNY CAGE 2006-02-15 04:34:33 1 2006-02-15 05:05:03 22.77

In [11]:
revenue_per_actor_df = merged_df.groupby("actor_id").agg({"rental_rate": np.sum})
revenue_per_actor_df.head()


Out[11]:
rental_rate
actor_id
1 813.95
2 1177.13
3 819.89
4 751.26
5 1401.04

In [12]:
revenue_per_actor_df["actor_id"] = revenue_per_actor_df.index
revenue_per_actor_df.head()


Out[12]:
rental_rate actor_id
actor_id
1 813.95 1
2 1177.13 2
3 819.89 3
4 751.26 4
5 1401.04 5

In [13]:
pd.merge(
    actor_df,
    revenue_per_actor_df,
    on="actor_id",
)


Out[13]:
actor_id first_name last_name last_update rental_rate
0 1 PENELOPE GUINESS 2006-02-15 04:34:33 813.95
1 2 NICK WAHLBERG 2006-02-15 04:34:33 1177.13
2 3 ED CHASE 2006-02-15 04:34:33 819.89
3 4 JENNIFER DAVIS 2006-02-15 04:34:33 751.26
4 5 JOHNNY LOLLOBRIGIDA 2006-02-15 04:34:33 1401.04
5 6 BETTE NICHOLSON 2006-02-15 04:34:33 1082.21
6 7 GRACE MOSTEL 2006-02-15 04:34:33 1450.21
7 8 MATTHEW JOHANSSON 2006-02-15 04:34:33 1183.83
8 9 JOE SWANK 2006-02-15 04:34:33 831.19
9 10 CHRISTIAN GABLE 2006-02-15 04:34:33 1080.38
10 11 ZERO CAGE 2006-02-15 04:34:33 1060.02
11 12 KARL BERRY 2006-02-15 04:34:33 1218.03
12 13 UMA WOOD 2006-02-15 04:34:33 1647.63
13 14 VIVIEN BERGEN 2006-02-15 04:34:33 1481.73
14 15 CUBA OLIVIER 2006-02-15 04:34:33 1297.40
15 16 FRED COSTNER 2006-02-15 04:34:33 1071.07
16 17 HELEN VOIGHT 2006-02-15 04:34:33 1789.43
17 18 DAN TORN 2006-02-15 04:34:33 1046.94
18 19 BOB FAWCETT 2006-02-15 04:34:33 1110.66
19 20 LUCILLE TRACY 2006-02-15 04:34:33 1126.84
20 21 KIRSTEN PALTROW 2006-02-15 04:34:33 1663.48
21 22 ELVIS MARX 2006-02-15 04:34:33 1098.83
22 23 SANDRA KILMER 2006-02-15 04:34:33 1739.96
23 24 CAMERON STREEP 2006-02-15 04:34:33 1012.82
24 25 KEVIN BLOOM 2006-02-15 04:34:33 1074.40
25 26 RIP CRAWFORD 2006-02-15 04:34:33 1519.87
26 27 JULIA MCQUEEN 2006-02-15 04:34:33 1727.51
27 28 WOODY HOFFMAN 2006-02-15 04:34:33 1670.40
28 29 ALEC WAYNE 2006-02-15 04:34:33 1311.08
29 30 SANDRA PECK 2006-02-15 04:34:33 704.13
... ... ... ... ... ...
170 171 OLYMPIA PFEIFFER 2006-02-15 04:34:33 1322.25
171 172 GROUCHO WILLIAMS 2006-02-15 04:34:33 1259.07
172 173 ALAN DREYFUSS 2006-02-15 04:34:33 1234.33
173 174 MICHAEL BENING 2006-02-15 04:34:33 950.08
174 175 WILLIAM HACKMAN 2006-02-15 04:34:33 1240.93
175 176 JON CHASE 2006-02-15 04:34:33 1256.65
176 177 GENE MCKELLEN 2006-02-15 04:34:33 1486.49
177 178 LISA MONROE 2006-02-15 04:34:33 812.32
178 179 ED GUINESS 2006-02-15 04:34:33 1235.79
179 180 JEFF SILVERSTONE 2006-02-15 04:34:33 873.75
180 181 MATTHEW CARREY 2006-02-15 04:34:33 1935.22
181 182 DEBBIE AKROYD 2006-02-15 04:34:33 1094.30
182 183 RUSSELL CLOSE 2006-02-15 04:34:33 887.04
183 184 HUMPHREY GARLAND 2006-02-15 04:34:33 1453.84
184 185 MICHAEL BOLGER 2006-02-15 04:34:33 1721.47
185 186 JULIA ZELLWEGER 2006-02-15 04:34:33 792.79
186 187 RENEE BALL 2006-02-15 04:34:33 1392.01
187 188 ROCK DUKAKIS 2006-02-15 04:34:33 1330.31
188 189 CUBA BIRCH 2006-02-15 04:34:33 875.39
189 190 AUDREY BAILEY 2006-02-15 04:34:33 987.67
190 191 GREGORY GOODING 2006-02-15 04:34:33 1759.77
191 192 JOHN SUVARI 2006-02-15 04:34:33 1537.42
192 193 BURT TEMPLE 2006-02-15 04:34:33 1121.56
193 194 MERYL ALLEN 2006-02-15 04:34:33 1176.20
194 195 JAYNE SILVERSTONE 2006-02-15 04:34:33 1173.50
195 196 BELA WALKEN 2006-02-15 04:34:33 1311.50
196 197 REESE WEST 2006-02-15 04:34:33 1519.47
197 198 MARY KEITEL 2006-02-15 04:34:33 1767.26
198 199 JULIA FAWCETT 2006-02-15 04:34:33 808.45
199 200 THORA TEMPLE 2006-02-15 04:34:33 1040.54

200 rows × 5 columns