In [1]:
# Subquery Example

In [2]:
import pandas as pd

import MySQLdb


db = MySQLdb.connect(
    "db.fastcamp.us",  # DATABASE_HOST
    "root",  # DATABASE_USERNAME
    "dkstncks",  # DATABASE_PASSWORD
    "sakila",  # DATABASE_NAME
    charset='utf8',
)

In [3]:
# Revenue Per Film

REVENUE_PER_FILM_SQL_QUERY = """
    SELECT
        film.film_id,
        film.rental_rate * COUNT(rental.rental_id) "Revenue"
    FROM
        rental,
        inventory,
        film
    WHERE
        rental.inventory_id = inventory.inventory_id
        AND inventory.film_id = film.film_id
    GROUP BY 1
    ;
"""

pd.read_sql(REVENUE_PER_FILM_SQL_QUERY, db)


Out[3]:
film_id Revenue
0 1 22.77
1 2 34.93
2 3 35.88
3 4 68.77
4 5 35.88
5 6 62.79
6 7 74.85
7 8 89.82
8 9 35.88
9 10 114.77
10 11 23.76
11 12 25.74
12 13 44.91
13 15 65.78
14 16 41.86
15 17 17.82
16 18 21.78
17 19 20.79
18 20 49.90
19 21 109.78
20 22 62.79
21 23 20.79
22 24 41.86
23 25 65.78
24 26 21.78
25 27 12.87
26 28 74.85
27 29 29.90
28 30 26.91
29 31 154.69
... ... ...
928 971 89.82
929 972 109.78
930 973 154.69
931 974 5.94
932 975 47.84
933 976 22.77
934 977 35.88
935 978 8.91
936 979 149.70
937 980 79.84
938 981 20.79
939 982 22.77
940 983 26.91
941 984 26.91
942 985 109.78
943 986 53.82
944 987 47.84
945 988 44.85
946 989 124.75
947 990 7.92
948 991 47.84
949 992 13.86
950 993 59.80
951 994 64.87
952 995 114.77
953 996 6.93
954 997 5.94
955 998 8.91
956 999 50.83
957 1000 154.69

958 rows × 2 columns


In [4]:
# Actor to Film Revenue

SQL_QUERY = """
    SELECT
        actor.actor_id,
        CONCAT(actor.first_name, " ", actor.last_name) AS "Actor Name",
        SUM(rev_per_film.Revenue) "Total Film Revenue"
    FROM
        ({REVENUE_PER_FILM_SQL_QUERY}) AS rev_per_film,
        actor,
        film_actor
    WHERE
        actor.actor_id = film_actor.actor_id
        AND film_actor.film_id = rev_per_film.film_id
    GROUP BY 1
    ORDER BY 3 DESC
""".format(
    REVENUE_PER_FILM_SQL_QUERY=REVENUE_PER_FILM_SQL_QUERY.replace(";", "")
)


pd.read_sql(SQL_QUERY, db)


Out[4]:
actor_id Actor Name Total Film Revenue
0 107 GINA DEGENERES 2469.47
1 60 HENRY BERRY 1983.88
2 181 MATTHEW CARREY 1935.22
3 81 SCARLETT DAMON 1848.28
4 102 WALTER TORN 1829.60
5 58 CHRISTIAN AKROYD 1800.48
6 144 ANGELA WITHERSPOON 1799.46
7 17 HELEN VOIGHT 1789.43
8 198 MARY KEITEL 1767.26
9 111 CAMERON ZELLWEGER 1760.40
10 191 GREGORY GOODING 1759.77
11 136 ED MANSFIELD 1759.22
12 90 SEAN GUINESS 1759.01
13 23 SANDRA KILMER 1739.96
14 65 ANGELA HUDSON 1734.26
15 27 JULIA MCQUEEN 1727.51
16 185 MICHAEL BOLGER 1721.47
17 66 MARY TANDY 1709.65
18 83 BEN WILLIS 1709.49
19 117 RENEE TRACY 1700.54
20 28 WOODY HOFFMAN 1670.40
21 21 KIRSTEN PALTROW 1663.48
22 142 JADA RYDER 1660.40
23 147 FAY WINSLET 1655.57
24 13 UMA WOOD 1647.63
25 41 JODIE DEGENERES 1640.07
26 92 KIRSTEN AKROYD 1621.34
27 127 KEVIN GARLAND 1607.35
28 151 GEOFFREY HESTON 1603.38
29 105 SIDNEY CROWE 1579.43
... ... ... ...
170 32 TIM HACKMAN 1000.76
171 190 AUDREY BAILEY 987.67
172 38 TOM MCKELLEN 979.25
173 170 MENA HOPPER 976.44
174 99 JIM MOSTEL 975.15
175 77 CARY MCCONAUGHEY 973.57
176 69 KENNETH PALTROW 969.49
177 164 HUMPHREY WILLIS 968.06
178 63 CAMERON WRAY 967.97
179 91 CHRISTOPHER BERRY 967.29
180 174 MICHAEL BENING 950.08
181 79 MAE HOFFMAN 946.73
182 88 KENNETH PESCI 945.08
183 183 RUSSELL CLOSE 887.04
184 189 CUBA BIRCH 875.39
185 180 JEFF SILVERSTONE 873.75
186 110 SUSAN DAVIS 866.80
187 9 JOE SWANK 831.19
188 141 CATE HARRIS 825.38
189 3 ED CHASE 819.89
190 1 PENELOPE GUINESS 813.95
191 178 LISA MONROE 812.32
192 199 JULIA FAWCETT 808.45
193 186 JULIA ZELLWEGER 792.79
194 4 JENNIFER DAVIS 751.26
195 148 EMILY DEE 719.84
196 71 ADAM GRANT 706.19
197 30 SANDRA PECK 704.13
198 31 SISSY SOBIESKI 666.65
199 35 JUDY DEAN 640.45

200 rows × 3 columns


In [5]:
print(SQL_QUERY)


    SELECT
        actor.actor_id,
        CONCAT(actor.first_name, " ", actor.last_name) AS "Actor Name",
        SUM(rev_per_film.Revenue) "Total Film Revenue"
    FROM
        (
    SELECT
        film.film_id,
        film.rental_rate * COUNT(rental.rental_id) "Revenue"
    FROM
        rental,
        inventory,
        film
    WHERE
        rental.inventory_id = inventory.inventory_id
        AND inventory.film_id = film.film_id
    GROUP BY 1
    
) AS rev_per_film,
        actor,
        film_actor
    WHERE
        actor.actor_id = film_actor.actor_id
        AND film_actor.film_id = rev_per_film.film_id
    GROUP BY 1
    ORDER BY 3 DESC


In [ ]: