In [1]:
# 특정 카테고리에 포함된 영화들의 렌탈 횟수
# "comedy", "sports", "family" 카테고리에 포함되는 영화들의 렌탈 횟수
# rental, inventory, film, film_category, category table 이용

import pandas as pd
import MySQLdb

In [2]:
db = MySQLdb.connect(
    "db.fastcamp.us",
    "root",
    "dkstncks",
    "sakila",
    charset="utf8",
)

In [3]:
SQL_QUERY = """
    SELECT 
        c.category_id category_id,
        c.name category_name, 
        COUNT(*) rentals_per_category
    FROM 
        rental r
        JOIN inventory i ON r.inventory_id = i.inventory_id
        JOIN film_category fc ON fc.film_id = i.film_id
        JOIN category c ON fc.category_id = c.category_id
    WHERE 
        c.name IN ("Family", "Comedy", "Sports")
    GROUP BY 
        category_id
    ORDER BY rentals_per_category DESC
;
"""

pd.read_sql(SQL_QUERY, db)


Out[3]:
category_id category_name rentals_per_category
0 15 Sports 1179
1 8 Family 1096
2 5 Comedy 941

In [4]:
# Store 1의 등급별 매출 중 "G", "PG-13"의 렌탈매출 구하기
# table : 
# 1. film : rental_rate, film_id
# 2. inventory : film_id, inventory_id
# 3. rental : inventory_id

In [5]:
SQL_QUERY = """
    SELECT 
        i.store_id store_id,
        f.rating rating,
        SUM(p.amount) total_revenue
    FROM 
        payment p
        JOIN rental r ON p.rental_id = r.rental_id
        JOIN inventory i ON r.inventory_id = i.inventory_id
        JOIN film f ON i.film_id = f.film_id
    GROUP BY 
        store_id, 
        f.rating
    HAVING 
        store_id = 1 
        AND rating IN ("G", "PG-13")
;        
"""
pd.read_sql(SQL_QUERY, db)


Out[5]:
store_id rating total_revenue
0 1 G 5810.22
1 1 PG-13 8091.51

In [6]:
# 배우별 매출 
# 1. 영화별 매출을 구하고
## 1-1. payment, rental, inventory, film table 이용 
# 2. 이후 배우별 매출 구하기

In [11]:
# 영화별 매출
SQL_QUERY = """
    SELECT 
        f.film_id film_id,
        COUNT(p.amount) revenue       
    FROM 
        payment p
        JOIN rental r ON p.rental_id = r.rental_id
        JOIN inventory i ON r.inventory_id = i.inventory_id
        JOIN film f ON i.film_id = f.film_id
    GROUP BY 
        film_id
;        
"""

pd.read_sql(SQL_QUERY, db)

# 배우별 매출
RESULT_SQL_QUERY = """
    SELECT 
        a.actor_id actor_id, 
        a.last_name last_name,
        a.first_name first_name,
        SUM(sq.revenue) revenue_per_actor
    FROM 
        ({SQL_QUERY}) AS sq  
        JOIN film_actor fa ON sq.film_id = fa.film_id
        JOIN actor a ON fa.actor_id = a.actor_id
    GROUP BY 
        actor_id
;
""".format(
    SQL_QUERY=SQL_QUERY.replace(";", ""),
)

pd.read_sql(RESULT_SQL_QUERY, db)


Out[11]:
actor_id last_name first_name revenue_per_actor
0 1 GUINESS PENELOPE 305.0
1 2 WAHLBERG NICK 387.0
2 3 CHASE ED 311.0
3 4 DAVIS JENNIFER 274.0
4 5 LOLLOBRIGIDA JOHNNY 496.0
5 6 NICHOLSON BETTE 279.0
6 7 MOSTEL GRACE 479.0
7 8 JOHANSSON MATTHEW 317.0
8 9 SWANK JOE 381.0
9 10 GABLE CHRISTIAN 362.0
10 11 CAGE ZERO 398.0
11 12 BERRY KARL 497.0
12 13 WOOD UMA 537.0
13 14 BERGEN VIVIEN 527.0
14 15 OLIVIER CUBA 460.0
15 16 COSTNER FRED 393.0
16 17 VOIGHT HELEN 557.0
17 18 TORN DAN 306.0
18 19 FAWCETT BOB 334.0
19 20 TRACY LUCILLE 516.0
20 21 PALTROW KIRSTEN 452.0
21 22 MARX ELVIS 417.0
22 23 KILMER SANDRA 604.0
23 24 STREEP CAMERON 318.0
24 25 BLOOM KEVIN 360.0
25 26 CRAWFORD RIP 513.0
26 27 MCQUEEN JULIA 549.0
27 28 HOFFMAN WOODY 560.0
28 29 WAYNE ALEC 492.0
29 30 PECK SANDRA 287.0
... ... ... ... ...
170 171 PFEIFFER OLYMPIA 475.0
171 172 WILLIAMS GROUCHO 393.0
172 173 DREYFUSS ALAN 467.0
173 174 BENING MICHAEL 392.0
174 175 HACKMAN WILLIAM 407.0
175 176 CHASE JON 435.0
176 177 MCKELLEN GENE 451.0
177 178 MONROE LISA 368.0
178 179 GUINESS ED 521.0
179 180 SILVERSTONE JEFF 325.0
180 181 CARREY MATTHEW 678.0
181 182 AKROYD DEBBIE 370.0
182 183 CLOSE RUSSELL 296.0
183 184 GARLAND HUMPHREY 416.0
184 185 BOLGER MICHAEL 553.0
185 186 ZELLWEGER JULIA 221.0
186 187 BALL RENEE 499.0
187 188 DUKAKIS ROCK 469.0
188 189 BIRCH CUBA 361.0
189 190 BAILEY AUDREY 333.0
190 191 GOODING GREGORY 523.0
191 192 SUVARI JOHN 458.0
192 193 TEMPLE BURT 444.0
193 194 ALLEN MERYL 380.0
194 195 SILVERSTONE JAYNE 450.0
195 196 WALKEN BELA 450.0
196 197 WEST REESE 553.0
197 198 KEITEL MARY 674.0
198 199 FAWCETT JULIA 255.0
199 200 TEMPLE THORA 346.0

200 rows × 4 columns


In [12]:
print(RESULT_SQL_QUERY)


    SELECT 
        a.actor_id actor_id, 
        a.last_name last_name,
        a.first_name first_name,
        SUM(sq.revenue) revenue_per_actor
    FROM 
        (
    SELECT 
        f.film_id film_id,
        COUNT(p.amount) revenue       
    FROM 
        payment p
        JOIN rental r ON p.rental_id = r.rental_id
        JOIN inventory i ON r.inventory_id = i.inventory_id
        JOIN film f ON i.film_id = f.film_id
    GROUP BY 
        film_id
        
) AS sq  
        JOIN film_actor fa ON sq.film_id = fa.film_id
        JOIN actor a ON fa.actor_id = a.actor_id
    GROUP BY 
        actor_id
;


In [ ]: