In [1]:
# GROUP BY with SUM
# 월별 매출

In [2]:
import numpy as np
import pandas as pd

import MySQLdb


db = MySQLdb.connect(
    "db.fastcamp.us",  # DATABASE_HOST
    "root",  # DATABASE_USERNAME
    "dkstncks",  # DATABASE_PASSWORD
    "sakila",  # DATABASE_NAME
    charset='utf8',
)

cursor = db.cursor()

In [3]:
SQL_QUERY = """
    SELECT
        LEFT(payment.payment_date, 7) AS "Month",
        SUM(payment.amount) AS "Monthly Revenue"
    FROM 
        payment
    GROUP BY
        1
"""

pd.read_sql(SQL_QUERY, db)


Out[3]:
Month Monthly Revenue
0 2005-05 4824.43
1 2005-06 9631.88
2 2005-07 28373.89
3 2005-08 24072.13
4 2006-02 514.18

In [4]:
SQL_QUERY = """
    SELECT * FROM payment;
"""

payment_df = pd.read_sql(SQL_QUERY, db)
payment_df.head()


Out[4]:
payment_id customer_id staff_id rental_id amount payment_date last_update
0 1 1 1 76 2.99 2005-05-25 11:30:37 2006-02-15 22:12:30
1 2 1 1 573 0.99 2005-05-28 10:35:23 2006-02-15 22:12:30
2 3 1 1 1185 5.99 2005-06-15 00:54:12 2006-02-15 22:12:30
3 4 1 2 1422 0.99 2005-06-15 18:02:53 2006-02-15 22:12:30
4 5 1 2 1476 9.99 2005-06-15 21:08:46 2006-02-15 22:12:30

In [5]:
payment_df["Month"] = payment_df["payment_date"].apply(lambda x: str(x)[:7])

In [6]:
payment_df.head()


Out[6]:
payment_id customer_id staff_id rental_id amount payment_date last_update Month
0 1 1 1 76 2.99 2005-05-25 11:30:37 2006-02-15 22:12:30 2005-05
1 2 1 1 573 0.99 2005-05-28 10:35:23 2006-02-15 22:12:30 2005-05
2 3 1 1 1185 5.99 2005-06-15 00:54:12 2006-02-15 22:12:30 2005-06
3 4 1 2 1422 0.99 2005-06-15 18:02:53 2006-02-15 22:12:30 2005-06
4 5 1 2 1476 9.99 2005-06-15 21:08:46 2006-02-15 22:12:30 2005-06

In [7]:
payment_df.groupby("Month").agg({"payment_id": np.size, "amount": [np.sum, np.mean]})


Out[7]:
payment_id amount
size sum mean
Month
2005-05 1157 4824.43 4.169775
2005-06 2312 9631.88 4.166038
2005-07 6711 28373.89 4.227968
2005-08 5687 24072.13 4.232835
2006-02 182 514.18 2.825165