In [1]:
# GROUP BY example
# 월별 렌탈 횟수 지표 뽑아내기
In [2]:
import numpy as np
import pandas as pd
import MySQLdb
db = MySQLdb.connect(
"db.fastcamp.us", # DATABASE_HOST
"root", # DATABASE_USERNAME
"dkstncks", # DATABASE_PASSWORD
"sakila", # DATABASE_NAME
charset='utf8',
)
cursor = db.cursor()
In [3]:
SQL_QUERY = """
SELECT
LEFT(rental.rental_date, 7) "Monthly",
COUNT(rental.rental_id) "Total Rental Count"
FROM
rental
GROUP BY
1
ORDER BY
2 DESC
"""
pd.read_sql(SQL_QUERY, db)
Out[3]:
In [4]:
SQL_QUERY = """
SELECT * FROM rental;
"""
rental_df = pd.read_sql(SQL_QUERY, db)
In [5]:
rental_df.head()
Out[5]:
In [6]:
# 일단은 "Month" 라는 새로운 Column에 데이터를 넣는다.
rental_df["Month"] = rental_df["rental_date"].apply(lambda x: str(x)[:7])
In [7]:
rental_df.head()
Out[7]:
In [8]:
rental_df.groupby("Month").size().sort_values(ascending=False)
Out[8]: