In [1]:
# 유저 정보와 주소 정보를 한꺼번에 가져오기
# ( 단, 현재 유저 정보는 customer table에, 주소 정보는 address table 에 저장되어 있는 상태다 )

In [2]:
import pandas as pd

import MySQLdb


db = MySQLdb.connect(
    "db.fastcamp.us",  # DATABASE_HOST
    "root",  # DATABASE_USERNAME
    "dkstncks",  # DATABASE_PASSWORD
    "sakila",  # DATABASE_NAME
    charset='utf8',
)

cursor = db.cursor()

In [3]:
# 일단 그냥 한번 불러와보자. 어떤 일이 생기는지 살펴볼 필요가 있다.

SQL_QUERY = """
    SELECT COUNT(*) FROM customer;
"""

pd.read_sql(SQL_QUERY, db)


Out[3]:
COUNT(*)
0 599

In [4]:
SQL_QUERY = """
    SELECT COUNT(*) FROM address;
"""

pd.read_sql(SQL_QUERY, db)


Out[4]:
COUNT(*)
0 603

In [5]:
SQL_QUERY = """
    SELECT COUNT(*) FROM customer, address;
"""

pd.read_sql(SQL_QUERY, db)


Out[5]:
COUNT(*)
0 361197

In [6]:
# 599 * 603 => 361197

In [7]:
SQL_QUERY = """
    SELECT * FROM customer, address;
"""

df = pd.read_sql(SQL_QUERY, db)
df.head(5)


Out[7]:
customer_id store_id first_name last_name email address_id active create_date last_update address_id address address2 district city_id postal_code phone last_update
0 1 1 MARY SMITH MARY.SMITH@sakilacustomer.org 5 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1 47 MySakila Drive None Alberta 300 2014-09-25 22:30:27
1 2 1 PATRICIA JOHNSON PATRICIA.JOHNSON@sakilacustomer.org 6 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1 47 MySakila Drive None Alberta 300 2014-09-25 22:30:27
2 3 1 LINDA WILLIAMS LINDA.WILLIAMS@sakilacustomer.org 7 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1 47 MySakila Drive None Alberta 300 2014-09-25 22:30:27
3 4 2 BARBARA JONES BARBARA.JONES@sakilacustomer.org 8 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1 47 MySakila Drive None Alberta 300 2014-09-25 22:30:27
4 5 1 ELIZABETH BROWN ELIZABETH.BROWN@sakilacustomer.org 9 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1 47 MySakila Drive None Alberta 300 2014-09-25 22:30:27

In [8]:
# 제대로 된 정보를 얻으려면 이렇게 하셔야 합니다.

SQL_QUERY = """
    SELECT
        customer.*, address.address
    FROM customer, address
    WHERE
        customer.address_id = address.address_id
    ;
"""

pd.read_sql(SQL_QUERY, db).head()


Out[8]:
customer_id store_id first_name last_name email address_id active create_date last_update address
0 1 1 MARY SMITH MARY.SMITH@sakilacustomer.org 5 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1913 Hanoi Way
1 2 1 PATRICIA JOHNSON PATRICIA.JOHNSON@sakilacustomer.org 6 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1121 Loja Avenue
2 3 1 LINDA WILLIAMS LINDA.WILLIAMS@sakilacustomer.org 7 1 2006-02-14 22:04:36 2006-02-15 04:57:20 692 Joliet Street
3 4 2 BARBARA JONES BARBARA.JONES@sakilacustomer.org 8 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1566 Inegl Manor
4 5 1 ELIZABETH BROWN ELIZABETH.BROWN@sakilacustomer.org 9 1 2006-02-14 22:04:36 2006-02-15 04:57:20 53 Idfu Parkway

In [9]:
# 근데 이걸 효과적으로 하는 방법은 SQL JOIN을 사용하는 것이다.

SQL_QUERY = """
    SELECT customer.*, address
    FROM customer
    JOIN address ON customer.address_id = address.address_id
    ;
"""

pd.read_sql(SQL_QUERY, db).head()


Out[9]:
customer_id store_id first_name last_name email address_id active create_date last_update address
0 1 1 MARY SMITH MARY.SMITH@sakilacustomer.org 5 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1913 Hanoi Way
1 2 1 PATRICIA JOHNSON PATRICIA.JOHNSON@sakilacustomer.org 6 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1121 Loja Avenue
2 3 1 LINDA WILLIAMS LINDA.WILLIAMS@sakilacustomer.org 7 1 2006-02-14 22:04:36 2006-02-15 04:57:20 692 Joliet Street
3 4 2 BARBARA JONES BARBARA.JONES@sakilacustomer.org 8 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1566 Inegl Manor
4 5 1 ELIZABETH BROWN ELIZABETH.BROWN@sakilacustomer.org 9 1 2006-02-14 22:04:36 2006-02-15 04:57:20 53 Idfu Parkway

In [10]:
# 시간을 한번 재보자.

import time

# SELECT WHERE
start_time = time.clock()

SQL_QUERY = """
    SELECT
        customer.*, address.address
    FROM customer, address
    WHERE
        customer.address_id = address.address_id
    ;
"""

pd.read_sql(SQL_QUERY, db)

print("SELECT WHERE: {time}s".format(time=time.clock() - start_time))


# SELECT JOIN

start_time = time.clock()

SQL_QUERY = """
    SELECT customer.*, address
    FROM customer
    JOIN address ON customer.address_id = address.address_id
    ;
"""

pd.read_sql(SQL_QUERY, db)

print("SELECT JOIN : {time}s".format(time=time.clock() - start_time))


SELECT WHERE: 0.012294999999999945s
SELECT JOIN : 0.01564399999999999s

In [11]:
# Pandas JOIN

customer_df = pd.read_sql("SELECT * FROM customer;", db)
customer_df.head()


Out[11]:
customer_id store_id first_name last_name email address_id active create_date last_update
0 1 1 MARY SMITH MARY.SMITH@sakilacustomer.org 5 1 2006-02-14 22:04:36 2006-02-15 04:57:20
1 2 1 PATRICIA JOHNSON PATRICIA.JOHNSON@sakilacustomer.org 6 1 2006-02-14 22:04:36 2006-02-15 04:57:20
2 3 1 LINDA WILLIAMS LINDA.WILLIAMS@sakilacustomer.org 7 1 2006-02-14 22:04:36 2006-02-15 04:57:20
3 4 2 BARBARA JONES BARBARA.JONES@sakilacustomer.org 8 1 2006-02-14 22:04:36 2006-02-15 04:57:20
4 5 1 ELIZABETH BROWN ELIZABETH.BROWN@sakilacustomer.org 9 1 2006-02-14 22:04:36 2006-02-15 04:57:20

In [12]:
address_df = pd.read_sql("SELECT * FROM address;", db)
address_df.head()


Out[12]:
address_id address address2 district city_id postal_code phone last_update
0 1 47 MySakila Drive None Alberta 300 2014-09-25 22:30:27
1 2 28 MySQL Boulevard None QLD 576 2014-09-25 22:30:09
2 3 23 Workhaven Lane None Alberta 300 14033335568 2014-09-25 22:30:27
3 4 1411 Lillydale Drive None QLD 576 6172235589 2014-09-25 22:30:09
4 5 1913 Hanoi Way Nagasaki 463 35200 28303384290 2014-09-25 22:31:53

In [13]:
df = pd.merge(customer_df, address_df, on="address_id")

In [14]:
df.head()


Out[14]:
customer_id store_id first_name last_name email address_id active create_date last_update_x address address2 district city_id postal_code phone last_update_y
0 1 1 MARY SMITH MARY.SMITH@sakilacustomer.org 5 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1913 Hanoi Way Nagasaki 463 35200 28303384290 2014-09-25 22:31:53
1 2 1 PATRICIA JOHNSON PATRICIA.JOHNSON@sakilacustomer.org 6 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1121 Loja Avenue California 449 17886 838635286649 2014-09-25 22:34:01
2 3 1 LINDA WILLIAMS LINDA.WILLIAMS@sakilacustomer.org 7 1 2006-02-14 22:04:36 2006-02-15 04:57:20 692 Joliet Street Attika 38 83579 448477190408 2014-09-25 22:31:07
3 4 2 BARBARA JONES BARBARA.JONES@sakilacustomer.org 8 1 2006-02-14 22:04:36 2006-02-15 04:57:20 1566 Inegl Manor Mandalay 349 53561 705814003527 2014-09-25 22:32:18
4 5 1 ELIZABETH BROWN ELIZABETH.BROWN@sakilacustomer.org 9 1 2006-02-14 22:04:36 2006-02-15 04:57:20 53 Idfu Parkway Nantou 361 42399 10655648674 2014-09-25 22:33:16