In [1]:
import pymysql
import curl
In [2]:
db = pymysql.connect(
"db.fastcamp.us",
"root",
"dkstncks",
"sakila",
charset = "utf8",
)
customer_df = pd.read_sql("SELECT * FROM customer;", db)
rental_df = pd.read_sql("SELECT * FROM rental;", db)
In [3]:
df = rental_df.merge(customer_df, on="customer_id")
In [8]:
df.head(1)
Out[8]:
rental_id
rental_date
inventory_id
customer_id
return_date
staff_id
last_update_x
store_id
first_name
last_name
email
address_id
active
create_date
last_update_y
0
1
2005-05-24 22:53:30
367
130
2005-05-26 22:04:30
1
2006-02-15 21:30:53
1
CHARLOTTE
HUNTER
CHARLOTTE.HUNTER@sakilacustomer.org
134
1
2006-02-14 22:04:36
2006-02-15 04:57:20
In [4]:
rental_per_customer_groups = df.groupby("customer_id")
In [5]:
rental_per_customer_groups.get_group(1)
Out[5]:
rental_id
rental_date
inventory_id
customer_id
return_date
staff_id
last_update_x
store_id
first_name
last_name
email
address_id
active
create_date
last_update_y
1988
76
2005-05-25 11:30:37
3021
1
2005-06-03 12:00:37
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1989
573
2005-05-28 10:35:23
4020
1
2005-06-03 06:32:23
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1990
1185
2005-06-15 00:54:12
2785
1
2005-06-23 02:42:12
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1991
1422
2005-06-15 18:02:53
1021
1
2005-06-19 15:54:53
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1992
1476
2005-06-15 21:08:46
1407
1
2005-06-25 02:26:46
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1993
1725
2005-06-16 15:18:57
726
1
2005-06-17 21:05:57
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1994
2308
2005-06-18 08:41:48
197
1
2005-06-22 03:36:48
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1995
2363
2005-06-18 13:33:59
3497
1
2005-06-19 17:40:59
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1996
3284
2005-06-21 06:24:45
4566
1
2005-06-28 03:28:45
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1997
4526
2005-07-08 03:17:05
1443
1
2005-07-14 01:19:05
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1998
4611
2005-07-08 07:33:56
3486
1
2005-07-12 13:25:56
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
1999
5244
2005-07-09 13:24:07
3726
1
2005-07-14 14:01:07
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2000
5326
2005-07-09 16:38:01
797
1
2005-07-13 18:02:01
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2001
6163
2005-07-11 10:13:46
1330
1
2005-07-19 13:15:46
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2002
7273
2005-07-27 11:31:22
2465
1
2005-07-31 06:50:22
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2003
7841
2005-07-28 09:04:45
1092
1
2005-07-30 12:37:45
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2004
8033
2005-07-28 16:18:23
4268
1
2005-07-30 17:56:23
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2005
8074
2005-07-28 17:33:39
1558
1
2005-07-29 20:17:39
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2006
8116
2005-07-28 19:20:07
4497
1
2005-07-29 22:54:07
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2007
8326
2005-07-29 03:58:49
108
1
2005-08-01 05:16:49
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2008
9571
2005-07-31 02:42:18
2219
1
2005-08-02 23:26:18
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2009
10437
2005-08-01 08:51:04
14
1
2005-08-10 12:12:04
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2010
11299
2005-08-02 15:36:52
3232
1
2005-08-10 16:40:52
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2011
11367
2005-08-02 18:01:38
1440
1
2005-08-04 13:19:38
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2012
11824
2005-08-17 12:37:54
2639
1
2005-08-19 10:11:54
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2013
12250
2005-08-18 03:57:29
921
1
2005-08-22 23:05:29
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2014
13068
2005-08-19 09:55:16
3019
1
2005-08-20 14:44:16
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2015
13176
2005-08-19 13:56:54
2269
1
2005-08-23 08:50:54
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2016
14762
2005-08-21 23:33:57
4249
1
2005-08-23 01:30:57
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2017
14825
2005-08-22 01:27:57
1449
1
2005-08-27 07:01:57
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2018
15298
2005-08-22 19:41:37
1446
1
2005-08-28 22:49:37
1
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
2019
15315
2005-08-22 20:03:46
312
1
2005-08-30 01:51:46
2
2006-02-15 21:30:53
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
In [10]:
rental_per_customer_groups.agg({"customer_id": np.size})["customer_id"]
Out[10]:
customer_id
1 32
2 27
3 26
4 22
5 38
6 28
7 33
8 24
9 23
10 25
11 24
12 28
13 27
14 28
15 32
16 28
17 21
18 22
19 24
20 30
21 35
22 22
23 30
24 25
25 29
26 34
27 31
28 32
29 36
30 34
..
570 26
571 24
572 25
573 29
574 28
575 29
576 34
577 27
578 22
579 27
580 27
581 27
582 25
583 23
584 30
585 24
586 19
587 26
588 29
589 28
590 25
591 27
592 29
593 26
594 27
595 30
596 28
597 25
598 22
599 19
Name: customer_id, dtype: int64
In [6]:
# rental_per_customer_groups.agg({"customer_id": np.size})["customer_id"] > 30
is_many_rentals_user = rental_per_customer_groups.size() > 30
is_many_rentals_user
Out[6]:
customer_id
1 True
2 False
3 False
4 False
5 True
6 False
7 True
8 False
9 False
10 False
11 False
12 False
13 False
14 False
15 True
16 False
17 False
18 False
19 False
20 False
21 True
22 False
23 False
24 False
25 False
26 True
27 True
28 True
29 True
30 True
...
570 False
571 False
572 False
573 False
574 False
575 False
576 True
577 False
578 False
579 False
580 False
581 False
582 False
583 False
584 False
585 False
586 False
587 False
588 False
589 False
590 False
591 False
592 False
593 False
594 False
595 False
596 False
597 False
598 False
599 False
dtype: bool
In [13]:
#여기서는 어떤 문제로 되지 않는다. 다음에 알려주겠다.
In [14]:
# SQL로 하겠다
# 1. Sub Query - Query 안에 Query 가 들어있다.
In [17]:
SQL_QUERY = """
SELECT
r.customer_id,
COUNT(*) rentals
FROM rental r
JOIN customer c
ON r.customer_id = c.customer_id
GROUP BY r.customer_id
WHERE rentals > 30
"""
pd.read_sql(SQL_QUERY, db)
#순서가 FROM -> group by -> where(=>rental이 없다.) -> select에서 카운트가 마지막이라서
---------------------------------------------------------------------------
ProgrammingError Traceback (most recent call last)
C:\Anaconda3\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1563 else:
-> 1564 cur.execute(*args)
1565 return cur
C:\Anaconda3\lib\site-packages\pymysql\cursors.py in execute(self, query, args)
165
--> 166 result = self._query(query)
167 self._executed = query
C:\Anaconda3\lib\site-packages\pymysql\cursors.py in _query(self, q)
321 self._last_executed = q
--> 322 conn.query(q)
323 self._do_get_result()
C:\Anaconda3\lib\site-packages\pymysql\connections.py in query(self, sql, unbuffered)
836 self._execute_command(COMMAND.COM_QUERY, sql)
--> 837 self._affected_rows = self._read_query_result(unbuffered=unbuffered)
838 return self._affected_rows
C:\Anaconda3\lib\site-packages\pymysql\connections.py in _read_query_result(self, unbuffered)
1020 result = MySQLResult(self)
-> 1021 result.read()
1022 self._result = result
C:\Anaconda3\lib\site-packages\pymysql\connections.py in read(self)
1303 try:
-> 1304 first_packet = self.connection._read_packet()
1305
C:\Anaconda3\lib\site-packages\pymysql\connections.py in _read_packet(self, packet_type)
982 packet = packet_type(buff, self.encoding)
--> 983 packet.check_error()
984 return packet
C:\Anaconda3\lib\site-packages\pymysql\connections.py in check_error(self)
394 if DEBUG: print("errno =", errno)
--> 395 err.raise_mysql_exception(self._data)
396
C:\Anaconda3\lib\site-packages\pymysql\err.py in raise_mysql_exception(data)
101 errorclass = error_map.get(errno, InternalError)
--> 102 raise errorclass(errno, errval)
ProgrammingError: (1064, "42000You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'WHERE rentals > 30' at line 8")
During handling of the above exception, another exception occurred:
DatabaseError Traceback (most recent call last)
<ipython-input-17-430e60ea3f7a> in <module>()
9 WHERE rentals > 30
10 """
---> 11 pd.read_sql(SQL_QUERY, db)
C:\Anaconda3\lib\site-packages\pandas\io\sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
497 sql, index_col=index_col, params=params,
498 coerce_float=coerce_float, parse_dates=parse_dates,
--> 499 chunksize=chunksize)
500
501 try:
C:\Anaconda3\lib\site-packages\pandas\io\sql.py in read_query(self, sql, index_col, coerce_float, params, parse_dates, chunksize)
1597
1598 args = _convert_params(sql, params)
-> 1599 cursor = self.execute(*args)
1600 columns = [col_desc[0] for col_desc in cursor.description]
1601
C:\Anaconda3\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1574 ex = DatabaseError(
1575 "Execution failed on sql '%s': %s" % (args[0], exc))
-> 1576 raise_with_traceback(ex)
1577
1578 @staticmethod
C:\Anaconda3\lib\site-packages\pandas\compat\__init__.py in raise_with_traceback(exc, traceback)
331 if traceback == Ellipsis:
332 _, _, traceback = sys.exc_info()
--> 333 raise exc.with_traceback(traceback)
334 else:
335 # this version of raise is a syntax error in Python 3
C:\Anaconda3\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1562 cur.execute(*args, **kwargs)
1563 else:
-> 1564 cur.execute(*args)
1565 return cur
1566 except Exception as exc:
C:\Anaconda3\lib\site-packages\pymysql\cursors.py in execute(self, query, args)
164 query = self.mogrify(query, args)
165
--> 166 result = self._query(query)
167 self._executed = query
168 return result
C:\Anaconda3\lib\site-packages\pymysql\cursors.py in _query(self, q)
320 conn = self._get_db()
321 self._last_executed = q
--> 322 conn.query(q)
323 self._do_get_result()
324 return self.rowcount
C:\Anaconda3\lib\site-packages\pymysql\connections.py in query(self, sql, unbuffered)
835 sql = sql.encode(self.encoding, 'surrogateescape')
836 self._execute_command(COMMAND.COM_QUERY, sql)
--> 837 self._affected_rows = self._read_query_result(unbuffered=unbuffered)
838 return self._affected_rows
839
C:\Anaconda3\lib\site-packages\pymysql\connections.py in _read_query_result(self, unbuffered)
1019 else:
1020 result = MySQLResult(self)
-> 1021 result.read()
1022 self._result = result
1023 if result.server_status is not None:
C:\Anaconda3\lib\site-packages\pymysql\connections.py in read(self)
1302 def read(self):
1303 try:
-> 1304 first_packet = self.connection._read_packet()
1305
1306 if first_packet.is_ok_packet():
C:\Anaconda3\lib\site-packages\pymysql\connections.py in _read_packet(self, packet_type)
981
982 packet = packet_type(buff, self.encoding)
--> 983 packet.check_error()
984 return packet
985
C:\Anaconda3\lib\site-packages\pymysql\connections.py in check_error(self)
393 errno = self.read_uint16()
394 if DEBUG: print("errno =", errno)
--> 395 err.raise_mysql_exception(self._data)
396
397 def dump(self):
C:\Anaconda3\lib\site-packages\pymysql\err.py in raise_mysql_exception(data)
100 errval = data[4:].decode('utf-8', 'replace')
101 errorclass = error_map.get(errno, InternalError)
--> 102 raise errorclass(errno, errval)
DatabaseError: Execution failed on sql '
SELECT
r.customer_id,
COUNT(*) rentals
FROM rental r
JOIN customer c
ON r.customer_id = c.customer_id
GROUP BY r.customer_id
WHERE rentals > 30
': (1064, "42000You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'WHERE rentals > 30' at line 8")
In [18]:
# 1. Sub Query = Query 안에 Query가 들어있다.
In [23]:
SQL_QUERY = """
SELECT rentals_per_customer.customer_id "Customer ID", rentals_per_customer.rentals
FROM (
SELECT
r.customer_id,
COUNT(*) rentals
FROM rental r
JOIN customer c
ON r.customer_id = c.customer_id
GROUP BY r.customer_id
) AS rentals_per_customer
WHERE rentals > 30
;
"""
pd.read_sql(SQL_QUERY, db)
Out[23]:
Customer ID
rentals
0
1
32
1
5
38
2
7
33
3
15
32
4
21
35
5
26
34
6
27
31
7
28
32
8
29
36
9
30
34
10
35
32
11
38
34
12
46
34
13
50
35
14
51
33
15
64
33
16
66
34
17
75
41
18
78
31
19
84
33
20
86
33
21
89
32
22
91
35
23
102
33
24
103
31
25
114
33
26
119
34
27
120
32
28
122
32
29
125
32
...
...
...
104
416
31
105
438
33
106
439
36
107
442
32
108
446
31
109
451
33
110
452
32
111
454
33
112
459
38
113
462
33
114
468
39
115
469
40
116
470
32
117
473
34
118
479
31
119
494
31
120
502
34
121
503
32
122
506
35
123
513
31
124
520
32
125
522
33
126
526
45
127
532
32
128
533
33
129
535
32
130
550
32
131
566
34
132
569
32
133
576
34
134 rows × 2 columns
In [25]:
# sub query스럽지 않아서 나눠 쓰면 보기가 좋다.
In [28]:
RENTALS_PER_CUSTOMER_SQL_QUERY = """
SELECT
r.customer_id,
COUNT(*) rentals
FROM rental r
JOIN customer c
ON r.customer_id = c.customer_id
GROUP BY r.customer_id
;
"""
SQL_QUERY = """
SELECT *
FROM (
{rentals_per_customer_sql_query}
) AS rentals_per_customer
WHERE rentals > 30
;
""".format(
rentals_per_customer_sql_query=RENTALS_PER_CUSTOMER_SQL_QUERY.replace(";", "")
)
pd.read_sql(SQL_QUERY, db)
Out[28]:
customer_id
rentals
0
1
32
1
5
38
2
7
33
3
15
32
4
21
35
5
26
34
6
27
31
7
28
32
8
29
36
9
30
34
10
35
32
11
38
34
12
46
34
13
50
35
14
51
33
15
64
33
16
66
34
17
75
41
18
78
31
19
84
33
20
86
33
21
89
32
22
91
35
23
102
33
24
103
31
25
114
33
26
119
34
27
120
32
28
122
32
29
125
32
...
...
...
104
416
31
105
438
33
106
439
36
107
442
32
108
446
31
109
451
33
110
452
32
111
454
33
112
459
38
113
462
33
114
468
39
115
469
40
116
470
32
117
473
34
118
479
31
119
494
31
120
502
34
121
503
32
122
506
35
123
513
31
124
520
32
125
522
33
126
526
45
127
532
32
128
533
33
129
535
32
130
550
32
131
566
34
132
569
32
133
576
34
134 rows × 2 columns
In [29]:
print(SQL_QUERY)
SELECT *
FROM (
SELECT
r.customer_id,
COUNT(*) rentals
FROM rental r
JOIN customer c
ON r.customer_id = c.customer_id
GROUP BY r.customer_id
) AS rentals_per_customer
WHERE rentals > 30
;
In [32]:
# 30번 이상인 애들의 => 이름, 이메일
RESULT_SQL_QUERY = """
SELECT customer.last_name, customer.first_name, customer.email
FROM ({SQL_QUERY}) many_rental_user
JOIN customer ON many_rental_user.customer_id = customer.customer_id
;
""".format(
SQL_QUERY=SQL_QUERY.replace(";", "")
)
pd.read_sql(RESULT_SQL_QUERY, db)
Out[32]:
last_name
first_name
email
0
SMITH
MARY
MARY.SMITH@sakilacustomer.org
1
BROWN
ELIZABETH
ELIZABETH.BROWN@sakilacustomer.org
2
MILLER
MARIA
MARIA.MILLER@sakilacustomer.org
3
HARRIS
HELEN
HELEN.HARRIS@sakilacustomer.org
4
CLARK
MICHELLE
MICHELLE.CLARK@sakilacustomer.org
5
HALL
JESSICA
JESSICA.HALL@sakilacustomer.org
6
ALLEN
SHIRLEY
SHIRLEY.ALLEN@sakilacustomer.org
7
YOUNG
CYNTHIA
CYNTHIA.YOUNG@sakilacustomer.org
8
HERNANDEZ
ANGELA
ANGELA.HERNANDEZ@sakilacustomer.org
9
KING
MELISSA
MELISSA.KING@sakilacustomer.org
10
GREEN
VIRGINIA
VIRGINIA.GREEN@sakilacustomer.org
11
GONZALEZ
MARTHA
MARTHA.GONZALEZ@sakilacustomer.org
12
CAMPBELL
CATHERINE
CATHERINE.CAMPBELL@sakilacustomer.org
13
COLLINS
DIANE
DIANE.COLLINS@sakilacustomer.org
14
STEWART
ALICE
ALICE.STEWART@sakilacustomer.org
15
COX
JUDITH
JUDITH.COX@sakilacustomer.org
16
WARD
JANICE
JANICE.WARD@sakilacustomer.org
17
SANDERS
TAMMY
TAMMY.SANDERS@sakilacustomer.org
18
WOOD
LORI
LORI.WOOD@sakilacustomer.org
19
PERRY
SARA
SARA.PERRY@sakilacustomer.org
20
LONG
JACQUELINE
JACQUELINE.LONG@sakilacustomer.org
21
FLORES
JULIA
JULIA.FLORES@sakilacustomer.org
22
BUTLER
LOIS
LOIS.BUTLER@sakilacustomer.org
23
FORD
CRYSTAL
CRYSTAL.FORD@sakilacustomer.org
24
HAMILTON
GLADYS
GLADYS.HAMILTON@sakilacustomer.org
25
ELLIS
GRACE
GRACE.ELLIS@sakilacustomer.org
26
MARSHALL
SHERRY
SHERRY.MARSHALL@sakilacustomer.org
27
ORTIZ
SYLVIA
SYLVIA.ORTIZ@sakilacustomer.org
28
MURRAY
THELMA
THELMA.MURRAY@sakilacustomer.org
29
WEBB
ETHEL
ETHEL.WEBB@sakilacustomer.org
...
...
...
...
104
PINSON
JEFFERY
JEFFERY.PINSON@sakilacustomer.org
105
LOVELACE
BARRY
BARRY.LOVELACE@sakilacustomer.org
106
FENNELL
ALEXANDER
ALEXANDER.FENNELL@sakilacustomer.org
107
BUSTAMANTE
LEROY
LEROY.BUSTAMANTE@sakilacustomer.org
108
CULP
THEODORE
THEODORE.CULP@sakilacustomer.org
109
REA
JIM
JIM.REA@sakilacustomer.org
110
MILNER
TOM
TOM.MILNER@sakilacustomer.org
111
GRESHAM
ALEX
ALEX.GRESHAM@sakilacustomer.org
112
COLLAZO
TOMMY
TOMMY.COLLAZO@sakilacustomer.org
113
SHERROD
WARREN
WARREN.SHERROD@sakilacustomer.org
114
CARY
TIM
TIM.CARY@sakilacustomer.org
115
BULL
WESLEY
WESLEY.BULL@sakilacustomer.org
116
ALLARD
GORDON
GORDON.ALLARD@sakilacustomer.org
117
OLIVARES
JORGE
JORGE.OLIVARES@sakilacustomer.org
118
HITE
ZACHARY
ZACHARY.HITE@sakilacustomer.org
119
CHOATE
RAMON
RAMON.CHOATE@sakilacustomer.org
120
CORNWELL
BRETT
BRETT.CORNWELL@sakilacustomer.org
121
BARCLAY
ANGEL
ANGEL.BARCLAY@sakilacustomer.org
122
SEWARD
LESLIE
LESLIE.SEWARD@sakilacustomer.org
123
TUBBS
DUANE
DUANE.TUBBS@sakilacustomer.org
124
WESTMORELAND
MITCHELL
MITCHELL.WESTMORELAND@sakilacustomer.org
125
HAVENS
ARNOLD
ARNOLD.HAVENS@sakilacustomer.org
126
SEAL
KARL
KARL.SEAL@sakilacustomer.org
127
RENNER
NEIL
NEIL.RENNER@sakilacustomer.org
128
MILAM
JESSIE
JESSIE.MILAM@sakilacustomer.org
129
ELROD
JAVIER
JAVIER.ELROD@sakilacustomer.org
130
BROWNLEE
GUY
GUY.BROWNLEE@sakilacustomer.org
131
MENA
CASEY
CASEY.MENA@sakilacustomer.org
132
GARDINER
DAVE
DAVE.GARDINER@sakilacustomer.org
133
MCCARTER
MORRIS
MORRIS.MCCARTER@sakilacustomer.org
134 rows × 3 columns
In [35]:
# Temporary Table ( 임시 테이블 )
SQL_QUERY = """
DROP TEMPORARY TABLE IF EXISTS rentals_per_customer;
CREATE TEMPORARY TABLE rentals_per_customer
SELECT
r.customer_id,
COUNT(*) rentals
FROM rental r
JOIN customer c
ON r.customer_id = c.customer_id
GROUP BY r.customer_id
;
"""
# pd.read_sql() => 이걸로 실행시키면 오류가 난다. 그래서 cursor로 실행
cursor = db.cursor()
cursor.execute(SQL_QUERY)
Out[35]:
0
In [39]:
SQL_QUERY = """
SELECT rpc.customer_id, rpc.rentals
FROM rentals_per_customer rpc
WHERE rentals > 30
;
"""
pd.read_sql(SQL_QUERY, db)
Out[39]:
customer_id
rentals
In [41]:
SQL_QUERY = """
SELECT r.customer_id, COUNT(*) rentals
FROM rental r
JOIN customer c
ON r.customer_id = c.customer_id
GROUP BY r.customer_id
# WHERE rentals > 30 #연산에 대한 결과로 Filtering을 할 수 없다.
# 연산에 대한 결과로 Filtering을 할 수 있는 기능
HAVING rentals > 30
"""
pd.read_sql(SQL_QUERY, db)
Out[41]:
customer_id
rentals
0
1
32
1
5
38
2
7
33
3
15
32
4
21
35
5
26
34
6
27
31
7
28
32
8
29
36
9
30
34
10
35
32
11
38
34
12
46
34
13
50
35
14
51
33
15
64
33
16
66
34
17
75
41
18
78
31
19
84
33
20
86
33
21
89
32
22
91
35
23
102
33
24
103
31
25
114
33
26
119
34
27
120
32
28
122
32
29
125
32
...
...
...
104
416
31
105
438
33
106
439
36
107
442
32
108
446
31
109
451
33
110
452
32
111
454
33
112
459
38
113
462
33
114
468
39
115
469
40
116
470
32
117
473
34
118
479
31
119
494
31
120
502
34
121
503
32
122
506
35
123
513
31
124
520
32
125
522
33
126
526
45
127
532
32
128
533
33
129
535
32
130
550
32
131
566
34
132
569
32
133
576
34
134 rows × 2 columns
In [2]:
db = pymysql.connect(
"db.fastcamp.us",
"root",
"dkstncks",
"sakila",
charset="utf8"
)
film_df = pd.read_sql("SELECT * FROM film;", db)
rental_df = pd.read_sql("SELECT * FROM rental;", db)
payment_df = pd.read_sql("SELECT * FROM payment;", db)
inventory_df = pd.read_sql("SELECT * FROM inventory;", db)
film_df => film_id, title inventory_df => inventory_id, film_id rental_df => rental_id, inventory_id payment_df => rental_id, amount
In [23]:
SQL_QUERY = """
SELECT f.film_id, f.title, SUM(p.amount) "revenue"
FROM
film f,
rental r,
payment p,
inventory i
WHERE
f.film_id = i.film_id
AND i.inventory_id = r.inventory_id
AND r.rental_id = p.rental_id
GROUP BY f.film_id
ORDER BY revenue DESC
;
"""
pd.read_sql(SQL_QUERY, db)
Out[23]:
film_id
title
revenue
0
879
TELEGRAPH VOYAGE
231.73
1
973
WIFE TURN
223.69
2
1000
ZORRO ARK
214.69
3
369
GOODFELLAS SALUTE
209.69
4
764
SATURDAY LAMBS
204.72
5
893
TITANS JERK
201.71
6
897
TORQUE BOUND
198.72
7
403
HARRY IDAHO
195.70
8
460
INNOCENT USUAL
191.74
9
444
HUSTLER PARTY
190.78
10
670
PELICAN COMFORTS
188.74
11
127
CAT CONEHEADS
181.70
12
284
ENEMY ODDS
180.71
13
103
BUCKET BROTHERHOOD
180.66
14
715
RANGE MOONWALKER
179.73
15
563
MASSACRE USUAL
179.70
16
941
VIDEOTAPE ARSENIC
178.71
17
239
DOGMA FAMILY
178.70
18
31
APACHE DIVINE
178.69
19
938
VELVET TERMINATOR
177.74
20
244
DORADO NOTTING
176.73
21
327
FOOL MOCKINGBIRD
175.77
22
979
WITCHES PANIC
173.70
23
159
CLOSER BANG
172.72
24
767
SCALAWAG DUCK
172.68
25
745
ROSES TREASURE
171.72
26
865
SUNRISE LEAGUE
170.76
27
791
SHOW LORD
170.75
28
624
NIGHTMARE CHILL
169.75
29
771
SCORPION APOLLO
168.77
...
...
...
...
928
262
DUMBO LUST
13.94
929
965
WATERSHIP FRONTIER
13.94
930
516
LEGEND JEDI
13.91
931
630
NOTTING SPEAKEASY
13.90
932
423
HOLLYWOOD ANONYMOUS
13.87
933
910
TREATMENT JEKYLL
12.93
934
751
RUNAWAY TENENBAUMS
12.92
935
822
SOUP WISDOM
12.92
936
866
SUNSET RACER
12.92
937
940
VICTORY ACADEMY
12.91
938
899
TOWERS HURRICANE
12.89
939
566
MAUDE MOD
11.92
940
355
GHOSTBUSTERS ELF
11.91
941
594
MONTEZUMA COMMAND
11.91
942
477
JAWBREAKER BROOKLYN
11.90
943
157
CLOCKWORK PARADISE
11.90
944
178
CONNECTION MICROCOSMOS
10.93
945
52
BALLROOM MOCKINGBIRD
10.92
946
168
COMANCHEROS ENEMY
9.94
947
523
LIGHTS DEER
9.92
948
401
HAROLD FRENCH
9.92
949
839
STALLION SUNDANCE
8.93
950
475
JAPANESE RUN
7.94
951
196
CRUELTY UNFORGIVEN
7.93
952
718
REBEL AIRPORT
7.92
953
261
DUFFEL APOCALYPSE
6.93
954
996
YOUNG LANGUAGE
6.93
955
335
FREEDOM CLEOPATRA
5.95
956
635
OKLAHOMA JUMANJI
5.94
957
885
TEXAS WATCH
5.94
958 rows × 3 columns
In [24]:
SQL_QUERY = """
SELECT f.film_id, f.title, SUM(p.amount) "revenue"
FROM payment p
JOIN rental r ON p.rental_id = r.rental_id
JOIN inventory i ON i.inventory_id = r.inventory_id
JOIN film f ON f.film_id = i.film_id
GROUP BY f.film_id
ORDER BY revenue DESC
;
"""
pd.read_sql(SQL_QUERY, db)
Out[24]:
film_id
title
revenue
0
879
TELEGRAPH VOYAGE
231.73
1
973
WIFE TURN
223.69
2
1000
ZORRO ARK
214.69
3
369
GOODFELLAS SALUTE
209.69
4
764
SATURDAY LAMBS
204.72
5
893
TITANS JERK
201.71
6
897
TORQUE BOUND
198.72
7
403
HARRY IDAHO
195.70
8
460
INNOCENT USUAL
191.74
9
444
HUSTLER PARTY
190.78
10
670
PELICAN COMFORTS
188.74
11
127
CAT CONEHEADS
181.70
12
284
ENEMY ODDS
180.71
13
103
BUCKET BROTHERHOOD
180.66
14
715
RANGE MOONWALKER
179.73
15
563
MASSACRE USUAL
179.70
16
941
VIDEOTAPE ARSENIC
178.71
17
239
DOGMA FAMILY
178.70
18
31
APACHE DIVINE
178.69
19
938
VELVET TERMINATOR
177.74
20
244
DORADO NOTTING
176.73
21
327
FOOL MOCKINGBIRD
175.77
22
979
WITCHES PANIC
173.70
23
159
CLOSER BANG
172.72
24
767
SCALAWAG DUCK
172.68
25
745
ROSES TREASURE
171.72
26
865
SUNRISE LEAGUE
170.76
27
791
SHOW LORD
170.75
28
624
NIGHTMARE CHILL
169.75
29
771
SCORPION APOLLO
168.77
...
...
...
...
928
965
WATERSHIP FRONTIER
13.94
929
262
DUMBO LUST
13.94
930
516
LEGEND JEDI
13.91
931
630
NOTTING SPEAKEASY
13.90
932
423
HOLLYWOOD ANONYMOUS
13.87
933
910
TREATMENT JEKYLL
12.93
934
866
SUNSET RACER
12.92
935
751
RUNAWAY TENENBAUMS
12.92
936
822
SOUP WISDOM
12.92
937
940
VICTORY ACADEMY
12.91
938
899
TOWERS HURRICANE
12.89
939
566
MAUDE MOD
11.92
940
355
GHOSTBUSTERS ELF
11.91
941
594
MONTEZUMA COMMAND
11.91
942
477
JAWBREAKER BROOKLYN
11.90
943
157
CLOCKWORK PARADISE
11.90
944
178
CONNECTION MICROCOSMOS
10.93
945
52
BALLROOM MOCKINGBIRD
10.92
946
168
COMANCHEROS ENEMY
9.94
947
523
LIGHTS DEER
9.92
948
401
HAROLD FRENCH
9.92
949
839
STALLION SUNDANCE
8.93
950
475
JAPANESE RUN
7.94
951
196
CRUELTY UNFORGIVEN
7.93
952
718
REBEL AIRPORT
7.92
953
996
YOUNG LANGUAGE
6.93
954
261
DUFFEL APOCALYPSE
6.93
955
335
FREEDOM CLEOPATRA
5.95
956
635
OKLAHOMA JUMANJI
5.94
957
885
TEXAS WATCH
5.94
958 rows × 3 columns
In [3]:
customer_df = pd.read_sql("SELECT * FROM customer;", db)
In [4]:
customer_df.head(1)
Out[4]:
customer_id
store_id
first_name
last_name
email
address_id
active
create_date
last_update
0
1
1
MARY
SMITH
MARY.SMITH@sakilacustomer.org
5
1
2006-02-14 22:04:36
2006-02-15 04:57:20
In [5]:
payment_df.head(1)
Out[5]:
payment_id
customer_id
staff_id
rental_id
amount
payment_date
last_update
0
1
1
1
76.0
2.99
2005-05-25 11:30:37
2006-02-15 22:12:30
In [3]:
SQL_QUERY = """
SELECT c.first_name, c.last_name, SUM(p.amount) "revenue"
FROM customer c
JOIN payment p
ON c.customer_id = p.customer_id
GROUP BY c.customer_id
ORDER BY revenue DESC
;
"""
pd.read_sql(SQL_QUERY, db)
Out[3]:
first_name
last_name
revenue
0
KARL
SEAL
221.55
1
ELEANOR
HUNT
216.54
2
CLARA
SHAW
195.58
3
MARION
SNYDER
194.61
4
RHONDA
KENNEDY
194.61
5
TOMMY
COLLAZO
186.62
6
WESLEY
BULL
177.60
7
TIM
CARY
175.61
8
MARCIA
DEAN
175.58
9
ANA
BRADLEY
174.66
10
JUNE
CARROLL
173.63
11
LENA
JENSEN
170.67
12
DIANE
COLLINS
169.65
13
ARNOLD
HAVENS
167.67
14
CURTIS
IRBY
167.62
15
MIKE
WAY
166.65
16
DAISY
BATES
162.62
17
TONYA
CHAPMAN
161.68
18
LOUIS
LEONE
161.65
19
GORDON
ALLARD
160.68
20
BRITTANY
RILEY
159.72
21
GUY
BROWNLEE
159.68
22
WARREN
SHERROD
159.67
23
MARGIE
WADE
159.64
24
MAE
FLETCHER
158.69
25
STEVE
MACKENZIE
158.66
26
HARRY
ARCE
157.65
27
PRISCILLA
LOWE
157.65
28
ARTHUR
SIMPKINS
155.68
29
MICHELLE
CLARK
155.65
...
...
...
...
569
JO
FOWLER
73.80
570
JUAN
FRALEY
73.77
571
FELIX
GAFFNEY
73.76
572
JONATHAN
SCARBOROUGH
72.82
573
TERRY
GRISSOM
72.80
574
EUGENE
CULPEPPER
71.81
575
EDITH
MCDONALD
71.81
576
ANTHONY
SCHWAB
71.80
577
LAUREN
HUDSON
71.80
578
SAMANTHA
DUNCAN
71.77
579
IRMA
PEARSON
70.82
580
WAYNE
TRUONG
70.81
581
FLOYD
GANDY
69.83
582
PENNY
NEAL
68.82
583
JILL
HAWKINS
68.79
584
VERA
MCCOY
67.82
585
JOANN
GARDNER
66.84
586
LLOYD
DOWD
66.81
587
ALBERTO
HENNING
66.79
588
LESTER
KRAUS
65.84
589
KIRK
STCLAIR
64.81
590
MATTIE
HOFFMAN
64.78
591
ANITA
MORALES
62.85
592
TIFFANY
JORDAN
59.86
593
KATHERINE
RIVERA
58.86
594
ANNIE
RUSSELL
58.82
595
JOHNNY
TURPIN
57.81
596
BRIAN
WYMAN
52.88
597
LEONA
OBRIEN
50.86
598
CAROLINE
BOWMAN
50.85
599 rows × 3 columns
actor_df => actor_id, first_name, last_name film_actor_df => actor_id, film_id inventory_df => inventory_id, film_id rental_df => rental_id, inventory_id payment_df => rental_id, amount
In [12]:
SQL_QUERY = """
SELECT a.first_name, a.last_name, SUM(p.amount) "revenue"
FROM
actor a,
film_actor fa,
inventory i,
rental r,
payment p
WHERE
a.actor_id = fa.actor_id
AND fa.film_id = i.film_id
AND i.inventory_id = r.inventory_id
AND r.rental_id = p.rental_id
GROUP BY a.actor_id
ORDER BY revenue DESC
;
"""
pd.read_sql(SQL_QUERY, db)
Out[12]:
first_name
last_name
revenue
0
GINA
DEGENERES
3442.49
1
MATTHEW
CARREY
2742.19
2
MARY
KEITEL
2689.25
3
SCARLETT
DAMON
2655.28
4
WALTER
TORN
2620.62
5
ANGELA
WITHERSPOON
2614.46
6
CHRISTIAN
AKROYD
2611.49
7
HENRY
BERRY
2602.88
8
WOODY
HOFFMAN
2546.40
9
CAMERON
ZELLWEGER
2529.41
10
SANDRA
KILMER
2485.96
11
BEN
WILLIS
2459.49
12
SEAN
GUINESS
2424.00
13
MICHAEL
BOLGER
2401.48
14
JAYNE
NOLTE
2400.86
15
KEVIN
GARLAND
2393.36
16
WHOOPI
HURT
2366.72
17
ANGELA
HUDSON
2361.26
18
HELEN
VOIGHT
2358.45
19
KIRSTEN
PALTROW
2344.49
20
JULIA
MCQUEEN
2330.51
21
ED
MANSFIELD
2323.21
22
RENEE
TRACY
2321.56
23
UMA
WOOD
2314.64
24
REESE
WEST
2299.47
25
JADA
RYDER
2294.43
26
REESE
KILMER
2273.52
27
RIP
CRAWFORD
2271.88
28
WARREN
NOLTE
2271.37
29
EWAN
GOODING
2264.31
...
...
...
...
170
CAMERON
STREEP
1421.83
171
CHRIS
DEPP
1420.17
172
RITA
REYNOLDS
1420.06
173
DAN
TORN
1419.95
174
MINNIE
KILMER
1417.09
175
JIM
MOSTEL
1406.17
176
BETTE
NICHOLSON
1402.21
177
MENA
HOPPER
1396.44
178
AUDREY
BAILEY
1390.68
179
JOE
SWANK
1389.21
180
DAN
STREEP
1380.12
181
CHRISTOPHER
BERRY
1379.30
182
CUBA
BIRCH
1370.38
183
CATE
HARRIS
1360.37
184
SUSAN
DAVIS
1354.79
185
CARY
MCCONAUGHEY
1330.57
186
CAMERON
WRAY
1327.97
187
ED
CHASE
1322.89
188
JEFF
SILVERSTONE
1278.74
189
KENNETH
PESCI
1258.08
190
RUSSELL
CLOSE
1251.05
191
PENELOPE
GUINESS
1230.94
192
JULIA
FAWCETT
1189.42
193
JULIA
ZELLWEGER
1064.79
194
JENNIFER
DAVIS
1052.27
195
SANDRA
PECK
1040.12
196
JUDY
DEAN
976.47
197
ADAM
GRANT
974.19
198
SISSY
SOBIESKI
902.65
199
EMILY
DEE
883.85
200 rows × 3 columns
Content source: kimkipyo/dss_git_kkp
Similar notebooks: