In [1]:
import datetime
import time
import functools

import pandas as pd
import numpy as np

import nba_py
import nba_py.game
import nba_py.player
import nba_py.team

import pymysql
from sqlalchemy import create_engine

import sys
sys.path.append('/Users/wonderui/OneDrive/6_Module_Package')
sys.path.append('/Users/WangRui/OneDrive/6_Module_Package')
import hoop_pwd
pwd = hoop_pwd.password

In [7]:
conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)

try:
    # read sql table of game header
    game_header = pd.read_sql_table('game_header', conn)
    length_1 = len(game_header)
    print(str(length_1) + ' games loaded.')
    # set begin date to the newest date in sql table
    begin = datetime.datetime.strptime(game_header.iloc[-1]['GAME_DATE_EST'][:10], "%Y-%m-%d").date()
except ValueError:
    print('no table yet!')
    length_1 = 0
    # if no table yet, set begin date to 2012-10-29
    begin = datetime.date(2012, 10, 29)
    # grab game headers of begining date
    game_header = nba_py.Scoreboard(month = begin.month, 
                                        day = begin.day, 
                                        year = begin.year, league_id = '00', offset = 0).game_header()

# set end date to today
end = datetime.date.today()

for i in range((end - begin).days + 1):
    # grab game headers from begin date to end date
    day = begin + datetime.timedelta(days = i)
    game_header = game_header.append(nba_py.Scoreboard(month = day.month, 
                                                       day = day.day, 
                                                       year = day.year, 
                                                       league_id = '00', 
                                                       offset = 0).game_header())
    print(str(day) + ' finished!    ' + str(datetime.datetime.now().time())[:8])

length_2 = len(game_header)
# drop the duplicate by game id
game_header = game_header.drop_duplicates('GAME_ID')
length_3 = len(game_header)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' games added.')

# sort game headers by game id ascending
# game_header = game_header.sort_values('GAME_ID')

# commit new game headers to sql table
game_header.to_sql('game_header', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' game headers commit complete!')


7019 games loaded.
2017-06-12 finished!    11:14:31
2017-06-13 finished!    11:14:34
2017-06-14 finished!    11:14:36
2017-06-15 finished!    11:14:37
2017-06-16 finished!    11:14:39
2017-06-17 finished!    11:14:41
2017-06-18 finished!    11:14:42
2017-06-19 finished!    11:14:44
2017-06-20 finished!    11:14:45
2017-06-21 finished!    11:14:47
2017-06-22 finished!    11:14:50
2017-06-23 finished!    11:14:51
2017-06-24 finished!    11:14:53
2017-06-25 finished!    11:14:55
2017-06-26 finished!    11:14:57
2017-06-27 finished!    11:14:58
2017-06-28 finished!    11:15:00
2017-06-29 finished!    11:15:01
2017-06-30 finished!    11:15:03
2017-07-01 finished!    11:15:04
2017-07-02 finished!    11:15:07
2017-07-03 finished!    11:15:08
2017-07-04 finished!    11:15:10
2017-07-05 finished!    11:15:11
2017-07-06 finished!    11:15:13
2017-07-07 finished!    11:15:15
2017-07-08 finished!    11:15:16
2017-07-09 finished!    11:15:18
2017-07-10 finished!    11:15:20
2017-07-11 finished!    11:15:22
2017-07-12 finished!    11:15:24
2017-07-13 finished!    11:15:25
2017-07-14 finished!    11:15:27
2017-07-15 finished!    11:15:29
2017-07-16 finished!    11:15:30
2017-07-17 finished!    11:15:32
2017-07-18 finished!    11:15:34
2017-07-19 finished!    11:15:35
2017-07-20 finished!    11:15:37
2017-07-21 finished!    11:15:38
2017-07-22 finished!    11:15:40
2017-07-23 finished!    11:15:41
2017-07-24 finished!    11:15:43
2017-07-25 finished!    11:15:44
2017-07-26 finished!    11:15:46
2017-07-27 finished!    11:15:48
2017-07-28 finished!    11:15:50
2017-07-29 finished!    11:15:52
2017-07-30 finished!    11:15:53
2017-07-31 finished!    11:15:55
2017-08-01 finished!    11:15:56
2017-08-02 finished!    11:15:58
2017-08-03 finished!    11:15:59
2017-08-04 finished!    11:16:00
2017-08-05 finished!    11:16:02
2017-08-06 finished!    11:16:03
2017-08-07 finished!    11:16:05
2017-08-08 finished!    11:16:06
2017-08-09 finished!    11:16:08
2017-08-10 finished!    11:16:09
2017-08-11 finished!    11:16:12
2017-08-12 finished!    11:16:14
2017-08-13 finished!    11:16:16
2017-08-14 finished!    11:16:18
2017-08-15 finished!    11:16:19
2017-08-16 finished!    11:16:21
2017-08-17 finished!    11:16:22
2017-08-18 finished!    11:16:24
2017-08-19 finished!    11:16:25
2017-08-20 finished!    11:16:26
2017-08-21 finished!    11:16:28
2017-08-22 finished!    11:16:29
2017-08-23 finished!    11:16:30
2017-08-24 finished!    11:16:32
2017-08-25 finished!    11:16:33
2017-08-26 finished!    11:16:34
2017-08-27 finished!    11:16:36
2017-08-28 finished!    11:16:38
2017-08-29 finished!    11:16:40
2017-08-30 finished!    11:16:41
2017-08-31 finished!    11:16:43
2017-09-01 finished!    11:16:44
2017-09-02 finished!    11:16:46
2017-09-03 finished!    11:16:47
2017-09-04 finished!    11:16:49
2017-09-05 finished!    11:16:50
2017-09-06 finished!    11:16:52
2017-09-07 finished!    11:16:53
2017-09-08 finished!    11:16:55
2017-09-09 finished!    11:16:57
2017-09-10 finished!    11:16:58
2017-09-11 finished!    11:16:59
2017-09-12 finished!    11:17:01
2017-09-13 finished!    11:17:02
2017-09-14 finished!    11:17:04
2017-09-15 finished!    11:17:05
2017-09-16 finished!    11:17:07
2017-09-17 finished!    11:17:09
2017-09-18 finished!    11:17:10
2017-09-19 finished!    11:17:11
2017-09-20 finished!    11:17:13
1 duplicates droped.
0 games added.
7019 game headers commit complete!

In [28]:
conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)

game_stats_logs = pd.DataFrame()

try:
    # read sql table of game stats logs id
    game_stats_logs_id = pd.read_sql_table('game_stats_logs', conn, columns = ['GAME_ID'])
    length_1 = len(game_stats_logs_id)
    print(str(length_1) + ' player stats loaded.')
except ValueError:
    print('no table yet!')
    length_1 = 0
    # create table and commit it to sql
    game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'append')
    print('game stats logs initialized!')

# define game types by the head of game id
game_type = {'001': 'pre_season', '002': 'regular_season', '003': 'all_star', '004': 'play_offs'}

# ------method 1------for game id in game headers from the max one in sql table
# for i in game_header[game_header['GAME_ID'] >= game_stats_logs['GAME_ID'].max()]['GAME_ID']:

# ------method 2------for game id in game header but not in game stats logs 
for i in game_header['GAME_ID'][game_header['GAME_ID'].isin(game_stats_logs_id['GAME_ID'].drop_duplicates()) == False]:
    # get game player stats of i
    game_stats = nba_py.game.Boxscore(i).player_stats()
    # create home team player stats
    home_team_id = int(game_header[game_header['GAME_ID'] == i]['HOME_TEAM_ID'])
    home_stats_logs = game_stats[game_stats['TEAM_ID'] == int(home_team_id)].copy()
    home_stats_logs['LOCATION'] = 'HOME'
    home_stats_logs['AGAINST_TEAM_ID'] = int(game_header[game_header['GAME_ID'] == i]['VISITOR_TEAM_ID'])
    # create away team player stats
    away_team_id = int(game_header[game_header['GAME_ID'] == i]['VISITOR_TEAM_ID'])
    away_stats_logs = game_stats[game_stats['TEAM_ID'] == int(away_team_id)].copy()
    away_stats_logs['LOCATION'] = 'AWAY'
    away_stats_logs['AGAINST_TEAM_ID'] = int(game_header[game_header['GAME_ID'] == i]['HOME_TEAM_ID'])
    # combine home and away team player stats and append to game stats logs
    game_stats_logs = game_stats_logs.append(home_stats_logs)
    game_stats_logs = game_stats_logs.append(away_stats_logs)
    print('game ' + i + ' added!    ' + str(datetime.datetime.now().time())[:8])

def min_convert(m):
    '''
    convert mm:ss to float
    '''
    try:
        if ':' in m:
            return float(m[:-3]) + round(float(m[-2:])/60, 2)
        else:
            return float(m)
    except TypeError:
        return None

# create float time
game_stats_logs['MINS'] = game_stats_logs['MIN'].apply(min_convert)
# add game type
game_stats_logs['GAME_TYPE'] = game_stats_logs['GAME_ID'].apply(lambda x: x[:3]).map(game_type)
# add game date and game sequence
game_stats_logs = game_stats_logs.merge(game_header[['GAME_DATE_EST', 'GAME_SEQUENCE', 'GAME_ID']], 
                                        how = 'left', on = 'GAME_ID')
# add new ordered game_id
game_stats_logs['GAME_ID_O'] = game_stats_logs['GAME_ID'].apply(lambda x: x[3:5] + x[:3] + x[-5:])

length_2 = len(game_stats_logs)
# drop duplicate game stats by game id and player id
game_stats_logs = game_stats_logs.drop_duplicates(['GAME_ID', 'PLAYER_ID'])
length_3 = len(game_stats_logs)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3) + ' player stats added.')

# commit new game stats logs to sql table
game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'append')
print(str(length_3) + ' player stats commit complete!')


182367 player stats loaded.
game 0011300114 added!    17:37:07
game 0041600404 added!    17:37:09
game 0041600405 added!    17:37:11
0 duplicates droped.
52 player stats added.
52 player stats commit complete!

In [ ]:


In [2]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [6]:


In [ ]:


In [ ]: