In [180]:
import pandas as pd
import datetime
import random
import pytz

from sqlalchemy import create_engine

from password import hoop_pwd
pwd = hoop_pwd.password

%matplotlib inline

from nba_seer import *

In [181]:
# Connect to the nba_stats MySQL database and load the per-player game logs,
# leaving out every row whose GAME_TYPE is 'all_star'.
# NOTE(review): host and user are hard-coded in the URL below - consider moving them to config.

conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)
game_stats_logs = (
    pd.read_sql_table('game_stats_logs', conn)
    .loc[lambda logs: logs['GAME_TYPE'] != 'all_star']
)

print(str(len(game_stats_logs)) + ' player stats loaded.')


187359 player stats loaded.

In [182]:
# load player list ----------------------
# Fetches the 2017-18 player list through nba_py's PlayerList(...).info().
# NOTE(review): `nba_py` is never imported explicitly in this notebook; presumably it is
# re-exported by `from nba_seer import *` - confirm.
# NOTE(review): `all_players` is not referenced by any later cell visible here.

all_players = nba_py.player.PlayerList(season='2017-18').info()

print('players list loaded')


players list loaded

In [183]:
def get_score(row):
    """Return the composite box-score rating of one player in one game.

    The stat line is looked up in the notebook-level ``game_stats_logs`` frame by
    matching ``row['PERSON_ID']`` against ``PLAYER_ID`` and ``row['GAME_ID']``
    against ``GAME_ID``. When several log rows match, the largest rating is
    returned; when none match, the result is NaN.
    """
    is_player = game_stats_logs['PLAYER_ID'] == row['PERSON_ID']
    is_game = game_stats_logs['GAME_ID'] == row['GAME_ID']
    box = game_stats_logs[is_player & is_game]

    # Missed field goals are penalised one point each.
    missed_shots = box['FGA'] - box['FGM']
    rating = (
        box['PTS'] * 1 + box['AST'] * 1.5
        + box['OREB'] * 1 + box['DREB'] * 0.7
        + box['STL'] * 2 + box['BLK'] * 1.8 + box['TO'] * -1
        + box['FGM'] * 0.4 + missed_shots * -1 + box['FG3M'] * 0.5
    )
    return rating.max()

In [184]:
def test_result(players):
    diff_pct = players['SCO_DIFF_PCT'].describe().to_frame()
    diff_sco = players['SCO_DIFF'].describe().to_frame()
    ans = pd.concat([diff_sco, diff_pct], axis=1)
    return ans

In [185]:
def factor_cov(players):
    """Plot and report how |MIN_COV_20 * SCO_COV_20| relates to |SCO_DIFF_PCT|.

    Draws a scatter plot of the two quantities and prints the correlation
    between them (``Series.corr`` with its default method). Returns nothing.
    """
    cov_diff = pd.DataFrame()
    cov_diff['cov'] = (players['MIN_COV_20'] * players['SCO_COV_20']).abs()
    cov_diff['diff_pct'] = players['SCO_DIFF_PCT'].abs()

    cov_diff.plot(x='cov', y='diff_pct', kind='scatter', grid=True, figsize=(10, 7))

    cov_diff_pct_corr = cov_diff['cov'].corr(cov_diff['diff_pct'])
    print('the corr between cov and different percantage is: ', cov_diff_pct_corr)

In [186]:
def diff_distribution(players, rg, interval):
    """Bar-plot the distribution of |SCO_DIFF_PCT| and print the share of rows in range.

    Parameters
    ----------
    players : DataFrame
        Must contain a numeric ``SCO_DIFF_PCT`` column.
    rg : number
        Inclusive upper bound on |SCO_DIFF_PCT|; rows beyond it are left out of
        the histogram but still count in the denominator of the printed share.
    interval : number
        Width of each histogram bin (the last bin may be narrower when ``rg``
        is not a multiple of ``interval``).

    Raises
    ------
    ValueError
        If ``rg`` or ``interval`` is not positive.
    """
    if rg <= 0 or interval <= 0:
        raise ValueError('rg and interval must both be positive')

    abs_pct = players['SCO_DIFF_PCT'].abs()
    diff_pct = abs_pct[abs_pct <= rg]

    # Edges run 0, interval, 2*interval, ... and always finish exactly at rg, so
    # every value kept by the `<= rg` filter above lands in a bin. The small
    # tolerance avoids a sliver bin when step*interval misses rg by rounding.
    bins = []
    step = 0
    while step * interval < rg - interval * 1e-9:
        bins.append(step * interval)
        step += 1
    bins.append(rg)

    # include_lowest=True keeps exact zeros, which the default left-open first bin drops.
    pct_cut = pd.cut(diff_pct, bins, include_lowest=True)
    # Series.value_counts replaces the deprecated top-level pd.value_counts; the
    # explicit rename gives the same 'count' column on every pandas version.
    pct_cut_df = pct_cut.value_counts(sort=False).rename('count').to_frame()
    pct_cut_df = pct_cut_df.sort_index(axis=0)
    pct_cut_df.plot(kind='bar', figsize=(15, 7))

    # An empty input has no meaningful share; report NaN instead of dividing by zero.
    share = len(diff_pct) / len(players) if len(players) else float('nan')
    print('data in range:', share)

In [187]:
# Distinct game dates as 'YYYY-MM-DD' strings (the first 10 characters of GAME_DATE_EST).
# Truncate before de-duplicating so two different timestamps on the same day cannot leave
# the same date in the pool twice; missing values are dropped instead of raising on slicing.
game_date = game_stats_logs['GAME_DATE_EST'].dropna().str[:10].drop_duplicates()

In [220]:
# Build the evaluation sample: pick 5 random game dates and gather the rows that
# get_players_p returns for the games of each date.
# NOTE(review): random.sample is unseeded, so every run evaluates a different set of dates.
daily_frames = []
for date_str in random.sample(list(game_date), 5):
    game_day = datetime.datetime.strptime(date_str, "%Y-%m-%d").date()
    games = get_games(game_day)
    daily_frames.append(get_players_p(games, game_stats_logs))
    print(str(game_day) + ' complete!')

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration. Index labels are kept as-is.
players = pd.concat(daily_frames)
print(len(players), ' players total!')


2017-01-15 complete!
2013-04-01 complete!
2014-04-06 complete!
2016-11-19 complete!
2014-12-30 complete!
1103  players total!

In [221]:
# Compute the expected score (EXP_SCO) and its supporting columns for every sampled row.
# get_exp_sco is not defined in this notebook; presumably it comes from `nba_seer` - confirm.
# NOTE(review): rebinding `players` makes this cell non-idempotent - re-running it feeds
# the already-augmented frame back into get_exp_sco.
players = get_exp_sco(players, game_stats_logs)


5games days complete!
days rest complete!
ma20 complete!
ma10 complete!
ma5 complete!
min20 complete!
min10 complete!
min5 complete!
min_cov_20 complete!
sco_cov_20 complete!
sco cov less than 0 droped!
all done!

In [222]:
# Attach the actual rating of each row and the prediction error:
#   SCO_DIFF     = EXP_SCO - ACT_SCO  (positive means the prediction was too high)
#   SCO_DIFF_PCT = SCO_DIFF / ACT_SCO (a ratio, not a percentage)
players = (
    players
    .assign(ACT_SCO=lambda frame: frame.apply(get_score, axis=1))
    .assign(SCO_DIFF=lambda frame: frame['EXP_SCO'] - frame['ACT_SCO'])
    .assign(SCO_DIFF_PCT=lambda frame: frame['SCO_DIFF'] / frame['ACT_SCO'])
)
print('test data loaded!')


test data loaded!

In [223]:
# Drop rows without a usable actual score: a missing ACT_SCO cannot be compared,
# and a zero ACT_SCO is the divisor of SCO_DIFF_PCT.
players = players.loc[lambda frame: frame['ACT_SCO'].notnull() & frame['ACT_SCO'].ne(0)]

In [224]:
# Display the sample ordered by actual score (ascending); `players` itself is not modified.
players.sort_values('ACT_SCO')


Out[224]:
PERSON_ID TEAM_ID Location GAME_ID Against_Team_ID 5_g_d d_rest MA_20 MA_10 MA_5 MIN_20 MIN_10 MIN_5 MIN_COV_20 SCO_COV_20 EXP_SCO ACT_SCO SCO_DIFF SCO_DIFF_PCT
20 203501 1610612752 AWAY 0021301149 1610612748 8 1303 14.66 14.47 13.50 25.95 23.92 25.84 0.264 0.888 9.96 -6.3 16.26 -2.580952
78 201579 1610612754 HOME 0021301152 1610612737 8 1303 14.97 15.15 14.03 29.52 29.25 30.05 0.166 0.528 12.10 -6.0 18.10 -3.016667
18 203498 1610612750 AWAY 0021600608 1610612742 11 288 16.83 20.73 19.45 20.39 22.31 21.83 0.211 0.621 11.35 -5.3 16.65 -3.141509
164 201960 1610612762 HOME 0021201106 1610612757 17 1678 19.61 9.50 14.72 14.72 8.90 3.81 0.656 0.911 3.71 -4.3 8.01 -1.862791
185 203957 1610612762 HOME 0021400472 1610612750 11 1034 12.71 17.22 12.17 17.94 19.28 20.93 0.242 0.913 7.56 -3.5 11.06 -3.160000
36 1627832 1610612761 HOME 0021600606 1610612752 33 288 25.71 15.70 35.17 5.28 8.34 4.15 1.263 1.366 4.20 -3.0 7.20 -2.400000
98 2564 1610612759 AWAY 0021400468 1610612763 10 1035 20.41 21.11 24.26 27.29 27.13 26.73 0.234 0.642 16.48 -2.8 19.28 -6.885714
101 203898 1610612745 AWAY 0021600609 1610612751 16 290 9.05 9.19 17.03 5.70 6.65 1.48 0.953 1.367 1.51 -2.0 3.51 -1.755000
124 203117 1610612753 AWAY 0021201104 1610612745 9 1673 12.41 19.08 19.85 7.06 6.20 6.65 0.536 1.968 3.15 -2.0 5.15 -2.575000
171 203197 1610612762 AWAY 0021301155 1610612744 10 1303 11.13 10.90 7.59 15.74 15.92 16.05 0.293 1.171 4.36 -2.0 6.36 -3.180000
23 1626145 1610612750 AWAY 0021600608 1610612742 24 292 7.78 12.13 37.78 3.60 6.36 11.15 1.150 4.523 3.76 -2.0 5.76 -2.880000
35 1627751 1610612761 HOME 0021600606 1610612752 11 288 20.63 18.09 16.83 7.36 9.49 5.64 0.874 0.840 3.86 -2.0 5.86 -2.930000
173 1627739 1610612750 AWAY 0021600190 1610612763 9 345 18.74 20.80 22.04 21.34 20.95 17.33 0.263 0.553 11.33 -1.8 13.13 -7.294444
20 1627739 1610612750 AWAY 0021600608 1610612742 8 288 18.20 17.10 17.09 15.72 15.90 13.89 0.216 0.640 7.36 -1.8 9.16 -5.088889
99 201147 1610612745 AWAY 0021600609 1610612751 8 288 14.06 18.77 16.20 15.36 19.37 19.85 0.417 0.909 8.26 -1.8 10.06 -5.588889
110 203085 1610612740 HOME 0021400469 1610612756 8 1036 16.14 20.03 19.82 24.15 25.10 25.00 0.250 0.636 12.83 -1.8 14.63 -8.127778
143 201584 1610612763 AWAY 0021301154 1610612759 8 1303 16.76 15.72 22.47 27.34 27.91 30.62 0.208 0.560 14.56 -1.6 16.16 -10.100000
223 101150 1610612761 AWAY 0021400473 1610612757 10 1035 25.53 23.64 27.01 26.40 27.86 28.73 0.158 0.578 19.51 -1.5 21.01 -14.006667
138 203584 1610612763 HOME 0021600611 1610612741 10 288 18.06 23.39 14.18 19.91 17.77 17.87 0.345 0.892 9.54 -1.5 11.04 -7.360000
86 202347 1610612763 HOME 0021201103 1610612759 8 1673 13.47 7.03 13.44 20.40 19.46 21.08 0.238 0.854 6.38 -1.3 7.68 -5.907692
199 1626172 1610612744 AWAY 0021600191 1610612749 10 349 25.97 28.32 33.27 9.41 8.41 8.27 0.534 0.632 7.05 -1.3 8.35 -6.423077
148 101110 1610612764 AWAY 0021400470 1610612742 9 1262 10.78 7.32 7.32 18.56 17.26 17.73 0.230 0.798 4.20 -1.3 5.50 -4.230769
201 203584 1610612750 AWAY 0021400472 1610612762 10 1036 10.68 9.86 15.79 5.81 5.76 6.17 0.907 3.189 1.99 -1.0 2.99 -2.990000
40 202498 1610612752 AWAY 0021600606 1610612761 7 289 12.98 11.88 12.71 19.20 22.43 20.88 0.411 0.618 7.25 -1.0 8.25 -8.250000
83 201589 1610612763 HOME 0021201103 1610612759 8 1673 12.48 14.59 17.70 13.34 15.39 16.18 0.474 1.165 6.21 -1.0 7.21 -7.210000
62 202397 1610612749 HOME 0021201102 1610612766 16 1675 13.92 10.59 26.05 8.13 3.28 7.30 1.078 1.399 2.92 -1.0 3.92 -3.920000
73 202721 1610612751 AWAY 0021400467 1610612741 11 1036 20.17 17.76 14.43 6.40 4.97 8.50 0.700 1.301 3.21 -1.0 4.21 -4.210000
22 201575 1610612750 AWAY 0021600608 1610612742 34 288 16.17 19.11 19.11 20.31 24.29 24.29 0.732 0.785 11.56 -1.0 12.56 -12.560000
71 203186 1610612748 AWAY 0021600186 1610612764 14 345 21.63 16.81 11.42 16.53 12.86 10.64 0.432 0.650 6.16 -1.0 7.16 -7.160000
227 1627756 1610612741 AWAY 0021600192 1610612746 14 345 10.39 13.74 9.12 8.37 8.50 15.62 0.593 1.269 3.33 -1.0 4.33 -4.330000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
205 200746 1610612757 HOME 0021301157 1610612740 9 1303 25.39 29.61 31.82 33.39 33.55 32.85 0.155 0.527 26.74 40.6 -13.86 -0.341379
118 201143 1610612738 AWAY 0021600188 1610612765 15 364 30.04 31.47 27.28 27.87 23.47 25.09 0.291 0.325 20.95 41.0 -20.05 -0.489024
29 101108 1610612746 HOME 0021301150 1610612747 8 1304 36.17 36.52 38.65 36.14 37.61 38.35 0.100 0.326 38.52 41.0 -2.48 -0.060488
43 202681 1610612739 AWAY 0021400466 1610612737 9 1038 25.51 20.76 23.45 37.79 37.86 37.74 0.108 0.316 24.40 42.0 -17.60 -0.419048
178 200782 1610612756 HOME 0021301156 1610612760 10 1303 20.03 17.61 14.30 30.98 29.18 28.05 0.210 0.417 14.14 42.1 -27.96 -0.664133
169 203504 1610612762 AWAY 0021301155 1610612744 10 1303 16.85 15.40 13.07 33.02 34.02 33.23 0.137 0.472 14.03 42.6 -28.57 -0.670657
134 201144 1610612763 HOME 0021600611 1610612741 10 288 30.64 31.64 31.52 31.45 34.35 37.37 0.190 0.277 29.87 42.6 -12.73 -0.298826
196 201579 1610612754 AWAY 0021201107 1610612746 8 1673 29.10 28.80 31.34 29.51 30.07 32.47 0.179 0.315 25.35 42.7 -17.35 -0.406323
55 202322 1610612764 HOME 0021600186 1610612748 11 345 35.29 36.69 35.43 30.69 32.09 31.29 0.226 0.222 31.19 42.7 -11.51 -0.269555
27 200794 1610612737 HOME 0021400466 1610612739 8 1036 29.67 29.05 30.74 32.76 32.53 33.63 0.133 0.305 27.31 43.2 -15.89 -0.367824
169 977 1610612747 AWAY 0021400471 1610612743 15 1035 23.92 21.92 17.36 35.56 35.14 35.03 0.107 0.386 20.62 43.2 -22.58 -0.522685
30 201952 1610612737 HOME 0021400466 1610612739 15 1036 30.99 29.42 30.61 29.98 29.62 30.35 0.159 0.363 25.27 43.3 -18.03 -0.416397
104 201935 1610612745 HOME 0021301153 1610612743 9 1303 36.50 38.84 37.23 37.27 36.65 38.35 0.163 0.430 39.01 43.9 -4.89 -0.111390
191 201142 1610612760 AWAY 0021301156 1610612756 11 1303 39.45 40.73 40.53 39.27 38.74 38.39 0.170 0.287 43.37 44.0 -0.63 -0.014318
118 201166 1610612743 AWAY 0021301153 1610612745 10 1303 23.36 22.71 26.44 26.02 29.75 32.02 0.313 0.591 19.65 44.1 -24.45 -0.554422
4 201935 1610612745 HOME 0021600184 1610612762 9 345 44.38 45.83 43.15 34.75 36.86 36.23 0.135 0.246 44.39 44.4 -0.01 -0.000225
103 201600 1610612745 HOME 0021301153 1610612743 9 1303 20.25 24.44 22.17 21.79 27.17 31.45 0.392 0.417 16.59 44.5 -27.91 -0.627191
106 202326 1610612758 HOME 0021600610 1610612760 10 288 38.01 39.41 36.86 35.12 34.40 33.09 0.099 0.314 36.19 44.5 -8.31 -0.186742
81 201144 1610612763 HOME 0021400468 1610612759 9 1036 29.02 26.96 26.49 33.34 35.35 31.43 0.197 0.306 25.48 44.9 -19.42 -0.432517
15 203083 1610612765 AWAY 0021400465 1610612753 12 1035 31.05 35.14 37.92 30.97 30.75 30.27 0.211 0.348 29.56 44.9 -15.34 -0.341648
0 2544 1610612748 HOME 0021301149 1610612752 8 1303 36.12 37.43 35.97 38.30 37.94 35.20 0.135 0.331 37.67 45.2 -7.53 -0.166593
51 200752 1610612758 HOME 0021301151 1610612742 8 1303 23.15 18.11 15.51 34.55 34.81 34.88 0.167 0.463 18.26 45.8 -27.54 -0.601310
194 202331 1610612754 AWAY 0021201107 1610612746 8 1673 25.98 27.12 26.40 37.15 36.46 39.80 0.179 0.335 27.83 47.1 -19.27 -0.409130
55 101145 1610612749 HOME 0021201102 1610612766 9 1673 27.55 24.79 24.32 40.12 39.31 38.47 0.081 0.431 27.90 49.9 -22.00 -0.440882
65 203507 1610612749 AWAY 0021600607 1610612737 12 288 38.85 36.39 28.77 34.92 34.33 30.59 0.227 0.323 32.05 50.0 -17.95 -0.359000
193 201142 1610612744 AWAY 0021600191 1610612749 10 344 41.53 41.08 41.45 32.03 34.30 34.21 0.207 0.225 38.50 50.0 -11.50 -0.230000
0 200752 1610612761 HOME 0021201100 1610612765 12 1672 22.69 25.37 22.72 34.24 31.01 25.81 0.226 0.350 19.89 51.0 -31.11 -0.610000
27 203076 1610612740 HOME 0021600185 1610612766 11 344 36.56 38.13 40.22 32.29 37.59 37.83 0.330 0.480 38.20 53.2 -15.00 -0.281955
121 201566 1610612760 AWAY 0021600610 1610612758 9 288 45.39 42.31 45.06 33.57 33.70 35.69 0.106 0.304 42.19 56.7 -14.51 -0.255908
157 201939 1610612744 HOME 0021301155 1610612762 8 1303 31.83 33.07 30.94 33.51 36.66 36.24 0.187 0.342 31.48 58.0 -26.52 -0.457241

884 rows × 19 columns


In [247]:
# Subset used for the comparisons below: rows with 5_g_d < 20, SCO_COV_20 < 0.6
# and MIN_COV_20 < 0.25.
# NOTE(review): this cell's execution count (In [247]) is later than that of the cells
# that consume `players_t` further down, so their stored outputs may predate this filter.
players_t = players.loc[
    lambda frame: (frame['5_g_d'] < 20)
    & (frame['SCO_COV_20'] < 0.6)
    & (frame['MIN_COV_20'] < 0.25)
]
players_t.sort_values('ACT_SCO')


Out[247]:
PERSON_ID TEAM_ID Location GAME_ID Against_Team_ID 5_g_d d_rest MA_20 MA_10 MA_5 MIN_20 MIN_10 MIN_5 MIN_COV_20 SCO_COV_20 EXP_SCO ACT_SCO SCO_DIFF SCO_DIFF_PCT
78 201579 1610612754 HOME 0021301152 1610612737 8 1303 14.97 15.15 14.03 29.52 29.25 30.05 0.166 0.528 12.10 -6.0 18.10 -3.016667
143 201584 1610612763 AWAY 0021301154 1610612759 8 1303 16.76 15.72 22.47 27.34 27.91 30.62 0.208 0.560 14.56 -1.6 16.16 -10.100000
223 101150 1610612761 AWAY 0021400473 1610612757 10 1035 25.53 23.64 27.01 26.40 27.86 28.73 0.158 0.578 19.51 -1.5 21.01 -14.006667
59 203488 1610612737 HOME 0021600607 1610612749 10 288 20.30 19.87 19.57 20.18 19.39 20.65 0.215 0.563 11.10 -1.0 12.10 -12.100000
137 203516 1610612763 HOME 0021600611 1610612741 10 288 19.62 18.74 22.37 21.71 23.23 25.33 0.234 0.459 13.17 -0.9 14.07 -15.633333
167 203952 1610612750 AWAY 0021600190 1610612763 9 345 25.16 27.80 33.09 33.59 36.54 39.40 0.162 0.427 29.09 -0.1 29.19 -291.900000
72 201148 1610612742 AWAY 0021301151 1610612758 9 1303 29.83 30.57 33.82 19.12 20.91 22.44 0.242 0.387 18.17 0.1 18.07 180.700000
56 201565 1610612741 HOME 0021400467 1610612751 8 1034 25.35 26.21 26.84 28.63 29.83 30.61 0.196 0.376 21.55 0.2 21.35 106.750000
18 203143 1610612752 AWAY 0021301149 1610612748 8 1303 20.74 20.75 19.55 18.03 18.86 19.54 0.170 0.450 10.63 0.2 10.43 52.150000
133 2557 1610612750 HOME 0021201105 1610612738 7 1673 18.50 17.99 19.34 30.83 28.72 28.41 0.124 0.441 15.16 0.6 14.56 24.266667
183 201599 1610612746 HOME 0021201107 1610612754 8 1673 25.02 24.87 26.54 23.42 25.64 28.21 0.242 0.357 18.23 0.8 17.43 21.787500
17 201943 1610612765 AWAY 0021400465 1610612753 12 1035 22.04 24.63 33.85 28.93 26.88 28.45 0.176 0.519 20.94 1.0 19.94 19.940000
132 2564 1610612759 HOME 0021301154 1610612763 7 1304 22.72 18.71 20.26 25.32 25.30 26.35 0.208 0.481 14.66 1.3 13.36 10.276923
1 201583 1610612745 HOME 0021600184 1610612762 9 345 17.96 17.21 14.36 29.52 32.75 34.08 0.203 0.584 14.73 1.7 13.03 7.664706
16 203484 1610612765 AWAY 0021400465 1610612753 12 1035 17.31 19.32 22.06 30.77 28.46 28.63 0.177 0.495 15.92 2.2 13.72 6.236364
144 1626164 1610612756 AWAY 0021600189 1610612755 10 344 22.64 20.43 20.37 31.75 34.28 31.61 0.221 0.539 19.12 2.3 16.82 7.313043
108 202720 1610612765 HOME 0021600188 1610612738 8 344 24.12 24.73 24.80 25.61 26.65 26.57 0.133 0.497 17.92 2.3 15.62 6.791304
135 201978 1610612750 HOME 0021201105 1610612738 7 1673 21.55 20.41 18.66 21.76 20.33 21.68 0.177 0.504 11.93 2.4 9.53 3.970833
173 201156 1610612747 AWAY 0021400471 1610612743 8 1035 21.62 24.50 22.23 24.88 24.55 26.24 0.198 0.494 15.96 2.5 13.46 5.384000
48 200809 1610612739 AWAY 0021201101 1610612737 8 1674 6.47 6.47 6.47 14.10 14.10 14.10 0.244 0.497 2.53 3.2 -0.67 -0.209375
85 2757 1610612763 HOME 0021400468 1610612759 9 1036 26.52 28.64 24.97 18.81 18.26 19.59 0.177 0.403 14.01 3.3 10.71 3.245455
199 2406 1610612760 AWAY 0021301156 1610612756 11 1303 16.28 17.27 17.27 28.07 29.43 30.34 0.175 0.402 13.78 3.4 10.38 3.052941
5 101112 1610612753 HOME 0021400465 1610612765 9 1034 14.89 17.49 21.06 28.97 26.92 20.92 0.226 0.481 12.67 3.8 8.87 2.334211
209 2590 1610612757 HOME 0021301157 1610612740 9 1303 21.53 20.84 24.70 25.36 25.45 25.41 0.238 0.494 15.78 4.2 11.58 2.757143
39 101107 1610612766 AWAY 0021600185 1610612740 10 344 17.31 17.58 19.07 28.16 28.41 28.38 0.217 0.569 14.15 4.4 9.75 2.215909
79 202362 1610612754 HOME 0021301152 1610612737 8 1303 19.21 18.66 17.20 34.61 31.91 31.73 0.129 0.334 16.70 4.4 12.30 2.795455
140 2754 1610612763 AWAY 0021301154 1610612759 8 1303 19.18 12.99 15.13 19.64 17.02 16.64 0.246 0.559 7.78 4.8 2.98 0.620833
209 2581 1610612757 HOME 0021400473 1610612761 9 1035 18.14 17.89 16.01 22.31 23.12 23.68 0.224 0.499 11.10 5.2 5.90 1.134615
70 204020 1610612748 AWAY 0021600186 1610612764 8 345 24.94 22.99 20.75 26.13 29.85 29.99 0.203 0.497 18.22 5.3 12.92 2.437736
134 201158 1610612759 HOME 0021301154 1610612763 7 1304 22.55 23.92 23.48 24.44 25.60 28.03 0.199 0.523 16.85 5.6 11.25 2.008929
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
106 201600 1610612745 HOME 0021201104 1610612753 9 1673 23.04 21.45 23.68 29.79 29.21 27.82 0.184 0.361 18.27 38.5 -20.23 -0.525455
127 202695 1610612759 HOME 0021301154 1610612763 7 1304 30.55 30.62 33.07 29.96 30.01 28.06 0.162 0.311 25.60 39.4 -13.80 -0.350254
232 201578 1610612744 HOME 0021400474 1610612755 10 1036 31.02 26.92 27.89 19.56 19.91 18.84 0.167 0.426 15.45 39.5 -24.05 -0.608861
181 203953 1610612749 HOME 0021600191 1610612744 12 345 25.00 25.27 21.98 31.08 32.74 35.20 0.140 0.430 22.08 39.8 -17.72 -0.445226
205 200746 1610612757 HOME 0021301157 1610612740 9 1303 25.39 29.61 31.82 33.39 33.55 32.85 0.155 0.527 26.74 40.6 -13.86 -0.341379
29 101108 1610612746 HOME 0021301150 1610612747 8 1304 36.17 36.52 38.65 36.14 37.61 38.35 0.100 0.326 38.52 41.0 -2.48 -0.060488
43 202681 1610612739 AWAY 0021400466 1610612737 9 1038 25.51 20.76 23.45 37.79 37.86 37.74 0.108 0.316 24.40 42.0 -17.60 -0.419048
178 200782 1610612756 HOME 0021301156 1610612760 10 1303 20.03 17.61 14.30 30.98 29.18 28.05 0.210 0.417 14.14 42.1 -27.96 -0.664133
134 201144 1610612763 HOME 0021600611 1610612741 10 288 30.64 31.64 31.52 31.45 34.35 37.37 0.190 0.277 29.87 42.6 -12.73 -0.298826
169 203504 1610612762 AWAY 0021301155 1610612744 10 1303 16.85 15.40 13.07 33.02 34.02 33.23 0.137 0.472 14.03 42.6 -28.57 -0.670657
196 201579 1610612754 AWAY 0021201107 1610612746 8 1673 29.10 28.80 31.34 29.51 30.07 32.47 0.179 0.315 25.35 42.7 -17.35 -0.406323
55 202322 1610612764 HOME 0021600186 1610612748 11 345 35.29 36.69 35.43 30.69 32.09 31.29 0.226 0.222 31.19 42.7 -11.51 -0.269555
169 977 1610612747 AWAY 0021400471 1610612743 15 1035 23.92 21.92 17.36 35.56 35.14 35.03 0.107 0.386 20.62 43.2 -22.58 -0.522685
27 200794 1610612737 HOME 0021400466 1610612739 8 1036 29.67 29.05 30.74 32.76 32.53 33.63 0.133 0.305 27.31 43.2 -15.89 -0.367824
30 201952 1610612737 HOME 0021400466 1610612739 15 1036 30.99 29.42 30.61 29.98 29.62 30.35 0.159 0.363 25.27 43.3 -18.03 -0.416397
104 201935 1610612745 HOME 0021301153 1610612743 9 1303 36.50 38.84 37.23 37.27 36.65 38.35 0.163 0.430 39.01 43.9 -4.89 -0.111390
191 201142 1610612760 AWAY 0021301156 1610612756 11 1303 39.45 40.73 40.53 39.27 38.74 38.39 0.170 0.287 43.37 44.0 -0.63 -0.014318
4 201935 1610612745 HOME 0021600184 1610612762 9 345 44.38 45.83 43.15 34.75 36.86 36.23 0.135 0.246 44.39 44.4 -0.01 -0.000225
106 202326 1610612758 HOME 0021600610 1610612760 10 288 38.01 39.41 36.86 35.12 34.40 33.09 0.099 0.314 36.19 44.5 -8.31 -0.186742
15 203083 1610612765 AWAY 0021400465 1610612753 12 1035 31.05 35.14 37.92 30.97 30.75 30.27 0.211 0.348 29.56 44.9 -15.34 -0.341648
81 201144 1610612763 HOME 0021400468 1610612759 9 1036 29.02 26.96 26.49 33.34 35.35 31.43 0.197 0.306 25.48 44.9 -19.42 -0.432517
0 2544 1610612748 HOME 0021301149 1610612752 8 1303 36.12 37.43 35.97 38.30 37.94 35.20 0.135 0.331 37.67 45.2 -7.53 -0.166593
51 200752 1610612758 HOME 0021301151 1610612742 8 1303 23.15 18.11 15.51 34.55 34.81 34.88 0.167 0.463 18.26 45.8 -27.54 -0.601310
194 202331 1610612754 AWAY 0021201107 1610612746 8 1673 25.98 27.12 26.40 37.15 36.46 39.80 0.179 0.335 27.83 47.1 -19.27 -0.409130
55 101145 1610612749 HOME 0021201102 1610612766 9 1673 27.55 24.79 24.32 40.12 39.31 38.47 0.081 0.431 27.90 49.9 -22.00 -0.440882
65 203507 1610612749 AWAY 0021600607 1610612737 12 288 38.85 36.39 28.77 34.92 34.33 30.59 0.227 0.323 32.05 50.0 -17.95 -0.359000
193 201142 1610612744 AWAY 0021600191 1610612749 10 344 41.53 41.08 41.45 32.03 34.30 34.21 0.207 0.225 38.50 50.0 -11.50 -0.230000
0 200752 1610612761 HOME 0021201100 1610612765 12 1672 22.69 25.37 22.72 34.24 31.01 25.81 0.226 0.350 19.89 51.0 -31.11 -0.610000
121 201566 1610612760 AWAY 0021600610 1610612758 9 288 45.39 42.31 45.06 33.57 33.70 35.69 0.106 0.304 42.19 56.7 -14.51 -0.255908
157 201939 1610612744 HOME 0021301155 1610612762 8 1303 31.83 33.07 30.94 33.51 36.66 36.24 0.187 0.342 31.48 58.0 -26.52 -0.457241

335 rows × 19 columns


In [237]:
def location_eff(game_stats_logs, row):
    """Return the player's (home, away) score ratios relative to recent overall form.

    Only games of ``row['PERSON_ID']`` that have non-null ``MINS`` and a
    ``GAME_ID_O`` sorting before the row's game are considered. The first
    element of ``get_score_36`` is taken over the last 20 such HOME games, the
    last 20 such AWAY games and the last 40 such games overall; the result is
    ``(home / overall, away / overall)``.
    """
    raw_id = row['GAME_ID']
    # Rearranged id compared against GAME_ID_O: characters 3-4, then the first three, then the last five.
    game_id_o = raw_id[3:5] + raw_id[:3] + raw_id[-5:]

    history = game_stats_logs[game_stats_logs['PLAYER_ID'] == row['PERSON_ID']].sort_values('GAME_ID_O')
    # Games played (minutes recorded) strictly before the game being predicted.
    prior = history[history['MINS'].notnull() & (history['GAME_ID_O'] < game_id_o)]

    home_score_20 = get_score_36(prior[prior['LOCATION'] == 'HOME'].tail(20))[0]
    away_score_20 = get_score_36(prior[prior['LOCATION'] == 'AWAY'].tail(20))[0]
    recent_score_40 = get_score_36(prior.tail(40))[0]

    return home_score_20 / recent_score_40, away_score_20 / recent_score_40

In [238]:
# Variant of players_t whose expected score is scaled by the player's home/away ratio.
# NOTE(review): this cell's execution count (In [238]) precedes the In [247] cell that
# defines the current players_t, so the stored results below may be stale.
players_t_1 = players_t.copy()

# location_eff filters the full game log for every row, so evaluate it once per row and
# split the (home, away) pair rather than calling it twice per row. Plain lists are
# assigned positionally, which stays correct with repeated index labels.
location_ratios = [location_eff(game_stats_logs, row) for _, row in players_t_1.iterrows()]
players_t_1['home_eff'] = [home for home, _ in location_ratios]
players_t_1['away_eff'] = [away for _, away in location_ratios]

# Scale the expectation by the ratio that matches where the game is played.
players_t_1['EXP_SCO'] = players_t_1.apply(
    lambda x: x['EXP_SCO'] * x['home_eff'] if x['Location'] == 'HOME'
    else x['EXP_SCO'] * x['away_eff'],
    axis=1)

# Recompute the error columns against the adjusted expectation.
players_t_1['ACT_SCO'] = players_t_1.apply(get_score, axis = 1)
players_t_1['SCO_DIFF'] = players_t_1['EXP_SCO'] - players_t_1['ACT_SCO']
players_t_1['SCO_DIFF_PCT'] = players_t_1['SCO_DIFF'] / players_t_1['ACT_SCO']
print('test data loaded!')


test data loaded!

In [239]:
# Baseline: error summary over every sampled row that has a usable ACT_SCO.
test_result(players)


Out[239]:
SCO_DIFF SCO_DIFF_PCT
count 884.000000 884.000000
mean -1.142455 -0.090621
std 9.271786 13.670506
min -31.110000 -291.900000
25% -7.075000 -0.448652
50% -0.500000 -0.133625
75% 5.192500 0.393032
max 29.190000 180.700000

In [240]:
# Error summary for the filtered subset.
# NOTE(review): the stored output reports 872 rows, while the In [247] filter cell shows
# 335 rows - this output was produced from an earlier definition of players_t.
# Restart and run all cells to refresh it.
test_result(players_t)


Out[240]:
SCO_DIFF SCO_DIFF_PCT
count 872.000000 872.000000
mean -1.145539 -0.111310
std 9.323399 13.749679
min -31.110000 -291.900000
25% -7.152500 -0.444665
50% -0.500000 -0.133625
75% 5.232500 0.389405
max 29.190000 180.700000

In [241]:
# Error summary after scaling EXP_SCO by the home/away ratio.
# NOTE(review): the stored output reports 872 rows, while the In [247] filter cell shows
# 335 rows for players_t - this output was produced before that filter existed in its
# current form. Restart and run all cells to refresh it.
test_result(players_t_1)


Out[241]:
SCO_DIFF SCO_DIFF_PCT
count 872.000000 872.000000
mean -1.053748 -0.056999
std 9.381961 13.851579
min -30.621169 -276.009297
25% -7.241177 -0.450081
50% -0.409459 -0.123112
75% 5.445539 0.403848
max 27.727138 207.278905

In [242]:
# Does |MIN_COV_20 * SCO_COV_20| track the size of the relative error? (all sampled rows)
factor_cov(players)


the corr between cov and different percantage is:  0.016516275476

In [243]:
# Same check on the filtered subset.
# NOTE(review): executed (In [243]) before the In [247] cell that defines the current
# players_t, so the stored correlation may not match the current filter.
factor_cov(players_t)


the corr between cov and different percantage is:  -0.0119185397126

In [244]:
# Histogram of |SCO_DIFF_PCT| up to 5 (SCO_DIFF_PCT is a ratio, so 5 means 500%) in bins
# of width 0.1, for all sampled rows.
diff_distribution(players, 5, 0.1)


data in range: 0.9309954751131222

In [245]:
# Same histogram for the filtered subset.
# NOTE(review): executed (In [245]) before the In [247] cell that defines the current
# players_t, so the stored share may not match the current filter.
diff_distribution(players_t, 5, 0.1)


data in range: 0.9311926605504587

In [246]:
# Same histogram after the home/away adjustment of EXP_SCO.
# NOTE(review): players_t_1 was built (In [238]) before the In [247] cell that defines the
# current players_t, so the stored share may not match the current filter.
diff_distribution(players_t_1, 5, 0.1)


data in range: 0.9380733944954128

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: