In [1]:
#Variable Analysis

In [1]:
#Import Packages
from Schedule.Schedule import Schedule
from Schedule.Stats import Stats
import pandas as pd


//anaconda/lib/python2.7/site-packages/pandas/computation/__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used

  UserWarning)

In [2]:
#Get schedule of games
sched_2014 = Schedule(b_dt = '10/1/2014')
sched_2015 = Schedule(b_dt = '10/1/2015')

In [4]:
#Add four factors
sched_2014.add_four_factors()
sched_2015.add_four_factors()


Out[4]:
Team_ID_home Game_ID FGM_home FGA_home FG_PCT_home FG3M_home FG3A_home FG3_PCT_home FTM_home FTA_home ... Home Team Away Team H_FF_EFG H_FF_ORB H_FF_FTFGA H_FF_TOV A_FF_EFG A_FF_ORB A_FF_FTFGA A_FF_TOV
0 1610612737 21501188 46 88 0.523 17 33 0.515 9 11 ... ATL BOS 0.619318 0.142857 0.102273 0.162152 0.505556 0.204082 0.177778 0.142099
1 1610612737 21501173 33 76 0.434 12 32 0.375 17 21 ... ATL TOR 0.513158 0.116279 0.223684 0.139425 0.428571 0.181818 0.178571 0.114823
2 1610612737 21501157 39 95 0.411 11 34 0.324 14 19 ... ATL PHX 0.468421 0.245283 0.147368 0.150432 0.444444 0.229167 0.222222 0.223881
3 1610612737 21501131 38 95 0.400 9 30 0.300 23 26 ... ATL CLE 0.447368 0.098039 0.242105 0.128822 0.459184 0.203704 0.204082 0.106534
4 1610612737 21501076 41 97 0.423 5 32 0.156 14 17 ... ATL MIL 0.448454 0.340000 0.144330 0.111698 0.434524 0.363636 0.202381 0.158831
5 1610612737 21501048 38 78 0.487 13 33 0.394 13 17 ... ATL WAS 0.570513 0.054054 0.166667 0.143619 0.576923 0.225000 0.131868 0.090180
6 1610612737 21501029 44 88 0.500 14 38 0.368 7 12 ... ATL HOU 0.579545 0.200000 0.079545 0.159553 0.422619 0.326087 0.309524 0.170614
7 1610612737 21501015 40 80 0.500 12 26 0.462 24 28 ... ATL DEN 0.575000 0.189189 0.300000 0.123305 0.482759 0.214286 0.160920 0.140732
8 1610612737 21500984 40 85 0.471 15 30 0.500 9 10 ... ATL IND 0.558824 0.225000 0.105882 0.157233 0.408046 0.145833 0.045977 0.161486
9 1610612737 21500974 36 84 0.429 11 34 0.324 12 17 ... ATL MEM 0.494048 0.173913 0.142857 0.116427 0.378947 0.338983 0.115789 0.164677
10 1610612737 21500878 38 77 0.494 8 17 0.471 3 5 ... ATL CHA 0.545455 0.162162 0.038961 0.170068 0.376543 0.192308 0.185185 0.068871
11 1610612737 21500865 37 89 0.416 7 34 0.206 22 24 ... ATL CHI 0.455056 0.288889 0.247191 0.112751 0.392045 0.326923 0.215909 0.198649
12 1610612737 21500836 36 86 0.419 10 34 0.294 10 16 ... ATL GSW 0.476744 0.177778 0.116279 0.166601 0.516854 0.133333 0.112360 0.128357
13 1610612737 21500819 44 106 0.415 9 41 0.220 12 18 ... ATL MIL 0.457547 0.172414 0.113208 0.133422 0.456731 0.303571 0.211538 0.143970
14 1610612737 21500808 41 87 0.471 16 36 0.444 13 13 ... ATL MIA 0.563218 0.181818 0.149425 0.198638 0.545455 0.227273 0.215909 0.134202
15 1610612737 21500780 43 92 0.467 12 29 0.414 12 20 ... ATL ORL 0.532609 0.163265 0.130435 0.131086 0.543011 0.133333 0.172043 0.118527
16 1610612737 21500750 39 76 0.513 10 27 0.370 14 20 ... ATL IND 0.578947 0.081081 0.184211 0.154959 0.489011 0.395833 0.076923 0.200084
17 1610612737 21500723 42 80 0.525 14 35 0.400 14 19 ... ATL DAL 0.612500 0.156250 0.175000 0.152501 0.405882 0.181818 0.329412 0.061400
18 1610612737 21500687 33 79 0.418 10 23 0.435 7 14 ... ATL LAC 0.481013 0.177778 0.088608 0.221864 0.417647 0.180000 0.164706 0.114298
19 1610612737 21500620 41 80 0.513 9 19 0.474 7 10 ... ATL ORL 0.568750 0.114286 0.087500 0.174538 0.379310 0.203704 0.172414 0.132924
20 1610612737 21500602 44 79 0.557 8 29 0.276 18 24 ... ATL BKN 0.607595 0.147059 0.227848 0.105753 0.474359 0.166667 0.153846 0.176422
21 1610612737 21500551 49 94 0.521 10 24 0.417 12 15 ... ATL CHI 0.574468 0.190476 0.127660 0.139405 0.475904 0.272727 0.313253 0.198788
22 1610612737 21500521 37 87 0.425 15 39 0.385 12 19 ... ATL NYK 0.511494 0.285714 0.137931 0.137770 0.505952 0.289474 0.261905 0.097234
23 1610612737 21500442 47 88 0.534 8 24 0.333 15 18 ... ATL NYK 0.579545 0.250000 0.170455 0.094817 0.524691 0.307692 0.160494 0.223396
24 1610612737 21500429 43 89 0.483 6 20 0.300 15 20 ... ATL DET 0.516854 0.232558 0.168539 0.128968 0.466292 0.288889 0.191011 0.133946
25 1610612737 21500413 37 77 0.481 9 29 0.310 23 28 ... ATL POR 0.538961 0.052632 0.298701 0.102754 0.500000 0.325581 0.197531 0.190114
26 1610612737 21500376 48 78 0.615 10 21 0.476 21 24 ... ATL PHI 0.679487 0.074074 0.269231 0.121753 0.600000 0.233333 0.213333 0.218775
27 1610612737 21500360 33 84 0.393 8 33 0.242 14 15 ... ATL MIA 0.440476 0.152174 0.166667 0.143443 0.488764 0.333333 0.146067 0.156904
28 1610612737 21500347 30 80 0.375 5 24 0.208 13 16 ... ATL SAS 0.406250 0.140000 0.162500 0.175185 0.554054 0.187500 0.283784 0.228447
29 1610612737 21500286 37 74 0.500 10 26 0.385 16 20 ... ATL LAL 0.567568 0.250000 0.216216 0.187225 0.447674 0.306122 0.116279 0.172563
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1200 1610612762 21500860 33 77 0.429 3 17 0.176 9 12 ... UTA SAS 0.448052 0.105263 0.116883 0.123208 0.537975 0.250000 0.139241 0.127492
1201 1610612762 21500843 38 74 0.514 10 24 0.417 31 38 ... UTA HOU 0.581081 0.250000 0.418919 0.202468 0.477011 0.250000 0.356322 0.129582
1202 1610612762 21500817 37 68 0.544 10 23 0.435 27 44 ... UTA BOS 0.617647 0.235294 0.397059 0.158966 0.413580 0.156863 0.320988 0.073038
1203 1610612762 21500758 30 77 0.390 8 27 0.296 16 23 ... UTA MIL 0.441558 0.260870 0.207792 0.166445 0.471831 0.175000 0.197183 0.214500
1204 1610612762 21500743 33 74 0.446 5 22 0.227 14 20 ... UTA DEN 0.479730 0.200000 0.189189 0.157658 0.400000 0.230769 0.357143 0.156390
1205 1610612762 21500728 38 85 0.447 7 18 0.389 22 33 ... UTA CHI 0.488235 0.227273 0.258824 0.135240 0.461111 0.166667 0.144444 0.133588
1206 1610612762 21500704 40 76 0.526 10 25 0.400 13 16 ... UTA MIN 0.592105 0.285714 0.171053 0.151093 0.480000 0.302326 0.240000 0.160110
1207 1610612762 21500690 40 81 0.494 12 31 0.387 10 15 ... UTA CHA 0.567901 0.189189 0.123457 0.110375 0.411765 0.116279 0.250000 0.199468
1208 1610612762 21500673 34 85 0.400 9 29 0.310 15 18 ... UTA DET 0.452941 0.255319 0.176471 0.138416 0.526667 0.159091 0.213333 0.108085
1209 1610612762 21500608 41 83 0.494 12 24 0.500 15 18 ... UTA LAL 0.566265 0.282051 0.180723 0.111210 0.394444 0.285714 0.122222 0.145955
1210 1610612762 21500591 35 83 0.422 6 34 0.176 25 34 ... UTA SAC 0.457831 0.200000 0.301205 0.128764 0.493243 0.333333 0.405405 0.160462
1211 1610612762 21500555 39 71 0.549 9 20 0.450 11 18 ... UTA MIA 0.612676 0.172414 0.154930 0.177936 0.451220 0.319149 0.109756 0.185833
1212 1610612762 21500518 30 75 0.400 12 29 0.414 19 25 ... UTA HOU 0.480000 0.348837 0.253333 0.174419 0.566176 0.171429 0.235294 0.183402
1213 1610612762 21500503 31 75 0.413 9 27 0.333 21 28 ... UTA MEM 0.473333 0.219512 0.280000 0.169635 0.450617 0.170213 0.172840 0.115256
1214 1610612762 21500489 43 86 0.500 15 33 0.455 8 11 ... UTA POR 0.587209 0.279070 0.093023 0.059637 0.551282 0.210526 0.128205 0.116063
1215 1610612762 21500467 28 84 0.333 7 26 0.269 32 36 ... UTA PHI 0.375000 0.340909 0.380952 0.150240 0.447674 0.173913 0.162791 0.172480
1216 1610612762 21500452 36 74 0.486 10 27 0.370 22 27 ... UTA LAC 0.554054 0.228571 0.297297 0.170430 0.563291 0.200000 0.253165 0.126904
1217 1610612762 21500417 36 79 0.456 9 25 0.360 29 37 ... UTA PHX 0.512658 0.261905 0.367089 0.106067 0.436709 0.200000 0.253165 0.140268
1218 1610612762 21500395 34 73 0.466 10 25 0.400 19 25 ... UTA DEN 0.534247 0.147059 0.260274 0.150538 0.430380 0.306122 0.253165 0.157303
1219 1610612762 21500380 32 67 0.478 7 20 0.350 23 29 ... UTA NOP 0.529851 0.151515 0.343284 0.138313 0.534247 0.216216 0.356164 0.114521
1220 1610612762 21500341 33 78 0.423 8 28 0.286 16 24 ... UTA OKC 0.474359 0.244444 0.205128 0.124210 0.506329 0.282051 0.177215 0.147660
1221 1610612762 21500327 39 80 0.488 9 21 0.429 19 23 ... UTA NYK 0.543750 0.236842 0.237500 0.147183 0.448718 0.142857 0.192308 0.107666
1222 1610612762 21500297 43 92 0.467 8 23 0.348 28 37 ... UTA IND 0.510870 0.372549 0.304348 0.143843 0.494624 0.255319 0.290323 0.135184
1223 1610612762 21500279 31 72 0.431 14 33 0.424 18 24 ... UTA ORL 0.527778 0.138889 0.250000 0.196769 0.494382 0.209302 0.168539 0.103520
1224 1610612762 21500259 40 89 0.449 6 19 0.316 17 22 ... UTA GSW 0.483146 0.243902 0.191011 0.082747 0.573171 0.358974 0.146341 0.167411
1225 1610612762 21500243 38 82 0.463 9 18 0.500 16 21 ... UTA NOP 0.518293 0.279070 0.195122 0.159168 0.456522 0.097561 0.347826 0.169851
1226 1610612762 21500208 28 73 0.384 5 19 0.263 28 40 ... UTA OKC 0.417808 0.302326 0.383562 0.212982 0.616883 0.176471 0.207792 0.168563
1227 1610612762 21500173 35 72 0.486 7 22 0.318 16 22 ... UTA TOR 0.534722 0.200000 0.222222 0.185428 0.456250 0.209302 0.200000 0.150667
1228 1610612762 21500091 31 74 0.419 12 27 0.444 15 18 ... UTA MEM 0.500000 0.225000 0.202703 0.223595 0.362637 0.245283 0.142857 0.142624
1229 1610612762 21500068 33 88 0.375 5 24 0.208 21 33 ... UTA POR 0.403409 0.313725 0.238636 0.094221 0.603896 0.351351 0.194805 0.195715

1230 rows × 52 columns


In [5]:
# Doing dunk data much much later
#sched_2014.add_dunk_data(log = True)

In [6]:
sched_2014.get_games().columns


Out[6]:
Index([u'Team_ID_home', u'Game_ID', u'FGM_home', u'FGA_home', u'FG_PCT_home',
       u'FG3M_home', u'FG3A_home', u'FG3_PCT_home', u'FTM_home', u'FTA_home',
       u'FT_PCT_home', u'OREB_home', u'DREB_home', u'REB_home', u'AST_home',
       u'STL_home', u'BLK_home', u'TOV_home', u'PF_home', u'PTS_home',
       u'Team_ID_away', u'FGM_away', u'FGA_away', u'FG_PCT_away', u'FG3M_away',
       u'FG3A_away', u'FG3_PCT_away', u'FTM_away', u'FTA_away', u'FT_PCT_away',
       u'OREB_away', u'DREB_away', u'REB_away', u'AST_away', u'STL_away',
       u'BLK_away', u'TOV_away', u'PF_away', u'PTS_away', u'GAME_DATE', u'WL',
       u'MIN', u'Home Team', u'Away Team', u'H_FF_EFG', u'H_FF_ORB',
       u'H_FF_FTFGA', u'H_FF_TOV', u'A_FF_EFG', u'A_FF_ORB', u'A_FF_FTFGA',
       u'A_FF_TOV', u'H_dunk_made', u'H_dunk_miss', u'H_dunk_score'],
      dtype='object')

In [37]:
games_2014 = sched_2014.get_games()
games_2014['Pts_diff'] = [x-y for x,y in zip(games_2014['PTS_home'], games_2014['PTS_away'])]
games_2014 = games_2014[['Pts_diff', 'PTS_home', 'PTS_away', 'GAME_DATE', 'WL', 'Home Team', 'Away Team', 'H_FF_EFG', 'H_FF_ORB', 'H_FF_FTFGA', 'H_FF_TOV', 'A_FF_EFG', 'A_FF_ORB', 'A_FF_FTFGA', 'A_FF_TOV']]
games_2014 = games_2014.rename(index = str, columns = {'PTS_away':'A_PTS', 'PTS_home':'H_PTS', 'WL':'H_WL'})
games_2014['H_WL'] = [1 if x=='W' else 0 for x in games_2014['H_WL']]
games_2014['A_WL'] = [1-x for x in games_2014['H_WL']]
stats_2014 = Stats(games_2014, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff')
stats_5 = stats_2014.get_lastn_stats(5)

In [38]:
games_2015 = sched_2015.get_games()
games_2015['Pts_diff'] = [x-y for x,y in zip(games_2015['PTS_home'], games_2015['PTS_away'])]
games_2015 = games_2015[['Pts_diff', 'PTS_home', 'PTS_away', 'GAME_DATE', 'WL', 'Home Team', 'Away Team', 'H_FF_EFG', 'H_FF_ORB', 'H_FF_FTFGA', 'H_FF_TOV', 'A_FF_EFG', 'A_FF_ORB', 'A_FF_FTFGA', 'A_FF_TOV']]
games_2015 = games_2015.rename(index = str, columns = {'PTS_away':'A_PTS', 'PTS_home':'H_PTS', 'WL':'H_WL'})
games_2015['H_WL'] = [1 if x=='W' else 0 for x in games_2015['H_WL']]
games_2015['A_WL'] = [1-x for x in games_2015['H_WL']]
stats_2015 = Stats(games_2015, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff')
stats_5 = stats_5.append(stats_2015.get_lastn_stats(5))
stats_5.to_csv('stats_5.csv', index = False)

In [3]:
import numpy as np
stats_5 = pd.read_csv('stats_5.csv')

games_2015 = sched_2015.get_games()
games_2015['Pts_diff'] = [x-y for x,y in zip(games_2015['PTS_home'], games_2015['PTS_away'])]
games_2015 = games_2015[['Pts_diff', 'PTS_home', 'PTS_away', 'GAME_DATE', 'WL', 'Home Team', 'Away Team', 'H_FF_EFG', 'H_FF_ORB', 'H_FF_FTFGA', 'H_FF_TOV', 'A_FF_EFG', 'A_FF_ORB', 'A_FF_FTFGA', 'A_FF_TOV']]
games_2015 = games_2015.rename(index = str, columns = {'PTS_away':'A_PTS', 'PTS_home':'H_PTS', 'WL':'H_WL'})
games_2015['H_WL'] = [1 if x=='W' else 0 for x in games_2015['H_WL']]
games_2015['A_WL'] = [1-x for x in games_2015['H_WL']]
stats_2015 = Stats(games_2015, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff')

#print np.corrcoef(stats_5['Pts_diff'], stats_5['H_WL_5'])[0,1]
stats_2015.get_correl(stats_5)


Out[3]:
Correlation
H_PTS_5 0.111043
H_O_PTS_5 -0.028201
A_PTS_5 -0.056925
A_O_PTS_5 0.079918
H_WL_5 0.195208
H_O_WL_5 -0.169209
A_WL_5 -0.153628
A_O_WL_5 0.168239
H_FF_EFG_5 0.062029
H_O_FF_EFG_5 0.002441
A_FF_EFG_5 -0.018854
A_O_FF_EFG_5 0.044173
H_FF_ORB_5 0.026042
H_O_FF_ORB_5 0.013560
A_FF_ORB_5 0.012910
A_O_FF_ORB_5 0.027436
H_FF_FTFGA_5 0.018500
H_O_FF_FTFGA_5 0.018258
A_FF_FTFGA_5 0.015683
A_O_FF_FTFGA_5 0.028083
H_FF_TOV_5 0.013531
H_O_FF_TOV_5 0.026361
A_FF_TOV_5 0.018966
A_O_FF_TOV_5 0.013385

In [12]:
from Regression.Reg_Model import Reg_Model
model = Reg_Model()
x = stats_5.drop('Pts_diff', axis=1)
model.set_training(x, stats_5['Pts_diff'])
model.calc_model()

In [16]:
print model.model_type
print model.mse


ridge
145.461816281

In [5]:
from Regression.Game_Scores_v2 import Game_Scores
x = stats_5.drop('Pts_diff', axis=1)
gs = Game_Scores(stats_5, x, 'Pts_diff')

In [6]:
gs.create_rank_order_graph()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-6-7b32bd33086d> in <module>()
----> 1 gs.create_rank_order_graph()

/Users/Matt/Documents/!Research/Github/NBA/Regression/Game_Scores_v2.py in create_rank_order_graph(self)
     84     def create_rank_order_graph(self):
     85         if self.rank_order is None:
---> 86             self.get_rank_order_acc()
     87         x = self.rank_order['Cum Count']
     88         y = self.rank_order['Cum Win']

/Users/Matt/Documents/!Research/Github/NBA/Regression/Game_Scores_v2.py in get_rank_order_acc(self)
     58     def get_rank_order_acc(self):
     59         if self.win_scores == None:
---> 60             self.get_win_scores(self.x)
     61         set_ws = sorted(list(set(self.win_scores)))
     62         self.rank_order = pd.DataFrame(index=set_ws)

/Users/Matt/Documents/!Research/Github/NBA/Regression/Game_Scores_v2.py in get_win_scores(self, test_x)
     51     def get_win_scores(self, test_x):
     52         if self.pred == None:
---> 53             self.get_pred(test_x)
     54         self.win_scores = [round((50 + x) / 100, 2) for x in (self.pred)]
     55         return self.win_scores

AttributeError: 'Game_Scores' object has no attribute 'get_pred'

In [ ]:


In [41]:
import matplotlib.pyplot as plt
plt.scatter(stats_5['Pts_diff'],stats_5['H_PTS_5'])
plt.show()

In [42]:
plt.scatter(stats_5['Pts_diff'], stats_5['H_O_PTS_5'])
plt.show()

In [43]:
plt.scatter(stats_5['Pts_diff'], stats_5['H_WL_5'])
plt.show()

In [30]:
plt.scatter(stats_5['Pts_diff'], stats_5['H_FF_EFG_5'])
plt.show()

In [23]:
stats_5.columns


Out[23]:
Index([u'H_PTS', u'H_PTS_5', u'H_O_PTS_5', u'A_PTS_5', u'A_O_PTS_5', u'H_WL_5',
       u'H_O_WL_5', u'A_WL_5', u'A_O_WL_5', u'H_FF_EFG_5', u'H_O_FF_EFG_5',
       u'A_FF_EFG_5', u'A_O_FF_EFG_5', u'H_FF_ORB_5', u'H_O_FF_ORB_5',
       u'A_FF_ORB_5', u'A_O_FF_ORB_5', u'H_FF_FTFGA_5', u'H_O_FF_FTFGA_5',
       u'A_FF_FTFGA_5', u'A_O_FF_FTFGA_5', u'H_FF_TOV_5', u'H_O_FF_TOV_5',
       u'A_FF_TOV_5', u'A_O_FF_TOV_5'],
      dtype='object')

In [ ]:
#so i don't see any huge indicators from these stats for home scores.  i wonder if i should be looking at h-a score