In [1]:
import pandas as pd
from Player.Players import Players
from Regression.Reg_Model import Reg_Model
In [2]:
# Load the players for the 2015-16 season. The result is a keyed collection:
# later cells iterate over its keys and look players up with Player_List[key].
Player_List = Players('2015-16').players
In [11]:
#prepare model for subset of players
# Fit one PTS regression per player: the predictors are the `*_avg_5` columns
# returned by get_stats and the target is `PTS`. Only rows with n_games >= 5
# are used, and a player needs at least 5 such rows to receive a model.
p_subset_list = []  # keys of the players that received a 'PTS' model
index = 0           # number of players visited so far
feature_cols = ['PTS_avg_5', 'MIN_avg_5', 'FGM_avg_5', 'FGA_avg_5', 'FG_PCT_avg_5']
for key in Player_List:
    p = Player_List[key]
    stats = p.get_stats('GAME_DATE', 'Game_ID', 'PTS', ['PTS', 'MIN', 'FGM', 'FGA', 'FG_PCT'], 5)
    # Was `len(stats>0)`: the misplaced parenthesis compared every cell of the
    # frame with 0 before taking the length, instead of testing for emptiness.
    if len(stats) > 0:
        stats = stats[stats['n_games'] >= 5]
        y = stats['PTS']
        x = stats[feature_cols]
        if len(y) >= 5:
            Model = Reg_Model()
            Model.set_training(x, y)
            Model.calc_model()
            p.set_model('PTS', Model)
            p_subset_list.append(key)
    # NOTE(review): the exported source lost its indentation, so it is unclear
    # whether this counter tracked visited players or fitted models. It is
    # treated as visited players here -- confirm.
    index += 1
    if index > 50:
        break
In [12]:
#average MSE for players
import numpy as np
# Mean of the per-player 'PTS' model MSEs over the players that received a
# model. print(...) with a single argument prints the same text on Python 2
# and Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(np.mean([Player_List[a].model_list['PTS'].mse for a in p_subset_list]))
In [41]:
#Try to see how data looks like by position, season_exp
# Fit one PTS regression per (position, experience bucket) segment on the
# `*_avg_5` columns returned by get_stats, and store each segment's MSE in
# mse_by_seg under the key '<position>-<bucket start>'.
exp = [0,4,8,12]  # lower bounds of the SEASON_EXP buckets; each spans [ex, ex+4)
# NOTE: with these bounds a player whose SEASON_EXP is 16 or more is in no bucket.
positions = set([Player_List[k].desc['POSITION'][0] for k in Player_List])
feature_cols = ['PTS_avg_5', 'MIN_avg_5', 'FGM_avg_5', 'FGA_avg_5', 'FG_PCT_avg_5']
mse_by_seg = {}
for pos in positions:
    for ex in exp:
        # Collect the per-player frames and concatenate once, instead of
        # growing two DataFrames with DataFrame.append inside the loop
        # (quadratic copying; append was removed in pandas 2.0).
        seg_frames = []
        for key in Player_List:
            player = Player_List[key]
            if player.desc['POSITION'][0] != pos:
                continue
            season_exp = player.desc['SEASON_EXP'][0]
            if season_exp < ex or season_exp >= (ex + 4):
                continue
            player_stats = player.get_stats('GAME_DATE', 'Game_ID', 'PTS', ['PTS', 'MIN', 'FGM', 'FGA', 'FG_PCT'], 5)
            # Was `len(stats>0)`: misplaced parenthesis around the comparison.
            if len(player_stats) > 0:
                seg_frames.append(player_stats[player_stats['n_games'] >= 5])
        if not seg_frames:
            # No player with game data in this segment: nothing to fit.
            # (Previously this raised KeyError on y['PTS'] of an empty frame.)
            continue
        seg_stats = pd.concat(seg_frames, ignore_index=True)
        if len(seg_stats) == 0:
            continue
        x = seg_stats[feature_cols]
        y = seg_stats['PTS']
        Model = Reg_Model()
        Model.set_training(x, y)
        Model.calc_model()
        segment_name = pos + '-' + str(ex)
        mse_by_seg[segment_name] = Model.mse
In [42]:
# Display the per-segment MSE dictionary built by the preceding cell.
mse_by_seg
Out[42]:
In [43]:
#Repeat with 10 games
# Same per-segment fit as the 5-game version, but on the `*_avg_10` columns
# returned by get_stats(..., 10). Each segment's MSE is stored in mse_by_seg
# under the key '<position>-<bucket start>'.
exp = [0,4,8,12]  # lower bounds of the SEASON_EXP buckets; each spans [ex, ex+4)
positions = set([Player_List[k].desc['POSITION'][0] for k in Player_List])
feature_cols = ['PTS_avg_10', 'MIN_avg_10', 'FGM_avg_10', 'FGA_avg_10', 'FG_PCT_avg_10']
mse_by_seg = {}
for pos in positions:
    for ex in exp:
        # Collect the per-player frames and concatenate once, instead of
        # growing two DataFrames with DataFrame.append inside the loop
        # (quadratic copying; append was removed in pandas 2.0).
        seg_frames = []
        for key in Player_List:
            player = Player_List[key]
            if player.desc['POSITION'][0] != pos:
                continue
            season_exp = player.desc['SEASON_EXP'][0]
            if season_exp < ex or season_exp >= (ex + 4):
                continue
            player_stats = player.get_stats('GAME_DATE', 'Game_ID', 'PTS', ['PTS', 'MIN', 'FGM', 'FGA', 'FG_PCT'], 10)
            # Was `len(stats>0)`: misplaced parenthesis around the comparison.
            if len(player_stats) > 0:
                # NOTE(review): the threshold is still 5 although the window
                # is 10 games -- confirm this is intended.
                seg_frames.append(player_stats[player_stats['n_games'] >= 5])
        if not seg_frames:
            # No player with game data in this segment: nothing to fit.
            # (Previously this raised KeyError on y['PTS'] of an empty frame.)
            continue
        seg_stats = pd.concat(seg_frames, ignore_index=True)
        if len(seg_stats) == 0:
            continue
        x = seg_stats[feature_cols]
        y = seg_stats['PTS']
        Model = Reg_Model()
        Model.set_training(x, y)
        Model.calc_model()
        segment_name = pos + '-' + str(ex)
        mse_by_seg[segment_name] = Model.mse
In [44]:
# Display the per-segment MSE dictionary built by the preceding 10-game cell.
mse_by_seg
Out[44]:
In [50]:
# Next, see how other variables can be added to the model. It should probably take into account the opposing team and the player's current team.
from Schedule.Schedule import Schedule
from Schedule.Stats import Stats
In [47]:
# Build the schedule object with b_dt='10/1/2015' (presumably the begin date
# of the games to load -- confirm against Schedule), then call
# add_four_factors() on it (presumably attaches the FF_* team columns that
# later cells select -- confirm).
sched_2015 = Schedule(b_dt = '10/1/2015')
sched_2015.add_four_factors()
In [51]:
# Create the object used to compute last-n team statistics from the schedule's games.
# NOTE(review): `stats` is bound to a Stats object here, while other cells use
# the same name for the per-player frames returned by get_stats(). Running
# cells out of order can therefore break the `stats.get_lastn_stats(...)` call.
games = sched_2015.get_games()
stats = Stats(games, 'avg', 'GAME_DATE', 'Home Team', 'Away Team', 'Pts_diff', ['Game_ID'])
In [52]:
# Last-n team statistics with n=10; the cells below filter this frame and
# merge it with player data on 'Game_ID'.
stats_10 = stats.get_lastn_stats(10)
In [53]:
# Keep only rows whose H_10_games and A_10_games both equal 10 (presumably:
# both teams have a full 10-game history -- confirm), printing the row count
# before and after each filter.
# print(...) with a single argument prints the same text on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(len(stats_10))
stats_10 = stats_10[stats_10['H_10_games'] == 10]
print(len(stats_10))
stats_10 = stats_10[stats_10['A_10_games'] == 10]
print(len(stats_10))
In [195]:
#Create model based on team data
# For every (position, experience bucket) segment this cell
#   1. gathers each matching player's get_stats(..., 10) frame and tags every
#      game with a Home flag derived from the game log's MATCHUP text,
#   2. joins the team-level frame `stats_10` on Game_ID, separately for home
#      and away games,
#   3. renames the H_/A_ team columns relative to the player: 'Y_' is the side
#      the player was on (H_ in home games, A_ in away games) and 'M_' is the
#      other side,
#   4. fits one PTS regression on the combined rows and stores its MSE in
#      mse_by_seg under the key '<position>-<bucket start>'.
#
# Fix: the previous version built the per-player frame and then ignored it.
# It fitted every segment on `h_games` / `a_games`, names that only a later
# cell creates, and only for a single player. The cell therefore failed with a
# NameError on a fresh top-to-bottom run, and otherwise trained every segment
# on the same rows. It now depends only on `Player_List` and `stats_10`.


def _home_flags(game_logs):
    """Return a Game_ID/Home frame for one player's game logs.

    Home is 0 when the second space-separated token of MATCHUP is '@' and 1
    otherwise. The input frame is not modified.
    """
    #Gets Player stats data and adds home indicator
    second_token = game_logs['MATCHUP'].str.split(' ').str.get(1)
    flags = pd.DataFrame({'Game_ID': game_logs['Game_ID'],
                          'Home': (second_token != '@').astype(int)})
    return flags[['Game_ID', 'Home']]


def _relabel_team_columns(frame, own_prefix, other_prefix):
    """Return `frame` with `own_prefix` columns renamed 'Y_...' and `other_prefix` columns renamed 'M_...'."""
    renames = {}
    for col in frame.columns.values:
        if col[0:2] == own_prefix:
            renames[col] = 'Y_' + col[2:]
        elif col[0:2] == other_prefix:
            renames[col] = 'M_' + col[2:]
    return frame.rename(columns=renames)


exp = [0,4,8,12]  # lower bounds of the SEASON_EXP buckets; each spans [ex, ex+4)
positions = set([Player_List[k].desc['POSITION'][0] for k in Player_List])
feature_cols = ['PTS_avg_10', 'MIN_avg_10', 'FGM_avg_10', 'FGA_avg_10', 'FG_PCT_avg_10', 'M_BTB',
                'M_FF_EFG_10', 'M_FF_FTFGA_10', 'M_FF_ORB_10',
                'M_FF_TOV_10', 'M_O_FF_EFG_10',
                'M_O_FF_FTFGA_10', 'M_O_FF_ORB_10',
                'M_O_FF_TOV_10', 'M_O_PTS_10', 'M_O_WL_10',
                'M_PTS_10', 'M_WL_10',
                'Y_FF_EFG_10', 'Y_FF_FTFGA_10', 'Y_FF_ORB_10',
                'Y_FF_TOV_10', 'Y_O_FF_EFG_10',
                'Y_O_FF_FTFGA_10', 'Y_O_FF_ORB_10',
                'Y_O_FF_TOV_10', 'Y_O_PTS_10', 'Y_O_WL_10',
                'Y_PTS_10', 'Y_WL_10']
mse_by_seg = {}
for pos in positions:
    for ex in exp:
        seg_frames = []
        for key in Player_List:
            player = Player_List[key]
            if player.desc['POSITION'][0] != pos:
                continue
            season_exp = player.desc['SEASON_EXP'][0]
            if season_exp < ex or season_exp >= (ex + 4):
                continue
            player_stats = player.get_stats('GAME_DATE', 'Game_ID', 'PTS', ['PTS', 'MIN', 'FGM', 'FGA', 'FG_PCT'], 10)
            # Was `len(stats>0)`: misplaced parenthesis around the comparison.
            if len(player_stats) > 0:
                player_stats = pd.merge(player_stats, _home_flags(player.game_logs), on='Game_ID')
                seg_frames.append(player_stats[player_stats['n_games'] >= 5])
        if not seg_frames:
            # No player with game data in this segment: nothing to fit.
            continue
        seg_stats = pd.concat(seg_frames, ignore_index=True)
        # Inner joins: games without a row in stats_10 are dropped.
        seg_home = pd.merge(seg_stats[seg_stats['Home'] == 1], stats_10, on='Game_ID')
        seg_away = pd.merge(seg_stats[seg_stats['Home'] == 0], stats_10, on='Game_ID')
        seg_all = pd.concat([_relabel_team_columns(seg_home, 'H_', 'A_'),
                             _relabel_team_columns(seg_away, 'A_', 'H_')],
                            ignore_index=True)
        if len(seg_all) == 0:
            continue
        x = seg_all[feature_cols]
        y = seg_all['PTS']
        Model = Reg_Model()
        Model.set_training(x, y)
        Model.calc_model()
        segment_name = pos + '-' + str(ex)
        mse_by_seg[segment_name] = Model.mse
In [196]:
# Display the per-segment MSE dictionary built by the team-data cell above.
mse_by_seg
Out[196]:
In [ ]:
In [ ]:
In [118]:
# List the columns of stats_10 to see which variables are available.
stats_10.columns
Out[118]:
In [119]:
#join player stats and game stats for 1 player
from Player.Player import Player
lebron = Player(f_name='Lebron', l_name='James')
# Home flag per game: 0 when the second space-separated MATCHUP token is '@',
# otherwise 1. It is built as a new frame in one vectorized step, replacing a
# row-by-row DataFrame.set_value loop (set_value was removed in pandas 1.0).
# This also leaves lebron.game_logs unmodified.
game_logs = lebron.game_logs
second_token = game_logs['MATCHUP'].str.split(' ').str.get(1)
data = pd.DataFrame({'Game_ID': game_logs['Game_ID'],
                     'Home': (second_token != '@').astype(int)})[['Game_ID', 'Home']]
stats = lebron.get_stats('GAME_DATE', 'Game_ID', 'PTS', ['PTS', 'MIN', 'FGM', 'FGA', 'FG_PCT'], 10)
# Attach the Home flag to the player's stats, then keep rows with n_games >= 10.
# The row count is printed before and after the filter.
stats_v2 = pd.merge(stats, data, on='Game_ID')
print(len(stats_v2))
stats_v3 = stats_v2[stats_v2['n_games'] >= 10]
print(len(stats_v3))
In [161]:
# Split the single-player frame by its Home flag, then attach the team-level
# stats_10 columns to each half by Game_ID. pd.merge defaults to an inner
# join, so the counts printed before and after show how many games have a
# matching stats_10 row.
# print(...) with a single argument prints the same text on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
home_only = stats_v3[stats_v3['Home'] == 1]
print(len(home_only))
away_only = stats_v3[stats_v3['Home'] == 0]
print(len(away_only))
h_games = pd.merge(home_only, stats_10, on='Game_ID')
print(len(h_games))
a_games = pd.merge(away_only, stats_10, on='Game_ID')
print(len(a_games))
In [163]:
# Rename the H_/A_ team columns relative to the player. In the home-game frame
# 'H_*' becomes 'Y_*' and 'A_*' becomes 'M_*'; in the away-game frame the
# mapping is reversed. Both frames then share the same Y_/M_ column names.
# Column counts are printed before and after each rename as a sanity check.
# print(...) with a single argument prints the same text on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
home_prefix_map = {'H_': 'Y_', 'A_': 'M_'}
away_prefix_map = {'H_': 'M_', 'A_': 'Y_'}

print(len(h_games.columns))
h_games = h_games.rename(columns=dict(
    (col, home_prefix_map[col[0:2]] + col[2:])
    for col in h_games.columns.values
    if col[0:2] in home_prefix_map))
print(len(h_games.columns))

print(len(a_games.columns))
a_games = a_games.rename(columns=dict(
    (col, away_prefix_map[col[0:2]] + col[2:])
    for col in a_games.columns.values
    if col[0:2] in away_prefix_map))
print(len(a_games.columns))
In [164]:
# Stack the home and away rows into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# ignore_index=True already renumbers the rows, so no reset_index is needed.
stats_all = pd.concat([h_games, a_games], ignore_index=True)
print(len(stats_all))
# Column counts of both inputs and of the result. The original printed
# a_games twice, which looks like a copy-paste slip for h_games.
print(len(h_games.columns))
print(len(a_games.columns))
print(len(stats_all.columns))
In [148]:
# List the columns of the combined home/away frame.
stats_all.columns
Out[148]:
In [170]:
# Keep rows with n_games above 4, then take PTS as the target and the five
# `*_avg_10` columns plus M_BTB as the predictors.
enough_games = stats_all['n_games'] > 4
stats_all = stats_all[enough_games]
base_feature_cols = [
    'PTS_avg_10',
    'MIN_avg_10',
    'FGM_avg_10',
    'FGA_avg_10',
    'FG_PCT_avg_10',
    'M_BTB',
]
y = stats_all['PTS']
x = stats_all[base_feature_cols]
In [185]:
# Keep rows with n_games above 4, then take PTS as the target. The predictors
# are the five `*_avg_10` columns, M_BTB, and the M_* and Y_* team columns.
enough_games = stats_all['n_games'] > 4
stats_all = stats_all[enough_games]

player_cols = ['PTS_avg_10', 'MIN_avg_10', 'FGM_avg_10', 'FGA_avg_10', 'FG_PCT_avg_10']
m_side_cols = [
    'M_BTB',
    'M_FF_EFG_10', 'M_FF_FTFGA_10', 'M_FF_ORB_10', 'M_FF_TOV_10',
    'M_O_FF_EFG_10', 'M_O_FF_FTFGA_10', 'M_O_FF_ORB_10', 'M_O_FF_TOV_10',
    'M_O_PTS_10', 'M_O_WL_10',
    'M_PTS_10', 'M_WL_10',
]
y_side_cols = [
    'Y_FF_EFG_10', 'Y_FF_FTFGA_10', 'Y_FF_ORB_10', 'Y_FF_TOV_10',
    'Y_O_FF_EFG_10', 'Y_O_FF_FTFGA_10', 'Y_O_FF_ORB_10', 'Y_O_FF_TOV_10',
    'Y_O_PTS_10', 'Y_O_WL_10',
    'Y_PTS_10', 'Y_WL_10',
]

y = stats_all['PTS']
# Column order matches the original selection: player, then M_*, then Y_*.
x = stats_all[player_cols + m_side_cols + y_side_cols]
In [186]:
# Print the Python type of every value in the first row of x, followed by the
# column name, to check that all predictors hold the expected kind of value.
# `.loc` replaces `.ix`, which was removed in pandas 1.0. The label comes from
# x's own index, so the lookup is label-based as before. The formatted
# print(...) call emits the same "<type> <column>" text on Python 2 and 3.
# NOTE(review): indentation was lost in the export; this assumes the original
# `break` left the row loop after the first row -- confirm.
if len(x) > 0:
    first_label = x.index[0]
    for col in x.columns.values:
        print('%s %s' % (type(x.loc[first_label, col]), col))
In [167]:
# Display the predictor frame. NOTE(review): this renders every row;
# x.head() would keep the output short.
x
Out[167]:
In [187]:
# Fit a regression on the x / y prepared above: create the model, hand it the
# training data, then run the fit. The following cells read Model.mse and
# Model.model_type.
Model = Reg_Model()
Model.set_training(x,y)
Model.calc_model()
In [188]:
# Display the MSE stored on the fitted model.
Model.mse
Out[188]:
In [189]:
# Display the model_type attribute of the fitted model.
Model.model_type
Out[189]:
In [ ]: