In [99]:
import os
import pandas as pd
In [100]:
# Root of the local buda-ratings checkout. Defaults to the original author's
# location; set the BUDA_RATINGS_DIR environment variable to run the notebook
# from a different machine or directory without editing this cell.
base_dir = os.environ.get(
    "BUDA_RATINGS_DIR",
    os.path.join("/Users", "sbussmann", "Development", "buda", "buda-ratings"),
)
# Directory the per-league game score CSVs are read from.
scores_dir = os.path.join(base_dir, "data", "raw", "game_scores")
# Directory the derived tables are written to.
interim_dir = os.path.join(base_dir, "data", "interim")
In [101]:
# League identifier for the 2016 summer club league. It is interpolated into
# the input scores file name and into the output file names written below.
league_id = 40264
In [102]:
# Name of the raw scores file for the selected league.
file_name = "scores_%s.csv" % (league_id,)
In [103]:
# Full path of the league's scores CSV inside the raw scores directory.
file_path = os.path.join(scores_dir, file_name)
In [104]:
# Load the raw game scores. Later cells read the columns 'Team A', 'Team B',
# 'Score A', 'Score B', 'divname' and 'Tourney Qualifying games*' from it.
scores = pd.read_csv(file_path)
In [105]:
# Preview the first rows of the raw scores table.
scores.head()
Out[105]:
In [106]:
# Work on a copy so the raw `scores` frame stays untouched for later cells.
winloss = scores.copy()
In [107]:
# Boolean outcome flag: True only when Team A outscored Team B, so a tie is
# recorded as False just like a loss.
winloss['Team A Wins'] = winloss['Score A'].gt(winloss['Score B'])
In [108]:
# Inspect the first 20 rows to check the new 'Team A Wins' column.
winloss.head(20)
Out[108]:
In [109]:
# Remove the raw score columns and the tourney-qualifier column; the outcome
# is already captured in 'Team A Wins'.
winloss = winloss.drop(['Tourney Qualifying games*', 'Score A', 'Score B'], axis=1)
In [110]:
# Inspect the first 20 rows after the columns were dropped.
winloss.head(20)
Out[110]:
In [111]:
# Distinct team names, taken from the 'Team A' column only, in order of first
# appearance.
teams = winloss['Team A'].unique()
In [112]:
# Give every team listed in `teams` a dense integer index (its position in
# `teams`) and record the division found on the first row where it is 'Team A'.
team2index = {team: i for i, team in enumerate(teams)}

# One pass over the table instead of a full boolean scan per team: keeping the
# first row for each 'Team A' value yields the same division as taking
# `.unique()[0]` over all of that team's rows.
first_rows = winloss.drop_duplicates(subset='Team A')
team2div = dict(zip(first_rows['Team A'], first_rows['divname']))
In [113]:
# Attach the integer index and the division of both sides of every game.
# Lookups go straight through the dicts, so a team that appears in 'Team B'
# but was never seen in 'Team A' raises KeyError here.
for side in ('A', 'B'):
    winloss['Index ' + side] = winloss['Team ' + side].apply(team2index.__getitem__)
for side in ('A', 'B'):
    winloss['Div ' + side] = winloss['Team ' + side].apply(team2div.__getitem__)
In [114]:
# Keep only the games in which neither side belongs to one of the two Open
# divisions.
open_divs = ['Open Div 1', 'Open Div 2']
has_open_team = winloss['Div A'].isin(open_divs) | winloss['Div B'].isin(open_divs)
mixed_winloss = winloss[~has_open_team]
In [115]:
# Row/column count after removing Open-division games.
mixed_winloss.shape
Out[115]:
In [116]:
# Save the filtered table before mirrored rows are removed. `index=None` keeps
# the DataFrame index out of the CSV.
mixed_winloss.to_csv(os.path.join(interim_dir, 'winloss_with_duplicates.csv'), index=None)
In [90]:
# Remove mirrored rows: a row is dropped when its reversed pairing
# (Index B, Index A) has already been seen on this or an earlier row.
# A set gives constant-time membership checks and the rows are dropped in one
# call, instead of scanning a growing list and rebuilding the frame for every
# dropped row.
# NOTE(review): once both orientations of a pairing have been seen, every
# later row for that pairing is dropped, so if two teams meet more than once
# only the first row survives — confirm that this is intended.
seen_pairs = set()
rows_to_drop = []
for row, team_A, team_B in zip(mixed_winloss.index,
                               mixed_winloss['Index A'],
                               mixed_winloss['Index B']):
    # The current pairing is recorded before the check (as in the original
    # loop), so a row whose two indices are equal is dropped as well.
    seen_pairs.add((team_A, team_B))
    if (team_B, team_A) in seen_pairs:
        rows_to_drop.append(row)
mixed_winloss = mixed_winloss.drop(rows_to_drop)
In [91]:
# Row/column count after mirrored rows were removed.
mixed_winloss.shape
Out[91]:
In [92]:
# Preview the de-duplicated table.
mixed_winloss.head()
Out[92]:
In [93]:
# Subset without division information: teams, outcome flag and integer team
# indices only.
flatprior_columns = ['Team A', 'Team B', 'Team A Wins', 'Index A', 'Index B']
mixed_winloss_flatprior = mixed_winloss[flatprior_columns]
In [97]:
# Write the division-free subset to the interim directory, tagged with the
# league id; the DataFrame index is not written.
outmwlfppath = os.path.join(interim_dir, 'winloss_simpleprior_{}.csv'.format(league_id))
mixed_winloss_flatprior.to_csv(outmwlfppath, index=None)
In [95]:
# Same subset as the division-free table plus the division of each side.
divprior_columns = ['Team A', 'Team B', 'Team A Wins', 'Index A', 'Index B', 'Div A', 'Div B']
mixed_winloss_divprior = mixed_winloss[divprior_columns]
In [98]:
# Write the subset that includes divisions to the interim directory, tagged
# with the league id; the DataFrame index is not written.
outmwldppath = os.path.join(interim_dir, 'winloss_divprior_{}.csv'.format(league_id))
mixed_winloss_divprior.to_csv(outmwldppath, index=None)
In [60]:
# Write the full `winloss` table (not `mixed_winloss`): it still contains the
# Open-division games and the mirrored rows. The DataFrame index is not written.
outwlpath = os.path.join(interim_dir, 'winloss_{}.csv'.format(league_id))
winloss.to_csv(outwlpath, index=None)
In [15]:
# Distinct (team, division) combinations found in the raw scores.
scorediv = scores[['Team A', 'divname']].drop_duplicates()
In [17]:
# Index the lookup table by team name so divisions can be fetched with .loc.
# Not re-runnable on its own: after this cell 'Team A' is no longer a column.
scorediv = scorediv.set_index('Team A')
In [21]:
# Spot-check the lookup table with the label 'AHOC' (presumably a team name).
scorediv.loc['AHOC', 'divname']
Out[21]:
In [ ]:
# Map each team name to the first division listed for it in the raw scores.
# NOTE(review): the original cell stopped at a dangling `index =`, which is a
# syntax error; the loop body below is a reconstruction that follows the same
# mask-and-first-value pattern used for `team2div` — confirm the intent.
map2div = {}
teams = scores['Team A'].unique()
for team in teams:
    index = scores['Team A'] == team
    map2div[team] = scores.loc[index, 'divname'].unique()[0]
In [38]:
# Per-team totals of the numeric columns, one row per 'Team A' value, with
# the team name restored as a regular column.
# `numeric_only=True` is explicit because current pandas no longer skips
# string columns in a groupby sum: 'Team B' and 'divname' would be
# concatenated (or raise on mixed values) instead of being left out.
heuristicdf = scores.groupby('Team A').sum(numeric_only=True).sort_index().reset_index()
In [39]:
# Preview the per-team totals.
heuristicdf.head()
Out[39]:
In [40]:
# Point differential per team: total of 'Score A' minus total of 'Score B'.
heuristicdf = heuristicdf.assign(PlusMinus=lambda totals: totals['Score A'] - totals['Score B'])
In [41]:
# Preview the table with the new 'PlusMinus' column.
heuristicdf.head()
Out[41]:
In [42]:
# Look up the division of every team in `heuristicdf`, in row order, to check
# the values before attaching them as a column.
scorediv.loc[heuristicdf['Team A'].values, 'divname'].values
Out[42]:
In [43]:
# Attach each team's division as a 'divname' column, looked up from `scorediv`
# in `heuristicdf` row order.
# NOTE(review): assumes `scorediv` holds exactly one row per team; a team
# listed under two divisions would make the looked-up array longer than the
# frame — confirm against the data.
heuristicdf = heuristicdf.assign(divname = scorediv.loc[heuristicdf['Team A'].values, 'divname'].values)
In [67]:
# Fresh copy of the raw scores used to derive per-game win/loss/tie flags.
tmpwl = scores.copy()
In [68]:
# 0/1 outcome flags from Team A's point of view; stored as integers so that a
# per-team sum yields the win, loss and tie counts.
score_a, score_b = tmpwl['Score A'], tmpwl['Score B']
tmpwl['Team A Wins'] = score_a.gt(score_b).astype('int')
tmpwl['Team A Loses'] = score_a.lt(score_b).astype('int')
tmpwl['Team A Ties'] = score_a.eq(score_b).astype('int')
In [73]:
# Per-team totals, indexed by 'Team A'; the summed 0/1 flags give each team's
# win, loss and tie counts.
# `numeric_only=True` is explicit because current pandas no longer skips
# string columns in a groupby sum: 'Team B' and 'divname' would be
# concatenated (or raise on mixed values) instead of being left out.
wlrecord = tmpwl.groupby('Team A').sum(numeric_only=True)
In [74]:
# Inspect the first 20 per-team records.
wlrecord.head(20)
Out[74]:
In [77]:
# Attach each team's win/loss/tie counts to its score totals, matched on team.
# At this point the team is still held in the 'Team A' column (no 'Team Name'
# column exists yet), so index on that and label the index 'Team Name' after
# the join. Only the three record columns are taken from `wlrecord`: it also
# carries 'Score A'/'Score B' sums, which would collide with the columns of
# the same name already present in `heuristicdf`.
record_cols = ['Team A Wins', 'Team A Loses', 'Team A Ties']
heuristicdf = (
    heuristicdf
    .set_index('Team A')
    .join(wlrecord[record_cols])
    .rename_axis('Team Name')
)
In [79]:
# Drop the raw score totals and the tourney-qualifier column from the table.
# All three labels must be present, otherwise drop() raises KeyError.
heuristicdf = heuristicdf.drop(['Tourney Qualifying games*', 'Score A', 'Score B'], axis=1)
In [82]:
# Give the record columns their final names. rename() ignores mapping keys
# that match no column, so the 'Team A' entry has no effect when the team
# name is held in the index (set in the join step) rather than in a column.
heuristicdf = heuristicdf.rename(columns={'Team A': 'Team Name',
'Team A Wins': 'Wins',
'Team A Loses': 'Losses',
'Team A Ties': 'Ties'})
In [83]:
# Preview the final per-team summary.
heuristicdf.head()
Out[83]:
In [84]:
# Write the per-team summary to the interim directory, tagged with the league
# id. Unlike the other exports, the index is written too, because it holds
# the team name.
outhuepath = os.path.join(interim_dir, 'plusminus_{}.csv'.format(league_id))
heuristicdf.to_csv(outhuepath)
In [ ]: