In [35]:
import pandas as pd
from pandas import Series,DataFrame
import string
import numpy as np
import scipy
from scipy import optimize
In [ ]:
def removepunct(str_in):
for n in [".",",","!","'",'"','\n','-']:
str_in = str_in.replace(n,'')
str_out = str_in.lower()
# Hard coding my way around a typo
# Error was in row 262 of results spreadsheet
# Remove this when typo is fixed
if str_out == "belmontgomery burbs":
str_out = "belmontgomery burns"
return unicode(str_out)
In [24]:
mens_mixed_url = 'https://docs.google.com/spreadsheets/d/15TNjIJDkopneZ6PWg4IGhTQNvTl-T5U8u_i9zgJPEXA/export?format=csv&id=15TNjIJDkopneZ6PWg4IGhTQNvTl-T5U8u_i9zgJPEXA&gid=0'
results_df = pd.read_csv(mens_mixed_url)
results_df.head()
Out[24]:
In [28]:
# Drop all games that have not occurred - where validate == 'N'
results_df = results_df[results_df['Validate'] != 'N']
# Create the team list from all the unique values in the Home column
# Assumes all teams have (so far) played at least one game at home
team_list = results_df['HOME'].unique()
results_df.reset_index(inplace = True)
print len(results_df),'games in results'
In [39]:
def optimizeme((K1,K2)):
# Default values:
#newteamK = 75
#oldteamK = 50
newteamK = K1
oldteamK = K2
elos_workbook = 'https://cdn.fbsbx.com/v/t59.2708-21/14505629_10157512405250331_5499472195350429696_n.xlsm/Full-Fix-Round-7-R3.xlsm?oh=fd98ccbd86bdb408d684df9244e1e834&oe=57F27865&dl=1'
elos_source = "Team Elo Week 1"
team_elos = pd.read_excel(elos_workbook,sheetname=elos_source)
team_elos.head()
team_elos['2016b Teams lower'] = team_elos['2016b Teams'].apply(removepunct)
# Create a dict of the teams and corresponding elo ratings
elos_dict = dict(zip(team_elos['2016b Teams lower'],team_elos['Starting Elo']))
newteam = team_elos['New Team'][3]
def getKfactor(x):
if x == newteam:
out = newteamK
else:
out = oldteamK
return out
team_elos['K Factor'] = team_elos['New Team'].apply(getKfactor)
team_K_factors = dict(zip(team_elos['2016b Teams lower'],team_elos['K Factor']))
results_df['Home Predicted Result'] = None
results_df['Away Predicted Result'] = None
results_df['Home Actual Result'] = None
results_df['Away Actual Result'] = None
results_df['Error'] = None
for row in range(0,len(results_df)):
# Print row for debugging purposes
# Only uncomment when debugging
#print row
# Get the home and away teams, make them lower and unicode
hometeam = results_df['HOME'][row]
hometeam = removepunct(hometeam)
awayteam = results_df['AWAY'][row]
awayteam = removepunct(awayteam)
# Get the elos from the elo dictionary
awayElo = elos_dict[awayteam]
homeElo = elos_dict[hometeam]
# Get the team K factors
homeK = team_K_factors[hometeam]
awayK = team_K_factors[awayteam]
# Calculate the Score Expectancies
homeSE = 1 / (1 + 10 ** -((homeElo - awayElo) / 400))
awaySE = 1 / (1 + 10 ** ((homeElo - awayElo) / 400))
# Write to DF
results_df.loc[row,'Home Predicted Result'] = homeSE
results_df.loc[row,'Away Predicted Result'] = awaySE
# Get home, away and total scores
homescore = results_df['SCORE'][row]
awayscore = results_df['SCORE.1'][row]
totalscore = homescore+awayscore
# Calculate home and away score percentages
homeScorePerc = homescore / totalscore
awayScorePerc = awayscore / totalscore
# Write to DF
results_df.loc[row,'Home Actual Result'] = homeScorePerc
results_df.loc[row,'Away Actual Result'] = awayScorePerc
# Calculate abs(error)
results_df.loc[row,'Error'] = (abs(homeSE - homeScorePerc)+abs(awaySE - awayScorePerc))/2
# Find Elo Changes
homeNewElo = homeElo + homeK * (homeScorePerc - homeSE)
awayNewElo = awayElo + awayK * (awayScorePerc - awaySE)
# Check to ensure winning teams don't lose Elo
if homescore > awayscore:
homeNewElo = max(homeNewElo,homeElo)
if awayscore > homescore:
awayNewElo = max(awayNewElo,awayElo)
newElos = {hometeam:homeNewElo, awayteam:awayNewElo}
elos_dict.update(newElos)
errorcolumn = results_df['Error']
sumerror = sum(errorcolumn)
return sumerror
In [43]:
optimize.minimize(optimizeme,x0=(75,50),method='Nelder-Mead',options={'maxiter':100,'disp':True})
Out[43]:
In [51]:
if True:
newteamK = 75
oldteamK = 50
elos_workbook = 'https://cdn.fbsbx.com/v/t59.2708-21/14505629_10157512405250331_5499472195350429696_n.xlsm/Full-Fix-Round-7-R3.xlsm?oh=fd98ccbd86bdb408d684df9244e1e834&oe=57F27865&dl=1'
elos_source = "Team Elo Week 1"
team_elos = pd.read_excel(elos_workbook,sheetname=elos_source)
team_elos.head()
team_elos['2016b Teams lower'] = team_elos['2016b Teams'].apply(removepunct)
# Create a dict of the teams and corresponding elo ratings
elos_dict = dict(zip(team_elos['2016b Teams lower'],team_elos['Starting Elo']))
newteam = team_elos['New Team'][3]
def getKfactor(x):
if x == newteam:
out = newteamK
else:
out = oldteamK
return out
team_elos['K Factor'] = team_elos['New Team'].apply(getKfactor)
team_K_factors = dict(zip(team_elos['2016b Teams lower'],team_elos['K Factor']))
results_df['Home Predicted Result'] = None
results_df['Away Predicted Result'] = None
results_df['Home Actual Result'] = None
results_df['Away Actual Result'] = None
results_df['Error'] = None
for row in range(0,len(results_df)):
# Print row for debugging purposes
# Only uncomment when debugging
#print row
# Get the home and away teams, make them lower and unicode
hometeam = results_df['HOME'][row]
hometeam = removepunct(hometeam)
awayteam = results_df['AWAY'][row]
awayteam = removepunct(awayteam)
# Get the elos from the elo dictionary
awayElo = elos_dict[awayteam]
homeElo = elos_dict[hometeam]
# Get the team K factors
homeK = team_K_factors[hometeam]
awayK = team_K_factors[awayteam]
# Calculate the Score Expectancies
homeSE = 1 / (1 + 10 ** -((homeElo - awayElo) / 400))
awaySE = 1 / (1 + 10 ** ((homeElo - awayElo) / 400))
# Write to DF
results_df.loc[row,'Home Predicted Result'] = homeSE
results_df.loc[row,'Away Predicted Result'] = awaySE
# Get home, away and total scores
homescore = results_df['SCORE'][row]
awayscore = results_df['SCORE.1'][row]
totalscore = homescore+awayscore
# Calculate home and away score percentages
homeScorePerc = homescore / totalscore
awayScorePerc = awayscore / totalscore
# Write to DF
results_df.loc[row,'Home Actual Result'] = homeScorePerc
results_df.loc[row,'Away Actual Result'] = awayScorePerc
# Calculate abs(error)
results_df.loc[row,'Error'] = (abs(homeSE - homeScorePerc)+abs(awaySE - awayScorePerc))/2
# Find Elo Changes
homeNewElo = homeElo + homeK * (homeScorePerc - homeSE)
awayNewElo = awayElo + awayK * (awayScorePerc - awaySE)
# Check to ensure winning teams don't lose Elo
if homescore > awayscore:
homeNewElo = max(homeNewElo,homeElo)
if awayscore > homescore:
awayNewElo = max(awayNewElo,awayElo)
newElos = {hometeam:homeNewElo, awayteam:awayNewElo}
elos_dict.update(newElos)
errorcolumn = results_df['Error']
sumerror = sum(errorcolumn)
print 'K value for new teams:',newteamK
print 'K value for old teams',oldteamK
print sumerror
In [ ]:
In [ ]:
In [ ]:
In [52]:
print "OldK = 55, NewK = 300, Sum Error = 28.38"
print "OldK = 55, NewK = 200, Sum Error = 28.43"
print "OldK = 55, NewK = 100, Sum Error = 28.58"
print "OldK = 55, NewK = 75, Sum Error = 28.63"
print "OldK = 50, NewK = 75, Sum Error = 28.64"
In [ ]: