In [35]:
import pandas as pd
from pandas import Series,DataFrame
import string

import numpy as np

import scipy
from scipy import optimize

In [ ]:
def removepunct(str_in):
    for n in [".",",","!","'",'"','\n','-']:
        str_in = str_in.replace(n,'')
    str_out = str_in.lower()
    
    # Hard coding my way around a typo
    # Error was in row 262 of results spreadsheet
    # Remove this when typo is fixed
    if str_out == "belmontgomery burbs": 
        str_out = "belmontgomery burns"
    
    return unicode(str_out)

In [24]:
mens_mixed_url = 'https://docs.google.com/spreadsheets/d/15TNjIJDkopneZ6PWg4IGhTQNvTl-T5U8u_i9zgJPEXA/export?format=csv&id=15TNjIJDkopneZ6PWg4IGhTQNvTl-T5U8u_i9zgJPEXA&gid=0'

results_df = pd.read_csv(mens_mixed_url)

results_df.head()


Out[24]:
ROUND HOME SCORE SCORE.1 AWAY OT Validate H_Win A_Win H_Tie A_Tie H_OTL A_OTL H_Loss A_Loss
0 1 The SwanBourne Supremacy 14.0 12.0 Balcatta Bandits NaN Y W NaN NaN NaN NaN NaN NaN L
1 1 Patrick Bateman NaN NaN Como Mo Co NaN N NaN NaN NaN NaN NaN NaN NaN NaN
2 1 Belmontgomery Burns 12.0 7.0 C. Montgomery Cockburns NaN Y W NaN NaN NaN NaN NaN NaN L
3 1 Bicton Banter Lords 15.0 5.0 Morley Puck Troopers NaN Y W NaN NaN NaN NaN NaN NaN L
4 1 Churchlands Hellraisers NaN NaN GMLA Angels NaN N NaN NaN NaN NaN NaN NaN NaN NaN

In [28]:
# Drop all games that have not occurred - where validate == 'N'
results_df = results_df[results_df['Validate'] != 'N']

# Create the team list from all the unique values in the Home column
# Assumes all teams have (so far) played at least one game at home
team_list = results_df['HOME'].unique()
results_df.reset_index(inplace = True)
print len(results_df),'games in results'


229 games in results

In [39]:
def optimizeme((K1,K2)):
    
    # Default values:
    #newteamK = 75
    #oldteamK = 50
    
    newteamK = K1
    oldteamK = K2
    
    elos_workbook = 'https://cdn.fbsbx.com/v/t59.2708-21/14505629_10157512405250331_5499472195350429696_n.xlsm/Full-Fix-Round-7-R3.xlsm?oh=fd98ccbd86bdb408d684df9244e1e834&oe=57F27865&dl=1'
    elos_source = "Team Elo Week 1"

    team_elos = pd.read_excel(elos_workbook,sheetname=elos_source)
    team_elos.head()

    team_elos['2016b Teams lower'] = team_elos['2016b Teams'].apply(removepunct)



    # Create a dict of the teams and corresponding elo ratings
    elos_dict = dict(zip(team_elos['2016b Teams lower'],team_elos['Starting Elo']))




    newteam = team_elos['New Team'][3]
    def getKfactor(x):
        if x == newteam:
            out = newteamK
        else:
            out = oldteamK
        return out

    team_elos['K Factor'] = team_elos['New Team'].apply(getKfactor)
    team_K_factors = dict(zip(team_elos['2016b Teams lower'],team_elos['K Factor']))

    results_df['Home Predicted Result'] = None
    results_df['Away Predicted Result'] = None
    results_df['Home Actual Result'] = None
    results_df['Away Actual Result'] = None
    results_df['Error'] = None



    for row in range(0,len(results_df)):

        # Print row for debugging purposes
        # Only uncomment when debugging
        #print row

        # Get the home and away teams, make them lower and unicode
        hometeam = results_df['HOME'][row]
        hometeam = removepunct(hometeam)
        awayteam = results_df['AWAY'][row]
        awayteam = removepunct(awayteam)

        # Get the elos from the elo dictionary
        awayElo = elos_dict[awayteam]
        homeElo = elos_dict[hometeam]

        # Get the team K factors
        homeK = team_K_factors[hometeam]
        awayK = team_K_factors[awayteam]

        # Calculate the Score Expectancies
        homeSE = 1 / (1 + 10 ** -((homeElo - awayElo) / 400))
        awaySE = 1 / (1 + 10 ** ((homeElo - awayElo) / 400))
        # Write to DF
        results_df.loc[row,'Home Predicted Result'] = homeSE
        results_df.loc[row,'Away Predicted Result'] = awaySE


        # Get home, away and total scores
        homescore = results_df['SCORE'][row]
        awayscore = results_df['SCORE.1'][row]
        totalscore = homescore+awayscore

        # Calculate home and away score percentages
        homeScorePerc = homescore / totalscore
        awayScorePerc = awayscore / totalscore
        # Write to DF
        results_df.loc[row,'Home Actual Result'] = homeScorePerc
        results_df.loc[row,'Away Actual Result'] = awayScorePerc

        # Calculate abs(error)
        results_df.loc[row,'Error'] = (abs(homeSE - homeScorePerc)+abs(awaySE - awayScorePerc))/2



        # Find Elo Changes
        homeNewElo = homeElo + homeK * (homeScorePerc - homeSE)
        awayNewElo = awayElo + awayK * (awayScorePerc - awaySE)

        # Check to ensure winning teams don't lose Elo
        if homescore > awayscore:
            homeNewElo = max(homeNewElo,homeElo)
        if awayscore > homescore:
            awayNewElo = max(awayNewElo,awayElo)

        newElos = {hometeam:homeNewElo, awayteam:awayNewElo}

        elos_dict.update(newElos)
        
    errorcolumn = results_df['Error']
    sumerror = sum(errorcolumn)
    return sumerror

In [43]:
optimize.minimize(optimizeme,x0=(75,50),method='Nelder-Mead',options={'maxiter':100,'disp':True})


Optimization terminated successfully.
         Current function value: 28.382967
         Iterations: 60
         Function evaluations: 114
Out[43]:
 final_simplex: (array([[ 298.88053807,   54.87128112],
       [ 298.88056678,   54.87120614],
       [ 298.88049675,   54.87123697]]), array([ 28.3829669,  28.3829669,  28.3829669]))
           fun: 28.382966896693599
       message: 'Optimization terminated successfully.'
          nfev: 114
           nit: 60
        status: 0
       success: True
             x: array([ 298.88053807,   54.87128112])

In [51]:
if True:
    

    
    newteamK = 75
    oldteamK = 50
    
    elos_workbook = 'https://cdn.fbsbx.com/v/t59.2708-21/14505629_10157512405250331_5499472195350429696_n.xlsm/Full-Fix-Round-7-R3.xlsm?oh=fd98ccbd86bdb408d684df9244e1e834&oe=57F27865&dl=1'
    elos_source = "Team Elo Week 1"

    team_elos = pd.read_excel(elos_workbook,sheetname=elos_source)
    team_elos.head()

    team_elos['2016b Teams lower'] = team_elos['2016b Teams'].apply(removepunct)



    # Create a dict of the teams and corresponding elo ratings
    elos_dict = dict(zip(team_elos['2016b Teams lower'],team_elos['Starting Elo']))




    newteam = team_elos['New Team'][3]
    def getKfactor(x):
        if x == newteam:
            out = newteamK
        else:
            out = oldteamK
        return out

    team_elos['K Factor'] = team_elos['New Team'].apply(getKfactor)
    team_K_factors = dict(zip(team_elos['2016b Teams lower'],team_elos['K Factor']))

    results_df['Home Predicted Result'] = None
    results_df['Away Predicted Result'] = None
    results_df['Home Actual Result'] = None
    results_df['Away Actual Result'] = None
    results_df['Error'] = None



    for row in range(0,len(results_df)):

        # Print row for debugging purposes
        # Only uncomment when debugging
        #print row

        # Get the home and away teams, make them lower and unicode
        hometeam = results_df['HOME'][row]
        hometeam = removepunct(hometeam)
        awayteam = results_df['AWAY'][row]
        awayteam = removepunct(awayteam)

        # Get the elos from the elo dictionary
        awayElo = elos_dict[awayteam]
        homeElo = elos_dict[hometeam]

        # Get the team K factors
        homeK = team_K_factors[hometeam]
        awayK = team_K_factors[awayteam]

        # Calculate the Score Expectancies
        homeSE = 1 / (1 + 10 ** -((homeElo - awayElo) / 400))
        awaySE = 1 / (1 + 10 ** ((homeElo - awayElo) / 400))
        # Write to DF
        results_df.loc[row,'Home Predicted Result'] = homeSE
        results_df.loc[row,'Away Predicted Result'] = awaySE


        # Get home, away and total scores
        homescore = results_df['SCORE'][row]
        awayscore = results_df['SCORE.1'][row]
        totalscore = homescore+awayscore

        # Calculate home and away score percentages
        homeScorePerc = homescore / totalscore
        awayScorePerc = awayscore / totalscore
        # Write to DF
        results_df.loc[row,'Home Actual Result'] = homeScorePerc
        results_df.loc[row,'Away Actual Result'] = awayScorePerc

        # Calculate abs(error)
        results_df.loc[row,'Error'] = (abs(homeSE - homeScorePerc)+abs(awaySE - awayScorePerc))/2



        # Find Elo Changes
        homeNewElo = homeElo + homeK * (homeScorePerc - homeSE)
        awayNewElo = awayElo + awayK * (awayScorePerc - awaySE)

        # Check to ensure winning teams don't lose Elo
        if homescore > awayscore:
            homeNewElo = max(homeNewElo,homeElo)
        if awayscore > homescore:
            awayNewElo = max(awayNewElo,awayElo)

        newElos = {hometeam:homeNewElo, awayteam:awayNewElo}

        elos_dict.update(newElos)
        
    errorcolumn = results_df['Error']
    sumerror = sum(errorcolumn)
    print 'K value for new teams:',newteamK
    print 'K value for old teams',oldteamK
    print sumerror


K value for new teams: 75
K value for old teams 50
28.6421658685

In [ ]:


In [ ]:


In [ ]:


In [52]:
print "OldK = 55, NewK = 300, Sum Error = 28.38"
print "OldK = 55, NewK = 200, Sum Error = 28.43"
print "OldK = 55, NewK = 100, Sum Error = 28.58"
print "OldK = 55, NewK = 75,  Sum Error = 28.63"
print "OldK = 50, NewK = 75,  Sum Error = 28.64"


OldK = 55, NewK = 300, Sum Error = 28.38
OldK = 55, NewK = 200, Sum Error = 28.43
OldK = 55, NewK = 100, Sum Error = 28.58
OldK = 55, NewK = 75,  Sum Error = 28.63
OldK = 50, NewK = 75,  Sum Error = 28.64

In [ ]: