notebook.community

Edit and run



In [1]:

    
import pandas as pd
import re

import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline



In [2]:

    
def removepunct(str_in):
    for n in [".",",","!","'",'"','\n','-']:
        str_in = str_in.replace(n,'')
    
    str_out = str_in.lower()
    return unicode(str_out)



In [3]:

    
def parse_round(x):
    y = None
    parsed = None
    if type(x) == str:
        parsed = re.search(r'([0-9]+)+',x).group(1)
        y = int(parsed)
    if (type(x) == int) or (type(x) == float):
        y = int(x)
    return y



In [4]:

    
mens_mixed_url = 'https://docs.google.com/spreadsheets/d/15TNjIJDkopneZ6PWg4IGhTQNvTl-T5U8u_i9zgJPEXA/export?format=csv&id=15TNjIJDkopneZ6PWg4IGhTQNvTl-T5U8u_i9zgJPEXA&gid=0'
elos_workbook = 'https://raw.githubusercontent.com/robfox92/HockeyElo/master/Elos_2016b_week_1.csv'


mens_mixed_raw = pd.read_csv(mens_mixed_url)
elos_start_raw = pd.read_csv(elos_workbook)



In [5]:

    
# Only get results that are validated
results = mens_mixed_raw[mens_mixed_raw['Validate'] == 'Y']
results.reset_index(inplace=True)



In [6]:

    
results['Round_number'] = results['ROUND'].apply(parse_round)
results.head()









    



C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':






    Out[6]:






  
    
      
      index
      ROUND
      HOME
      SCORE
      SCORE.1
      AWAY
      OT
      Validate
      H_Win
      A_Win
      H_Tie
      A_Tie
      H_OTL
      A_OTL
      H_Loss
      A_Loss
      Round_number
    
  
  
    
      0
      0
      1
      The SwanBourne Supremacy
      14.0
      12.0
      Balcatta Bandits
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
    
    
      1
      2
      1
      Belmontgomery Burns
      12.0
      7.0
      C. Montgomery Cockburns
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
    
    
      2
      3
      1
      Bicton Banter Lords
      15.0
      5.0
      Morley Puck Troopers
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
    
    
      3
      5
      1
      Greenmount Doom
      17.0
      7.0
      Bayswater Croquet Club
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
    
    
      4
      6
      1
      Swan View Draught
      7.0
      5.0
      Morley Flying Hellfish
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1



In [7]:

    
results['HOME'] = results['HOME'].apply(removepunct)
results['AWAY'] = results['AWAY'].apply(removepunct)









    



C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app



In [ ]:



In [8]:

    
def getKfactor(x):
        newteam = 'New Team'
        newteamK = 75
        oldteamK = 50
        if x == newteam:
            out = newteamK
        else:
            out = oldteamK
        return out
# Create a dict of team starting elos
team_elos = elos_start_raw
team_elos['2016b Teams lower'] = elos_start_raw['2016b Teams'].apply(removepunct)
elos_dict = dict(zip(team_elos['2016b Teams lower'],team_elos['Starting Elo']))
team_elos['K Factor'] = team_elos['New Team'].apply(getKfactor)
team_K_factors = dict(zip(team_elos['2016b Teams lower'],team_elos['K Factor']))



In [9]:

    
results.loc[:,'Home Elo'] = None
results.loc[:,'Away Elo'] = None









    



C:\Anaconda2\lib\site-packages\pandas\core\indexing.py:288: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
C:\Anaconda2\lib\site-packages\pandas\core\indexing.py:465: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s



In [10]:

    
results.head()









    Out[10]:






  
    
      
      index
      ROUND
      HOME
      SCORE
      SCORE.1
      AWAY
      OT
      Validate
      H_Win
      A_Win
      H_Tie
      A_Tie
      H_OTL
      A_OTL
      H_Loss
      A_Loss
      Round_number
      Home Elo
      Away Elo
    
  
  
    
      0
      0
      1
      the swanbourne supremacy
      14.0
      12.0
      balcatta bandits
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      None
      None
    
    
      1
      2
      1
      belmontgomery burns
      12.0
      7.0
      c montgomery cockburns
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      None
      None
    
    
      2
      3
      1
      bicton banter lords
      15.0
      5.0
      morley puck troopers
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      None
      None
    
    
      3
      5
      1
      greenmount doom
      17.0
      7.0
      bayswater croquet club
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      None
      None
    
    
      4
      6
      1
      swan view draught
      7.0
      5.0
      morley flying hellfish
      NaN
      Y
      W
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      None
      None



In [39]:

    
elos_dict = dict(zip(team_elos['2016b Teams lower'],team_elos['Starting Elo']))

for row in range(0,len(results)):

    # Get the home and away teams
    hometeam = results['HOME'][row]
    awayteam = results['AWAY'][row]


    # Get the elos from the elo dictionary
    awayElo = elos_dict[awayteam]
    homeElo = elos_dict[hometeam]
    # Write to the df
    results.loc[row,'Home Elo'] = homeElo
    results.loc[row,'Away Elo'] = awayElo
        

    # Get the team K factors
    homeK = team_K_factors[hometeam]
    awayK = team_K_factors[awayteam]

    # Calculate the Score Expectancies
    homeSE = 1 / (1 + 10 ** -((homeElo - awayElo) / 400))
    awaySE = 1 / (1 + 10 ** ((homeElo - awayElo) / 400))
    # Write to DF
    results.loc[row,'Home Predicted Result'] = homeSE
    results.loc[row,'Away Predicted Result'] = awaySE


    # Get home, away and total scores
    homescore = results['SCORE'][row]
    awayscore = results['SCORE.1'][row]
    totalscore = homescore+awayscore

    # Calculate home and away score percentages
    homeScorePerc = homescore / totalscore
    awayScorePerc = awayscore / totalscore
    # Write to DF
    results.loc[row,'Home Actual Result'] = homeScorePerc
    results.loc[row,'Away Actual Result'] = awayScorePerc
   
    # Find Elo Changes
    homeNewElo = homeElo + homeK * (homeScorePerc - homeSE)
    awayNewElo = awayElo + awayK * (awayScorePerc - awaySE)

    # Check to ensure winning teams don't lose Elo
    if homescore > awayscore:
        homeNewElo = max(homeNewElo,homeElo)
    if awayscore > homescore:
        awayNewElo = max(awayNewElo,awayElo)
 
    newElos = {hometeam:homeNewElo, awayteam:awayNewElo}

    elos_dict.update(newElos)



In [12]:

    
hometeams = set(results['HOME'].unique())
awayteams = set(results['AWAY'].unique())
allteams = hometeams | awayteams



In [13]:

    
print "all hometeams in teams?",hometeams.issubset(allteams)
print "all awayteams in teams?",awayteams.issubset(allteams)









    



all hometeams in teams? True
all awayteams in teams? True



In [14]:

    
def getscoreprogression(team):
    progress = []
    team = unicode(team)
    out = None
    if team in allteams:
        progress = results[results['HOME']==team]
        progress = progress.append(results[results['AWAY']==team])
        progress.sort_values('Round_number',inplace=True)
        progress.reset_index(inplace=True)
    
    if len(progress)>0:
        out = []
        for row in range(0,len(progress)):
            if progress.loc[row,'HOME'] == team:
                roundno = progress.loc[row,'Round_number']
                if type(roundno) == str:
                    roundno = parse_round(roundno)
                out.append((progress.loc[row,'Round_number'],progress.loc[row,'Home Elo']))
            if progress.loc[row,'AWAY'] == team:
                roundno = progress.loc[row,'Round_number']
                if type(roundno) == str:
                    roundno = parse_round(roundno)
                out.append((roundno,progress.loc[row,'Away Elo']))
    return out



In [15]:

    
getscoreprogression('harambaeyswater')









    Out[15]:





[(1, 585.58014810000009),
 (2, 586.66017060062006),
 (3, 606.16571449908554),
 (4, 601.20708608741631),
 (5, 615.18911702470643),
 (6, 618.32379907906443),
 (7, 615.82933682609473)]



In [16]:

    
def getresults(team):
    progress = []
    team = unicode(team)
    if team in allteams:
        progress = results[results['HOME']==team]
        progress = progress.append(results[results['AWAY']==team])
        progress.sort_values('Round_number',inplace=True)
        progress.reset_index(inplace=True)
        out = progress
    else:
        out = None
    
    return out



In [17]:

    
getresults('harambaeyswater');



In [18]:

    
results.head()









    Out[18]:






  
    
      
      index
      ROUND
      HOME
      SCORE
      SCORE.1
      AWAY
      OT
      Validate
      H_Win
      A_Win
      ...
      A_OTL
      H_Loss
      A_Loss
      Round_number
      Home Elo
      Away Elo
      Home Predicted Result
      Away Predicted Result
      Home Actual Result
      Away Actual Result
    
  
  
    
      0
      0
      1
      the swanbourne supremacy
      14.0
      12.0
      balcatta bandits
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      948.054
      1061.63
      0.342139
      0.657861
      0.538462
      0.461538
    
    
      1
      2
      1
      belmontgomery burns
      12.0
      7.0
      c montgomery cockburns
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      644.158
      600
      0.563209
      0.436791
      0.631579
      0.368421
    
    
      2
      3
      1
      bicton banter lords
      15.0
      5.0
      morley puck troopers
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      945.594
      817.326
      0.676639
      0.323361
      0.750000
      0.250000
    
    
      3
      5
      1
      greenmount doom
      17.0
      7.0
      bayswater croquet club
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      936.05
      811.485
      0.671958
      0.328042
      0.708333
      0.291667
    
    
      4
      6
      1
      swan view draught
      7.0
      5.0
      morley flying hellfish
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      910.884
      817.326
      0.631478
      0.368522
      0.583333
      0.416667
    
  

5 rows × 23 columns



In [19]:

    
parse_round('9-10 BYE')









    Out[19]:





9



In [20]:

    
teams = ['harambaeyswater','get bentley','city beach dont kill my vibe',
            'south perth major blazers','fremantle metropolis double blacks',
            'booragoonbagz','mount hawtnhorny']
teams = list(allteams)
plt.figure(figsize=(15,10))
axes = plt.gca()
#axes.set_xlim([0,7])
#axes.set_ylim([500,700])
plt.ylabel("Elo Rating")
plt.xlabel("Round Number")
plt.title(" Elo vs Round for various SRHL Teams")
for team in teams:
    if getscoreprogression(team) is not None:
        s=getscoreprogression(team)
        x,y = zip(*s)
        if max(x) < 8:
            plt.scatter(x,y)
            plt.plot(x,y)
            if len(teams) < 10:
                plt.legend(teams, loc='lower center')



In [21]:

    
results.head()









    Out[21]:






  
    
      
      index
      ROUND
      HOME
      SCORE
      SCORE.1
      AWAY
      OT
      Validate
      H_Win
      A_Win
      ...
      A_OTL
      H_Loss
      A_Loss
      Round_number
      Home Elo
      Away Elo
      Home Predicted Result
      Away Predicted Result
      Home Actual Result
      Away Actual Result
    
  
  
    
      0
      0
      1
      the swanbourne supremacy
      14.0
      12.0
      balcatta bandits
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      948.054
      1061.63
      0.342139
      0.657861
      0.538462
      0.461538
    
    
      1
      2
      1
      belmontgomery burns
      12.0
      7.0
      c montgomery cockburns
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      644.158
      600
      0.563209
      0.436791
      0.631579
      0.368421
    
    
      2
      3
      1
      bicton banter lords
      15.0
      5.0
      morley puck troopers
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      945.594
      817.326
      0.676639
      0.323361
      0.750000
      0.250000
    
    
      3
      5
      1
      greenmount doom
      17.0
      7.0
      bayswater croquet club
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      936.05
      811.485
      0.671958
      0.328042
      0.708333
      0.291667
    
    
      4
      6
      1
      swan view draught
      7.0
      5.0
      morley flying hellfish
      NaN
      Y
      W
      NaN
      ...
      NaN
      NaN
      L
      1
      910.884
      817.326
      0.631478
      0.368522
      0.583333
      0.416667
    
  

5 rows × 23 columns



In [46]:

    
homeresults = results['Home Actual Result'];
b = [0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1]
#plt.figure(figsize=(15,10))
axes = plt.gca()
#axes.set_xlim([0,7])
#axes.set_ylim([500,700])
plt.ylabel("Frequency")
plt.xlabel("Home Score Outcome")
#plt.title("Histogram of Home Score Outcomes up to SRHL Round 7")
plt.hist(homeresults, bins = b)
print "2016b Score Histogram"
print "Mean Home Score Outcome:",np.mean(homeresults)
print "Standard Deviation of score outcomes:",np.std(homeresults)
print len(homeresults),'game results'









    



2016b Score Histogram
Mean Home Score Outcome: 0.523971326886
Standard Deviation of score outcomes: 0.171164761912
265 game results



In [23]:

    
results_2016a_file = "2016a SRHL Ladders - Mens-Mixed Game Results.csv"
results_2016a_raw = pd.read_csv(results_2016a_file)
results_2016a = results_2016a_raw[results_2016a_raw['Validate']=="Y"]

# Reset the index and drop the old index
results_2016a.reset_index(inplace=True)
results_2016a.drop('index',axis=1,inplace=True)









    



C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [24]:

    
for row in range(0,len(results_2016a)):
    # Get the home, away and total scores
    homescore = results_2016a['SCORE'][row]
    awayscore = results_2016a['SCORE.1'][row]
    totalscore = homescore + awayscore
    homeoutcome = homescore/totalscore
    results_2016a.loc[row,'Home Actual Result'] = homeoutcome



In [45]:

    
homeresults = results_2016a['Home Actual Result']
b = [0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1]
#plt.figure(figsize=(15,10))
axes = plt.gca()
#axes.set_xlim([0,7])
#axes.set_ylim([500,700])
plt.ylabel("Frequency")
plt.xlabel("Home Score Outcome")
#plt.title("Histogram of Home Score Outcomes up to SRHL Round 7")
plt.hist(homeresults)
print "2016a Score Histogram"
print "Mean Home Score Outcome:",np.mean(homeresults)
print "Standard Deviation of score outcomes:",np.std(homeresults)
print len(homeresults),'game results'









    



2016a Score Histogram
Mean Home Score Outcome: 0.509584904447
Standard Deviation of score outcomes: 0.200718551666
589 game results



In [26]:

    
allteams;



In [ ]:



In [47]:

    
team = 'yokine drugs n crime'
print getscoreprogression(team)[-1]









    



(6, 560.18989865186381)



In [ ]:



In [48]:

    
team_elos_df = pd.DataFrame.from_dict(elos_dict,orient='index')
colname = team_elos_df.columns.values[0]
plt.hist(team_elos_df[colname])
print "Mean Elo:",np.mean(team_elos_df[colname])
print "Elo Rating Histogram as at Round 7 2016b"
print "For SRHL"









    



Mean Elo: 735.054915369
Elo Rating Histogram as at Round 7 2016b
For SRHL



In [ ]:



In [33]:

    
results.drop(['index','OT','H_Win','A_Win'],axis=1,inplace=True)









    



C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':



In [34]:

    
results.head()









    Out[34]:






  
    
      
      ROUND
      HOME
      SCORE
      SCORE.1
      AWAY
      Validate
      H_Tie
      A_Tie
      H_OTL
      A_OTL
      H_Loss
      A_Loss
      Round_number
      Home Elo
      Away Elo
      Home Predicted Result
      Away Predicted Result
      Home Actual Result
      Away Actual Result
    
  
  
    
      0
      1
      the swanbourne supremacy
      14.0
      12.0
      balcatta bandits
      Y
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      948.054
      1061.63
      0.342139
      0.657861
      0.538462
      0.461538
    
    
      1
      1
      belmontgomery burns
      12.0
      7.0
      c montgomery cockburns
      Y
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      644.158
      600
      0.563209
      0.436791
      0.631579
      0.368421
    
    
      2
      1
      bicton banter lords
      15.0
      5.0
      morley puck troopers
      Y
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      945.594
      817.326
      0.676639
      0.323361
      0.750000
      0.250000
    
    
      3
      1
      greenmount doom
      17.0
      7.0
      bayswater croquet club
      Y
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      936.05
      811.485
      0.671958
      0.328042
      0.708333
      0.291667
    
    
      4
      1
      swan view draught
      7.0
      5.0
      morley flying hellfish
      Y
      NaN
      NaN
      NaN
      NaN
      NaN
      L
      1
      910.884
      817.326
      0.631478
      0.368522
      0.583333
      0.416667



In [40]:

    
results['Home Error'] = None
for row in range(0,len(results)):
    predicted = results.loc[row,'Home Predicted Result']
    actual = results.loc[row,'Home Actual Result']
    error = (predicted - actual)**2
    results.loc[row,'Home Error'] = error









    



C:\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':



In [42]:

    
RMSE = (np.mean(results['Home Error']))**0.5
print "Reducing Elo for wins gives an RMS Error of:"
print RMSE









    



Reducing Elo for wins gives an RMS Error of:
0.156306285612



In [ ]:

	index	ROUND	HOME	SCORE	SCORE.1	AWAY	OT	Validate	H_Win	A_Win	H_Tie	A_Tie	H_OTL	A_OTL	H_Loss	A_Loss	Round_number
0	0	1	The SwanBourne Supremacy	14.0	12.0	Balcatta Bandits	NaN	Y	W	NaN	NaN	NaN	NaN	NaN	NaN	L	1
1	2	1	Belmontgomery Burns	12.0	7.0	C. Montgomery Cockburns	NaN	Y	W	NaN	NaN	NaN	NaN	NaN	NaN	L	1
2	3	1	Bicton Banter Lords	15.0	5.0	Morley Puck Troopers	NaN	Y	W	NaN	NaN	NaN	NaN	NaN	NaN	L	1
3	5	1	Greenmount Doom	17.0	7.0	Bayswater Croquet Club	NaN	Y	W	NaN	NaN	NaN	NaN	NaN	NaN	L	1
4	6	1	Swan View Draught	7.0	5.0	Morley Flying Hellfish	NaN	Y	W	NaN	NaN	NaN	NaN	NaN	NaN	L	1