Summary

Manipulate the data into two distinct formats for pedagogical reasons.



In [99]:

    
import os
import pandas as pd



In [100]:

    
base_dir = os.path.join("/Users", "sbussmann", "Development", "buda", "buda-ratings")
scores_dir = os.path.join(base_dir, "data", "raw", "game_scores")
interim_dir = os.path.join(base_dir, "data", "interim")



In [101]:

    
# summer club league 2016
league_id = 40264



In [102]:

    
file_name = "scores_{}.csv".format(league_id)



In [103]:

    
file_path = os.path.join(scores_dir, file_name)



In [104]:

    
scores = pd.read_csv(file_path)



In [105]:

    
scores.head()









    Out[105]:







  
    
      
      Team A
      Team B
      Tourney Qualifying games*
      divname
      Score A
      Score B
    
  
  
    
      0
      AHOC
      Gothrilla
      -99
      4/3 Div 1
      15
      12
    
    
      1
      AHOC
      BBN
      -99
      4/3 Div 1
      15
      1
    
    
      2
      AHOC
      Stonecutters
      -99
      4/3 Div 1
      15
      8
    
    
      3
      AHOC
      FlowChart
      -99
      4/3 Div 1
      15
      10
    
    
      4
      AHOC
      Lady and the BAMF
      -99
      4/3 Div 1
      15
      5



In [106]:

    
winloss = scores.copy()



In [107]:

    
winloss['Team A Wins'] = winloss['Score A'] > winloss['Score B']



In [108]:

    
winloss.head(20)









    Out[108]:







  
    
      
      Team A
      Team B
      Tourney Qualifying games*
      divname
      Score A
      Score B
      Team A Wins
    
  
  
    
      0
      AHOC
      Gothrilla
      -99
      4/3 Div 1
      15
      12
      True
    
    
      1
      AHOC
      BBN
      -99
      4/3 Div 1
      15
      1
      True
    
    
      2
      AHOC
      Stonecutters
      -99
      4/3 Div 1
      15
      8
      True
    
    
      3
      AHOC
      FlowChart
      -99
      4/3 Div 1
      15
      10
      True
    
    
      4
      AHOC
      Lady and the BAMF
      -99
      4/3 Div 1
      15
      5
      True
    
    
      5
      AHOC
      Swingers
      -99
      4/3 Div 1
      15
      11
      True
    
    
      6
      AHOC
      Upstream
      -99
      4/3 Div 1
      15
      10
      True
    
    
      7
      AHOC
      Tubbs
      -99
      4/3 Div 1
      15
      6
      True
    
    
      8
      AHOC
      Stonecutters
      -99
      4/3 Div 1
      15
      10
      True
    
    
      9
      AHOC
      Zerg Rush!
      -99
      4/3 Div 1
      15
      8
      True
    
    
      10
      AHOC
      Turtle Boy
      -99
      4/3 Div 1
      15
      6
      True
    
    
      11
      AHOC
      Live Poultry, Fresh Killed (LPFK)
      -99
      4/3 Div 1
      15
      7
      True
    
    
      12
      AHOC
      TuneSquad
      -99
      4/3 Div 1
      15
      4
      True
    
    
      13
      AHOC
      JuJu Hex
      -99
      4/3 Div 1
      15
      12
      True
    
    
      14
      Gothrilla
      AHOC
      -99
      4/3 Div 1
      12
      15
      False
    
    
      15
      Gothrilla
      Swingers
      -99
      4/3 Div 1
      9
      13
      False
    
    
      16
      Gothrilla
      FlowChart
      -99
      4/3 Div 1
      15
      7
      True
    
    
      17
      Gothrilla
      Hucky McThrowandcatch
      -99
      4/3 Div 1
      14
      10
      True
    
    
      18
      Gothrilla
      SnakeCountryBromance
      -99
      4/3 Div 1
      10
      15
      False
    
    
      19
      Gothrilla
      Zerg Rush!
      -99
      4/3 Div 1
      11
      15
      False



In [109]:

    
winloss = winloss.drop(['Tourney Qualifying games*', 'Score A', 'Score B'], axis=1)



In [110]:

    
winloss.head(20)









    Out[110]:







  
    
      
      Team A
      Team B
      divname
      Team A Wins
    
  
  
    
      0
      AHOC
      Gothrilla
      4/3 Div 1
      True
    
    
      1
      AHOC
      BBN
      4/3 Div 1
      True
    
    
      2
      AHOC
      Stonecutters
      4/3 Div 1
      True
    
    
      3
      AHOC
      FlowChart
      4/3 Div 1
      True
    
    
      4
      AHOC
      Lady and the BAMF
      4/3 Div 1
      True
    
    
      5
      AHOC
      Swingers
      4/3 Div 1
      True
    
    
      6
      AHOC
      Upstream
      4/3 Div 1
      True
    
    
      7
      AHOC
      Tubbs
      4/3 Div 1
      True
    
    
      8
      AHOC
      Stonecutters
      4/3 Div 1
      True
    
    
      9
      AHOC
      Zerg Rush!
      4/3 Div 1
      True
    
    
      10
      AHOC
      Turtle Boy
      4/3 Div 1
      True
    
    
      11
      AHOC
      Live Poultry, Fresh Killed (LPFK)
      4/3 Div 1
      True
    
    
      12
      AHOC
      TuneSquad
      4/3 Div 1
      True
    
    
      13
      AHOC
      JuJu Hex
      4/3 Div 1
      True
    
    
      14
      Gothrilla
      AHOC
      4/3 Div 1
      False
    
    
      15
      Gothrilla
      Swingers
      4/3 Div 1
      False
    
    
      16
      Gothrilla
      FlowChart
      4/3 Div 1
      True
    
    
      17
      Gothrilla
      Hucky McThrowandcatch
      4/3 Div 1
      True
    
    
      18
      Gothrilla
      SnakeCountryBromance
      4/3 Div 1
      False
    
    
      19
      Gothrilla
      Zerg Rush!
      4/3 Div 1
      False



In [111]:

    
teams = winloss['Team A'].unique()



In [112]:

    
team2index = {}
team2div = {}
for i, team in enumerate(teams):
    row = winloss['Team A'] == team
    div_team = winloss.loc[row, 'divname'].unique()[0]
    team2div[team] = div_team
    team2index[team] = i



In [113]:

    
winloss['Index A'] = winloss['Team A'].apply(lambda x: team2index[x])
winloss['Index B'] = winloss['Team B'].apply(lambda x: team2index[x])
winloss['Div A'] = winloss['Team A'].apply(lambda x: team2div[x])
winloss['Div B'] = winloss['Team B'].apply(lambda x: team2div[x])



In [114]:

    
mixed_winloss = winloss.copy()
for div in ['Open Div 1', 'Open Div 2']:
    sub1 = (mixed_winloss['Div A'] == div) | (mixed_winloss['Div B'] == div)
    mixed_winloss = mixed_winloss[~sub1]



In [115]:

    
mixed_winloss.shape









    Out[115]:





(1044, 8)



In [116]:

    
mixed_winloss.to_csv(os.path.join(interim_dir, 'winloss_with_duplicates.csv'), index=None)



In [90]:

    
pair_list = []
for row in mixed_winloss.index:
    team_A = mixed_winloss.loc[row, 'Index A']
    team_B = mixed_winloss.loc[row, 'Index B']
    new_pair = (team_A, team_B)
    pair_list.append(new_pair)
    reverse_pair = (team_B, team_A)
    if reverse_pair in pair_list:
        mixed_winloss = mixed_winloss.drop(row)



In [91]:

    
mixed_winloss.shape









    Out[91]:





(522, 13)



In [92]:

    
mixed_winloss.head()









    Out[92]:







  
    
      
      Team A
      Team B
      Tourney Qualifying games*
      divname
      Score A
      Score B
      Team A Wins
      Team A Loses
      Team A Ties
      Index A
      Index B
      Div A
      Div B
    
  
  
    
      0
      AHOC
      Gothrilla
      -99
      4/3 Div 1
      15
      12
      True
      False
      False
      0
      1
      4/3 Div 1
      4/3 Div 1
    
    
      1
      AHOC
      BBN
      -99
      4/3 Div 1
      15
      1
      True
      False
      False
      0
      9
      4/3 Div 1
      4/3 Div 1
    
    
      2
      AHOC
      Stonecutters
      -99
      4/3 Div 1
      15
      8
      True
      False
      False
      0
      41
      4/3 Div 1
      5/2 Div 1
    
    
      3
      AHOC
      FlowChart
      -99
      4/3 Div 1
      15
      10
      True
      False
      False
      0
      2
      4/3 Div 1
      4/3 Div 1
    
    
      4
      AHOC
      Lady and the BAMF
      -99
      4/3 Div 1
      15
      5
      True
      False
      False
      0
      28
      4/3 Div 1
      4/3 Div 2



In [93]:

    
mixed_winloss_flatprior = mixed_winloss[['Team A', 'Team B', 'Team A Wins', 'Index A', 'Index B']]



In [97]:

    
outmwlfppath = os.path.join(interim_dir, 'winloss_simpleprior_{}.csv'.format(league_id))
mixed_winloss_flatprior.to_csv(outmwlfppath, index=None)



In [95]:

    
mixed_winloss_divprior = mixed_winloss[['Team A', 'Team B', 'Team A Wins', 'Index A', 'Index B', 'Div A', 'Div B']]



In [98]:

    
outmwldppath = os.path.join(interim_dir, 'winloss_divprior_{}.csv'.format(league_id))
mixed_winloss_divprior.to_csv(outmwldppath, index=None)



In [60]:

    
outwlpath = os.path.join(interim_dir, 'winloss_{}.csv'.format(league_id))
winloss.to_csv(outwlpath, index=None)



In [15]:

    
scorediv = scores[['Team A', 'divname']].drop_duplicates()



In [17]:

    
scorediv = scorediv.set_index('Team A')



In [21]:

    
scorediv.loc['AHOC', 'divname']









    Out[21]:





'4/3 Div 1'



In [ ]:

    
map2div = {}
teams = scores['Team A'].unique()
for team in teams:
    index =



In [38]:

    
heuristicdf = scores.groupby('Team A').sum().sort_index().reset_index()



In [39]:

    
heuristicdf.head()









    Out[39]:







  
    
      
      Team A
      Tourney Qualifying games*
      Score A
      Score B
    
  
  
    
      0
      123 Trap!
      -990
      61
      142
    
    
      1
      215 Needham Street
      -693
      98
      62
    
    
      2
      A Lil Bit Sticky
      -1485
      151
      194
    
    
      3
      AHOC
      -1386
      210
      110
    
    
      4
      Alpha No Beta
      -1881
      216
      223



In [40]:

    
heuristicdf = heuristicdf.assign(PlusMinus = heuristicdf['Score A'] - heuristicdf['Score B'])



In [41]:

    
heuristicdf.head()









    Out[41]:







  
    
      
      Team A
      Tourney Qualifying games*
      Score A
      Score B
      PlusMinus
    
  
  
    
      0
      123 Trap!
      -990
      61
      142
      -81
    
    
      1
      215 Needham Street
      -693
      98
      62
      36
    
    
      2
      A Lil Bit Sticky
      -1485
      151
      194
      -43
    
    
      3
      AHOC
      -1386
      210
      110
      100
    
    
      4
      Alpha No Beta
      -1881
      216
      223
      -7



In [42]:

    
scorediv.loc[heuristicdf['Team A'].values, 'divname'].values









    Out[42]:





array(['4/3 Div 3', 'Open Div 1', '4/3 Div 2', '4/3 Div 1', '5/2 Div 3',
       '4/3 Div 1', '5/2 Div 2', '4/3 Div 3', '5/2 Div 2', '4/3 Div 3',
       '4/3 Div 2', 'Open Div 1', 'Open Div 1', '5/2 Div 3', 'Open Div 1',
       '4/3 Div 2', '4/3 Div 2', 'Open Div 1', '4/3 Div 2', '4/3 Div 2',
       '4/3 Div 1', '5/2 Div 3', '5/2 Div 2', '4/3 Div 1', '5/2 Div 3',
       '4/3 Div 1', 'Open Div 1', '5/2 Div 2', '4/3 Div 2', '5/2 Div 2',
       'Open Div 2', '4/3 Div 2', '5/2 Div 2', '4/3 Div 2', '4/3 Div 2',
       '5/2 Div 3', '5/2 Div 2', '5/2 Div 3', '4/3 Div 2', '4/3 Div 2',
       '4/3 Div 1', '5/2 Div 3', '5/2 Div 2', '4/3 Div 2', '4/3 Div 2',
       '4/3 Div 3', '5/2 Div 2', '4/3 Div 2', '5/2 Div 3', '4/3 Div 2',
       '4/3 Div 2', '5/2 Div 2', '5/2 Div 3', 'Open Div 1', '5/2 Div 1',
       'Open Div 2', '5/2 Div 1', '5/2 Div 3', '5/2 Div 3', 'Open Div 1',
       '4/3 Div 3', 'Open Div 1', '5/2 Div 1', '4/3 Div 2', '4/3 Div 1',
       '5/2 Div 3', '5/2 Div 2', '4/3 Div 2', '5/2 Div 3', '4/3 Div 3',
       '4/3 Div 3', '4/3 Div 2', '4/3 Div 1', '4/3 Div 2', '4/3 Div 1',
       '4/3 Div 3', '4/3 Div 2', '4/3 Div 1'], dtype=object)



In [43]:

    
heuristicdf = heuristicdf.assign(divname = scorediv.loc[heuristicdf['Team A'].values, 'divname'].values)



In [67]:

    
tmpwl = scores.copy()



In [68]:

    
tmpwl['Team A Wins'] = (tmpwl['Score A'] > tmpwl['Score B']).astype('int')
tmpwl['Team A Loses'] = (tmpwl['Score A'] < tmpwl['Score B']).astype('int')
tmpwl['Team A Ties'] = (tmpwl['Score A'] == tmpwl['Score B']).astype('int')



In [73]:

    
wlrecord = tmpwl.groupby('Team A').sum()



In [74]:

    
wlrecord.head(20)









    Out[74]:







  
    
      
      Tourney Qualifying games*
      Score A
      Score B
      Team A Wins
      Team A Loses
      Team A Ties
    
    
      Team A
      
      
      
      
      
      
    
  
  
    
      123 Trap!
      -990
      61
      142
      1
      9
      0
    
    
      215 Needham Street
      -693
      98
      62
      5
      1
      1
    
    
      A Lil Bit Sticky
      -1485
      151
      194
      5
      10
      0
    
    
      AHOC
      -1386
      210
      110
      14
      0
      0
    
    
      Alpha No Beta
      -1881
      216
      223
      10
      9
      0
    
    
      BBN
      -1089
      111
      158
      2
      8
      1
    
    
      Baba Yaga
      -1188
      170
      123
      9
      3
      0
    
    
      Baboon Heart
      -1188
      108
      143
      5
      7
      0
    
    
      Bacon
      -1089
      143
      125
      7
      4
      0
    
    
      Batman and the Robins
      -1188
      140
      143
      6
      6
      0
    
    
      Booooooosh!
      -891
      116
      116
      4
      5
      0
    
    
      Crossroads
      -2079
      299
      226
      17
      3
      1
    
    
      DISCtinguished Gentlemen
      -693
      75
      101
      1
      6
      0
    
    
      Demons
      -1089
      140
      128
      5
      6
      0
    
    
      Desert Horizon
      -297
      28
      46
      0
      3
      0
    
    
      Disc Envy
      -1683
      218
      187
      9
      8
      0
    
    
      Dominion
      -1287
      147
      153
      6
      7
      0
    
    
      Fat Bass Ultimate
      -990
      138
      127
      6
      4
      0
    
    
      Fetch
      -1881
      225
      216
      8
      11
      0
    
    
      Flaming Croissants
      -1980
      271
      205
      15
      5
      0



In [77]:

    
heuristicdf = heuristicdf.set_index('Team Name').join(wlrecord)



In [79]:

    
heuristicdf = heuristicdf.drop(['Tourney Qualifying games*', 'Score A', 'Score B'], axis=1)



In [82]:

    
heuristicdf = heuristicdf.rename(columns={'Team A': 'Team Name', 
                                          'Team A Wins': 'Wins', 
                                          'Team A Loses': 'Losses', 
                                          'Team A Ties': 'Ties'})



In [83]:

    
heuristicdf.head()









    Out[83]:







  
    
      
      PlusMinus
      divname
      Wins
      Losses
      Ties
    
    
      Team Name
      
      
      
      
      
    
  
  
    
      123 Trap!
      -81
      4/3 Div 3
      1
      9
      0
    
    
      215 Needham Street
      36
      Open Div 1
      5
      1
      1
    
    
      A Lil Bit Sticky
      -43
      4/3 Div 2
      5
      10
      0
    
    
      AHOC
      100
      4/3 Div 1
      14
      0
      0
    
    
      Alpha No Beta
      -7
      5/2 Div 3
      10
      9
      0



In [84]:

    
outhuepath = os.path.join(interim_dir, 'plusminus_{}.csv'.format(league_id))
heuristicdf.to_csv(outhuepath)



In [ ]:

	Team A	Team B	Tourney Qualifying games*	divname	Score A	Score B
0	AHOC	Gothrilla	-99	4/3 Div 1	15	12
1	AHOC	BBN	-99	4/3 Div 1	15	1
2	AHOC	Stonecutters	-99	4/3 Div 1	15	8
3	AHOC	FlowChart	-99	4/3 Div 1	15	10
4	AHOC	Lady and the BAMF	-99	4/3 Div 1	15	5

	Team A	Tourney Qualifying games*	Score A	Score B
0	123 Trap!	-990	61	142
1	215 Needham Street	-693	98	62
2	A Lil Bit Sticky	-1485	151	194
3	AHOC	-1386	210	110
4	Alpha No Beta	-1881	216	223

	Tourney Qualifying games*	Score A	Score B	Team A Wins	Team A Loses	Team A Ties
Team A
123 Trap!	-990	61	142	1	9	0
215 Needham Street	-693	98	62	5	1	1
A Lil Bit Sticky	-1485	151	194	5	10	0
AHOC	-1386	210	110	14	0	0
Alpha No Beta	-1881	216	223	10	9	0
BBN	-1089	111	158	2	8	1
Baba Yaga	-1188	170	123	9	3	0
Baboon Heart	-1188	108	143	5	7	0
Bacon	-1089	143	125	7	4	0
Batman and the Robins	-1188	140	143	6	6	0
Booooooosh!	-891	116	116	4	5	0
Crossroads	-2079	299	226	17	3	1
DISCtinguished Gentlemen	-693	75	101	1	6	0
Demons	-1089	140	128	5	6	0
Desert Horizon	-297	28	46	0	3	0
Disc Envy	-1683	218	187	9	8	0
Dominion	-1287	147	153	6	7	0
Fat Bass Ultimate	-990	138	127	6	4	0
Fetch	-1881	225	216	8	11	0
Flaming Croissants	-1980	271	205	15	5	0

	PlusMinus	divname	Wins	Losses	Ties
Team Name
123 Trap!	-81	4/3 Div 3	1	9	0
215 Needham Street	36	Open Div 1	5	1	1
A Lil Bit Sticky	-43	4/3 Div 2	5	10	0
AHOC	100	4/3 Div 1	14	0	0
Alpha No Beta	-7	5/2 Div 3	10	9	0