In [27]:

    
import pandas as pd
import itertools

FILE = "Heidees-stats.csv"
PASS_RECEIVER = ["Passer", "Receiver"]

Load raw file



In [28]:

    
df = pd.read_csv(FILE)
df.head()









    Out[28]:






  
    
      
      Date/Time
      Tournamemnt
      Opponent
      Point Elapsed Seconds
      Line
      Our Score - End of Point
      Their Score - End of Point
      Event Type
      Action
      Passer
      ...
      Begin Area
      Begin X
      Begin Y
      End Area
      End X
      End Y
      Distance Unit of Measure
      Absolute Distance
      Lateral Distance
      Toward Our Goal Distance
    
  
  
    
      0
      2017-05-28 08:59
      EUCQ
      MUC
      163
      O
      0
      1
      Offense
      Catch
      Aze
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      2017-05-28 08:59
      EUCQ
      MUC
      163
      O
      0
      1
      Offense
      Catch
      Hans
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      2017-05-28 08:59
      EUCQ
      MUC
      163
      O
      0
      1
      Offense
      Catch
      Aze
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      3
      2017-05-28 08:59
      EUCQ
      MUC
      163
      O
      0
      1
      Offense
      Catch
      Chris
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      2017-05-28 08:59
      EUCQ
      MUC
      163
      O
      0
      1
      Offense
      Throwaway
      Hans
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

5 rows × 52 columns

Get line ups per point and game without duplicates



In [8]:

    
line_ups = df.filter(like="Player") \
    .dropna(axis=1) \
    .assign(total_points=df.filter(like="Score").sum(axis=1)) \
    .assign(game=df["Opponent"]) \
    .drop_duplicates()
    
line_ups.head()









    Out[8]:






  
    
      
      Player 0
      Player 1
      Player 2
      Player 3
      Player 4
      Player 5
      Player 6
      total_points
      game
    
  
  
    
      0
      Flex
      Flo
      Alan
      Hans
      Aze
      Chris
      Matze
      1
      MUC
    
    
      12
      Flex
      Flo
      Hans
      Aze
      Mike
      Chris
      Matze
      2
      MUC
    
    
      16
      Dave
      Connor
      Cafi
      Brandon
      Franz
      Moritz
      Kitchen
      3
      MUC
    
    
      25
      Flex
      Flo
      Alan
      Hans
      Aze
      Matze
      Chris
      4
      MUC
    
    
      31
      Flo
      Alan
      Aze
      Hans
      Mike
      Chris
      Matze
      5
      MUC

Convert to long format and back to get each player as binary variable



In [9]:

    
long_line_ups = pd.melt(line_ups, id_vars=["total_points", "game"], value_name="player") \
    .drop("variable", axis=1) \
    .assign(count=1)
    
long_line_ups.head()









    Out[9]:






  
    
      
      total_points
      game
      player
      count
    
  
  
    
      0
      1
      MUC
      Flex
      1
    
    
      1
      2
      MUC
      Flex
      1
    
    
      2
      3
      MUC
      Dave
      1
    
    
      3
      4
      MUC
      Flex
      1
    
    
      4
      5
      MUC
      Flo
      1



In [25]:

    
# convert back to wide as binary
binary_line_ups = long_line_ups.pivot_table(columns="player", 
                                            index=["total_points", "game"], 
                                            values="count")

# fill missings with 0 and create 'Anonymous' player which corresponds to a throwaway
binary_line_ups = binary_line_ups.fillna(0) \
    .astype(int) \
    .assign(Anonymous=1)

binary_line_ups.head()









    Out[25]:






  
    
      
      player
      Alan
      Aze
      Brandon
      Cafi
      Chris
      Connor
      Dave
      Davide
      Flex
      Flo
      ...
      Kitchen
      Matze
      Mike
      Misch
      Moritz
      Mou K.
      Rainer
      Rene
      Walder
      Anonymous
    
    
      total_points
      game
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      7Schwaben
      1
      1
      0
      0
      1
      0
      0
      0
      0
      1
      ...
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
    
    
      Bad Raps
      1
      1
      0
      0
      1
      0
      0
      0
      0
      1
      ...
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
    
    
      Bad SKID
      0
      0
      1
      0
      0
      1
      1
      0
      0
      0
      ...
      1
      0
      0
      0
      1
      0
      0
      1
      0
      1
    
    
      FrankN
      1
      1
      0
      0
      1
      0
      0
      0
      0
      1
      ...
      0
      1
      0
      0
      0
      0
      1
      0
      0
      1
    
    
      MUC
      1
      1
      0
      0
      1
      0
      0
      0
      1
      1
      ...
      0
      1
      0
      0
      0
      0
      0
      0
      0
      1
    
  

5 rows × 22 columns

Get all combinations of player tuples and points played together



In [24]:

    
# get all combinations of player tuples
players = itertools.combinations(binary_line_ups.columns, 2)

# define helper function to compute points played together
def points_played_together(sub_df, player_tuple):
    player_columns = list(player_tuple)
    played_points_bool = sub_df[player_columns].sum(axis=1)
    return (played_points_bool == 2).sum()

played_together = {player_tuple: points_played_together(binary_line_ups, player_tuple)
                   for player_tuple in players}

played_together_rev = {(player2, player1): count 
                       for (player1, player2), count in played_together.items()}   

played_together.update(played_together_rev)

Create final data frame with passer, receiver and pass stats



In [38]:

    
# consider only offense event type
mask = df["Event Type"] == "Offense"
passes = df.loc[mask, PASS_RECEIVER]

# create tupled series to apply value count 
res_df = pd.Series([tuple(x) for x in passes.values]) \
    .value_counts() \
    .to_frame("Passes") \
    .reindex(played_together.keys()) \
    .fillna(0) \
    .assign(PlayedTogether=pd.Series(played_together))
    
# create average passes per point played together
res_df["PassesPerPoint"] = (res_df["Passes"] / res_df["PlayedTogether"]).fillna(0).sort_values()
res_df["Passer"], res_df["Receiver"] = zip(*res_df.index.values.tolist())
res_df = res_df.reset_index(drop=True) \
    .sort_values(PASS_RECEIVER) \
    .replace("Anonymous", "Throwaway")

res_df.head()









    Out[38]:






  
    
      
      Passes
      PlayedTogether
      PassesPerPoint
      Passer
      Receiver
    
  
  
    
      173
      3.0
      47
      0.063830
      Alan
      Throwaway
    
    
      431
      4.0
      42
      0.095238
      Alan
      Aze
    
    
      373
      1.0
      1
      1.000000
      Alan
      Brandon
    
    
      156
      0.0
      0
      0.000000
      Alan
      Cafi
    
    
      300
      1.0
      45
      0.022222
      Alan
      Chris

Write csv output formats



In [39]:

    
res_df[PASS_RECEIVER + ["Passes"]].to_csv("plots/data/passes_absolute.csv", index=False)
res_df[PASS_RECEIVER + ["PlayedTogether"]].query("Receiver != 'Throwaway' and Passer != 'Throwaway'").to_csv("plots/data/played_together.csv", index=False)
res_df[PASS_RECEIVER + ["PassesPerPoint"]].to_csv("plots/data/passes_per_point.csv", index=False)



In [ ]:

	Date/Time	Tournamemnt	Opponent	Point Elapsed Seconds	Line	Their Score - End of Point	Event Type	Action	Passer	...	Begin Area	Begin X	Begin Y	End Area	End X	End Y	Distance Unit of Measure	Absolute Distance	Lateral Distance	Toward Our Goal Distance
0	2017-05-28 08:59	EUCQ	MUC	163	O	1	Offense	Catch	Aze	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	2017-05-28 08:59	EUCQ	MUC	163	O	1	Offense	Catch	Hans	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	2017-05-28 08:59	EUCQ	MUC	163	O	1	Offense	Catch	Aze	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	2017-05-28 08:59	EUCQ	MUC	163	O	1	Offense	Catch	Chris	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	2017-05-28 08:59	EUCQ	MUC	163	O	1	Offense	Throwaway	Hans	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	Player 0	Player 1	Player 2	Player 3	Player 4	Player 5	Player 6	total_points	game
0	Flex	Flo	Alan	Hans	Aze	Chris	Matze	1	MUC
12	Flex	Flo	Hans	Aze	Mike	Chris	Matze	2	MUC
16	Dave	Connor	Cafi	Brandon	Franz	Moritz	Kitchen	3	MUC
25	Flex	Flo	Alan	Hans	Aze	Matze	Chris	4	MUC
31	Flo	Alan	Aze	Hans	Mike	Chris	Matze	5	MUC

	player	Alan	Aze	Brandon	Cafi	Chris	Connor	Dave	Davide	Flex	Flo	...	Kitchen	Matze	Mike	Misch	Moritz	Mou K.	Rainer	Rene	Walder	Anonymous
total_points	game
1	7Schwaben	1	1	0	0	1	0	0	0	0	1	...	0	1	0	0	0	0	1	0	0	1
	Bad Raps	1	1	0	0	1	0	0	0	0	1	...	0	1	1	0	0	0	0	0	0	1
	Bad SKID	0	0	1	0	0	1	1	0	0	0	...	1	0	0	0	1	0	0	1	0	1
	FrankN	1	1	0	0	1	0	0	0	0	1	...	0	1	0	0	0	0	1	0	0	1
	MUC	1	1	0	0	1	0	0	0	1	1	...	0	1	0	0	0	0	0	0	0	1

	Passes	PlayedTogether	PassesPerPoint	Passer	Receiver
173	3.0	47	0.063830	Alan	Throwaway
431	4.0	42	0.095238	Alan	Aze
373	1.0	1	1.000000	Alan	Brandon
156	0.0	0	0.000000	Alan	Cafi
300	1.0	45	0.022222	Alan	Chris