In [27]:
import pandas as pd
import itertools

FILE = "Heidees-stats.csv"
PASS_RECEIVER = ["Passer", "Receiver"]

Load raw file


In [28]:
df = pd.read_csv(FILE)
df.head()


Out[28]:
Date/Time Tournamemnt Opponent Point Elapsed Seconds Line Our Score - End of Point Their Score - End of Point Event Type Action Passer ... Begin Area Begin X Begin Y End Area End X End Y Distance Unit of Measure Absolute Distance Lateral Distance Toward Our Goal Distance
0 2017-05-28 08:59 EUCQ MUC 163 O 0 1 Offense Catch Aze ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 2017-05-28 08:59 EUCQ MUC 163 O 0 1 Offense Catch Hans ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 2017-05-28 08:59 EUCQ MUC 163 O 0 1 Offense Catch Aze ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 2017-05-28 08:59 EUCQ MUC 163 O 0 1 Offense Catch Chris ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 2017-05-28 08:59 EUCQ MUC 163 O 0 1 Offense Throwaway Hans ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 52 columns

Get line ups per point and game without duplicates


In [8]:
line_ups = df.filter(like="Player") \
    .dropna(axis=1) \
    .assign(total_points=df.filter(like="Score").sum(axis=1)) \
    .assign(game=df["Opponent"]) \
    .drop_duplicates()
    
line_ups.head()


Out[8]:
Player 0 Player 1 Player 2 Player 3 Player 4 Player 5 Player 6 total_points game
0 Flex Flo Alan Hans Aze Chris Matze 1 MUC
12 Flex Flo Hans Aze Mike Chris Matze 2 MUC
16 Dave Connor Cafi Brandon Franz Moritz Kitchen 3 MUC
25 Flex Flo Alan Hans Aze Matze Chris 4 MUC
31 Flo Alan Aze Hans Mike Chris Matze 5 MUC

Convert to long format and back to get each player as binary variable


In [9]:
long_line_ups = pd.melt(line_ups, id_vars=["total_points", "game"], value_name="player") \
    .drop("variable", axis=1) \
    .assign(count=1)
    
long_line_ups.head()


Out[9]:
total_points game player count
0 1 MUC Flex 1
1 2 MUC Flex 1
2 3 MUC Dave 1
3 4 MUC Flex 1
4 5 MUC Flo 1

In [25]:
# convert back to wide as binary
binary_line_ups = long_line_ups.pivot_table(columns="player", 
                                            index=["total_points", "game"], 
                                            values="count")

# fill missings with 0 and create 'Anonymous' player which corresponds to a throwaway
binary_line_ups = binary_line_ups.fillna(0) \
    .astype(int) \
    .assign(Anonymous=1)

binary_line_ups.head()


Out[25]:
player Alan Aze Brandon Cafi Chris Connor Dave Davide Flex Flo ... Kitchen Matze Mike Misch Moritz Mou K. Rainer Rene Walder Anonymous
total_points game
1 7Schwaben 1 1 0 0 1 0 0 0 0 1 ... 0 1 0 0 0 0 1 0 0 1
Bad Raps 1 1 0 0 1 0 0 0 0 1 ... 0 1 1 0 0 0 0 0 0 1
Bad SKID 0 0 1 0 0 1 1 0 0 0 ... 1 0 0 0 1 0 0 1 0 1
FrankN 1 1 0 0 1 0 0 0 0 1 ... 0 1 0 0 0 0 1 0 0 1
MUC 1 1 0 0 1 0 0 0 1 1 ... 0 1 0 0 0 0 0 0 0 1

5 rows × 22 columns

Get all combinations of player tuples and points played together


In [24]:
# get all combinations of player tuples
players = itertools.combinations(binary_line_ups.columns, 2)

# define helper function to compute points played together
def points_played_together(sub_df, player_tuple):
    player_columns = list(player_tuple)
    played_points_bool = sub_df[player_columns].sum(axis=1)
    return (played_points_bool == 2).sum()

played_together = {player_tuple: points_played_together(binary_line_ups, player_tuple)
                   for player_tuple in players}

played_together_rev = {(player2, player1): count 
                       for (player1, player2), count in played_together.items()}   

played_together.update(played_together_rev)

Create final data frame with passer, receiver and pass stats


In [38]:
# consider only offense event type
mask = df["Event Type"] == "Offense"
passes = df.loc[mask, PASS_RECEIVER]

# create tupled series to apply value count 
res_df = pd.Series([tuple(x) for x in passes.values]) \
    .value_counts() \
    .to_frame("Passes") \
    .reindex(played_together.keys()) \
    .fillna(0) \
    .assign(PlayedTogether=pd.Series(played_together))
    
# create average passes per point played together
res_df["PassesPerPoint"] = (res_df["Passes"] / res_df["PlayedTogether"]).fillna(0).sort_values()
res_df["Passer"], res_df["Receiver"] = zip(*res_df.index.values.tolist())
res_df = res_df.reset_index(drop=True) \
    .sort_values(PASS_RECEIVER) \
    .replace("Anonymous", "Throwaway")

res_df.head()


Out[38]:
Passes PlayedTogether PassesPerPoint Passer Receiver
173 3.0 47 0.063830 Alan Throwaway
431 4.0 42 0.095238 Alan Aze
373 1.0 1 1.000000 Alan Brandon
156 0.0 0 0.000000 Alan Cafi
300 1.0 45 0.022222 Alan Chris

Write csv output formats


In [39]:
res_df[PASS_RECEIVER + ["Passes"]].to_csv("plots/data/passes_absolute.csv", index=False)
res_df[PASS_RECEIVER + ["PlayedTogether"]].query("Receiver != 'Throwaway' and Passer != 'Throwaway'").to_csv("plots/data/played_together.csv", index=False)
res_df[PASS_RECEIVER + ["PassesPerPoint"]].to_csv("plots/data/passes_per_point.csv", index=False)

In [ ]: