In [27]:
import pandas as pd
import itertools
# Path of the CSV file with the raw stats; it is loaded with pd.read_csv.
FILE = "Heidees-stats.csv"
# Names of the passer and receiver columns; used to select, sort and export pass pairs.
PASS_RECEIVER = ["Passer", "Receiver"]
In [28]:
# Load the raw stats table from FILE and preview its first rows.
df = pd.read_csv(FILE)
df.head()
Out[28]:
In [8]:
# Build one row per distinct line-up.
# - Player slots: every column whose name contains "Player". Only slots that are
#   empty in *every* row are dropped (how="all"); the default how="any" would
#   discard a whole slot as soon as a single row is missing a value and thereby
#   silently remove those players from all points.
# - total_points: row-wise sum of all columns whose name contains "Score".
#   NOTE(review): presumably the combined score identifies a point within a game - confirm.
# - game: the value of the "Opponent" column.
#   NOTE(review): two games against the same opponent would share this key - confirm.
line_ups = (
    df.filter(like="Player")
    .dropna(axis=1, how="all")
    .assign(total_points=df.filter(like="Score").sum(axis=1))
    .assign(game=df["Opponent"])
    .drop_duplicates()
)
line_ups.head()
Out[8]:
In [9]:
# Reshape to long format: one row per (total_points, game, player). The name of
# the original player slot ("variable") is not needed; "count" is a constant
# marker that is aggregated when pivoting back to wide format.
long_line_ups = (
    line_ups.melt(id_vars=["total_points", "game"], value_name="player")
    .drop(columns="variable")
    .assign(count=1)
)
long_line_ups.head()
Out[9]:
In [25]:
# Back to wide format as a 0/1 matrix: one row per (total_points, game) and one
# column per player. Players absent from a row are filled with 0. The extra
# all-ones 'Anonymous' column is the pseudo player that corresponds to a throwaway.
binary_line_ups = (
    pd.pivot_table(
        long_line_ups,
        values="count",
        index=["total_points", "game"],
        columns="player",
    )
    .fillna(0)
    .astype(int)
    .assign(Anonymous=1)
)
binary_line_ups.head()
Out[25]:
In [24]:
# Get all 2-player combinations of the line-up columns. Materialised as a list:
# a bare itertools.combinations object is a one-shot iterator and would be
# silently empty for anything that iterates over `players` a second time.
players = list(itertools.combinations(binary_line_ups.columns, 2))
# define helper function to compute points played together
def points_played_together(sub_df, player_tuple):
    """Count the rows of ``sub_df`` in which every given player is present.

    Parameters
    ----------
    sub_df : pandas.DataFrame
        One column per player. The entries are expected to be 1 where the
        player took part in the row and 0 otherwise.
    player_tuple : iterable
        Column labels of the players to check. Any number of players is
        accepted, not only pairs.

    Returns
    -------
    int
        Number of rows whose entries for the selected players sum to the
        number of selected players.
    """
    player_columns = list(player_tuple)
    # With 0/1 entries the row sum reaches the number of selected players
    # only when all of them are present in that row.
    players_present = sub_df[player_columns].sum(axis=1)
    return (players_present == len(player_columns)).sum()
# shared points for every 2-player combination, keyed by the player tuple
played_together = dict(
    (pair, points_played_together(binary_line_ups, pair)) for pair in players
)
# mirror every (a, b) entry as (b, a) so that both directions can be looked up
played_together_rev = {
    pair[::-1]: shared_points for pair, shared_points in played_together.items()
}
played_together.update(played_together_rev)
In [38]:
# consider only offense event type
mask = df["Event Type"] == "Offense"
passes = df.loc[mask, PASS_RECEIVER]
# Count the passes per (Passer, Receiver) tuple, align the counts with every
# player pair in played_together (pairs without a counted pass get 0) and
# attach the number of points the pair played together.
res_df = (
    pd.Series([tuple(x) for x in passes.values])
    .value_counts()
    .to_frame("Passes")
    .reindex(played_together.keys())
    .fillna(0)
    .assign(PlayedTogether=pd.Series(played_together))
)
# Average passes per point played together; 0/0 yields NaN and is reported as 0.
# No sort_values() here: assigning a Series to a column re-aligns it on the
# frame's index, so sorting the values beforehand has no effect on the result.
res_df["PassesPerPoint"] = (res_df["Passes"] / res_df["PlayedTogether"]).fillna(0)
# split the tuple index into separate Passer / Receiver columns
res_df["Passer"], res_df["Receiver"] = zip(*res_df.index.values.tolist())
# the 'Anonymous' pseudo player is reported as 'Throwaway'
res_df = (
    res_df.reset_index(drop=True)
    .sort_values(PASS_RECEIVER)
    .replace("Anonymous", "Throwaway")
)
res_df.head()
Out[38]:
In [39]:
# absolute number of passes per Passer/Receiver pair
res_df.loc[:, PASS_RECEIVER + ["Passes"]].to_csv("plots/data/passes_absolute.csv", index=False)
# points played together; rows that involve the 'Throwaway' pseudo player are left out
res_df.loc[
    (res_df["Receiver"] != "Throwaway") & (res_df["Passer"] != "Throwaway"),
    PASS_RECEIVER + ["PlayedTogether"],
].to_csv("plots/data/played_together.csv", index=False)
# passes per point played together
res_df.loc[:, PASS_RECEIVER + ["PassesPerPoint"]].to_csv("plots/data/passes_per_point.csv", index=False)
In [ ]: