In [1]:
import numpy as np
import pandas as pd
In [2]:
# Load the pre-processed shot records from disk into a DataFrame.
df = pd.read_csv(filepath_or_buffer="processed_data.csv")

# Preview the first few rows.
df.head()
Out[2]:
In [3]:
# Keep only the three columns used below: the grouping keys (year, opponent)
# and the shot outcome flag.
df_sub = df.loc[:, ['year', 'opponent', 'shot_made_flag']]

# Preview the narrowed frame.
df_sub.head()
Out[3]:
In [4]:
# One-hot encode shot_made_flag: the column is replaced by one indicator column
# per distinct value, each named with the 'shot_made_flag_' prefix
# (presumably 'shot_made_flag_0.0' and 'shot_made_flag_1.0', the names used
# when missing pairs are filled in later).
d = pd.get_dummies(data=df_sub, columns=['shot_made_flag'])
In [5]:
# Collapse to one row per (year, opponent) pair by summing the remaining
# columns within each group; the two keys become the MultiIndex of the result.
d = (
    d
    .groupby(['year', 'opponent'])
    .sum()
)

# Preview the aggregated table.
d.head()
Out[5]:
In [44]:
# Ensure every (year, opponent) combination has a row in `d`, using zero
# counts for combinations that have no row yet.
years = df_sub['year'].unique()
teams = df_sub['opponent'].unique()

# Every possible (year, opponent) pair, in order of first appearance of each
# year and each opponent in the data.
all_pairs = pd.MultiIndex.from_product([years, teams], names=d.index.names)

# Pairs that are not yet present in the grouped table.
missing_pairs = all_pairs[~all_pairs.isin(d.index)]

# Append all missing pairs after the existing rows in a single reindex.
# fill_value=0 writes 0 into every column of the new rows directly, so no NaN
# placeholder is introduced and the count columns keep their integer dtype
# (growing the frame one row at a time through .loc upcasts them to float).
# Re-running this cell is a no-op: missing_pairs is then empty.
d = d.reindex(d.index.append(missing_pairs), fill_value=0)
In [46]:
# Write the table to CSV; the index of `d` is written along with its columns.
d.to_csv(path_or_buf='shot_hit_miss.csv')
In [43]:
Out[43]:
In [ ]: