In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load processed_data.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer="processed_data.csv")
# Preview the first five rows to check the columns that were read.
df.head(n=5)


Out[2]:
Unnamed: 0 action_type combined_shot_type loc_x loc_y minutes_remaining period playoffs season seconds_remaining shot_distance shot_made_flag shot_type shot_zone_area shot_zone_basic shot_zone_range team_name matchup opponent year
0 0 Jump Shot Jump Shot -157 0 10 1 0 2000-01 22 15 0.0 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. Los Angeles Lakers LAL @ POR POR 2000
1 1 Jump Shot Jump Shot -101 135 7 1 0 2000-01 45 16 1.0 2PT Field Goal Left Side Center(LC) Mid-Range 16-24 ft. Los Angeles Lakers LAL @ POR POR 2000
2 2 Jump Shot Jump Shot 138 175 6 1 0 2000-01 52 22 0.0 2PT Field Goal Right Side Center(RC) Mid-Range 16-24 ft. Los Angeles Lakers LAL @ POR POR 2000
3 3 Driving Dunk Shot Dunk 0 0 6 2 0 2000-01 19 0 1.0 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. Los Angeles Lakers LAL @ POR POR 2000
4 4 Jump Shot Jump Shot -145 -11 9 3 0 2000-01 32 14 0.0 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. Los Angeles Lakers LAL @ POR POR 2000

In [3]:
# Keep only the three columns used for the per-year, per-opponent tallies.
df_sub = df.loc[:, ['year', 'opponent', 'shot_made_flag']]
# Preview the first five rows of the subset.
df_sub.head(n=5)


Out[3]:
year opponent shot_made_flag
0 2000 POR 0.0
1 2000 POR 1.0
2 2000 POR 0.0
3 2000 POR 1.0
4 2000 POR 0.0

In [4]:
# One-hot encode shot_made_flag: the column is replaced by one indicator
# column per observed value (shot_made_flag_0.0 and shot_made_flag_1.0),
# while year and opponent are carried through unchanged.
d = pd.get_dummies(data=df_sub, columns=['shot_made_flag'])

In [5]:
# Sum the indicator columns within each (year, opponent) pair, giving one row
# per pair with the count of shot_made_flag == 0.0 and shot_made_flag == 1.0.
d = d.groupby(['year', 'opponent']).sum()
# Preview the first five aggregated rows.
d.head(n=5)


Out[5]:
shot_made_flag_0.0 shot_made_flag_1.0
year opponent
1996 ATL 4.0 0.0
BOS 4.0 1.0
CHA 8.0 6.0
CHI 5.0 4.0
CLE 4.0 2.0

In [44]:
# Make sure every (year, opponent) pairing has a row in `d`, including
# pairings with no recorded shots, which get zero in both count columns.
#
# NaN keys are skipped: groupby drops them by default, so they never have
# tallies of their own, and skipping them keeps sorted() well-defined.
years = df_sub['year'].dropna().unique()
teams = df_sub['opponent'].dropna().unique()

# Build the complete year x opponent grid once and reindex against it.
# Existing tallies are kept; absent pairings are filled with 0. This replaces
# a try/except KeyError probe that grew the frame one row at a time, which is
# slow and appended the new rows out of order at the bottom of the index.
# Re-running this cell is safe: reindexing to the same grid is a no-op.
full_index = pd.MultiIndex.from_product(
    [sorted(years), sorted(teams)],
    names=['year', 'opponent'],
)
d = d.reindex(full_index, fill_value=0)

In [46]:
# Persist the per-(year, opponent) tallies; the index levels are written as
# the leading columns of the CSV.
d.to_csv(path_or_buf='shot_hit_miss.csv')

In [43]:



Out[43]:
shot_made_flag_0.0    0.0
shot_made_flag_1.0    0.0
Name: (1998, CHA), dtype: float64

In [ ]: