In [ ]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline

In [ ]:


In [ ]:


In [ ]:
# Read the shot log into a DataFrame. Empty `shot_made_flag` entries are
# explicitly declared as missing values so they load as NaN.
filename = "../data/kobe/kobe_bryant_shot_data.csv.gz"
df = pd.read_csv(
    filename,
    na_values={"shot_made_flag": ""},
)

In [ ]:
# Discard every row that has a missing value in any column
# (axis=0 / how='any' are the dropna defaults, spelled out for the reader).
df = df.dropna(axis=0, how='any')

In [ ]:


In [ ]:


In [ ]:
# Remove columns that are not carried over into the refined dataset.
df = df.drop(
    [
        'action_type',
        'game_event_id',
        'game_id',
        'lat',
        'lon',
        'team_id',
        'team_name',
        'game_date',
        'opponent',
        'shot_id',
    ],
    axis='columns',
)

In [ ]:
# Drop the shot coordinate, shot type and shot zone columns as well.
df = df.drop(
    [
        'loc_x',
        'loc_y',
        'shot_type',
        'shot_zone_area',
        'shot_zone_basic',
        'shot_zone_range',
    ],
    axis='columns',
)

In [ ]:


In [ ]:
# Preview the first five rows to check which columns remain after the drops.
df.head(n=5)

In [ ]:


In [ ]:
# Derive the binary `home` flag: 0 when the matchup string contains '@',
# 1 otherwise. The check is a vectorized literal substring match
# (regex=False) rather than a per-row Python lambda.
# Assumes `matchup` has no missing values; rows containing NaN were removed
# by the earlier dropna() cell.
df['home'] = np.where(df['matchup'].str.contains('@', regex=False), 0, 1)

# The raw matchup text is no longer needed once the flag exists.
df = df.drop(['matchup'], axis=1)

In [ ]:
# Preview the first five rows to confirm `home` replaced `matchup`.
df.head(n=5)

In [ ]:


In [ ]:
# Collapse the minutes/seconds pair into a single `time_remaining` column
# expressed in seconds, then remove the two source columns.
df = (
    df.assign(time_remaining=df['minutes_remaining'] * 60 + df['seconds_remaining'])
      .drop(['minutes_remaining', 'seconds_remaining'], axis='columns')
)

In [ ]:


In [ ]:
# Preview the first five rows to confirm `time_remaining` was added.
df.head(n=5)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
# Reorder the columns so that `shot_made_flag` becomes the last one;
# every other column keeps its relative position.
cols = list(df.columns)
cols.append(cols.pop(cols.index('shot_made_flag')))

df = df[cols]

In [ ]:
# Preview the first five rows to confirm `shot_made_flag` is now the last column.
df.head(n=5)

In [ ]:


In [ ]:
# Write the refined frame to CSV without the index column.
# Note: this rebinds `filename`, which previously held the input path.
filename = "../data/kobe/kobe_bryant_shot_data_refined.csv"
df.to_csv(path_or_buf=filename, index=False)

In [ ]:


In [ ]: