Exploratory analysis of Kobe Bryant's shot data


In [2]:
import pandas as pd
import numpy as np
%matplotlib inline

In [3]:
# Read the input CSV file into a pandas DataFrame.
# The path is a raw string so that the Windows backslashes are taken literally;
# the previous non-raw literal relied on invalid escape sequences such as "\B"
# and "\d", which are deprecated and would silently break for a folder name
# starting with e.g. "t" or "n". The resulting path is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs elsewhere.
df = pd.read_csv(r"D:\Box Sync\Fall_2016\data_vis\project\data\data.csv")

In [4]:
# Preview the first five rows (head() returns five rows by default).
df.head()


Out[4]:
action_type combined_shot_type game_event_id game_id lat loc_x loc_y lon minutes_remaining period ... shot_type shot_zone_area shot_zone_basic shot_zone_range team_id team_name game_date matchup opponent shot_id
0 Jump Shot Jump Shot 10 20000012 33.9723 167 72 -118.1028 10 1 ... 2PT Field Goal Right Side(R) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 1
1 Jump Shot Jump Shot 12 20000012 34.0443 -157 0 -118.4268 10 1 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 2
2 Jump Shot Jump Shot 35 20000012 33.9093 -101 135 -118.3708 7 1 ... 2PT Field Goal Left Side Center(LC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 3
3 Jump Shot Jump Shot 43 20000012 33.8693 138 175 -118.1318 6 1 ... 2PT Field Goal Right Side Center(RC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 4
4 Driving Dunk Shot Dunk 155 20000012 34.0443 0 0 -118.2698 6 2 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 5

5 rows × 25 columns


In [7]:
# Discard every row that contains at least one missing value.
# The frame is modified in place, so re-running earlier display cells will
# show the reduced data.
df.dropna(axis=0, how="any", inplace=True)

In [11]:
# Columns to discard from the frame.
remove_columns = [
    'game_event_id',
    'game_id',
    'lat',
    'lon',
    'team_id',
    'game_date',
    'shot_id',
]

# Remove those columns from df in place. Because df is mutated, running this
# cell a second time raises KeyError (the columns are already gone).
df.drop(remove_columns, axis='columns', inplace=True)

# Preview the remaining columns.
df.head()

In [12]:
# Write df to a CSV file, omitting the row index.
# The path is a raw string so that the Windows backslashes are taken literally;
# the previous non-raw literal relied on invalid escape sequences such as "\B"
# and "\d", which are deprecated. The resulting path is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs elsewhere.
df.to_csv(r"D:\Box Sync\Fall_2016\data_vis\project\data\processed_data.csv", index=False)

In [ ]: