In [54]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

In [23]:
models = {'svm': LinearSVC(), 
          'log_reg': LogisticRegression(), 
          'naive_baives': MultinomialNB(), 
          'knn': KNeighborsClassifier(),
          'dec_tree': DecisionTreeClassifier()}

Read in the Kobe Bryant shooting data [https://www.kaggle.com/c/kobe-bryant-shot-selection]


In [87]:
kobe = pd.read_csv('../data/kobe.csv')
kobe.dropna(inplace=True)

For now, use just the numerical datatypes. They are below as num_columns


In [39]:
kobe


Out[39]:
action_type combined_shot_type game_event_id game_id lat loc_x loc_y lon minutes_remaining period ... shot_type shot_zone_area shot_zone_basic shot_zone_range team_id team_name game_date matchup opponent shot_id
0 Jump Shot Jump Shot 10 20000012 33.9723 167 72 -118.1028 10 1 ... 2PT Field Goal Right Side(R) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 1
1 Jump Shot Jump Shot 12 20000012 34.0443 -157 0 -118.4268 10 1 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 2
2 Jump Shot Jump Shot 35 20000012 33.9093 -101 135 -118.3708 7 1 ... 2PT Field Goal Left Side Center(LC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 3
3 Jump Shot Jump Shot 43 20000012 33.8693 138 175 -118.1318 6 1 ... 2PT Field Goal Right Side Center(RC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 4
4 Driving Dunk Shot Dunk 155 20000012 34.0443 0 0 -118.2698 6 2 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 5
5 Jump Shot Jump Shot 244 20000012 34.0553 -145 -11 -118.4148 9 3 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 6
6 Layup Shot Layup 251 20000012 34.0443 0 0 -118.2698 8 3 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 7
7 Jump Shot Jump Shot 254 20000012 34.0163 1 28 -118.2688 8 3 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 8
8 Jump Shot Jump Shot 265 20000012 33.9363 -65 108 -118.3348 6 3 ... 2PT Field Goal Left Side(L) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 9
9 Running Jump Shot Jump Shot 294 20000012 33.9193 -33 125 -118.3028 3 3 ... 2PT Field Goal Center(C) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 10
10 Jump Shot Jump Shot 309 20000012 33.8063 -94 238 -118.3638 1 3 ... 3PT Field Goal Left Side Center(LC) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-10-31 LAL @ POR POR 11
11 Jump Shot Jump Shot 4 20000019 33.9173 121 127 -118.1488 11 1 ... 2PT Field Goal Right Side Center(RC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 12
12 Running Jump Shot Jump Shot 27 20000019 33.9343 -67 110 -118.3368 7 1 ... 2PT Field Goal Left Side(L) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 13
13 Jump Shot Jump Shot 66 20000019 34.0403 -94 4 -118.3638 2 1 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 14
14 Jump Shot Jump Shot 80 20000019 33.9973 -23 47 -118.2928 1 1 ... 2PT Field Goal Center(C) In The Paint (Non-RA) Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 15
15 Jump Shot Jump Shot 86 20000019 33.8523 62 192 -118.2078 0 1 ... 2PT Field Goal Center(C) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 16
16 Driving Layup Shot Layup 100 20000019 34.0443 0 0 -118.2698 0 1 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 17
17 Jump Shot Jump Shot 138 20000019 33.8183 -117 226 -118.3868 8 2 ... 3PT Field Goal Left Side Center(LC) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 18
18 Jump Shot Jump Shot 244 20000019 33.9473 -132 97 -118.4018 11 3 ... 2PT Field Goal Left Side Center(LC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 19
19 Driving Layup Shot Layup 249 20000019 34.0443 0 0 -118.2698 10 3 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 20
20 Jump Shot Jump Shot 255 20000019 33.9003 3 144 -118.2668 10 3 ... 2PT Field Goal Center(C) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 21
21 Jump Shot Jump Shot 265 20000019 33.9173 134 127 -118.1358 9 3 ... 2PT Field Goal Right Side Center(RC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 22
22 Running Jump Shot Jump Shot 274 20000019 33.9343 -16 110 -118.2858 7 3 ... 2PT Field Goal Center(C) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 23
23 Running Jump Shot Jump Shot 299 20000019 33.8943 -109 150 -118.3788 5 3 ... 2PT Field Goal Left Side Center(LC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 24
24 Running Jump Shot Jump Shot 307 20000019 33.9813 -46 63 -118.3158 5 3 ... 2PT Field Goal Center(C) In The Paint (Non-RA) Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 25
25 Layup Shot Layup 332 20000019 34.0443 0 0 -118.2698 2 3 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 26
26 Jump Shot Jump Shot 345 20000019 33.8483 -58 196 -118.3278 2 3 ... 2PT Field Goal Center(C) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 27
27 Jump Shot Jump Shot 369 20000019 33.8583 -183 186 -118.4528 0 3 ... 3PT Field Goal Left Side Center(LC) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 28
28 Jump Shot Jump Shot 400 20000019 33.8713 85 173 -118.1848 8 4 ... 2PT Field Goal Right Side Center(RC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 29
29 Jump Shot Jump Shot 429 20000019 33.9573 3 87 -118.2668 6 4 ... 2PT Field Goal Center(C) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-11-01 LAL vs. UTA UTA 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
30667 Jump Shot Jump Shot 368 49900087 33.7943 40 250 -118.2298 9 4 ... 3PT Field Goal Center(C) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-06-16 LAL @ IND IND 30668
30668 Jump Shot Jump Shot 386 49900087 33.8223 -23 222 -118.2928 7 4 ... 2PT Field Goal Center(C) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-06-16 LAL @ IND IND 30669
30669 Jump Shot Jump Shot 425 49900087 33.9913 171 53 -118.0988 3 4 ... 2PT Field Goal Right Side(R) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-06-16 LAL @ IND IND 30670
30670 Running Jump Shot Jump Shot 15 49900088 34.0283 -74 16 -118.3438 9 1 ... 2PT Field Goal Center(C) In The Paint (Non-RA) Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30671
30671 Driving Layup Shot Layup 25 49900088 34.0443 0 0 -118.2698 8 1 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30672
30672 Jump Shot Jump Shot 29 49900088 33.9893 89 55 -118.1808 8 1 ... 2PT Field Goal Right Side(R) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30673
30673 Jump Shot Jump Shot 36 49900088 34.0443 117 0 -118.1528 7 1 ... 2PT Field Goal Right Side(R) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30674
30674 Jump Shot Jump Shot 81 49900088 33.8283 117 216 -118.1528 2 1 ... 3PT Field Goal Right Side Center(RC) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30675
30675 Jump Shot Jump Shot 84 49900088 33.8283 -134 216 -118.4038 2 1 ... 3PT Field Goal Left Side Center(LC) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30676
30676 Running Jump Shot Jump Shot 98 49900088 34.0443 -141 0 -118.4108 0 1 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30677
30677 Jump Shot Jump Shot 101 49900088 33.9013 -113 143 -118.3828 0 1 ... 2PT Field Goal Left Side Center(LC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30678
30678 Driving Layup Shot Layup 181 49900088 34.0283 14 16 -118.2558 3 2 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30679
30679 Layup Shot Layup 212 49900088 34.0443 0 0 -118.2698 0 2 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30680
30680 Tip Shot Tip Shot 213 49900088 34.0443 0 0 -118.2698 0 2 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30681
30681 Jump Shot Jump Shot 218 49900088 33.7833 -18 261 -118.2878 0 2 ... 3PT Field Goal Center(C) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30682
30682 Running Jump Shot Jump Shot 226 49900088 33.9963 -68 48 -118.3378 11 3 ... 2PT Field Goal Left Side(L) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30683
30683 Jump Shot Jump Shot 228 49900088 33.8283 1 216 -118.2688 10 3 ... 2PT Field Goal Center(C) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30684
30684 Jump Shot Jump Shot 231 49900088 33.9553 -96 89 -118.3658 10 3 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30685
30685 Jump Shot Jump Shot 249 49900088 33.7943 81 250 -118.1888 7 3 ... 3PT Field Goal Center(C) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30686
30686 Running Jump Shot Jump Shot 268 49900088 33.9513 16 93 -118.2538 5 3 ... 2PT Field Goal Center(C) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30687
30687 Jump Shot Jump Shot 284 49900088 33.9443 40 100 -118.2298 3 3 ... 2PT Field Goal Center(C) In The Paint (Non-RA) 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30688
30688 Jump Shot Jump Shot 308 49900088 33.9833 -126 61 -118.3958 1 3 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30689
30689 Jump Shot Jump Shot 326 49900088 33.3653 -12 679 -118.2818 0 3 ... 3PT Field Goal Back Court(BC) Backcourt Back Court Shot 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30690
30690 Jump Shot Jump Shot 331 49900088 33.9443 -113 100 -118.3828 11 4 ... 2PT Field Goal Left Side(L) Mid-Range 8-16 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30691
30691 Driving Layup Shot Layup 382 49900088 34.0443 0 0 -118.2698 7 4 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30692
30692 Jump Shot Jump Shot 397 49900088 33.9963 1 48 -118.2688 6 4 ... 2PT Field Goal Center(C) In The Paint (Non-RA) Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30693
30693 Tip Shot Tip Shot 398 49900088 34.0443 0 0 -118.2698 6 4 ... 2PT Field Goal Center(C) Restricted Area Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30694
30694 Running Jump Shot Jump Shot 426 49900088 33.8783 -134 166 -118.4038 3 4 ... 2PT Field Goal Left Side Center(LC) Mid-Range 16-24 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30695
30695 Jump Shot Jump Shot 448 49900088 33.7773 31 267 -118.2388 2 4 ... 3PT Field Goal Center(C) Above the Break 3 24+ ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30696
30696 Jump Shot Jump Shot 471 49900088 33.9723 1 72 -118.2688 0 4 ... 2PT Field Goal Center(C) In The Paint (Non-RA) Less Than 8 ft. 1610612747 Los Angeles Lakers 2000-06-19 LAL vs. IND IND 30697

30697 rows × 25 columns


In [ ]:


In [52]:
[(col, dtype) for col, dtype in zip(kobe.columns, kobe.dtypes) if dtype != 'object']
num_columns = [col for col, dtype in zip(kobe.columns, kobe.dtypes) if dtype != 'object']
num_columns


Out[52]:
['game_event_id',
 'game_id',
 'lat',
 'loc_x',
 'loc_y',
 'lon',
 'minutes_remaining',
 'period',
 'playoffs',
 'seconds_remaining',
 'shot_distance',
 'shot_made_flag',
 'team_id',
 'shot_id',
 'pred']

The shot_made_flag is the result (0 or 1) of the shot that Kobe took. Some of the values are missing (e.g. NaN). Drop them.


In [ ]:
#kobe.hist()

In [77]:
fig, ax = plt.subplots()
kobe[kobe.shot_made_flag==0].plot(kind='scatter', x='loc_x', y='loc_y', color='blue', alpha=0.1, ax=ax)
kobe[kobe.shot_made_flag==1].plot(kind='scatter', x='loc_x', y='loc_y', color='green', alpha=0.1, ax=ax)
# plt.scatter(kobe.loc_x, kobe.loc_y, alpha=0.2)


Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x11e23c278>

In [68]:
kobe[kobe.shot_made_flag==0].shot_distance.hist(bins=range(0,70,2), alpha=.4)
kobe[kobe.shot_made_flag==1].shot_distance.hist(bins=range(0,70,2), alpha=.4)


Out[68]:
<matplotlib.axes._subplots.AxesSubplot at 0x11cedb240>

Use the num_columns, the kobe dataframe to fit() the models. Choose one or more of the entries in num_columns as features. These models are used to predict whether Kobe will make or miss a shot given the certain input parameters provided.

Get the accuracy of each model with respect to the data used to fit the model.


In [27]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set(font_scale=1.5)

In [94]:
# fit a linear regression model and store the predictions
feature_cols = ['shot_distance', 'minutes_remaining']
X = kobe[feature_cols] #kobe[['shot_distance', 'minutes_remaining']]
y = kobe.shot_made_flag
# from sklearn.linear_model import LinearRegression as Model
from sklearn.linear_model import LogisticRegression as Model
# from sklearn.tree import DecisionTreeClassifier as Model
# from sklearn.ensemble import RandomForestClassifier as Model
model = Model()
model.fit(X, y)
kobe['pred'] = model.predict(X)
# scatter plot that includes the regression line
plt.scatter(kobe.shot_distance, kobe.shot_made_flag)
plt.scatter(kobe.shot_distance, kobe.pred, color='red', alpha=.2)
plt.xlabel('dist')
plt.ylabel('made')

from sklearn.metrics import accuracy_score
accuracy_score(kobe.shot_made_flag, kobe.pred.round())


Out[94]:
0.59719033350196526

The following is a reminder of how the SciKit-Learn Models can be interfaced


In [45]:
# fit a linear regression model and store the predictions
example = pd.DataFrame({'a':[1,2,3,4,5,6], 'b':[1,1,0,0,0,1]})
feature_cols = ['a']
X = example[feature_cols]
y = example.b
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)
example['pred'] = model.predict(X)
# scatter plot that includes the regression line
plt.scatter(example.a, example.b)
plt.plot(example.a, example.pred, color='red')
plt.xlabel('a')
plt.ylabel('b')

from sklearn.metrics import accuracy_score
accuracy_score(example.b, example.pred.astype(int))


Out[45]:
0.5

In [ ]: