Loading necessary library



In [40]:

    
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb
import numpy as np

Loading data

deleting irrelevant features



In [43]:

    
kobe = pd.read_csv('data.csv', sep=',') 
kobe= kobe[np.isfinite(kobe['shot_made_flag'])]
del kobe['lat']
del kobe['lon']
del kobe['game_id']
del kobe['team_id']
del kobe['team_name']

kobe_2 = pd.read_csv('data.csv', sep=',') 
kobe_2= kobe_2[np.isfinite(kobe_2['shot_made_flag'])]
del kobe_2['lat']
del kobe_2['lon']
del kobe_2['game_id']
del kobe_2['team_id']
del kobe_2['team_name']

encoding catagorical features



In [26]:

    
mt_up = preprocessing.LabelEncoder()
kobe.matchup = mt_up.fit_transform(kobe.matchup )
#kobe_2.matchup = mt_up.fit_transform(kobe.matchup )

opp = preprocessing.LabelEncoder()
kobe.opponent = opp.fit_transform(kobe.opponent )
#kobe_2.opponent = opp.fit_transform(kobe.opponent )

dt = preprocessing.LabelEncoder()
kobe.game_date = dt.fit_transform(kobe.game_date )
#kobe_2.game_date = dt.fit_transform(kobe.game_date )

at = preprocessing.LabelEncoder()
kobe.action_type = at.fit_transform(kobe.action_type )
#kobe_2.action_type = at.fit_transform(kobe.action_type )

cst = preprocessing.LabelEncoder()
kobe.combined_shot_type = cst.fit_transform(kobe.combined_shot_type )
#kobe_2.combined_shot_type = cst.fit_transform(kobe.combined_shot_type )

seson = preprocessing.LabelEncoder()
kobe.season = seson.fit_transform(kobe.season )
#kobe_2.season = seson.fit_transform(kobe.season )

st = preprocessing.LabelEncoder()
kobe.shot_type = st.fit_transform(kobe.shot_type )
#kobe_2.shot_type = st.fit_transform(kobe.shot_type )

sza = preprocessing.LabelEncoder()
kobe.shot_zone_area = sza.fit_transform(kobe.shot_zone_area )
#kobe_2.shot_zone_area = sza.fit_transform(kobe.shot_zone_area )

szb = preprocessing.LabelEncoder()
kobe.shot_zone_basic = szb.fit_transform(kobe.shot_zone_basic )
#kobe_2.shot_zone_basic = szb.fit_transform(kobe.shot_zone_basic )

szr = preprocessing.LabelEncoder()
kobe.shot_zone_range = szr.fit_transform(kobe.shot_zone_range )
#kobe_2.shot_zone_range = szr.fit_transform(kobe.shot_zone_range )









    Out[26]:






  
    
      
      action_type
      combined_shot_type
      game_event_id
      loc_x
      loc_y
      minutes_remaining
      period
      playoffs
      season
      seconds_remaining
      shot_distance
      shot_made_flag
      shot_type
      shot_zone_area
      shot_zone_basic
      shot_zone_range
      game_date
      matchup
      opponent
    
  
  
    
      2
      25
      3
      35
      -101
      135
      7
      1
      0
      4
      45
      16
      1.0
      0
      2
      4
      0
      250
      28
      25
    
    
      11
      25
      3
      4
      121
      127
      11
      1
      0
      4
      0
      17
      1.0
      0
      4
      4
      0
      254
      71
      30
    
    
      12
      40
      3
      27
      -67
      110
      7
      1
      0
      4
      9
      12
      1.0
      0
      3
      2
      2
      254
      71
      30
    
    
      17
      25
      3
      138
      -117
      226
      8
      2
      0
      4
      50
      25
      1.0
      1
      2
      0
      1
      254
      71
      30
    
    
      18
      25
      3
      244
      -132
      97
      11
      3
      0
      4
      29
      16
      0.0
      0
      2
      4
      0
      254
      71
      30

splitting data into test and train



In [44]:

    
from sklearn.cross_validation import train_test_split
# Generate the training set.  Set random_state to be able to replicate results.
train = kobe.sample(frac=0.6, random_state=1)
train_2 = kobe_2.sample(frac=0.6, random_state=1)
# Select anything not in the training set and put it in the testing set.
test = kobe.loc[~kobe.index.isin(train.index)] 
test_2 = kobe_2.loc[~kobe_2.index.isin(train_2.index)]

seperating features and class in both test and train sets



In [45]:

    
columns = kobe.columns.tolist()
columns = [c for c in columns if c not in ["shot_made_flag","team_id","team_name"]]
kobe_train_x =train[columns]
kobe_test_x =test[columns]
kobe_train_y=train['shot_made_flag']
kobe_test_y=test['shot_made_flag']
print(kobe_train_x.shape)
print(kobe_test_x.shape)
print(kobe_train_y.shape)
print(kobe_test_y.shape)









    



(15418, 18)
(10279, 18)
(15418L,)
(10279L,)

getting best parameters

do not run this section as the best set of parameters is already found



In [77]:

    
def optimization(depth, n_est,l_r):
    maxacc=0
    best_depth=0
    best_n_est=0
    best_l_r=0
    for i in range(1,depth):
        for j in n_est:
            for k in l_r: 
                gbm = xgb.XGBClassifier(max_depth=i, n_estimators=j, learning_rate=k).fit(kobe_train_x, kobe_train_y)
                predicted = gbm.predict(kobe_test_x)
                key=str(i)+"_"+str(j)+"_"+str(k)
                accu=accuracy_score(kobe_test_y, predicted)
                if(accu>maxacc):
                    maxacc=accu
                    best_depth=i
                    best_n_est=j
                    best_l_r=k
                    print(maxkey+" "+str(maxacc))
    return(best_depth,best_n_est,best_l_r)

n_est=[5,10,20,50,100,150,200,250,300,350,400,450,500,550,600,650,700,750,800,850,900,950,1000]
depth=10
l_r = [0.0001, 0.001, 0.01,0.05, 0.1, 0.2, 0.3]
best_depth,best_n_est,best_l_r=optimization(depth,n_est,l_r)

creating model with best parameter combination and reporting metrics



In [5]:

    
#hard coded the best features
gbm = xgb.XGBClassifier(max_depth=4, n_estimators=600, learning_rate=0.01).fit(kobe_train_x, kobe_train_y) 
predicted = gbm.predict(kobe_test_x)
# summarize the fit of the model
print(metrics.classification_report(kobe_test_y, predicted))
print("Confusion Matrix")
print(metrics.confusion_matrix(kobe_test_y, predicted))
accuracy=accuracy_score(kobe_test_y, predicted)
print("Accuracy: %.2f%%" % (accuracy * 100.0))









    



             precision    recall  f1-score   support

        0.0       0.67      0.86      0.75      5717
        1.0       0.73      0.46      0.56      4562

avg / total       0.69      0.68      0.67     10279

Confusion Matrix
[[4938  779]
 [2482 2080]]
Accuracy: 68.28%

creating a test file with predicted results to visualize



In [47]:

    
test_2['predicted']=predicted
test_2.to_csv(path_or_buf='test_with_predictions.csv', sep=',')
test_2.head(10)









    



C:\Users\Narmi\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':






    Out[47]:






  
    
      
      action_type
      combined_shot_type
      game_event_id
      loc_x
      loc_y
      minutes_remaining
      period
      playoffs
      season
      seconds_remaining
      shot_distance
      shot_made_flag
      shot_type
      shot_zone_area
      shot_zone_basic
      shot_zone_range
      game_date
      matchup
      opponent
      predicted
    
  
  
    
      2
      Jump Shot
      Jump Shot
      35
      -101
      135
      7
      1
      0
      2000-01
      45
      16
      1.0
      2PT Field Goal
      Left Side Center(LC)
      Mid-Range
      16-24 ft.
      10/31/2000
      LAL @ POR
      POR
      0.0
    
    
      11
      Jump Shot
      Jump Shot
      4
      121
      127
      11
      1
      0
      2000-01
      0
      17
      1.0
      2PT Field Goal
      Right Side Center(RC)
      Mid-Range
      16-24 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      0.0
    
    
      12
      Running Jump Shot
      Jump Shot
      27
      -67
      110
      7
      1
      0
      2000-01
      9
      12
      1.0
      2PT Field Goal
      Left Side(L)
      In The Paint (Non-RA)
      8-16 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      1.0
    
    
      17
      Jump Shot
      Jump Shot
      138
      -117
      226
      8
      2
      0
      2000-01
      50
      25
      1.0
      3PT Field Goal
      Left Side Center(LC)
      Above the Break 3
      24+ ft.
      11/1/2000
      LAL vs. UTA
      UTA
      0.0
    
    
      18
      Jump Shot
      Jump Shot
      244
      -132
      97
      11
      3
      0
      2000-01
      29
      16
      0.0
      2PT Field Goal
      Left Side Center(LC)
      Mid-Range
      16-24 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      0.0
    
    
      22
      Running Jump Shot
      Jump Shot
      274
      -16
      110
      7
      3
      0
      2000-01
      57
      11
      1.0
      2PT Field Goal
      Center(C)
      In The Paint (Non-RA)
      8-16 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      1.0
    
    
      24
      Running Jump Shot
      Jump Shot
      307
      -46
      63
      5
      3
      0
      2000-01
      11
      7
      1.0
      2PT Field Goal
      Center(C)
      In The Paint (Non-RA)
      Less Than 8 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      1.0
    
    
      25
      Layup Shot
      Layup
      332
      0
      0
      2
      3
      0
      2000-01
      36
      0
      0.0
      2PT Field Goal
      Center(C)
      Restricted Area
      Less Than 8 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      0.0
    
    
      29
      Jump Shot
      Jump Shot
      429
      3
      87
      6
      4
      0
      2000-01
      22
      8
      0.0
      2PT Field Goal
      Center(C)
      In The Paint (Non-RA)
      8-16 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      0.0
    
    
      31
      Jump Shot
      Jump Shot
      499
      127
      34
      0
      4
      0
      2000-01
      30
      13
      0.0
      2PT Field Goal
      Right Side(R)
      Mid-Range
      8-16 ft.
      11/1/2000
      LAL vs. UTA
      UTA
      0.0

	action_type	combined_shot_type	game_event_id	loc_x	loc_y	minutes_remaining	period	season	seconds_remaining	shot_distance	shot_made_flag	shot_type	shot_zone_area	shot_zone_basic	shot_zone_range	game_date	matchup	opponent
2	25	3	35	-101	135	7	1	4	45	16	1.0	0	2	4	0	250	28	25
11	25	3	4	121	127	11	1	4	0	17	1.0	0	4	4	0	254	71	30
12	40	3	27	-67	110	7	1	4	9	12	1.0	0	3	2	2	254	71	30
17	25	3	138	-117	226	8	2	4	50	25	1.0	1	2	0	1	254	71	30
18	25	3	244	-132	97	11	3	4	29	16	0.0	0	2	4	0	254	71	30

	action_type	combined_shot_type	game_event_id	loc_x	loc_y	minutes_remaining	period	season	seconds_remaining	shot_distance	shot_made_flag	shot_type	shot_zone_area	shot_zone_basic	shot_zone_range	game_date	matchup	opponent	predicted
2	Jump Shot	Jump Shot	35	-101	135	7	1	2000-01	45	16	1.0	2PT Field Goal	Left Side Center(LC)	Mid-Range	16-24 ft.	10/31/2000	LAL @ POR	POR	0.0
11	Jump Shot	Jump Shot	4	121	127	11	1	2000-01	0	17	1.0	2PT Field Goal	Right Side Center(RC)	Mid-Range	16-24 ft.	11/1/2000	LAL vs. UTA	UTA	0.0
12	Running Jump Shot	Jump Shot	27	-67	110	7	1	2000-01	9	12	1.0	2PT Field Goal	Left Side(L)	In The Paint (Non-RA)	8-16 ft.	11/1/2000	LAL vs. UTA	UTA	1.0
17	Jump Shot	Jump Shot	138	-117	226	8	2	2000-01	50	25	1.0	3PT Field Goal	Left Side Center(LC)	Above the Break 3	24+ ft.	11/1/2000	LAL vs. UTA	UTA	0.0
18	Jump Shot	Jump Shot	244	-132	97	11	3	2000-01	29	16	0.0	2PT Field Goal	Left Side Center(LC)	Mid-Range	16-24 ft.	11/1/2000	LAL vs. UTA	UTA	0.0
22	Running Jump Shot	Jump Shot	274	-16	110	7	3	2000-01	57	11	1.0	2PT Field Goal	Center(C)	In The Paint (Non-RA)	8-16 ft.	11/1/2000	LAL vs. UTA	UTA	1.0
24	Running Jump Shot	Jump Shot	307	-46	63	5	3	2000-01	11	7	1.0	2PT Field Goal	Center(C)	In The Paint (Non-RA)	Less Than 8 ft.	11/1/2000	LAL vs. UTA	UTA	1.0
25	Layup Shot	Layup	332	0	0	2	3	2000-01	36	0	0.0	2PT Field Goal	Center(C)	Restricted Area	Less Than 8 ft.	11/1/2000	LAL vs. UTA	UTA	0.0
29	Jump Shot	Jump Shot	429	3	87	6	4	2000-01	22	8	0.0	2PT Field Goal	Center(C)	In The Paint (Non-RA)	8-16 ft.	11/1/2000	LAL vs. UTA	UTA	0.0
31	Jump Shot	Jump Shot	499	127	34	0	4	2000-01	30	13	0.0	2PT Field Goal	Right Side(R)	Mid-Range	8-16 ft.	11/1/2000	LAL vs. UTA	UTA	0.0