notebook.community

Edit and run



In [1]:

    
%matplotlib inline

from matplotlib import pyplot as plt

import pandas as pd
import numpy as np

from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

pd.set_option('max_columns', 100)



In [2]:

    
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

models = {'svm': LinearSVC(), 
          'log_reg': LogisticRegression(), 
          'naive_baives': MultinomialNB(), 
          'knn': KNeighborsClassifier(),
          'dec_tree': DecisionTreeClassifier()}



In [3]:

    
kobe = pd.read_csv('../data/kobe.csv')
kobe.dropna(inplace=True)

kobe['reverse'] = kobe.action_type.map(lambda x: 1 if "Reverse" in x else 0)
kobe['running'] = kobe.action_type.map(lambda x: 1 if "Running" in x else 0)
kobe['driving'] = kobe.action_type.map(lambda x: 1 if "Driving" in x else 0)
kobe['floating'] = kobe.action_type.map(lambda x: 1 if "Floating" in x else 0)

combined_shot_type_dummies = pd.get_dummies(kobe.combined_shot_type)
shot_type_dummies = pd.get_dummies(kobe.shot_type)
shot_zone_range_dummies = pd.get_dummies(kobe.shot_zone_range)
opponent_dummies = pd.get_dummies(kobe.opponent)

kobe = pd.concat([
        kobe, 
        combined_shot_type_dummies, 
        shot_type_dummies, 
        shot_zone_range_dummies,
        opponent_dummies
    ], axis=1)

kobe.head(2)









    Out[3]:






  
    
      
      action_type
      combined_shot_type
      game_event_id
      game_id
      lat
      loc_x
      loc_y
      lon
      minutes_remaining
      period
      playoffs
      season
      seconds_remaining
      shot_distance
      shot_made_flag
      shot_type
      shot_zone_area
      shot_zone_basic
      shot_zone_range
      team_id
      team_name
      game_date
      matchup
      opponent
      shot_id
      reverse
      running
      driving
      floating
      Bank Shot
      Dunk
      Hook Shot
      Jump Shot
      Layup
      Tip Shot
      2PT Field Goal
      3PT Field Goal
      16-24 ft.
      24+ ft.
      8-16 ft.
      Back Court Shot
      Less Than 8 ft.
      ATL
      BKN
      BOS
      CHA
      CHI
      CLE
      DAL
      DEN
      DET
      GSW
      HOU
      IND
      LAC
      MEM
      MIA
      MIL
      MIN
      NJN
      NOH
      NOP
      NYK
      OKC
      ORL
      PHI
      PHX
      POR
      SAC
      SAS
      SEA
      TOR
      UTA
      VAN
      WAS
    
  
  
    
      1
      Jump Shot
      Jump Shot
      12
      20000012
      34.0443
      -157
      0
      -118.4268
      10
      1
      0
      2000-01
      22
      15
      0.0
      2PT Field Goal
      Left Side(L)
      Mid-Range
      8-16 ft.
      1610612747
      Los Angeles Lakers
      2000-10-31
      LAL @ POR
      POR
      2
      0
      0
      0
      0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Jump Shot
      Jump Shot
      35
      20000012
      33.9093
      -101
      135
      -118.3708
      7
      1
      0
      2000-01
      45
      16
      1.0
      2PT Field Goal
      Left Side Center(LC)
      Mid-Range
      16-24 ft.
      1610612747
      Los Angeles Lakers
      2000-10-31
      LAL @ POR
      POR
      3
      0
      0
      0
      0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0



In [4]:

    
feature_columns = [
    'shot_distance',
    'minutes_remaining',
    'period',
    'reverse',
] + list(combined_shot_type_dummies.columns) + list(shot_type_dummies.columns) + list(shot_zone_range_dummies)

print(feature_columns)









    



['shot_distance', 'minutes_remaining', 'period', 'reverse', 'Bank Shot', 'Dunk', 'Hook Shot', 'Jump Shot', 'Layup', 'Tip Shot', '2PT Field Goal', '3PT Field Goal', '16-24 ft.', '24+ ft.', '8-16 ft.', 'Back Court Shot', 'Less Than 8 ft.']



In [5]:

    
X = kobe[feature_columns]
y = kobe.shot_made_flag

logit = LogisticRegression()
cross_val_score(logit, X, y, 'accuracy', cv=10)









    Out[5]:





array([ 0.6176585 ,  0.6176585 ,  0.60350195,  0.60350195,  0.62256809,
        0.61074348,  0.62281043,  0.62242118,  0.62358895,  0.60412612])



In [6]:

    
kobe_train, kobe_test = train_test_split(kobe, test_size=0.2, stratify=kobe.shot_made_flag)

X_train = kobe_train[feature_columns]
y_train = kobe_train.shot_made_flag

model = LogisticRegression()
model.fit(X_train, y_train)

X_test = kobe_test[feature_columns]
y_test = kobe_test.shot_made_flag # y_true

kobe_test['pred'] = model.predict(X_test) # throws an warning / error

accuracy_score(y_test, kobe_test.pred) # out of sample accuracy









    



/Users/johria/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy






    Out[6]:





0.61536964980544751

	action_type	combined_shot_type	game_event_id	game_id	lat	loc_x	loc_y	lon	minutes_remaining	period	playoffs	season	seconds_remaining	shot_distance	shot_made_flag	shot_type	shot_zone_area	shot_zone_basic	shot_zone_range	team_id	team_name	game_date	matchup	opponent	shot_id	reverse	running	driving	floating	Bank Shot	Dunk	Hook Shot	Jump Shot	Layup	Tip Shot	2PT Field Goal	3PT Field Goal	16-24 ft.	24+ ft.	8-16 ft.	Back Court Shot	Less Than 8 ft.	ATL	BKN	BOS	CHA	CHI	CLE	DAL	DEN	DET	GSW	HOU	IND	LAC	MEM	MIA	MIL	MIN	NJN	NOH	NOP	NYK	OKC	ORL	PHI	PHX	POR	SAC	SAS	SEA	TOR	UTA	VAN	WAS
1	Jump Shot	Jump Shot	12	20000012	34.0443	-157	0	-118.4268	10	1	0	2000-01	22	15	0.0	2PT Field Goal	Left Side(L)	Mid-Range	8-16 ft.	1610612747	Los Angeles Lakers	2000-10-31	LAL @ POR	POR	2	0	0	0	0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	Jump Shot	Jump Shot	35	20000012	33.9093	-101	135	-118.3708	7	1	0	2000-01	45	16	1.0	2PT Field Goal	Left Side Center(LC)	Mid-Range	16-24 ft.	1610612747	Los Angeles Lakers	2000-10-31	LAL @ POR	POR	3	0	0	0	0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0