In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
pd.set_option('max_columns', 100)
In [2]:
# Estimator classes used in this notebook (LogisticRegression was previously
# imported twice; one import is enough).
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# Registry of candidate classifiers, each built with library defaults and keyed
# by a short label.
# NOTE(review): the 'naive_baives' key looks like a typo for "naive_bayes"; it
# is left unchanged in case other cells look the model up by this exact key.
models = {
    'svm': LinearSVC(),
    'log_reg': LogisticRegression(),
    'naive_baives': MultinomialNB(),
    'knn': KNeighborsClassifier(),
    'dec_tree': DecisionTreeClassifier(),
}
In [3]:
# Load the shot log and discard every row that has a missing value in any column.
kobe = pd.read_csv('../data/kobe.csv').dropna()

# 0/1 indicator columns: 1 when the keyword occurs in the row's action_type.
for flag_name, keyword in [('reverse', 'Reverse'),
                           ('running', 'Running'),
                           ('driving', 'Driving'),
                           ('floating', 'Floating')]:
    # `kw=keyword` binds the current keyword at lambda-creation time.
    kobe[flag_name] = kobe.action_type.map(
        lambda action, kw=keyword: 1 if kw in action else 0)

# One indicator column per distinct value of each categorical column. The
# individual dummy frames stay available so their column names can be reused.
combined_shot_type_dummies = pd.get_dummies(kobe.combined_shot_type)
shot_type_dummies = pd.get_dummies(kobe.shot_type)
shot_zone_range_dummies = pd.get_dummies(kobe.shot_zone_range)
opponent_dummies = pd.get_dummies(kobe.opponent)

# Append all dummy columns to the right of the original columns.
dummy_frames = [combined_shot_type_dummies, shot_type_dummies,
                shot_zone_range_dummies, opponent_dummies]
kobe = pd.concat([kobe] + dummy_frames, axis=1)
kobe.head(2)
Out[3]:
In [4]:
# Predictor columns fed to the models: four columns named explicitly, followed
# by every dummy column from three of the one-hot frames (the opponent dummies
# are not included).
feature_columns = ['shot_distance', 'minutes_remaining', 'period', 'reverse']
feature_columns += combined_shot_type_dummies.columns.tolist()
feature_columns += shot_type_dummies.columns.tolist()
feature_columns += shot_zone_range_dummies.columns.tolist()
print(feature_columns)
In [5]:
# Feature matrix and target column for the cross-validated baseline.
X = kobe[feature_columns]
y = kobe.shot_made_flag
logit = LogisticRegression()
# Name the metric explicitly with `scoring=`: the fourth positional slot of
# cross_val_score is not `scoring` in sklearn.model_selection, so passing
# 'accuracy' positionally is misinterpreted or rejected there.
# The cell's output is the array of per-fold accuracies from 10-fold CV.
cross_val_score(logit, X, y, scoring='accuracy', cv=10)
Out[5]:
In [6]:
# Hold out 20% of the rows, stratified on the target so both parts keep the
# same class proportions. A fixed random_state makes the split (and therefore
# the reported accuracy) reproducible across kernel restarts.
kobe_train, kobe_test = train_test_split(
    kobe, test_size=0.2, stratify=kobe.shot_made_flag, random_state=42)

# Fit on the training part only.
X_train = kobe_train[feature_columns]
y_train = kobe_train.shot_made_flag
model = LogisticRegression()
model.fit(X_train, y_train)

X_test = kobe_test[feature_columns]
y_test = kobe_test.shot_made_flag  # y_true

# `.assign` returns a new frame with the extra column instead of writing into
# the slice returned by train_test_split, which avoids SettingWithCopyWarning.
kobe_test = kobe_test.assign(pred=model.predict(X_test))
accuracy_score(y_test, kobe_test.pred)  # out-of-sample accuracy
Out[6]: