In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
from sklearn.metrics import confusion_matrix
%matplotlib inline
In [2]:
n_neighbors = 1
data = pd.read_csv(os.path.join('..', 'Data', 'CrowdstormingDataJuly1st.csv'))
data = data.dropna()
# Features: player physique plus goals and card counts
X = data[['height', 'weight', 'goals', 'yellowCards', 'yellowReds', 'redCards']].values
# Target: playing position, encoded as integer category codes
posd = pd.Series(data.position, dtype="category")
y = posd.cat.codes.to_frame().values
# Random 80/20 train/test split
indices = np.random.permutation(data.shape[0])
split = int(round(.8 * len(indices)))
training_idx, test_idx = indices[:split], indices[split:]
training_X = X[training_idx, :]
training_y = y[training_idx].reshape(len(training_idx),)
test_X = X[test_idx, :]
test_y = y[test_idx].reshape(len(test_idx),)
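In [ ]:
# Sketch of an alternative split: scikit-learn's train_test_split can replace
# the manual permutation above, and stratify keeps the class proportions
# similar in the train and test sets. Reuses the X and y arrays built in the
# previous cell; the names Xtr/Xte/ytr/yte are just for illustration.
from sklearn.model_selection import train_test_split
Xtr, Xte, ytr, yte = train_test_split(
    X, y.ravel(), test_size=0.2, stratify=y.ravel(), random_state=0)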
In [7]:
data.keys()
Out[7]:
In [3]:
# 1-nearest-neighbour classifier with uniform (unweighted) votes
weights = 'uniform'
clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
clf.fit(training_X, training_y)
predicted = clf.predict(test_X)
# Fraction of test samples classified correctly
np.mean(predicted == test_y)
Out[3]:
In [4]:
# Same classifier with distance-weighted votes
weights = 'distance'
clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
clf.fit(training_X, training_y)
predicted = clf.predict(test_X)
np.mean(predicted == test_y)
Out[4]:
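In [ ]:
# Sketch: sweep over k to compare uniform and distance weighting on the same
# split. The particular k values are an arbitrary choice for illustration;
# clf_k is a throwaway name so the fitted clf above is left untouched.
for w in ('uniform', 'distance'):
    for k in (1, 5, 15, 50):
        clf_k = neighbors.KNeighborsClassifier(k, weights=w)
        clf_k.fit(training_X, training_y)
        print(w, k, clf_k.score(test_X, test_y))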
In [5]:
# Redo with only two features (height, weight) so the decision boundary can be plotted
X = data[['height', 'weight']].values
posd = pd.Series(data.position, dtype="category")
y = posd.cat.codes.to_frame().values
# Random 80/20 train/test split
indices = np.random.permutation(data.shape[0])
split = int(round(.8 * len(indices)))
training_idx, test_idx = indices[:split], indices[split:]
training_X = X[training_idx, :]
training_y = y[training_idx].reshape(len(training_idx),)
test_X = X[test_idx, :]
test_y = y[test_idx].reshape(len(test_idx),)
weights = 'distance'
clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
clf.fit(training_X, training_y)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max] x [y_min, y_max].
X = test_X[:,0:2]
y = test_y
h = .02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Color maps: one distinct color per position class (12 in total)
class_colors = ['#FFAAAA', '#AAFFAA', '#AAAAFF', '#FFD8AA',
                '#ff0000', '#ff8000', '#ffff00', '#40ff00',
                '#00ffff', '#bf00ff', '#ff0080', '#804000']
cmap_light = ListedColormap(class_colors)
cmap_bold = ListedColormap(class_colors)
# Put the result into a color plot
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("12-Class classification (k = %i, weights = '%s')"
% (n_neighbors, weights))
plt.show()
In [12]:
predicted = clf.predict(test_X)
np.mean(predicted == test_y)
Out[12]:
In [13]:
def plot_confusion_matrix(cm, names, title='Confusion matrix', cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(names))
    plt.xticks(tick_marks, names, rotation=45)
    plt.yticks(tick_marks, names)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
In [14]:
# Compute confusion matrix
cm = confusion_matrix(test_y, predicted)
np.set_printoptions(precision=2)
print('Confusion matrix, without normalization')
print(cm)
names = posd.cat.categories
plt.figure(figsize=(12,12))
plot_confusion_matrix(cm,names,title = 'Confusion matrix')
# Normalize the confusion matrix by row (i.e by the number of samples
# in each class)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print('Normalized confusion matrix')
print(cm_normalized)
plt.figure(figsize=(12,12))
plot_confusion_matrix(cm_normalized,names, title='Normalized confusion matrix')
plt.show()
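In [ ]:
# Sketch: with scikit-learn >= 0.22 the same matrices can be drawn with the
# built-in ConfusionMatrixDisplay instead of the helper above. Reuses cm and
# names from the previous cell.
from sklearn.metrics import ConfusionMatrixDisplay
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=names)
disp.plot(cmap=plt.cm.Blues, xticks_rotation=45)
plt.show()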
In [3]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
In [ ]:
# Linear SVM
clf = OneVsRestClassifier(LinearSVC(random_state=0))
clf.fit(training_X, training_y)
In [9]:
predicted = clf.predict(test_X)
np.mean(predicted == test_y)
Out[9]:
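In [ ]:
# Sketch: LinearSVC is sensitive to feature scale, so standardizing the
# features before fitting may change the accuracy noticeably. Uses the
# standard StandardScaler/make_pipeline utilities; svm_scaled is just an
# illustrative name.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
svm_scaled = make_pipeline(StandardScaler(),
                           OneVsRestClassifier(LinearSVC(random_state=0)))
svm_scaled.fit(training_X, training_y)
np.mean(svm_scaled.predict(test_X) == test_y)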
In [ ]:
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max] x [y_min, y_max].
X = test_X[:,0:2]
y = test_y
h = .02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Color maps: one distinct color per position class (12 in total)
class_colors = ['#FFAAAA', '#AAFFAA', '#AAAAFF', '#FFD8AA',
                '#ff0000', '#ff8000', '#ffff00', '#40ff00',
                '#00ffff', '#bf00ff', '#ff0080', '#804000']
cmap_light = ListedColormap(class_colors)
cmap_bold = ListedColormap(class_colors)
# Put the result into a color plot
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("3-Class classification (k = %i, weights = '%s')"
% (n_neighbors, weights))
plt.show()
In [ ]:
# Gaussian Naive Bayes classifier
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(training_X, training_y)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max] x [y_min, y_max].
X = test_X[:,0:2]
y = test_y
h = .02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Color maps: one distinct color per position class (12 in total)
class_colors = ['#FFAAAA', '#AAFFAA', '#AAAAFF', '#FFD8AA',
                '#ff0000', '#ff8000', '#ffff00', '#40ff00',
                '#00ffff', '#bf00ff', '#ff0080', '#804000']
cmap_light = ListedColormap(class_colors)
cmap_bold = ListedColormap(class_colors)
# Put the result into a color plot
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("3-Class classification (k = %i, weights = '%s')"
% (n_neighbors, weights))
plt.show()
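In [ ]:
# Sketch: test accuracy of the three classifiers side by side on the same
# two-feature split, as a quick sanity check. Reuses training_X, training_y,
# test_X and test_y from the cells above.
models = {
    'kNN (distance)': neighbors.KNeighborsClassifier(n_neighbors, weights='distance'),
    'Linear SVM (OvR)': OneVsRestClassifier(LinearSVC(random_state=0)),
    'Gaussian NB': GaussianNB(),
}
for name, model in models.items():
    model.fit(training_X, training_y)
    print(name, model.score(test_X, test_y))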
In [ ]: