In [26]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix as conf_mat
from mnist_util import *
In [6]:
# Load the two data splits from the local 'data_notMNIST' folder.
# NOTE(review): both splits are indexed as split[class][sample] further down — confirm against mnist_util.
training, test = load_sorted_data('data_notMNIST')

# Named strategies; each maps the training data to the value stored in `sigma` under the same key.
# 'id' builds one identity matrix per element of the data, sized by the length of the first sample.
PRIORS = {
    'id': lambda data: [np.identity(len(data[0][0])) for _ in range(len(data))],
    'var': lambda data: variance(data),
    'var_1': lambda data: variance(data, axis=0),
    'cov': lambda data: covariance(data),
}

means = mean(training)

# Evaluate every strategy once on the training split.
sigma = {name: build(training) for name, build in PRIORS.items()}
In [3]:
plot_all_numbers(means, elements_per_line=5, plot_title="Means of the training dataset")
# The previous argument, `variances1`, is not assigned in any visible cell, so this line
# raised NameError on a fresh kernel (Restart & Run All).
# NOTE(review): sigma['var_1'] (i.e. variance(training, axis=0)) is presumably what was meant — confirm.
plot_all_numbers(sigma['var_1'], elements_per_line=5, plot_title="Variances of the training dataset")
In [7]:
# One entry per key of `sigma`: the result of `multivariates` for the training data and that sigma value.
pdfs = {name: multivariates(training, spread) for name, spread in sigma.items()}
Next, I plot the first 20 samples of each class (0-9) in the test dataset. I then predict the class of each sample with the four PDFs and show, for every class, the number of errors each PDF makes.
(The cell after the next one may take a while to run because it processes the entire test dataset.)
In [4]:
# Take the first 20 test samples of each of the 10 classes, flatten them into one list
# and convert every sample to a NumPy array before plotting.
first_twenty_per_class = [test[label][:20] for label in range(10)]
tmp = [np.array(sample) for sample in flatten_lists(first_twenty_per_class)]
plot_all_numbers(
    tmp,
    elements_per_line=20,
    plot_title="First 20 of each number from the test dataset",
)
In [19]:
# For every entry of `pdfs`, call `tell_all_numbers` once per element of `test`.
# preds[name][i] therefore holds the result for test[i].
preds = {
    name: [tell_all_numbers(pdf, nums) for nums in test]
    for name, pdf in pdfs.items()
}
In [18]:
# For each class index, print the first 20 predictions of every model together with
# the number of those predictions that differ from the class index.
for label in range(10):
    print("Right guess: {0}".format(label))
    for name, per_class in preds.items():
        head = per_class[label][:20]
        misses = sum(1 for guess in head if guess != label)
        print("{0}:\t{1}\tERRORS: {2}".format(name, head, misses))
    # Blank line between classes.
    print("")
In [66]:
class_names = [str(i) for i in range(10)]

# True label of every evaluated sample: class i repeated once per prediction stored for it.
# (These are labels of the *test* predictions, hence the rename from `training_labels`.)
# Materialised once so the same sequence can be reused for every model.
true_labels = list(flatten_lists([[i] * len(preds['id'][i]) for i in range(10)]))

# sklearn's signature is confusion_matrix(y_true, y_pred): rows are true classes and
# columns are predicted classes. The arguments used to be passed the other way round,
# which transposed every matrix.
confusion_matrix = {
    key: conf_mat(true_labels, flatten_lists(preds[key]))
    for key in preds
}

for key, matrix in confusion_matrix.items():
    # pyplot is imported as `plt` in the first cell; the previous `plot.figure` / `plot.show`
    # used a name this notebook never imports.
    plt.figure(figsize=(10, 10))
    plot_confusion_matrix(normalize(matrix), classes=class_names, title="{0} Confusion Matrix".format(key))
    plt.show()
In [ ]: