In [1]:
%matplotlib inline
import util
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
# Build a per-person random train/test split (60/40) of the LFW images.
# Side effects: fills train_data/test_data (one matrix per person),
# train_names/test_names (one label per image) and total_amount.
train_data = []
test_data = []
test_names = []
train_names = []
total_amount = 0
for root, files in util.get_person_images('../lfw_funneled/', 'jpg', 70):
    name, data, amount = util.get_dataset(root, files, 2, 3.2)  # TODO: only take the central pixels
    test_amount = int(np.ceil(amount * 0.4))
    # Derive the train count from the test count so the two always sum to
    # `amount`: floor(0.6*a) can disagree with a - ceil(0.4*a) under float
    # rounding, which would desynchronize the name lists from the data rows.
    train_amount = amount - test_amount
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the
    # documented replacement.
    index = np.arange(0, amount, dtype=int)
    total_amount = total_amount + amount
    # shuffle index to select random images
    np.random.shuffle(index)
    # save name vector
    test_names.extend([name] * test_amount)
    train_names.extend([name] * train_amount)
    # `i` instead of re-using `index` as the loop variable (it shadowed the
    # array it was iterating over).
    train_data.append(np.asmatrix([data[i] for i in index[test_amount:]]))
    test_data.append(np.asmatrix([data[i] for i in index[:test_amount]]))
    print("{} hat {} Trainings Bilder und {} Test Bilder".format(name, train_amount, test_amount))
In [3]:
# Stack the per-person matrices into one design matrix each
# (rows = images, columns = pixel features).
train_design_matrix = np.asmatrix(np.vstack(train_data))
test_design_matrix = np.asmatrix(np.vstack(test_data))
In [4]:
print("Insgesamt gibt es {} Trainings Bilder und {} Test Bilder".format(train_design_matrix.shape[0], test_design_matrix.shape[0]))
In [5]:
# align test and training data
# Center on the training "mean face": the per-pixel mean over all training
# images (axis=0), not the per-image scalar mean that axis=1 produced.
# The test set must be shifted by the *training* mean as well — centering it
# with its own mean would place train and test projections in
# differently-centered coordinate systems.
train_mean = np.mean(train_design_matrix, axis=0)
test_mean = np.mean(test_design_matrix, axis=0)  # kept for inspection; not used for centering
train_aligned = train_design_matrix - train_mean
test_aligned = test_design_matrix - train_mean
In [6]:
# Thin SVD of the centered training data; the rows of `v` are the principal
# directions (eigenfaces), `d` the singular values in descending order.
u, d, v = np.linalg.svd(train_aligned, full_matrices=False)
# Display the factor shapes as the cell output.
(u.shape, d.shape, v.shape)
Out[6]:
In [7]:
# Number of leading principal components to keep.
features = 7
# Project both sets onto the top `features` right-singular vectors
# (rows of v); result: one `features`-dimensional row per image.
basis = v[:features]
train_projected = np.dot(basis, train_aligned.T).T
test_projected = np.dot(basis, test_aligned.T).T
In [8]:
print("Shape Projiziert: train {}, test {}".format(np.shape(train_projected), np.shape(test_projected)))
In [9]:
# Build a one-vs-rest split for a single person.
person = 'George_W_Bush'
#person = 'Ariel_Sharon'
# (1, n) boolean matrices marking which images belong to `person`.
train_mask_george = np.asmatrix(train_names) == person
test_mask_george = np.asmatrix(test_names) == person
# NOTE(review): np.compress documents a 1-D condition, but mask[0, :] is
# still a (1, n) matrix here — confirm this selects the intended rows on
# the NumPy version in use.
train_george = np.compress(train_mask_george[0,:], train_projected, axis=0)
train_not_george = np.compress((~train_mask_george)[0,:], train_projected, axis=0)
# Label convention: -1 for `person`, +1 for everyone else.
# NOTE(review): iterating a (1, n) matrix row yields the whole row matrix,
# not scalar booleans — verify this comprehension produces one label per
# image rather than raising/ misbehaving.
train_labels = np.asarray([-1 if b else 1 for b in train_mask_george[0,:]])
test_george = np.compress(test_mask_george[0,:], test_projected, axis=0)
test_not_george = np.compress(~test_mask_george[0,:], test_projected, axis=0)
test_labels = np.asarray([-1 if b else 1 for b in test_mask_george[0,:]])
In [10]:
print("Im Trainingsdatensatz sind {} Bilder von George Bush und {} Bilder nicht von George Bush".format(train_george.shape[0], train_not_george.shape[0]))
In [11]:
# Both classifiers take the training data as a (positive, negative) pair.
class_split = (train_george, train_not_george)
george_perceptron = util.Perceptron(class_split)
george_gaussian = util.GaussianNaiveBayes(class_split)
In [12]:
# Train both models.
# NOTE(review): 100 is presumably the perceptron's iteration/epoch limit —
# confirm against util.Perceptron.learn; the naive Bayes fit takes no
# arguments (closed-form estimate, presumably).
george_perceptron.learn(100)
george_gaussian.learn()
In [104]:
#GNB
# Decision-threshold endpoints for the Gaussian-naive-Bayes ROC sweep
# (used as the linspace range further below); previously tried values are
# kept in the trailing comments.
c1_gnb = 0.2 # 0.00001 # 0.01
c2_gnb = 10.0 # 50 # 60
In [105]:
# Probe the GNB threshold endpoints: the first call keeps the true-positive
# count at c1, the second the false-positive count at c2; the other value of
# each pair is discarded.
# NOTE(review): the pair displayed below therefore mixes two *different*
# thresholds — confirm this endpoint check is intentional.
true_positive_gnb, false_positive = george_gaussian.classify(test_projected, test_labels, c1_gnb, False)
true_positive, false_positive_gnb = george_gaussian.classify(test_projected, test_labels, c2_gnb, False)
true_positive_gnb, false_positive_gnb
Out[105]:
In [ ]:
In [106]:
#Perceptron
# Decision-threshold endpoints for the perceptron ROC sweep (linspace range
# below).
c1_p = -10
c2_p = 9
In [107]:
# Probe the perceptron threshold endpoints: true-positive count at c1,
# false-positive count at c2 (mirrors the GNB endpoint check above).
# NOTE(review): the displayed pair mixes two different thresholds — confirm
# intentional.
true_positive_p, false_positive = george_perceptron.classify(test_projected, test_labels, c1_p, False)
true_positive, false_positive_p = george_perceptron.classify(test_projected, test_labels, c2_p, False)
true_positive_p, false_positive_p
Out[107]:
In [ ]:
In [109]:
# Sweep each classifier over its own threshold range to trace its ROC curve.
steps = 200
c_vals_p = np.linspace(c1_p, c2_p, steps)
c_vals_gnb = np.linspace(c1_gnb, c2_gnb, steps)
tp_p, fp_p = zip(*[george_perceptron.classify(test_projected, test_labels, c, False) for c in c_vals_p])
# Bug fix: the GNB sweep previously iterated over c_vals_p (the perceptron's
# thresholds), leaving c_vals_gnb unused and evaluating the Bayes ROC curve
# on the wrong range.
tp_gnb, fp_gnb = zip(*[george_gaussian.classify(test_projected, test_labels, c, False) for c in c_vals_gnb])
In [110]:
# ROC-style plot: true positives on x, false positives on y.
fig, ax = plt.subplots(figsize=(7, 7))
ax.plot(tp_p, fp_p, label="Perceptron")
ax.plot(tp_gnb, fp_gnb, label="Bayes")
ax.legend(loc='best')
ax.set_title('ROC Curve', fontsize=14)
ax.set_xlabel("True positives", fontsize=14)
ax.set_ylabel("False positives", fontsize=14)
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: