In [1]:
%matplotlib inline
import util
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
train_data = []
test_data = []
test_names = []
train_names = []
total_amount = 0
for root, files in util.get_person_images('../lfw_funneled/', 'jpg', 70):
    name, data, amount = util.get_dataset(root, files, 2, 3.2) # TODO: nur zentralen Pixel nehmen
    test_amount = int(np.ceil(amount*0.4))
    train_amount = int(np.floor(amount*0.6))
    index = np.arange(0, amount, dtype=np.int)
    total_amount = total_amount + amount
    # shuffle index to select random images
    np.random.shuffle(index)
    # save name vector
    test_names.extend([name]*test_amount)
    train_names.extend([name]*train_amount)
    train_data.append(np.asmatrix([data[index] for index in index[test_amount:]]))
    test_data.append(np.asmatrix([data[index] for index in index[:test_amount]]))
    print("{} hat {} Trainings Bilder und {} Test Bilder".format(name, train_amount, test_amount))


Colin_Powell hat 141 Trainings Bilder und 95 Test Bilder
George_W_Bush hat 318 Trainings Bilder und 212 Test Bilder
Hugo_Chavez hat 42 Trainings Bilder und 29 Test Bilder
Ariel_Sharon hat 46 Trainings Bilder und 31 Test Bilder
Tony_Blair hat 86 Trainings Bilder und 58 Test Bilder
Gerhard_Schroeder hat 65 Trainings Bilder und 44 Test Bilder
Donald_Rumsfeld hat 72 Trainings Bilder und 49 Test Bilder

In [3]:
train_design_matrix = np.asmatrix(np.concatenate(train_data))
test_design_matrix = np.asmatrix(np.concatenate(test_data))

In [4]:
print("Insgesamt gibt es {} Trainings Bilder und {} Test Bilder".format(train_design_matrix.shape[0], test_design_matrix.shape[0]))


Insgesamt gibt es 770 Trainings Bilder und 518 Test Bilder

In [5]:
# align test and training data
train_mean = np.mean(train_design_matrix, axis=1)
test_mean = np.mean(test_design_matrix, axis=1)
train_aligned = train_design_matrix - train_mean
test_aligned = test_design_matrix - test_mean

In [6]:
u, d, v = np.linalg.svd(train_aligned, full_matrices=False)
u.shape, d.shape, v.shape


Out[6]:
((770, 770), (770,), (770, 2209))

In [7]:
features = 7
train_projected = np.dot(v[:features], train_aligned.T).T
test_projected = np.dot(v[:features], test_aligned.T).T

In [8]:
print("Shape Projiziert: train {}, test {}".format(np.shape(train_projected), np.shape(test_projected)))


Shape Projiziert: train (770, 7), test (518, 7)

In [9]:
person = 'George_W_Bush'
#person = 'Ariel_Sharon'

train_mask_george = np.asmatrix(train_names) == person
test_mask_george = np.asmatrix(test_names) == person
train_george = np.compress(train_mask_george[0,:], train_projected, axis=0)
train_not_george = np.compress((~train_mask_george)[0,:], train_projected, axis=0)
train_labels = np.asarray([-1 if b else 1 for b in train_mask_george[0,:]])

test_george = np.compress(test_mask_george[0,:], test_projected, axis=0)
test_not_george = np.compress(~test_mask_george[0,:], test_projected, axis=0)
test_labels = np.asarray([-1 if b else 1 for b in test_mask_george[0,:]])

In [10]:
print("Im Trainingsdatensatz sind {} Bilder von George Bush und {} Bilder nicht von George Bush".format(train_george.shape[0], train_not_george.shape[0]))


Im Trainingsdatensatz sind 318 Bilder von George Bush und 452 Bilder nicht von George Bush

In [11]:
george_perceptron = util.Perceptron((train_george, train_not_george))
george_gaussian = util.GaussianNaiveBayes((train_george, train_not_george))


data set length: 770
data point dimension: 7
R: 125.18690441130492

In [12]:
george_perceptron.learn(100)
george_gaussian.learn()


Did not find linear seperation after 77770 steps
correction steps: 486
Last step: b = [ 0.], 
	   w = [-0.04027086  0.4266511  -0.42459731  2.79902855 -0.30884299 -1.43769885
 -0.45717172]
Functional margin min and max: -10.313563215565425, 30.58636197100177

In [104]:
#GNB
c1_gnb = 0.2 # 0.00001 # 0.01
c2_gnb = 10.0 # 50 # 60

In [105]:
true_positive_gnb, false_positive = george_gaussian.classify(test_projected, test_labels, c1_gnb, False)
true_positive, false_positive_gnb = george_gaussian.classify(test_projected, test_labels, c2_gnb, False)
true_positive_gnb, false_positive_gnb


Out[105]:
(0.0, 1.0)

In [ ]:


In [106]:
#Perceptron
c1_p = -10
c2_p = 9

In [107]:
true_positive_p, false_positive = george_perceptron.classify(test_projected, test_labels, c1_p, False)
true_positive, false_positive_p = george_perceptron.classify(test_projected, test_labels, c2_p, False)
true_positive_p, false_positive_p


Out[107]:
(0.0, 1.0)

In [ ]:


In [109]:
steps = 200
c_vals_p = np.linspace(c1_p, c2_p, steps)
c_vals_gnb = np.linspace(c1_gnb, c2_gnb, steps)
tp_p,fp_p = zip(*[george_perceptron.classify(test_projected, test_labels, c, False) for c in c_vals_p])
tp_gnb,fp_gnb = zip(*[george_gaussian.classify(test_projected, test_labels, c, False) for c in c_vals_p])

In [110]:
fig = plt.figure(figsize=(7,7))
ax = plt.subplot(1, 1, 1)
ax.plot(tp_p, fp_p)
ax.plot(tp_gnb,fp_gnb)
ax.legend(("Perceptron", "Bayes"), loc='best')
ax.set_title('ROC Curve', fontsize=14)
ax.set_xlabel("True positives", fontsize=14)
ax.set_ylabel("False positives", fontsize=14)
plt.show()



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: