Gaussian Naive Bayes
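This notebook first fits a Gaussian Naive Bayes classifier to a synthetic two-class dataset and then reuses it on eigenface features extracted from the LFW funneled face images. As a brief reminder of the standard model (a sketch, not necessarily the exact form implemented in util): the features are assumed conditionally independent given the class, each modelled by a univariate Gaussian, and a point is assigned to the class with the largest log-posterior:

$$\hat{y} = \arg\max_k \; \log p(C_k) + \sum_{i=1}^{D} \log \mathcal{N}\!\left(x_i \mid \mu_{k,i}, \sigma_{k,i}^2\right)$$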


In [1]:
%matplotlib inline
import util
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
variance = 0.2        # variance of the isotropic Gaussian clusters
data = 100            # number of samples to draw
pos = [0, 0]
pos_vec = [1.4, 1.4]
d1, d2, colors = util.create_isotropic_gaussian_twindataset(pos, data, variance, pos_vec)

# plot both datasets with the pandas wrapper
pd1 = pd.DataFrame(d1, columns=['x', 'y'])
pd2 = pd.DataFrame(d2, columns=['x', 'y'])
ax = pd1.plot(kind='scatter', x='x', y='y', figsize=(9, 9), color=colors[0], label='Data 1')
pd2.plot(kind='scatter', x='x', y='y', color=colors[1], label='Data 2', ax=ax)


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f671f96f470>

In [10]:
gaussian = util.GaussianNaiveBayes((d1,d2))

In [11]:
gaussian.learn()

In [12]:
gaussian.plot_discriminant_function(colors)
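The source of util.GaussianNaiveBayes is not shown here, so the following is only a rough sketch of what learn() and the classification step presumably compute (per-class priors, per-feature means and variances, and a comparison of the log-posteriors above); the name GaussianNBSketch and its methods are illustrative, not util's API:

import numpy as np

class GaussianNBSketch:
    """Minimal Gaussian Naive Bayes sketch: one diagonal Gaussian per class."""

    def __init__(self, datasets):
        # datasets: tuple of (n_k, D) arrays, one per class
        self.datasets = [np.asarray(d) for d in datasets]

    def learn(self):
        n_total = sum(len(d) for d in self.datasets)
        self.priors = [len(d) / n_total for d in self.datasets]
        self.means = [d.mean(axis=0) for d in self.datasets]
        self.vars = [d.var(axis=0) + 1e-9 for d in self.datasets]  # variance floor for stability

    def predict(self, X):
        X = np.asarray(X)
        log_post = []
        for prior, mu, var in zip(self.priors, self.means, self.vars):
            # sum of per-feature log Gaussian densities plus the log prior
            ll = -0.5 * np.sum(np.log(2 * np.pi * var) + (X - mu) ** 2 / var, axis=1)
            log_post.append(ll + np.log(prior))
        return np.argmax(np.column_stack(log_post), axis=1)

Used analogously to the util class: nb = GaussianNBSketch((d1, d2)); nb.learn(); nb.predict(d1) returns the predicted class index (0 or 1) for every row.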



In [13]:
train_data = []
test_data = []
test_names = []
train_names = []
total_amount = 0
for root, files in util.get_person_images('../lfw_funneled/', 'jpg', 70):
    name, data, amount = util.get_dataset(root, files, 2, 3.2)
    # 60/40 train/test split per person
    test_amount = int(np.ceil(amount * 0.4))
    train_amount = int(np.floor(amount * 0.6))
    index = np.arange(0, amount, dtype=int)
    total_amount = total_amount + amount
    # shuffle index to select random images
    np.random.shuffle(index)
    # save name vector
    test_names.extend([name] * test_amount)
    train_names.extend([name] * train_amount)
    train_data.append(np.asmatrix([data[i] for i in index[test_amount:]]))
    test_data.append(np.asmatrix([data[i] for i in index[:test_amount]]))
    print("{} has {} training images and {} test images".format(name, train_amount, test_amount))


Colin_Powell has 141 training images and 95 test images
George_W_Bush has 318 training images and 212 test images
Hugo_Chavez has 42 training images and 29 test images
Ariel_Sharon has 46 training images and 31 test images
Tony_Blair has 86 training images and 58 test images
Gerhard_Schroeder has 65 training images and 44 test images
Donald_Rumsfeld has 72 training images and 49 test images
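
Note that which individual images end up in the training and test sets depends on np.random.shuffle, so the assignment changes between runs; fixing the seed before the loop makes the split reproducible, e.g.:

np.random.seed(0)  # optional: make the random train/test assignment reproducible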

In [14]:
train_design_matrix = np.asmatrix(np.concatenate(train_data))
test_design_matrix = np.asmatrix(np.concatenate(test_data))

In [15]:
print("Insgesamt gibt es {} Trainings Bilder und {} Test Bilder".format(train_design_matrix.shape, test_design_matrix.shape))


Training design matrix: (770, 2209), test design matrix: (518, 2209)

In [16]:
# centre the data: axis=1 subtracts each image's own mean pixel intensity
train_mean = np.mean(train_design_matrix, axis=1)
test_mean = np.mean(test_design_matrix, axis=1)
train_aligned = train_design_matrix - train_mean
test_aligned = test_design_matrix - test_mean
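
This removes each image's own mean intensity. The more common eigenfaces convention is to subtract the per-pixel training mean (the "mean face", axis=0) from both sets, so that the test data is centred with statistics learned from the training data only; a sketch of that variant:

# alternative centring: subtract the per-pixel training mean ("mean face") from both sets
mean_face = np.mean(train_design_matrix, axis=0)   # shape (1, 2209)
train_aligned = train_design_matrix - mean_face
test_aligned = test_design_matrix - mean_face      # reuse the training mean for the test set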

In [17]:
u, d, v = np.linalg.svd(train_aligned, full_matrices=False)
u.shape, d.shape, v.shape


Out[17]:
((770, 770), (770,), (770, 2209))
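
The rows of v are the principal directions (eigenfaces) of the centred training data, and the singular values d measure how much variance each direction captures. An optional check of how much variance the leading components explain (under the centring used above):

# fraction of the total variance captured by each principal component
explained_variance_ratio = d**2 / np.sum(d**2)
print(explained_variance_ratio[:7], explained_variance_ratio[:7].sum())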

In [18]:
# Plot the first 7 eigenfaces
fig, axes = plt.subplots(1, 7, figsize=(14, 14))
for i, eigenface in enumerate(v[:7]):
    dim = int(np.sqrt(eigenface.shape[1]))  # 2209 pixels -> 47x47 images
    axes[i].imshow(np.reshape(eigenface, (dim, dim)), cmap='gray')
    axes[i].axis('off')



In [19]:
features = 7  # number of leading principal components (eigenfaces) to keep
train_projected = np.dot(v[:features], train_aligned.T).T
test_projected = np.dot(v[:features], test_aligned.T).T

In [20]:
print("Shape Projiziert: train {}, test {}".format(np.shape(train_projected), np.shape(test_projected)))


Projected shapes: train (770, 7), test (518, 7)
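
Each face is now described by only 7 PCA coefficients instead of 2209 pixels. To get a feeling for how much information survives, a training image can be approximately reconstructed from its coefficients; a sketch, adding back the per-image mean that was subtracted earlier:

# reconstruct the first training image from its 7 PCA coefficients
reconstruction = np.dot(train_projected[0], v[:features]) + train_mean[0]
dim = int(np.sqrt(reconstruction.shape[1]))
plt.imshow(np.reshape(reconstruction, (dim, dim)), cmap='gray')
plt.title('Reconstruction from {} components'.format(features))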

In [21]:
person = 'George_W_Bush'
#person = 'Ariel_Sharon'

# boolean masks marking which images belong to the chosen person
train_mask_george = np.asarray(train_names) == person
test_mask_george = np.asarray(test_names) == person

In [22]:
train_george = np.compress(train_mask_george, train_projected, axis=0)
train_not_george = np.compress(~train_mask_george, train_projected, axis=0)
train_labels = np.asarray([-1 if b else 1 for b in train_mask_george])

test_george = np.compress(test_mask_george, test_projected, axis=0)
test_not_george = np.compress(~test_mask_george, test_projected, axis=0)
test_labels = np.asarray([-1 if b else 1 for b in test_mask_george])

In [23]:
gaussian = util.GaussianNaiveBayes((train_george, train_not_george))

In [24]:
gaussian.learn()

In [25]:
gaussian.classify(test_projected,test_labels)


False negative (miss rate): 196 --> 37.84%
False positive (false alarm rate): 139 --> 26.83%
True negative (correct rejection): 73 --> 14.09%
True positive (detection rate): 110 --> 21.24%
Out[25]:
(110, 139)
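
A quick sanity check on these counts: the overall accuracy on the 518 test images is (110 + 73) / 518 ≈ 35.3%, and true negatives plus false positives (73 + 139 = 212) match the number of George_W_Bush test images, so with only 7 eigenface features the classifier separates this person from the rest rather weakly; increasing features is the obvious first knob to try.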
