Gaussian Naive Bayes
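This notebook first fits a Gaussian Naive Bayes classifier to a synthetic two-class dataset and then reuses it on eigenface features extracted from the LFW funneled face images. As a brief reminder of the standard model (a sketch, not necessarily the exact form implemented in util): the features are assumed conditionally independent given the class, each modelled by a univariate Gaussian, and a point is assigned to the class with the largest log-posterior:

$$\hat{y} = \arg\max_k \; \log p(C_k) + \sum_{i=1}^{D} \log \mathcal{N}\!\left(x_i \mid \mu_{k,i}, \sigma_{k,i}^2\right)$$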


In [1]:
%matplotlib inline
import util
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
variance = 0.2        # variance of the isotropic Gaussian clusters
data = 100            # number of samples to draw
pos = [0, 0]
pos_vec = [1.4, 1.4]
d1, d2, colors = util.create_isotropic_gaussian_twindataset(pos, data, variance, pos_vec)

# plot both datasets with the pandas wrapper
pd1 = pd.DataFrame(d1, columns=['x', 'y'])
pd2 = pd.DataFrame(d2, columns=['x', 'y'])
ax = pd1.plot(kind='scatter', x='x', y='y', figsize=(9, 9), color=colors[0], label='Data 1')
pd2.plot(kind='scatter', x='x', y='y', color=colors[1], label='Data 2', ax=ax)


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f671f96f470>

In [10]:
gaussian = util.GaussianNaiveBayes((d1,d2))

In [11]:
gaussian.learn()

In [12]:
gaussian.plot_discriminant_function(colors)
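The source of util.GaussianNaiveBayes is not shown here, so the following is only a rough sketch of what learn() and the classification step presumably compute (per-class priors, per-feature means and variances, and a comparison of the log-posteriors above); the name GaussianNBSketch and its methods are illustrative, not util's API:

import numpy as np

class GaussianNBSketch:
    """Minimal Gaussian Naive Bayes sketch: one diagonal Gaussian per class."""

    def __init__(self, datasets):
        # datasets: tuple of (n_k, D) arrays, one per class
        self.datasets = [np.asarray(d) for d in datasets]

    def learn(self):
        n_total = sum(len(d) for d in self.datasets)
        self.priors = [len(d) / n_total for d in self.datasets]
        self.means = [d.mean(axis=0) for d in self.datasets]
        self.vars = [d.var(axis=0) + 1e-9 for d in self.datasets]  # variance floor for stability

    def predict(self, X):
        X = np.asarray(X)
        log_post = []
        for prior, mu, var in zip(self.priors, self.means, self.vars):
            # sum of per-feature log Gaussian densities plus the log prior
            ll = -0.5 * np.sum(np.log(2 * np.pi * var) + (X - mu) ** 2 / var, axis=1)
            log_post.append(ll + np.log(prior))
        return np.argmax(np.column_stack(log_post), axis=1)

Used analogously to the util class: nb = GaussianNBSketch((d1, d2)); nb.learn(); nb.predict(d1) returns the predicted class index (0 or 1) for every row.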



In [13]:
train_data = []
test_data = []
test_names = []
train_names = []
total_amount = 0
for root, files in util.get_person_images('../lfw_funneled/', 'jpg', 70):
    name, data, amount = util.get_dataset(root, files, 2, 3.2)
    # 60/40 train/test split per person
    test_amount = int(np.ceil(amount * 0.4))
    train_amount = int(np.floor(amount * 0.6))
    index = np.arange(0, amount, dtype=int)
    total_amount = total_amount + amount
    # shuffle index to select random images
    np.random.shuffle(index)
    # save name vector
    test_names.extend([name] * test_amount)
    train_names.extend([name] * train_amount)
    train_data.append(np.asmatrix([data[i] for i in index[test_amount:]]))
    test_data.append(np.asmatrix([data[i] for i in index[:test_amount]]))
    print("{} has {} training images and {} test images".format(name, train_amount, test_amount))


Colin_Powell has 141 training images and 95 test images
George_W_Bush has 318 training images and 212 test images
Hugo_Chavez has 42 training images and 29 test images
Ariel_Sharon has 46 training images and 31 test images
Tony_Blair has 86 training images and 58 test images
Gerhard_Schroeder has 65 training images and 44 test images
Donald_Rumsfeld has 72 training images and 49 test images
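
Note that which individual images end up in the training and test sets depends on np.random.shuffle, so the assignment changes between runs; fixing the seed before the loop makes the split reproducible, e.g.:

np.random.seed(0)  # optional: make the random train/test assignment reproducible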

In [14]:
train_design_matrix = np.asmatrix(np.concatenate(train_data))
test_design_matrix = np.asmatrix(np.concatenate(test_data))

In [15]:
print("Insgesamt gibt es {} Trainings Bilder und {} Test Bilder".format(train_design_matrix.shape, test_design_matrix.shape))


Training design matrix: (770, 2209), test design matrix: (518, 2209)

In [16]:
# centre the data: axis=1 subtracts each image's own mean pixel intensity
train_mean = np.mean(train_design_matrix, axis=1)
test_mean = np.mean(test_design_matrix, axis=1)
train_aligned = train_design_matrix - train_mean
test_aligned = test_design_matrix - test_mean
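
This removes each image's own mean intensity. The more common eigenfaces convention is to subtract the per-pixel training mean (the "mean face", axis=0) from both sets, so that the test data is centred with statistics learned from the training data only; a sketch of that variant:

# alternative centring: subtract the per-pixel training mean ("mean face") from both sets
mean_face = np.mean(train_design_matrix, axis=0)   # shape (1, 2209)
train_aligned = train_design_matrix - mean_face
test_aligned = test_design_matrix - mean_face      # reuse the training mean for the test set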

In [17]:
u, d, v = np.linalg.svd(train_aligned, full_matrices=False)
u.shape, d.shape, v.shape


Out[17]:
((770, 770), (770,), (770, 2209))
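
The rows of v are the principal directions (eigenfaces) of the centred training data, and the singular values d measure how much variance each direction captures. An optional check of how much variance the leading components explain (under the centring used above):

# fraction of the total variance captured by each principal component
explained_variance_ratio = d**2 / np.sum(d**2)
print(explained_variance_ratio[:7], explained_variance_ratio[:7].sum())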

In [18]:
# Plot the first 7 eigenfaces
fig, axes = plt.subplots(1, 7, figsize=(14, 14))
for i, eigenface in enumerate(v[:7]):
    dim = int(np.sqrt(eigenface.shape[1]))  # 2209 pixels -> 47x47 images
    axes[i].imshow(np.reshape(eigenface, (dim, dim)), cmap='gray')
    axes[i].axis('off')



In [19]:
features = 7  # number of leading principal components (eigenfaces) to keep
train_projected = np.dot(v[:features], train_aligned.T).T
test_projected = np.dot(v[:features], test_aligned.T).T

In [20]:
print("Shape Projiziert: train {}, test {}".format(np.shape(train_projected), np.shape(test_projected)))


Projected shapes: train (770, 7), test (518, 7)
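
Each face is now described by only 7 PCA coefficients instead of 2209 pixels. To get a feeling for how much information survives, a training image can be approximately reconstructed from its coefficients; a sketch, adding back the per-image mean that was subtracted earlier:

# reconstruct the first training image from its 7 PCA coefficients
reconstruction = np.dot(train_projected[0], v[:features]) + train_mean[0]
dim = int(np.sqrt(reconstruction.shape[1]))
plt.imshow(np.reshape(reconstruction, (dim, dim)), cmap='gray')
plt.title('Reconstruction from {} components'.format(features))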

In [21]:
person = 'George_W_Bush'
#person = 'Ariel_Sharon'

# boolean masks marking which images belong to the chosen person
train_mask_george = np.asarray(train_names) == person
test_mask_george = np.asarray(test_names) == person

In [22]:
train_george = np.compress(train_mask_george, train_projected, axis=0)
train_not_george = np.compress(~train_mask_george, train_projected, axis=0)
train_labels = np.asarray([-1 if b else 1 for b in train_mask_george])

test_george = np.compress(test_mask_george, test_projected, axis=0)
test_not_george = np.compress(~test_mask_george, test_projected, axis=0)
test_labels = np.asarray([-1 if b else 1 for b in test_mask_george])

In [23]:
gaussian = util.GaussianNaiveBayes((train_george, train_not_george))

In [24]:
gaussian.learn()

In [25]:
gaussian.classify(test_projected,test_labels)


False negative (miss rate): 196 --> 37.84%
False positive (false alarm rate): 139 --> 26.83%
True negative (correct rejection): 73 --> 14.09%
True positive (detection rate): 110 --> 21.24%
Out[25]:
(110, 139)
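
A quick sanity check on these counts: the overall accuracy on the 518 test images is (110 + 73) / 518 ≈ 35.3%, and true negatives plus false positives (73 + 139 = 212) match the number of George_W_Bush test images, so with only 7 eigenface features the classifier separates this person from the rest rather weakly; increasing features is the obvious first knob to try.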
