Face recognition using PCA and SVMs

The dataset used in this example is a preprocessed excerpt of the "Labeled Faces in the Wild" dataset, aka LFW:

http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)

LFW: http://vis-www.cs.umass.edu/lfw/


In [1]:
%matplotlib inline
from time import time
import logging
import matplotlib.pyplot as plt

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn import manifold

# Emit timestamped INFO-level log records so the dataset loader's progress is
# visible (logging.basicConfig's default handler writes to stderr).
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

# Load the LFW faces, keeping only people with at least 70 pictures and
# resizing each picture by a ratio of 0.4.
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images array to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the 2D data matrix directly, one row per sample
# (relative pixel position info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print("Total dataset size:")
print("n_samples: %d" % n_samples)
print("n_features: %d" % n_features)
print("n_classes: %d" % n_classes)

# split into a training and testing set (25% held out); the fixed
# random_state makes the split identical on every run
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)


2017-03-17 23:07:01,859 Loading LFW people faces from /home/chandu/scikit_learn_data/lfw_home
Automatically created module for IPython interactive environment
Total dataset size:
n_samples: 1288
n_features: 1850
n_classes: 7

In [4]:
# Sweep the number of PCA components and, for each value, record the accuracy
# on the test split of an RBF-kernel SVM tuned by grid search on the
# PCA-projected training data.
# NOTE(review): accuracy here is measured on the test split, so choosing
# n_components from this curve tunes a hyper-parameter on test data --
# confirm that is acceptable or use a separate validation split.
n_comp_1 = np.arange(150, 250, 3)

# The SVM search grid does not depend on the number of components, so it is
# built once instead of on every iteration.
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }

accuracies = []
components = []
# Iterate over the values directly; the previous `xrange(len(...))` index loop
# raises NameError on Python 3.
for n_components in n_comp_1:
    # random_state pins the randomized SVD so that re-running the cell
    # reproduces the same projection.
    pca = PCA(n_components=n_components, svd_solver='randomized',
              whiten=True, random_state=42).fit(X_train)

    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)

    clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    clf = clf.fit(X_train_pca, y_train)
    y_pred = clf.predict(X_test_pca)

    # Fraction of test samples predicted correctly; computed once and reused
    # for both the stored curve and the printed summary.
    accuracy = float(np.sum(y_test == y_pred)) / len(y_pred)
    accuracies.append(accuracy)
    components.append(n_components)

    print('For '+str(n_components)+' components, accuracy is '+str(accuracy)+' confusion matrix is: ')
    print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
    print(classification_report(y_test, y_pred, target_names=target_names))


For 150 components, accuracy is 0.819875776398 confusion matrix is: 
[[  4   8   1   0   0   0   0]
 [  0  51   0   4   1   0   4]
 [  0   1  22   4   0   0   0]
 [  1   5   5 131   0   3   1]
 [  0   0   0   2  17   1   5]
 [  1   0   0   4   0  10   0]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.31      0.42        13
     Colin Powell       0.76      0.85      0.80        60
  Donald Rumsfeld       0.76      0.81      0.79        27
    George W Bush       0.88      0.90      0.89       146
Gerhard Schroeder       0.94      0.68      0.79        25
      Hugo Chavez       0.71      0.67      0.69        15
       Tony Blair       0.74      0.81      0.77        36

      avg / total       0.82      0.82      0.82       322

For 153 components, accuracy is 0.866459627329 confusion matrix is: 
[[  5   6   1   1   0   0   0]
 [  0  54   0   4   0   0   2]
 [  0   1  20   6   0   0   0]
 [  0   1   1 144   0   0   0]
 [  0   0   0   4  17   0   4]
 [  0   0   0   4   0  10   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       1.00      0.38      0.56        13
     Colin Powell       0.84      0.90      0.87        60
  Donald Rumsfeld       0.87      0.74      0.80        27
    George W Bush       0.86      0.99      0.92       146
Gerhard Schroeder       1.00      0.68      0.81        25
      Hugo Chavez       1.00      0.67      0.80        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.88      0.87      0.86       322

For 156 components, accuracy is 0.860248447205 confusion matrix is: 
[[  5   7   1   0   0   0   0]
 [  0  52   1   5   0   0   2]
 [  0   1  21   5   0   0   0]
 [  0   2   1 143   0   0   0]
 [  0   0   0   4  17   0   4]
 [  0   0   0   5   0  10   0]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       1.00      0.38      0.56        13
     Colin Powell       0.81      0.87      0.84        60
  Donald Rumsfeld       0.84      0.78      0.81        27
    George W Bush       0.86      0.98      0.92       146
Gerhard Schroeder       1.00      0.68      0.81        25
      Hugo Chavez       1.00      0.67      0.80        15
       Tony Blair       0.83      0.81      0.82        36

      avg / total       0.87      0.86      0.85       322

For 159 components, accuracy is 0.82298136646 confusion matrix is: 
[[  5   6   1   0   1   0   0]
 [  0  51   1   4   1   1   2]
 [  0   3  19   5   0   0   0]
 [  1   4   4 133   0   3   1]
 [  0   0   0   4  17   0   4]
 [  0   0   0   2   2  11   0]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.83      0.38      0.53        13
     Colin Powell       0.77      0.85      0.81        60
  Donald Rumsfeld       0.73      0.70      0.72        27
    George W Bush       0.88      0.91      0.89       146
Gerhard Schroeder       0.81      0.68      0.74        25
      Hugo Chavez       0.73      0.73      0.73        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.82      0.82      0.82       322

For 162 components, accuracy is 0.82298136646 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  51   0   4   1   1   3]
 [  0   3  19   5   0   0   0]
 [  2   3   4 135   0   1   1]
 [  0   0   0   2  16   1   6]
 [  0   0   0   3   1  10   1]
 [  0   2   2   3   1   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.78      0.85      0.82        60
  Donald Rumsfeld       0.73      0.70      0.72        27
    George W Bush       0.89      0.92      0.91       146
Gerhard Schroeder       0.84      0.64      0.73        25
      Hugo Chavez       0.77      0.67      0.71        15
       Tony Blair       0.72      0.78      0.75        36

      avg / total       0.82      0.82      0.82       322

For 165 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  51   1   4   1   1   2]
 [  0   2  20   5   0   0   0]
 [  1   3   4 135   0   2   1]
 [  0   0   0   4  16   0   5]
 [  1   0   0   4   0  10   0]
 [  0   2   3   2   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.80      0.85      0.82        60
  Donald Rumsfeld       0.69      0.74      0.71        27
    George W Bush       0.88      0.92      0.90       146
Gerhard Schroeder       0.94      0.64      0.76        25
      Hugo Chavez       0.77      0.67      0.71        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.83      0.83      0.83       322

For 168 components, accuracy is 0.841614906832 confusion matrix is: 
[[  7   6   0   0   0   0   0]
 [  0  53   0   5   1   0   1]
 [  0   3  20   4   0   0   0]
 [  1   6   1 134   2   2   0]
 [  0   0   0   4  17   0   4]
 [  0   0   0   3   1  11   0]
 [  0   3   2   2   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.88      0.54      0.67        13
     Colin Powell       0.75      0.88      0.81        60
  Donald Rumsfeld       0.87      0.74      0.80        27
    George W Bush       0.88      0.92      0.90       146
Gerhard Schroeder       0.81      0.68      0.74        25
      Hugo Chavez       0.85      0.73      0.79        15
       Tony Blair       0.85      0.81      0.83        36

      avg / total       0.84      0.84      0.84       322

For 171 components, accuracy is 0.826086956522 confusion matrix is: 
[[  5   6   1   1   0   0   0]
 [  0  52   1   4   1   1   1]
 [  0   2  20   5   0   0   0]
 [  1   5   3 133   1   2   1]
 [  0   0   0   4  16   1   4]
 [  0   0   0   3   1  10   1]
 [  0   2   1   3   0   0  30]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.83      0.38      0.53        13
     Colin Powell       0.78      0.87      0.82        60
  Donald Rumsfeld       0.77      0.74      0.75        27
    George W Bush       0.87      0.91      0.89       146
Gerhard Schroeder       0.84      0.64      0.73        25
      Hugo Chavez       0.71      0.67      0.69        15
       Tony Blair       0.81      0.83      0.82        36

      avg / total       0.83      0.83      0.82       322

For 174 components, accuracy is 0.832298136646 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  53   1   5   0   1   0]
 [  0   2  20   5   0   0   0]
 [  2   4   2 135   1   1   1]
 [  0   0   0   4  16   0   5]
 [  0   0   0   4   1   9   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.79      0.88      0.83        60
  Donald Rumsfeld       0.80      0.74      0.77        27
    George W Bush       0.86      0.92      0.89       146
Gerhard Schroeder       0.89      0.64      0.74        25
      Hugo Chavez       0.82      0.60      0.69        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.83      0.83      0.83       322

For 177 components, accuracy is 0.813664596273 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  55   1   3   1   0   0]
 [  0   2  20   5   0   0   0]
 [  2   6   4 128   2   3   1]
 [  0   0   0   4  16   0   5]
 [  0   0   0   3   1  10   1]
 [  1   3   2   3   0   0  27]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.76      0.92      0.83        60
  Donald Rumsfeld       0.71      0.74      0.73        27
    George W Bush       0.88      0.88      0.88       146
Gerhard Schroeder       0.80      0.64      0.71        25
      Hugo Chavez       0.77      0.67      0.71        15
       Tony Blair       0.79      0.75      0.77        36

      avg / total       0.81      0.81      0.81       322

For 180 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  53   2   2   0   1   1]
 [  0   4  19   4   0   0   0]
 [  1   6   2 133   0   2   2]
 [  0   0   0   4  17   0   4]
 [  0   0   0   2   2  11   0]
 [  0   3   2   3   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.74      0.88      0.80        60
  Donald Rumsfeld       0.73      0.70      0.72        27
    George W Bush       0.90      0.91      0.90       146
Gerhard Schroeder       0.89      0.68      0.77        25
      Hugo Chavez       0.79      0.73      0.76        15
       Tony Blair       0.80      0.78      0.79        36

      avg / total       0.83      0.83      0.83       322

For 183 components, accuracy is 0.844720496894 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  55   1   2   0   1   0]
 [  0   2  19   5   0   0   1]
 [  2   4   2 136   0   1   1]
 [  0   1   0   4  15   0   5]
 [  0   0   0   1   2  11   1]
 [  0   2   1   3   0   0  30]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.79      0.92      0.85        60
  Donald Rumsfeld       0.79      0.70      0.75        27
    George W Bush       0.90      0.93      0.92       146
Gerhard Schroeder       0.88      0.60      0.71        25
      Hugo Chavez       0.85      0.73      0.79        15
       Tony Blair       0.79      0.83      0.81        36

      avg / total       0.84      0.84      0.84       322

For 186 components, accuracy is 0.854037267081 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  56   1   2   0   0   1]
 [  0   2  22   3   0   0   0]
 [  1   5   2 135   1   2   0]
 [  0   0   0   4  16   0   5]
 [  0   0   0   1   2  11   1]
 [  0   3   1   3   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.46      0.60        13
     Colin Powell       0.78      0.93      0.85        60
  Donald Rumsfeld       0.81      0.81      0.81        27
    George W Bush       0.91      0.92      0.92       146
Gerhard Schroeder       0.84      0.64      0.73        25
      Hugo Chavez       0.85      0.73      0.79        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.86      0.85      0.85       322

For 189 components, accuracy is 0.847826086957 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  54   1   2   0   1   1]
 [  0   2  22   3   0   0   0]
 [  1   3   3 135   1   2   1]
 [  0   0   0   4  17   0   4]
 [  0   0   0   2   2  11   0]
 [  0   3   2   3   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.79      0.90      0.84        60
  Donald Rumsfeld       0.76      0.81      0.79        27
    George W Bush       0.91      0.92      0.92       146
Gerhard Schroeder       0.85      0.68      0.76        25
      Hugo Chavez       0.79      0.73      0.76        15
       Tony Blair       0.82      0.78      0.80        36

      avg / total       0.85      0.85      0.84       322

For 192 components, accuracy is 0.841614906832 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  55   1   2   0   1   1]
 [  0   1  22   4   0   0   0]
 [  2   6   2 133   1   1   1]
 [  0   1   0   4  16   0   4]
 [  0   0   0   3   2  10   0]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.77      0.92      0.84        60
  Donald Rumsfeld       0.81      0.81      0.81        27
    George W Bush       0.89      0.91      0.90       146
Gerhard Schroeder       0.84      0.64      0.73        25
      Hugo Chavez       0.83      0.67      0.74        15
       Tony Blair       0.83      0.81      0.82        36

      avg / total       0.84      0.84      0.84       322

For 195 components, accuracy is 0.832298136646 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  54   2   1   0   1   2]
 [  0   1  19   7   0   0   0]
 [  0   7   2 134   1   1   1]
 [  0   0   0   4  17   0   4]
 [  0   1   0   1   2  10   1]
 [  0   3   1   4   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       1.00      0.46      0.63        13
     Colin Powell       0.75      0.90      0.82        60
  Donald Rumsfeld       0.76      0.70      0.73        27
    George W Bush       0.89      0.92      0.90       146
Gerhard Schroeder       0.85      0.68      0.76        25
      Hugo Chavez       0.83      0.67      0.74        15
       Tony Blair       0.78      0.78      0.78        36

      avg / total       0.84      0.83      0.83       322

For 198 components, accuracy is 0.847826086957 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  55   2   1   0   1   1]
 [  0   3  20   4   0   0   0]
 [  1   5   1 136   1   1   1]
 [  0   0   0   4  16   1   4]
 [  0   0   0   0   2  12   1]
 [  0   3   1   4   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.46      0.60        13
     Colin Powell       0.76      0.92      0.83        60
  Donald Rumsfeld       0.80      0.74      0.77        27
    George W Bush       0.91      0.93      0.92       146
Gerhard Schroeder       0.84      0.64      0.73        25
      Hugo Chavez       0.80      0.80      0.80        15
       Tony Blair       0.80      0.78      0.79        36

      avg / total       0.85      0.85      0.84       322

For 201 components, accuracy is 0.847826086957 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  53   1   4   1   0   0]
 [  0   1  22   4   0   0   0]
 [  2   5   0 136   1   1   1]
 [  0   0   0   3  16   1   5]
 [  0   0   0   2   1  11   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.79      0.88      0.83        60
  Donald Rumsfeld       0.88      0.81      0.85        27
    George W Bush       0.89      0.93      0.91       146
Gerhard Schroeder       0.84      0.64      0.73        25
      Hugo Chavez       0.85      0.73      0.79        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.85      0.85      0.84       322

For 204 components, accuracy is 0.832298136646 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  54   2   2   1   0   1]
 [  0   1  21   5   0   0   0]
 [  2   6   1 132   3   1   1]
 [  0   1   0   3  16   1   4]
 [  0   0   0   1   2  11   1]
 [  0   2   1   5   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.77      0.90      0.83        60
  Donald Rumsfeld       0.81      0.78      0.79        27
    George W Bush       0.89      0.90      0.90       146
Gerhard Schroeder       0.73      0.64      0.68        25
      Hugo Chavez       0.85      0.73      0.79        15
       Tony Blair       0.80      0.78      0.79        36

      avg / total       0.83      0.83      0.83       322

For 207 components, accuracy is 0.841614906832 confusion matrix is: 
[[  7   6   0   0   0   0   0]
 [  0  53   1   3   0   2   1]
 [  0   3  20   4   0   0   0]
 [  1   5   1 136   2   1   0]
 [  0   1   0   3  15   1   5]
 [  0   0   0   1   1  12   1]
 [  0   2   1   5   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.88      0.54      0.67        13
     Colin Powell       0.76      0.88      0.82        60
  Donald Rumsfeld       0.87      0.74      0.80        27
    George W Bush       0.89      0.93      0.91       146
Gerhard Schroeder       0.83      0.60      0.70        25
      Hugo Chavez       0.75      0.80      0.77        15
       Tony Blair       0.80      0.78      0.79        36

      avg / total       0.84      0.84      0.84       322

For 210 components, accuracy is 0.844720496894 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  54   2   1   0   1   1]
 [  0   1  22   4   0   0   0]
 [  2   5   1 135   1   2   0]
 [  0   1   0   4  16   0   4]
 [  0   0   0   3   1  10   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.78      0.90      0.84        60
  Donald Rumsfeld       0.81      0.81      0.81        27
    George W Bush       0.89      0.92      0.91       146
Gerhard Schroeder       0.89      0.64      0.74        25
      Hugo Chavez       0.77      0.67      0.71        15
       Tony Blair       0.83      0.81      0.82        36

      avg / total       0.84      0.84      0.84       322

For 213 components, accuracy is 0.847826086957 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  53   0   3   0   1   2]
 [  0   1  22   4   0   0   0]
 [  1   4   3 135   1   2   0]
 [  0   0   0   4  17   0   4]
 [  0   0   0   1   2  11   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.80      0.88      0.84        60
  Donald Rumsfeld       0.81      0.81      0.81        27
    George W Bush       0.89      0.92      0.91       146
Gerhard Schroeder       0.85      0.68      0.76        25
      Hugo Chavez       0.79      0.73      0.76        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.85      0.85      0.84       322

For 216 components, accuracy is 0.829192546584 confusion matrix is: 
[[  7   6   0   0   0   0   0]
 [  1  52   3   2   0   0   2]
 [  0   2  21   4   0   0   0]
 [  2   5   2 130   2   4   1]
 [  0   0   0   4  17   0   4]
 [  0   0   0   2   1  11   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.70      0.54      0.61        13
     Colin Powell       0.78      0.87      0.82        60
  Donald Rumsfeld       0.78      0.78      0.78        27
    George W Bush       0.89      0.89      0.89       146
Gerhard Schroeder       0.85      0.68      0.76        25
      Hugo Chavez       0.73      0.73      0.73        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.83      0.83      0.83       322

For 219 components, accuracy is 0.826086956522 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  1  53   3   3   0   0   0]
 [  0   0  23   4   0   0   0]
 [  2   6   2 131   2   3   0]
 [  0   1   0   3  16   1   4]
 [  0   0   0   2   1  11   1]
 [  0   3   1   6   0   0  26]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.78      0.88      0.83        60
  Donald Rumsfeld       0.77      0.85      0.81        27
    George W Bush       0.87      0.90      0.89       146
Gerhard Schroeder       0.84      0.64      0.73        25
      Hugo Chavez       0.73      0.73      0.73        15
       Tony Blair       0.84      0.72      0.78        36

      avg / total       0.83      0.83      0.82       322

For 222 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  52   2   3   2   0   1]
 [  0   1  21   5   0   0   0]
 [  1   6   2 130   2   3   2]
 [  0   0   0   3  17   1   4]
 [  0   0   0   2   1  11   1]
 [  0   2   1   3   0   0  30]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.46      0.60        13
     Colin Powell       0.79      0.87      0.83        60
  Donald Rumsfeld       0.78      0.78      0.78        27
    George W Bush       0.88      0.89      0.89       146
Gerhard Schroeder       0.77      0.68      0.72        25
      Hugo Chavez       0.73      0.73      0.73        15
       Tony Blair       0.79      0.83      0.81        36

      avg / total       0.83      0.83      0.83       322

For 225 components, accuracy is 0.826086956522 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  54   1   3   0   0   2]
 [  0   2  19   6   0   0   0]
 [  2   7   3 129   2   2   1]
 [  0   1   0   2  18   1   3]
 [  0   0   0   2   1  11   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.75      0.90      0.82        60
  Donald Rumsfeld       0.76      0.70      0.73        27
    George W Bush       0.88      0.88      0.88       146
Gerhard Schroeder       0.86      0.72      0.78        25
      Hugo Chavez       0.79      0.73      0.76        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.83      0.83      0.82       322

For 228 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  53   0   3   1   1   2]
 [  0   1  21   5   0   0   0]
 [  1   8   2 130   2   2   1]
 [  0   1   0   4  17   0   3]
 [  0   0   0   2   1  11   1]
 [  0   3   1   3   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.46      0.60        13
     Colin Powell       0.75      0.88      0.81        60
  Donald Rumsfeld       0.84      0.78      0.81        27
    George W Bush       0.88      0.89      0.88       146
Gerhard Schroeder       0.81      0.68      0.74        25
      Hugo Chavez       0.79      0.73      0.76        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.83      0.83      0.83       322

For 231 components, accuracy is 0.832298136646 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  54   0   4   0   0   2]
 [  0   2  20   5   0   0   0]
 [  2   7   1 131   1   3   1]
 [  0   0   0   4  18   1   2]
 [  0   0   0   2   1  11   1]
 [  0   3   1   4   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.76      0.90      0.82        60
  Donald Rumsfeld       0.87      0.74      0.80        27
    George W Bush       0.87      0.90      0.88       146
Gerhard Schroeder       0.90      0.72      0.80        25
      Hugo Chavez       0.73      0.73      0.73        15
       Tony Blair       0.82      0.78      0.80        36

      avg / total       0.83      0.83      0.83       322

For 234 components, accuracy is 0.835403726708 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  1  53   1   2   1   0   2]
 [  0   2  20   5   0   0   0]
 [  2   4   2 133   1   3   1]
 [  0   0   0   2  18   1   4]
 [  0   0   0   1   1  12   1]
 [  0   4   2   3   0   0  27]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.78      0.88      0.83        60
  Donald Rumsfeld       0.77      0.74      0.75        27
    George W Bush       0.90      0.91      0.91       146
Gerhard Schroeder       0.86      0.72      0.78        25
      Hugo Chavez       0.75      0.80      0.77        15
       Tony Blair       0.77      0.75      0.76        36

      avg / total       0.83      0.84      0.83       322

For 237 components, accuracy is 0.835403726708 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  54   0   3   1   0   2]
 [  0   2  21   4   0   0   0]
 [  1   4   3 131   3   4   0]
 [  0   0   0   3  18   0   4]
 [  0   0   0   2   1  11   1]
 [  0   3   1   4   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.46      0.60        13
     Colin Powell       0.79      0.90      0.84        60
  Donald Rumsfeld       0.81      0.78      0.79        27
    George W Bush       0.89      0.90      0.89       146
Gerhard Schroeder       0.78      0.72      0.75        25
      Hugo Chavez       0.73      0.73      0.73        15
       Tony Blair       0.80      0.78      0.79        36

      avg / total       0.84      0.84      0.83       322

For 240 components, accuracy is 0.835403726708 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  54   0   4   0   0   2]
 [  0   1  21   5   0   0   0]
 [  2   6   1 130   3   4   0]
 [  0   0   0   2  17   1   5]
 [  0   0   0   1   2  11   1]
 [  0   2   1   3   0   0  30]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.79      0.90      0.84        60
  Donald Rumsfeld       0.88      0.78      0.82        27
    George W Bush       0.89      0.89      0.89       146
Gerhard Schroeder       0.77      0.68      0.72        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.79      0.83      0.81        36

      avg / total       0.84      0.84      0.83       322

For 243 components, accuracy is 0.819875776398 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  54   0   4   0   0   2]
 [  0   1  20   5   1   0   0]
 [  0   7   3 129   3   3   1]
 [  0   0   0   2  17   1   5]
 [  1   0   0   2   1  10   1]
 [  0   3   2   3   0   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.46      0.60        13
     Colin Powell       0.77      0.90      0.83        60
  Donald Rumsfeld       0.77      0.74      0.75        27
    George W Bush       0.88      0.88      0.88       146
Gerhard Schroeder       0.77      0.68      0.72        25
      Hugo Chavez       0.71      0.67      0.69        15
       Tony Blair       0.76      0.78      0.77        36

      avg / total       0.82      0.82      0.82       322

For 246 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  53   0   4   0   1   2]
 [  0   1  21   5   0   0   0]
 [  1   6   2 131   3   3   0]
 [  0   1   0   3  17   0   4]
 [  0   0   0   3   1  10   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.86      0.46      0.60        13
     Colin Powell       0.78      0.88      0.83        60
  Donald Rumsfeld       0.84      0.78      0.81        27
    George W Bush       0.87      0.90      0.88       146
Gerhard Schroeder       0.81      0.68      0.74        25
      Hugo Chavez       0.71      0.67      0.69        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.83      0.83      0.83       322

For 249 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  52   2   2   1   1   2]
 [  0   3  20   4   0   0   0]
 [  3   4   2 130   2   4   1]
 [  0   0   0   2  17   1   5]
 [  0   0   0   0   1  13   1]
 [  0   2   1   4   0   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.78      0.87      0.82        60
  Donald Rumsfeld       0.77      0.74      0.75        27
    George W Bush       0.92      0.89      0.90       146
Gerhard Schroeder       0.81      0.68      0.74        25
      Hugo Chavez       0.68      0.87      0.76        15
       Tony Blair       0.76      0.81      0.78        36

      avg / total       0.83      0.83      0.83       322


In [5]:
# Line plot of the recorded accuracy against the number of PCA components,
# drawn on the current axes.
ax = plt.gca()
ax.plot(components, accuracies)
ax.set_title('Number of Components vs Accuracy')
ax.set_xlabel('Components')
ax.set_ylabel('Accuracy')
plt.show()



In [ ]: