Faces recognition using ICA and SVMs

The dataset used in this example is a preprocessed excerpt of the "Labeled Faces in the Wild", aka LFW_:

http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz (233MB)

LFW: http://vis-www.cs.umass.edu/lfw/


In [1]:
%matplotlib inline
from time import time
import logging
import matplotlib.pyplot as plt

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn import manifold
from sklearn.decomposition import FastICA

print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the 2 data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print("Total dataset size:")
print("n_samples: %d" % n_samples)
print("n_features: %d" % n_features)
print("n_classes: %d" % n_classes)

# split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)


2017-03-17 22:53:01,300 Loading LFW people faces from /home/chandu/scikit_learn_data/lfw_home
Automatically created module for IPython interactive environment
Total dataset size:
n_samples: 1288
n_features: 1850
n_classes: 7

In [2]:
n_components_1 = np.arange(150,240,3)
accuracies = []
components = []
for i in xrange(len(n_components_1)):
    n_components = n_components_1[i]

    ica = FastICA(n_components=n_components)
    S_ = ica.fit_transform(X)
    A_ = ica.mixing_

    X_train_ica = ica.transform(X_train)
    X_test_ica = ica.transform(X_test)

    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    clf = clf.fit(X_train_ica, y_train)
    y_pred = clf.predict(X_test_ica)

    accuracies.append(float(np.sum(y_test==y_pred))/len(y_pred))
    components.append(n_components)

    print('For '+str(n_components)+' components, accuracy is '+str(float(np.sum(y_test==y_pred))/len(y_pred))+' confusion matrix is: ')
    print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
    print(classification_report(y_test, y_pred, target_names=target_names))


/home/chandu/anaconda2/lib/python2.7/site-packages/sklearn/decomposition/fastica_.py:116: UserWarning: FastICA did not converge. Consider increasing tolerance or the maximum number of iterations.
  warnings.warn('FastICA did not converge. Consider increasing '
For 150 components, accuracy is 0.804347826087 confusion matrix is: 
[[  5   4   3   0   0   0   1]
 [  1  53   2   0   1   1   2]
 [  0   1  23   2   0   0   1]
 [  5   7   4 119   5   5   1]
 [  0   0   1   1  19   0   4]
 [  1   0   0   2   1  11   0]
 [  0   1   3   1   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.42      0.38      0.40        13
     Colin Powell       0.80      0.88      0.84        60
  Donald Rumsfeld       0.64      0.85      0.73        27
    George W Bush       0.95      0.82      0.88       146
Gerhard Schroeder       0.68      0.76      0.72        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.76      0.81      0.78        36

      avg / total       0.82      0.80      0.81       322

For 153 components, accuracy is 0.791925465839 confusion matrix is: 
[[  5   5   2   0   0   0   1]
 [  2  51   3   0   2   1   1]
 [  0   2  22   2   0   0   1]
 [  5   5   6 118   4   7   1]
 [  0   0   1   1  19   0   4]
 [  1   0   0   0   3  11   0]
 [  0   1   3   1   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.38      0.38      0.38        13
     Colin Powell       0.80      0.85      0.82        60
  Donald Rumsfeld       0.59      0.81      0.69        27
    George W Bush       0.97      0.81      0.88       146
Gerhard Schroeder       0.63      0.76      0.69        25
      Hugo Chavez       0.58      0.73      0.65        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.82      0.79      0.80       322

For 156 components, accuracy is 0.807453416149 confusion matrix is: 
[[  5   5   2   0   1   0   0]
 [  1  53   3   0   2   1   0]
 [  0   2  22   2   0   0   1]
 [  3   8   4 121   5   4   1]
 [  0   0   1   1  19   0   4]
 [  1   0   0   1   2  11   0]
 [  0   3   2   0   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.50      0.38      0.43        13
     Colin Powell       0.75      0.88      0.81        60
  Donald Rumsfeld       0.65      0.81      0.72        27
    George W Bush       0.97      0.83      0.89       146
Gerhard Schroeder       0.61      0.76      0.68        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.83      0.81      0.82        36

      avg / total       0.82      0.81      0.81       322

For 159 components, accuracy is 0.816770186335 confusion matrix is: 
[[  4   5   2   1   1   0   0]
 [  1  54   3   0   1   1   0]
 [  0   2  23   2   0   0   0]
 [  4   6   4 122   4   4   2]
 [  0   0   1   1  20   0   3]
 [  1   0   0   0   3  11   0]
 [  0   3   2   0   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.40      0.31      0.35        13
     Colin Powell       0.77      0.90      0.83        60
  Donald Rumsfeld       0.66      0.85      0.74        27
    George W Bush       0.97      0.84      0.90       146
Gerhard Schroeder       0.65      0.80      0.71        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.85      0.81      0.83        36

      avg / total       0.83      0.82      0.82       322

For 162 components, accuracy is 0.810559006211 confusion matrix is: 
[[  5   5   2   0   1   0   0]
 [  0  53   4   1   1   1   0]
 [  0   2  22   2   0   0   1]
 [  4   7   3 121   5   4   2]
 [  0   0   1   1  20   0   3]
 [  1   0   0   0   2  12   0]
 [  0   3   1   1   3   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.50      0.38      0.43        13
     Colin Powell       0.76      0.88      0.82        60
  Donald Rumsfeld       0.67      0.81      0.73        27
    George W Bush       0.96      0.83      0.89       146
Gerhard Schroeder       0.62      0.80      0.70        25
      Hugo Chavez       0.71      0.80      0.75        15
       Tony Blair       0.82      0.78      0.80        36

      avg / total       0.83      0.81      0.81       322

For 165 components, accuracy is 0.801242236025 confusion matrix is: 
[[  5   5   2   1   0   0   0]
 [  2  50   4   1   1   1   1]
 [  0   2  22   2   0   0   1]
 [  5   6   3 120   5   5   2]
 [  0   0   1   2  19   0   3]
 [  0   0   0   0   3  12   0]
 [  0   2   1   1   2   0  30]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.42      0.38      0.40        13
     Colin Powell       0.77      0.83      0.80        60
  Donald Rumsfeld       0.67      0.81      0.73        27
    George W Bush       0.94      0.82      0.88       146
Gerhard Schroeder       0.63      0.76      0.69        25
      Hugo Chavez       0.67      0.80      0.73        15
       Tony Blair       0.81      0.83      0.82        36

      avg / total       0.82      0.80      0.80       322

For 168 components, accuracy is 0.798136645963 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  2  50   3   1   2   1   1]
 [  0   2  22   3   0   0   0]
 [  4   6   3 120   5   5   3]
 [  0   0   1   2  19   0   3]
 [  0   0   0   1   3  11   0]
 [  0   3   1   1   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.50      0.46      0.48        13
     Colin Powell       0.76      0.83      0.79        60
  Donald Rumsfeld       0.71      0.81      0.76        27
    George W Bush       0.93      0.82      0.87       146
Gerhard Schroeder       0.61      0.76      0.68        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.81      0.80      0.80       322

For 171 components, accuracy is 0.798136645963 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  49   4   1   3   1   1]
 [  0   2  22   3   0   0   0]
 [  4   6   3 121   4   5   3]
 [  0   0   1   2  20   0   2]
 [  0   0   0   2   2  11   0]
 [  0   2   1   3   2   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.55      0.46      0.50        13
     Colin Powell       0.75      0.82      0.78        60
  Donald Rumsfeld       0.69      0.81      0.75        27
    George W Bush       0.92      0.83      0.87       146
Gerhard Schroeder       0.65      0.80      0.71        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.82      0.78      0.80        36

      avg / total       0.81      0.80      0.80       322

For 174 components, accuracy is 0.795031055901 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  1  49   4   0   3   1   2]
 [  0   2  22   3   0   0   0]
 [  5   5   4 121   3   5   3]
 [  0   0   1   2  20   0   2]
 [  0   0   0   1   3  11   0]
 [  0   2   1   4   2   0  27]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.50      0.46      0.48        13
     Colin Powell       0.78      0.82      0.80        60
  Donald Rumsfeld       0.67      0.81      0.73        27
    George W Bush       0.92      0.83      0.87       146
Gerhard Schroeder       0.65      0.80      0.71        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.79      0.75      0.77        36

      avg / total       0.81      0.80      0.80       322

For 177 components, accuracy is 0.785714285714 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  1  49   4   0   3   1   2]
 [  0   2  22   3   0   0   0]
 [  4   6   5 119   3   6   3]
 [  0   0   1   2  20   0   2]
 [  0   0   0   1   3  11   0]
 [  0   2   2   4   2   0  26]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.55      0.46      0.50        13
     Colin Powell       0.75      0.82      0.78        60
  Donald Rumsfeld       0.63      0.81      0.71        27
    George W Bush       0.92      0.82      0.87       146
Gerhard Schroeder       0.65      0.80      0.71        25
      Hugo Chavez       0.61      0.73      0.67        15
       Tony Blair       0.79      0.72      0.75        36

      avg / total       0.80      0.79      0.79       322

For 180 components, accuracy is 0.788819875776 confusion matrix is: 
[[  6   5   1   0   1   0   0]
 [  1  49   4   1   3   1   1]
 [  0   2  22   3   0   0   0]
 [  4   5   4 121   5   5   2]
 [  0   0   1   2  20   0   2]
 [  0   0   0   0   4  11   0]
 [  0   3   1   5   2   0  25]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.55      0.46      0.50        13
     Colin Powell       0.77      0.82      0.79        60
  Donald Rumsfeld       0.67      0.81      0.73        27
    George W Bush       0.92      0.83      0.87       146
Gerhard Schroeder       0.57      0.80      0.67        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.83      0.69      0.76        36

      avg / total       0.80      0.79      0.79       322

For 183 components, accuracy is 0.807453416149 confusion matrix is: 
[[  6   5   1   0   1   0   0]
 [  0  50   2   3   3   0   2]
 [  0   2  21   3   0   0   1]
 [  3   5   3 123   4   3   5]
 [  0   0   1   2  20   0   2]
 [  0   0   0   1   2  11   1]
 [  0   2   2   1   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.78      0.83      0.81        60
  Donald Rumsfeld       0.70      0.78      0.74        27
    George W Bush       0.92      0.84      0.88       146
Gerhard Schroeder       0.62      0.80      0.70        25
      Hugo Chavez       0.79      0.73      0.76        15
       Tony Blair       0.72      0.81      0.76        36

      avg / total       0.82      0.81      0.81       322

For 186 components, accuracy is 0.82298136646 confusion matrix is: 
[[  6   5   2   0   0   0   0]
 [  2  52   1   1   1   0   3]
 [  0   3  20   3   0   0   1]
 [  3   3   3 128   4   2   3]
 [  0   0   1   2  19   0   3]
 [  0   0   0   1   2  11   1]
 [  0   2   2   1   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.55      0.46      0.50        13
     Colin Powell       0.80      0.87      0.83        60
  Donald Rumsfeld       0.69      0.74      0.71        27
    George W Bush       0.94      0.88      0.91       146
Gerhard Schroeder       0.68      0.76      0.72        25
      Hugo Chavez       0.85      0.73      0.79        15
       Tony Blair       0.72      0.81      0.76        36

      avg / total       0.83      0.82      0.82       322

For 189 components, accuracy is 0.807453416149 confusion matrix is: 
[[  7   4   1   1   0   0   0]
 [  1  51   3   1   2   1   1]
 [  0   2  21   3   0   0   1]
 [  5   5   4 121   6   3   2]
 [  0   0   1   1  20   1   2]
 [  0   0   0   1   2  11   1]
 [  0   1   1   2   3   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.54      0.54      0.54        13
     Colin Powell       0.81      0.85      0.83        60
  Donald Rumsfeld       0.68      0.78      0.72        27
    George W Bush       0.93      0.83      0.88       146
Gerhard Schroeder       0.61      0.80      0.69        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.82      0.81      0.81       322

For 192 components, accuracy is 0.810559006211 confusion matrix is: 
[[  7   4   1   1   0   0   0]
 [  0  51   3   1   3   1   1]
 [  0   2  21   3   0   0   1]
 [  3   2   5 122   7   4   3]
 [  0   0   1   1  20   1   2]
 [  0   0   0   1   2  11   1]
 [  0   1   1   3   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.70      0.54      0.61        13
     Colin Powell       0.85      0.85      0.85        60
  Donald Rumsfeld       0.66      0.78      0.71        27
    George W Bush       0.92      0.84      0.88       146
Gerhard Schroeder       0.59      0.80      0.68        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.82      0.81      0.81       322

For 195 components, accuracy is 0.816770186335 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  1  50   2   1   3   1   2]
 [  0   1  23   2   0   0   1]
 [  3   2   5 125   5   4   2]
 [  0   0   1   1  19   1   3]
 [  0   0   0   1   2  11   1]
 [  0   2   2   1   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.60      0.46      0.52        13
     Colin Powell       0.83      0.83      0.83        60
  Donald Rumsfeld       0.68      0.85      0.75        27
    George W Bush       0.95      0.86      0.90       146
Gerhard Schroeder       0.61      0.76      0.68        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.76      0.81      0.78        36

      avg / total       0.83      0.82      0.82       322

For 198 components, accuracy is 0.841614906832 confusion matrix is: 
[[  6   6   1   0   0   0   0]
 [  0  54   1   0   3   1   1]
 [  0   1  24   1   0   0   1]
 [  4   3   2 128   3   4   2]
 [  0   0   1   1  19   1   3]
 [  0   0   0   1   2  11   1]
 [  0   2   2   2   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.60      0.46      0.52        13
     Colin Powell       0.82      0.90      0.86        60
  Donald Rumsfeld       0.77      0.89      0.83        27
    George W Bush       0.96      0.88      0.92       146
Gerhard Schroeder       0.68      0.76      0.72        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.85      0.84      0.84       322

For 201 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  52   2   0   3   1   2]
 [  0   1  25   1   0   0   0]
 [  4   2   2 125   6   5   2]
 [  0   0   1   2  19   1   2]
 [  0   0   0   1   2  11   1]
 [  1   3   1   1   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.55      0.46      0.50        13
     Colin Powell       0.83      0.87      0.85        60
  Donald Rumsfeld       0.78      0.93      0.85        27
    George W Bush       0.95      0.86      0.90       146
Gerhard Schroeder       0.61      0.76      0.68        25
      Hugo Chavez       0.61      0.73      0.67        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.84      0.83      0.83       322

For 204 components, accuracy is 0.82298136646 confusion matrix is: 
[[  7   4   1   1   0   0   0]
 [  1  51   2   1   3   0   2]
 [  0   1  24   1   0   0   1]
 [  4   3   3 124   6   4   2]
 [  0   0   1   1  19   1   3]
 [  0   0   0   1   2  11   1]
 [  1   3   1   1   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.54      0.54      0.54        13
     Colin Powell       0.82      0.85      0.84        60
  Donald Rumsfeld       0.75      0.89      0.81        27
    George W Bush       0.95      0.85      0.90       146
Gerhard Schroeder       0.61      0.76      0.68        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.76      0.81      0.78        36

      avg / total       0.84      0.82      0.83       322

For 207 components, accuracy is 0.829192546584 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  51   2   1   3   1   2]
 [  0   1  25   1   0   0   0]
 [  3   3   3 126   5   4   2]
 [  0   0   1   2  19   1   2]
 [  0   0   0   1   2  11   1]
 [  1   3   1   1   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.60      0.46      0.52        13
     Colin Powell       0.81      0.85      0.83        60
  Donald Rumsfeld       0.76      0.93      0.83        27
    George W Bush       0.95      0.86      0.90       146
Gerhard Schroeder       0.63      0.76      0.69        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.84      0.83      0.83       322

For 210 components, accuracy is 0.816770186335 confusion matrix is: 
[[  6   4   1   1   0   0   1]
 [  0  51   2   1   3   1   2]
 [  0   2  23   2   0   0   0]
 [  3   3   1 125   8   4   2]
 [  0   0   2   2  18   1   2]
 [  0   0   0   1   2  11   1]
 [  1   2   1   1   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.60      0.46      0.52        13
     Colin Powell       0.82      0.85      0.84        60
  Donald Rumsfeld       0.77      0.85      0.81        27
    George W Bush       0.94      0.86      0.90       146
Gerhard Schroeder       0.55      0.72      0.62        25
      Hugo Chavez       0.65      0.73      0.69        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.83      0.82      0.82       322

For 213 components, accuracy is 0.819875776398 confusion matrix is: 
[[  6   4   1   1   0   0   1]
 [  0  50   2   3   3   1   1]
 [  0   3  23   1   0   0   0]
 [  4   3   2 127   5   4   1]
 [  0   0   0   6  18   0   1]
 [  0   0   0   1   1  12   1]
 [  0   2   1   4   1   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.60      0.46      0.52        13
     Colin Powell       0.81      0.83      0.82        60
  Donald Rumsfeld       0.79      0.85      0.82        27
    George W Bush       0.89      0.87      0.88       146
Gerhard Schroeder       0.64      0.72      0.68        25
      Hugo Chavez       0.71      0.80      0.75        15
       Tony Blair       0.85      0.78      0.81        36

      avg / total       0.82      0.82      0.82       322

For 216 components, accuracy is 0.832298136646 confusion matrix is: 
[[  6   5   1   0   0   0   1]
 [  0  51   2   2   3   1   1]
 [  0   3  24   0   0   0   0]
 [  3   4   2 128   5   3   1]
 [  0   0   0   6  18   0   1]
 [  0   0   0   1   1  12   1]
 [  0   2   1   3   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.78      0.85      0.82        60
  Donald Rumsfeld       0.80      0.89      0.84        27
    George W Bush       0.91      0.88      0.90       146
Gerhard Schroeder       0.64      0.72      0.68        25
      Hugo Chavez       0.75      0.80      0.77        15
       Tony Blair       0.85      0.81      0.83        36

      avg / total       0.83      0.83      0.83       322

For 219 components, accuracy is 0.816770186335 confusion matrix is: 
[[  6   5   1   1   0   0   0]
 [  0  52   0   2   2   1   3]
 [  0   4  19   3   0   0   1]
 [  3   4   2 126   4   3   4]
 [  0   0   1   4  19   0   1]
 [  0   0   0   1   1  12   1]
 [  0   1   2   2   2   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.79      0.87      0.83        60
  Donald Rumsfeld       0.76      0.70      0.73        27
    George W Bush       0.91      0.86      0.88       146
Gerhard Schroeder       0.68      0.76      0.72        25
      Hugo Chavez       0.75      0.80      0.77        15
       Tony Blair       0.74      0.81      0.77        36

      avg / total       0.82      0.82      0.82       322

For 222 components, accuracy is 0.816770186335 confusion matrix is: 
[[  6   4   1   1   0   0   1]
 [  0  50   2   1   4   1   2]
 [  0   1  23   2   0   0   1]
 [  4   4   2 125   6   3   2]
 [  0   0   1   3  19   1   1]
 [  0   0   0   1   2  11   1]
 [  0   3   1   2   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.60      0.46      0.52        13
     Colin Powell       0.81      0.83      0.82        60
  Donald Rumsfeld       0.77      0.85      0.81        27
    George W Bush       0.93      0.86      0.89       146
Gerhard Schroeder       0.59      0.76      0.67        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.82      0.82      0.82       322

For 225 components, accuracy is 0.810559006211 confusion matrix is: 
[[  6   4   1   1   1   0   0]
 [  0  49   2   2   4   0   3]
 [  0   2  23   2   0   0   0]
 [  3   4   3 124   6   4   2]
 [  0   0   1   3  19   1   1]
 [  0   0   0   1   2  11   1]
 [  0   3   1   2   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.79      0.82      0.80        60
  Donald Rumsfeld       0.74      0.85      0.79        27
    George W Bush       0.92      0.85      0.88       146
Gerhard Schroeder       0.58      0.76      0.66        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.81      0.81      0.81        36

      avg / total       0.82      0.81      0.81       322

For 228 components, accuracy is 0.82298136646 confusion matrix is: 
[[  6   3   1   2   1   0   0]
 [  0  50   2   2   3   0   3]
 [  0   3  23   1   0   0   0]
 [  3   4   3 126   5   3   2]
 [  0   0   1   3  18   1   2]
 [  0   0   0   0   1  13   1]
 [  0   3   1   2   1   0  29]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.46      0.55        13
     Colin Powell       0.79      0.83      0.81        60
  Donald Rumsfeld       0.74      0.85      0.79        27
    George W Bush       0.93      0.86      0.89       146
Gerhard Schroeder       0.62      0.72      0.67        25
      Hugo Chavez       0.76      0.87      0.81        15
       Tony Blair       0.78      0.81      0.79        36

      avg / total       0.83      0.82      0.82       322

For 231 components, accuracy is 0.807453416149 confusion matrix is: 
[[  6   3   1   2   0   0   1]
 [  0  53   1   1   2   0   3]
 [  0   4  19   2   1   0   1]
 [  2   5   6 122   3   4   4]
 [  0   0   1   2  19   1   2]
 [  0   0   0   2   1  11   1]
 [  0   2   1   1   2   0  30]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.79      0.88      0.83        60
  Donald Rumsfeld       0.66      0.70      0.68        27
    George W Bush       0.92      0.84      0.88       146
Gerhard Schroeder       0.68      0.76      0.72        25
      Hugo Chavez       0.69      0.73      0.71        15
       Tony Blair       0.71      0.83      0.77        36

      avg / total       0.82      0.81      0.81       322

For 234 components, accuracy is 0.826086956522 confusion matrix is: 
[[  6   3   1   2   0   0   1]
 [  0  52   1   1   2   1   3]
 [  0   2  23   2   0   0   0]
 [  4   5   2 126   6   3   0]
 [  0   0   1   3  18   1   2]
 [  0   0   0   0   1  13   1]
 [  0   3   1   3   1   0  28]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.60      0.46      0.52        13
     Colin Powell       0.80      0.87      0.83        60
  Donald Rumsfeld       0.79      0.85      0.82        27
    George W Bush       0.92      0.86      0.89       146
Gerhard Schroeder       0.64      0.72      0.68        25
      Hugo Chavez       0.72      0.87      0.79        15
       Tony Blair       0.80      0.78      0.79        36

      avg / total       0.83      0.83      0.83       322

For 237 components, accuracy is 0.816770186335 confusion matrix is: 
[[  6   4   1   1   0   0   1]
 [  0  54   0   0   2   1   3]
 [  0   4  21   1   0   0   1]
 [  2   7   3 122   6   3   3]
 [  0   0   1   3  19   0   2]
 [  0   0   0   2   1  11   1]
 [  0   2   1   2   1   0  30]]
                   precision    recall  f1-score   support

     Ariel Sharon       0.75      0.46      0.57        13
     Colin Powell       0.76      0.90      0.82        60
  Donald Rumsfeld       0.78      0.78      0.78        27
    George W Bush       0.93      0.84      0.88       146
Gerhard Schroeder       0.66      0.76      0.70        25
      Hugo Chavez       0.73      0.73      0.73        15
       Tony Blair       0.73      0.83      0.78        36

      avg / total       0.83      0.82      0.82       322


In [3]:
plt.plot(components,accuracies)
plt.title('Number of Components vs Accuracy')
plt.xlabel('Components')
plt.ylabel('Accuracy')
plt.show()



In [ ]: