In [62]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [2]:
# Load the bundled iris dataset and split it into the feature matrix (X)
# and the per-sample class labels (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [3]:
# Fit a PCA with default settings on the raw (unscaled) features; fit() returns
# the estimator itself, so construction and fitting can be chained.
model = PCA().fit(X)
# Express every sample in the coordinates of the fitted principal axes.
transformed = model.transform(X)

In [4]:
# Scatter the samples in the plane of the first two principal components,
# coloured by their class label, and label the figure so it can be read on its own.
plt.scatter(transformed[:, 0], transformed[:, 1], c=y)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.title('Iris samples projected onto the first two principal components')
plt.show()



In [5]:
# Share of the total variance carried by each principal component of the
# unscaled model (the values in the output below sum to 1).
model.explained_variance_ratio_


Out[5]:
array([ 0.92461621,  0.05301557,  0.01718514,  0.00518309])

In [6]:
# Singular values of the unscaled model, one per principal component.
model.singular_values_


Out[6]:
array([ 25.08986398,   6.00785254,   3.42053538,   1.87850234])

In [7]:
# Principal axes of the unscaled model: one row per component, one column per
# input feature.
model.components_  # directions of variance in feature space


Out[7]:
array([[ 0.36158968, -0.08226889,  0.85657211,  0.35884393],
       [ 0.65653988,  0.72971237, -0.1757674 , -0.07470647],
       [-0.58099728,  0.59641809,  0.07252408,  0.54906091],
       [ 0.31725455, -0.32409435, -0.47971899,  0.75112056]])

In [13]:
# Explained variance of each principal component of the unscaled model.
# Labels and integer ticks match the equivalent chart drawn for the scaled
# pipeline further down, so the two figures can be compared directly.
plt.bar(range(model.n_components_), model.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('variance')
plt.xticks(range(model.n_components_))
plt.show()



In [19]:
# Per-feature mean stored on the fitted (unscaled) PCA model.
mean = model.mean_
print(mean)
# First principal component: row 0 of the component matrix.
first_pc = model.components_[0]
print(first_pc)


[ 5.84333333  3.054       3.75866667  1.19866667]
[ 0.36158968 -0.08226889  0.85657211  0.35884393]

Scaling: repeat the PCA on standardized features


In [84]:
# Standardize the features before PCA. The scaler and PCA objects keep their own
# names because later cells read attributes from `pca` directly.
pca = PCA()
scaler = StandardScaler()
# Pipeline.fit() returns the pipeline, so building and fitting are chained.
pipeline = make_pipeline(scaler, pca).fit(X)
# Rebinds `transformed`: from here on it holds the scores of the scaled pipeline.
transformed = pipeline.transform(X)

In [73]:
# Bar chart of the variance explained by each component of the scaled PCA.
component_ids = range(pca.n_components_)
plt.bar(component_ids, pca.explained_variance_)
plt.xticks(component_ids)  # one integer tick per component
plt.ylabel('variance')
plt.xlabel('PCA feature')
plt.show()



In [89]:
# Per-feature mean stored on the PCA step of the scaled pipeline
# (numerically zero in the output below).
mean = pca.mean_
# First and second principal components: rows 0 and 1 of the component matrix.
first_pc, second_pc = pca.components_[0], pca.components_[1]
print(mean)
print(first_pc)


[ -1.69031455e-15  -1.63702385e-15  -1.48251781e-15  -1.62314606e-15]
[ 0.52237162 -0.26335492  0.58125401  0.56561105]

In [77]:
# Principal axes of the PCA step fitted inside the scaling pipeline:
# one row per component, one column per input feature.
pca.components_


Out[77]:
array([[ 0.52237162, -0.26335492,  0.58125401,  0.56561105],
       [ 0.37231836,  0.92555649,  0.02109478,  0.06541577],
       [-0.72101681,  0.24203288,  0.14089226,  0.6338014 ],
       [-0.26199559,  0.12413481,  0.80115427, -0.52354627]])

In [91]:
# Draw the first two principal axes on top of the data.
#
# pca.mean_ and pca.components_ are expressed in standardized-feature space, so
# the points must be plotted in that same space (the scaler's output). Plotting
# the PCA scores in `transformed` would mix coordinate systems: in score space
# the principal axes are simply the coordinate axes.
scaled = scaler.transform(X)

# Read the vectors straight from the fitted `pca` rather than from the
# notebook-level `mean` / `first_pc` names, which are also assigned from the
# unscaled model in an earlier cell and may be stale after out-of-order runs.
origin = pca.mean_

plt.scatter(scaled[:, 0], scaled[:, 1], c=y)
for axis in pca.components_[:2]:
    # Each axis has one coordinate per feature; this 2-D view can only show
    # its coordinates along the two plotted features.
    plt.arrow(origin[0], origin[1], axis[0], axis[1], color='red', width=0.11)
plt.xlabel('feature 0 (standardized)')
plt.ylabel('feature 1 (standardized)')
plt.title('First two principal axes over the standardized data')
plt.axis('equal')  # equal scaling keeps the arrow directions undistorted
plt.show()



In [54]:
# Display whatever vector is currently bound to `first_pc`.
# NOTE(review): the saved output below equals row 0 of model.components_ (the
# unscaled model), not row 0 of pca.components_, so this cell appears to have
# been executed before `first_pc` was reassigned from the scaled PCA.
first_pc


Out[54]:
array([ 0.36158968, -0.08226889,  0.85657211,  0.35884393])