notebook.community

Edit and run



In [62]:

    
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline



In [2]:

    
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# Feature matrix (X) and class labels (y) as exposed by the loaded dataset object.
X, y = iris.data, iris.target



In [3]:

    
# PCA with default settings on the raw (unscaled) features.
model = PCA()
# fit() returns the fitted estimator, so the projection can be chained onto it.
transformed = model.fit(X).transform(X)



In [4]:

    
# Samples projected onto the first two principal components, coloured by class label.
plt.scatter(*transformed[:, :2].T, c=y)
plt.show()



In [5]:

    
# Share of the total variance captured by each principal component of the unscaled PCA.
model.explained_variance_ratio_









    Out[5]:





array([ 0.92461621,  0.05301557,  0.01718514,  0.00518309])



In [6]:

    
# Singular values corresponding to each fitted component of the unscaled PCA.
model.singular_values_









    Out[6]:





array([ 25.08986398,   6.00785254,   3.42053538,   1.87850234])



In [7]:

    
model.components_  # Principal axes (directions of variance): one row per component, one column per input feature









    Out[7]:





array([[ 0.36158968, -0.08226889,  0.85657211,  0.35884393],
       [ 0.65653988,  0.72971237, -0.1757674 , -0.07470647],
       [-0.58099728,  0.59641809,  0.07252408,  0.54906091],
       [ 0.31725455, -0.32409435, -0.47971899,  0.75112056]])



In [13]:

    
# Explained variance of each principal component for the unscaled PCA.
# The axes are labelled and the ticks pinned to the integer component indices
# so the chart can be read on its own (the default ticks would be fractional).
plt.bar(range(model.n_components_), model.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('variance')
plt.xticks(range(model.n_components_))
plt.show()



In [19]:

    
# Per-feature mean estimated by the unscaled PCA during fitting.
mean = model.mean_
print(mean)
# First principal component: row 0 of the components matrix.
first_pc = model.components_[0]
print(first_pc)









    



[ 5.84333333  3.054       3.75866667  1.19866667]
[ 0.36158968 -0.08226889  0.85657211  0.35884393]

Scaling: repeating the PCA on standardized features



In [84]:

    
# Standardize the features, then run PCA, chained as a single pipeline.
# The individual steps keep their own names because later cells read
# attributes from the fitted `pca` object directly.
scaler, pca = StandardScaler(), PCA()
pipeline = make_pipeline(scaler, pca)
# fit() returns the fitted pipeline, so the projection is chained onto it.
transformed = pipeline.fit(X).transform(X)



In [73]:

    
# Explained variance of each principal component after standardization.
component_ids = range(pca.n_components_)
plt.bar(component_ids, pca.explained_variance_)
plt.xticks(component_ids)  # one tick per component index
plt.ylabel('variance')
plt.xlabel('PCA feature')
plt.show()



In [89]:

    
# Per-feature mean seen by the PCA step; it receives the scaler's output,
# so the printed values are numerically close to zero.
mean = pca.mean_
print(mean)
# First and second principal components: rows 0 and 1 of the components matrix.
first_pc, second_pc = pca.components_[0], pca.components_[1]
print(first_pc)









    



[ -1.69031455e-15  -1.63702385e-15  -1.48251781e-15  -1.62314606e-15]
[ 0.52237162 -0.26335492  0.58125401  0.56561105]



In [77]:

    
# Principal axes of the PCA fitted on standardized data: one row per component, one column per input feature.
pca.components_









    Out[77]:





array([[ 0.52237162, -0.26335492,  0.58125401,  0.56561105],
       [ 0.37231836,  0.92555649,  0.02109478,  0.06541577],
       [-0.72101681,  0.24203288,  0.14089226,  0.6338014 ],
       [-0.26199559,  0.12413481,  0.80115427, -0.52354627]])



In [91]:

    
# Overlay the first two principal directions on the standardized data.
#
# The arrow origin (the PCA mean) and the arrow directions (rows of
# `components_`) are both expressed in standardized *feature* coordinates, so
# the scatter has to show the standardized features as well. Plotting the PCA
# scores (`transformed`) here would put the points and the arrows in different
# coordinate systems.
scaled = scaler.transform(X)
plt.scatter(scaled[:, 0], scaled[:, 1], c=y)

# Read the origin and directions straight from the fitted `pca` rather than
# from the `mean` / `first_pc` / `second_pc` globals: those names are also
# assigned from the unscaled model, so their value depends on execution order.
origin = pca.mean_
for direction in pca.components_[:2]:
    # Only the first two feature coordinates of each direction are drawn,
    # matching the two features shown on the axes.
    plt.arrow(origin[0], origin[1], direction[0], direction[1], color='red', width=0.11)

plt.xlabel('feature 0 (standardized)')
plt.ylabel('feature 1 (standardized)')
plt.axis('equal')  # equal scaling so the arrow directions are not distorted
plt.show()



In [54]:

    
# Display the current first principal component.
# NOTE(review): `first_pc` is assigned in two different cells (once from `model`,
# once from `pca`), so the value shown here depends on which cell ran last.
first_pc









    Out[54]:





array([ 0.36158968, -0.08226889,  0.85657211,  0.35884393])