In [62]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
In [2]:
# Load the iris dataset; X holds its data array and y its target array.
iris = datasets.load_iris()
X, y = iris.data, iris.target
In [3]:
# Fit a default-configured PCA on the raw (unscaled) features, then project
# the same samples onto the fitted components.
model = PCA().fit(X)
transformed = model.transform(X)
In [4]:
# Scatter the first two columns of the PCA-transformed data, coloured by the
# iris target array. Axis labels are set so the figure is readable on its own.
plt.scatter(transformed[:, 0], transformed[:, 1], c=y)
plt.xlabel('PCA feature 0')
plt.ylabel('PCA feature 1')
plt.show()
In [5]:
# Display the explained-variance ratio of the PCA fitted on the raw (unscaled) X.
model.explained_variance_ratio_
Out[5]:
In [6]:
# Display the singular values of the PCA fitted on the raw (unscaled) X.
model.singular_values_
Out[6]:
In [7]:
# Display the component vectors (directions of variance) of the PCA fitted on
# the raw (unscaled) X; one row per component.
model.components_
Out[7]:
In [13]:
# Explained variance of each component of the PCA fitted on the raw X.
# Labelled and ticked the same way as the bar chart for the scaled pipeline,
# so the two figures can be compared directly.
plt.bar(range(model.n_components_), model.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('variance')
plt.xticks(range(model.n_components_))
plt.show()
In [19]:
# Pull the feature means and the first component row out of the PCA fitted on
# the raw X, then print both (means first, component second).
mean = model.mean_
first_pc = model.components_[0]
print(mean)
print(first_pc)
In [84]:
# Chain standardization and PCA into one pipeline, fit it on X, and project X
# through both steps. `scaler` and `pca` stay bound so later cells can inspect
# the fitted steps.
scaler, pca = StandardScaler(), PCA()
pipeline = make_pipeline(scaler, pca).fit(X)
transformed = pipeline.transform(X)
In [73]:
# Bar chart of the explained variance per component for the PCA step of the
# scaling pipeline, with one integer tick per component.
component_ids = range(pca.n_components_)
plt.bar(component_ids, pca.explained_variance_)
plt.xticks(component_ids)
plt.ylabel('variance')
plt.xlabel('PCA feature')
plt.show()
In [89]:
# Pull the feature means and the first two component rows out of the PCA step
# of the scaling pipeline; print the means and the first component.
mean = pca.mean_
first_pc, second_pc = pca.components_[:2]
print(mean)
print(first_pc)
In [77]:
# Display the component vectors of the PCA step that was fitted through the
# scaler + PCA pipeline; one row per component.
pca.components_
Out[77]:
In [91]:
# Overlay the first two principal axes on the data they were estimated from.
#
# pca.mean_ and pca.components_ are expressed in the standardized feature space
# (the output of `scaler`), so the arrows have to be drawn over those features.
# Drawing them over `transformed` mixes coordinate systems: in PCA space the
# principal axes are simply the x and y axes.
scaled = scaler.transform(X)
plt.scatter(scaled[:, 0], scaled[:, 1], c=y)

# Read the centre and axes straight from the fitted `pca` instead of the
# `mean` / `first_pc` globals, which more than one cell reassigns.
origin = pca.mean_
for axis in pca.components_[:2]:
    # Only the first two coordinates are drawn, i.e. the projection of each
    # axis onto the plane of the two plotted features.
    plt.arrow(origin[0], origin[1], axis[0], axis[1], color='red', width=0.11)

plt.xlabel(iris.feature_names[0] + ' (standardized)')
plt.ylabel(iris.feature_names[1] + ' (standardized)')
plt.axis('equal')  # equal scaling so the arrow directions are not distorted
plt.show()
In [54]:
# Display the current value of first_pc.
# NOTE(review): first_pc is assigned in two earlier cells (from `model` and from
# `pca`); on a top-to-bottom run this shows the one taken from `pca`. The recorded
# execution count suggests this cell was originally run out of order -- confirm.
first_pc
Out[54]: