In [1]:
from sklearn import datasets
from sklearn import decomposition
In [2]:
# Load the Iris dataset bundled with scikit-learn's `datasets` module.
iris = datasets.load_iris()
In [3]:
# Pull out the `data` attribute of the loaded dataset; this is the
# input that every PCA fit below operates on.
iris_x = iris.data
In [4]:
# PCA estimator with default settings (no n_components given).
pca = decomposition.PCA()
In [5]:
# Bare expression: the cell output is the estimator's repr.
pca
Out[5]:
In [6]:
# Fit the default PCA on iris_x and keep the transformed result.
iris_pca = pca.fit_transform(iris_x)
In [10]:
# Show the first five entries of the original data and of the PCA output,
# separated by a dashed rule. Parenthesised single-argument print calls
# are used so the cell runs unchanged on both Python 2 and Python 3
# (the bare `print x` statement form is a SyntaxError on Python 3).
print(iris_x[:5])
print('-' * 50)
print(iris_pca[:5])
In [8]:
# Fraction of the dataset's variance attributed to each fitted component.
# Per the original author's run, the first principal component alone
# explains about 92.5% of the variance (figure not recomputed here).
pca.explained_variance_ratio_
Out[8]:
In [11]:
# Keep only 2 components (n_components=2): this limits the number of
# components retained by PCA, not the number of rows. The last line
# displays the shape of the transformed result as a check.
# NOTE: this rebinds `pca`, replacing the default estimator created earlier.
pca = decomposition.PCA(n_components=2)
iris_x_prime = pca.fit_transform(iris_x)
iris_x_prime.shape
Out[11]:
In [12]:
# Total fraction of variance covered by the components kept by the
# current `pca` estimator.
pca.explained_variance_ratio_.sum()
Out[12]:
In [13]:
# Let PCA choose the number of components from a variance target:
# scikit-learn treats a float n_components between 0 and 1 as the
# fraction of variance to be explained (here, more than 98%).
pca = decomposition.PCA(n_components=.98)
# fit() returns the estimator itself, so assigning its result would bind
# iris_x_prime to the PCA object; fit_transform() fits and returns the
# transformed data, matching how iris_x_prime is produced in the
# n_components=2 cell.
iris_x_prime = pca.fit_transform(iris_x)
# Confirm the retained components reach the requested variance share.
pca.explained_variance_ratio_.sum()
Out[13]:
In [ ]: