In [13]:
from sklearn.decomposition import PCA
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Apply matplotlib's "ggplot" style sheet to every figure drawn after this point.
matplotlib.style.use("ggplot")

In [14]:
# Synthetic data set: 1000 samples of 5 independent standard-normal features.
# A dedicated, seeded generator keeps the data -- and every PCA figure derived
# from it -- identical across "Restart Kernel -> Run All".
rng = np.random.RandomState(42)
df = pd.DataFrame(rng.randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e'])

In [15]:
# Fit a 2-component PCA on the full frame; `fit` returns the estimator itself,
# so construction and fitting are chained. svd_solver="full" is passed through
# unchanged to select scikit-learn's full-SVD solver.
pca = PCA(n_components=2, svd_solver="full").fit(df)

# Project the same samples onto the two fitted components.
T = pca.transform(df)

# Shapes before and after the projection, one per line.
for shape in (df.shape, T.shape):
    print(shape)


(1000, 5)
(1000, 2)

In [16]:
# Print each fitted PCA attribute on its own line, prefixed by its name.
for attr_name in ("explained_variance_", "explained_variance_ratio_", "components_"):
    print(attr_name, getattr(pca, attr_name))


explained_variance_ [ 1.09271881  0.99326585]
explained_variance_ratio_ [ 0.22763862  0.20692027]
components_ [[-0.40867203 -0.24175357 -0.31282186  0.52075977  0.63678422]
 [-0.45247747  0.32316253 -0.57093155  0.16314664 -0.58159303]]

In [20]:
# Explicit figure/axes so the plot can carry a title, axis labels and a legend.
fig, ax = plt.subplots()

# Draw the 1000 translucent raw samples first so they do not bury the five
# loading markers plotted afterwards.
ax.scatter(x=df.iloc[:, 0], y=df.iloc[:, 1], c="r", alpha=0.2,
           label="raw data: column a vs column b")

# One marker per original feature: its weight in the first fitted component (x)
# against its weight in the second fitted component (y).
ax.scatter(x=pca.components_[0], y=pca.components_[1], c="g",
           label="PCA loadings: component 1 vs component 2")

# NOTE(review): the two series live in different coordinate systems (raw feature
# values vs. component weights). If the intent was to show the projected samples,
# plot T[:, 0] against T[:, 1] instead -- confirm with the author.
ax.set_xlabel("column a  /  component 1 loading")
ax.set_ylabel("column b  /  component 2 loading")
ax.set_title("Raw data (a vs b) with PCA loadings overlaid")
ax.legend()

# Show the figure explicitly so the cell does not emit a PathCollection repr.
plt.show()


Out[20]:
<matplotlib.collections.PathCollection at 0x11aa23c50>

In [ ]: