In [13]:
from sklearn.decomposition import PCA
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
matplotlib.style.use("ggplot")
In [14]:
# Synthetic data set: 1000 samples of 5 independent standard-normal features.
# A fixed seed on a *local* generator makes Restart & Run All reproduce the
# exact same data (and therefore the same PCA numbers and plots) without
# touching NumPy's global random state.
RANDOM_SEED = 42
df = pd.DataFrame(
    np.random.RandomState(RANDOM_SEED).randn(1000, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
)
In [15]:
# Fit a 2-component PCA (exact/full SVD solver) on the data frame;
# estimator.fit() returns the fitted estimator itself, so it can be chained.
pca = PCA(n_components=2, svd_solver="full").fit(df)

# Project the same samples onto the two fitted components.
T = pca.transform(df)

# Show the shape before and after the projection, one per line.
print(df.shape, T.shape, sep="\n")
In [16]:
# Dump the fitted PCA attributes, each prefixed with its attribute name.
for attr in ("explained_variance_", "explained_variance_ratio_", "components_"):
    print(attr, getattr(pca, attr))
In [20]:
# Overlay the fitted principal axes on the raw samples, both drawn in the
# plane of the first two features.
#
# pca.components_ has one row per principal axis and one column per input
# feature, so column 0 / column 1 hold each axis' coordinate along the first /
# second feature. Indexing rows instead (components_[0] vs components_[1])
# would plot per-feature loadings of PC1 against PC2, which is a different
# coordinate system from the data scatter it is drawn on top of.
fig, ax = plt.subplots()

# Raw samples first so the few principal-axis markers are drawn on top.
ax.scatter(x=df.iloc[:, 0], y=df.iloc[:, 1], c="r", alpha=0.2, label="samples")
ax.scatter(
    x=pca.components_[:, 0],
    y=pca.components_[:, 1],
    c="g",
    label="principal axes",
)

ax.set_xlabel(df.columns[0])
ax.set_ylabel(df.columns[1])
ax.legend()

# Explicit show() keeps the cell from echoing a PathCollection repr as output.
plt.show()
Out[20]:
In [ ]: