In [1]:
import warnings
warnings.filterwarnings('ignore')
In [2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
In [3]:
import numpy as np
from sklearn.decomposition import PCA
In [4]:
# Toy 2-D dataset: eight samples (rows), two features (columns).
X = np.array(
    [
        [-1, -1],
        [-2, -1],
        [-3, -2],
        [1, 1],
        [2, 1],
        [3, 2],
        [4, 3],
        [4, -1],
    ]
)
# Alternative seven-sample dataset kept for experimentation; uncomment to use it instead.
# X = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 1], [2, 2], [3, 3], [4, 4]])
In [5]:
# Echo the raw array as the cell output to check its contents.
X
Out[5]:
In [6]:
import matplotlib.pyplot as plt
In [7]:
# Scatter plot of the raw data: first column on the x-axis, second on the y-axis.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(X[:, 0], X[:, 1])
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
ax.set(title='Original data', xlabel='feature 0', ylabel='feature 1')
# fig.savefig('original.png')  # uncomment to export the figure
# Explicit show() renders the figure without echoing the artist repr as cell output.
plt.show()
Out[7]:
In [8]:
from sklearn.preprocessing import StandardScaler
In [9]:
# StandardScaler with default settings: removes each feature's mean and
# scales it to unit variance once fitted.
scaler = StandardScaler()
In [10]:
# Learn the per-feature mean and variance from X; nothing is transformed yet.
# fit() returns the scaler itself, which is echoed as the cell output.
scaler.fit(X)
Out[10]:
In [15]:
# Per-feature mean learned by fit(): one value per column of X.
scaler.mean_
Out[15]:
In [16]:
# Per-feature variance learned by fit(): one value per column of X.
scaler.var_
Out[16]:
In [17]:
# Standardize X with the statistics learned by fit(): each column is centered
# on its mean and divided by its standard deviation.
X_scaled = scaler.transform(X)
In [18]:
# Scatter plot of the standardized data, drawn at the same size as the other
# figures in this notebook so the plots can be compared directly.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(X_scaled[:, 0], X_scaled[:, 1])
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
ax.set(
    title='Standardized data',
    xlabel='feature 0 (standardized)',
    ylabel='feature 1 (standardized)',
)
# Explicit show() renders the figure without echoing the artist repr as cell output.
plt.show()
Out[18]:
In [15]:
import pandas as pd
In [16]:
# Wrap the standardized array in a DataFrame (default integer column labels)
# so that pandas' corr() can be applied to it.
df = pd.DataFrame(X_scaled)
In [17]:
# Pairwise correlation between the columns (pandas defaults to Pearson).
# Pearson correlation is unaffected by standardization, so the raw data
# would give the same matrix.
corr_mat = df.corr()
In [18]:
# Echo the correlation matrix as the cell output.
corr_mat
Out[18]:
In [19]:
import seaborn as sns
In [20]:
# Heatmap of the correlation matrix; annot=True writes each coefficient in its cell.
fig, ax = plt.subplots(figsize=(10, 10))
# Pass the axes explicitly instead of relying on the implicit "current axes".
sns.heatmap(corr_mat, annot=True, ax=ax)
ax.set_title('Feature correlation matrix')
# fig.savefig('correlation.png')  # uncomment to export the figure
# Explicit show() renders the figure without echoing the axes repr as cell output.
plt.show()
Out[20]:
In [21]:
# Fit a PCA that keeps two components. fit() returns the estimator itself,
# so construction and fitting can be chained.
# NOTE(review): the model is fitted on the raw X, not on X_scaled computed
# earlier. PCA centers its input but does not rescale it - confirm that
# using the unscaled data is intended.
pca = PCA(n_components=2).fit(X)
# Echo the fitted estimator as the cell output.
pca
Out[21]:
In [22]:
# Variance of the data along each principal component, largest first.
pca.explained_variance_
Out[22]:
In [23]:
# Fraction of the total variance explained by each principal component.
# The ratios sum to 1 here because all components are kept (n_components
# equals the number of features). The first component accounts for most of
# the variance, so the second could be dropped with little loss of information.
pca.explained_variance_ratio_
Out[23]:
In [24]:
# Project X onto the fitted principal components. Both components are kept
# (n_components=2 on two-feature data), so this is a change of basis and
# no dimension is actually dropped despite the variable name.
X_reduced = pca.transform(X)
In [25]:
# Scatter plot of the data expressed in principal-component coordinates.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(X_reduced[:, 0], X_reduced[:, 1])
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
ax.set(
    title='Data projected onto the principal components',
    xlabel='principal component 1',
    ylabel='principal component 2',
)
# fig.savefig('reduced.png')  # uncomment to export the figure
# Explicit show() renders the figure without echoing the artist repr as cell output.
plt.show()
Out[25]:
In [ ]: