In [1]:
    
import warnings
# Suppress every warning for the rest of the kernel session so that warning
# text does not show up in cell output.
# NOTE(review): this blanket filter also hides warnings that may matter;
# consider narrowing it to a specific category.
warnings.filterwarnings('ignore')
    
In [2]:
    
%matplotlib inline
    
In [3]:
    
import numpy as np
from sklearn.decomposition import PCA
    
In [4]:
    
# Toy 2-D data set: eight points, one (column 0, column 1) pair per row.
X = np.array([
    [-1, -1],
    [-2, -1],
    [-3, -2],
    [1, 1],
    [2, 1],
    [3, 2],
    [4, 3],
    [4, -1],
])
# Alternative data set, kept for experimentation:
# X = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 1], [2, 2], [3, 3], [4, 4]])
    
In [5]:
    
# Echo the array so the notebook displays the raw values.
X
    
    Out[5]:
In [6]:
    
import matplotlib.pyplot as plt
    
In [7]:
    
# Scatter plot of the raw data: column 0 of X on the x-axis, column 1 on the
# y-axis. Explicit figure/axes objects are used so the title and labels are
# attached to this plot rather than to whatever pyplot considers "current".
fig, ax = plt.subplots(figsize=(10, 10))
ax.set(title='Original data', xlabel='X[:, 0]', ylabel='X[:, 1]')
# The scatter call stays last so the cell's displayed value is unchanged.
ax.scatter(X[:, 0], X[:, 1])
# plt.savefig('original.png')
    
    Out[7]:
    
In [8]:
    
from sklearn.preprocessing import StandardScaler
    
In [9]:
    
# Create a scaler with default settings; it is fitted on X in the next cell.
scaler = StandardScaler()
    
In [10]:
    
# Learn the scaling statistics from X; they are inspected below through
# scaler.mean_ and scaler.var_.
scaler.fit(X)
    
    Out[10]:
In [15]:
    
# Mean of each column of X as learned by scaler.fit(X).
scaler.mean_
    
    Out[15]:
In [16]:
    
# Variance of each column of X as learned by scaler.fit(X).
scaler.var_
    
    Out[16]:
In [17]:
    
# Apply the statistics learned from X to X itself. The standardized copy is
# plotted and fed into the correlation matrix in the following cells.
X_scaled = scaler.transform(X)
    
In [18]:
    
# Scatter plot of the standardized data (default figure size). Title and axis
# labels make the figure readable on its own and distinguish it from the plot
# of the raw data.
fig, ax = plt.subplots()
ax.set(title='Standardized data', xlabel='X_scaled[:, 0]', ylabel='X_scaled[:, 1]')
# The scatter call stays last so the cell's displayed value is unchanged.
ax.scatter(X_scaled[:, 0], X_scaled[:, 1])
    
    Out[18]:
    
In [15]:
    
import pandas as pd
    
In [16]:
    
# Wrap the standardized array in a DataFrame (default integer column labels)
# so that pandas' corr() can be used on it.
df = pd.DataFrame(X_scaled)
    
In [17]:
    
# Pairwise correlation between the columns (pandas' default method is Pearson).
# Pearson correlation is unaffected by per-column shifting and positive
# rescaling, so the same matrix would result from the unscaled X.
corr_mat = df.corr()
    
In [18]:
    
# Echo the correlation matrix as a table.
corr_mat
    
    Out[18]:
In [19]:
    
import seaborn as sns
    
In [20]:
    
# Annotated heatmap of the correlation matrix, drawn on an explicit axes object.
# The title is set before the heatmap call so that sns.heatmap remains the
# cell's last expression. Axis labels are left to seaborn, which derives them
# from the DataFrame's index/column names.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title('Correlation matrix of the standardized features')
sns.heatmap(corr_mat, annot=True, ax=ax)
# plt.savefig('correlation.png')
    
    Out[20]:
    
In [21]:
    
# Build a two-component PCA and fit it in one step; fit() returns the estimator
# itself, so `pca` is the fitted model.
# NOTE(review): the model is fitted on the raw X, not on X_scaled computed
# earlier. PCA is sensitive to feature scale, so confirm this is intended.
pca = PCA(n_components=2).fit(X)
# Echo the fitted estimator, as the original cell did.
pca
    
    Out[21]:
In [22]:
    
# Amount of variance explained by each of the two fitted components.
pca.explained_variance_
    
    Out[22]:
In [23]:
    
# Fraction of the total variance captured by each component. Because both
# components of this 2-D data set are kept, the fractions sum to 1. The first
# component carries most of the variance here, so the second could be dropped
# with comparatively little loss of information.
pca.explained_variance_ratio_
    
    Out[23]:
In [24]:
    
# Project the points onto the fitted principal axes. The raw X is passed
# because that is what pca was fitted on; the input here must match the
# representation used in pca.fit().
X_reduced = pca.transform(X)
    
In [25]:
    
# Scatter plot of the data expressed in principal-component coordinates:
# column 0 of X_reduced is the first component, column 1 the second.
# Title and axis labels distinguish this figure from the earlier scatter plots.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set(
    title='Data projected onto the principal components',
    xlabel='principal component 1',
    ylabel='principal component 2',
)
# The scatter call stays last so the cell's displayed value is unchanged.
ax.scatter(X_reduced[:, 0], X_reduced[:, 1])
# plt.savefig('reduced.png')
    
    Out[25]:
    
In [ ]: