notebook.community

Edit and run



In [1]:

    
import warnings
# Silence every warning for the rest of the session so cell output stays clean.
# NOTE(review): with no category given this hides *all* warnings (including
# deprecation notices from the libraries used below); consider narrowing it.
warnings.filterwarnings('ignore')



In [2]:

    
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline



In [3]:

    
import numpy as np
from sklearn.decomposition import PCA



In [4]:

    
# Toy data set: eight 2-D samples, one sample per row, one feature per column.
X = np.array([
    [-1, -1],
    [-2, -1],
    [-3, -2],
    [1, 1],
    [2, 1],
    [3, 2],
    [4, 3],
    [4, -1],
])
# Alternative data sets to experiment with (uncomment one to replace X):
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [4, 3], [0, 0]])
# X = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 1], [2, 2], [3, 3], [4, 4]])



In [5]:

    
# Echo the sample matrix to check the data before fitting.
X









    Out[5]:





array([[-1, -1],
       [-2, -1],
       [-3, -2],
       [ 1,  1],
       [ 2,  1],
       [ 3,  2],
       [ 4,  3],
       [ 4, -1]])



In [6]:

    
import matplotlib.pyplot as plt



In [7]:

    
# Scatter plot of the raw samples: first feature on the x-axis, second on the y-axis.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(X[:, 0], X[:, 1])
# fig.savefig('original.png')









    Out[7]:





<matplotlib.collections.PathCollection at 0x2dabcd545f8>



In [8]:

    
# Fit a PCA that keeps both components (X has two features, so nothing is
# discarded yet; this only rotates the data onto its principal axes).
pca = PCA(n_components=2).fit(X)
pca  # fit() returns the estimator itself, so this echoes the fitted model









    Out[8]:





PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)



In [9]:

    
# Variance of the data along each principal component, largest first.
pca.explained_variance_









    Out[9]:





array([9.3121762, 1.1878238])



In [10]:

    
# Fraction of the total variance captured by each component. Because both
# components are kept here, the ratios sum to 1. The first component alone
# carries about 89% of the variance, so the second one could be dropped with
# little loss of information.
pca.explained_variance_ratio_









    Out[10]:





array([0.88687392, 0.11312608])



In [11]:

    
# Project the samples onto the fitted principal components; the result has one
# row per sample and one column per component.
X_transformed = pca.transform(X)
X_transformed









    Out[11]:





array([[ 2.35476789,  0.13254503],
       [ 3.23121141, -0.34895965],
       [ 4.58915961,  0.04597919],
       [-0.36112851, -0.65733264],
       [-1.23757203, -0.17582796],
       [-2.59552023, -0.57076679],
       [-3.95346843, -0.96570563],
       [-2.0274497 ,  2.54006844]])



In [12]:

    
# Scatter plot of the samples in principal-component coordinates:
# first component on the x-axis, second component on the y-axis.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(X_transformed[:, 0], X_transformed[:, 1])
# fig.savefig('reduced.png')









    Out[12]:





<matplotlib.collections.PathCollection at 0x2dabd142e80>

Reduction to one principal component



In [13]:

    
# Refit on the same data, this time keeping only the first principal component.
# NOTE: this rebinds `pca`, replacing the 2-component model fitted earlier, so
# re-running earlier cells afterwards will use this 1-component model.
pca = PCA(n_components=1).fit(X)
pca  # fit() returns the estimator itself, so this echoes the fitted model









    Out[13]:





PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)



In [14]:

    
# Variance of the data along the single retained component.
pca.explained_variance_









    Out[14]:





array([9.3121762])



In [15]:

    
# Only the retained component is reported, so the ratio no longer sums to 1:
# this single component keeps about 89% of the total variance, and the
# remaining ~11% is what the reduction to one dimension discards.
pca.explained_variance_ratio_









    Out[15]:





array([0.88687392])



In [16]:

    
# Project the samples onto the single retained component: one row per sample,
# one column. NOTE: this rebinds `X_transformed`, replacing the 2-column
# projection computed earlier in the notebook.
X_transformed = pca.transform(X)
X_transformed









    Out[16]:





array([[ 2.35476789],
       [ 3.23121141],
       [ 4.58915961],
       [-0.36112851],
       [-1.23757203],
       [-2.59552023],
       [-3.95346843],
       [-2.0274497 ]])



In [17]:

    
# Line plot of the 1-D projection. Given a single array, plot() uses the row
# index as x, so this shows each sample's coordinate in input order.
# NOTE(review): the samples are not ordered, so the connecting line carries no
# meaning; a scatter along one axis may be a clearer view.
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(X_transformed)
# fig.savefig('reduced_1d.png')  # distinct name so 'reduced.png' is not overwritten









    Out[17]:





[<matplotlib.lines.Line2D at 0x2dabd229630>]



In [ ]: