In [1]:
import warnings
# Silence every warning for the rest of the session so library notices do not clutter the cell outputs.
# NOTE(review): with no category given this hides all warnings, deprecation notices included;
# consider restricting the filter to the specific category that is actually noisy.
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline

In [3]:
import numpy as np
from sklearn.decomposition import PCA

In [4]:
# Toy 2-D dataset used throughout the notebook: one (x, y) sample per row.
X = np.array([
    [-1, -1],
    [-2, -1],
    [-3, -2],
    [1, 1],
    [2, 1],
    [3, 2],
    [4, 3],
    [4, -1],
])
# Alternative datasets to experiment with (swap one in for the assignment above):
# X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [4, 3], [0, 0]])
# X = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 1], [2, 2], [3, 3], [4, 4]])

In [5]:
# Echo the array as the cell result to confirm the data that will be fed to PCA.
X


Out[5]:
array([[-1, -1],
       [-2, -1],
       [-3, -2],
       [ 1,  1],
       [ 2,  1],
       [ 3,  2],
       [ 4,  3],
       [ 4, -1]])

In [6]:
import matplotlib.pyplot as plt

In [7]:
# Scatter plot of the raw samples: first column on the x-axis, second column on the y-axis.
plt.figure(figsize=(10, 10))
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
plt.title('Original data')
plt.xlabel('feature 0')
plt.ylabel('feature 1')
# The plotting call is kept last so the cell result is still the PathCollection.
plt.scatter(X[:, 0], X[:, 1])
# plt.savefig('original.png')


Out[7]:
<matplotlib.collections.PathCollection at 0x2dabcd545f8>

In [8]:
# PCA that keeps two components: X has two columns, so no dimension is dropped here and the
# data is only re-expressed along its principal axes.
pca = PCA(n_components=2)
# fit() learns the components from X and returns the estimator, which is echoed as the cell output.
pca.fit(X)


Out[8]:
PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [9]:
# Variance of the data along each fitted principal component (one entry per component).
pca.explained_variance_


Out[9]:
array([9.3121762, 1.1878238])

In [10]:
# Fraction of the total variance captured by each component. With both components kept the
# ratios sum to 1. The first component dominates (~89%), so the second one (~11%) could be
# dropped with little loss of information.
pca.explained_variance_ratio_


Out[10]:
array([0.88687392, 0.11312608])

In [11]:
# Project the samples onto the fitted principal components; the result has one column per component.
X_transformed = pca.transform(X)
# Echo the projected coordinates.
X_transformed


Out[11]:
array([[ 2.35476789,  0.13254503],
       [ 3.23121141, -0.34895965],
       [ 4.58915961,  0.04597919],
       [-0.36112851, -0.65733264],
       [-1.23757203, -0.17582796],
       [-2.59552023, -0.57076679],
       [-3.95346843, -0.96570563],
       [-2.0274497 ,  2.54006844]])

In [12]:
# Scatter plot of the samples expressed in principal-component coordinates.
plt.figure(figsize=(10, 10))
# Title and axis labels let the figure stand on its own when the notebook is skimmed.
plt.title('Data projected onto both principal components')
plt.xlabel('principal component 1')
plt.ylabel('principal component 2')
# The plotting call is kept last so the cell result is still the PathCollection.
plt.scatter(X_transformed[:, 0], X_transformed[:, 1])
# plt.savefig('reduced.png')


Out[12]:
<matplotlib.collections.PathCollection at 0x2dabd142e80>

Reduction to one principal component


In [13]:
# Refit PCA keeping only one principal component, which reduces the 2-D data to 1-D.
# NOTE: this rebinds `pca`, replacing the two-component model fitted earlier in the notebook.
pca = PCA(n_components=1)
# fit() returns the estimator, which is echoed as the cell output.
pca.fit(X)


Out[13]:
PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [14]:
# Variance of the data along the single retained component.
pca.explained_variance_


Out[14]:
array([9.3121762])

In [15]:
# Only one ratio is reported now, so it no longer sums to 1: the single retained component
# explains ~89% of the total variance and the remaining ~11% is discarded by the reduction.
pca.explained_variance_ratio_


Out[15]:
array([0.88687392])

In [16]:
# Project the samples onto the single retained component; the result has one column.
# NOTE: this rebinds `X_transformed`, overwriting the two-column projection computed earlier.
X_transformed = pca.transform(X)
# Echo the 1-D coordinates.
X_transformed


Out[16]:
array([[ 2.35476789],
       [ 3.23121141],
       [ 4.58915961],
       [-0.36112851],
       [-1.23757203],
       [-2.59552023],
       [-3.95346843],
       [-2.0274497 ]])

In [17]:
# Show the 1-D projection as points on a number line.
# Each sample is drawn at its principal-component coordinate. A constant y of 0 is used only
# to place the markers, and the 'o' format draws markers without connecting lines: the order
# of the samples carries no meaning, so a line through them would suggest a trend that is
# not there.
plt.figure(figsize=(10, 10))
plt.title('Data projected onto the first principal component')
plt.xlabel('principal component 1')
plt.yticks([])  # the y-axis carries no information in this plot
# The plotting call is kept last so the cell result is still a list of Line2D objects.
plt.plot(X_transformed[:, 0], np.zeros(len(X_transformed)), 'o')
# plt.savefig('reduced_1d.png')


Out[17]:
[<matplotlib.lines.Line2D at 0x2dabd229630>]

In [ ]: