In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import NMF
import matplotlib.pyplot as plt
%matplotlib inline

Exercise 1 - Principal Component Analysis


In [10]:
X = np.array([[-3, -2],[-2, -1], [-1, 0], [0,1], [1,2], [2,3], [-2,-2], [-1,-1], [0,0], [1,1], [2,2], [-2,-3], [-1,-2], [0, -1], [1, 0], [2,1], [3,2]])

In [5]:
# Sample mean as a column vector of shape (2, 1): X^T 1 / N
mean_X = 1.0/X.shape[0] * np.dot(X.T, np.ones(shape=(X.shape[0],1)))

Note: Dropping the normalization constant 1/N does not change the eigenvectors of the covariance matrix, only the eigenvalues, which get scaled by N.
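
To see this concretely, here is a quick check (a sketch reusing X and mean_X from above): scaling the covariance matrix by the sample size N scales the eigenvalues by N but leaves the eigenvectors unchanged.

In [ ]:
# Sketch: eigenvectors of C and of N*C agree (up to sign); eigenvalues differ by the factor N
N = X.shape[0]
C = 1.0/N * np.dot(X.T, X) - np.dot(mean_X, mean_X.T)
w1, v1 = np.linalg.eigh(C)
w2, v2 = np.linalg.eigh(N * C)
print(np.allclose(w2, N * w1))              # eigenvalues scaled by N
print(np.allclose(np.abs(v1), np.abs(v2)))  # same eigenvectors up to sign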


In [8]:
# Biased covariance estimate: E[x x^T] - mu mu^T, with 1/N normalization
cov_X = 1.0/X.shape[0] * np.dot(X.T, X) - np.dot(mean_X, mean_X.T)

In [9]:
cov_X


Out[9]:
array([[ 2.82352941,  2.47058824],
       [ 2.47058824,  2.82352941]])
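
As a sanity check, numpy's built-in covariance with bias=True (i.e. the same 1/N normalization) should give the same matrix:

In [ ]:
# Cross-check against numpy's built-in biased covariance estimate
np.cov(X.T, bias=True)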

In [12]:
# Eigendecomposition of the (symmetric) covariance matrix, not of the data matrix itself
w, v = np.linalg.eigh(cov_X)
w, v


Out[12]:
(array([ 0.35294118,  5.29411765]), array([[-0.70710678,  0.70710678],
        [ 0.70710678,  0.70710678]]))
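
As a cross-check, sklearn's PCA should recover the same principal axes (a sketch; PCA is not imported at the top, but sklearn.decomposition is already in use there):

In [ ]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2).fit(X)
pca.components_  # rows sorted by descending variance; row 0 should match v[:, 1] up to sign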

In [27]:
plt.scatter(X[:, 0], X[:, 1])
mean_d1, mean_d2 = np.squeeze(mean_X, 1)
plt.plot(mean_d1, mean_d2, 'o', markersize=10, color='red', alpha=0.5)
# The eigenvectors are the columns of v (here v is symmetric, so rows happen to coincide with columns)
plt.arrow(mean_d1, mean_d2, v[0, 0], v[1, 0], width=0.01, color='red', alpha=0.5)
plt.arrow(mean_d1, mean_d2, v[0, 1], v[1, 1], width=0.01, color='red', alpha=0.5);



In [29]:
# eigh returns eigenvalues in ascending order, so the last column is the top principal direction
v_trunc = v[:,1]

In [32]:
X_transformed = np.dot(X, v_trunc)  # project the data onto the top principal direction
X_transformed


Out[32]:
array([-3.53553391, -2.12132034, -0.70710678,  0.70710678,  2.12132034,
        3.53553391, -2.82842712, -1.41421356,  0.        ,  1.41421356,
        2.82842712, -3.53553391, -2.12132034, -0.70710678,  0.70710678,
        2.12132034,  3.53553391])
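
Since only one direction was kept, mapping the 1-D scores back to 2-D gives a rank-1 approximation of X; a minimal sketch (the mean is zero here, so no re-centering is needed):

In [ ]:
# Back-projection: outer product of the 1-D scores with the retained direction
X_approx = np.outer(X_transformed, v_trunc)
np.allclose(X_approx[:, 0], X_approx[:, 1])  # all reconstructed points lie on the line y = x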

Exercise 2 - SVD and Dimensionality Reduction


In [34]:
M = np.array([[1,2], [6,3], [0,2]])

In [41]:
U, S, Vt = np.linalg.svd(M, full_matrices=False)  # economy-size ("thin") SVD

In [42]:
U


Out[42]:
array([[-0.27073584,  0.54578489],
       [-0.95094914, -0.27969357],
       [-0.14965909,  0.78986731]])

In [43]:
S


Out[43]:
array([ 7.02571561,  2.15390813])
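
This ties back to Exercise 1: the singular values of M are the square roots of the eigenvalues of M^T M, as a quick check confirms:

In [ ]:
# Singular values = sqrt of the eigenvalues of M^T M (reversed to descending order)
np.sqrt(np.linalg.eigvalsh(np.dot(M.T, M))[::-1])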

In [53]:
Sigma = np.diag(S)
Sigma


Out[53]:
array([[ 7.02571561,  0.        ],
       [ 0.        ,  2.15390813]])

In [44]:
Vt


Out[44]:
array([[-0.85065081, -0.52573111],
       [-0.52573111,  0.85065081]])

In [73]:
#Reconstructed similar to original?
np.isclose(M, U.dot(Sigma).dot(Vt))


Out[73]:
array([[ True,  True],
       [ True,  True],
       [ True,  True]], dtype=bool)

In [76]:
Sigma_c = Sigma.copy()  # copy, so that zeroing the smallest singular value does not also modify Sigma
Sigma_c[1,1] = 0
Sigma_c


Out[76]:
array([[ 7.02571561,  0.        ],
       [ 0.        ,  0.        ]])

In [77]:
#Dimensionality reduction - Method 1
P = np.dot(U,Sigma_c)

In [78]:
P


Out[78]:
array([[-1.90211303,  0.        ],
       [-6.68109819,  0.        ],
       [-1.05146222,  0.        ]])

In [79]:
#Dimensionality reduction - Method 2; Note: equals the first column of P
P_ = np.dot(M, Vt[0,:])

In [80]:
P_


Out[80]:
array([-1.90211303, -6.68109819, -1.05146222])
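
A quick check that both methods agree, i.e. that P_ equals the first column of P:

In [ ]:
# M v_1 = sigma_1 u_1, which is exactly the first column of U Sigma_c
np.allclose(P_, P[:, 0])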

In [82]:
#Reconstruction
M_reconstruct = P.dot(Vt)
M_reconstruct


Out[82]:
array([[ 1.61803399,  1.        ],
       [ 5.68328157,  3.51246118],
       [ 0.89442719,  0.5527864 ]])

In [83]:
np.isclose(M, M_reconstruct)


Out[83]:
array([[False, False],
       [False, False],
       [False, False]], dtype=bool)
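
The entries differ because the rank-1 truncation discards the second singular value; by Eckart-Young, the Frobenius norm of the reconstruction error equals exactly that discarded value:

In [ ]:
# Frobenius error of the best rank-1 approximation = dropped singular value S[1]
print(np.linalg.norm(M - M_reconstruct))  # matrix norm defaults to Frobenius
print(S[1])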