In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import NMF
import matplotlib.pyplot as plt
%matplotlib inline
In [10]:
X = np.array([[-3, -2], [-2, -1], [-1, 0], [0, 1], [1, 2], [2, 3],
              [-2, -2], [-1, -1], [0, 0], [1, 1], [2, 2],
              [-2, -3], [-1, -2], [0, -1], [1, 0], [2, 1], [3, 2]])
In [5]:
# Mean of each feature as a (2, 1) column vector: (1/N) * X^T * 1
mean_X = 1.0/X.shape[0] * np.dot(X.T, np.ones(shape=(X.shape[0],1)))
Note: Dropping the normalization constant does not change the eigenvectors, only the eigenvalues.
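A quick check of this claim (illustrative, on an arbitrary symmetric matrix): scaling the matrix scales the eigenvalues but leaves the eigenvectors unchanged (up to sign).
In [ ]:
# Illustrative check: eigenvectors of A and 5*A agree (up to sign),
# while the eigenvalues are scaled by 5.
A = np.array([[2.0, 1.0], [1.0, 2.0]])
w_a, v_a = np.linalg.eigh(A)
w_b, v_b = np.linalg.eigh(5.0 * A)
print(np.allclose(np.abs(v_a), np.abs(v_b)))  # True: same eigenvectors up to sign
print(np.allclose(5.0 * w_a, w_b))            # True: eigenvalues scaled by 5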
In [8]:
# Covariance (normalized by N): E[x x^T] - mean * mean^T
cov_X = 1.0/X.shape[0] * np.dot(X.T, X) - np.dot(mean_X, mean_X.T)
In [9]:
cov_X
Out[9]:
array([[ 2.82352941,  2.47058824],
       [ 2.47058824,  2.82352941]])
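As a cross-check (illustrative), NumPy's built-in covariance with bias=True (normalizing by N rather than N-1) should agree with the manual computation above.
In [ ]:
# np.cov expects variables in rows, hence X.T
print(np.allclose(cov_X, np.cov(X.T, bias=True)))  # expected: True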
In [12]:
# Eigendecomposition of the covariance matrix (X itself is not square)
w, v = np.linalg.eigh(cov_X)
w, v
Out[12]:
(array([ 0.35294118,  5.29411765]),
 array([[-0.70710678,  0.70710678],
        [ 0.70710678,  0.70710678]]))
In [27]:
plt.scatter(X[:, 0], X[:, 1])
mean_d1, mean_d2 = np.squeeze(mean_X, 1)
plt.plot(mean_d1, mean_d2, 'o', markersize=10, color='red', alpha=0.5)
# Eigenvectors are the columns of v: arrow i has components (v[0, i], v[1, i])
plt.arrow(mean_d1, mean_d2, v[0, 0], v[1, 0], width=0.01, color='red', alpha=0.5)
plt.arrow(mean_d1, mean_d2, v[0, 1], v[1, 1], width=0.01, color='red', alpha=0.5);
In [29]:
v_trunc = v[:,1]  # eigenvector with the largest eigenvalue (eigh sorts ascending)
In [32]:
# Project each point onto the principal direction (one scalar per point)
X_transformed = np.dot(X, v_trunc)
X_transformed
Out[32]:
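As a cross-check (illustrative; the sign of a principal direction is arbitrary), sklearn's PCA should produce the same one-dimensional projection up to sign, since the data here is already zero-mean.
In [ ]:
from sklearn.decomposition import PCA
pca = PCA(n_components=1)
X_pca = pca.fit_transform(X).ravel()
print(np.allclose(np.abs(X_pca), np.abs(X_transformed)))  # expected: True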
In [34]:
M = np.array([[1,2], [6,3], [0,2]])
In [41]:
# Thin (economy) SVD: U is 3x2, S holds the 2 singular values, Vt is 2x2
U, S, Vt = np.linalg.svd(M, full_matrices=False)
In [42]:
U
Out[42]:
In [43]:
S
Out[43]:
In [53]:
Sigma = np.diag(S)  # singular values as a 2x2 diagonal matrix
Sigma
Out[53]:
In [44]:
Vt
Out[44]:
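Note: with full_matrices=False, U has orthonormal columns and Vt has orthonormal rows. A quick check (illustrative):
In [ ]:
print(np.allclose(np.dot(U.T, U), np.eye(2)))   # True: orthonormal columns
print(np.allclose(np.dot(Vt, Vt.T), np.eye(2))) # True: orthonormal rows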
In [73]:
#Does U @ Sigma @ Vt reconstruct the original M?
np.isclose(M, U.dot(Sigma).dot(Vt))
Out[73]:
In [76]:
#Use a copy so zeroing the smallest singular value does not also mutate Sigma
Sigma_c = Sigma.copy()
Sigma_c[1,1] = 0
Sigma_c
Out[76]:
In [77]:
#Dimensionality reduction - Method 1
P = np.dot(U,Sigma_c)
In [78]:
P
Out[78]:
In [79]:
#Dimensionality reduction - Method 2; note: equals the first (non-zero) column of P
P_ = np.dot(M, Vt[0,:])
In [80]:
P_
Out[80]:
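A quick check that the two methods agree (illustrative): P_ should equal the first (non-zero) column of P.
In [ ]:
print(np.allclose(P_, P[:, 0]))  # expected: True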
In [82]:
#Reconstruction
M_reconstruct = P.dot(Vt)
M_reconstruct
Out[82]:
In [83]:
#atol was left blank in the original; 0.5 is an arbitrary illustrative tolerance
np.isclose(M, M_reconstruct, atol=0.5)
Out[83]:
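By the Eckart-Young theorem, the rank-1 reconstruction above is the best rank-1 approximation of M in the Frobenius norm, and its error equals the discarded singular value. A quick check (illustrative):
In [ ]:
# Frobenius-norm error of the rank-1 reconstruction vs. the discarded
# singular value S[1]; the two should match.
print(np.linalg.norm(M - M_reconstruct), S[1])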