In [4]:
# Using TruncatedSVD (truncated Singular Value Decomposition).
# SVD factors a matrix M into three matrices: M = U * S * Vt,
# where S is diagonal and contains the singular values.
# It is similar to PCA, except that SVD is computed on the data
# matrix directly, while PCA is performed on the covariance matrix.
# Typically, SVD is used to find the principal components of a
# matrix (a quick check of the PCA relationship follows below).

from sklearn.datasets import load_iris
from sklearn.decomposition import TruncatedSVD
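
The PCA/SVD relationship noted above can be verified directly. This is a minimal sketch (it relies on sklearn's PCA, which centers the data internally): the right singular vectors of the centered data matrix match PCA's components up to sign.

In [ ]:
import numpy as np
from scipy.linalg import svd as scipy_svd
from sklearn.decomposition import PCA

X = load_iris().data
X_centered = X - X.mean(axis=0)

# right singular vectors of the centered data matrix...
_, _, Vt = scipy_svd(X_centered, full_matrices=False)

# ...match PCA's principal components, up to sign:
pca = PCA(n_components=2).fit(X)
print(np.allclose(np.abs(pca.components_), np.abs(Vt[:2])))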

In [5]:
iris = load_iris()
iris_data = iris.data
iris_target = iris.target

In [6]:
svd = TruncatedSVD(n_components=2)
iris_transformed = svd.fit_transform(iris_data)
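
As a quick follow-up (using the fitted svd object from the cell above), TruncatedSVD exposes explained_variance_ratio_ after fitting, which shows how much of the data's variance the two components retain:

In [ ]:
# variance retained by each of the two components, and in total
print(svd.explained_variance_ratio_)
print(svd.explained_variance_ratio_.sum())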

In [7]:
iris_data[:5]


Out[7]:
array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2]])

In [8]:
iris_transformed[:5]


Out[8]:
array([[ 5.91220352, -2.30344211],
       [ 5.57207573, -1.97383104],
       [ 5.4464847 , -2.09653267],
       [ 5.43601924, -1.87168085],
       [ 5.87506555, -2.32934799]])

In [9]:
# How does this work?
# We can do the same thing using scipy alone...

In [12]:
from scipy.linalg import svd  # note: this shadows the TruncatedSVD object above
import numpy as np

In [13]:
D = np.array([[1,2], [1,3], [1,4]])

In [14]:
D


Out[14]:
array([[1, 2],
       [1, 3],
       [1, 4]])

In [15]:
U, S, Vt = svd(D, full_matrices=False)  # note: scipy's svd returns V already transposed

In [20]:
print(U.shape, S.shape, Vt.shape)

print(U, S, Vt)


(3, 2) (2,) (2, 2)
[[-0.39133557  0.8247362 ]
 [-0.5605708   0.13817999]
 [-0.72980603 -0.54837623]] [ 5.64015854  0.43429448] [[-0.29816758 -0.95451354]
 [ 0.95451354 -0.29816758]]

In [17]:
# reconstruct D to show that U, S, and Vt are just a factorization of D:
np.dot(U.dot(np.diag(S)), Vt)


Out[17]:
array([[ 1.,  2.],
       [ 1.,  3.],
       [ 1.,  4.]])
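
One more property worth checking (a small sanity check using the U and Vt computed above): the columns of U and the rows of Vt are orthonormal, which is what makes the truncation below behave like a projection onto the leading components.

In [ ]:
# U'U and Vt*Vt' should both be the 2x2 identity matrix
print(np.allclose(U.T.dot(U), np.eye(2)))
print(np.allclose(Vt.dot(Vt.T), np.eye(2)))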

In [19]:
# The matrix returned by TruncatedSVD is just the dot product
# of the (truncated) U and S matrices.
# To simulate truncation, drop the smallest singular values and the
# corresponding columns of U:
new_S = S[0]
new_U = U[:, 0]
new_U.dot(new_S)


Out[19]:
array([-2.20719466, -3.16170819, -4.11622173])
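
Applying the same manual truncation to the iris data reproduces the TruncatedSVD output from earlier. A sketch, with two caveats: singular vectors are only determined up to sign, and TruncatedSVD's default randomized solver can introduce tiny numerical differences, so the comparison is on absolute values with a loose tolerance.

In [ ]:
# full SVD of the iris data, truncated to the top 2 components
U_i, S_i, Vt_i = svd(iris_data, full_matrices=False)
manual = U_i[:, :2].dot(np.diag(S_i[:2]))

# compare against TruncatedSVD's output, ignoring sign flips
print(np.allclose(np.abs(manual), np.abs(iris_transformed), atol=1e-6))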

In [ ]: