In [4]:
# Using Truncated SVD (Singular Value Decomposition).
# SVD factors a matrix M into 3 matrices: U, S, and V.
# Similar to PCA except that SVD is done on the data matrix and
# PCA is performed on the Covariance Matrix.
# Typically SVD is done to find the principal components of a
# matrix.
from sklearn.datasets import load_iris
from sklearn.decomposition import TruncatedSVD
In [5]:
# Load the iris dataset and pull out the feature matrix and the labels.
iris = load_iris()
iris_data, iris_target = iris.data, iris.target
In [6]:
# Project the iris features onto the top 2 components with truncated SVD.
# The estimator is named `tsvd` rather than `svd` so it is not confused
# with (and later clobbered by) the `scipy.linalg.svd` function imported
# in a subsequent cell.
tsvd = TruncatedSVD(n_components=2)
iris_transformed = tsvd.fit_transform(iris_data)
In [7]:
# Peek at the first five rows of the original 4-feature data.
iris_data[:5]
Out[7]:
In [8]:
# First five rows after reduction to 2 components.
iris_transformed[:5]
Out[8]:
In [9]:
# How does this work?
# Reproduce the same result using only scipy...
In [12]:
from scipy.linalg import svd
import numpy as np
In [13]:
# Small 3x2 demo matrix for the hand-rolled SVD walkthrough.
D = np.array([
    [1, 2],
    [1, 3],
    [1, 4],
])
In [14]:
# Display the demo matrix.
D
Out[14]:
In [15]:
# Economy-size SVD (full_matrices=False): for the 3x2 matrix D this yields
# U (3x2), S (2,) singular values in descending order, and a 2x2 factor.
# NOTE: scipy binds the transpose V^T to the name V here, which is why the
# reconstruction below multiplies by V directly without transposing.
U,S,V = svd(D, full_matrices=False)
In [20]:
# Show the shapes and contents of the three factors.
# Converted from Python 2 print statements to the print() function so the
# cell also runs under Python 3 (the rest of the notebook is 2/3-compatible).
print(U.shape, S.shape, V.shape)
print(U, S, V)
In [17]:
# Reconstruct D from its factors to confirm that U, S, V really are a
# decomposition of D: D == U · diag(S) · V (V already holds the transpose).
U.dot(np.diag(S)).dot(V)
Out[17]:
In [19]:
# matrix returned by TruncatedSVD is just the dot product
# of the U and S matrices
# To simulate truncation, drop smallest singular values and the
# corresponding column vectors of U
# The matrix TruncatedSVD returns is just the product U·S restricted to
# the retained components. Simulate truncation by dropping the smallest
# singular values together with the matching columns of U — here we keep
# only the leading component (rank 1).
top_sigma = S[0]
top_col = U[:, 0]
top_col.dot(top_sigma)
Out[19]:
In [ ]: