In [20]:
# Applying a non-linear transformation for dimensionality reduction.
# Kernel PCA is useful for data that is not linearly separable.
# It implicitly maps the samples into a higher-dimensional feature
# space via a kernel function, then performs ordinary (linear) PCA
# in that space.
# The cosine kernel measures the angle between two samples in the
# feature space, ignoring their magnitudes.
import numpy as np
from sklearn.decomposition import KernelPCA, PCA
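In [ ]:
# A quick illustration (my addition, not part of the original notebook):
# the cosine kernel between two samples x and y is
# x . y / (||x|| * ||y||), the cosine of the angle between them.
from sklearn.metrics.pairwise import cosine_similarity

x = np.array([[1.0, 2.0]])
y = np.array([[2.0, 4.0]])
manual = (x @ y.T) / (np.linalg.norm(x) * np.linalg.norm(y))
manual, cosine_similarity(x, y)  # both 1.0: the vectors point the same way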
In [3]:
# First half of class A: a Gaussian blob centered at (1, 1).
A1_mean = [1, 1]
A1_cov = [[2, .99], [.99, 1]]  # covariance must be symmetric positive semi-definite
A1 = np.random.multivariate_normal(A1_mean, A1_cov, 50)
In [4]:
# Second half of class A: the same shape, shifted to (5, 5).
A2_mean = [5, 5]
A2_cov = [[2, .99], [.99, 1]]
A2 = np.random.multivariate_normal(A2_mean, A2_cov, 50)
In [16]:
# Stack the two blobs into a single 100-sample class A.
A = np.vstack((A1, A2))
A[:10]
Out[16]:
In [17]:
# Class B: a negatively correlated blob centered at (5, 0).
B_mean = [5, 0]
# The original [[.5, -1], [-.9, .5]] is neither symmetric nor positive
# semi-definite; a valid covariance with a similar negative correlation:
B_cov = [[.5, -.45], [-.45, .5]]
B = np.random.multivariate_normal(B_mean, B_cov, 100)
B[:10]
Out[17]:
In [8]:
# Reduce to a single component using the cosine kernel.
kpca = KernelPCA(kernel='cosine', n_components=1)
In [18]:
# Combine both classes: rows 0-99 are A, rows 100-199 are B.
AB = np.vstack((A, B))
AB[:10]
Out[18]:
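In [ ]:
# Visualizing the raw clusters (my addition, assuming matplotlib is
# available in the environment): a scatter plot of the A and B samples
# before any dimensionality reduction.
import matplotlib.pyplot as plt

plt.scatter(A[:, 0], A[:, 1], label='A')
plt.scatter(B[:, 0], B[:, 1], label='B')
plt.legend()
plt.show()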
In [10]:
AB_transformed = kpca.fit_transform(AB)
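In [ ]:
# Under the hood (a sketch of my own, not from the original): for
# kernel='cosine', fit_transform builds the cosine-similarity matrix,
# centers it, and projects onto its top eigenvector scaled by the
# square root of the eigenvalue. Reproducing that by hand should match
# AB_transformed up to sign.
from sklearn.metrics.pairwise import cosine_similarity

K = cosine_similarity(AB)                      # 200 x 200 kernel matrix
n = K.shape[0]
one_n = np.ones((n, n)) / n
K_centered = K - one_n @ K - K @ one_n + one_n @ K @ one_n
eigvals, eigvecs = np.linalg.eigh(K_centered)  # eigenvalues in ascending order
first_pc = eigvecs[:, -1] * np.sqrt(eigvals[-1])
np.allclose(np.abs(first_pc), np.abs(AB_transformed[:, 0]))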
In [19]:
AB_transformed[:10]
Out[19]:
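In [ ]:
# A rough check (my addition): rows 0-99 of AB came from class A and
# rows 100-199 from class B, so compare where each class lands on the
# single kernel-PCA component.
kpca_A = AB_transformed[:100, 0]
kpca_B = AB_transformed[100:, 0]
kpca_A.mean(), kpca_B.mean()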
In [21]:
# Plain linear PCA on the same data, for comparison.
pca = PCA(n_components=1)
In [22]:
AB_PCA_transformed = pca.fit_transform(AB)
In [23]:
AB_PCA_transformed[:10]
Out[23]:
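In [ ]:
# Comparing the two 1-D projections (a sketch of my own, not from the
# original): count how many samples land on the wrong side of the
# midpoint between the two class means. Fewer crossings suggests a
# cleaner separation along that component.
def crossings(proj):
    a, b = proj[:100, 0], proj[100:, 0]
    mid = (a.mean() + b.mean()) / 2
    if a.mean() < b.mean():
        return int(np.sum(a > mid) + np.sum(b < mid))
    return int(np.sum(a < mid) + np.sum(b > mid))

crossings(AB_transformed), crossings(AB_PCA_transformed)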
In [ ]: