In [20]:
# Looking at applying non-linear transformations for
# dimensionality reduction.

# Kernel PCA is for data that is not linearly seperable.
# This methodology projects the features onto a new plane
# then uses PCA to linearly separate the data.

# the cosine kernel compares the angle between two samples
# in the feature space

import numpy as np
from sklearn.decomposition import KernelPCA, PCA

In [3]:
A1_mean = [1, 1]
A1_cov = [[2, .99], [1, 1]]
A1 = np.random.multivariate_normal(A1_mean, A1_cov, 50)

In [4]:
A2_mean = [5,5]
A2_cov = [[2, .99], [1, 1]]
A2 = np.random.multivariate_normal(A2_mean, A2_cov, 50)

In [16]:
A = np.vstack((A1, A2))
A[:10]


Out[16]:
array([[ 0.70835939,  1.17198661],
       [-0.26187959,  0.61186227],
       [ 1.67889847,  0.32427944],
       [ 0.28265039,  0.9621006 ],
       [-0.05237279, -0.34116584],
       [ 2.11273141,  2.40026134],
       [ 0.05014519,  1.6727445 ],
       [-0.64323461, -0.64652723],
       [ 2.28506221,  1.69812994],
       [ 0.20600501,  0.21688606]])

In [17]:
B_mean = [5, 0]
B_cov = [[.5, -1], [-.9, .5]]
B = np.random.multivariate_normal(B_mean, B_cov, 100)
B[:10]


/usr/local/lib/python2.7/site-packages/IPython/kernel/__main__.py:3: RuntimeWarning: covariance is not positive-semidefinite.
  app.launch_new_instance()
Out[17]:
array([[ 6.65076371, -1.82407337],
       [ 4.97895322,  0.02325643],
       [ 3.53075515,  1.62349728],
       [ 5.41064259, -0.45375496],
       [ 4.28297064,  0.79230852],
       [ 5.51575627, -0.56990427],
       [ 5.21966135, -0.24272306],
       [ 5.04416523, -0.04880202],
       [ 4.69706394,  0.33474058],
       [ 4.58391759,  0.45976588]])

In [8]:
kpca = KernelPCA(kernel='cosine', n_components=1)

In [18]:
AB = np.vstack((A,B))
AB[:10]


Out[18]:
array([[ 0.70835939,  1.17198661],
       [-0.26187959,  0.61186227],
       [ 1.67889847,  0.32427944],
       [ 0.28265039,  0.9621006 ],
       [-0.05237279, -0.34116584],
       [ 2.11273141,  2.40026134],
       [ 0.05014519,  1.6727445 ],
       [-0.64323461, -0.64652723],
       [ 2.28506221,  1.69812994],
       [ 0.20600501,  0.21688606]])

In [10]:
AB_transformed = kpca.fit_transform(AB)

In [19]:
AB_transformed[:10]


Out[19]:
array([[-0.62063459],
       [-0.89313968],
       [ 0.13495861],
       [-0.77591416],
       [ 1.01878287],
       [-0.48511126],
       [-0.87320878],
       [ 0.61877755],
       [-0.3022956 ],
       [-0.45375917]])

In [21]:
pca = PCA(n_components=1)

In [22]:
AB_PCA_transformed = pca.fit_transform(AB)

In [23]:
AB_PCA_transformed[:10]


Out[23]:
array([[-0.77888382],
       [-1.48322177],
       [-1.46516671],
       [-1.05248344],
       [-2.3920157 ],
       [ 0.65304251],
       [-0.38669893],
       [-2.78563695],
       [-0.01370096],
       [-1.80054432]])

In [ ]: