In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
from pivdire import reduce_dim_2d
import numpy as np
np.random.seed(666)
In [3]:
##### Create some example data
## 3 clusters of points, sampled from multivariate Gaussians
dim = 64
std = 0.3 * np.identity(dim)  # covariance matrix shared by all three clusters
points_hd = np.empty((63, dim))  # 3 cluster means + 3 * 20 samples
points_hd[0,:] = np.ones(dim)
points_hd[1,:] = np.ones(dim) - np.random.rand(dim) * 2.0
points_hd[2,:] = np.ones(dim)
points_hd[2,:dim//2] -= np.random.rand(dim//2) * 2.0
points_hd[2,dim//2:] -= np.random.rand(dim//2) * 2.0
for imean, mean in enumerate((points_hd[0,:], points_hd[1,:], points_hd[2,:])):
    for i in range(20):
        vec = np.random.multivariate_normal(mean, std)
        points_hd[3+20*imean+i,:] = vec
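As a quick sanity check (an aside added here, not part of the original notebook), the pairwise Euclidean distances between the three cluster means can be printed to confirm they are reasonably far apart in the 64-dimensional space before any reduction; this uses only numpy, which is already imported above.

from itertools import combinations
for a, b in combinations(range(3), 2):
    dist = np.linalg.norm(points_hd[a,:] - points_hd[b,:])
    print("distance between mean %d and mean %d: %.2f" % (a, b, dist))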
In [4]:
##### Load the plotting module and assign colors
### to the points, according to which mean they were sampled from
### Highlight the means themselves (bigger point size)
import matplotlib.pyplot as plt
plt.rcParams['savefig.dpi'] = 150
colors = ['b','r','y'] + ['b']*20 + ['r']*20 + ['y']*20
colors[0] = 'b'
colors[1] = 'r'
colors[2] = 'y'
sizes = [30.0 for i in range(points_hd.shape[0])]
sizes[0] = 300
sizes[1] = 300
sizes[2] = 300
In [5]:
##### Reduce dimensionality without using pivot points
#### All the resulting 2D points end up clustered close to the center
points_2d = reduce_dim_2d(points_hd, seed=666)
plt.scatter(points_2d[:,0],points_2d[:,1],c=colors,s=sizes)
Out[5]:
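To put a rough number on how bunched-up this embedding is (an aside, not from the original notebook; the labels array below is a hypothetical helper that mirrors the point layout built in cell 3, the three means first and then 20 samples per cluster), one can compare the spread of the cluster centroids in 2D to the average spread of points within each cluster:

labels = np.array([0, 1, 2] + [0]*20 + [1]*20 + [2]*20)  # same ordering as the colors list
centroids = np.array([points_2d[labels == k].mean(axis=0) for k in range(3)])
within = np.mean([np.linalg.norm(points_2d[labels == k] - centroids[k], axis=1).mean()
                  for k in range(3)])
between = np.linalg.norm(centroids - centroids.mean(axis=0), axis=1).mean()
print("between-cluster / within-cluster spread: %.2f" % (between / within))

A small ratio is consistent with the comment above: the cluster centroids sit nearly on top of each other relative to the scatter inside each cluster.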
In [6]:
##### Weight the cluster-mean-to-point distances more heavily than the ordinary point-to-point distances
#### The cluster means serve as the pivot points
points_2d = reduce_dim_2d(points_hd, pivot_pos=[0,1,2], pivot_importance=5.5, seed=666)
plt.scatter(points_2d[:,0],points_2d[:,1],c=colors,s=sizes)
Out[6]:
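For a direct before/after comparison, the same between/within ratio can be recomputed on the pivot-weighted embedding (an aside again, reusing the hypothetical labels helper defined after cell 5); a larger value would indicate that the pivot_importance weighting pulls the three clusters apart in the 2D layout:

centroids = np.array([points_2d[labels == k].mean(axis=0) for k in range(3)])
within = np.mean([np.linalg.norm(points_2d[labels == k] - centroids[k], axis=1).mean()
                  for k in range(3)])
between = np.linalg.norm(centroids - centroids.mean(axis=0), axis=1).mean()
print("between-cluster / within-cluster spread with pivots: %.2f" % (between / within))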