In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
from pivdire import reduce_dim_2d
import numpy as np
np.random.seed(666)
In [3]:
##### Create some example data
## 3 clusters of points, sampled from multivariate Gaussians
dim = 64
std = 0.3 * np.identity(dim)  # covariance matrix shared by all three clusters
points_hd = np.empty((63, dim))  # 3 cluster means + 3 * 20 samples
points_hd[0,:] = np.ones(dim)
points_hd[1,:] = np.ones(dim) - np.random.rand(dim) * 2.0
points_hd[2,:] = np.ones(dim)
points_hd[2,:dim//2] -= np.random.rand(dim//2) * 2.0
points_hd[2,dim//2:] -= np.random.rand(dim//2) * 2.0
for imean, mean in enumerate((points_hd[0,:], points_hd[1,:], points_hd[2,:])):
    for i in range(20):
        vec = np.random.multivariate_normal(mean, std)
        points_hd[3+20*imean+i,:] = vec
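As a quick sanity check (an aside added here, not part of the original notebook), the pairwise Euclidean distances between the three cluster means can be printed to confirm they are reasonably far apart in the 64-dimensional space before any reduction; this uses only numpy, which is already imported above.

from itertools import combinations
for a, b in combinations(range(3), 2):
    dist = np.linalg.norm(points_hd[a,:] - points_hd[b,:])
    print("distance between mean %d and mean %d: %.2f" % (a, b, dist))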
In [4]:
##### Load the plotting module and assign colors
### to the points, according to which mean they were sampled from
### Highlight the means themselves (bigger point size)
import matplotlib.pyplot as plt
plt.rcParams['savefig.dpi'] = 150
colors = ['b','r','y'] + ['b']*20 + ['r']*20 + ['y']*20
colors[0] = 'b'
colors[1] = 'r'
colors[2] = 'y'
sizes = [30.0 for i in range(points_hd.shape[0])]
sizes[0] = 300
sizes[1] = 300
sizes[2] = 300
In [5]:
##### Reduce dimensionality without using pivot points
#### All the resulting 2D points end up clustered close to the center
points_2d = reduce_dim_2d(points_hd, seed=666)
plt.scatter(points_2d[:,0],points_2d[:,1],c=colors,s=sizes)
Out[5]:
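To put a rough number on how bunched-up this embedding is (an aside, not from the original notebook; the labels array below is a hypothetical helper that mirrors the point layout built in cell 3, the three means first and then 20 samples per cluster), one can compare the spread of the cluster centroids in 2D to the average spread of points within each cluster:

labels = np.array([0, 1, 2] + [0]*20 + [1]*20 + [2]*20)  # same ordering as the colors list
centroids = np.array([points_2d[labels == k].mean(axis=0) for k in range(3)])
within = np.mean([np.linalg.norm(points_2d[labels == k] - centroids[k], axis=1).mean()
                  for k in range(3)])
between = np.linalg.norm(centroids - centroids.mean(axis=0), axis=1).mean()
print("between-cluster / within-cluster spread: %.2f" % (between / within))

A small ratio is consistent with the comment above: the cluster centroids sit nearly on top of each other relative to the scatter inside each cluster.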
In [6]:
##### Weight the cluster-mean-to-point distances more heavily than the ordinary point-to-point distances
#### The cluster means serve as the pivot points
points_2d = reduce_dim_2d(points_hd, pivot_pos=[0,1,2], pivot_importance=5.5, seed=666)
plt.scatter(points_2d[:,0],points_2d[:,1],c=colors,s=sizes)
Out[6]:
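For a direct before/after comparison, the same between/within ratio can be recomputed on the pivot-weighted embedding (an aside again, reusing the hypothetical labels helper defined after cell 5); a larger value would indicate that the pivot_importance weighting pulls the three clusters apart in the 2D layout:

centroids = np.array([points_2d[labels == k].mean(axis=0) for k in range(3)])
within = np.mean([np.linalg.norm(points_2d[labels == k] - centroids[k], axis=1).mean()
                  for k in range(3)])
between = np.linalg.norm(centroids - centroids.mean(axis=0), axis=1).mean()
print("between-cluster / within-cluster spread with pivots: %.2f" % (between / within))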