In [1]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
cd /home/chiroptera/workspace/QCThesis/CUDA/


/home/chiroptera/workspace/QCThesis/CUDA

In [3]:
import K_Means3
from K_Means3 import *
reload(K_Means3)


Out[3]:
<module 'K_Means3' from 'K_Means3.pyc'>

In [4]:
from sklearn import datasets # generate gaussian mixture

def generateData(n,d,k):
    # make_blobs expects an integer sample count
    n_int = int(n)

    # Generate a Gaussian mixture with k components in d dimensions
    data, groundTruth = datasets.make_blobs(n_samples=n_int, n_features=d, centers=k,
                                            center_box=(-1000.0, 1000.0))
    data = data.astype(np.float32)  # single precision, as used by the CUDA K-Means below

    return data, groundTruth
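
The CUDA module under test implements K-Means; as a CPU reference for cross-checking its centroids and labels, a minimal NumPy sketch of Lloyd's algorithm is given below. This is an illustrative helper only (lloyd_reference is not part of K_Means3), and its random initialization will generally differ from the CUDA run.

In [ ]:
import numpy as np

def lloyd_reference(data, k, max_iters=300, tol=1e-4):
    n = data.shape[0]
    # initialize centroids with k distinct samples
    centroids = data[np.random.choice(n, k, replace=False)].copy()
    for it in range(max_iters):
        # assignment step: label each sample with its nearest centroid
        dists = ((data[:, np.newaxis, :] - centroids[np.newaxis, :, :]) ** 2).sum(axis=2)
        lbls = dists.argmin(axis=1)
        # update step: move each centroid to the mean of its members
        new_centroids = centroids.copy()
        for c in range(k):
            members = data[lbls == c]
            if members.size > 0:
                new_centroids[c] = members.mean(axis=0)
        shift = np.abs(new_centroids - centroids).max()
        centroids = new_centroids
        if shift < tol:  # converged
            break
    return centroids, lbls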

In [18]:
data,labels = generateData(1e2,2,6)

for c in np.unique(labels):
    c_data = data[labels == c]
    plt.plot(c_data[:,0], c_data[:,1], '.', label="cluster %d" % c)

#plt.plot(data[:,0],data[:,1],'.')



In [15]:
labels


Out[15]:
array([0, 0, 3, 5, 5, 2, 1, 3, 4, 2, 5, 1, 4, 2, 0, 4, 2, 0, 5, 3, 3, 0, 1,
       3, 0, 1, 3, 4, 1, 1, 2, 5, 4, 2, 4, 5, 0, 3, 0, 1, 0, 3, 2, 3, 1, 0,
       1, 2, 4, 4, 4, 4, 2, 1, 1, 5, 5, 4, 3, 0, 2, 1, 0, 3, 0, 3, 3, 0, 4,
       5, 2, 4, 2, 1, 2, 5, 2, 2, 4, 5, 1, 5, 5, 5, 5, 3, 1, 5, 2, 1, 1, 4,
       0, 0, 3, 0, 3, 4, 3, 2])

In [6]:
k=12
iters=100

In [19]:
#%%debug -b K_Means3.py:577
reload(K_Means3)
grouperCUDA = K_Means3.K_Means()  # take the class from the freshly reloaded module
grouperCUDA._centroid_mode="index"
grouperCUDA._cuda_mem="manual"
grouperCUDA.fit(data, k, iters=iters, mode="cuda", cuda_mem='manual',tol=1e-4,max_iters=300)
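
As a sanity check of the CUDA fit, the same data can be clustered with scikit-learn's KMeans (scikit-learn is already used above for data generation). This is only a rough comparison, since the two runs use different initializations.

In [ ]:
from sklearn.cluster import KMeans
# reference clustering with the same k, tolerance and iteration cap
ref = KMeans(n_clusters=k, max_iter=300, tol=1e-4).fit(data)
print ref.cluster_centers_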

In [22]:
for c in np.unique(grouperCUDA.labels_):
    c_data = data[grouperCUDA.labels_ == c]
    plt.plot(c_data[:,0], c_data[:,1], '.', label="cluster %d" % c)



In [21]:
plt.plot(data[:,0],data[:,1],'.',label="data")
plt.plot(grouperCUDA.centroids[:,0],grouperCUDA.centroids[:,1],'yx',mew=2,label="centroids")
plt.legend(loc="best")


Out[21]:
<matplotlib.legend.Legend at 0x7fbd3d72f410>
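
To quantify how well the CUDA labelling recovers the generated blobs, the adjusted Rand index can compare it against the ground-truth labels from generateData. This is a rough check: the ARI is invariant to label permutations, but with k larger than the number of generated blobs a perfect score is not expected.

In [ ]:
from sklearn.metrics import adjusted_rand_score
# agreement between ground-truth blob labels and CUDA cluster labels
print adjusted_rand_score(labels, grouperCUDA.labels_)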

In [9]:
print grouperCUDA.centroids
print ""
for c in grouperCUDA.partition:
    print c
print ""
print grouperCUDA.labels_
print ""
print '# centroids:\t\t', grouperCUDA.centroids.shape[0]
print '# partition clusters:\t',len(grouperCUDA.partition)
print '# label clusters:\t',np.unique(grouperCUDA.labels_).size

def countSamples(partition):
    # total number of samples across all clusters of the partition
    return sum(c.size for c in partition)

print countSamples(grouperCUDA.partition)


[[ 992.56048584 -376.89523315]
 [ 774.58990479  712.4800415 ]
 [ -64.87361145 -448.95233154]
 [ -66.11804199 -447.28579712]
 [-460.16653442  -75.57642365]
 [-460.33969116  -77.38581085]
 [ 774.84820557  710.67041016]
 [ 422.27618408 -486.49133301]
 [ 992.40844727 -378.55599976]
 [-442.13818359 -521.44940186]]

[41 60 80 11 58 88 86 90  2 37 68]
[29 65 26 98 25 59 53 99 89]
[39 96 70 14 50]
[16 38 44 47 33 91 87 57 10 83 22 61]
[84 42 67 63 49 15 28]
[82 19 64  1 34 62 72 52 27]
[12 71  5 94 30 51 69]
[ 8 40 85 78 79 13 81  7 20 92 24 56 32 48 97 43  4]
[ 3 66 23  6 35  9]
[93 95 36 76 75 74 73 17 18 21 55 54 31 46 45 77  0]

[9 5 0 8 7 6 8 7 7 8 3 0 6 7 2 4 3 9 9 5 7 9 3 8 7 1 1 5 4 1 6 9 7 3 5 8 9
 0 3 2 7 0 4 7 3 9 9 3 7 4 2 6 5 1 9 9 7 3 0 1 0 3 5 4 5 1 8 4 0 6 2 6 5 9
 9 9 9 9 7 7 0 7 5 3 4 7 0 3 0 1 0 3 7 9 6 9 2 7 1 1]

# centroids:		10
# partition clusters:	10
# label clusters:	10
100
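
The counts above agree; a stricter consistency check (a sketch, assuming partition[c] holds the sample indices assigned to cluster c, as the printed output suggests) is to verify that every index in partition[c] carries label c:

In [ ]:
for c, idx in enumerate(grouperCUDA.partition):
    # every sample index in cluster c's partition should be labelled c
    assert np.all(grouperCUDA.labels_[idx] == c)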