In [1]:
import numpy as np
import MyML.helper.partition as myPart
import MyML.cluster.K_Means3 as myKM
import MyML.utils.sparse as mySparse
import MyML.cluster.eac as eac
from sklearn.datasets import make_blobs
In [2]:
# Re-import MyML.utils.sparse to pick up on-disk edits (Python 2 builtin reload).
reload(mySparse)
Out[2]:
In [4]:
# Synthetic benchmark data: 6 Gaussian blobs in 2-D, plus ground-truth labels.
# random_state pins the draw so every timing/accuracy number below is
# reproducible across kernel restarts (the original cell had no seed).
data, gt = make_blobs(n_samples=5000, n_features=2, centers=6, random_state=42)
data = data.astype(np.float32)  # float32 — presumably required by the GPU K-Means; TODO confirm
n_samples = data.shape[0]
In [5]:
# CUDA K-Means used as the partition generator for the ensemble;
# "manual" CUDA memory mode — presumably explicit host/device transfers, TODO confirm.
generator = myKM.K_Means(cuda_mem="manual")
# Cap CUDA threads per block at 256 (private tunable of MyML's K-Means).
generator._MAX_THREADS_BLOCK = 256
In [6]:
def sqrt_rule(n):
    """Return [ceil(sqrt(n)/2), ceil(sqrt(n))] as plain ints — the (min, max)
    cluster-count range used when drawing each ensemble partition.

    Replaces the original ``lambda`` bound to a name (discouraged by PEP 8)
    with nested ``map`` calls; the explicit list is also safe under Python 3,
    where ``map`` returns an iterator and would break downstream indexing.
    """
    root = np.sqrt(n)
    return [int(np.ceil(root / 2)), int(np.ceil(root))]
# (min_k, max_k) cluster-count range for the ensemble, from the sqrt rule.
n_clusters = sqrt_rule(data.shape[0])
In [7]:
# Draw 100 K-Means partitions with k sampled from the sqrt-rule range.
%time ensemble = myPart.generateEnsemble(data, generator, n_clusters, npartitions=100)
In [8]:
# Biggest cluster size seen across the ensemble, used to bound row storage.
bgs = mySparse._compute_max_assocs_from_ensemble(ensemble)
# Safety factor of 3 over the biggest cluster size — heuristic; TODO confirm
# it always avoids overflowing a row's pre-allocated association slots.
max_assocs = bgs * 3
print max_assocs
In [ ]:
# Baseline CSR co-association accumulator: default (unsorted) update path.
mat0 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
%time mat0._update_ensemble(ensemble)
%time mat0._condense()
In [9]:
# Variant 1: keep each row sorted online during accumulation.
mat1 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat1.sort_mode = "online"
mat1.update_cluster_function = mySparse.update_cluster_sorted
%time mat1._update_ensemble(ensemble)
%time mat1._condense()
In [140]:
mat2 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat2.sort_mode = "online"
mat2.update_cluster_function = mySparse.update_cluster_sorted_simple
%debug mat2._update_ensemble(ensemble)
%time mat2._condense()
In [ ]:
# NOTE(review): pdb breakpoint command left over from a %debug session —
# not valid Python; it only works when typed at the debugger prompt.
b MyML/utils/sparse.py:57
In [146]:
# NOTE(review): out-of-order execution — nnz2/nnz3 are assigned in LATER
# cells (In [143]/In [142]); this cell fails under Restart & Run All.
print map(max,nnz2)
print map(max,nnz3)
In [143]:
# Hand-pasted debug output (per-cluster new-nnz counts) kept for comparison
# with nnz3 below — provenance unclear, presumably from the "sorted_simple"
# update variant; TODO confirm.
nnz2 = [
[0, 48378, 26920, 0, 27638, 5770, 10228, 5310, 2990, 20884, 8136, 544, 10618, 3840, 18692, 934, 0, 16894, 3130, 11812, 1708, 180, 480, 10304, 9850, 2548, 102, 2300, 1710, 3040, 836, 8268, 600, 0, 4588, 10896, 11966, 6864, 5206, 17700, 68, 3266, 2112, 440, 3020, 3778, 2346, 0, 292],
[0, 178, 0, 7944, 104, 116, 576, 2914, 898, 0, 420, 574, 286, 304, 506, 3100, 144, 3724, 5498, 0, 1098, 20164, 22066, 0, 2006, 528, 268, 0, 54, 1526, 440, 198, 0, 224, 658, 2354, 1998, 2858, 3758, 0, 1076, 28, 1884, 1358, 644, 0, 4778, 5710, 2924, 104, 136, 2808, 1504, 72, 682, 918, 1110, 0, 0, 1036, 0, 0, 240, 0, 222, 172],
[0, 0, 520, 2456, 0, 282, 138, 0, 906, 4376, 1116, 710, 284, 172, 0, 4342, 0, 128, 9484, 0, 140, 426, 480, 4498, 1426, 0, 0, 0, 4116, 1272, 0, 1520, 0, 1340, 3210, 0, 78, 818, 0, 788, 436, 0, 3682, 1596, 3000, 326, 2156, 6838, 6002, 10032, 1320, 5018, 1246, 996, 1268, 17360, 3500],
[0, 226, 52882, 0, 120, 0, 1062, 220, 58, 244, 1516, 32, 6294, 564, 370, 370, 180, 228, 144, 490, 38, 62304, 60, 372, 448, 64, 1050, 4, 288, 160, 436, 0, 472, 2266, 6190, 588, 804, 7236, 868, 1660, 566, 26, 344, 256, 572, 0, 0, 12, 974, 0, 2008, 1606, 10482]
]
In [142]:
# Hand-pasted debug output matching nnz2 above — presumably from the
# "surgical" update variant; TODO confirm.
nnz3 = [
[0, 48378, 26920, 0, 27638, 5770, 10228, 5310, 2990, 20884, 8136, 544, 10618, 3840, 18692, 934, 0, 16894, 3130, 11812, 1708, 180, 480, 10304, 9850, 2548, 102, 2300, 1710, 3040, 836, 8268, 600, 0, 4588, 10896, 11966, 6864, 5206, 17700, 68, 3266, 2112, 440, 3020, 3778, 2346, 0, 292],
[1445, 2209, 0, 11207, 257, 289, 648, 7108, 1086, 0, 612, 706, 775, 913, 506, 3452, 592, 3957, 7707, 0, 1687, 23751, 23287, 776, 2073, 657, 380, 492, 198, 2469, 559, 230, 1748, 383, 1297, 2382, 2056, 4117, 4445, 250, 1245, 28, 2228, 2120, 661, 470, 4804, 6725, 3949, 117, 1834, 3851, 1684, 276, 1303, 984, 1331, 1436, 1442, 1659, 52, 26, 249, 2797, 295, 188],
[1940, 561, 1283, 3789, 405, 2298, 7890, 448, 4843, 8488, 1586, 1996, 3994, 1698, 6991, 6612, 153, 1831, 18007, 85, 306, 4734, 1087, 6778, 2812, 1561, 424, 1834, 5598, 3235, 2898, 4233, 3259, 3162, 3632, 3253, 618, 1792, 4161, 1209, 2221, 545, 9031, 6614, 7417, 1007, 4384, 12048, 7772, 13411, 3447, 6668, 2470, 2478, 2595, 27134, 7254],
[91, 6366, 68673, 697, 7975, 39, 5810, 1674, 1099, 3139, 3149, 471, 14740, 2068, 1633, 6038, 5457, 2588, 2707, 1281, 1147, 84705, 327, 1944, 5203, 352, 4328, 2308, 3414, 1620, 5633, 7827, 2631, 4247, 8699, 6755, 5911, 11066, 4590, 3493, 2351, 333, 3916, 2126, 2742, 3928, 3694, 16, 2524, 970, 6562, 3585, 17006]
]
In [144]:
Out[144]:
In [198]:
# Variant 3: "surgical" online-sorted update (reload picks up module edits).
reload(mySparse)
mat3 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat3.sort_mode = "online"
mat3.update_cluster_function = mySparse.update_cluster_sorted_surgical
%time mat3._update_ensemble(ensemble)
In [ ]:
# NOTE(review): transcript of a pdb session (breakpoint plus expressions
# inspected at the prompt) — not runnable Python; kept as a debugging record.
b MyML/utils/sparse.py:360
start_idx, end_idx, n_shifts
new_assocs_ptr
new_assocs_ids[:new_assocs_ptr+1]
new_assocs_idx[:new_assocs_ptr+1]
data[[idx,idx+n_shifts]], indices[[idx,idx+n_shifts]]
In [79]:
# Element-wise equality of the two sorted variants' CSR arrays.
print "indices", (mat1.indices==mat2.indices).all()
print "data", (mat1.data==mat2.data).all()
print "degree", (mat1.degree==mat2.degree).all()
In [36]:
# Per-sample association counts of the surgical variant (rich display).
mat3.degree
Out[36]:
In [224]:
# Pick up module edits before the benchmark cells below.
reload(mySparse)
Out[224]:
In [225]:
%%timeit
# Benchmark: full ensemble accumulation with the online-sorted update.
mat1 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat1.sort_mode = "online"
mat1.update_cluster_function = mySparse.update_cluster_sorted
mat1._update_ensemble(ensemble)
In [226]:
%%timeit
# Benchmark: full ensemble accumulation with the "simple" sorted update.
mat2 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat2.sort_mode = "online"
mat2.update_cluster_function = mySparse.update_cluster_sorted_simple
mat2._update_ensemble(ensemble)
In [227]:
%%timeit
# Benchmark: full ensemble accumulation with the "surgical" sorted update.
mat3 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat3.sort_mode = "online"
mat3.update_cluster_function = mySparse.update_cluster_sorted_surgical
mat3._update_ensemble(ensemble)
In [185]:
mat0 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
%time mat0._update_ensemble(ensemble)
mat1 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat1.sort_mode = "online"
mat1.update_cluster_function = mySparse.update_cluster_sorted
%time mat1._update_ensemble(ensemble)
mat2 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat2.sort_mode = "online"
mat2.update_cluster_function = mySparse.update_cluster_sorted_simple
%time mat2._update_ensemble(ensemble)
mat3 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat3.sort_mode = "online"
mat3.update_cluster_function = mySparse.update_cluster_sorted_surgical
%time mat3._update_ensemble(ensemble)
In [206]:
# NOTE(review): timeit-style setup snippet kept as a string; it references
# n_samples and max_assocs, which are NOT defined inside the snippet itself,
# so it would fail if passed to timeit as-is. Appears unused in this session.
setup1 = """import MyML.utils.sparse as mySparse
mat1 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat1.sort_mode = "online"
mat1.update_cluster_function = mySparse.update_cluster_sorted"""
In [165]:
# Compare the per-partition nnz growth recorded by the two variants.
# NOTE(review): new_nnz_list is not set anywhere visible in this session —
# presumably an attribute populated inside EAC_CSR; TODO confirm.
print mat2.new_nnz_list[1]
print mat3.new_nnz_list[1]
In [188]:
# Condense (finalize) timings for all four accumulators.
%time mat0._condense()
%time mat1._condense()
%time mat2._condense()
%time mat3._condense()
In [189]:
# Dense reference implementation, used below to validate the sparse variants.
eacEst = eac.EAC(n_samples=n_samples)
%time eacEst.fit(ensemble)
eacEst._getAssocsDegree()
In [192]:
# Rebuild the surgical variant after module edits, then condense it.
reload(mySparse)
mat3 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat3.sort_mode = "online"
mat3.update_cluster_function = mySparse.update_cluster_sorted_surgical
%time mat3._update_ensemble(ensemble)
mat3._condense()
In [199]:
print "check matrices"
print "mat0: ", (eacEst._coassoc == mat0.todense()).all()
print "mat1: ", (eacEst._coassoc == mat1.todense()).all()
print "mat2: ", (eacEst._coassoc == mat2.todense()).all()
print "mat3: ", (eacEst._coassoc == mat3.todense()).all()
In [200]:
print "eac: ", eacEst.nnz
print "mat0: ",mat0.nnz
print "mat1: ",mat1.nnz
print "mat2: ",mat2.nnz
print "mat3: ",mat3.nnz
In [1]:
import MyML.metrics.accuracy as myAcc
import numpy as np
import MyML.helper.partition as myPart
import MyML.cluster.K_Means3 as myKM
import MyML.utils.sparse as mySparse
import MyML.cluster.eac as eac
from sklearn.datasets import make_blobs
In [2]:
# End-to-end rerun of the whole benchmark pipeline in a single cell
# (fresh-kernel session; duplicates the cells above).
# NOTE(review): make_blobs has no random_state, so this draw — and every
# timing/accuracy number downstream — changes on each run; add a seed.
data,gt = make_blobs(n_samples=5000, n_features=2, centers=6)
data = data.astype(np.float32)
n_samples = data.shape[0]
# CUDA K-Means generator, same tunables as before.
generator = myKM.K_Means(cuda_mem="manual")
generator._MAX_THREADS_BLOCK = 256
sqrt_rule = lambda n: map(int, map(np.ceil, [np.sqrt(n) / 2, np.sqrt(n)]))
n_clusters = sqrt_rule(data.shape[0])
%time ensemble = myPart.generateEnsemble(data, generator, n_clusters, npartitions=100)
# Row-storage bound: 3x the biggest cluster size (heuristic).
bgs = mySparse._compute_max_assocs_from_ensemble(ensemble)
max_assocs = bgs * 3
print "max assocs", max_assocs
# Online-sorted CSR accumulator.
mat1 = mySparse.EAC_CSR(n_samples=n_samples, max_assocs=max_assocs)
mat1.sort_mode = "online"
mat1.update_cluster_function = mySparse.update_cluster_sorted
%time mat1._update_ensemble(ensemble)
%time mat1._condense()
# Dense reference for validation.
eacEst = eac.EAC(n_samples=n_samples)
%time eacEst.fit(ensemble)
print "equal mats", (eacEst._coassoc == mat1.todense()).all()
print mat1.nnz
In [3]:
# Single-linkage utilities; reloaded to pick up on-disk edits.
import MyML.cluster.linkage as myLinkage
reload(myLinkage)
Out[3]:
In [4]:
# Full EAC clustering: fit coassocs, cut by single-link lifetimes, then score
# the labels against the ground-truth blob labels with the Hungarian index.
eacEst = eac.EAC(n_samples=n_samples)
%time eacEst.fit(ensemble)
%time labels = eacEst._lifetime_clustering()
accScorer = myAcc.HungarianIndex(nsamples=data.shape[0])
accScorer.score(gt, labels)
print accScorer.accuracy
In [9]:
# View mat1's CSR arrays as a weighted graph for the MST routines:
#   dest   — neighbour (column) ids per edge
#   weight — inverted association counts (max+1-x), so the strongest
#            co-association becomes the lightest edge
#   fe/od  — per-vertex first-edge / one-past-last-edge offsets (indptr slices)
dest = mat1.indices
weight = mat1.data.max() + 1 - mat1.data
fe = mat1.indptr[:-1]
od = mat1.indptr[1:]
In [11]:
# Average number of stored associations per sample (nnz / n_samples).
# Fixed: the original `dest.size / fe.size * 1.0` truncated first, because
# under Python 2 `/` on ints is floor division and the `* 1.0` is applied
# only afterwards; convert an operand before dividing to get the true ratio.
dest.size / float(fe.size)
Out[11]:
In [12]:
# Sequential single-link-via-MST-lifetimes on the CSR graph arrays.
reload(myLinkage)
%time myLinkage.sl_mst_lifetime_seq(dest, weight, fe, od, disconnect_weight = None)
Out[12]:
In [7]:
import MyML.graph.mst as myMST
In [8]:
# Borůvka MST (sequential implementation) on the same graph arrays.
myMST.boruvka_minho_seq(dest, weight, fe, od)
Out[8]:
In [ ]:
# GPU variant of the SL-MST lifetime clustering (cell left unevaluated).
myLinkage.sl_mst_lifetime_gpu(dest, weight, fe, od, disconnect_weight = None)
In [ ]:
# NOTE(review): leftover pdb breakpoint command — not valid Python outside
# the debugger prompt.
b MyML/cluster/linkage.py:24