In [1]:
import functools
import sys
import time

import numpy as np

from optics import Optics
# Default figure size (width, height) applied to the plots drawn below.
# NOTE(review): `pylab` is not imported in any visible cell; this presumably
# relies on the kernel having been started in pylab mode -- confirm.
pylab.rcParams['figure.figsize'] = 16, 12

In [22]:
def timeit(method):
    """Decorator that reports the wall-clock duration of each call.

    The wrapper forwards all positional and keyword arguments unchanged,
    prints ``'<name> (<args>, <kwargs>) <seconds> sec'`` once the call has
    returned, and hands back the wrapped function's return value. Nothing is
    printed when ``method`` raises; the exception simply propagates.

    Parameters
    ----------
    method : callable
        Function to be timed.

    Returns
    -------
    callable
        Wrapper accepting the same arguments as ``method``.
    """

    @functools.wraps(method)  # keep __name__ / __doc__ of the wrapped function
    def timed(*args, **kw):
        start = time.time()
        result = method(*args, **kw)
        elapsed = time.time() - start

        # One pre-formatted argument in parentheses prints the same text
        # whether `print` is a statement (Python 2) or a function (Python 3).
        print('%r (%r, %r) %2.2f sec' % (method.__name__, args, kw, elapsed))
        return result

    return timed

def getPointsFromFile(fileName):
    """Load 2-D points from a whitespace-separated text file.

    Lines that start with ``#`` are skipped, as are lines that contain only
    whitespace. For every remaining line the first two whitespace-separated
    fields are parsed as floats; any further columns are ignored.

    Parameters
    ----------
    fileName : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        One row per data line; shape ``(n, 2)`` when every data line has at
        least two fields.

    Raises
    ------
    ValueError
        If one of the first two fields of a data line is not a valid float.
    """
    X = []
    with open(fileName, 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue
            fields = line.split()[:2]
            if not fields:
                # Blank line: appending an empty row would make the array ragged.
                continue
            # Build a real list: on Python 3 `map` is lazy and np.array would
            # end up holding map objects instead of numbers.
            X.append([float(field) for field in fields])
    return np.array(X)

def genPoints(pointsPerCluster=100, seed=None):
    """Generate a synthetic 2-D data set made of seven Gaussian blobs.

    Six blobs with standard deviation ``rho`` are placed around the origin
    and one wide blob (standard deviation 4) is centred on the origin. Each
    blob is ``centre + std * randn(pointsPerCluster, 2)``.

    Parameters
    ----------
    pointsPerCluster : int
        Number of points drawn for every blob.
    seed : int or None
        Value passed to ``np.random.seed``. ``None`` (the default) reseeds
        NumPy's global generator unpredictably, so every call gives different
        points; pass an integer to get a reproducible data set.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(7 * pointsPerCluster, 2)``; rows are grouped blob by
        blob in the order the blobs are listed below.
    """
    np.random.seed(seed)

    rho = 0.3
    # 'range' is the blob centre (x, y); 'density' is its standard deviation.
    clusters = [
        {'range': [-5, -5], 'density': rho},
        {'range': [-5, 0], 'density': rho},
        {'range': [-5, +5], 'density': rho},
        {'range': [+5, 0], 'density': rho},
        {'range': [+5, +5], 'density': rho},
        {'range': [+5, -5], 'density': rho},
        {'range': [0, 0], 'density': 4},
    ]

    # Draw the blobs in list order (this fixes the random stream) and stack
    # them once instead of growing the array inside the loop.
    blobs = [
        np.asarray(cluster['range'])
        + cluster['density'] * np.random.randn(pointsPerCluster, 2)
        for cluster in clusters
    ]
    return np.vstack(blobs)

In [25]:
# Data set shared by the cells below: synthetic blobs, 100 points per blob.
# Alternative input (disabled): read the points from a text file instead.
#X = getPointsFromFile('hierarchical-2d.ascii')
X = genPoints(pointsPerCluster=100)

In [26]:
def plotPoints(X):
    """Scatter-plot 2-D points, colouring each one by its row index.

    Parameters
    ----------
    X : array-like indexable as ``X[:, 0]`` / ``X[:, 1]``
        Points to draw; column 0 is plotted on the x axis, column 1 on the
        y axis.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    nX = len(X)
    # Row i gets jet(i / nX), so the colour gradient follows the input order.
    # `range` works on both Python 2 and 3 (`xrange` is Python-2-only).
    colors = [matplotlib.cm.jet(1.0 * i / nX) for i in range(nX)]
    # Go through `plt` explicitly, like plotClusters does, rather than relying
    # on a bare `scatter` injected by a pylab star-import.
    plt.scatter(X[:, 0], X[:, 1], marker='o', alpha=0.4, color=colors)
    plt.show()

# Show the raw points before any clustering is applied.
plotPoints(X)



In [27]:
@timeit
def doClustering(minPts=6, epsilon=sys.float_info.max, points=None):
    """Run the OPTICS clustering on a set of points and return its result.

    Parameters
    ----------
    minPts : int
        Forwarded to ``Optics(minPts=...)``.
    epsilon : float
        Forwarded to ``Optics(epsilon=...)``; defaults to the largest
        representable float.
    points : array-like or None
        Data to cluster. When ``None`` (the default) the notebook-global
        ``X`` is used, which keeps existing calls working; pass the data
        explicitly to avoid depending on kernel state.

    Returns
    -------
    Whatever ``Optics.opticsCluster`` returns. plotClusters consumes it as
    one label per point with ``-1`` meaning noise -- TODO confirm against
    the ``optics`` module.

    Notes
    -----
    Because of ``@timeit`` every call prints its arguments and duration; an
    explicitly passed ``points`` array shows up in that printout.
    """
    if points is None:
        points = X  # fall back to the data set defined earlier in the notebook
    optics = Optics(minPts=minPts, epsilon=epsilon)
    return optics.opticsCluster(points)

# Cluster the points in X; the @timeit wrapper prints the elapsed time.
clusters = doClustering(minPts=12, epsilon=200)


INFO:root:#making rtree for 700...
INFO:root:#made rtree: <rtree.index.Index object at 0x10eca0f90>!
INFO:root:root: (0, 700, 591, 4)
'doClustering' ((), {'epsilon': 200, 'minPts': 12}) 3.48 sec

In [28]:
def plotClusters(X, clusters):
    """Scatter-plot the points coloured by cluster label and print the count.

    Parameters
    ----------
    X : array-like indexable as ``X[:, 0]`` / ``X[:, 1]``
        Points to draw.
    clusters : iterable
        One label per point, in the same order as ``X``. The label ``-1``
        marks noise and is drawn in red instead of a colormap colour.

    Returns
    -------
    None
        Prints ``#number of clusters:  <n>`` and displays the figure.
    """
    labels = set(clusters)
    labels.discard(-1)  # -1 is the noise marker, not a cluster of its own
    nClusters = len(labels)
    # Parenthesised single argument: same output on Python 2 and Python 3.
    print('#number of clusters:  %d' % nClusters)

    noise = (1, 0, 0, 0.1)
    # Guard the divisor so an all-noise (or empty) labelling cannot divide by 0.
    scale = float(max(nClusters, 1))
    colors = [matplotlib.cm.jet(i / scale) if i != -1 else noise for i in clusters]
    # NOTE(review): the alpha=0.4 keyword presumably overrides the 0.1 alpha
    # stored in `noise` -- confirm against the matplotlib version in use.
    plt.scatter(X[:, 0], X[:, 1], marker='o', alpha=0.4, color=colors)
    plt.show()


# Draw the points again, this time coloured by the labels computed above.
plotClusters(X, clusters)


#number of clusters:  7

In [11]:
# Dump every point of X as "<x> <y> ID-<row index>", one point per line.
with open('/tmp/clusters.dat', 'w') as out:
    out.writelines(
        '{} {} ID-{}\n'.format(point[0], point[1], idx)
        for idx, point in enumerate(X)
    )

In [ ]: