In [1]:
from optics import Optics
import sys
import numpy as np
import time
# Default figure size as (width, height); matplotlib interprets figsize in inches.
# NOTE(review): `pylab` is not imported anywhere in this file - this line
# presumably relies on the notebook running in pylab mode (e.g. `%pylab inline`);
# confirm before running on a fresh kernel.
pylab.rcParams['figure.figsize'] = 16, 12
In [22]:
def timeit(method):
    """Decorator that prints how long each call to ``method`` takes.

    After every successful call one line is printed containing the repr of
    the wrapped function's name, the positional arguments, the keyword
    arguments and the elapsed wall-clock time (``time.time()`` difference)
    in seconds with two decimals.

    Parameters
    ----------
    method : callable
        Function to wrap.

    Returns
    -------
    callable
        Wrapper that forwards all arguments to ``method`` and returns its
        result unchanged. If ``method`` raises, the exception propagates and
        nothing is printed.
    """
    # Local import: keeps the decorator self-contained within this block.
    import functools

    # wraps() copies __name__/__doc__ so decorated functions stay identifiable.
    @functools.wraps(method)
    def timed(*args, **kw):
        start = time.time()
        result = method(*args, **kw)
        end = time.time()
        # One parenthesised argument: prints the same under Python 2 and 3.
        print('%r (%r, %r) %2.2f sec' % (method.__name__, args, kw, end - start))
        return result

    return timed
def getPointsFromFile(fileName):
    """Read 2-D points from a whitespace-separated text file.

    Only the first two fields of each data line are used; any further
    fields are ignored. Blank lines and lines whose first non-blank
    character is ``#`` are skipped.

    Parameters
    ----------
    fileName : str
        Path of the file to read.

    Returns
    -------
    numpy.ndarray
        Array with one row per data line. An input without any data line
        yields an empty array of shape ``(0, 2)`` so that column indexing
        such as ``X[:, 0]`` still works.

    Raises
    ------
    ValueError
        If one of the first two fields of a data line is not a valid float.
    """
    points = []
    with open(fileName, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank lines would add an empty row and make the array ragged.
            if not fields or fields[0].startswith('#'):
                continue
            # A real list (not map()) so the rows are concrete on Python 3 too.
            points.append([float(value) for value in fields[:2]])
    if not points:
        return np.empty((0, 2))
    return np.array(points)
def genPoints(pointsPerCluster=100, seed=None, clusters=None):
    """Generate a synthetic 2-D data set made of Gaussian blobs.

    For every cluster description, ``pointsPerCluster`` rows are drawn as
    ``cluster['range'] + cluster['density'] * np.random.randn(n, 2)``:
    ``'range'`` is the offset added to the samples (the blob centre) and
    ``'density'`` is the factor the standard-normal samples are scaled by.

    Parameters
    ----------
    pointsPerCluster : int, optional
        Number of points drawn for each cluster.
    seed : int or None, optional
        Seed passed to ``np.random.seed``. ``None`` (the default) reseeds
        from a fresh entropy source, so every call differs; pass an integer
        to get a reproducible data set.
    clusters : list of dict or None, optional
        Cluster descriptions, each with the keys ``'range'`` (two offsets)
        and ``'density'`` (scale). ``None`` selects the built-in layout:
        six blobs of scale 0.3 at (-5,-5), (-5,0), (-5,5), (5,0), (5,5),
        (5,-5) and one wide blob of scale 4 at the origin.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(clusters) * pointsPerCluster, 2)``; rows are
        grouped cluster by cluster, in the order of ``clusters``.
    """
    np.random.seed(seed)

    if clusters is None:
        rho = 0.3
        clusters = [
            {'range': [-5, -5], 'density': rho},
            {'range': [-5, 0], 'density': rho},
            {'range': [-5, +5], 'density': rho},
            {'range': [+5, 0], 'density': rho},
            {'range': [+5, +5], 'density': rho},
            {'range': [+5, -5], 'density': rho},
            {'range': [0, 0], 'density': 4},
        ]

    # Start with an empty (0, 2) block so an empty cluster list still
    # produces a correctly shaped result, then stack everything once
    # instead of re-allocating the array on every iteration.
    blobs = [np.empty((0, 2))]
    for cluster in clusters:
        samples = np.random.randn(pointsPerCluster, 2)
        blobs.append(cluster['range'] + cluster['density'] * samples)
    return np.vstack(blobs)
In [25]:
# Data set X used by the plotting, clustering and export code in this notebook.
# Uncomment the next line (and drop the genPoints call) to load the points
# from a file instead of generating them.
#X = getPointsFromFile('hierarchical-2d.ascii')
X = genPoints(pointsPerCluster=100)
In [26]:
def plotPoints(X):
    """Scatter-plot the points, coloured by their row index in ``X``.

    Row ``i`` gets the ``jet`` colormap colour at ``i / len(X)``, so the
    colour only reflects the order of the rows.

    NOTE(review): assumes ``matplotlib`` and ``plt`` are already available
    in the notebook namespace (this file does not import them) - confirm.

    Parameters
    ----------
    X : array-like
        Points to plot; column 0 is used as x and column 1 as y.
    """
    nX = len(X)
    # float() keeps this a true division under Python 2 as well.
    colors = [matplotlib.cm.jet(float(i) / nX) for i in range(nX)]
    # plt.scatter rather than a bare scatter(): same call style as
    # plotClusters, and no reliance on pylab's star-imported names.
    plt.scatter(X[:, 0], X[:, 1], marker='o', alpha=0.4, color=colors)
    plt.show()
# Show the raw points (coloured by row index) before any clustering is run.
plotPoints(X)
In [27]:
@timeit
def doClustering(minPts=6, epsilon=sys.float_info.max):
    """Cluster the module-level point array ``X`` with ``Optics``.

    The points are not passed in: the function reads the global ``X``, so
    that name must be defined before the call. Because of the ``@timeit``
    decorator, every call also prints its arguments and run time.

    Parameters
    ----------
    minPts : int, optional
        Forwarded unchanged to ``Optics(minPts=...)``.
    epsilon : float, optional
        Forwarded unchanged to ``Optics(epsilon=...)``; defaults to the
        largest representable float.

    Returns
    -------
    Whatever ``Optics.opticsCluster(X)`` returns.
    """
    return Optics(minPts=minPts, epsilon=epsilon).opticsCluster(X)
# Cluster the points; the @timeit decorator on doClustering prints the run time.
clusters = doClustering(minPts=12, epsilon=200)
In [28]:
def plotClusters(X, clusters):
    """Scatter-plot the points coloured by cluster label and print the count.

    The label ``-1`` is treated as noise: it is drawn in faint red and is
    not counted as a cluster. Every other label ``i`` gets the ``jet``
    colormap colour at ``i / <number of distinct labels>``.

    NOTE(review): assumes ``matplotlib`` and ``plt`` are already available
    in the notebook namespace (this file does not import them) - confirm.

    Parameters
    ----------
    X : array-like
        Points to plot; column 0 is used as x and column 1 as y.
    clusters : iterable of int
        One label per row of ``X``.
    """
    labels = set(clusters)
    nLabels = len(labels)
    # The noise label is not a cluster, so leave it out of the reported count.
    nClusters = nLabels - 1 if -1 in labels else nLabels
    print('#number of clusters:  %s' % nClusters)

    # The alpha is stored in each RGBA colour instead of being passed as
    # scatter(alpha=...): a global alpha keyword overrides the per-colour
    # alpha, which made the fainter noise colour ineffective.
    pointAlpha = 0.4
    noise = (1, 0, 0, 0.1)
    colors = [
        matplotlib.cm.jet(float(label) / nLabels)[:3] + (pointAlpha,)
        if label != -1 else noise
        for label in clusters
    ]
    plt.scatter(X[:, 0], X[:, 1], marker='o', color=colors)
    plt.show()
# Plot the same points again, now coloured by the labels stored in `clusters`.
plotClusters(X, clusters)
In [11]:
# Dump every row of X as "<x> <y> ID-<row index>", one point per line.
with open('/tmp/clusters.dat', 'w') as out:
    out.writelines(
        '{} {} ID-{}\n'.format(point[0], point[1], idx)
        for idx, point in enumerate(X)
    )
In [ ]: