In [1]:
import numpy as np
import pandas as pd
import time as time
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display, Image
from scipy.spatial.distance import pdist, squareform
In [108]:
filepath = '/Users/alfredogarbuno/github-repos/hashcode-2015/data/right_angle_test.in'
f = open(filepath, "r+")
dimstr = f.readline().split()
paint = np.empty([int(dimstr[0]), int(dimstr[1])])
for i in range(int(dimstr[0])):
paint[i] = np.array(list(f.readline().split()[0].replace(".", "0").replace("#", "1")), np.int32)
f.close()
In [5]:
plt.figure(figsize=(10,10))
plt.imshow(paint)
fig = plt.gca()
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
In [165]:
dim = np.array(dimstr, np.int32)
print dim
In [166]:
density = paint.sum() / dim.prod()
print density
In [172]:
minsize = int(dim.min() * density)
print minsize
In [171]:
paint
Out[171]:
In [212]:
indices = np.arange(1,dim.prod()+1).reshape(dim)
print indices
In [241]:
# Generate the indices, in the test example we can only have sqp(1) so we lose 2 rows and 2 columns, first and last
# in both cases.
row, col = np.indices((dim[0]-2, dim[1]-2))
centers = indices[row+1, col+1]
centers
Out[241]:
In [390]:
# Test case, let's take the center in cell 9
# We access that center by knowing it is in the second row, so the first row in the center matrix
# And we know that it is the first column
def discrete_kernel(center, paint, minsize):
dist = np.empty((3,3))
decay = np.logspace(0,1,minsize+1 )
for xstep in range(-minsize,minsize+1):
for ystep in range(-minsize,minsize+1):
dist[1+xstep][1+ystep] = decay[max(abs(xstep), abs(ystep))] * paint[center[0]+xstep][center[1]+ystep]
return dist.sum()
In [391]:
cands = dict()
for row in range(centers.shape[0]):
for ycoord in centers[row]:
cands[ycoord] = discrete_kernel([row+minsize, ycoord%7-1], paint, minsize)
print cands
In [378]:
max(cands, key = cands.get)
Out[378]:
In [109]:
X = paint.reshape(-1,1)
connectivity = grid_to_graph(*paint.shape)
In [112]:
print("Compute structured hierarchical clustering...")
st = time.time()
n_clusters = 9 # number of regions
ward = AgglomerativeClustering(n_clusters=n_clusters,
linkage='ward', connectivity=connectivity).fit(X)
label = np.reshape(ward.labels_, paint.shape)
print("Elapsed time: ", time.time() - st)
print("Number of pixels: ", label.size)
print("Number of clusters: ", np.unique(label).size)
plt.figure(figsize=(10,10))
plt.imshow(label)
Out[112]:
In [113]:
plt.imshow(paint[:,:91])
Out[113]:
In [124]:
subpaint = paint[:,:90]
X = subpaint.reshape(-1,1)
connectivity = grid_to_graph(*subpaint.shape)
print("Compute structured hierarchical clustering...")
st = time.time()
n_clusters = 4 # number of regions
ward = AgglomerativeClustering(n_clusters=n_clusters,
linkage='complete', connectivity = connectivity).fit(X)
label = np.reshape(ward.labels_, subpaint.shape)
print("Elapsed time: ", time.time() - st)
print("Number of pixels: ", label.size)
print("Number of clusters: ", np.unique(label).size)
fig=plt.figure()
fig.add_subplot(1,2,1)
plt.imshow(label)
fig.add_subplot(1,2,2)
plt.imshow(subpaint)
fig.set_figheight(15)
fig.set_figwidth(15)
In [71]:
Out[71]:
In [409]:
plt.figure(figsize=(5, 5))
plt.imshow(paint, cmap=plt.cm.gray)
for l in range(n_clusters):
plt.contour(label == l, contours=1,
colors=[plt.cm.spectral(l / float(n_clusters)), ])
plt.xticks(())
plt.yticks(())
plt.show()