In [1]:
from PIL import Image
from numpy import *
from pylab import *
import numpy as np
import scipy.misc

In [2]:
from scipy.cluster.vq import *

In [3]:
from scipy.misc import imresize

In [4]:
import os
import hcluster
# Re-import hcluster so that edits made to the module after the kernel first
# imported it take effect. `reload` is a builtin only on Python 2; on Python 3
# it lives in importlib, so fall back to that when the builtin is missing.
try:
    reload
except NameError:
    from importlib import reload
hcluster = reload(hcluster)

In [5]:
# Directory holding the image set, relative to the current working directory.
path = 'goldengatebridge/'
# os.listdir returns entries in arbitrary order; sort them so that the index
# of each image in imlist is the same on every run and every machine.
imlist = [os.path.join(path, f) for f in sorted(os.listdir(path)) if f.endswith('.jpg')]

In [6]:
# Build one colour-histogram feature vector per image: a joint histogram with
# 8 bins per channel (8*8*8 = 512 values), flattened into one row of features.
features = zeros([len(imlist), 512])
for i, f in enumerate(imlist):
    # Force three channels. Grayscale, palette or RGBA files would otherwise
    # either fail the reshape below or be regrouped into meaningless triples.
    im = array(Image.open(f).convert('RGB'))

    # Arguments are sample, bins, range, normalisation flag. The flag is
    # passed by position because its keyword is `normed` in older NumPy and
    # `density` in newer releases, where the `normed` keyword no longer exists.
    h, edges = histogramdd(im.reshape(-1, 3), 8, [(0, 255), (0, 255), (0, 255)], True)

    features[i] = h.flatten()

In [7]:
# Build the cluster tree from the histogram features using the project-local
# hcluster module. NOTE(review): presumably agglomerative hierarchical
# clustering over the rows of `features` - confirm in hcluster.py.
tree = hcluster.hcluster(features)

In [14]:
# Extract flat clusters using a threshold of 0.6 times tree.distance.
# NOTE(review): tree.distance is presumably the merge distance at the root -
# confirm in hcluster.py.
threshold = 0.6*tree.distance
clusters = tree.extract_clusters(threshold)

# One 8x8 inch figure per cluster with more than 3 members, showing at most
# 20 of its images on a 4 x 5 grid with the axes hidden.
for c in clusters:
    elements = c.get_cluster_elements()
    nbr_elements = len(elements)
    if nbr_elements <= 3:
        continue
    figure(figsize=(8, 8))
    nbr_shown = nbr_elements if nbr_elements < 20 else 20
    for p in range(nbr_shown):
        subplot(4, 5, p+1)
        im = array(Image.open(imlist[elements[p]]))
        imshow(im)
        axis('off')
show()



In [15]:
# Draw the cluster tree with hcluster's helper, passing the image paths and an
# output filename. NOTE(review): presumably renders the images at the leaves
# and writes the result to the given file - confirm in hcluster.py.
hcluster.draw_dendrogram(tree, imlist, filename='goldengate_rgb.pdf')

In [17]:
# Pairwise Euclidean distances between all feature vectors, stored as float32.
# Row j is computed with a single broadcast expression (features minus row j,
# squared, summed over the feature axis) instead of n separate Python-level
# calls, which removes the interpreted n*n inner loop.
# NOTE(review): despite the name S, larger entries mean *less* similar images;
# confirm that using distances as the affinity matrix is intended.
n = len(features)
S = array([sqrt(((features - features[j])**2).sum(axis=1)) for j in range(n)], 'f')

In [20]:
# Form L = I - D S D, where D is diagonal with entries 1/sqrt of the axis-0
# sums of S, then take its singular value decomposition.
rowsum = S.sum(axis=0)
inv_sqrt_rowsum = 1/sqrt(rowsum)
D = diag(inv_sqrt_rowsum)
I = eye(n)
scaled = dot(D, dot(S, D))
L = I - scaled
U, sigma, V = linalg.svd(L)

In [21]:
# Number of rows of V to keep (k is also reused as the cluster count later).
k = 5
# Copy the first k rows of V and transpose them, so that f has k columns.
leading_rows = array(V[:k])
f = leading_rows.T

In [23]:
# k-means picks its starting centroids at random, so fix the seed of NumPy's
# global random state first to make the cluster assignment repeatable between
# runs. NOTE(review): this relies on scipy's kmeans drawing from the global
# NumPy RNG when no explicit seed is passed - confirm for the installed SciPy.
np.random.seed(0)

# Rescale each column by its standard deviation, fit k centroids, then assign
# every row of f to its nearest centroid.
f = whiten(f)
centroids, distortion = kmeans(f, k)
code, distance = vq(f, centroids)

In [24]:
# Show the members of each k-means cluster in its own figure, on a 4 x 10 grid.
for c in range(k):
    # Indices of the images whose assigned code equals this cluster.
    ind = where(code==c)[0]
    figure()
    # Select the gray colormap once per figure; it does not change per image.
    gray()
    # The grid has 4*10 = 40 slots, so up to 40 images fit in one figure.
    for i in range(minimum(len(ind), 40)):
        im = Image.open(imlist[ind[i]])
        subplot(4, 10, i+1)
        imshow(array(im))
        axis('equal')
        axis('off')
show()



In [ ]: