In [1]:
from PIL import Image
from numpy import *
from pylab import *
import scipy.misc

In [2]:
from scipy.cluster.vq import *

In [3]:
import imtools
import pickle

In [4]:
# Ask the project-local imtools helper for the entries under
# 'selected_fontimages/' (each entry is later handed to Image.open, so these
# are presumably image file paths -- confirm against imtools.get_imlist).
imlist = imtools.get_imlist('selected_fontimages/')
# Number of images; used below as the loop bound when projecting them.
imnbr = len(imlist)

In [5]:
# Read two objects back from the pickle file, in the order they were written:
# first the mean image, then V (judging by the file name, the PCA modes; its
# first 40 rows are used as the projection basis further down).
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only load 'font_pca_modes.pkl' if it comes from a trusted source.
with open('font_pca_modes.pkl', 'rb') as f:
    immean = pickle.load(f)
    V = pickle.load(f)

In [6]:
# Open every image in imlist, flatten its pixels into one row, and stack the
# rows into a single float32 ('f') matrix with one image per row.
flat_images = []
for image_path in imlist:
    flat_images.append(array(Image.open(image_path)).flatten())
immatrix = array(flat_images, 'f')

In [7]:
# Flatten the mean image so it lines up with the rows of immatrix, then
# express each mean-centred image in the basis given by the first 40 rows of V.
immean = immean.flatten()
leading_modes = V[:40]
projected_rows = []
for i in range(imnbr):
    projected_rows.append(dot(leading_modes, immatrix[i]-immean))
projected = array(projected_rows)

In [8]:
# Flat k-means on the projected images.
cluster_num = 3
# whiten rescales each feature (column) by its standard deviation.
# NOTE(review): this rebinds `projected`, so re-running the cell whitens the
# already-whitened data again.
projected = whiten(projected)
# NOTE(review): scipy's kmeans starts from randomly chosen observations and no
# seed is set anywhere in this notebook, so centroids can differ between runs.
centroids, distortion = kmeans(projected, cluster_num)

In [9]:
# Assign every projected observation to its nearest centroid: `code` holds the
# centroid index and `distance` the distance to that centroid, one entry per
# observation.
code, distance = vq(projected, centroids)

In [10]:
def divide_branch_with_center(data, branch, k):
    """Recursively split the observations listed in `branch` with k-means.

    Parameters
    ----------
    data : 2-D array
        One observation per row.
    branch : 1-D integer array (or sequence)
        Row indices into `data` that belong to this branch.
    k : int
        Maximum number of clusters to create at each split.

    Returns
    -------
    list
        `list(branch)` when the branch holds at most one index (a leaf).
        Otherwise a list of `(centroid, distance, children)` tuples, one per
        non-empty cluster, where `distance` is the mean distance of the
        cluster's members to `centroid` and `children` is the result of
        calling this function on the cluster's members.
    """
    branch = asarray(branch)

    # Never ask k-means for more clusters than there are observations.
    div = min(k, len(branch))
    if div <= 1:
        return list(branch)

    points = data[branch]
    centroids, distortion = kmeans(points, div)
    code, distance = vq(points, centroids)

    # kmeans may return fewer centroids than requested, and a centroid can
    # end up with no members; keep only the clusters that own observations.
    clusters = []
    for i in range(len(centroids)):
        ind = where(code == i)[0]
        if len(ind) > 0:
            clusters.append((i, ind))

    if len(clusters) < 2:
        # Everything landed in a single cluster (e.g. duplicate points).
        # Recursing on the same branch would never terminate, so make every
        # observation its own leaf, centred on itself at distance zero.
        return [(points[j], 0.0, [branch[j]]) for j in range(len(branch))]

    # `distance` has one entry per observation, so summarise each cluster by
    # the mean distance of its own members rather than indexing it with the
    # cluster number.
    return [
        (centroids[i],
         distance[ind].mean(),
         divide_branch_with_center(data, branch[ind], k))
        for i, ind in clusters
    ]

In [11]:
# Root branch: the row index of every observation in `projected`.
tree = arange(projected.shape[0])
# Root node in the (centroid, distance, children) layout used by the drawing
# helpers: a 40-element zero placeholder centroid, zero distance, and the
# recursive 4-way split of all observations as children.
branches = ([0]*40, 0, divide_branch_with_center(projected, tree, 4))

In [12]:
def get_depth(t):
    """Return the number of levels in the tree rooted at node `t`.

    `t` is a node whose third element, `t[2]`, is its list of children.
    A node with fewer than two children counts as depth 1; otherwise the
    depth is one more than that of its deepest child.
    """
    children = t[2]
    if len(children) >= 2:
        # Keep this a list: `max` may be numpy's version in this notebook.
        child_depths = [get_depth(child) for child in children]
        return max(child_depths)+1
    return 1

In [13]:
def get_height(t):
    """Return the number of drawing rows needed for the tree rooted at `t`.

    `t` is a node whose third element, `t[2]`, is its list of children.
    A node with fewer than two children occupies one row; otherwise the
    height is the sum of the heights of its children.
    """
    children = t[2]
    if len(children) >= 2:
        # Keep this a list: `sum` may be numpy's version in this notebook.
        child_heights = [get_height(child) for child in children]
        return sum(child_heights)
    return 1

In [14]:
from PIL import Image, ImageDraw

In [43]:
def draw_average(center, x, y, im):
    """Paste a small image reconstructed from a cluster centre onto `im`.

    The centre is scaled to unit length, mapped back through the first 40
    rows of the module-level `V`, stretched to the 0..255 range, reshaped to
    25x25, reduced to at most 20x20 and pasted with its left edge at `x`,
    vertically centred on `y`.

    Parameters
    ----------
    center : 1-D array, combined with the 40 selected rows of `V`.
    x, y : position of the thumbnail (left edge, vertical centre).
    im : PIL image that receives the thumbnail (modified in place).
    """
    # NOTE(review): `np` is never imported explicitly in this notebook; it is
    # presumably provided by `from pylab import *` -- confirm.
    unit_center = center/np.linalg.norm(center)
    modes = V[:40]
    recon = dot(modes.T, unit_center)

    # Min-max stretch to 0..255; the 1e-6 keeps the division finite when the
    # reconstruction is constant.
    lo = min(recon)
    hi = max(recon)
    recon = 255*(recon-lo)/(hi-lo+1e-6)
    recon = recon.reshape(25, 25)
    recon.clip(0, 255, out=recon)

    thumb = Image.fromarray(recon)
    thumb.thumbnail([20, 20])
    width, height = thumb.size
    upper = int(y-height//2)
    lower = int(y+height-height//2)
    im.paste(thumb, [int(x), upper, int(x+width), lower])

In [44]:
def draw_node(node, draw, x, y, s, iml, im):
    """Recursively draw the subtree below `node`, growing rightwards from (x, y).

    Parameters
    ----------
    node : tuple
        `(centroid, distance, children)`; `children` is either a one-element
        list holding an index into `iml` (a leaf) or a list of child nodes.
    draw : PIL ImageDraw object used for the connecting lines.
    x, y : position where this node's children attach.
    s : horizontal scale; a child's branch is `distance/8*s + s/4` long,
        followed by a further `s/4` stub.
    iml : sequence indexed by leaf value; entries are passed to Image.open.
    im : PIL image that thumbnails are pasted onto (modified in place).
    """
    children = node[2]
    if len(children) < 1:
        return

    if len(children) == 1:
        # Leaf: paste the thumbnail of the referenced image, centred on y.
        nodeim = Image.open(iml[children[0]])
        nodeim.thumbnail([20, 20])
        ns = nodeim.size
        im.paste(nodeim, (int(x), int(y-ns[1]//2), int(x+ns[0]), int(y+ns[1]-ns[1]//2)))
        return

    # Half of the vertical space (20 px per row) taken by each child.
    # Computed once, because get_height walks the whole subtree.
    half_heights = [get_height(child)*20/2 for child in children]
    ht = sum(half_heights)
    top = y-ht
    bottom = y+ht

    # Vertical connector from the centre of the first child to the centre of
    # the last child.
    draw.line((x, top+half_heights[0], x, bottom-half_heights[-1]), fill=(0, 0, 0))

    y = top
    for child, half in zip(children, half_heights):
        ll = child[1]/8*s
        y += half  # move to the vertical centre of this child
        xx = x + ll + s/4
        draw.line((x, y, xx, y), fill=(0, 0, 0))
        if len(child[2]) > 1:
            # Internal node: show its reconstructed centre, then continue
            # drawing to the right of the 20 px thumbnail.
            draw_average(child[0], xx, y, im)
            xx = xx+20
        draw.line((xx, y, xx+s/4, y), fill=(0, 0, 0))
        xx = xx+s/4
        # Pass the caller's image list down instead of the global `imlist`.
        draw_node(child, draw, xx, y, s, iml, im)
        y += half  # move to the bottom edge of this child

In [45]:
def draw_dendrogram(node, iml, filename='kclusters.jpg'):
    """Render the cluster tree rooted at `node`, save it and display it.

    The canvas is 1200 px wide and 20 px per tree row (plus 40 px) tall. A
    thumbnail of the module-level mean image `immean` (reshaped to 25x25)
    marks the root at the left edge, and draw_node draws the rest of the
    tree to its right.

    Parameters
    ----------
    node : root node tuple, as consumed by get_height, get_depth and draw_node.
    iml : image list forwarded to draw_node.
    filename : path the finished image is saved to.
    """
    canvas_width = 1200
    canvas_height = get_height(node)*20+40

    # Horizontal scale handed to draw_node: usable width divided by tree depth.
    scale = float(canvas_width-150)/get_depth(node)

    canvas = Image.new('RGB', (canvas_width, canvas_height), (255, 255, 255))
    pen = ImageDraw.Draw(canvas)

    left = 0
    mid_y = canvas_height/2

    # Root marker: thumbnail of the mean image, vertically centred on mid_y.
    root_thumb = Image.fromarray(immean.reshape(25, 25))
    root_thumb.thumbnail([20, 20])
    thumb_w, thumb_h = root_thumb.size
    upper = int(mid_y-thumb_h//2)
    lower = int(mid_y+thumb_h-thumb_h//2)
    canvas.paste(root_thumb, [int(left), upper, int(left+thumb_w), lower])

    # Short stub from the root thumbnail to where the tree starts.
    pen.line((left+20, mid_y, left+40, mid_y), fill=(0, 0, 0))
    draw_node(node, pen, left+40, mid_y, scale, iml, canvas)

    canvas.save(filename)
    canvas.show()

In [46]:
# Render the full cluster tree to 'k_fonts.jpg'; draw_dendrogram also opens
# the finished image in a viewer.
draw_dendrogram(branches, imlist, filename='k_fonts.jpg')

In [ ]: