In [1]:
from PIL import Image
from numpy import *
from pylab import *
import scipy.misc
In [2]:
from scipy.cluster.vq import *
In [3]:
import imtools
import pickle
In [4]:
# Collect the image file names found under 'selected_fontimages/' using the
# project-local imtools helper, and remember how many there are.
imlist = imtools.get_imlist('selected_fontimages/')
imnbr = len(imlist)
In [5]:
# The pickle file holds two objects written one after the other: first the
# mean image, then the matrix V whose leading rows are used as the projection
# basis further down.
# NOTE(review): pickle.load can execute arbitrary code -- only open a
# 'font_pca_modes.pkl' that you generated yourself.
with open('font_pca_modes.pkl', 'rb') as f:
    immean = pickle.load(f)
    V = pickle.load(f)
In [6]:
# Data matrix: one flattened image per row, stored as float32 ('f').
immatrix = array(
    [array(Image.open(path)).flatten() for path in imlist],
    'f',
)
In [7]:
# Use the mean as a flat vector so it can be subtracted from each image row.
immean = immean.flatten()
# Center every image and project it onto the first 40 rows of V, giving one
# 40-dimensional descriptor per image (immatrix has exactly imnbr rows).
projected = array([dot(V[:40], row - immean) for row in immatrix])
In [8]:
# Number of clusters for the flat (non-hierarchical) k-means run.
cluster_num = 3
# whiten() divides every feature by its standard deviation, which is the
# normalization scipy's kmeans expects. Note that this overwrites `projected`.
projected = whiten(projected)
# NOTE: kmeans picks its starting centroids at random, so the result can
# differ from run to run.
centroids, distortion = kmeans(projected, cluster_num)
In [9]:
# Assign every descriptor to its nearest centroid: `code` holds the centroid
# index per observation, `distance` the distance to that centroid.
code, distance = vq(projected, centroids)
In [10]:
def divide_branch_with_center(data, branch, k):
    """Recursively split the observations listed in `branch` with k-means.

    Parameters
    ----------
    data : 2-D array
        One observation per row.
    branch : 1-D integer array
        Row indices of `data` that belong to the current node.
    k : int
        Maximum number of children per node.

    Returns
    -------
    list
        `list(branch)` when no split is attempted (the capped cluster count
        is at most 1), otherwise a list of `(center, distance, children)`
        tuples, one per non-empty cluster. `center` is the cluster centroid,
        `distance` the mean distance of the cluster's members to it, and
        `children` the result of splitting that cluster again.
    """
    # Never ask for more clusters than there are observations.
    div = min(k, len(branch))
    if div <= 1:
        return list(branch)

    points = data[branch]
    # Use the capped count here: kmeans cannot draw k distinct starting
    # centroids from fewer than k observations.
    centroids, distortion = kmeans(points, div)
    code, distance = vq(points, centroids)

    # kmeans may return fewer centroids than requested (empty clusters are
    # dropped), so only look at the centroids that actually exist.
    occupied = []
    for i in range(len(centroids)):
        ind = where(code == i)[0]
        if len(ind) > 0:
            occupied.append((i, ind))

    if len(occupied) < 2:
        # k-means could not separate the points (e.g. identical rows).
        # Recursing on the very same branch would never terminate, so make
        # every remaining observation its own leaf instead.
        return [(data[j], 0.0, [j]) for j in branch]

    new_branch = []
    for i, ind in occupied:
        # `distance` is indexed by observation, not by cluster, so summarize
        # the members of cluster i rather than reading distance[i].
        spread = distance[ind].mean()
        new_branch.append(
            (centroids[i], spread, divide_branch_with_center(data, branch[ind], k))
        )
    return new_branch
In [11]:
# Root branch: the row index of every projected image.
tree = array(list(range(projected.shape[0])))
# Root node in the same (center, distance, children) layout as the tuples
# built by divide_branch_with_center: a placeholder center of 40 zeros,
# distance 0, and the recursive 4-way split of all images as children.
branches = ([0] * 40, 0, divide_branch_with_center(projected, tree, 4))
In [12]:
def get_depth(t):
    """Return the number of levels in the subtree rooted at node `t`.

    `t` is a (center, distance, children) tuple. A node whose children list
    has fewer than two entries counts as a single level.
    """
    children = t[2]
    if len(children) >= 2:
        return 1 + max([get_depth(child) for child in children])
    return 1
In [13]:
def get_height(t):
    """Return how many rows the subtree rooted at node `t` occupies.

    `t` is a (center, distance, children) tuple. A node whose children list
    has fewer than two entries takes one row; otherwise the rows of all
    children are added up.
    """
    children = t[2]
    if len(children) >= 2:
        return sum([get_height(child) for child in children])
    return 1
In [14]:
from PIL import Image, ImageDraw
In [43]:
def draw_average(center, x, y, im):
    """Paste a thumbnail reconstructed from a cluster center into `im`.

    The center is scaled to unit length, mapped back to pixel space through
    the first 40 rows of the global `V`, stretched to the 0-255 range and
    turned into a 25x25 image that is shrunk to fit 20x20. The thumbnail's
    left edge is placed at `x` and it is vertically centered on `y`.
    """
    unit = center/np.linalg.norm(center)
    pixels = dot((V[:40]).T, unit)

    # Min-max stretch; the small epsilon avoids dividing by zero when all
    # pixel values are equal.
    lowest = min(pixels)
    highest = max(pixels)
    pixels = 255*(pixels-lowest)/(highest-lowest+1e-6)

    pixels = pixels.reshape(25, 25)
    pixels[pixels<0] = 0
    pixels[pixels>255] = 255

    thumb = Image.fromarray(pixels)
    thumb.thumbnail([20, 20])
    width, height = thumb.size
    left = int(x)
    upper = int(y-height//2)
    right = int(x+width)
    lower = int(y+height-height//2)
    im.paste(thumb, [left, upper, right, lower])
In [44]:
def draw_node(node, draw, x, y, s, iml, im):
    """Recursively draw `node` and its subtree, growing to the right.

    Parameters
    ----------
    node : tuple
        (center, distance, children). `children` is either a list holding a
        single image index (leaf) or a list of child node tuples.
    draw : ImageDraw.Draw
        Drawing context used for the connecting lines.
    x, y : number
        Left attachment point of this node; the subtree is centered on `y`.
    s : number
        Horizontal scale used for the branch lengths.
    iml : sequence of str
        Image file names, indexed by the leaf indices stored in the tree.
    im : PIL.Image.Image
        Canvas that thumbnails are pasted into.
    """
    children = node[2]
    if len(children) < 1:
        return

    if len(children) == 1:
        # Leaf: paste the image itself, vertically centered on y.
        nodeim = Image.open(iml[children[0]])
        nodeim.thumbnail([20, 20])
        ns = nodeim.size
        im.paste(nodeim, [int(x), int(y-ns[1]//2), int(x+ns[0]), int(y+ns[1]-ns[1]//2)])
        return

    # Each leaf row is 20 px tall; keep half of every child's height so the
    # children can be stacked and centered without recomputing it.
    half_heights = [get_height(child)*20/2 for child in children]
    ht = sum(half_heights)
    top = y-ht
    bottom = y+ht

    # Vertical bar spanning from the first child's center to the last one's.
    draw.line((x, top+half_heights[0], x, bottom-half_heights[-1]), fill=(0, 0, 0))

    y = top
    for child, half in zip(children, half_heights):
        # Branch length grows with the distance stored on the child.
        ll = child[1]/8*s
        y += half
        xx = x + ll + s/4
        draw.line((x, y, xx, y), fill=(0, 0, 0))
        if len(child[2]) > 1:
            # Internal child: show its averaged thumbnail, then add a short
            # connector to the right of it before drawing its subtree.
            draw_average(child[0], xx, y, im)
            xx = xx+20
            draw.line((xx, y, xx+s/4, y), fill=(0, 0, 0))
            xx = xx+s/4
        # Pass the caller's image list down (not the module-level one) so the
        # `iml` argument is honored at every depth.
        draw_node(child, draw, xx, y, s, iml, im)
        y += half
In [45]:
def draw_dendrogram(node, iml, filename='kclusters.jpg'):
    """Draw the cluster tree rooted at `node`, save it and display it.

    The canvas is 1200 px wide and 20 px tall per tree row plus a 40 px
    margin. A thumbnail of the global mean image `immean` marks the root at
    the left edge; the tree itself is drawn by `draw_node`. The picture is
    written to `filename` and then opened with `Image.show()`.

    `iml` is the list of image file names indexed by the tree's leaves.
    """
    cols = 1200
    rows = get_height(node)*20+40
    # Horizontal room available per tree level.
    s = float(cols-150)/get_depth(node)

    canvas = Image.new('RGB', (cols, rows), (255, 255, 255))
    pen = ImageDraw.Draw(canvas)

    left = 0
    mid = rows/2

    # Root marker: the mean image, shrunk to fit 20x20 and centered on mid.
    root_thumb = Image.fromarray(immean.reshape(25, 25))
    root_thumb.thumbnail([20, 20])
    width, height = root_thumb.size
    canvas.paste(
        root_thumb,
        [int(left), int(mid-height//2), int(left+width), int(mid+height-height//2)],
    )

    # Short connector from the root thumbnail to where the tree starts.
    pen.line((left+20, mid, left+40, mid), fill=(0, 0, 0))
    draw_node(node, pen, left+40, (rows/2), s, iml, canvas)

    canvas.save(filename)
    canvas.show()
In [46]:
# Render the hierarchical k-means tree of the font images to 'k_fonts.jpg'
# (draw_dendrogram also opens the saved picture in an image viewer).
draw_dendrogram(branches, imlist, filename='k_fonts.jpg')
In [ ]: