Create image collage from images

In [5]:
from PIL import Image
import os
import time

folder = 'donald'

startTime = time.time()

# Load every file in the folder as a PIL image. The listing is sorted because
# os.listdir() returns entries in arbitrary order, and that order decides
# where each image ends up in the collage.
images = [Image.open(os.path.join(folder, image_name))
          for image_name in sorted(os.listdir(folder))]
if not images:
    # max() below would fail with an opaque message on an empty folder.
    raise ValueError('no images found in folder: %r' % folder)

# The images are stacked vertically, so the collage is as tall as all of them
# together and as wide as the widest one.
total_height = sum(img.size[1] for img in images)
max_width = max(img.size[0] for img in images)

# Create the collage with an alpha channel. No fill colour is passed, so any
# area not covered by an image keeps Image.new's default value
# (presumably all-zero, i.e. transparent black -- confirm in the Pillow docs).
collage = Image.new('RGBA', (max_width, total_height))

# Paste every image below the previous one, aligned to the left edge.
y = 0
for img in images:
    collage.paste(img, (0, y))
    y += img.size[1]

endTime = time.time()

# Single-argument print(...) behaves the same on Python 2 and Python 3 and
# reproduces the original "label:  value" layout.
print('creating collage time:  %s' % (endTime - startTime))
print('total_height:  %s' % total_height)
print('max_width:  %s' % max_width)

# print the collage

creating collage time:  0.118999958038
total_height:  9482
max_width:  347

Fit KMeans model

In [6]:
from sklearn.cluster import KMeans
import numpy as np

startTime = time.time()

# Convert the collage to a color array (total_height X max_width X 4) and
# flatten it so that every row is the 4 channel values of a single pixel.
collage_array = np.array(collage).reshape((-1, 4))

# Remove the pixels whose four channels are all zero. Only fully-zero rows are
# dropped; a pixel with zero alpha but non-zero colour channels is kept.
collage_array = collage_array[~np.all(collage_array == 0, axis=1)]

# Fit a k-means model with 5 clusters on the remaining pixels. The explicit
# .fit() call is required: labels_ and cluster_centers_ only exist on a fitted
# model. random_state pins the centroid initialisation so that re-running the
# notebook yields the same clusters.
kmeans_model = KMeans(n_clusters=5, random_state=0).fit(collage_array)

endTime = time.time()

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('fitting model time:  %s' % (endTime - startTime))

fitting model time:  79.6549999714

Create colors bar

In [7]:
from PIL import ImageDraw

# Count how many pixels were assigned to each cluster. bincount with
# minlength yields exactly one entry per cluster centre (even for a cluster
# that received no pixels), so hist[i] always lines up with centers[i].
centers = kmeans_model.cluster_centers_
hist = np.bincount(kmeans_model.labels_, minlength=len(centers))

# normalize the histogram, such that it sums to one
hist = hist.astype("float")
hist /= hist.sum()

# Side length (in pixels) of the square bar image.
BAR_SIZE = 300

startX = 0

# create a bar image that displays the most used colors
im = Image.new('RGB', (BAR_SIZE, BAR_SIZE), (0, 0, 0))
dr = ImageDraw.Draw(im)

# Draw the clusters from most to least frequent, left to right; each cluster
# gets a vertical band whose width is proportional to its share of the pixels.
for (percent, color) in sorted(zip(hist, centers), key=lambda t: t[0], reverse=True):
    endX = startX + (float(percent) * BAR_SIZE)
    # The model was fitted on 4-channel pixels, but the bar image is RGB,
    # so only the first three components of the centre are used as the fill.
    fill = tuple(int(channel) for channel in color[:3])
    dr.rectangle([int(startX), 0, int(endX), BAR_SIZE], fill=fill)
    startX = endX

# print the histogram