This IPython notebook can be used to extract CNN layer activations (weights) for a set of images and to cluster those images based on the layer weights. It was used to generate a sample image cluster for the image clustering UI.
In [ ]:
import os
# get the locations of your Caffe and DIGITS installs
caffe_home = os.environ['CAFFE_HOME']
digits_home = os.environ['DIGITS_HOME']

# set the model directory and the mean file
model_jobid = "20150910-211807-d169"
model_dir = "/data/digits/jobs/%s/" % model_jobid
mean_file = "/data/digits/jobs/20150910-150610-9373/mean.binaryproto"
test_image_file = r"/data/1020/data/military uniform/937.jpg"

# import some DIGITS webserver code
import sys
sys.path.append(caffe_home + "/python/")
sys.path.append(digits_home)
sys.path.append(os.path.join(digits_home, 'digits'))
import digits
from digits import config
config.load_config()
import utils
import shutil
from job import Job
In [ ]:
import caffe
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from caffe.proto import caffe_pb2
from google.protobuf import text_format  # needed to parse the deploy prototxt below
import sklearn
import glob
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
In [ ]:
# configure plotting
plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
In [ ]:
caffe.set_mode_cpu()
In [ ]:
# Create the CNN object using the caffemodel file for the model parameters
# and the prototxt file for the model architecture
net_deployment_proto = model_dir + "deploy.prototxt"
latest_model_iteration = model_dir + "snapshot_iter_505860.caffemodel"
net = caffe.Net(net_deployment_proto,
                latest_model_iteration,
                caffe.TEST)
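As a quick sanity check, the network's blob names and shapes can be listed; a minimal sketch, which assumes an AlexNet-style model where fc6 yields the 4096-dimensional activations used below.
In [ ]:
# sketch: inspect the loaded network's blob shapes
for name, blob in net.blobs.items():
    print name, blob.data.shape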
In [ ]:
im = caffe.io.load_image(test_image_file)
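To verify the test image loaded as expected, it can be displayed inline (plotting was configured above):
In [ ]:
# show the loaded test image
plt.imshow(im)
plt.axis('off')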
In [ ]:
# Reuse code from the DIGITS web interface for its image handling
# and for consistency with the DIGITS interface
def getlayerweightsWeb(imfname, net=net,
                       deploy_file=net_deployment_proto,
                       meanfname=mean_file,
                       jobid=model_jobid,
                       layer='fc6'):
    # parse the deploy prototxt for the input dimensions
    network = caffe_pb2.NetParameter()
    infile = open(deploy_file)
    text_format.Merge(infile.read(), network)

    image = utils.image.load_image(imfname)

    # need programmatic access to the dataset dimensions
    dataset = Job.load(jobid)

    # get transformer parameters
    data_shape = network.input_dim[:4]
    channel_swap = (2, 1, 0)

    # read the mean image and reduce it to a per-channel mean pixel
    infile = open(meanfname, 'rb')
    blob = caffe_pb2.BlobProto()
    blob.MergeFromString(infile.read())
    mean_pixel = np.reshape(blob.data,
                            (dataset.image_dims[2],
                             dataset.image_dims[0],
                             dataset.image_dims[1])
                            ).mean(1).mean(1)

    t = caffe.io.Transformer(inputs={'data': data_shape})
    t.set_transpose('data', (2, 0, 1))
    t.set_channel_swap('data', channel_swap)
    t.set_mean('data', mean_pixel)

    image = np.array(image)
    preprocessed = None
    try:
        preprocessed = t.preprocess('data', image)
    except RuntimeError as e:
        print e
        return None

    # reshape the input blob if necessary, then forward up to the requested layer
    test_shape = (1,) + preprocessed.shape
    if net.blobs['data'].data.shape != test_shape:
        net.blobs['data'].reshape(*test_shape)
    net.blobs['data'].data[...] = preprocessed
    output = net.forward(end=layer)
    return output[layer]
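A quick test of the helper on the test image loaded above; the fc6 activations for a single image should come back with shape (1, 4096).
In [ ]:
# sketch: extract fc6 activations for the test image
weights = getlayerweightsWeb(test_image_file)
print weights.shape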
In [ ]:
# using the directory names as labels
# grab a sample of 100 image filenames from each directory
catlist = glob.glob("/data_ebs/1020/data/*")
imcatlist = []
samplesize = 100
for cat in catlist:
imcatlist.extend(glob.glob(cat+"/*")[:samplesize])
In [ ]:
samplecategories = 10
n_samples = samplecategories*samplesize
imweights = np.zeros([n_samples,4096],dtype=float)
In [ ]:
# get the features from the fc6 layer of the network
# and save the numpy array
for i, imfile in enumerate(imcatlist[:n_samples]):
    imweights[i,:] = getlayerweightsWeb(imfile)
np.save("imweights.npy", imweights)
In [ ]:
# Reload the weights if you've previously run the code
if imweights is None or not imweights.any():
    imweights = np.load("imweights.npy")
In [ ]:
# Project the image features into a reduced 3d space using t-SNE
model = TSNE(n_components=3)
trweights3d = model.fit_transform(imweights)
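To eyeball the clusters before exporting them, the 3d embedding can be plotted, colored by category index. A minimal sketch; the color assignment assumes imcatlist is ordered by category with samplesize images per category, as sampled above.
In [ ]:
# sketch: scatter the t-SNE embedding, colored by category index
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# one color per category, assuming samplesize consecutive images per category
colors = np.repeat(np.arange(samplecategories), samplesize)
ax.scatter(trweights3d[:,0], trweights3d[:,1], trweights3d[:,2], c=colors)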
In [ ]:
# create a map between the categories and an integer index for the files
namemap = {j: i for i, j in enumerate(set([f.split("/")[-2] for f in imcatlist]))}
In [ ]:
# copy the images used for the clustering example
for f in imcatlist[:n_samples]:
    ext = f.split("/")[-1].split(".")[-1]
    fname = f.split("/")[-1].split(".")[0]
    subdir = f.split("/")[-2]
    shutil.copyfile(f, "/data/staging/clustering_example/%s_%i.%s" % (fname, namemap[subdir], ext))
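The copy loop above assumes /data/staging/clustering_example already exists; a minimal guard, using the same path, that can be run first:
In [ ]:
# make sure the staging directory exists before copying into it
staging_dir = "/data/staging/clustering_example"
if not os.path.exists(staging_dir):
    os.makedirs(staging_dir)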
In [ ]:
# create the 3d coordinates for the image cluster
outfile = open("/data/staging/clustering_example/imdata3d.txt", "w")
for i, f in enumerate(imcatlist[:n_samples]):
    ext = f.split("/")[-1].split(".")[-1]
    fname = f.split("/")[-1].split(".")[0]
    subdir = f.split("/")[-2]
    newfile = "%s_%i.%s" % (fname, namemap[subdir], ext)
    outfile.write("%s\t%3.5f\t%3.5f\t%3.5f\n" % (newfile, trweights3d[i,0], trweights3d[i,1], trweights3d[i,2]))
outfile.close()
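A quick spot-check that the export wrote one tab-separated line per image:
In [ ]:
# sketch: verify the exported coordinates file
with open("/data/staging/clustering_example/imdata3d.txt") as f:
    lines = f.readlines()
print len(lines)
print lines[0]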