Code to generate GoogLeNet class visualizations in the shape of a clipping mask. See this blogpost for details.
Before running the code, set pycaffe_root to your pycaffe path and model_path to the location of the GoogLeNet model. Download the BVLC GoogLeNet model from here if you haven't done so already.
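Before anything else, a quick path check can save debugging time later. This is a minimal sketch; the placeholder paths are the same ones you substitute in the cell below.
import os
assert os.path.isdir("/your/path/here/caffe/python"), "pycaffe_root is not a directory"
assert os.path.isfile("/your/path/here/caffe_models/bvlc_googlenet/bvlc_googlenet.caffemodel"), "caffemodel weights not found"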
In [1]:
# imports and basic notebook setup
from cStringIO import StringIO
import numpy as np
import os,re,random
import scipy.ndimage as nd
import PIL.Image
import sys,string,math,time
from IPython.display import clear_output, Image, display
from google.protobuf import text_format
from scipy.misc import imresize
pycaffe_root = "/your/path/here/caffe/python" # substitute your path here
sys.path.insert(0, pycaffe_root)
import caffe
model_name = "GoogLeNet"
model_path = '/your/path/here/caffe_models/bvlc_googlenet/' # substitute your path here
net_fn = '../deploy_googlenet_updated.prototxt'
param_fn = model_path + 'bvlc_googlenet.caffemodel'
mean = np.float32([104.0, 117.0, 123.0])
#caffe.set_mode_gpu() # uncomment this if gpu processing is available
net = caffe.Classifier(net_fn, param_fn,
                       mean = mean,            # ImageNet mean, training set dependent
                       channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB
# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    # convert (h,w,3) RGB to Caffe's (3,h,w) BGR layout and subtract the channel means
    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']

def deprocess(net, img):
    # invert preprocess: add the channel means back and convert (3,h,w) BGR to (h,w,3) RGB
    return np.dstack((img + net.transformer.mean['data'])[::-1])

def blur(img, sigma):
    # gaussian-blur each channel separately; sigma == 0 leaves the image unchanged
    if sigma > 0:
        img[0] = nd.filters.gaussian_filter(img[0], sigma, order=0)
        img[1] = nd.filters.gaussian_filter(img[1], sigma, order=0)
        img[2] = nd.filters.gaussian_filter(img[2], sigma, order=0)
    return img
def showarray(a, f=None, fmt='jpeg'):
    # display an image array inline; the filename argument f is unused
    a = np.uint8(np.clip(a, 0, 255))
    buf = StringIO()
    PIL.Image.fromarray(a).save(buf, fmt)
    display(Image(data=buf.getvalue()))
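As a quick check of the two layout helpers (a hypothetical example, assuming the net loaded above): preprocess and deprocess are inverses, so a round trip should recover the input up to floating-point rounding.
# hypothetical round-trip check of preprocess/deprocess
dummy = np.float32(np.random.uniform(0, 255, (224, 224, 3)))
recovered = deprocess(net, preprocess(net, dummy))
print np.allclose(dummy, recovered) # expected: True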
In [3]:
def make_step(net, step_size=1.5, end='inception_4c/output', clip=True, focus=None, sigma=None):
    '''Basic gradient ascent step.'''
    src = net.blobs['data'] # input image is stored in Net's 'data' blob
    dst = net.blobs[end]

    net.forward(end=end)

    # backpropagate a one-hot vector that selects the focus class at the target layer
    one_hot = np.zeros_like(dst.data)
    one_hot.flat[focus] = 1.
    dst.diff[:] = one_hot
    net.backward(start=end)
    g = src.diff[0]

    # take a gradient ascent step, normalized by the mean absolute gradient
    src.data[:] += step_size/np.abs(g).mean() * g

    if clip:
        # keep pixel values inside the valid [0, 255] range (in mean-subtracted coordinates)
        bias = net.transformer.mean['data']
        src.data[:] = np.clip(src.data, -bias, 255-bias)

    # regularize by blurring the image slightly after each step
    src.data[0] = blur(src.data[0], sigma)

    # reset the objective for the next step
    dst.diff.fill(0.)
def deepdraw(net, base_img, octaves, focus, visualize=True, clip=True, clip_arr=None):
    # prepare base image
    image = preprocess(net, base_img) # (3,224,224)

    # get input dimensions from net
    w = net.blobs['data'].width
    h = net.blobs['data'].height

    print "starting drawing"
    src = net.blobs['data']
    src.reshape(1,3,h,w) # resize the network's input image size
    current_scale = 1.0
    for e,o in enumerate(octaves):
        if 'scale' in o:
            # upscale the image by the octave's scale factor
            image = nd.zoom(image, (1,o['scale'],o['scale']))
            current_scale *= o['scale']
            # scale the clipping mask as well, keeping it binary
            clip_arr = np.abs(np.round(nd.zoom(clip_arr, (o['scale'],o['scale']))))
        _,imw,imh = image.shape # note: image is (channels, rows, cols)

        # select layer
        layer = o['layer']

        for i in xrange(o['iter_n']):
            # randomly select a 224x224 crop
            ox = random.randint(0,imw-224)
            oy = random.randint(0,imh-224)

            # insert the crop into src.data[0]
            src.data[0] = image[:,ox:ox+224,oy:oy+224]

            # linearly interpolate blur sigma and step size over the octave's iterations
            sigma = o['start_sigma'] + ((o['end_sigma'] - o['start_sigma']) * i) / o['iter_n']
            step_size = o['start_step_size'] + ((o['end_step_size'] - o['start_step_size']) * i) / o['iter_n']

            make_step(net, end=layer, clip=clip, focus=focus, sigma=sigma, step_size=step_size)

            if visualize:
                vis = deprocess(net, src.data[0])
                if not clip: # adjust image contrast if clipping is disabled
                    vis = vis*(255.0/np.percentile(vis, 99.98))
                showarray(vis,"./filename"+str(i)+"_"+str(focus)+".jpg")

            if i > 0 and i % 10 == 0:
                print 'finished step %d in octave %d' % (i,e)

            # insert the modified crop back into the full image
            image[:,ox:ox+224,oy:oy+224] = src.data[0]

            # apply the clipping mask: zero out masked pixels and paint them white
            if clip_arr is not None:
                image *= clip_arr
                image[np.where(np.tile(clip_arr,(3,1,1)) == 0.)] = 255.

        print "octave %d image:" % e
        showarray(deprocess(net, image),"./octave_"+str(e)+".jpg")

    # return the resulting image
    return deprocess(net, image)
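A minimal usage sketch of make_step on its own (hypothetical, assuming the net loaded above; class 368 is simply the class used further below):
net.blobs['data'].reshape(1, 3, 224, 224)
net.blobs['data'].data[0] = preprocess(net, np.random.normal(250., 8., (224, 224, 3)))
make_step(net, end='loss3/classifier', focus=368, sigma=1.0, step_size=6.0)
showarray(deprocess(net, net.blobs['data'].data[0]))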
We load the clipping mask as a black-and-white image and convert it to a binary numeric mask: black pixels become 1 (pixels to draw on) and white pixels become 0 (background).
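For intuition, a toy example of that conversion with hypothetical pixel values:
# hypothetical values: black (0) -> 1.0, white (255) -> 0.0, mid-gray rounds to 0.0
toy = np.asarray([[0, 255], [128, 0]], dtype=np.float32)
print np.round(1. - toy/255.) # [[ 1. 0.] [ 0. 1.]]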
In [4]:
# get clipping mask
clip_img = PIL.Image.open("./clipping_masks/clipping_mask_a.png")
clip_arr = np.asarray(clip_img.convert("L")) # grayscale, values in [0, 255]
clip_arr = 1. - clip_arr/255. # invert: black -> 1., white -> 0.
clip_arr = np.round(clip_arr) # binarize
octaves = [
    {
        'layer':'loss3/classifier',
        'iter_n':190,
        'start_sigma':2.5,
        'end_sigma':0.78,
        'start_step_size':12.,
        'end_step_size':12.
    },
    {
        'layer':'loss3/classifier',
        'scale':1.2,
        'iter_n':150,
        'start_sigma':0.78*1.2,
        'end_sigma':0.78,
        'start_step_size':6.,
        'end_step_size':6.
    },
    {
        'layer':'loss2/classifier',
        'scale':1.2,
        'iter_n':150,
        'start_sigma':0.78*1.2,
        'end_sigma':0.44,
        'start_step_size':6.,
        'end_step_size':3.
    },
    {
        'layer':'loss1/classifier',
        'scale':1.0,
        'iter_n':10,
        'start_sigma':0.44,
        'end_sigma':0.304,
        'start_step_size':3.,
        'end_step_size':3.
    }
]
focus = 368 # capuchins
# initialize the image as near-white noise, sized to the clipping mask
start_dim = [clip_arr.shape[0],clip_arr.shape[1]]
background_color = np.float32([250.0, 250.0, 250.0])
gen_image = np.random.normal(background_color, 8, (start_dim[0], start_dim[1], 3))
starttime = time.time()
gen_image = deepdraw(net, gen_image, octaves, focus, visualize=False, clip_arr=clip_arr)
print "took seconds : %f" % (time.time()-starttime)
# save the resulting image (uncomment to write it to disk)
#img_fn = '_'.join([model_name, "deepdraw_clipping_mask", str(focus)+'.png'])
#PIL.Image.fromarray(np.uint8(gen_image)).save('./' + img_fn)