In [1]:
# imports
%matplotlib inline
# %pylab osx
import os
import tensorflow as tf
import numpy as np
# NOTE(review): scipy.misc.imresize was deprecated and removed in SciPy 1.3.0;
# this only runs on old SciPy — confirm the pinned version, or switch to
# PIL.Image.resize / skimage.transform.resize.
from scipy.misc import imresize
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
plt.style.use('ggplot')
# Bit of formatting because I don't like the default inline code style:
from IPython.core.display import HTML
HTML("""<style> .rendered_html code {
padding: 2px 4px;
color: #c7254e;
background-color: #f9f2f4;
border-radius: 4px;
} </style>""")
Out[1]:
In [60]:
def crop_square(img):
    """Center-crop the longer dimension of `img` so the result is square.

    Parameters
    ----------
    img : ndarray with shape (height, width[, channels])

    Returns
    -------
    ndarray of shape (s, s[, channels]) where s = min(height, width).
    The input is returned unchanged when it is already square.

    Note: the original odd-`extra` branch (`img[max(0, extra//2 - 1):
    min(-1, -extra//2)]`) removed one row/column too few, so the result
    was not square when the size difference was odd. Using an explicit
    offset plus the target length is exact for both parities.
    """
    if img.shape[0] > img.shape[1]:
        # Too tall: trim rows, keeping the crop centered.
        offset = (img.shape[0] - img.shape[1]) // 2
        crop = img[offset:offset + img.shape[1], :]
    elif img.shape[1] > img.shape[0]:
        # Too wide: trim columns, keeping the crop centered.
        offset = (img.shape[1] - img.shape[0]) // 2
        crop = img[:, offset:offset + img.shape[0]]
    else:
        # Already square.
        crop = img
    return crop
def crop_center(img, new_height, new_width):
    """Crop a centered window of exactly (new_height, new_width) from `img`.

    Parameters
    ----------
    img : ndarray with shape (height, width[, channels])
    new_height, new_width : int, must not exceed the image dimensions.

    Returns
    -------
    ndarray of shape (new_height, new_width[, channels]).

    Fixes two defects in the original:
    * np.ceil/np.floor return floats, which are invalid slice indices
      (TypeError on modern NumPy).
    * when (height - new_height) or (width - new_width) was odd, the
      ceil/floor pair shrank the window below the requested size.
    """
    height = np.size(img, 0)
    width = np.size(img, 1)
    # Integer offsets; adding the target length guarantees the exact size.
    top = (height - new_height) // 2
    left = (width - new_width) // 2
    return img[top:top + new_height, left:left + new_width]
# Load two example photos, square-crop them and resize to 256x256.
image_files = ['./images/20130712_190436_1.jpg','./images/20140108_162814.jpg']
img_orig = [plt.imread(im) for im in image_files]
img_cropped = [crop_square(im) for im in img_orig]
# scipy.misc.imresize was removed in SciPy 1.3.0; use PIL instead.
# Bilinear resampling matches imresize's default interpolation.
imgs_resized = [np.array(Image.fromarray(im).resize((256, 256), Image.BILINEAR))
                for im in img_cropped]

# Alpha-blend the two images (65% overlay over background).
background = Image.fromarray(imgs_resized[0]).convert("RGBA")
overlay = Image.fromarray(imgs_resized[1]).convert("RGBA")
blended = Image.blend(background, overlay, 0.65).convert("RGB")
img = np.asarray(blended)
# NOTE(review): this overrides the blend — the network is trained on the
# first image only. Remove this line to train on the blended image.
img = imgs_resized[0]
plt.imshow(img)

# Build the training set:
# xs: (row, col) pixel coordinates, ys: the RGB value at that pixel.
xs = []
ys = []
for row_i in range(img.shape[0]):
    for col_i in range(img.shape[1]):
        xs.append([row_i, col_i])
        ys.append(img[row_i, col_i])
xs = np.array(xs)
ys = np.array(ys)
# Normalize coordinates to zero mean / unit variance for training stability.
xs = (xs - np.mean(xs)) / np.std(xs)
print(xs.shape, ys.shape)
A single layer consists of a linear unit plus an activation function. The W values are initialized with random values drawn from a normal distribution, while the biases are initialized to zero. More on variable creation, initialization, saving and loading here.
In [3]:
# One fully-connected layer with an optional non-linearity. Giving each
# layer its own variable scope (a unique name) makes chaining layers easy.
def linear(X, n_input, n_output, activation, scope):
    """Fully-connected layer computing activation(X @ W + b).

    Weights are drawn from N(0, 0.1^2); biases start at zero. When
    `activation` is None the raw affine output is returned. Variables are
    created (or reused) under the given scope via tf.get_variable.
    """
    with tf.variable_scope(scope):
        weights = tf.get_variable(
            name='W',
            shape=[n_input, n_output],
            initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
        biases = tf.get_variable(
            name='b',
            shape=[n_output],
            initializer=tf.constant_initializer())
        output = tf.matmul(X, weights) + biases
        return output if activation is None else activation(output)
In [63]:
# Build the coordinate-to-color regression network (TF1 graph mode).
tf.reset_default_graph()
g = tf.get_default_graph()
# in: 2 (x, y coordinate), out: 3 (RGB); seven hidden layers of 64 units
n_neurons = [2, 64, 64, 64, 64, 64, 64, 64, 3]
X = tf.placeholder(tf.float32, shape=[None, 2], name='X')
Y = tf.placeholder(tf.float32, shape=[None, 3], name='Y')
# Chain the layers: ReLU on every hidden layer, linear on the output layer.
current_input = X
for layer_i in range(1, len(n_neurons)):
    current_input = linear(
        X=current_input,
        n_input=n_neurons[layer_i - 1],
        n_output=n_neurons[layer_i],
        activation=tf.nn.relu if (layer_i+1) < len(n_neurons) else None,
        scope='layer_' + str(layer_i))
Y_pred = current_input
In [64]:
# Per-element L1 distance (absolute difference).
def distance_l1(p1, p2):
    return tf.abs(p1 - p2)
# Per-element squared (L2) distance.
def distance_l2(p1, p2):
    return tf.pow(p1 - p2,2)
distance = distance_l2
# cost: mean over pixels of the per-pixel sum of squared RGB differences
cost = tf.reduce_mean(tf.reduce_sum(distance(Y_pred, Y), 1))
print Y.get_shape(),Y_pred.get_shape(),tf.reduce_sum(distance(Y_pred, Y), 1).get_shape(),cost.get_shape()
In [66]:
# Training hyperparameters.
n_iterations = 2000
batch_size = 50
learning_rate = 0.0005
imgs = []      # snapshots of the reconstructed image (for the gif below)
costs = []     # NOTE(review): never appended to — dead variable
gif_step = 50
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
with tf.Session() as sess:
    # Initialize all tf variables using specified initializers.
    # NOTE(review): tf.initialize_all_variables() is deprecated in favor of
    # tf.global_variables_initializer() — confirm against the pinned TF version.
    sess.run(tf.initialize_all_variables())
    # We now run a loop over epochs.
    prev_training_cost = -1.0  # sentinel: first epoch always stores a frame
    for it_i in range(n_iterations):
        # Reshuffle pixel order each epoch and train on mini-batches.
        idxs = np.random.permutation(range(len(xs)))
        n_batches = len(idxs) // batch_size
        for batch_i in range(n_batches):
            idxs_i = idxs[batch_i * batch_size: (batch_i + 1) * batch_size]
            sess.run( optimizer,
                feed_dict={X: xs[idxs_i], Y: ys[idxs_i]})
        # Full-dataset cost after this epoch.
        training_cost = sess.run(cost, feed_dict={X: xs, Y: ys})
        store_img = False  # NOTE(review): set but never read — dead flag
        # Snapshot the current reconstruction whenever the cost improved.
        if prev_training_cost < 0.0 or training_cost < prev_training_cost:
            print 'Iteration: {} , cost: {}'.format(it_i, training_cost)
            store_img = True
            ys_pred = Y_pred.eval(feed_dict={X: xs}, session=sess)
            # Reshape flat predictions back to image shape and clamp to uint8.
            img = np.clip(ys_pred.reshape(img.shape), 0, 255).astype(np.uint8)
            imgs.append(img)
            plt.imshow(img)
            plt.show()
        # NOTE(review): indentation was lost in this export — this update may
        # belong inside the `if` above (tracking best-so-far cost) instead of
        # the loop level (tracking last cost); confirm against the original.
        prev_training_cost = training_cost
        if (it_i + 1) % gif_step == 0:
            print 'Iteration: {}'.format(it_i)
In [67]:
import moviepy.editor as mpy
i = -1
class MakeClip():
    """Callable frame source for moviepy: cycles through a list of images.

    `make_frame(t)` ignores the requested time `t` and simply returns the
    next image in `imgs`, wrapping back to the first image when the list
    is exhausted. (Removed a leftover Python-2 debug print of `t` that
    flooded the output on every rendered frame.)
    """
    def __init__(self, imgs):
        self.imgs = imgs  # frames to serve, in order
        self.i = -1       # index of the most recently served frame
    def make_frame(self, t):
        # Advance to the next frame, wrapping around at the end.
        self.i += 1
        if self.i >= len(self.imgs):
            self.i = 0
        return self.imgs[self.i]
print 'N images in clip: ',len(imgs)
# Render the stored snapshots two ways:
# 1) a fixed-duration clip that pulls frames from MakeClip (50 frames at
#    25 fps over 2 s, cycling through `imgs` regardless of its length), and
make_clip = MakeClip(imgs)
clip1 = mpy.VideoClip(make_clip.make_frame, duration=2) # 2 seconds
clip1.write_gif("my1.gif",fps=25)
# 2) a clip that plays every snapshot exactly once at 100 fps.
clip2 = mpy.ImageSequenceClip(imgs, fps=100)
clip2.write_gif("my2.gif")
In [ ]: