Tinkering with Keras

The goal of this notebook is to collect useful insights gathered while learning Keras. Since I will be working closely with a colleague who uses Theano, the idea is to implement everything so that it works seamlessly with both backends.

Below are some of the functionalities that I found were not obvious from the official documentation.

Evaluate variables.


In [1]:
%reset -f
import keras.backend as K

x = K.variable(42.)

# Solution 1:
sess = K.get_session()
print sess.run(x)

# Solution 2 (seamless):
print K.eval(x)


Using TensorFlow backend.
42.0
42.0
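
A small sketch of a third option that should also work with both backends: keras.backend exposes get_value and set_value for reading and writing variables directly.

print K.get_value(x)   # 42.0
K.set_value(x, 7.)
print K.get_value(x)   # 7.0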

Feed placeholders and evaluate functions.


In [2]:
%reset -f
import keras.backend as K
import numpy as np

x = K.placeholder(ndim=1)
y = 2 * x
feed = np.array([2])

# Solution 1:
sess = K.get_session()
print sess.run(y, { x : feed})

# Solution 2 (seamless):
f = K.function([x], [y])
print f([feed])[0]


[ 4.]
[ 4.]
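
As a side note, K.function happily takes several inputs and outputs at once. A minimal sketch (the names a, b and g are just for illustration):

a = K.placeholder(ndim=1)
b = K.placeholder(ndim=1)
g = K.function([a, b], [a + b, a * b])
print g([np.array([1., 2.]), np.array([3., 4.])])  # Returns [a + b, a * b] evaluated on the fed arrays.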

Compute custom gradients.


In [3]:
%reset -f
import numpy as np
import keras.backend as K
from keras.layers import Input, Dense, Activation
from keras.models import Model
from keras import optimizers, losses

# Define the model and generate the data.
print "Our model is y = 3 * x1 + 2 * x2"
num_data = 1000
x_train = np.random.rand(num_data, 2)
y_train = x_train * np.matrix([[3], [2]])
print "Data generated."
x = Input(shape=(2,), name='x')
y = Dense(1, activation='linear', use_bias=False)(x)
model = Model(inputs=x, outputs=y)

opt = optimizers.Adam(0.1)
loss = losses.mean_squared_error

model.compile(opt, loss)

# Retrieve the handle to the trainable weights; for this linear model they should equal dy_dx.
w = model.trainable_weights[0]

dy_dx = K.gradients(y, x) 
f = K.function(inputs=[x], outputs=dy_dx)
fx = f([np.ones([1, 2])])[0]  # Input is irrelevant, since we are computing the gradient of a linear function.
print "Before training:"
print "dy_dx = ", fx, ", w = ", K.eval(w).T 
model.fit(x_train, y_train, epochs=500, batch_size=num_data, verbose=0)
print "\nAfter training:"
print "dy_dx = ", f([np.ones([1, 2])])[0], ", w = ", K.eval(w).T 

# Calculate the gradient manually.
y_true = K.placeholder(shape=(None, 1))
dJ_dw = K.gradients(loss(y, y_true), w) 
f = K.function(inputs=[x, y_true], outputs=dJ_dw)

print "\ndJ_dw (should be around 0):"
print f([x_train, y_train])[0].T


Our model is y = 3 * x1 + 2 * x2
Data generated.
Before training:
dy_dx =  [[ 0.10765421  1.28355491]] , w =  [[ 0.10765421  1.28355491]]

After training:
dy_dx =  [[ 3.00000072  1.99999928]] , w =  [[ 3.00000072  1.99999928]]

dJ_dw (should be around 0):
[[ 0.00013559 -0.00010941]]
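
A quick sanity check for dJ_dw (a sketch reusing the variables from the cell above): with the TensorFlow backend, K.gradients of a non-scalar tensor differentiates its sum, so the quantity printed above is d/dw sum_i (x_i w - y_i)^2 = 2 * X^T (X w - y).

w_val = K.eval(w)                        # shape (2, 1)
residual = x_train.dot(w_val) - y_train  # shape (1000, 1)
print (2 * x_train.T.dot(residual)).T    # Should be close to dJ_dw above.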

Pass a custom gradient to the optimizer.


In [4]:
%reset -f
import tensorflow as tf  # Not seamless.
import numpy as np
import keras.backend as K
from keras.layers import Input, Dense, Activation
from keras.models import Model
from keras import losses

# Define the model and generate the data.
print "Our model is y = 3 * x1 + 2 * x2 + 5"
num_data = 1000
x_train = np.random.rand(num_data, 2)
y_train = x_train * np.matrix([[3], [2]]) + 5
print "Data generated."
x = Input(shape=(2,), name='x')
y = Dense(1, activation='linear')(x)
model = Model(inputs=x, outputs=y)

# Retrieve the handle to the trainable weights. In this case it is a list of variables [W, b].
w = model.trainable_weights
store_w = {l: l.get_weights() for l in model.layers}

print "Initial weights:", [K.eval(i).T for i in w]

opt = tf.train.AdamOptimizer(0.1)
# An output placeholder is needed to define the loss.
y_true = K.placeholder(shape=(None, 1))
loss = losses.mean_squared_error(y, y_true)

grads_and_vars = zip(tf.gradients(loss, w), w)
op = opt.apply_gradients(grads_and_vars)

print "\nFirst approach:"
f = K.function(inputs=[x, y_true], outputs=[], updates=[op])
for i in range(1000):
    f([x_train, y_train])
    
print "Optimized weights:", [K.eval(i).T for i in w]

# Restore weights from beginning:
for l in model.layers:
    l.set_weights(store_w[l])
print "\nRestored weights:", [K.eval(i).T for i in w]

print "\nSecond approach:"
sess = K.get_session()
sess.run(tf.global_variables_initializer())
for i in range(1000):
    sess.run(op, {x : x_train, y_true : y_train})
    
print "Optimized weights:", [K.eval(i).T for i in w]


Our model is y = 3 * x1 + 2 * x2 + 5
Data generated.
Initial weights: [array([[-1.09196281, -0.79868513]], dtype=float32), array([ 0.], dtype=float32)]

First approach:
Optimized weights: [array([[ 3.00255489,  2.00256109]], dtype=float32), array([ 4.99717522], dtype=float32)]

Restored weights: [array([[-1.09196281, -0.79868513]], dtype=float32), array([ 0.], dtype=float32)]

Second approach:
Optimized weights: [array([[ 3.00089622,  2.00093484]], dtype=float32), array([ 4.99898815], dtype=float32)]
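
One reason to build grads_and_vars explicitly is that the gradients can be transformed before they reach the optimizer. A minimal sketch reusing the objects from the cell above (the clip norm of 1.0 is arbitrary):

clipped_grads_and_vars = [(tf.clip_by_norm(g, 1.0), v) for g, v in grads_and_vars]
op_clipped = opt.apply_gradients(clipped_grads_and_vars)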

Seamlessly pass a custom gradient to the optimizer.


In [5]:
%reset -f
import numpy as np
import keras.backend as K
from keras.layers import Input, Dense, Activation
from keras.models import Model
from keras import optimizers, losses

# Define the model and generate the data.
print "Our model is y = 3 * x1 + 2 * x2 + 5"
num_data = 1000
x_train = np.random.rand(num_data, 2)
y_train = x_train * np.matrix([[3], [2]]) + 5
print "Data generated."
x = Input(shape=(2,), name='x')
y = Dense(1, activation='linear')(x)
model = Model(inputs=x, outputs=y)

print "Initial weights:", [K.eval(i).T for i in model.trainable_weights]

opt = optimizers.Adam(0.1)
y_true = K.placeholder(shape=(None, 1))
loss = losses.mean_squared_error(y, y_true)

# Note: get_updates(params, constraints, loss) is the signature of the Keras version used here; later releases changed it to get_updates(loss, params).
updates = opt.get_updates(model.trainable_weights, model.constraints, [loss])
train_step = K.function(inputs=[x, y_true], outputs=[], updates=updates)

for i in range(1000):
    train_step([x_train, y_train])
    
print "\nOptimized weights:", [K.eval(i).T for i in model.trainable_weights]


Our model is y = 3 * x1 + 2 * x2 + 5
Data generated.
Initial weights: [array([[ 0.02594531,  0.95221674]], dtype=float32), array([ 0.], dtype=float32)]

Optimized weights: [array([[ 3.0002768 ,  2.00025392]], dtype=float32), array([ 4.9997077], dtype=float32)]
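
For completeness, the seamless route can clip gradients too, since Keras optimizers accept clipnorm / clipvalue arguments. A sketch reusing the objects from the cell above:

opt_clipped = optimizers.Adam(0.1, clipnorm=1.0)
updates_clipped = opt_clipped.get_updates(model.trainable_weights, model.constraints, [loss])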

Concatenate two models and propagate the gradient.

The idea is to simply do:

in1 = ...
out2 = model2(model1.outputs)
grads = K.gradients(out2, in1)

In [6]:
%reset -f
import keras.backend as K
from keras.layers import Input, Dense, Lambda
from keras.models import Model
import numpy as np

x1 = Input(shape=(2,))
y = Lambda(lambda x: x ** 2)(x1)
grads1 = K.gradients(y, x1)
f1 = K.function(inputs=[x1], outputs=grads1)
model1 = Model(inputs=x1, outputs=y)

x_feed = np.random.randn(2, 2)
print "1. Model: y1 = x ** 2"
print "x =\n", x_feed
print "y =\n", model1.predict_on_batch(x_feed)
print "dy_dx =\n", f1([x_feed])[0]

print "\n2. Model: y2 = 3 * x"
x2 = Input(shape=(2, ))  # Matches model1's output shape, so the composition below is consistent.
y = Lambda(lambda x: 3 * x)(x2)
grads2 = K.gradients(y, x2)
f2 = K.function(inputs=[x2], outputs=grads2)
model2 = Model(inputs=x2, outputs=y)
print "y2 =\n", model1.predict_on_batch(x_feed)

print "\n3. Model: z = y2 o y1"
z = model2(model1.outputs)
in1 = model1.inputs[0]
f3 = K.function(inputs=[in1], outputs=[z])
print "z =\n", f3([x_feed])[0]
grads3 = K.gradients(z, in1)
f4 = K.function(inputs=[in1], outputs=grads3)
print "dz_dx =\n", f4([x_feed])[0]
print "6x =\n", 6 * x_feed


1. Model: y1 = x ** 2
x =
[[-1.4824125   1.18547917]
 [ 1.21335748 -0.24811791]]
y =
[[ 2.19754672  1.40536082]
 [ 1.47223628  0.0615625 ]]
dy_dx =
[[-2.96482491  2.37095833]
 [ 2.4267149  -0.49623582]]

2. Model: y2 = 3 * x
y2 =
[[ 2.19754672  1.40536082]
 [ 1.47223628  0.0615625 ]]

3. Model: z = y2 o y1
z =
[[ 6.59263992  4.21608257]
 [ 4.41670895  0.1846875 ]]
dz_dx =
[[-8.89447498  7.11287498]
 [ 7.28014469 -1.48870742]]
6x =
[[-8.89447501  7.11287502]
 [ 7.28014485 -1.48870744]]
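
The composition can also be wrapped into a single Model, which is handy if the whole chain should be trained end-to-end. A sketch reusing the objects from the cell above (the name model3 is just for illustration):

model3 = Model(inputs=model1.inputs, outputs=z)
print model3.predict_on_batch(x_feed)  # Same values as z above.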

Optimize a function by overloading get_gradients.


In [7]:
%reset -f
import keras.backend as K
from keras.layers import Input, Lambda
from keras import optimizers
import numpy as np

print "Goal is to find argmin_x(x^2)"
x0 = 3 * np.ones([1, 1])  # Two-dimensional so that K.eval(x) can be fed into the (batch, 1)-shaped Input below.
print "x0 = ", x0
x = K.variable(value=x0)
x_in = Input(shape=(1,))
y = Lambda(lambda x: x ** 2)(x_in)

opt = optimizers.Adam(0.1)

def get_gradients(*unused):
    return K.gradients(y, x_in)

# Override the gradient computation used by the optimizer:
opt.get_gradients = get_gradients
updates = opt.get_updates([x], [], [])
# (equivalently, since the override ignores its arguments: updates = opt.get_updates([x], [], [y]))
train_step = K.function(inputs=[x_in], outputs=[], updates=updates)
for i in range(150):
    train_step([K.eval(x)])
    if i % 10 == 0:
        print K.eval(x)


Goal is to find argmin_x(x^2)
x0 =  [[ 3.]]
[[ 2.9000001]]
[[ 1.91979825]]
[[ 1.03850329]]
[[ 0.36296561]]
[[-0.03721294]]
[[-0.17472973]]
[[-0.14568123]]
[[-0.06182532]]
[[ 0.00232363]]
[[ 0.02459837]]
[[ 0.01804653]]
[[ 0.00436032]]
[[-0.00345373]]
[[-0.00392353]]
[[-0.00132337]]

Optimize a function using get_updates only.


In [8]:
%reset -f
import keras.backend as K
from keras import optimizers, losses
import numpy as np

print "Goal is to find argmin_x[(y - y_des)^2], where y = x^2"
x0 = -3 * np.ones([1, 1])
y_des = 4 * np.ones([1, 1])  # Both x = 2 and x = -2 satisfy x^2 = 4; starting from -3 we should end up near -2.
print "x0 = ", x0, "  y_des = ", y_des
x = K.variable(value=x0)
y = x ** 2

opt = optimizers.Adam(0.1)

y_var = K.placeholder(ndim=2)
loss = losses.mean_squared_error(y, y_var)
updates = opt.get_updates([x], [], [loss])
train_step = K.function(inputs=[y_var], outputs=[], updates=updates)
for i in range(150):
    train_step([y_des])
    if i % 10 == 0:
        print K.eval(x)


Goal is to find argmin_x[(y - y_des)^2], where y = x^2
x0 =  [[-3.]]   y_des =  [[ 4.]]
[[-2.9000001]]
[[-2.051265]]
[[-1.70973158]]
[[-1.80159187]]
[[-2.00785089]]
[[-2.07333279]]
[[-2.00852156]]
[[-1.97366667]]
[[-1.9952966]]
[[-2.00945544]]
[[-2.00078177]]
[[-1.99651182]]
[[-2.00041032]]
[[-2.00112677]]
[[-1.99952078]]