The goal of this notebook is to collect useful insights gathered while learning Keras. Since I will be working closely with a colleague who uses Theano, the idea is to implement every functionality so that it works seamlessly with both backends.
Below are some functionalities that I found are not obvious from the official documentation.
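Since every cell should behave the same under TensorFlow and Theano, a first sanity check is to ask Keras which backend is actually active (a minimal sketch of mine; K.backend() reads the setting from keras.json):
import keras.backend as K
print "Active backend:", K.backend()  # Either 'tensorflow' or 'theano'.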
In [1]:
%reset -f
import keras.backend as K
x = K.variable(42.)
# Solution 1:
sess = K.get_session()
print sess.run(x)
# Solution 2 (seamless):
print K.eval(x)
In [2]:
%reset -f
import keras.backend as K
import numpy as np
x = K.placeholder(ndim=1)
y = 2 * x
feed = np.array([2])
# Solution 1:
sess = K.get_session()
print sess.run(y, { x : feed})
# Solution 2 (seamless):
f = K.function([x], [y])
print f([feed])[0]
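K.function is not limited to a single output; passing a list evaluates several tensors in one call with either backend. A minimal sketch reusing x, y and feed from the cell above (the extra tensor y + 1 is only for illustration):
g = K.function([x], [y, y + 1])
out_a, out_b = g([feed])
print out_a, out_b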
In [3]:
%reset -f
import numpy as np
import keras.backend as K
from keras.layers import Input, Dense, Activation
from keras.models import Model
from keras import optimizers, losses
# Define the model and generate the data.
print "Our model is y = 3 * x1 + 2 * x2"
num_data = 1000
x_train = np.random.rand(num_data, 2)
y_train = x_train * np.matrix([[3], [2]])
print "Data generated."
x = Input(shape=(2,), name='x')
y = Dense(1, activation='linear', use_bias=False)(x)
model = Model(inputs=x, outputs=y)
opt = optimizers.Adam(0.1)
loss = losses.mean_squared_error
model.compile(opt, loss)
# Retrieve the handle to the trainable weights; for this linear model dy_dx should equal w.
w = model.trainable_weights[0]
dy_dx = K.gradients(y, x)
f = K.function(inputs=[x], outputs=dy_dx)
fx = f([np.ones([1, 2])])[0] # Input is irrelevant, since we are computing the gradient of a linear function.
print "Before training:"
print "dy_dx = ", fx, ", w = ", K.eval(w).T
model.fit(x_train, y_train, epochs=500, batch_size=num_data, verbose=0)
print "\nAfter training:"
print "dy_dx = ", f([np.ones([1, 2])])[0], ", w = ", K.eval(w).T
# Calculate the gradient manually.
y_true = K.placeholder(shape=(None, 1))
dJ_dw = K.gradients(loss(y_true, y), w)  # Keras losses take (y_true, y_pred).
f = K.function(inputs=[x, y_true], outputs=dJ_dw)
print "\ndJ_dw (should be around 0):"
print f([x_train, y_train])[0].T
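To double-check the symbolic gradient, dJ/dw can also be estimated numerically with central differences, using K.get_value / K.set_value to perturb the kernel. This is a sketch under the assumption that the loss is the mean squared error over the whole batch (eps is an arbitrarily chosen step size); after training it should likewise be close to 0:
eps = 1e-4
w_val = K.get_value(w)
num_grad = np.zeros_like(w_val)
for i in range(w_val.shape[0]):
    w_pert = w_val.copy()
    w_pert[i, 0] += eps
    K.set_value(w, w_pert)
    loss_plus = np.mean(np.square(model.predict(x_train) - y_train))
    w_pert[i, 0] -= 2 * eps
    K.set_value(w, w_pert)
    loss_minus = np.mean(np.square(model.predict(x_train) - y_train))
    num_grad[i, 0] = (loss_plus - loss_minus) / (2 * eps)
K.set_value(w, w_val)  # Restore the trained weights.
print "Numerical dJ_dw:", num_grad.T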
In [4]:
%reset -f
import tensorflow as tf # Not seamless.
import numpy as np
import keras.backend as K
from keras.layers import Input, Dense, Activation
from keras.models import Model
from keras import losses
# Define the model and generate the data.
print "Our model is y = 3 * x1 + 2 * x2 + 5"
num_data = 1000
x_train = np.random.rand(num_data, 2)
y_train = x_train * np.matrix([[3], [2]]) + 5
print "Data generated."
x = Input(shape=(2,), name='x')
y = Dense(1, activation='linear')(x)
model = Model(inputs=x, outputs=y)
# Retrieve the handle to the trainable weights. In this case it is a list of variables [W, b].
w = model.trainable_weights
store_w = {l: l.get_weights() for l in model.layers}
print "Initial weights:", [K.eval(i).T for i in w]
opt = tf.train.AdamOptimizer(0.1)
# An output placeholder is needed to define the loss.
y_true = K.placeholder(shape=(None, 1))
loss = losses.mean_squared_error(y_true, y)  # (y_true, y_pred) argument order.
grads_and_vars = zip(tf.gradients(loss, w), w)
op = opt.apply_gradients(grads_and_vars)
print "\nFirst approach:"
f = K.function(inputs=[x, y_true], outputs=[], updates=[op])
for i in range(1000):
    f([x_train, y_train])
print "Optimized weights:", [K.eval(i).T for i in w]
# Restore weights from beginning:
for l in model.layers:
    l.set_weights(store_w[l])
print "\nRestored weights:", [K.eval(i).T for i in w]
print "\nSecond approach:"
sess = K.get_session()
sess.run(tf.global_variables_initializer())
for i in range(1000):
    sess.run(op, {x : x_train, y_true : y_train})
print "Optimized weights:", [K.eval(i).T for i in w]
In [5]:
%reset -f
import numpy as np
import keras.backend as K
from keras.layers import Input, Dense, Activation
from keras.models import Model
from keras import optimizers, losses
# Define the model and generate the data.
print "Our model is y = 3 * x1 + 2 * x2 + 5"
num_data = 1000
x_train = np.random.rand(num_data, 2)
y_train = x_train * np.matrix([[3], [2]]) + 5
print "Data generated."
x = Input(shape=(2,), name='x')
y = Dense(1, activation='linear')(x)
model = Model(inputs=x, outputs=y)
print "Initial weights:", [K.eval(i).T for i in model.trainable_weights]
opt = optimizers.Adam(0.1)
y_true = K.placeholder(shape=(None, 1))
loss = losses.mean_squared_error(y_true, y)  # (y_true, y_pred) argument order.
updates = opt.get_updates(model.trainable_weights, model.constraints, [loss])
train_step = K.function(inputs=[x, y_true], outputs=[], updates=updates)
for i in range(1000):
    train_step([x_train, y_train])
print "\nOptimized weights:", [K.eval(i).T for i in model.trainable_weights]
In [6]:
%reset -f
import keras.backend as K
from keras.layers import Input, Dense, Lambda
from keras.models import Model
import numpy as np
x1 = Input(shape=(2,))
y = Lambda(lambda x: x ** 2)(x1)
grads1 = K.gradients(y, x1)
f1 = K.function(inputs=[x1], outputs=grads1)
model1 = Model(inputs=x1, outputs=y)
x_feed = np.random.randn(2, 2)
print "1. Model: y1 = x ** 2"
print "x =\n", x_feed
print "y =\n", model1.predict_on_batch(x_feed)
print "dy_dx =\n", f1([x_feed])[0]
print "\n2. Model: y2 = 3 * x"
x2 = Input(shape=(2, ))
y = Lambda(lambda x: 3 * x)(x2)
grads2 = K.gradients(y, x2)
f2 = K.function(inputs=[x2], outputs=grads2)
model2 = Model(inputs=x2, outputs=y)
print "y2 =\n", model1.predict_on_batch(x_feed)
print "\n3. Model: z = y2 o y1"
z = model2(model1.outputs)
in1 = model1.inputs[0]
f3 = K.function(inputs=[in1], outputs=[z])
print "z =\n", f3([x_feed])[0]
grads3 = K.gradients(z, in1)
f4 = K.function(inputs=[in1], outputs=grads3)
print "dz_dx =\n", f4([x_feed])[0]
print "6x =\n", 6 * x_feed
In [7]:
%reset -f
import keras.backend as K
from keras.layers import Input, Lambda
from keras import optimizers
import numpy as np
print "Goal is to find argmin_x(x^2)"
x0 = 3 * np.ones([1, 1]) # Needs to be two-dimensional (batch, feature) so it can be fed to the Input layer.
print "x0 = ", x0
x = K.variable(value=x0)
x_in = Input(shape=(1,))
y = Lambda(lambda x: x ** 2)(x_in)
opt = optimizers.Adam(0.1)
def get_gradients(*unused):
    return K.gradients(y, x_in)
# Override the optimizer's gradient computation so that the updates for x use dy/dx_in:
opt.get_gradients = get_gradients
updates = opt.get_updates([x], [], [])
# Or simply: updates = opt.get_updates([x], [], [y])
train_step = K.function(inputs=[x_in], outputs=[], updates=updates)
for i in range(150):
    train_step([K.eval(x)])
    if i % 10 == 0:
        print K.eval(x)
In [8]:
%reset -f
import keras.backend as K
from keras import optimizers, losses
import numpy as np
print "Goal is to find argmin_x[(y - y_des)^2], where y = x^2"
x0 = -3 * np.ones([1, 1])
y_des = 4 * np.ones([1, 1])
print "x0 = ", x0, " y_des = ", y_des
x = K.variable(value=x0)
y = x ** 2
opt = optimizers.Adam(0.1)
y_var = K.placeholder(ndim=2)
loss = losses.mean_squared_error(y_var, y)  # (y_true, y_pred) argument order.
updates = opt.get_updates([x], [], [loss])
train_step = K.function(inputs=[y_var], outputs=[], updates=updates)
for i in range(150):
    train_step([y_des])
    if i % 10 == 0:
        print K.eval(x)
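Both +2 and -2 minimize the loss, since y = x^2 reaches y_des = 4 at either point; starting from x0 = -3 the iterates stay on the negative branch, so the final value should be close to -2 (a small check I added):
print "Final x:", K.eval(x), "(expected to be close to -2)"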