Based on
In [0]:
!pip install -q tf-nightly-gpu-2.0-preview
In [2]:
import tensorflow as tf
print(tf.__version__)
In [3]:
# a small sanity check, does tf seem to work ok?
hello = tf.constant('Hello TF!')
print("This works: {}".format(hello))
In [4]:
# this should return True even on Colab
tf.test.is_gpu_available()
Out[4]:
In [5]:
tf.test.is_built_with_cuda()
Out[5]:
In [6]:
!nvidia-smi
In [7]:
tf.executing_eagerly()
Out[7]:
In [0]:
# Toy training set: six points on the line y = -x + 1, one feature per sample.
# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of the notebook; later cells rely on this name, so it is kept as is.
input = [[-1], [0], [1], [2], [3], [4]]
output = [[2], [1], [0], [-1], [-2], [-3]]
In [9]:
import matplotlib.pyplot as plt
plt.xlabel('input')
plt.ylabel('output')
plt.plot(input, output, 'ro')
Out[9]:
In [10]:
plt.plot(input, output)
plt.plot(input, output, 'ro')
Out[10]:
An untrained single unit (neuron) also maps the same input to a line, just a different one.
In [11]:
# One neuron with three inputs, computed by hand: y = x . w + b
# 10 * 1.5 + 6 * (-2) + 8 * 1 + 6 = 17
x = tf.constant([[10, 6, 8]], dtype=tf.float32)  # one sample, three features
w = tf.constant([[1.5], [-2], [1]], dtype=tf.float32)  # one weight per feature
b = tf.constant([6], dtype=tf.float32)
y = tf.add(tf.matmul(x, w), b)
print(y)
In [0]:
from tensorflow.keras.layers import Layer
class LinearLayer(Layer):
    """A fully connected layer without activation: y = x . w + b.

    Args:
        units: number of output units (columns of `w`).
        input_dim: number of input features (rows of `w`).
        **kwargs: forwarded unchanged to the Keras `Layer` base class
            (for example `name`), so subclasses that pass keyword
            arguments through do not fail on base-class options.
    """

    def __init__(self, units=1, input_dim=1, **kwargs):
        super(LinearLayer, self).__init__(**kwargs)

        # weights start out random (normal, stddev=2), so an untrained layer
        # already describes some line, just not the one we are looking for
        w_init = tf.random_normal_initializer(stddev=2)
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype='float32'),
            trainable=True)

        # biases start at zero
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype='float32'),
            trainable=True)

    def call(self, inputs):
        """Return `inputs` multiplied by the weight matrix, plus the bias."""
        return tf.matmul(inputs, self.w) + self.b
linear_layer = LinearLayer()
In [13]:
x = tf.constant(input, dtype=tf.float32)
y_true = tf.constant(output, dtype=tf.float32)
y_true
Out[13]:
In [14]:
y_pred = linear_layer(x)
y_pred
Out[14]:
In [15]:
plt.plot(x, y_pred)
plt.plot(input, output, 'ro')
Out[15]:
In [0]:
loss_fn = tf.losses.mean_squared_error
# loss_fn = tf.losses.mean_absolute_error
In [17]:
loss = loss_fn(y_true=tf.squeeze(y_true), y_pred=tf.squeeze(y_pred))
print(loss)
In [18]:
tf.keras.losses.mean_squared_error == tf.losses.mean_squared_error
Out[18]:
Training moves through parameter space in the direction of descent, i.e. along the negative gradient of the loss.
https://twitter.com/colindcarroll/status/1090266016259534848
TensorFlow offers automatic differentiation: https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/GradientTape
In [19]:
# a simple example
# f(x) = x^2
# f'(x) = 2x
# x = 4
# f(4) = 16
# f'(4) = 8 (that's what we expect)
def tape_sample():
    """Differentiate f(x) = x^2 at x = 4 with a GradientTape and print the result."""
    point = tf.constant(4.0)

    with tf.GradientTape() as recorder:
        # constants are not tracked automatically, so ask the tape to watch it
        recorder.watch(point)
        squared = tf.multiply(point, point)

    # derivative of the recorded computation with respect to the input
    slope = recorder.gradient(squared, point)
    print(slope)
# just a function in order not to interfere with x on the global scope
tape_sample()
In [20]:
linear_layer = LinearLayer()
linear_layer.w, linear_layer.b
Out[20]:
In [21]:
linear_layer.trainable_weights
Out[21]:
In [0]:
EPOCHS = 200
learning_rate = 1e-2

# per-step history, only used for the plots further down
losses, weights, biases = [], [], []
weights_gradient, biases_gradient = [], []

for step in range(EPOCHS):
    # record the forward pass so it can be differentiated afterwards
    with tf.GradientTape() as tape:
        y_pred = linear_layer(x)
        loss = loss_fn(y_true=tf.squeeze(y_true), y_pred=tf.squeeze(y_pred))

    # gradients of the loss, in the order of trainable_weights (w first, then b)
    gradients = tape.gradient(loss, linear_layer.trainable_weights)
    grad_w, grad_b = gradients

    # logging: parameters *before* this step's update, plus their gradients
    losses.append(loss.numpy())
    weights.append(linear_layer.w.numpy()[0][0])
    biases.append(linear_layer.b.numpy()[0])
    weights_gradient.append(grad_w.numpy()[0][0])
    biases_gradient.append(grad_b.numpy()[0])

    # plain gradient descent: step against the gradient
    linear_layer.w.assign_sub(learning_rate * grad_w)
    linear_layer.b.assign_sub(learning_rate * grad_b)
In [23]:
print(loss)
In [24]:
plt.xlabel('epochs')
plt.ylabel('loss')
# plt.yscale('log')
plt.plot(losses)
Out[24]:
In [25]:
# parameters and their gradients over the course of training, in one chart
plt.figure(figsize=(20, 10))

# plotting order must match the order of the legend entries below
for series in (weights, biases, weights_gradient, biases_gradient):
    plt.plot(series)

plt.legend(['slope', 'offset', 'gradient slope', 'gradient offset'])
Out[25]:
In [26]:
y_pred = linear_layer(x)
y_pred
Out[26]:
In [27]:
plt.plot(x, y_pred)
plt.plot(input, output, 'ro')
Out[27]:
In [28]:
# single neuron and single input: one weight and one bias
# slope m ~ -1
# y-axis offset y0 ~ 1
# https://en.wikipedia.org/wiki/Linear_equation#Slope%E2%80%93intercept_form
linear_layer.trainable_weights
Out[28]:
In [0]:
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
In [0]:
EPOCHS = 500
losses = []

# fresh, randomly initialized layer so training starts from scratch
linear_layer = LinearLayer()

for step in range(EPOCHS):
    with tf.GradientTape() as tape:
        # forward pass and loss are recorded on the tape
        y_pred = linear_layer(x)
        loss = loss_fn(y_true=tf.squeeze(y_true), y_pred=tf.squeeze(y_pred))

    losses.append(loss)

    # this time the optimizer, not hand-written code, applies the update
    trainable = linear_layer.trainable_weights
    gradients = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))
In [31]:
# plt.yscale('log')
plt.ylabel("loss")
plt.xlabel("epochs")
plt.plot(losses)
Out[31]:
In [32]:
y_pred = linear_layer(x)
plt.plot(x, y_pred)
plt.plot(input, output, 'ro')
linear_layer.trainable_weights
Out[32]:
In [33]:
import numpy as np
# ground truth line y = a * x + b, sampled at n random points with gaussian noise
a, b = -1, 1
n = 50

x = tf.constant(np.random.uniform(0, 1, n), dtype='float32')
noise = 0.1 * np.random.normal(0, 1, n)
y = tf.constant(a * x + b + noise, dtype='float32')

plt.scatter(x, y)
Out[33]:
In [0]:
x = tf.reshape(x, (n, 1))
y_true = tf.reshape(y, (n, 1))
In [35]:
linear_layer = LinearLayer()
a = linear_layer.w.numpy()[0][0]
b = linear_layer.b.numpy()[0]
def plot_line(a, b, x, y_true):
    """Plot the line y = a * x + b together with the data points as red dots.

    Returns the figure and the list of line artists created for the line.
    """
    figure, axes = plt.subplots()
    fitted = axes.plot(x, a * x + b)
    axes.plot(x, y_true, 'ro')
    return figure, fitted
plot_line(a, b, x, y_true)
Out[35]:
In [0]:
# the problem is a little bit harder, train for a little longer
EPOCHS = 2000

losses = []
lines = []

linear_layer = LinearLayer()

# A new layer gets a new optimizer: the instance created earlier has already
# been applied to the variables of a different LinearLayer, and an optimizer
# should not be shared between independently trained models.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

for step in range(EPOCHS):
    # Open a GradientTape.
    with tf.GradientTape() as tape:
        # Forward pass.
        y_pred = linear_layer(x)

        # Loss value for this batch.
        loss = loss_fn(y_true=tf.squeeze(y_true), y_pred=tf.squeeze(y_pred))

    losses.append(loss)

    # remember the line (slope, offset) as it was before this step's update
    a = linear_layer.w.numpy()[0][0]
    b = linear_layer.b.numpy()[0]
    lines.append((a, b))

    # Get gradients of weights wrt the loss.
    gradients = tape.gradient(loss, linear_layer.trainable_weights)

    # Update the weights of our linear layer.
    optimizer.apply_gradients(zip(gradients, linear_layer.trainable_weights))
In [37]:
print(loss)
In [38]:
# plt.yscale('log')
plt.ylabel("loss")
plt.xlabel("epochs")
plt.plot(losses)
Out[38]:
In [39]:
a, b = lines[0]
plot_line(a, b, x, y_true)
Out[39]:
In [40]:
a, b = lines[500]
plot_line(a, b, x, y_true)
Out[40]:
In [41]:
# last recorded line; index -1 stays valid when EPOCHS is changed
a, b = lines[-1]
plot_line(a, b, x, y_true)
Out[41]:
In [42]:
import numpy as np
# Evaluate the trained layer on an evenly spaced grid. -1 lets reshape infer
# the number of rows, because the length of a float-stepped arange should not
# be hard-coded.
x = tf.reshape(tf.constant(np.arange(-1, 4, 0.1), dtype='float32'), (-1, 1))
y_pred = linear_layer(x)

plt.figure(figsize=(20, 10))

# raw output of the layer, then the same output passed through each activation
plt.plot(x, y_pred)

y_pred_relu = tf.nn.relu(y_pred)
plt.plot(x, y_pred_relu)

y_pred_sigmoid = tf.nn.sigmoid(y_pred)
plt.plot(x, y_pred_sigmoid)

y_pred_tanh = tf.nn.tanh(y_pred)
plt.plot(x, y_pred_tanh)

# original training points for reference
plt.plot(input, output, 'ro')

plt.legend(['no activation', 'relu', 'sigmoid', 'tanh'])
Out[42]:
In [43]:
from matplotlib.colors import ListedColormap
a = -1
b = 1
n = 100

# all points: n samples with two features each, uniform in the unit square
X = np.random.uniform(0, 1, (n, 2))

# our line: a straight line is fully determined by its two end points, so draw
# it across the whole [0, 1] range instead of through unsorted random x values
line_x = np.array([0.0, 1.0])
line_y = a * line_x + b
plt.plot(line_x, line_y, 'r')

# label is 1 for points above the line and 0 for points below it
y = X[:, 1] > a * X[:, 0] + b
y = y.astype(int)

plt.xlabel("x1")
plt.ylabel("x2")
plt.scatter(X[:,0], X[:,1], c=y, cmap=ListedColormap(['#AA6666', '#6666AA']), marker='o', edgecolors='k')
y
Out[43]:
In [0]:
class SigmoidLayer(LinearLayer):
    """LinearLayer followed by a sigmoid: y = sigmoid(w.x + b)"""

    def __init__(self, **kwargs):
        # no state of its own, construction is handled entirely by LinearLayer
        super().__init__(**kwargs)

    def call(self, inputs):
        linear_output = super(SigmoidLayer, self).call(inputs)
        return tf.sigmoid(linear_output)
In [45]:
x = tf.constant(X, dtype='float32')
y_true = tf.constant(y, dtype='float32')
x.shape
Out[45]:
In [0]:
model = SigmoidLayer(input_dim=2)
Cross-entropy is an alternative loss function to squared error.
In [0]:
loss_fn = tf.losses.binary_crossentropy
In [0]:
# standard optimizer using advanced properties
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-1)
In [0]:
# https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/metrics/Accuracy
m = tf.keras.metrics.Accuracy()
In [0]:
EPOCHS = 1000

losses = []
accuracies = []

# A metric object keeps accumulating over every update_state() call until it
# is reset. Look up the reset method once: it is named `reset_state` in newer
# TensorFlow releases and `reset_states` in older ones.
reset_metric = getattr(m, 'reset_state', None) or m.reset_states

for step in range(EPOCHS):
    # Open a GradientTape.
    with tf.GradientTape() as tape:
        # Forward pass.
        y_pred = model(x)

        # Loss value for this batch.
        loss = loss_fn(y_true=tf.squeeze(y_true), y_pred=tf.squeeze(y_pred))

    # Accuracy of *this* epoch only: threshold the probabilities at 0.5 and
    # clear whatever earlier epochs accumulated before updating the metric,
    # otherwise the recorded value is a running average over all epochs.
    y_pred_binary = (tf.squeeze(y_pred) > 0.5).numpy().astype(float)
    reset_metric()
    m.update_state(tf.squeeze(y_true), y_pred_binary)
    accuracy = m.result().numpy()

    losses.append(loss)
    accuracies.append(accuracy)

    # Get gradients of weights wrt the loss.
    gradients = tape.gradient(loss, model.trainable_weights)

    # Update the weights of our model.
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
In [51]:
print(loss)
In [52]:
print(accuracy)
In [53]:
plt.yscale('log')
plt.ylabel("loss")
plt.xlabel("epochs")
plt.plot(losses)
Out[53]:
In [54]:
plt.ylabel("accuracy")
plt.xlabel("epochs")
plt.plot(accuracies)
Out[54]:
In [55]:
y_pred = model(x)
y_pred_binary = (tf.squeeze(y_pred) > 0.5).numpy().astype(float)
y_pred_binary
Out[55]:
In [56]:
y_true - y_pred_binary
Out[56]:
In [57]:
# below and above line
plt.xlabel("x1")
plt.ylabel("x2")
plt.scatter(X[:,0], X[:,1], c=y_pred_binary, cmap=ListedColormap(['#AA6666', '#6666AA']), marker='o', edgecolors='k')
Out[57]:
In [58]:
from tensorflow.keras.layers import Dense
model = tf.keras.Sequential()
model.add(Dense(units=1, activation='sigmoid', input_dim=2))
model.summary()
In [59]:
%%time
model.compile(loss=loss_fn,  # binary cross entropy, unchanged from low level example
              # same Adam settings as the low level example, but a new instance:
              # the one from above has already been used to train another model
              # and would carry its internal state over into this training run
              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-1),
              metrics=['accuracy'])

# does a similar thing internally as our loop from above
history = model.fit(x, y_true, epochs=EPOCHS, verbose=0)
In [60]:
loss, accuracy = model.evaluate(x, y_true)
loss, accuracy
Out[60]:
In [61]:
# accuracy is bounded by [0, 1] and can be exactly 0, so it is shown on a
# linear axis, like the accuracy plot of the hand-written training loop
plt.ylabel("accuracy")
plt.xlabel("epochs")
plt.plot(history.history['accuracy'])
Out[61]:
In [62]:
plt.yscale('log')
plt.ylabel("loss")
plt.xlabel("epochs")
plt.plot(history.history['loss'])
Out[62]:
In [0]:
y_pred = model.predict(x)
y_pred_binary = (tf.squeeze(y_pred) > 0.5).numpy().astype(float)
In [64]:
# below and above line
plt.xlabel("x1")
plt.ylabel("x2")
plt.scatter(X[:,0], X[:,1], c=y_pred_binary, cmap=ListedColormap(['#AA6666', '#6666AA']), marker='o', edgecolors='k')
Out[64]: