In [2]:
import tensorflow as tf
import numpy as np
import numpy.random as rnd
from sklearn.preprocessing import StandardScaler
In [3]:
tf.reset_default_graph()  # important for TensorBoard when using Jupyter notebooks
students = tf.Variable(13, name="students")
coffee = tf.Variable(-10, name="coffee")
lees_checking = students*coffee
# one way to do it, but pretty verbose
sess = tf.Session()
sess.run(students.initializer)
sess.run(coffee.initializer)
result = sess.run(lees_checking)
print("Lee's checking balance:" + str(result))
sess.close()
In [13]:
#but this is better
with tf.Session() as sess:
    students.initializer.run()
    coffee.initializer.run()
    result = lees_checking.eval()
print(result)
We can do a little better
In [14]:
tf.reset_default_graph()
students = tf.Variable(13, name="students")
coffee = tf.Variable(-10, name="coffee")
lees_checking = students*coffee
init = tf.global_variables_initializer() # prepare an init node
with tf.Session() as sess:
    init.run()  # actually initialize all the variables
    result = lees_checking.eval()
print(result)
In [15]:
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
print(scaled_housing_data_plus_bias.shape)
print(scaled_housing_data_plus_bias.mean())
print(scaled_housing_data_plus_bias.mean(axis=0))
print(scaled_housing_data_plus_bias.mean(axis=1))
Using the Normal Equation
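For reference, the closed-form solution computed below is the Normal Equation:

$$\hat{\theta} = (\mathbf{X}^\top \mathbf{X})^{-1}\,\mathbf{X}^\top \mathbf{y}$$

The next cell builds this op by op with tf.transpose, tf.matmul, and tf.matrix_inverse, so TensorFlow can place the computation on a GPU if one is available.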
In [30]:
tf.reset_default_graph()
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
with tf.Session() as sess:
    theta_value = theta.eval()
print(theta_value)
Batch gradient descent with manually computed gradients
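The gradient we hand-code below is the standard MSE gradient for linear regression (with $m$ training instances):

$$\nabla_{\theta}\,\text{MSE}(\theta) = \frac{2}{m}\,\mathbf{X}^\top(\mathbf{X}\theta - \mathbf{y})$$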
In [31]:
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01
# all the data: batch gradient descent
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
Batch gradient descent using autodiff
In [32]:
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01
# all the data, batch gradient descent
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]  # let TensorFlow work out the gradient symbolically
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
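To see what tf.gradients is doing in isolation, here is a minimal sketch on a toy function (separate from the housing graph; the variable x and the function f are made up purely for illustration):
In [ ]:
tf.reset_default_graph()
x = tf.Variable(3.0, name="x")
f = x ** 2 + 2.0 * x               # f(x) = x^2 + 2x
grad = tf.gradients(f, [x])[0]     # symbolic df/dx = 2x + 2
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(grad.eval())             # prints 8.0 at x = 3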
Gradient Descent with Optimizer
In [19]:
%%time
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
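A nice property of the optimizer API is that switching training algorithms is a one-line change. For example, a sketch swapping in the stock tf.train.MomentumOptimizer (momentum=0.9 is just an illustrative value):
In [ ]:
# drop-in replacement for the GradientDescentOptimizer line above
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)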
In [20]:
tf.reset_default_graph()
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
print(B_val_1)
print(B_val_2)
Mini-batch Gradient Descent
My mini-batch method
In [21]:
%%time
# Batch parameters
n_epochs = 1000
learning_rate = 0.01
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
rnd.seed(42)
indices = rnd.randint(m, size=m)  # m random row indices, sampled with replacement
mb_indices = np.array_split(indices, n_batches)
# Construction phase (our graph again)
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
# Execution phase
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_inds in mb_indices:
            X_batch = scaled_housing_data_plus_bias[batch_inds]
            y_batch = housing.target.reshape(-1, 1)[batch_inds]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
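Note that rnd.randint(m, size=m) samples rows with replacement, so within an epoch some rows appear twice and others not at all. A common alternative (a sketch, not what the cell above does) is to draw a fresh permutation every epoch; this would replace the epoch loop inside the Session block:
In [ ]:
for epoch in range(n_epochs):
    # every row appears exactly once per epoch
    for batch_inds in np.array_split(rnd.permutation(m), n_batches):
        X_batch = scaled_housing_data_plus_bias[batch_inds]
        y_batch = housing.target.reshape(-1, 1)[batch_inds]
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})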
Another mini-batch method
In [26]:
%%time
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
def fetch_batch(epoch, batch_index, batch_size):
    rnd.seed(epoch * n_batches + batch_index)  # a distinct, reproducible seed per batch
    indices = rnd.randint(m, size=batch_size)  # again, sampling with replacement
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

batch_size = 100
n_batches = int(np.ceil(m / batch_size))
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)
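Saving and restoring models
The next cell is the same batch gradient descent, plus a tf.train.Saver that checkpoints theta every 100 epochs and saves the final model at the end.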
In [23]:
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
            save_path = saver.save(sess, "/tmp/my_model.ckpt")  # checkpoint every 100 epochs
        sess.run(training_op)
    best_theta = theta.eval()
    save_path = saver.save(sess, "my_model_final.ckpt")
print("Best theta:")
print(best_theta)
tf.reset_default_graph()
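To reload the saved parameters later, restore them into a rebuilt graph (a minimal sketch; it assumes the same construction-phase code, including theta and saver, has been run again after the reset above):
In [ ]:
with tf.Session() as sess:
    saver.restore(sess, "my_model_final.ckpt")  # no init needed: restore sets the variables
    best_theta_restored = theta.eval()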
In [39]:
%%time
'''Time-stamped log directory'''
# if you don't use a different log directory for every run,
# TensorBoard will merge the stats from the different runs
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
'''Hyperparameters'''
learning_rate = 0.01
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
'''Construction phase'''
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
# FOR TENSORBOARD
# The first line below creates a node in the graph that evaluates the MSE value and
# writes it to a TensorBoard-compatible binary log string called a 'summary'.
# The second line creates the FileWriter we'll use to write summaries to logfiles:
#   logdir : path of the log directory
#   tf.get_default_graph() : the graph you want to visualize (optional)
mse_summary = tf.summary.scalar('MSE', mse)
# merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                train_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
train_writer.flush()
train_writer.close()
print("Best theta:")
print(best_theta)
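To inspect the run, start TensorBoard from a shell with tensorboard --logdir tf_logs and open the URL it prints (http://localhost:6006 by default).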
In [38]:
# sanity check on the logging condition above: batch_index 9 is not logged
9 % 10 == 0
Out[38]:
False