In [2]:
import tensorflow as tf
import numpy as np
import numpy.random as rnd

from sklearn.preprocessing import StandardScaler

First graph in TensorFlow

  • tf.reset_default_graph
  • tf.Variable
    • Variable.name property: maps to the name of the mutable Tensor in which that variable is stored
    • Important for saving and restoring variables
    • Tensorboard uses these names
    • Relevant for TensorFlow namespace organization (i.e. scope)
    • "You can imagine Python namespace and TensorFlow namespace as two parallel universes. Names in TensorFlow space are actually the "real" attributes belonging to any TensorFlow variables, while names in Python space are just temporary pointers pointing to TensorFlow variables during this run of your script. That is the reason why when saving and restoring variables, only TensorFlow names are used, because the Python namespace no longer exists after script being terminated, but Tensorflow namespace is still there in your saved files."

In [3]:
tf.reset_default_graph()#important for tensorboard when using jupyter notebook

students = tf.Variable(13, name="students") 
coffee = tf.Variable(-10, name="coffee") 
lees_checking = students*coffee

#one way to do it but pretty verbose
sess = tf.Session() 
sess.run(students.initializer)
sess.run(coffee.initializer)
result = sess.run(lees_checking)
print("Lee's checking balance:" + str(result))
sess.close()


Lee's checking balance:-130

In [13]:
#but this is better
with tf.Session() as sess: 
    students.initializer.run() 
    coffee.initializer.run() 
    result = lees_checking.eval()
    print(result)


-130

We can do a lil bit better


In [14]:
tf.reset_default_graph()
students = tf.Variable(13, name="students") 
coffee = tf.Variable(-10, name="coffee") 
lees_checking = students*coffee
init = tf.global_variables_initializer() # prepare an init node
with tf.Session() as sess:
    init.run() # actually initialize all the variables
    print(result)


-130

Linear regression example


In [15]:
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]


scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

print(scaled_housing_data_plus_bias.shape)
print(scaled_housing_data_plus_bias.mean())
print(scaled_housing_data_plus_bias.mean(axis=0))
print(scaled_housing_data_plus_bias.mean(axis=1))


(20640, 9)
0.111111111111
[  1.00000000e+00   6.60969987e-17   5.50808322e-18   6.60969987e-17
  -1.06030602e-16  -1.10161664e-17   3.44255201e-18  -1.07958431e-15
  -8.52651283e-15]
[ 0.38915536  0.36424355  0.5116157  ..., -0.06612179 -0.06360587
  0.01359031]

Using the Normal Equation


In [30]:
tf.reset_default_graph()
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
with tf.Session() as sess: 
    theta_value = theta.eval()
    print(theta_value)


[[ 2.06856298]
 [ 0.82961965]
 [ 0.11875178]
 [-0.26552707]
 [ 0.30569667]
 [-0.00450281]
 [-0.03932635]
 [-0.8998825 ]
 [-0.87053877]]

Batch gradient descent with manually computed gradients


In [31]:
n_epochs = 1000
learning_rate = 0.01

# all the data, batch gradient descent
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch%100==0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()

print(theta_value)


('Epoch', 0, 'MSE =', 7.4496336)
('Epoch', 100, 'MSE =', 7.4496336)
('Epoch', 200, 'MSE =', 7.4496336)
('Epoch', 300, 'MSE =', 7.4496336)
('Epoch', 400, 'MSE =', 7.4496336)
('Epoch', 500, 'MSE =', 7.4496336)
('Epoch', 600, 'MSE =', 7.4496336)
('Epoch', 700, 'MSE =', 7.4496336)
('Epoch', 800, 'MSE =', 7.4496336)
('Epoch', 900, 'MSE =', 7.4496336)
[[ 2.06856298]
 [ 0.82961965]
 [ 0.11875178]
 [-0.26552707]
 [ 0.30569667]
 [-0.00450281]
 [-0.03932635]
 [-0.8998825 ]
 [-0.87053877]]

Batch gradient descent with using autodiff


In [32]:
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01

# all the data, batch gradient descent
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)


('Epoch', 0, 'MSE =', 2.7544272)
('Epoch', 100, 'MSE =', 0.63222194)
('Epoch', 200, 'MSE =', 0.5727796)
('Epoch', 300, 'MSE =', 0.55850053)
('Epoch', 400, 'MSE =', 0.54906934)
('Epoch', 500, 'MSE =', 0.54228771)
('Epoch', 600, 'MSE =', 0.53737879)
('Epoch', 700, 'MSE =', 0.53382188)
('Epoch', 800, 'MSE =', 0.53124273)
('Epoch', 900, 'MSE =', 0.52937055)
Best theta:
[[  2.06855249e+00]
 [  7.74078071e-01]
 [  1.31192386e-01]
 [ -1.17845066e-01]
 [  1.64778143e-01]
 [  7.44078017e-04]
 [ -3.91945094e-02]
 [ -8.61356676e-01]
 [ -8.23479772e-01]]

Gradient Descent with Optimizer


In [19]:
%%time
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print("Best theta:")
print(best_theta)


('Epoch', 0, 'MSE =', 2.7544272)
('Epoch', 100, 'MSE =', 0.63222194)
('Epoch', 200, 'MSE =', 0.5727796)
('Epoch', 300, 'MSE =', 0.55850053)
('Epoch', 400, 'MSE =', 0.54906934)
('Epoch', 500, 'MSE =', 0.54228771)
('Epoch', 600, 'MSE =', 0.53737879)
('Epoch', 700, 'MSE =', 0.53382188)
('Epoch', 800, 'MSE =', 0.53124273)
('Epoch', 900, 'MSE =', 0.52937055)
Best theta:
[[  2.06855249e+00]
 [  7.74078071e-01]
 [  1.31192386e-01]
 [ -1.17845066e-01]
 [  1.64778143e-01]
 [  7.44078017e-04]
 [ -3.91945094e-02]
 [ -8.61356676e-01]
 [ -8.23479772e-01]]
CPU times: user 2.47 s, sys: 173 ms, total: 2.64 s
Wall time: 1.43 s

Feeding data

Setting up placeholder nodes


In [20]:
tf.reset_default_graph()
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})

print(B_val_1)
print(B_val_2)


[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]

Mini-batch Gradient Descent

my mini batch method


In [21]:
%%time
#Batch parameters
n_epochs = 1000
learning_rate = 0.01
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

batch_index =0
rnd.seed(42)
indices = rnd.randint(m,size=m)
mb_indices = np.array_split(indices,n_batches)


#Construction phase (Our graph again)
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()

#Execution phase 
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_inds in mb_indices:
            X_batch = scaled_housing_data_plus_bias[batch_inds]
            y_batch = housing.target.reshape(-1, 1)[batch_inds]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()
print("Best theta:")
print(best_theta)


Best theta:
[[  2.06602192e+00]
 [  8.30935359e-01]
 [  1.26624838e-01]
 [ -2.78939813e-01]
 [  2.79391229e-01]
 [  7.06658990e-04]
 [ -3.94885316e-02]
 [ -8.93702090e-01]
 [ -8.75018418e-01]]
CPU times: user 1min 57s, sys: 19.7 s, total: 2min 16s
Wall time: 1min 35s

another mini batch method


In [26]:
%%time

tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

def fetch_batch(epoch, batch_index, batch_size):
    rnd.seed(epoch * n_batches + batch_index)
    indices = rnd.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

n_epochs = 1000
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()
print("Best theta:")
print(best_theta)


Best theta:
[[ 2.0614748 ]
 [ 0.82204753]
 [ 0.12198913]
 [-0.26323745]
 [ 0.32637092]
 [-0.01081076]
 [-0.06751335]
 [-0.90269727]
 [-0.8744489 ]]
CPU times: user 2min 6s, sys: 19.6 s, total: 2min 26s
Wall time: 1min 45s

saving and restoring


In [23]:
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        sess.run(training_op)
   
    best_theta = theta.eval()
    save_path = saver.save(sess, "my_model_final.ckpt")

print("Best theta:")
print(best_theta)

tf.reset_default_graph()


('Epoch', 0, 'MSE =', 2.7544272)
('Epoch', 100, 'MSE =', 0.63222194)
('Epoch', 200, 'MSE =', 0.5727796)
('Epoch', 300, 'MSE =', 0.55850053)
('Epoch', 400, 'MSE =', 0.54906934)
('Epoch', 500, 'MSE =', 0.54228771)
('Epoch', 600, 'MSE =', 0.53737879)
('Epoch', 700, 'MSE =', 0.53382188)
('Epoch', 800, 'MSE =', 0.53124273)
('Epoch', 900, 'MSE =', 0.52937055)
Best theta:
[[  2.06855249e+00]
 [  7.74078071e-01]
 [  1.31192386e-01]
 [ -1.17845066e-01]
 [  1.64778143e-01]
 [  7.44078017e-04]
 [ -3.91945094e-02]
 [ -8.61356676e-01]
 [ -8.23479772e-01]]

In [39]:
%%time

'''Time stamp log directory'''
#if you don't use a different log directory every time you run the program
# TensorBoard will merge stats from different runs

from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

'''Hyper parameters'''
learning_rate = 0.01
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

'''Construction phase'''
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

#FOR TENSORBOARD
#The first line creates a node in the graph that evaluatees the MSE value and writes it
#to the tensorboard log string called a 'summary'. The second line creates a FileWriter 
# that we'll use to write summaries to logfiles.
# logdir : path of log directory
#  tf.get_default_graph() : graph you want to visualize. optional
mse_summary = tf.summary.scalar('MSE', mse)
# merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())



with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                train_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()

train_writer.flush()
train_writer.close()
print("Best theta:")
print(best_theta)


0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
206
216
226
236
246
256
266
276
286
296
306
316
326
336
346
356
366
376
386
396
406
412
422
432
442
452
462
472
482
492
502
512
522
532
542
552
562
572
582
592
602
612
618
628
638
648
658
668
678
688
698
708
718
728
738
748
758
768
778
788
798
808
818
824
834
844
854
864
874
884
894
904
914
924
934
944
954
964
974
984
994
1004
1014
1024
Best theta:
[[ 2.07962132]
 [ 0.76063615]
 [ 0.13567705]
 [-0.13422588]
 [ 0.17423598]
 [-0.01030065]
 [-0.01802275]
 [-0.87352598]
 [-0.82851851]]
CPU times: user 1.29 s, sys: 190 ms, total: 1.48 s
Wall time: 1.25 s

In [38]:
9 % 10 == 0


Out[38]:
False

In [ ]: