In [1]:
# import necessary packages
import tensorflow as tf
import numpy as np
In [2]:
# Smoke-test the TensorFlow installation: build a tiny graph (2 * 2) and
# evaluate it once.
a = tf.constant(2)
b = tf.constant(2)
mult = tf.multiply(a, b)

# Run the graph inside a context manager so the session is closed (and its
# resources released) as soon as the value has been fetched.
with tf.Session() as sess:
    mult_value = sess.run(mult)

# Last expression of the cell, so the notebook displays the result.
mult_value
Out[2]:
In [3]:
# Weight matrix for one fully connected feed-forward layer: it connects a
# layer of 300 neurons to a layer of 200 neurons, hence the 300x200 shape.
# The initial values are drawn from a normal distribution with stddev 0.5.
weights = tf.Variable(
    initial_value=tf.random_normal(shape=[300, 200], stddev=0.5),
    name="weights",
)
In [4]:
# Shape shared by all example tensors below.
shape = (200, 300)

# Common tensors from the TensorFlow API docs, written with every argument
# spelled out by keyword.

# Tensors filled with a constant value.
zeros = tf.zeros(shape=shape, dtype=tf.float32, name=None)
ones = tf.ones(shape=shape, dtype=tf.float32, name=None)

# Tensors filled with random values.
std_normal_dist = tf.random_normal(
    shape=shape,
    mean=0.0,
    stddev=1.0,
    dtype=tf.float32,
    seed=None,
    name=None,
)
# NOTE: the variable name is misspelled ("trunacted"); it is kept as-is
# because other cells may refer to it.
trunacted_normal = tf.truncated_normal(
    shape=shape,
    mean=0.0,
    stddev=1.0,
    dtype=tf.float32,
    seed=None,
    name=None,
)
uniform_dist = tf.random_uniform(
    shape=shape,
    minval=0,
    maxval=None,
    dtype=tf.float32,
    seed=None,
    name=None,
)
In [5]:
import tensorflow as tf
# Linear model y = xW + b: a batch of 1024 inputs with 1024 features each is
# mapped onto 10 outputs per input.
x = tf.placeholder(dtype=tf.float32, shape=(1024, 1024))
W = tf.Variable(
    tf.random_uniform([1024, 10], minval=-1, maxval=1),
    name="W",
)
b = tf.Variable(tf.zeros([10]), name="biases")
y = tf.matmul(x, W) + b

# Op that assigns every variable its initial value.
init_op = tf.global_variables_initializer()

# Random input batch that is fed through the model.
rand_array = np.random.rand(1024, 1024)

# Initialise the variables and evaluate the model once; the session is
# closed when the block exits.
with tf.Session() as sess:
    sess.run(init_op)
    result = sess.run(y, feed_dict={x: rand_array})
    print(result)
    print(result.shape)
In [6]:
# using tf-namespaces/variable scoping
def layer(input, weight_shape, bias_shape):
    """Build one affine layer, ``input @ W + b``, in the current variable scope.

    The variables are obtained through ``tf.get_variable`` under the names
    ``"W"`` and ``"b"``, so they are created in (or, when reuse is enabled,
    fetched from) whichever ``tf.variable_scope`` is active at call time.

    Args:
        input: tensor that is matrix-multiplied with the weight matrix.
        weight_shape: shape of ``W``; initialised uniformly in [-1, 1).
        bias_shape: shape of ``b``; initialised to 0.

    Returns:
        The tensor ``tf.matmul(input, W) + b`` (no non-linearity is applied).
    """
    W = tf.get_variable(
        "W",
        weight_shape,
        initializer=tf.random_uniform_initializer(minval=-1, maxval=1),
    )
    b = tf.get_variable(
        "b",
        bias_shape,
        initializer=tf.constant_initializer(value=0),
    )
    affine = tf.matmul(input, W) + b
    return affine
def my_network(input):
    """Stack three ``layer`` calls (784 -> 100 -> 50 -> 10).

    Each layer is built inside its own variable scope ("layer_1", "layer_2",
    "layer_3") so that the "W" and "b" variables of the layers do not clash.

    Args:
        input: tensor fed to the first layer (its last dimension must be 784
            to match the first weight matrix).

    Returns:
        The output tensor of the third layer.
    """
    # (scope name, weight shape, bias shape) for each layer, in order.
    layer_specs = (
        ("layer_1", [784, 100], [100]),
        ("layer_2", [100, 50], [50]),
        ("layer_3", [50, 10], [10]),
    )

    current = input
    for scope_name, weight_shape, bias_shape in layer_specs:
        with tf.variable_scope(scope_name):
            current = layer(current, weight_shape, bias_shape)
    return current
In [7]:
# By default, variable sharing is not allowed. To build the same network a
# second time on top of the variables created by the first build, sharing
# has to be switched on explicitly for the enclosing variable scope.
with tf.variable_scope("shared_variables") as scope:
    # First build: creates the variables of all three layers.
    i_1 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name="i_1")
    print(my_network(i_1))

    # From here on, tf.get_variable calls in this scope reuse the existing
    # variables.
    scope.reuse_variables()

    # Second build: a new input, wired to the variables created above.
    i_2 = tf.placeholder(dtype=tf.float32, shape=[1000, 784], name="i_2")
    print(my_network(i_2))
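We use the MNIST dataset, whose 28x28-pixel images are flattened into 784-dimensional input vectors. The target classes are the digits 0–9, so the output layer is a softmax of size 10.
Steps to train and evaluate the model:
1. `inference` – compute the softmax class probabilities for a mini-batch.
2. `loss` – measure the cross-entropy between the probabilities and the labels.
3. `training` – take one gradient-descent step on the loss.
4. `evaluate` – compute the classification accuracy.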
In [8]:
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data sets, using "MNIST_data/" as the data directory; the
# labels are returned as one-hot vectors.
mnist = input_data.read_data_sets(train_dir="MNIST_data/", one_hot=True)
In [17]:
def inference(x):
    """Softmax regression: class probabilities for a batch of inputs.

    Creates (or reuses, depending on the active variable scope) a 784x10
    weight matrix "W" and a 10-element bias "b", both initialised to 0.

    Args:
        x: input tensor whose last dimension is 784.

    Returns:
        ``softmax(x @ W + b)``: one probability per class for each input row.
    """
    zero_init = tf.constant_initializer(value=0)
    W = tf.get_variable("W", [784, 10], initializer=zero_init)
    b = tf.get_variable("b", [10], initializer=zero_init)
    logits = tf.matmul(x, W) + b
    return tf.nn.softmax(logits)
Formula for the cross-entropy loss: $H(y, \hat{y}) = \sum_i y_i \log \frac{1}{\hat{y}_i} = -\sum_i y_i \log \hat{y}_i$
In [11]:
def loss(output, y):
    """Mean cross-entropy between predicted probabilities and targets.

    For every example the cross-entropy ``-sum_i y_i * log(output_i)`` is
    computed over the class axis, and the result is averaged over the batch.

    Args:
        output: predicted class probabilities; assumed to be laid out as
            (batch, classes), e.g. the softmax returned by ``inference``.
        y: target distribution (one-hot labels) with the same shape as
            ``output``.

    Returns:
        Scalar tensor holding the mean cross-entropy of the batch.

    Note on ``tf.reduce_sum`` and its ``axis`` argument, for
    ``x = [[1, 1, 1], [1, 1, 1]]``::

        tf.reduce_sum(x)    ==> 6
        tf.reduce_sum(x, 0) ==> [2, 2, 2]
        tf.reduce_sum(x, 1) ==> [3, 3]
    """
    # A softmax output can reach exactly 0 in float32. log(0) is -inf and
    # 0 * -inf is NaN, which would turn the whole loss into NaN. Clipping
    # the probabilities away from 0 keeps the loss finite.
    eps = 1e-10
    safe_output = tf.clip_by_value(output, eps, 1.0)

    # Element-wise y_i * log(output_i); summing over axis 1 (the classes)
    # gives one cross-entropy value per example.
    weighted_log = y * tf.log(safe_output)
    xentropy = -tf.reduce_sum(weighted_log, axis=1)

    # Average over the examples of the batch.
    return tf.reduce_mean(xentropy)
In [21]:
def training(cost, global_step):
    """Create the op that performs one gradient-descent step on ``cost``.

    The step size is read from the module-level ``learning_rate``, which must
    be defined before this function is called.

    Args:
        cost: scalar loss tensor to minimise.
        global_step: variable that the optimizer increments on every step.

    Returns:
        The training op; running it applies one update to the variables.
    """
    # The cost is a single number per step, so it is logged as a scalar
    # summary: that is the summary type TensorBoard plots as a curve.
    tf.summary.scalar("cost", cost)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(cost, global_step=global_step)
    return train_op
In [13]:
def evaluate(output, y):
    """Classification accuracy of ``output`` with respect to the targets ``y``.

    Args:
        output: per-class scores/probabilities; the predicted class is the
            index of the largest entry along axis 1.
        y: targets in the same layout (e.g. one-hot labels); the true class is
            the index of the largest entry along axis 1.

    Returns:
        Scalar float32 tensor: the fraction of rows whose predicted class
        equals the true class.
    """
    predicted_class = tf.argmax(output, 1)
    true_class = tf.argmax(y, 1)
    # 1.0 where the prediction is correct, 0.0 otherwise.
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    return tf.reduce_mean(hits)
In [28]:
# Parameters
# Step size handed to the gradient-descent optimizer in training().
learning_rate = 0.01
# Number of passes of the training loop over the training set.
training_epochs = 100
# Number of examples requested per mini-batch from mnist.train.next_batch.
batch_size = 100
# Validation, summary writing and checkpointing happen on every epoch whose
# index is a multiple of display_step.
display_step = 1
In [33]:
from tqdm import tqdm
# program flow: build the graph, train for `training_epochs` epochs with
# per-epoch validation/summary/checkpoint, then report the test accuracy.
with tf.Graph().as_default():
    # mnist data image of shape 28*28=784
    x = tf.placeholder("float", [None, 784])
    # 0-9 digits recognition => 10 classes
    y = tf.placeholder("float", [None, 10])

    # Forward pass, loss, optimisation step and accuracy.
    output = inference(x)
    cost = loss(output, y)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = training(cost, global_step)
    eval_op = evaluate(output, y)

    # tf.summary.merge_all collects every summary registered on the graph;
    # the tf.summary.FileWriter below writes the evaluated summaries to disk.
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()

    # Number of mini-batches per epoch. It does not change between epochs,
    # so it is computed once.
    total_batch = mnist.train.num_examples // batch_size

    # The session is used as a context manager so that it is closed when the
    # cell finishes, including when training raises.
    with tf.Session() as sess:
        # write to tensorboard graph api
        summary_writer = tf.summary.FileWriter(
            "logistic_logs/", graph=sess.graph)
        try:
            sess.run(init_op)

            # training cycle
            for epoch in tqdm(range(training_epochs)):
                avg_cost = 0.
                # Loop over all batches
                for i in range(total_batch):
                    mbatch_x, mbatch_y = mnist.train.next_batch(batch_size)
                    feed_dict = {x: mbatch_x, y: mbatch_y}
                    # Fit on the batch and fetch its loss in the same run
                    # call, which avoids a second forward pass per step.
                    minibatch_cost = sess.run(
                        [train_op, cost], feed_dict=feed_dict)[1]
                    # Compute average loss
                    avg_cost += minibatch_cost / total_batch

                # Display logs per epoch step
                if epoch % display_step == 0:
                    val_feed_dict = {
                        x: mnist.validation.images,
                        y: mnist.validation.labels
                    }
                    accuracy = sess.run(eval_op, feed_dict=val_feed_dict)
                    print("Validation Error in epoch %s: %.11f" % (epoch, 1 - accuracy))
                    print("Average training cost in epoch %s: %.11f" % (epoch, avg_cost))

                    # Summaries are evaluated on the last mini-batch of the
                    # epoch and stored under the current global step.
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(
                        summary_str, sess.run(global_step))

                    saver.save(
                        sess,
                        "logistic_logs/model-checkpoint",
                        global_step=global_step)

            test_feed_dict = {x: mnist.test.images, y: mnist.test.labels}
            accuracy = sess.run(eval_op, feed_dict=test_feed_dict)
            print("Test Accuracy:", accuracy)
        finally:
            # Flush buffered events to disk and release the log file.
            summary_writer.close()