Keras vs. TensorFlow Syntax


In [1]:
import sys
import time
import numpy as np
np.random.seed(1)

In [2]:
# http://stackoverflow.com/questions/3160699/python-progress-bar
def progressbar(it, size=30):
    count = len(it)
    def _show(_i):
        x = int(size*_i/count)
        sys.stdout.write("{}/{} [{}{}] \r".format(_i, count, "="*x, "."*(size-x)))
        sys.stdout.flush()

    _show(0)
    for i, item in enumerate(it):
        yield item
        _show(i+1)
    sys.stdout.write("\n")
    sys.stdout.flush()

Installation

pip install keras

pip install tensorflow

Import


In [3]:
import keras

import tensorflow as tf


Using TensorFlow backend.

Linear Regression


In [4]:
# data
x = np.random.randn(100)
y = 0.5 * x + 1

In [5]:
# Keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation

model = Sequential()
model.add(Dense(units=1, activation="linear", input_dim=1))

model.compile(loss="mean_squared_error", optimizer="sgd")
model.summary()
model.fit(x, y, batch_size=5, validation_split=0.2, verbose=1)
model.get_weights()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 1)                 2         
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
Train on 80 samples, validate on 20 samples
Epoch 1/10
80/80 [==============================] - 0s - loss: 0.8591 - val_loss: 0.5711
Epoch 2/10
80/80 [==============================] - 0s - loss: 0.4440 - val_loss: 0.2948
Epoch 3/10
80/80 [==============================] - 0s - loss: 0.2278 - val_loss: 0.1524
Epoch 4/10
80/80 [==============================] - 0s - loss: 0.1178 - val_loss: 0.0788
Epoch 5/10
80/80 [==============================] - 0s - loss: 0.0608 - val_loss: 0.0407
Epoch 6/10
80/80 [==============================] - 0s - loss: 0.0313 - val_loss: 0.0210
Epoch 7/10
80/80 [==============================] - 0s - loss: 0.0162 - val_loss: 0.0109
Epoch 8/10
80/80 [==============================] - 0s - loss: 0.0083 - val_loss: 0.0056
Epoch 9/10
80/80 [==============================] - 0s - loss: 0.0043 - val_loss: 0.0029
Epoch 10/10
80/80 [==============================] - 0s - loss: 0.0022 - val_loss: 0.0015
Out[5]:
[array([[ 0.48942313]], dtype=float32), array([ 0.96271145], dtype=float32)]

In [6]:
# one liner
# Keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation

model = Sequential([Dense(units=1, activation="linear", input_dim=1)])

model.compile(loss="mean_squared_error", optimizer="sgd")
model.summary()
model.fit(x, y, batch_size=5, validation_split=0.2, verbose=1)
model.get_weights()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_2 (Dense)              (None, 1)                 2         
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
Train on 80 samples, validate on 20 samples
Epoch 1/10
80/80 [==============================] - 0s - loss: 2.7193 - val_loss: 1.0549
Epoch 2/10
80/80 [==============================] - 0s - loss: 1.4228 - val_loss: 0.5440
Epoch 3/10
80/80 [==============================] - 0s - loss: 0.7544 - val_loss: 0.2808
Epoch 4/10
80/80 [==============================] - 0s - loss: 0.3995 - val_loss: 0.1449
Epoch 5/10
80/80 [==============================] - 0s - loss: 0.2118 - val_loss: 0.0749
Epoch 6/10
80/80 [==============================] - 0s - loss: 0.1121 - val_loss: 0.0388
Epoch 7/10
80/80 [==============================] - 0s - loss: 0.0597 - val_loss: 0.0201
Epoch 8/10
80/80 [==============================] - 0s - loss: 0.0318 - val_loss: 0.0104
Epoch 9/10
80/80 [==============================] - 0s - loss: 0.0170 - val_loss: 0.0054
Epoch 10/10
80/80 [==============================] - 0s - loss: 0.0091 - val_loss: 0.0028
Out[6]:
[array([[ 0.42355224]], dtype=float32), array([ 0.97209901], dtype=float32)]

In [7]:
"""
full_size = 80
batch_size = 5
-> full gradient pass = 16 (80/5)
"""
for i in progressbar(range(80)):
    if i > 16:
        # just run the progessbar without any changes
        pass
    else:
        # do something
        time.sleep(0.1)


80/80 [==============================] 

In [8]:
# TensorFlow
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Parameters
learning_rate = 0.01
training_epochs = 10
split = 0.2
batch_size = 10

graph = tf.Graph()

with graph.as_default():
    # Placeholder & Variables
    X = tf.placeholder(tf.float32, name="X")
    Y = tf.placeholder(tf.float32, name="Y")
    # tf.random_normal(shape=[]) == np.random.randn()
    W = tf.Variable(tf.random_normal(shape=[]), name="weight")
    b = tf.Variable(tf.random_normal(shape=[]), name="bias")

    # Construct a linear model
    Y_predicted = tf.add(tf.multiply(X, W), b)

    # Mean squared error
    #cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)
    cost = tf.losses.mean_squared_error(labels=Y, predictions=Y_predicted)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()
    

# Launch the graph
with tf.Session(graph=graph) as sess:
    writer = tf.summary.FileWriter("./graphs", sess.graph) # python -m tensorflow.tensorboard logs="./graphs"
    sess.run(init)
    
    # validation_split
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=split)

    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        train_range = int(x_train.shape[0] / batch_size)
        for index in progressbar(range(x_train.shape[0])):
            if index > train_range:
                pass
            else:
                mini_batch = np.random.choice(x_train.shape[0], batch_size, replace=False)
                _, loss = sess.run([optimizer, cost], feed_dict={X: x_train[mini_batch], Y: y_train[mini_batch]})
        
        val_range = int(x_val.shape[0] / batch_size)
        for _ in range(x_val.shape[0]): 
            mini_batch = np.random.choice(x_val.shape[0], batch_size, replace=False)
            _, val_loss = sess.run([optimizer, cost], feed_dict={X: x_val[mini_batch], Y: y_val[mini_batch]})
            
        print("loss: {} - val_loss: {}".format(loss, val_loss))
        
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: x, Y: y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
        
    writer.close()


Epoch: 1/10
80/80 [==============================] 
loss: 0.3362872004508972 - val_loss: 0.1482759267091751
Epoch: 2/10
80/80 [==============================] 
loss: 0.10350023210048676 - val_loss: 0.04635412245988846
Epoch: 3/10
80/80 [==============================] 
loss: 0.032445114105939865 - val_loss: 0.014212565496563911
Epoch: 4/10
80/80 [==============================] 
loss: 0.010255953297019005 - val_loss: 0.004444177262485027
Epoch: 5/10
80/80 [==============================] 
loss: 0.0032214797101914883 - val_loss: 0.0014065582072362304
Epoch: 6/10
80/80 [==============================] 
loss: 0.000945859937928617 - val_loss: 0.0004374490526970476
Epoch: 7/10
80/80 [==============================] 
loss: 0.00030181874171830714 - val_loss: 0.0001299934519920498
Epoch: 8/10
80/80 [==============================] 
loss: 9.21563696465455e-05 - val_loss: 4.0688559238333255e-05
Epoch: 9/10
80/80 [==============================] 
loss: 2.9348942916840315e-05 - val_loss: 1.240925212186994e-05
Epoch: 10/10
80/80 [==============================] 
loss: 8.551732207706664e-06 - val_loss: 3.8293942452583e-06
Optimization Finished!
Training cost= 3.78803e-06 W= 0.499841 b= 0.998068 

Save Model (Linear Regression)


In [9]:
# Keras
from keras.models import load_model

# save model -> Another way is to save weights and model seperately
model.save("linear_model.h5")

# load model
model_loaded = load_model("linear_model.h5")

In [10]:
model.summary(), model_loaded.get_config(), model_loaded.get_weights()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_2 (Dense)              (None, 1)                 2         
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
Out[10]:
(None,
 [{'class_name': 'Dense',
   'config': {'activation': 'linear',
    'activity_regularizer': None,
    'batch_input_shape': (None, 1),
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'VarianceScaling',
     'config': {'distribution': 'uniform',
      'mode': 'fan_avg',
      'scale': 1.0,
      'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense_2',
    'trainable': True,
    'units': 1,
    'use_bias': True}}],
 [array([[ 0.42355224]], dtype=float32), array([ 0.97209901], dtype=float32)])

In [11]:
# TensorFlow - save model without graph
import tensorflow as tf

# Parameters
learning_rate = 0.01
training_epochs = 10
split = 0.2

graph = tf.Graph()

with graph.as_default():
    # Placeholder & Variables
    X = tf.placeholder(tf.float32, name="X")
    Y = tf.placeholder(tf.float32, name="Y")
    # tf.random_normal(shape=[]) == np.random.randn()
    W = tf.Variable(tf.random_normal(shape=[]), name="weight")
    b = tf.Variable(tf.random_normal(shape=[]), name="bias")

    # Construct a linear model
    Y_predicted = tf.add(tf.multiply(X, W), b)

    # Mean squared error
    cost = tf.losses.mean_squared_error(labels=Y, predictions=Y_predicted)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()
    
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()
    
# Launch the graph
with tf.Session(graph=graph) as sess:
    sess.run(init)
    
    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        # loop through the entire batch
        for i, j in zip(x, y):
            _, loss = sess.run([optimizer, cost], feed_dict={X: i, Y: j})
         
        print("loss: {}".format(loss))
        
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: x, Y: y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
    save_path = saver.save(sess, "linear_model.ckpt")
    print("Model saved in file: {}".format(save_path))


Epoch: 1/10
loss: 6.723103433614597e-05
Epoch: 2/10
loss: 6.750468855898362e-06
Epoch: 3/10
loss: 4.157709554419853e-07
Epoch: 4/10
loss: 2.118612485446647e-08
Epoch: 5/10
loss: 9.904397302307189e-10
Epoch: 6/10
loss: 4.617106696969131e-11
Epoch: 7/10
loss: 1.4210854715202004e-12
Epoch: 8/10
loss: 1.2789769243681803e-13
Epoch: 9/10
loss: 1.4210854715202004e-14
Epoch: 10/10
loss: 1.4210854715202004e-14
Optimization Finished!
Training cost= 7.32343e-14 W= 0.5 b= 1.0 

Model saved in file: linear_model.ckpt

In [12]:
# load model without loading the graph
graph = tf.Graph() # graph needs to be loaded before

with graph.as_default():
    # Placeholder & Variables
    X = tf.placeholder(tf.float32, name="X")
    Y = tf.placeholder(tf.float32, name="Y")
    # tf.random_normal(shape=[]) == np.random.randn()
    W = tf.Variable(tf.random_normal(shape=[]), name="weight")
    b = tf.Variable(tf.random_normal(shape=[]), name="bias")

    # Construct a linear model
    Y_predicted = tf.add(tf.multiply(X, W), b)

    # Mean squared error
    cost = tf.losses.mean_squared_error(labels=Y, predictions=Y_predicted)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()
    
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

with tf.Session(graph=graph) as sess:
    saver.restore(sess, "linear_model.ckpt")
    # model weights
    print(sess.run([W, b]))


INFO:tensorflow:Restoring parameters from linear_model.ckpt
[0.50000024, 0.99999982]

In [13]:
# TensorFlow - save model with graph
import tensorflow as tf

# Parameters
learning_rate = 0.01
training_epochs = 10
split = 0.2

graph = tf.Graph()

with graph.as_default():
    # Placeholder & Variables
    X = tf.placeholder(tf.float32, name="X")
    Y = tf.placeholder(tf.float32, name="Y")
    # tf.random_normal(shape=[]) == np.random.randn()
    W = tf.Variable(tf.random_normal(shape=[]), name="weight")
    b = tf.Variable(tf.random_normal(shape=[]), name="bias")

    # Construct a linear model
    Y_predicted = tf.add(tf.multiply(X, W), b)

    # Mean squared error
    cost = tf.losses.mean_squared_error(labels=Y, predictions=Y_predicted)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()
    
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(tf.trainable_variables())
    
    tf.add_to_collection("vars", W)
    tf.add_to_collection("vars", b)
    
# Launch the graph
with tf.Session(graph=graph) as sess:
    sess.run(init)
    
    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        # loop through the entire batch
        for i, j in zip(x, y):
            _, loss = sess.run([optimizer, cost], feed_dict={X: i, Y: j})
         
        print("loss: {}".format(loss))
        
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: x, Y: y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
    save_path = saver.save(sess, "linear_model.ckpt")
    print("Model saved in file: {}".format(save_path))


Epoch: 1/10
loss: 0.4722558856010437
Epoch: 2/10
loss: 0.010192443616688251
Epoch: 3/10
loss: 0.0002446139696985483
Epoch: 4/10
loss: 6.570800906047225e-06
Epoch: 5/10
loss: 1.9654977734262502e-07
Epoch: 6/10
loss: 6.4556502366031054e-09
Epoch: 7/10
loss: 2.2920687570149312e-10
Epoch: 8/10
loss: 8.881784197001252e-12
Epoch: 9/10
loss: 5.115907697472721e-13
Epoch: 10/10
loss: 1.4210854715202004e-14
Optimization Finished!
Training cost= 1.84085e-13 W= 0.5 b= 1.0 

Model saved in file: linear_model.ckpt

In [14]:
# load model with graph
with tf.Session() as sess:
    saver = tf.train.import_meta_graph("linear_model.ckpt.meta")
    saver.restore(sess, tf.train.latest_checkpoint('./'))
    all_vars = tf.get_collection("vars")
    for v in all_vars:
        print(sess.run(v))


INFO:tensorflow:Restoring parameters from ./linear_model.ckpt
0.5
1.0

Prediction (Linear Regression)


In [15]:
# Keras
model.predict(x[:5])


Out[15]:
array([[ 1.66009414],
       [ 0.71298826],
       [ 0.74839067],
       [ 0.51764071],
       [ 1.33864439]], dtype=float32)

In [16]:
# TensorFlow - save model without graph
import tensorflow as tf

# Parameters
learning_rate = 0.01
training_epochs = 10
split = 0.2

graph = tf.Graph()

with graph.as_default():
    # Placeholder & Variables
    X = tf.placeholder(tf.float32, name="X")
    Y = tf.placeholder(tf.float32, name="Y")
    # tf.random_normal(shape=[]) == np.random.randn()
    W = tf.Variable(tf.random_normal(shape=[]), name="weight")
    b = tf.Variable(tf.random_normal(shape=[]), name="bias")

    # Construct a linear model
    Y_predicted = tf.add(tf.multiply(X, W), b)

    # Mean squared error
    cost = tf.losses.mean_squared_error(labels=Y, predictions=Y_predicted)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()
    

# Launch the graph
with tf.Session(graph=graph) as sess:
    sess.run(init)
    
    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        # loop through the entire batch
        for i, j in zip(x, y):
            _, loss = sess.run([optimizer, cost], feed_dict={X: i, Y: j})
         
        print("loss: {}".format(loss))
        
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: x, Y: y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
    
    # Make prediction
    prediction = sess.run(Y_predicted, feed_dict={X: x[:5]})
    print(prediction)


Epoch: 1/10
loss: 0.00038448849227279425
Epoch: 2/10
loss: 4.661114144255407e-05
Epoch: 3/10
loss: 7.53782069295994e-06
Epoch: 4/10
loss: 5.30347392668773e-07
Epoch: 5/10
loss: 2.865476744773332e-08
Epoch: 6/10
loss: 1.3833414413966238e-09
Epoch: 7/10
loss: 6.190248313941993e-11
Epoch: 8/10
loss: 2.7853275241795927e-12
Epoch: 9/10
loss: 2.2737367544323206e-13
Epoch: 10/10
loss: 0.0
Optimization Finished!
Training cost= 1.86603e-13 W= 0.5 b= 1.0 

[ 1.81217313  0.6941213   0.73591363  0.46351498  1.43270397]

Notes

  • Early Stopping can be implemented via ValidationMonitor but rather used TFLearn
  • TFLearn on top of TF; alternative is TF-Slim

Logistic Regression


In [17]:
# data
from keras.datasets import mnist

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
x_train /= 255
x_test /= 255
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")
num_classes = 10

# convert class vectors to binary class matrices
y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)
y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)


60000 train samples
10000 test samples

In [18]:
# Keras
batch_size = 128
epochs = 10

model = Sequential()
model.add(Dense(10, activation="softmax", input_shape=(784,)))

model.summary()

model.compile(loss="categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_3 (Dense)              (None, 10)                7850      
=================================================================
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________

In [19]:
model.fit(x_train, y_train, batch_size=batch_size, validation_split=0.2, verbose=1)


Train on 48000 samples, validate on 12000 samples
Epoch 1/10
48000/48000 [==============================] - 0s - loss: 1.4027 - acc: 0.6647 - val_loss: 0.9022 - val_acc: 0.8225
Epoch 2/10
48000/48000 [==============================] - 0s - loss: 0.7965 - acc: 0.8286 - val_loss: 0.6596 - val_acc: 0.8551
Epoch 3/10
48000/48000 [==============================] - 0s - loss: 0.6440 - acc: 0.8521 - val_loss: 0.5628 - val_acc: 0.8686
Epoch 4/10
48000/48000 [==============================] - 0s - loss: 0.5710 - acc: 0.8625 - val_loss: 0.5094 - val_acc: 0.8767
Epoch 5/10
48000/48000 [==============================] - 0s - loss: 0.5265 - acc: 0.8693 - val_loss: 0.4751 - val_acc: 0.8815
Epoch 6/10
48000/48000 [==============================] - 0s - loss: 0.4962 - acc: 0.8740 - val_loss: 0.4509 - val_acc: 0.8865
Epoch 7/10
48000/48000 [==============================] - 0s - loss: 0.4738 - acc: 0.8774 - val_loss: 0.4326 - val_acc: 0.8901
Epoch 8/10
48000/48000 [==============================] - 0s - loss: 0.4564 - acc: 0.8803 - val_loss: 0.4184 - val_acc: 0.8922
Epoch 9/10
48000/48000 [==============================] - 0s - loss: 0.4424 - acc: 0.8830 - val_loss: 0.4064 - val_acc: 0.8947
Epoch 10/10
48000/48000 [==============================] - 0s - loss: 0.4309 - acc: 0.8850 - val_loss: 0.3970 - val_acc: 0.8970
Out[19]:
<keras.callbacks.History at 0x1177ffb00>

In [20]:
x_train_full, y_train_full = x_train[:1000], y_train[:1000]

In [21]:
# Parameters
learning_rate = 0.01
training_epochs = 10
batch_size = 128
split = 0.2

import tensorflow as tf

graph = tf.Graph()

with graph.as_default():
    X = tf.placeholder(tf.float32, shape=[None, 784])
    Y = tf.placeholder(tf.float32, shape=[None, 10])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))

    init = tf.global_variables_initializer()
    Y_predicted = tf.add(tf.matmul(X, W),  b)

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=Y_predicted))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
    
    # evaluation
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_predicted, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    y = tf.nn.softmax(tf.add(tf.matmul(X, W),  b))

# Launch the graph
with tf.Session(graph=graph) as sess:
    writer = tf.summary.FileWriter("./graphs", sess.graph) # python -m tensorflow.tensorboard logs="./graphs"
    sess.run(init)
    
    # validation_split
    x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size=split)

    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        train_range = int(x_train.shape[0] / batch_size)
        for index in progressbar(range(x_train.shape[0])):
            if index > train_range:
                pass
            else:
                mini_batch = np.random.choice(x_train.shape[0], batch_size, replace=False)
                _, loss, acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_train[mini_batch], Y: y_train[mini_batch]})

        
        val_range = int(x_val.shape[0] / batch_size)
        for _ in range(x_val.shape[0]): 
            mini_batch = np.random.choice(x_val.shape[0], batch_size, replace=False)
            _, val_loss, val_acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_val[mini_batch], Y: y_val[mini_batch]})
            
        print("loss: {} - acc: {} - val_loss: {} - val_acc: {}".format(loss, acc, val_loss, val_acc))
        
    classification = sess.run(y, feed_dict={X: x_train[:5]})
    print(classification)


Epoch: 1/10
800/800 [==============================] 
loss: 2.2371249198913574 - acc: 0.703125 - val_loss: 1.0113177299499512 - val_acc: 0.8515625
Epoch: 2/10
800/800 [==============================] 
loss: 1.1763719320297241 - acc: 0.7578125 - val_loss: 0.6741073131561279 - val_acc: 0.90625
Epoch: 3/10
800/800 [==============================] 
loss: 0.8656826019287109 - acc: 0.828125 - val_loss: 0.5264722108840942 - val_acc: 0.9140625
Epoch: 4/10
800/800 [==============================] 
loss: 0.9123315215110779 - acc: 0.78125 - val_loss: 0.44967806339263916 - val_acc: 0.921875
Epoch: 5/10
800/800 [==============================] 
loss: 0.7521445155143738 - acc: 0.84375 - val_loss: 0.380745530128479 - val_acc: 0.921875
Epoch: 6/10
800/800 [==============================] 
loss: 0.5592109560966492 - acc: 0.84375 - val_loss: 0.3474493622779846 - val_acc: 0.9375
Epoch: 7/10
800/800 [==============================] 
loss: 0.5944522023200989 - acc: 0.8515625 - val_loss: 0.26874130964279175 - val_acc: 0.9765625
Epoch: 8/10
800/800 [==============================] 
loss: 0.5193523168563843 - acc: 0.875 - val_loss: 0.2808242440223694 - val_acc: 0.96875
Epoch: 9/10
800/800 [==============================] 
loss: 0.6707116365432739 - acc: 0.796875 - val_loss: 0.20600812137126923 - val_acc: 0.9921875
Epoch: 10/10
800/800 [==============================] 
loss: 0.6479211449623108 - acc: 0.765625 - val_loss: 0.21222259104251862 - val_acc: 0.984375
[[  2.24508792e-02   6.03727705e-04   3.54300030e-02   1.25243245e-02
    7.44860053e-01   2.45307703e-02   4.44783866e-02   7.85454921e-03
    8.16187449e-03   9.91053805e-02]
 [  2.35862136e-01   3.40955006e-03   1.71633009e-02   8.41790289e-02
    3.08180554e-03   5.21032453e-01   2.37787375e-03   5.22186756e-02
    2.28617638e-02   5.78133762e-02]
 [  5.19917533e-03   1.70677004e-03   5.12246881e-03   4.96316075e-01
    1.95741914e-02   3.87675762e-01   1.04589714e-02   9.12930816e-03
    1.60326082e-02   4.87846844e-02]
 [  1.77507158e-02   1.91420633e-02   1.87035091e-02   2.16841162e-03
    6.53066218e-01   2.55903900e-02   4.73861881e-02   8.48329347e-03
    9.91966948e-02   1.08512588e-01]
 [  4.36633267e-03   2.39565760e-01   2.82239556e-01   2.83104423e-02
    1.63450018e-01   1.15757221e-02   6.12798296e-02   1.15252204e-01
    4.87675108e-02   4.51926850e-02]]

Prediction (Logistic Regression)


In [22]:
# Keras
model.predict(x_test[:5]), model.predict_classes(x_test[:5]),  y_test[:5]


5/5 [==============================] - 0s
Out[22]:
(array([[  1.11662585e-03,   1.73592380e-05,   5.12819621e-04,
           3.89615097e-03,   3.93698661e-04,   4.22783603e-04,
           3.49929323e-05,   9.82232571e-01,   7.09537067e-04,
           1.06635075e-02],
        [  2.68459581e-02,   1.62315427e-03,   6.75759256e-01,
           4.78192233e-02,   2.02345491e-05,   5.22298962e-02,
           1.68703020e-01,   1.40400634e-05,   2.69088652e-02,
           7.64032957e-05],
        [  1.32354256e-03,   9.03236866e-01,   2.74400059e-02,
           1.53171299e-02,   2.46541877e-03,   5.93998237e-03,
           1.01718996e-02,   1.05592338e-02,   1.79239940e-02,
           5.62194828e-03],
        [  9.84893262e-01,   3.63211370e-07,   9.24897729e-04,
           3.09610390e-04,   1.43753005e-05,   5.00583043e-03,
           5.52221620e-03,   2.50136782e-03,   4.45762882e-04,
           3.82261642e-04],
        [  9.32559650e-03,   2.11156119e-04,   3.41025591e-02,
           2.29899934e-03,   7.54166663e-01,   4.78650676e-03,
           2.58525535e-02,   4.21677716e-02,   3.28795202e-02,
           9.42085534e-02]], dtype=float32),
 array([7, 2, 1, 0, 4]),
 array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.]]))

In [23]:
# TensorFlow

# Parameters
learning_rate = 0.01
training_epochs = 10
batch_size = 128
split = 0.2

import tensorflow as tf

graph = tf.Graph()

with graph.as_default():
    X = tf.placeholder(tf.float32, shape=[None, 784])
    Y = tf.placeholder(tf.float32, shape=[None, 10])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))

    init = tf.global_variables_initializer()
    Y_predicted = tf.add(tf.matmul(X, W),  b)

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=Y_predicted))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
    
    # evaluation
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_predicted, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # prediction
    pred = tf.nn.softmax(tf.add(tf.matmul(X, W),  b))

# Launch the graph
with tf.Session(graph=graph) as sess:
    writer = tf.summary.FileWriter("./graphs", sess.graph) # python -m tensorflow.tensorboard logs="./graphs"
    sess.run(init)
    
    # validation_split
    x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size=split)

    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        train_range = int(x_train.shape[0] / batch_size)
        for index in progressbar(range(x_train.shape[0])):
            if index > train_range:
                pass
            else:
                mini_batch = np.random.choice(x_train.shape[0], batch_size, replace=False)
                _, loss, acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_train[mini_batch], Y: y_train[mini_batch]})

        
        val_range = int(x_val.shape[0] / batch_size)
        for _ in range(x_val.shape[0]): 
            mini_batch = np.random.choice(x_val.shape[0], batch_size, replace=False)
            _, val_loss, val_acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_val[mini_batch], Y: y_val[mini_batch]})
            
        print("loss: {} - acc: {} - val_loss: {} - val_acc: {}".format(loss, acc, val_loss, val_acc))
    
    # Making predictions
    pred_prob, pred_classes = sess.run([pred, tf.arg_max(pred, 1)], feed_dict={X: x_train[:5]})
    print(pred_prob, pred_classes)


Epoch: 1/10
800/800 [==============================] 
loss: 2.228919506072998 - acc: 0.6171875 - val_loss: 1.0881786346435547 - val_acc: 0.828125
Epoch: 2/10
800/800 [==============================] 
loss: 1.22151780128479 - acc: 0.7421875 - val_loss: 0.7336153984069824 - val_acc: 0.875
Epoch: 3/10
800/800 [==============================] 
loss: 0.8571600914001465 - acc: 0.8125 - val_loss: 0.5839093923568726 - val_acc: 0.90625
Epoch: 4/10
800/800 [==============================] 
loss: 0.7725659608840942 - acc: 0.8203125 - val_loss: 0.5100392699241638 - val_acc: 0.921875
Epoch: 5/10
800/800 [==============================] 
loss: 0.7082560658454895 - acc: 0.828125 - val_loss: 0.3760635256767273 - val_acc: 0.9765625
Epoch: 6/10
loss: 0.6378764510154724 - acc: 0.84375 - val_loss: 0.3735986351966858 - val_acc: 0.953125
Epoch: 7/10
800/800 [==============================] 
loss: 0.6459987163543701 - acc: 0.828125 - val_loss: 0.3428560197353363 - val_acc: 0.953125
Epoch: 8/10
800/800 [==============================] 
loss: 0.6170925498008728 - acc: 0.8125 - val_loss: 0.3065798878669739 - val_acc: 0.9765625
Epoch: 9/10
800/800 [==============================] 
loss: 0.6519094705581665 - acc: 0.8203125 - val_loss: 0.2444753348827362 - val_acc: 0.9921875
Epoch: 10/10
loss: 0.5576114654541016 - acc: 0.828125 - val_loss: 0.22676709294319153 - val_acc: 0.9921875
[[  3.91958468e-03   7.94598609e-02   1.09387666e-01   1.49608344e-01
    7.76433153e-03   1.55161833e-02   1.02854753e-02   2.85257697e-01
    4.81593050e-02   2.90641457e-01]
 [  2.71052151e-04   2.96572689e-04   6.79177325e-03   2.38689850e-03
    2.31134556e-02   2.19830591e-03   2.99305073e-03   7.61645706e-03
    1.60213793e-03   9.52730238e-01]
 [  2.41650012e-03   7.18301442e-03   1.88524351e-01   7.72720156e-03
    7.63653994e-01   8.64896399e-04   9.98859480e-03   1.95660265e-04
    1.00172888e-02   9.42847785e-03]
 [  4.98932600e-03   4.70244180e-04   4.18554666e-03   2.51386897e-03
    6.25951827e-01   2.34483816e-02   8.87841824e-03   1.77135114e-02
    1.28911743e-02   2.98957735e-01]
 [  1.03877170e-03   4.05048195e-05   1.03082450e-03   3.36542609e-04
    6.14371419e-01   3.78625258e-03   4.34847176e-03   9.00822866e-04
    5.73690224e-04   3.73572677e-01]] [9 9 4 4 4]

Feedforward Network


In [24]:
# data
from keras.datasets import mnist

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
x_train /= 255
x_test /= 255
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# convert class vectors to binary class matrices
y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)
y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)


60000 train samples
10000 test samples

In [25]:
# Keras
batch_size = 128
num_classes = 10
epochs = 10

model = Sequential()
model.add(Dense(32, activation="relu", input_shape=(784,)))
model.add(Dense(10, activation="softmax"))

model.summary()

model.compile(loss="categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_4 (Dense)              (None, 32)                25120     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                330       
=================================================================
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________

In [26]:
model.fit(x_train, y_train, batch_size=batch_size, validation_split=0.2, verbose=1)


Train on 48000 samples, validate on 12000 samples
Epoch 1/10
48000/48000 [==============================] - 1s - loss: 1.4971 - acc: 0.5855 - val_loss: 0.8632 - val_acc: 0.8153
Epoch 2/10
48000/48000 [==============================] - 0s - loss: 0.7099 - acc: 0.8307 - val_loss: 0.5508 - val_acc: 0.8637
Epoch 3/10
48000/48000 [==============================] - 0s - loss: 0.5291 - acc: 0.8639 - val_loss: 0.4494 - val_acc: 0.8853
Epoch 4/10
48000/48000 [==============================] - 0s - loss: 0.4541 - acc: 0.8792 - val_loss: 0.4006 - val_acc: 0.8958
Epoch 5/10
48000/48000 [==============================] - 0s - loss: 0.4124 - acc: 0.8886 - val_loss: 0.3709 - val_acc: 0.9002
Epoch 6/10
48000/48000 [==============================] - 0s - loss: 0.3851 - acc: 0.8942 - val_loss: 0.3509 - val_acc: 0.9053
Epoch 7/10
48000/48000 [==============================] - 0s - loss: 0.3655 - acc: 0.8989 - val_loss: 0.3363 - val_acc: 0.9083
Epoch 8/10
48000/48000 [==============================] - 0s - loss: 0.3502 - acc: 0.9024 - val_loss: 0.3233 - val_acc: 0.9111
Epoch 9/10
48000/48000 [==============================] - 0s - loss: 0.3381 - acc: 0.9053 - val_loss: 0.3140 - val_acc: 0.9127
Epoch 10/10
48000/48000 [==============================] - 0s - loss: 0.3277 - acc: 0.9076 - val_loss: 0.3058 - val_acc: 0.9158
Out[26]:
<keras.callbacks.History at 0x126e30e10>

In [27]:
x_train_full, y_train_full = x_train[:1000], y_train[:1000]

In [28]:
# Parameters
learning_rate = 0.01
training_epochs = 10
batch_size = 128
split = 0.2

import tensorflow as tf

graph = tf.Graph()

with graph.as_default():
    def init_weights(shape):
        weights = tf.random_normal(shape, stddev=0.1)
        return tf.Variable(weights)

    def deep_neural_network(X, w_1, w_2):
        h    = tf.nn.sigmoid(tf.matmul(X, w_1)) 
        yhat = tf.matmul(h, w_2)
        return yhat
    
    X = tf.placeholder(tf.float32, shape=[None, 784])
    Y = tf.placeholder(tf.float32, shape=[None, 10])
    w_1 = init_weights((784, 32))
    w_2 = init_weights((32, 10))

    init = tf.global_variables_initializer()
    Y_predicted = deep_neural_network(X, w_1, w_2)

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=Y_predicted))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
    
    # evaluation
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_predicted, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Launch the graph
with tf.Session(graph=graph) as sess:
    writer = tf.summary.FileWriter("./graphs", sess.graph) # python -m tensorflow.tensorboard logs="./graphs"
    sess.run(init)
    
    # validation_split
    x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size=split)

    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        train_range = int(x_train.shape[0] / batch_size)
        for index in progressbar(range(x_train.shape[0])):
            if index > train_range:
                pass
            else:
                mini_batch = np.random.choice(x_train.shape[0], batch_size, replace=False)
                _, loss, acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_train[mini_batch], Y: y_train[mini_batch]})

        
        val_range = int(x_val.shape[0] / batch_size)
        for _ in range(x_val.shape[0]): 
            mini_batch = np.random.choice(x_val.shape[0], batch_size, replace=False)
            _, val_loss, val_acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_val[mini_batch], Y: y_val[mini_batch]})
            
        print("loss: {} - acc: {} - val_loss: {} - val_acc: {}".format(loss, acc, val_loss, val_acc))


Epoch: 1/10
800/800 [==============================] 
loss: 2.330880641937256 - acc: 0.09375 - val_loss: 2.1777536869049072 - val_acc: 0.1796875
Epoch: 2/10
800/800 [==============================] 
loss: 2.2388315200805664 - acc: 0.09375 - val_loss: 2.1257071495056152 - val_acc: 0.2421875
Epoch: 3/10
800/800 [==============================] 
loss: 2.2043590545654297 - acc: 0.203125 - val_loss: 1.9937424659729004 - val_acc: 0.40625
Epoch: 4/10
800/800 [==============================] 
loss: 2.070479393005371 - acc: 0.3359375 - val_loss: 1.845174789428711 - val_acc: 0.515625
Epoch: 5/10
800/800 [==============================] 
loss: 2.0151429176330566 - acc: 0.375 - val_loss: 1.7658815383911133 - val_acc: 0.4921875
Epoch: 6/10
800/800 [==============================] 
loss: 1.802952527999878 - acc: 0.484375 - val_loss: 1.5791423320770264 - val_acc: 0.6328125
Epoch: 7/10
800/800 [==============================] 
loss: 1.7946410179138184 - acc: 0.5078125 - val_loss: 1.5230448246002197 - val_acc: 0.6015625
Epoch: 8/10
800/800 [==============================] 
loss: 1.651677131652832 - acc: 0.546875 - val_loss: 1.363255500793457 - val_acc: 0.703125
Epoch: 9/10
800/800 [==============================] 
loss: 1.5117130279541016 - acc: 0.6015625 - val_loss: 1.2937953472137451 - val_acc: 0.7265625
Epoch: 10/10
800/800 [==============================] 
loss: 1.4962711334228516 - acc: 0.5546875 - val_loss: 1.1505191326141357 - val_acc: 0.7734375

Convolutional Neural Network


In [29]:
# data
from keras.datasets import mnist

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape((-1, 28, 28, 1))
x_train = x_train.astype("float32")
x_train /= 255
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# convert class vectors to binary class matrices
y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)
y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)


60000 train samples
10000 test samples

In [30]:
# Keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD

model = Sequential()
model.add(Conv2D(32, (5, 5), strides=(1, 1), activation="relu", input_shape=(28, 28, 1), padding="same")) # same -> input = output
model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
model.add(Conv2D(64, (5, 5), strides=(1, 1), activation="relu", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
model.add(Flatten())
model.add(Dense(1024, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer="sgd")
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_1 (Conv2D)            (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        51264     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 1024)              3212288   
_________________________________________________________________
dense_7 (Dense)              (None, 10)                10250     
=================================================================
Total params: 3,274,634
Trainable params: 3,274,634
Non-trainable params: 0
_________________________________________________________________

In [31]:
model.fit(x_train[:1000], y_train[:1000], batch_size=32, epochs=10, validation_split=0.2, verbose=1)


Train on 800 samples, validate on 200 samples
Epoch 1/10
800/800 [==============================] - 2s - loss: 2.2875 - val_loss: 2.2605
Epoch 2/10
800/800 [==============================] - 1s - loss: 2.2322 - val_loss: 2.2032
Epoch 3/10
800/800 [==============================] - 1s - loss: 2.1524 - val_loss: 2.1029
Epoch 4/10
800/800 [==============================] - 1s - loss: 2.0049 - val_loss: 1.9055
Epoch 5/10
800/800 [==============================] - 1s - loss: 1.7037 - val_loss: 1.5063
Epoch 6/10
800/800 [==============================] - 1s - loss: 1.2443 - val_loss: 1.0478
Epoch 7/10
800/800 [==============================] - 1s - loss: 0.8662 - val_loss: 0.8671
Epoch 8/10
800/800 [==============================] - 1s - loss: 0.6753 - val_loss: 0.6823
Epoch 9/10
800/800 [==============================] - 1s - loss: 0.5701 - val_loss: 0.6458
Epoch 10/10
800/800 [==============================] - 1s - loss: 0.4783 - val_loss: 0.6371
Out[31]:
<keras.callbacks.History at 0x141d7def0>

In [32]:
x_train_full = x_train[:1000]
y_train_full = y_train[:1000]

In [33]:
# TensorFlow
import tensorflow as tf

learning_rate = 0.01
training_epochs = 10
batch_size = 32

graph = tf.Graph()

with graph.as_default():
    X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    Y = tf.placeholder(tf.float32, shape=[None, 10])

    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])

    h_conv1 = tf.nn.relu(conv2d(X, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    W_fc1 = weight_variable([7 * 7 * 64, 1024]) # need to understand how paddle and stride works to define the numbers
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])

    Y_predicted = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)

    init = tf.global_variables_initializer()
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(Y_predicted), reduction_indices=[1]))
    optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(Y_predicted, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Launch the graph
with tf.Session(graph=graph) as sess:
    writer = tf.summary.FileWriter("./graphs", sess.graph) # python -m tensorflow.tensorboard logs="./graphs"
    sess.run(init)
    
    # validation_split
    x_train, x_val, y_train, y_val = train_test_split(x_train_full, y_train_full, test_size=split)

    for epoch in range(training_epochs):
        print("Epoch: {}/{}".format(epoch+1, training_epochs))
        
        train_range = int(x_train.shape[0] / batch_size)
        for index in progressbar(range(x_train.shape[0])):
            if index > train_range:
                pass
            else:
                mini_batch = np.random.choice(x_train.shape[0], batch_size, replace=False)
                _, loss, acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_train[mini_batch], Y: y_train[mini_batch]})

        
        val_range = int(x_val.shape[0] / batch_size)
        for _ in range(x_val.shape[0]): 
            mini_batch = np.random.choice(x_val.shape[0], batch_size, replace=False)
            _, val_loss, val_acc = sess.run([optimizer, cross_entropy, accuracy], feed_dict={X: x_val[mini_batch], Y: y_val[mini_batch]})
            
        print("loss: {} - acc: {} - val_loss: {} - val_acc: {}".format(loss, acc, val_loss, val_acc))


Epoch: 1/10
800/800 [==============================] 
loss: 0.8021054267883301 - acc: 0.75 - val_loss: 0.035391174256801605 - val_acc: 1.0
Epoch: 2/10
800/800 [==============================] 
loss: 0.6878167986869812 - acc: 0.78125 - val_loss: 0.008757269009947777 - val_acc: 1.0
Epoch: 3/10
800/800 [==============================] 
loss: 0.4788475036621094 - acc: 0.90625 - val_loss: 0.005460498388856649 - val_acc: 1.0
Epoch: 4/10
800/800 [==============================] 
loss: 0.14077109098434448 - acc: 0.9375 - val_loss: 0.0048263175413012505 - val_acc: 1.0
Epoch: 5/10
800/800 [==============================] 
loss: 0.1707521677017212 - acc: 0.90625 - val_loss: 0.003986487165093422 - val_acc: 1.0
Epoch: 6/10
800/800 [==============================] 
loss: 0.15068838000297546 - acc: 0.96875 - val_loss: 0.0019592728931456804 - val_acc: 1.0
Epoch: 7/10
800/800 [==============================] 
loss: 0.056178320199251175 - acc: 1.0 - val_loss: 0.0024542140308767557 - val_acc: 1.0
Epoch: 8/10
800/800 [==============================] 
loss: 0.020114684477448463 - acc: 1.0 - val_loss: 0.0015882442239671946 - val_acc: 1.0
Epoch: 9/10
800/800 [==============================] 
loss: 0.03296520188450813 - acc: 1.0 - val_loss: 0.001727847964502871 - val_acc: 1.0
Epoch: 10/10
800/800 [==============================] 
loss: 0.0837002843618393 - acc: 0.96875 - val_loss: 0.0017372460570186377 - val_acc: 1.0

In [ ]: