TensorFlow - Variable Scope


In [1]:
import numpy as np
import pandas as pd
%pylab inline
pylab.style.use('ggplot')


Populating the interactive namespace from numpy and matplotlib

The idea of variable scoping in TensorFlow is to be able to organize the names and initializations of variables that play the same role in a multilayer network. For example, consider an ANN with multiple hidden layers. All of them have a weight matrix $w$. Using variable scoping allows us to structure and initialize them in a systematic way.

Variable Scope mechanism in TensorFlow consists of two main functions:

  • tf.get_variable(<name>, <shape>, <initializer>) Creates or returns a variable with a given name.
  • tf.variable_scope(<scope_name>) Manages namespaces for names passed to tf.get_variable().

In [2]:
import tensorflow as tf

AND Gate with TensorFlow


In [35]:
X_val = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_val = np.atleast_2d(np.array([0, 0, 0, 1])).T

In [36]:
X_val


Out[36]:
array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

In [37]:
y_val


Out[37]:
array([[0],
       [0],
       [0],
       [1]])

In [48]:
tf.reset_default_graph()

n_iter = 500
threshold = 0.5

with tf.variable_scope('inputs'):
    X = tf.placeholder(name='X', shape=(4, 2), dtype=np.float64)
    y = tf.placeholder(name='y', shape=y_val.shape, dtype=np.float64)

with tf.variable_scope('weights'):
    w = tf.get_variable(name='w', shape=(2, 1), dtype=np.float64, initializer=tf.truncated_normal_initializer())
    b = tf.get_variable(name='b', shape=(1, 1), dtype=np.float64, initializer=tf.constant_initializer(1.0))

with tf.variable_scope('train'):
    output = tf.matmul(X, w) + b
    loss_func = tf.reduce_mean(tf.squared_difference(y, output))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.2)
    train_op = optimizer.minimize(loss_func)

init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    feed_dict = {X: X_val, y: y_val}
    
    for i in range(1, n_iter+1):        
        _, out_val = sess.run([train_op, output], feed_dict=feed_dict)        
        out_val = np.where(out_val > threshold, 1, 0)
        
        if i % 50 == 0:
            result = np.column_stack([X_val, y_val, out_val])
            result_df = pd.DataFrame(result, columns=['x1', 'x2', 'x1 and x2', 'output'])
            print('loss_function: {}'.format(loss_func.eval(session=sess, feed_dict=feed_dict)))
            print('iteration {}\n{}'.format(i, result_df))


loss_function: 0.06303688880218354
iteration 50
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.06250080476444264
iteration 100
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.06250000123040157
iteration 150
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.06250000000188181
iteration 200
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.06250000000000289
iteration 250
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.062499999999999986
iteration 300
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.06249999999999999
iteration 350
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.0625
iteration 400
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.06250000000000001
iteration 450
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1
loss_function: 0.0625
iteration 500
   x1  x2  x1 and x2  output
0   0   0          0       0
1   0   1          0       0
2   1   0          0       0
3   1   1          1       1

XOR Gate with TensorFlow


In [49]:
X_val = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_val = np.atleast_2d(np.array([0, 1, 1, 0])).T

In [50]:
X_val


Out[50]:
array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])

In [51]:
y_val


Out[51]:
array([[0],
       [1],
       [1],
       [0]])

In [54]:
tf.reset_default_graph()

n_iter = 500
threshold = 0.5

def make_layer(name, x):
    with tf.variable_scope(name, reuse=None):
        if name == 'hidden':
            w_shape = (2, 3)
            b_shape = (4, 3)
        elif name == 'output':
            w_shape = (3, 1)
            b_shape = (1, 1)
        else:
            assert False
            
        w = tf.get_variable(name='w', shape=w_shape, dtype=np.float64, initializer=tf.truncated_normal_initializer())
        b = tf.get_variable(name='b', shape=b_shape, dtype=np.float64, initializer=tf.constant_initializer(1.0))

        mm = tf.matmul(x, w) + b
        return tf.sigmoid(mm) if name == 'hidden' else mm

with tf.variable_scope('inputs'):
    X = tf.placeholder(name='X', shape=(4, 2), dtype=np.float64)
    y = tf.placeholder(name='y', shape=(4, 1), dtype=np.float64)

hidden = make_layer('hidden', X)
output = make_layer('output', hidden)

with tf.variable_scope('train'):    
    loss_func = tf.reduce_mean(tf.squared_difference(y, output))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.2)
    train_op = optimizer.minimize(loss_func)
    
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    feed_dict = {X: X_val, y: y_val}
    
    for i in range(1, n_iter+1):        
        _, out_val = sess.run([train_op, output], feed_dict=feed_dict)        
        out_val = np.where(out_val > threshold, 1, 0)
        
        if i % 50 == 0:
            result = np.column_stack([X_val, y_val, out_val])
            result_df = pd.DataFrame(result, columns=['x1', 'x2', 'x1 XOR x2', 'output'])
            print('loss_function: {}'.format(loss_func.eval(session=sess, feed_dict=feed_dict)))
            print('iteration {}\n{}'.format(i, result_df))


loss_function: 0.04504346879290956
iteration 50
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 0.0015679307923828969
iteration 100
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 3.0473930392723917e-05
iteration 150
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 5.439286448553145e-07
iteration 200
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 9.622766639625834e-09
iteration 250
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 1.7017005736613185e-10
iteration 300
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 3.0098396611669797e-12
iteration 350
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 5.324074539957869e-14
iteration 400
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 9.418018052433826e-16
iteration 450
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0
loss_function: 1.6660178139677143e-17
iteration 500
   x1  x2  x1 XOR x2  output
0   0   0          0       0
1   0   1          1       1
2   1   0          1       1
3   1   1          0       0

In [ ]: