GPU-accelerated TensorFlow

Author:

  • Dr. Rahul Remanan

This code notebook is an introduction to GPU-accelerated TensorFlow: listing the devices TensorFlow can see, pinning operations to a specific device, and benchmarking GPU against CPU execution.


In [1]:
from tensorflow.python.client import device_lib

# List every compute device (CPU and GPU) visible to TensorFlow.
print(device_lib.list_local_devices())


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7027593649920340000
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 6195973325
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6256258333725374541
physical_device_desc: "device: 0, name: Tesla P4, pci bus id: 0000:00:04.0, compute capability: 6.1"
]
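For a quick yes/no check instead of the full device list, TensorFlow 1.x also provides helpers in tf.test. A minimal sketch:

import tensorflow as tf

# Name of the first GPU device, or an empty string if none is visible.
print(tf.test.gpu_device_name())
# True if TensorFlow can run ops on a GPU.
print(tf.test.is_gpu_available())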

Part 02 -- Manually specifying devices for running TensorFlow code


In [2]:
import tensorflow as tf

In [3]:
# Creates a graph.
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)

In [4]:
# Creates a session with log_device_placement set to True, so each op's
# device assignment is logged (to the terminal running the kernel, not the notebook).
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

In [5]:
# Runs the op.
print(sess.run(c))


[[22. 28.]
 [49. 64.]]

Setting up TensorFlow to run on the CPU


In [6]:
# Creates a graph, pinning all three ops to the CPU.
with tf.device('/cpu:0'):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
    c = tf.matmul(a, b)

In [7]:
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

In [8]:
# Runs the op.
print(sess.run(c))


[[22. 28.]
 [49. 64.]]
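Each tensor also exposes a .device property that reports the placement requested for its op, which is a quick way to sanity-check the pinning above (the exact device string format varies across TensorFlow versions):

# Expected to report the CPU device requested above; an empty
# string would mean no explicit placement was requested.
print(a.device)
print(c.device)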

Setting up TensorFlow to run on the GPU


In [9]:
# Creates a graph, pinning all three ops to the GPU.
with tf.device('/gpu:0'):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
    c = tf.matmul(a, b)

In [10]:
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

In [11]:
# Runs the op, reusing the session with device placement logging.
print(sess.run(c))


[[22. 28.]
 [49. 64.]]
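Hard-pinning ops to '/gpu:0' fails with an InvalidArgumentError when the session places the graph if the device does not exist or an op has no GPU kernel. Setting allow_soft_placement=True in the ConfigProto lets TensorFlow fall back to a supported device instead. A minimal sketch (the constant d is illustrative, not part of the cells above):

import tensorflow as tf

config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
with tf.device('/gpu:0'):
    d = tf.constant([1.0, 2.0], name='d')
with tf.Session(config=config) as sess:
    # Runs on the GPU if one is available, otherwise falls back to the CPU.
    print(sess.run(d))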

Part 03 -- Benchmarking TensorFlow GPU vs CPU


In [12]:
import time
import tensorflow as tf

In [13]:
def tf_benchmark(a=None, shape_a=None, b=None, shape_b=None, enable_GPU=False):
    device = 'gpu' if enable_GPU else 'cpu'
    start_time = time.time()
    with tf.device('/{}:0'.format(device)):
        a = tf.constant(a, shape=shape_a, name='a')
        b = tf.constant(b, shape=shape_b, name='b')
        c = tf.matmul(a, b)  # keep the matmul on the requested device
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        output = sess.run(c)
    # Note: this times graph construction and session startup as well as
    # the matmul itself.
    execution_time = time.time() - start_time
    return {'output': output, 'execution time': execution_time}

In [14]:
a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
b = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
shape_a = [2, 3]
shape_b = [3, 2]

In [15]:
CPU_exec_time = tf_benchmark(a=a,
                             b=b,
                             shape_a=shape_a,
                             shape_b=shape_b,
                             enable_GPU=False)

In [16]:
GPU_exec_time = tf_benchmark(a=a,
                             b=b,
                             shape_a=shape_a,
                             shape_b=shape_b,
                             enable_GPU=True)

In [17]:
print ("CPU execution time: {}".format(CPU_exec_time['execution time']))
print ("GPU execution time: {}".format(GPU_exec_time['execution time']))
print ("GPU vs CPU execution time delta: {}".format(GPU_exec_time['execution time'] - CPU_exec_time['execution time']))
print ("GPU acceleration factor: {}".format(CPU_exec_time['execution time'] / GPU_exec_time['execution time']))


CPU execution time: 0.036295413970947266
GPU execution time: 0.019434690475463867
GPU vs CPU execution time delta: -0.0168607234954834
GPU acceleration factor: 1.867558118137766
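At this size, both runs mostly measure graph construction and session startup rather than the matrix multiply, so the acceleration factor above understates the GPU's advantage. A sketch of a larger benchmark that separates a warm-up run from the timed run (tf_benchmark_large and its default size are illustrative assumptions, not part of the notebook above):

import time
import tensorflow as tf

def tf_benchmark_large(n=4096, enable_GPU=False):
    device = 'gpu' if enable_GPU else 'cpu'
    graph = tf.Graph()  # fresh graph so repeated calls do not accumulate ops
    with graph.as_default(), tf.device('/{}:0'.format(device)):
        a = tf.random_normal([n, n])
        b = tf.random_normal([n, n])
        c = tf.matmul(a, b)
    with tf.Session(graph=graph) as sess:
        sess.run(c)  # warm-up: session startup and memory allocation
        start_time = time.time()
        sess.run(c)  # timed run: the matmul (plus random generation) itself
        return time.time() - start_time

print("CPU execution time: {}".format(tf_benchmark_large(enable_GPU=False)))
print("GPU execution time: {}".format(tf_benchmark_large(enable_GPU=True)))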