In [2]:
from theano import function, config, shared, sandbox

import theano.tensor as T
import numpy
import time

In [3]:
# Benchmark a trivial Theano graph (elementwise exp over a large vector) and
# report whether Theano compiled it for the GPU or fell back to the CPU.
#
# NOTE(review): the original cell assigned `theano.config.device = 'gpu'` here.
# The device flag CANNOT be changed after `import theano` — it must be set
# before import via the THEANO_FLAGS environment variable, e.g.
#   THEANO_FLAGS='device=gpu,floatX=float32'
# The recorded output below (a float64 CPU Elemwise graph, "Used the cpu")
# confirms the runtime assignment never took effect, so it is removed.
theano.config.floatX = 'float32'  # floatX, unlike device, can be set at runtime

vlen = 10 * 30 * 768  # 10 x #cores x #threads per core
iters = 1000

rng = numpy.random.RandomState(22)  # fixed seed for a reproducible vector
# Cast explicitly to config.floatX so the shared variable matches the
# configured precision (float32 on GPU) instead of numpy's float64 default.
x = shared(numpy.asarray(rng.rand(vlen), dtype=config.floatX))
f = function([], T.exp(x))
print(f.maker.fgraph.toposort())

t0 = time.time()
# `range` instead of Python-2-only `xrange`: the cell already uses
# function-style print(), and 1000 ints cost nothing as a list on Python 2.
for _ in range(iters):
    r = f()
t1 = time.time()

print("Looping %d times took %f seconds" % (iters, t1 - t0))
print("Result is %s" % (r,))

# Plain Elemwise nodes mean the graph was compiled for the CPU; on the GPU
# Theano substitutes GpuElemwise. The comprehension variable is named `node`
# (not `x`): in Python 2 a list-comprehension variable leaks into the
# enclosing scope, so the original code silently clobbered the shared
# variable `x` with an apply node.
if numpy.any([isinstance(node.op, T.Elemwise)
              for node in f.maker.fgraph.toposort()]):
    print('Used the cpu')
else:
    print('Used the gpu')


INFO (theano.gof.compilelock): Waiting for existing lock by process '3166' (I am process '24983')
INFO:theano.gof.compilelock:Waiting for existing lock by process '3166' (I am process '24983')
INFO (theano.gof.compilelock): To manually release the lock, delete /home/hpc_dmytro/.theano/compiledir_Linux-3.4.90-x86_64-with-redhat-6.7-Carbon-x86_64-2.7.3-64/lock_dir
INFO:theano.gof.compilelock:To manually release the lock, delete /home/hpc_dmytro/.theano/compiledir_Linux-3.4.90-x86_64-with-redhat-6.7-Carbon-x86_64-2.7.3-64/lock_dir
[Elemwise{exp,no_inplace}(<TensorType(float64, vector)>)]
Looping 1000 times took 8.814286 seconds
Result is [ 1.23178032  1.61879341  1.52278065 ...,  2.20771815  2.29967753
  1.62323285]
Used the cpu