In [1]:
import tensorflow as tf
from tensorflow.contrib.compiler import xla
tfe = tf.contrib.eager  # Shorthand for some symbols
#tf.enable_eager_execution()  # left disabled: graph mode with an explicit Session is used below

# GPU is not being detected properly, so hide it and run CPU-only
config = tf.ConfigProto(device_count={'GPU': 0})
# Enable XLA JIT compilation for the whole session
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
sess = tf.Session(config=config)

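Since the GPU is not being picked up, one quick check (illustrative, not part of the timed runs) is to list the devices this TensorFlow build can actually see:

from tensorflow.python.client import device_lib
print([d.name for d in device_lib.list_local_devices()])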
In [2]:
from pycalphad import Database, Model, variables as v
from pycalphad.codegen.sympydiff_utils import build_functions
from sympy import lambdify
import numpy as np

dbf = Database('Al-Cu-Zr_Zhou.tdb')  # Al-Cu-Zr thermodynamic database (TDB)
mod = Model(dbf, ['AL', 'CU', 'ZR'], 'LIQUID')  # Gibbs energy model of the liquid phase

In [3]:
mod.variables


Out[3]:
[T, LIQUID0AL, LIQUID0CU, LIQUID0ZR]

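mod.GM is the molar Gibbs energy of the liquid as a SymPy expression in these state variables, so its free symbols should line up with the list above. A quick sanity check (not part of the original run):

print(type(mod.GM))
print(sorted(mod.GM.free_symbols, key=str))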
In [4]:
cy_func, cy_grad = build_functions(mod.GM, mod.variables, include_grad=True)
%time cy_func.kernel  # accessing .kernel triggers JIT compilation of the objective
%time cy_grad.kernel  # likewise for the gradient


CPU times: user 208 ms, sys: 6.93 ms, total: 215 ms
Wall time: 1.9 s
CPU times: user 412 ms, sys: 8.72 ms, total: 421 ms
Wall time: 2.3 s
Out[4]:
<function wrapper_module_5e8b7be6_0e88_41e7_9740_bbb826dd132a.autofunc_c>

In [5]:
%timeit cy_func.kernel(np.array([[300., 0.3, 0.3, 0.4]]), np.array([0.]))


The slowest run took 15.18 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 6.7 µs per loop

In [6]:
%timeit cy_grad.kernel(np.array([[300., 0.3, 0.3, 0.4]]), np.array([0.]))


The slowest run took 5.42 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 9.09 µs per loop

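For reference, the compiled kernel can be driven point-by-point over a composition grid with the same single-point calling convention used above. A minimal sketch (hypothetical sweep at fixed T = 300 K, not timed in the original run):

# Sweep Y(AL) at fixed T and Y(CU) = 0.1; the result is written into `out`
out = np.zeros(1)
gm_values = []
for y_al in np.linspace(0.1, 0.8, 8):
    y_zr = 1.0 - y_al - 0.1
    cy_func.kernel(np.array([[300., y_al, 0.1, y_zr]]), out)
    gm_values.append(out[0])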
In [7]:
%%timeit
# Time converting the SymPy Gibbs energy expression into a TF-graph-building function
tf_func = lambdify(mod.variables, mod.GM, modules='tensorflow')


10 loops, best of 3: 107 ms per loop

In [8]:
tf_func = lambdify(mod.variables, mod.GM, modules='tensorflow')

In [9]:
%%timeit
# Time building the TF graph for GM from four scalar placeholders
x1 = tf.placeholder("float", None)
x2 = tf.placeholder("float", None)
x3 = tf.placeholder("float", None)
x4 = tf.placeholder("float", None)
result = tf_func(x1, x2, x3, x4)


1 loop, best of 3: 375 ms per loop

In [10]:
#x2_vals = np.random.uniform(size=1000)
#x3_vals = 1-x2_vals
# Rebuild the graph once for reuse (%%timeit above does not keep its variables)
x1 = tf.placeholder("float", None)
x2 = tf.placeholder("float", None)
x3 = tf.placeholder("float", None)
x4 = tf.placeholder("float", None)
result = tf_func(x1, x2, x3, x4)

In [11]:
# Symbolic gradient of GM with respect to T and the three site fractions
grad = tf.gradients(result, [x1, x2, x3, x4], stop_gradients=[x1, x2, x3, x4])

In [12]:
%timeit sess.run(result, {x1: 300., x2: 0.3, x3: 0.3, x4: 0.4})


The slowest run took 258.49 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 865 µs per loop

In [13]:
%timeit sess.run(grad, {x1: 300., x2: 0.3, x3: 0.3, x4: 0.4})


The slowest run took 229.71 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 3.28 ms per loop

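Much of the per-call cost here is likely Session.run dispatch overhead. Since the lambdified graph is built from element-wise TensorFlow ops, it should also accept equal-length vectors in the feed dict, amortizing that overhead over many points. A sketch under that assumption (not timed in the original run):

# Evaluate GM and its gradient at 1000 identical points in one session call
n = 1000
feed = {x1: np.full(n, 300.0),
        x2: np.full(n, 0.3),
        x3: np.full(n, 0.3),
        x4: np.full(n, 0.4)}
batch_gm = sess.run(result, feed)    # one value per point
batch_grad = sess.run(grad, feed)    # four arrays, one per variable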
In [14]:
# Compile the whole energy computation into a single fused XLA executable
[tf_xla_func] = xla.compile(tf_func, [x1, x2, x3, x4])

In [15]:
%timeit sess.run(tf_xla_func, {x1: 300., x2: 0.3, x3: 0.3, x4: 0.4})


The slowest run took 996.98 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 807 µs per loop

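The same xla.compile pattern can wrap a computation that returns both the energy and its gradient, so a single fused executable produces both. A sketch along the lines of the cell above (assumed usage, not timed in the original run):

# Compile value + gradient together; gm_and_grad is a hypothetical helper
def gm_and_grad(t, y_al, y_cu, y_zr):
    gm = tf_func(t, y_al, y_cu, y_zr)
    return [gm] + tf.gradients(gm, [t, y_al, y_cu, y_zr])

xla_outputs = xla.compile(gm_and_grad, [x1, x2, x3, x4])
# sess.run(xla_outputs, {x1: 300., x2: 0.3, x3: 0.3, x4: 0.4})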
In [ ]: