TensorflowAndAutoGrad


Autograd


In [1]:
import autograd
from autograd import numpy as np

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Dimensionality of the test problem used by the single-run and %timeit cells.
ndim = 10

In [3]:
def x_init(ndim, seed=0):
    """Return a reproducible random starting point of length ``ndim``.

    The global NumPy random generator is re-seeded with ``seed`` on every
    call, so repeated calls with the same arguments return the same vector
    (drawn with ``np.random.rand``).
    """
    rng = np.random
    rng.seed(seed)
    start = rng.rand(ndim)
    return start

In [4]:
def y(x):
    """Objective function: the sum of the squared components of ``x``."""
    squares = x ** 2
    return np.sum(squares)

def g_exact(x):
    """Hand-written gradient of ``y``: d/dx sum(x**2) = 2 * x."""
    gradient = x * 2.
    return gradient

# Gradient of `y` obtained from autograd instead of being written by hand;
# it is passed to `minimize` as `jac` in the "automatic gradient" runs.
g_auto = autograd.grad(y)

In [5]:
from scipy.optimize import minimize

With numerical gradient


In [6]:
# Baseline BFGS run: no `jac` callback is supplied to `minimize`.
r = minimize(fun=y, x0=x_init(ndim), method='BFGS')
# Report the located minimizer, then the iteration / evaluation counters.
print("minimum: {}".format(r.x))
print("nit={}  nfev={}  njev={}".format(r.nit, r.nfev, r.njev))


minimum: [  1.44079457e-08   1.07777187e-09  -2.37100333e-08   4.97857175e-09
  -1.07706796e-08  -4.38312052e-09  -1.55044568e-08  -1.17504437e-08
  -1.18699967e-08  -1.79687886e-08]
nit=2  nfev=48  njev=4

With analytic gradient


In [7]:
# BFGS run with the hand-written gradient `g_exact` supplied as `jac`.
r = minimize(fun=y, x0=x_init(ndim), method='BFGS', jac=g_exact)
# Report the located minimizer, then the iteration / evaluation counters.
print("minimum: {}".format(r.x))
print("nit={}  nfev={}  njev={}".format(r.nit, r.nfev, r.njev))


minimum: [ -4.25752641e-17  -1.59377719e-17  -2.36762649e-17   8.01631981e-18
   3.28716546e-17   6.41847686e-17  -3.87602277e-18   3.07642366e-17
  -3.52636757e-17  -6.43745040e-18]
nit=2  nfev=4  njev=4

With automatic gradient


In [8]:
# BFGS run with the autograd-generated gradient `g_auto` supplied as `jac`.
r = minimize(fun=y, x0=x_init(ndim), method='BFGS', jac=g_auto)
# Report the located minimizer, then the iteration / evaluation counters.
print("minimum: {}".format(r.x))
print("nit={}  nfev={}  njev={}".format(r.nit, r.nfev, r.njev))


minimum: [ -4.25752641e-17  -1.59377719e-17  -2.36762649e-17   8.01631981e-18
   3.28716546e-17   6.41847686e-17  -3.87602277e-18   3.07642366e-17
  -3.52636757e-17  -6.43745040e-18]
nit=2  nfev=4  njev=4

Compare timing


In [9]:
# Time one complete BFGS minimization for each way of providing the gradient.
# Every timed statement also includes the x_init(ndim) call that builds the
# starting point.
print("time with numerical derivatives")
# No `jac` is passed in this run.
%timeit minimize(y, x_init(ndim), method='BFGS')
print("time with analytic derivatives")
# Hand-written gradient.
%timeit minimize(y, x_init(ndim), jac=g_exact, method='BFGS')
print("time with autograd")
# Gradient produced by autograd.grad.
%timeit minimize(y, x_init(ndim), jac=g_auto, method='BFGS')


time with numerical derivatives
1000 loops, best of 3: 782 µs per loop
time with analytic derivatives
1000 loops, best of 3: 238 µs per loop
time with autograd
1000 loops, best of 3: 930 µs per loop

Benchmarking


In [10]:
import time as wallclock

def analyze(g, ndims):
    """Benchmark BFGS minimization of ``y`` for each problem size in ``ndims``.

    Parameters
    ----------
    g : callable or None
        Forwarded to ``minimize`` as ``jac``.  The notebook passes ``None``
        for its 'Numerical' case, i.e. no gradient callback is supplied.
    ndims : array of int
        Problem sizes; one benchmark is run per entry.

    Returns
    -------
    dict
        ``'niters'`` and ``'nfevs'`` hold ``r.nit`` and ``r.nfev`` of a single
        run per size; ``'times'`` holds the mean duration in seconds of one
        ``minimize`` call (including building the start point with
        ``x_init``), averaged over ``max(1, 2000 // ndim)`` repetitions.
    """
    niters = np.zeros_like(ndims)
    nfevs = np.zeros_like(ndims)
    times = np.zeros_like(ndims, dtype=np.float64)
    for i, ndim in enumerate(ndims):
        # Untimed run: supplies the iteration / evaluation counters.
        r = minimize(y, x_init(ndim), jac=g, method='BFGS')
        niters[i] = r.nit
        nfevs[i] = r.nfev

        # Fewer repetitions for larger problems keeps the total run time bounded.
        nloops = max(1, 2000 // ndim)

        # perf_counter is monotonic and high-resolution; time.time() follows
        # the system clock, which can be adjusted while the benchmark runs.
        t0 = wallclock.perf_counter()
        for _ in range(nloops):
            minimize(y, x_init(ndim), jac=g, method='BFGS')
        t1 = wallclock.perf_counter()
        times[i] = (t1 - t0) / nloops

    return {'niters': niters, 'nfevs': nfevs, 'times': times}

In [11]:
# Problem sizes to benchmark.
ndims = np.array([10, 100, 1000, 3000], dtype=np.int64)

# One benchmark per gradient strategy, keyed by the label used in the plots.
# The entries are evaluated in the order they are written.
results = {
    'Numerical': analyze(None, ndims),
    'Analytic': analyze(g_exact, ndims),
    'AutoGrad': analyze(g_auto, ndims),
}

In [12]:
def plot(results, ordinate, ylabel, ylogscale=True, xvalues=None):
    """Plot one benchmark quantity against problem size, one curve per method.

    Parameters
    ----------
    results : dict
        Maps a curve label to a dict of arrays (as returned by ``analyze``).
    ordinate : str
        Key of the array to plot from each entry, e.g. ``'times'``.
    ylabel : str
        Label for the y axis.
    ylogscale : bool, optional
        If true, both axes are logarithmic (``loglog``); otherwise only the
        x axis is (``semilogx``).
    xvalues : array-like, optional
        Abscissa values.  Defaults to the notebook-level ``ndims`` array, which
        is what this function always used before the parameter existed.
    """
    if xvalues is None:
        # Backward-compatible default: fall back to the global problem sizes.
        xvalues = ndims

    plt.figure()
    draw = plt.loglog if ylogscale else plt.semilogx
    for label in results:
        draw(xvalues, results[label][ordinate], label=label)

    plt.xlabel('dimensions')
    plt.ylabel(ylabel)
    plt.legend(loc='upper left')
    plt.show()

In [13]:
# Iteration counts on a linear y axis; evaluation counts and timings on log-log axes.
plot(results, ordinate='niters', ylabel='iterations', ylogscale=False)
plot(results, ordinate='nfevs', ylabel='function evaluations')
plot(results, ordinate='times', ylabel='time (s)')


Theano


In [14]:
import theano
import theano.tensor as T

# Symbolic version of the objective: a vector input `x_`, the scalar
# expression sum(x_ ** 2), and the symbolic gradient of that expression
# with respect to `x_`.
x_ = T.dvector('x')
y_ = T.sum(x_ ** 2)
g_ = T.grad(y_, x_)
# Compile the gradient expression into a callable of one argument so it can
# be passed to `minimize` as `jac`.
g_theano = theano.function([x_], g_)

In [15]:
# Same single-size timing as for the other gradient strategies, now with the
# Theano-compiled gradient as `jac`.
print("time with Theano")
%timeit minimize(y, x_init(ndim), jac=g_theano, method='BFGS')


time with Theano
The slowest run took 6.08 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 3: 329 µs per loop

In [16]:
# Benchmark the Theano gradient over all problem sizes and store it next to
# the existing entries so it shows up as its own curve in the plots.
results['Theano'] = analyze(g_theano, ndims)

In [17]:
# Redraw the three comparison plots, now including the 'Theano' entry.
plot(results, ordinate='niters', ylabel='iterations', ylogscale=False)
plot(results, ordinate='nfevs', ylabel='function evaluations')
plot(results, ordinate='times', ylabel='time (s)')


Tensorflow


In [14]:
import tensorflow as tf

In [15]:
# Stopping criteria for the gradient-descent loop: it ends once |x| is no
# larger than `tol`, or after `maxiter` steps.
tol = 1e-9
maxiter = 1000

In [16]:
# One-element problem for TensorFlow: a variable initialised to [3.] and the
# objective y = x**2 built from it.
# NOTE(review): this rebinds `y`, shadowing the objective function `y(x)`
# defined in the Autograd section. Cells that call `y` or `analyze` will fail
# if they are re-run after this cell.
x = tf.Variable(np.array([3.]))
y = x**2

In [17]:
# Printing the Variable shows the Python object rather than a numeric value;
# the loop below obtains values through sess.run(x).
print(x)


<tensorflow.python.ops.variables.Variable object at 0x7ffa5ef4fa58>

In [18]:
# Plain gradient descent on y = x**2 with learning rate 0.5.  One update is
# x <- x - 0.5 * dy/dx = x - 0.5 * 2x = 0, so a single step reaches the minimum.
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(y)

# Before starting, initialize the variables.  We will 'run' this first.
# `tf.initialize_all_variables` is deprecated in favour of
# `tf.global_variables_initializer`; fall back to the old name on TensorFlow
# releases that do not provide the new one.
if hasattr(tf, 'global_variables_initializer'):
    init = tf.global_variables_initializer()
else:
    init = tf.initialize_all_variables()

# Launch the graph.
with tf.Session() as sess:
    sess.run(init)
    # Take descent steps until every component of x is within `tol` of zero
    # (the minimizer of y) or `maxiter` steps have been taken.  Reducing with
    # np.max keeps the test well-defined if x has more than one element.
    step = 0
    while np.max(np.abs(sess.run(x))) > tol and step < maxiter:
        step += 1
        if step % 20 == 0:
            # Progress report, showing the state before this step's update.
            x_val, y_val = sess.run([x, y])
            print(step, x_val, y_val)
        sess.run(train)

    # Fetch x and y together so both values come from the same state.
    x_min, y_min = sess.run([x, y])
    print("Minimum found after {} steps: x={}, y={}".format(step, x_min, y_min))


Minimum found after 1 steps: x=[ 0.], y=[ 0.]