Optimize Forward Step (2)

Test the running time for forward step in variational autoencoder by comparing 4 versions:

  • Tensorflow
  • numpy only
  • numpy with numba
  • numpy with cython

Compare the results in different batch sizes


In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import time
from tensorflow.python.client import timeline
import matplotlib.pyplot as plt
%matplotlib inline

Initialize Parameters


In [2]:
import sys
sys.path.append('../vae')
from vae_sta663 import *
from misc_sta663 import *

In [3]:
import tensorflow as tf
import numpy as np

config = {}
config['x_in'] = 784
config['encoder_1'] = 500
config['encoder_2'] = 500
config['decoder_1'] = 500
config['decoder_2'] = 500
config['z'] = 20

encoder_weights, _ = init_weights(config)

In [4]:
# transform tensors to numpy array
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

encoder_weights_np = {}
encoder_weights_np['h1'] = sess.run(encoder_weights['h1'])
encoder_weights_np['h2'] = sess.run(encoder_weights['h2'])
encoder_weights_np['mu'] = sess.run(encoder_weights['mu'])
encoder_weights_np['sigma'] = sess.run(encoder_weights['sigma'])
encoder_weights_np['b1'] = sess.run(encoder_weights['b1'])
encoder_weights_np['b2'] = sess.run(encoder_weights['b2'])
encoder_weights_np['bias_mu'] = sess.run(encoder_weights['bias_mu'])
encoder_weights_np['bias_sigma'] = sess.run(encoder_weights['bias_sigma'])

In [5]:
(mnist, n_samples) = mnist_loader()


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz

Tensorflow


In [6]:
def forward_z(x, encoder_weights):
    """
    Compute mean and sigma of z
    """
    layer_1 = tf.nn.softplus(tf.add(tf.matmul(x, encoder_weights['h1']), encoder_weights['b1']))
    layer_2 = tf.nn.softplus(tf.add(tf.matmul(layer_1, encoder_weights['h2']), encoder_weights['b2']))
    z_mean = tf.add(tf.matmul(layer_2, encoder_weights['mu']), encoder_weights['bias_mu'])
    z_sigma = tf.add(tf.matmul(layer_2, encoder_weights['sigma']), encoder_weights['bias_sigma'])
    
    return(z_mean, z_sigma)

Numpy Only


In [7]:
def forward_z_raw(x, encoder_weights):
    """
    Compute mean and sigma of z using numpy without any optimization
    """
    layer_1 = np.log(np.exp(x_sample @ encoder_weights_np['h1'] + encoder_weights_np['b1']) + 1)
    layer_2 = np.log(np.exp(layer_1 @ encoder_weights_np['h2'] + encoder_weights_np['b2']) + 1)
    z_mean = (layer_2 @ encoder_weights_np['mu'] + encoder_weights_np['bias_mu'])
    z_sigma = (layer_2 @ encoder_weights_np['sigma'] + encoder_weights_np['bias_sigma'])
    
    return(z_mean, z_sigma)

Numpy with Numba


In [8]:
import numba
from numba import jit, vectorize, float32, float64

@jit('float32[:,:](float64[:,:],float64[:,:])')
def mat_mul(A, B):
    m, n = A.shape
    n, p = B.shape
    C = np.zeros((m, p))
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i,j] += A[i,k] * B[k,j]
    return C

@vectorize([float64(float64)], target='parallel')
def soft_plus(x):
    """
    Vectorize version of numba
    """
    return np.log(np.exp(x) + 1)

@jit('UniTuple(float64[:,:], 2)(float64[:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:])')
def forward_z_numba(x, encoder_weights_h1, encoder_weights_h2, encoder_weights_b1, encoder_weights_b2, encoder_weights_mu, 
                  encoder_weights_bias_mu, encoder_weights_sigma, encoder_weights_bias_sigma):
    """
    Compute mean and sigma of z using numpy without any optimization
    """
    layer_1 = soft_plus(mat_mul(x, encoder_weights_h1) + encoder_weights_b1)
    layer_2 = soft_plus(mat_mul(layer_1, encoder_weights_h2) + encoder_weights_b2)
    z_mean = (mat_mul(layer_2, encoder_weights_mu) + encoder_weights_bias_mu)
    z_sigma = (mat_mul(layer_2, encoder_weights_sigma) + encoder_weights_bias_sigma)
    
    return(z_mean, z_sigma)

Numpy with Cython


In [9]:
%load_ext Cython

In [10]:
%%cython -a
cimport cython
import numpy as np
from libc.math cimport exp, log

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] mat_mul_cython(double[:,:] A, double[:,:] B):
    """Matrix multiply function. Cythonize"""
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int p = B.shape[1]
    cdef int i,j,k
    cdef double[:,:] C = np.zeros((m, p))
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i,j] += A[i,k] * B[k,j]
    return C

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] mat_add_cython(double[:,:] A, double[:] B):
    """Matrix multiply function. Cythonize"""
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int i,j
    cdef double[:,:] C = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            C[i,j] = A[i,j] + B[j]
    return C

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] soft_plus_cython(double[:,:] x):
    cdef int m = x.shape[0]
    cdef int n = x.shape[1]
    cdef double[:,:] y = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            y[i,j] = log(exp(x[i,j])+1)
    return y

@cython.wraparound(False)
@cython.boundscheck(False)
def forward_z_cython(double[:,:] x, double[:,:] encoder_weights_h1, double[:,:] encoder_weights_h2, 
                     double[:] encoder_weights_b1, double[:] encoder_weights_b2, double [:,:] encoder_weights_mu, 
                     double[:] encoder_weights_bias_mu, double[:,:] encoder_weights_sigma, 
                     double[:] encoder_weights_bias_sigma):
    """
    Compute mean and sigma of z using numpy with cython optimization
    """
    cdef double[:,:] layer_1 = soft_plus_cython(mat_add_cython(mat_mul_cython(x, encoder_weights_h1), encoder_weights_b1))
    cdef double[:,:] layer_2 = soft_plus_cython(mat_add_cython(mat_mul_cython(layer_1, encoder_weights_h2), encoder_weights_b2))
    cdef double[:,:] z_mean = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_mu), encoder_weights_bias_mu)
    cdef double[:,:] z_sigma = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_sigma), encoder_weights_bias_sigma)
    
    return(np.array(z_mean), np.array(z_sigma))


Out[10]:
Cython: _cython_magic_74b6f75f751e472122dc4afa85baa6e2.pyx

Generated by Cython 0.23.5

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+01: cimport cython
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+02: import numpy as np
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 03: from libc.math cimport exp, log
 04: 
 05: @cython.wraparound(False)
 06: @cython.boundscheck(False)
+07: cdef double[:,:] mat_mul_cython(double[:,:] A, double[:,:] B):
static __Pyx_memviewslice __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_mul_cython(__Pyx_memviewslice __pyx_v_A, __Pyx_memviewslice __pyx_v_B) {
  int __pyx_v_m;
  int __pyx_v_n;
  int __pyx_v_p;
  int __pyx_v_i;
  int __pyx_v_j;
  int __pyx_v_k;
  __Pyx_memviewslice __pyx_v_C = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("mat_mul_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_74b6f75f751e472122dc4afa85baa6e2.mat_mul_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_C, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
 08:     """Matrix multiply function. Cythonize"""
+09:     cdef int m = A.shape[0]
  __pyx_v_m = (__pyx_v_A.shape[0]);
+10:     cdef int n = A.shape[1]
  __pyx_v_n = (__pyx_v_A.shape[1]);
+11:     cdef int p = B.shape[1]
  __pyx_v_p = (__pyx_v_B.shape[1]);
 12:     cdef int i,j,k
+13:     cdef double[:,:] C = np.zeros((m, p))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_p); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_C = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+14:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+15:         for j in range(p):
    __pyx_t_9 = __pyx_v_p;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+16:             for k in range(n):
      __pyx_t_11 = __pyx_v_n;
      for (__pyx_t_12 = 0; __pyx_t_12 < __pyx_t_11; __pyx_t_12+=1) {
        __pyx_v_k = __pyx_t_12;
+17:                 C[i,j] += A[i,k] * B[k,j]
        __pyx_t_13 = __pyx_v_i;
        __pyx_t_14 = __pyx_v_k;
        __pyx_t_15 = __pyx_v_k;
        __pyx_t_16 = __pyx_v_j;
        __pyx_t_17 = __pyx_v_i;
        __pyx_t_18 = __pyx_v_j;
        *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_C.data + __pyx_t_17 * __pyx_v_C.strides[0]) ) + __pyx_t_18 * __pyx_v_C.strides[1]) )) += ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_A.data + __pyx_t_13 * __pyx_v_A.strides[0]) ) + __pyx_t_14 * __pyx_v_A.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_B.data + __pyx_t_15 * __pyx_v_B.strides[0]) ) + __pyx_t_16 * __pyx_v_B.strides[1]) ))));
      }
    }
  }
+18:     return C
  __PYX_INC_MEMVIEW(&__pyx_v_C, 0);
  __pyx_r = __pyx_v_C;
  goto __pyx_L0;
 19: 
 20: @cython.wraparound(False)
 21: @cython.boundscheck(False)
+22: cdef double[:,:] mat_add_cython(double[:,:] A, double[:] B):
static __Pyx_memviewslice __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_add_cython(__Pyx_memviewslice __pyx_v_A, __Pyx_memviewslice __pyx_v_B) {
  int __pyx_v_m;
  int __pyx_v_n;
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_v_C = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("mat_add_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_74b6f75f751e472122dc4afa85baa6e2.mat_add_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_C, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
 23:     """Matrix multiply function. Cythonize"""
+24:     cdef int m = A.shape[0]
  __pyx_v_m = (__pyx_v_A.shape[0]);
+25:     cdef int n = A.shape[1]
  __pyx_v_n = (__pyx_v_A.shape[1]);
 26:     cdef int i,j
+27:     cdef double[:,:] C = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_C = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+28:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+29:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+30:             C[i,j] = A[i,j] + B[j]
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_j;
      __pyx_t_14 = __pyx_v_i;
      __pyx_t_15 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_C.data + __pyx_t_14 * __pyx_v_C.strides[0]) ) + __pyx_t_15 * __pyx_v_C.strides[1]) )) = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_A.data + __pyx_t_11 * __pyx_v_A.strides[0]) ) + __pyx_t_12 * __pyx_v_A.strides[1]) ))) + (*((double *) ( /* dim=0 */ (__pyx_v_B.data + __pyx_t_13 * __pyx_v_B.strides[0]) ))));
    }
  }
+31:     return C
  __PYX_INC_MEMVIEW(&__pyx_v_C, 0);
  __pyx_r = __pyx_v_C;
  goto __pyx_L0;
 32: 
 33: @cython.wraparound(False)
 34: @cython.boundscheck(False)
+35: cdef double[:,:] soft_plus_cython(double[:,:] x):
static __Pyx_memviewslice __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_soft_plus_cython(__Pyx_memviewslice __pyx_v_x) {
  int __pyx_v_m;
  int __pyx_v_n;
  __Pyx_memviewslice __pyx_v_y = { 0, 0, { 0 }, { 0 }, { 0 } };
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("soft_plus_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_74b6f75f751e472122dc4afa85baa6e2.soft_plus_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_y, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
+36:     cdef int m = x.shape[0]
  __pyx_v_m = (__pyx_v_x.shape[0]);
+37:     cdef int n = x.shape[1]
  __pyx_v_n = (__pyx_v_x.shape[1]);
+38:     cdef double[:,:] y = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_y = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+39:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+40:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+41:             y[i,j] = log(exp(x[i,j])+1)
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_i;
      __pyx_t_14 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_y.data + __pyx_t_13 * __pyx_v_y.strides[0]) ) + __pyx_t_14 * __pyx_v_y.strides[1]) )) = log((exp((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_x.data + __pyx_t_11 * __pyx_v_x.strides[0]) ) + __pyx_t_12 * __pyx_v_x.strides[1]) )))) + 1.0));
    }
  }
+42:     return y
  __PYX_INC_MEMVIEW(&__pyx_v_y, 0);
  __pyx_r = __pyx_v_y;
  goto __pyx_L0;
 43: 
 44: @cython.wraparound(False)
 45: @cython.boundscheck(False)
+46: def forward_z_cython(double[:,:] x, double[:,:] encoder_weights_h1, double[:,:] encoder_weights_h2,
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_1forward_z_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static char __pyx_doc_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_forward_z_cython[] = "\n    Compute mean and sigma of z using numpy with cython optimization\n    ";
static PyMethodDef __pyx_mdef_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_1forward_z_cython = {"forward_z_cython", (PyCFunction)__pyx_pw_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_1forward_z_cython, METH_VARARGS|METH_KEYWORDS, __pyx_doc_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_forward_z_cython};
static PyObject *__pyx_pw_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_1forward_z_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  __Pyx_memviewslice __pyx_v_x = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_h1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_h2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_b1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_b2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_mu = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_bias_mu = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_sigma = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_bias_sigma = { 0, 0, { 0 }, { 0 }, { 0 } };
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("forward_z_cython (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_x,&__pyx_n_s_encoder_weights_h1,&__pyx_n_s_encoder_weights_h2,&__pyx_n_s_encoder_weights_b1,&__pyx_n_s_encoder_weights_b2,&__pyx_n_s_encoder_weights_mu,&__pyx_n_s_encoder_weights_bias_mu,&__pyx_n_s_encoder_weights_sigma,&__pyx_n_s_encoder_weights_bias_sigma,0};
    PyObject* values[9] = {0,0,0,0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_x)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        case  1:
        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_h1)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  2:
        if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_h2)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  3:
        if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_b1)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  4:
        if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_b2)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  5:
        if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_mu)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  6:
        if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_bias_mu)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  7:
        if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_sigma)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  8:
        if (likely((values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_bias_sigma)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "forward_z_cython") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 9) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
      values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
      values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
      values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
    }
    __pyx_v_x = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[0]); if (unlikely(!__pyx_v_x.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_h1 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[1]); if (unlikely(!__pyx_v_encoder_weights_h1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_h2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[2]); if (unlikely(!__pyx_v_encoder_weights_h2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_b1 = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[3]); if (unlikely(!__pyx_v_encoder_weights_b1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_b2 = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[4]); if (unlikely(!__pyx_v_encoder_weights_b2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_mu = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[5]); if (unlikely(!__pyx_v_encoder_weights_mu.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_bias_mu = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[6]); if (unlikely(!__pyx_v_encoder_weights_bias_mu.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_sigma = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[7]); if (unlikely(!__pyx_v_encoder_weights_sigma.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_bias_sigma = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[8]); if (unlikely(!__pyx_v_encoder_weights_bias_sigma.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("forward_z_cython", 1, 9, 9, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_74b6f75f751e472122dc4afa85baa6e2.forward_z_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_forward_z_cython(__pyx_self, __pyx_v_x, __pyx_v_encoder_weights_h1, __pyx_v_encoder_weights_h2, __pyx_v_encoder_weights_b1, __pyx_v_encoder_weights_b2, __pyx_v_encoder_weights_mu, __pyx_v_encoder_weights_bias_mu, __pyx_v_encoder_weights_sigma, __pyx_v_encoder_weights_bias_sigma);
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_forward_z_cython(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_x, __Pyx_memviewslice __pyx_v_encoder_weights_h1, __Pyx_memviewslice __pyx_v_encoder_weights_h2, __Pyx_memviewslice __pyx_v_encoder_weights_b1, __Pyx_memviewslice __pyx_v_encoder_weights_b2, __Pyx_memviewslice __pyx_v_encoder_weights_mu, __Pyx_memviewslice __pyx_v_encoder_weights_bias_mu, __Pyx_memviewslice __pyx_v_encoder_weights_sigma, __Pyx_memviewslice __pyx_v_encoder_weights_bias_sigma) {
  __Pyx_memviewslice __pyx_v_layer_1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_layer_2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_z_mean = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_z_sigma = { 0, 0, { 0 }, { 0 }, { 0 } };
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("forward_z_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_AddTraceback("_cython_magic_74b6f75f751e472122dc4afa85baa6e2.forward_z_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_layer_1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_layer_2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_z_mean, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_z_sigma, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_x, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_h1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_h2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_b1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_b2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_mu, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_bias_mu, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_sigma, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_bias_sigma, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__14 = PyTuple_Pack(13, __pyx_n_s_x, __pyx_n_s_encoder_weights_h1, __pyx_n_s_encoder_weights_h2, __pyx_n_s_encoder_weights_b1, __pyx_n_s_encoder_weights_b2, __pyx_n_s_encoder_weights_mu, __pyx_n_s_encoder_weights_bias_mu, __pyx_n_s_encoder_weights_sigma, __pyx_n_s_encoder_weights_bias_sigma, __pyx_n_s_layer_1, __pyx_n_s_layer_2, __pyx_n_s_z_mean, __pyx_n_s_z_sigma); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple__14);
  __Pyx_GIVEREF(__pyx_tuple__14);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_1forward_z_cython, NULL, __pyx_n_s_cython_magic_74b6f75f751e472122); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_forward_z_cython, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(9, 0, 13, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jovyan_cache_ipython_cytho, __pyx_n_s_forward_z_cython, 46, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 47:                      double[:] encoder_weights_b1, double[:] encoder_weights_b2, double [:,:] encoder_weights_mu,
 48:                      double[:] encoder_weights_bias_mu, double[:,:] encoder_weights_sigma,
 49:                      double[:] encoder_weights_bias_sigma):
 50:     """
 51:     Compute mean and sigma of z using numpy with cython optimization
 52:     """
+53:     cdef double[:,:] layer_1 = soft_plus_cython(mat_add_cython(mat_mul_cython(x, encoder_weights_h1), encoder_weights_b1))
  __pyx_t_1 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_mul_cython(__pyx_v_x, __pyx_v_encoder_weights_h1); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_2 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_add_cython(__pyx_t_1, __pyx_v_encoder_weights_b1); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_t_1 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_soft_plus_cython(__pyx_t_2); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_v_layer_1 = __pyx_t_1;
  __pyx_t_1.memview = NULL;
  __pyx_t_1.data = NULL;
+54:     cdef double[:,:] layer_2 = soft_plus_cython(mat_add_cython(mat_mul_cython(layer_1, encoder_weights_h2), encoder_weights_b2))
  __pyx_t_1 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_mul_cython(__pyx_v_layer_1, __pyx_v_encoder_weights_h2); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_2 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_add_cython(__pyx_t_1, __pyx_v_encoder_weights_b2); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_t_1 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_soft_plus_cython(__pyx_t_2); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_v_layer_2 = __pyx_t_1;
  __pyx_t_1.memview = NULL;
  __pyx_t_1.data = NULL;
+55:     cdef double[:,:] z_mean = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_mu), encoder_weights_bias_mu)
  __pyx_t_1 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_mul_cython(__pyx_v_layer_2, __pyx_v_encoder_weights_mu); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_2 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_add_cython(__pyx_t_1, __pyx_v_encoder_weights_bias_mu); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_v_z_mean = __pyx_t_2;
  __pyx_t_2.memview = NULL;
  __pyx_t_2.data = NULL;
+56:     cdef double[:,:] z_sigma = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_sigma), encoder_weights_bias_sigma)
  __pyx_t_2 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_mul_cython(__pyx_v_layer_2, __pyx_v_encoder_weights_sigma); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_1 = __pyx_f_46_cython_magic_74b6f75f751e472122dc4afa85baa6e2_mat_add_cython(__pyx_t_2, __pyx_v_encoder_weights_bias_sigma); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_v_z_sigma = __pyx_t_1;
  __pyx_t_1.memview = NULL;
  __pyx_t_1.data = NULL;
 57: 
+58:     return(np.array(z_mean), np.array(z_sigma))
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_array); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = __pyx_memoryview_fromslice(__pyx_v_z_mean, 2, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_6 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_5))) {
    __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
    if (likely(__pyx_t_6)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
      __Pyx_INCREF(__pyx_t_6);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_5, function);
    }
  }
  if (!__pyx_t_6) {
    __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_GOTREF(__pyx_t_3);
  } else {
    __pyx_t_7 = PyTuple_New(1+1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_6); __pyx_t_6 = NULL;
    __Pyx_GIVEREF(__pyx_t_4);
    PyTuple_SET_ITEM(__pyx_t_7, 0+1, __pyx_t_4);
    __pyx_t_4 = 0;
    __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_t_7, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  }
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_array); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_7 = __pyx_memoryview_fromslice(__pyx_v_z_sigma, 2, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_6 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_4))) {
    __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_4);
    if (likely(__pyx_t_6)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
      __Pyx_INCREF(__pyx_t_6);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_4, function);
    }
  }
  if (!__pyx_t_6) {
    __pyx_t_5 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_7); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_GOTREF(__pyx_t_5);
  } else {
    __pyx_t_8 = PyTuple_New(1+1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_6); __pyx_t_6 = NULL;
    __Pyx_GIVEREF(__pyx_t_7);
    PyTuple_SET_ITEM(__pyx_t_8, 0+1, __pyx_t_7);
    __pyx_t_7 = 0;
    __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_8, NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_5);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  }
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_GIVEREF(__pyx_t_3);
  PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3);
  __Pyx_GIVEREF(__pyx_t_5);
  PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_5);
  __pyx_t_3 = 0;
  __pyx_t_5 = 0;
  __pyx_r = __pyx_t_4;
  __pyx_t_4 = 0;
  goto __pyx_L0;

In [11]:
encoder_weights_cython = {}
encoder_weights_cython['h1'] = encoder_weights_np['h1'].astype(np.float64)
encoder_weights_cython['h2'] = encoder_weights_np['h2'].astype(np.float64)
encoder_weights_cython['b1'] = encoder_weights_np['b1'].astype(np.float64)
encoder_weights_cython['b2'] = encoder_weights_np['b2'].astype(np.float64)
encoder_weights_cython['mu'] = encoder_weights_np['mu'].astype(np.float64)
encoder_weights_cython['bias_mu'] = encoder_weights_np['bias_mu'].astype(np.float64)
encoder_weights_cython['sigma'] = encoder_weights_np['sigma'].astype(np.float64)
encoder_weights_cython['bias_sigma'] = encoder_weights_np['bias_sigma'].astype(np.float64)

Test on Different Batch Sizes


In [12]:
batch_size = np.arange(50, 1000, 50)
run_time_best = np.zeros((4, batch_size.shape[0]))
run_time_worst = np.zeros((4, batch_size.shape[0]))

In [13]:
for cnt, s in enumerate(batch_size):
    print('Evaluating at: %ds' % s)
    
    x_sample, _ = mnist.train.next_batch(s)
    x_sample_tf = tf.constant(x_sample)
    x_sample_cython = x_sample.astype(np.float64)
    
    result_1 = %timeit -o -n10 -r3 sess.run(forward_z(x_sample_tf, encoder_weights))
    run_time_best[0,cnt] = result_1.best
    run_time_worst[0, cnt] = result_1.worst
    
    result_2 = %timeit -o -n10 -r3 forward_z_raw(x_sample, encoder_weights_np)
    run_time_best[1,cnt] = result_2.best
    run_time_worst[1, cnt] = result_2.worst
    
    result_3 = %timeit -o -n10 -r3 forward_z_numba(x_sample, encoder_weights_np['h1'], encoder_weights_np['h2'], encoder_weights_np['b1'], encoder_weights_np['b2'], encoder_weights_np['mu'], encoder_weights_np['bias_mu'], encoder_weights_np['sigma'], encoder_weights_np['bias_sigma'])
    run_time_best[2,cnt] = result_3.best
    run_time_worst[2, cnt] = result_3.worst
    
    result_4 = %timeit -o -n10 -r3 forward_z_cython(x_sample_cython, encoder_weights_cython['h1'], encoder_weights_cython['h2'], encoder_weights_cython['b1'], encoder_weights_cython['b2'], encoder_weights_cython['mu'], encoder_weights_cython['bias_mu'], encoder_weights_cython['sigma'], encoder_weights_cython['bias_sigma'])
    run_time_best[3,cnt] = result_4.best
    run_time_worst[3, cnt] = result_4.worst


Evaluating at: 50s
10 loops, best of 3: 11.3 ms per loop
The slowest run took 15.74 times longer than the fastest. This could mean that an intermediate result is being cached.
10 loops, best of 3: 2.12 ms per loop
10 loops, best of 3: 122 ms per loop
10 loops, best of 3: 43.4 ms per loop
Evaluating at: 100s
10 loops, best of 3: 16 ms per loop
10 loops, best of 3: 3.64 ms per loop
10 loops, best of 3: 192 ms per loop
10 loops, best of 3: 88.4 ms per loop
Evaluating at: 150s
10 loops, best of 3: 21.2 ms per loop
10 loops, best of 3: 6.74 ms per loop
10 loops, best of 3: 262 ms per loop
10 loops, best of 3: 133 ms per loop
Evaluating at: 200s
10 loops, best of 3: 28 ms per loop
10 loops, best of 3: 6.44 ms per loop
10 loops, best of 3: 346 ms per loop
10 loops, best of 3: 180 ms per loop
Evaluating at: 250s
10 loops, best of 3: 31.4 ms per loop
10 loops, best of 3: 7.79 ms per loop
10 loops, best of 3: 416 ms per loop
10 loops, best of 3: 236 ms per loop
Evaluating at: 300s
10 loops, best of 3: 38.2 ms per loop
10 loops, best of 3: 9.32 ms per loop
10 loops, best of 3: 494 ms per loop
10 loops, best of 3: 271 ms per loop
Evaluating at: 350s
10 loops, best of 3: 43.8 ms per loop
10 loops, best of 3: 11 ms per loop
10 loops, best of 3: 574 ms per loop
10 loops, best of 3: 313 ms per loop
Evaluating at: 400s
10 loops, best of 3: 47.9 ms per loop
10 loops, best of 3: 12.4 ms per loop
10 loops, best of 3: 648 ms per loop
10 loops, best of 3: 361 ms per loop
Evaluating at: 450s
10 loops, best of 3: 55 ms per loop
10 loops, best of 3: 13.8 ms per loop
10 loops, best of 3: 711 ms per loop
10 loops, best of 3: 410 ms per loop
Evaluating at: 500s
10 loops, best of 3: 58.6 ms per loop
10 loops, best of 3: 14.5 ms per loop
10 loops, best of 3: 777 ms per loop
10 loops, best of 3: 452 ms per loop
Evaluating at: 550s
10 loops, best of 3: 63 ms per loop
10 loops, best of 3: 15.8 ms per loop
10 loops, best of 3: 862 ms per loop
10 loops, best of 3: 501 ms per loop
Evaluating at: 600s
10 loops, best of 3: 71 ms per loop
10 loops, best of 3: 17.8 ms per loop
10 loops, best of 3: 942 ms per loop
10 loops, best of 3: 544 ms per loop
Evaluating at: 650s
10 loops, best of 3: 81.6 ms per loop
10 loops, best of 3: 18.7 ms per loop
10 loops, best of 3: 1.01 s per loop
10 loops, best of 3: 579 ms per loop
Evaluating at: 700s
10 loops, best of 3: 83.2 ms per loop
10 loops, best of 3: 20.4 ms per loop
10 loops, best of 3: 1.09 s per loop
10 loops, best of 3: 636 ms per loop
Evaluating at: 750s
10 loops, best of 3: 85.2 ms per loop
10 loops, best of 3: 21.4 ms per loop
10 loops, best of 3: 1.17 s per loop
10 loops, best of 3: 686 ms per loop
Evaluating at: 800s
10 loops, best of 3: 92 ms per loop
10 loops, best of 3: 22.8 ms per loop
10 loops, best of 3: 1.26 s per loop
10 loops, best of 3: 724 ms per loop
Evaluating at: 850s
10 loops, best of 3: 102 ms per loop
10 loops, best of 3: 25 ms per loop
10 loops, best of 3: 1.32 s per loop
10 loops, best of 3: 772 ms per loop
Evaluating at: 900s
10 loops, best of 3: 110 ms per loop
10 loops, best of 3: 25.8 ms per loop
10 loops, best of 3: 1.41 s per loop
10 loops, best of 3: 798 ms per loop
Evaluating at: 950s
10 loops, best of 3: 117 ms per loop
10 loops, best of 3: 27.2 ms per loop
10 loops, best of 3: 1.47 s per loop
10 loops, best of 3: 847 ms per loop

In [23]:
plt.fill_between(batch_size, run_time_worst[0,:], run_time_best[0,:], facecolor='blue', color='none', label='tf')
plt.fill_between(batch_size, run_time_worst[1,:], run_time_best[1,:], facecolor='green', color='none', label='numpy')
plt.fill_between(batch_size, run_time_worst[2,:], run_time_best[2,:], facecolor='red', color='none', label='numba')
plt.fill_between(batch_size, run_time_worst[3,:], run_time_best[3,:], facecolor='yellow', color='none', label='cython')

plt.xlabel('batch size')
plt.ylabel('time:s')
plt.legend(loc='upper left')

plt.show()



In [ ]: