Optimize Forward Step (3)

Test the running time for 2 forward steps in variational autoencoder by comparing 4 versions:

  • Tensorflow
  • numpy only
  • numpy with numba
  • numpy with cython

Compare the results in different batch sizes


In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import time
from tensorflow.python.client import timeline
import matplotlib.pyplot as plt
%matplotlib inline

Initialize Parameters


In [2]:
import sys
sys.path.append('../vae')
from vae_sta663 import *
from misc_sta663 import *

In [6]:
import tensorflow as tf
import numpy as np

config = {}
config['x_in'] = 784
config['encoder_1'] = 500
config['encoder_2'] = 500
config['decoder_1'] = 500
config['decoder_2'] = 500
config['z'] = 20

encoder_weights, decoder_weights = init_weights(config)

In [7]:
# transform tensors to numpy array
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

encoder_weights_np = {}
encoder_weights_np['h1'] = sess.run(encoder_weights['h1'])
encoder_weights_np['h2'] = sess.run(encoder_weights['h2'])
encoder_weights_np['mu'] = sess.run(encoder_weights['mu'])
encoder_weights_np['sigma'] = sess.run(encoder_weights['sigma'])
encoder_weights_np['b1'] = sess.run(encoder_weights['b1'])
encoder_weights_np['b2'] = sess.run(encoder_weights['b2'])
encoder_weights_np['bias_mu'] = sess.run(encoder_weights['bias_mu'])
encoder_weights_np['bias_sigma'] = sess.run(encoder_weights['bias_sigma'])

decoder_weights_np = {}
decoder_weights_np['h1'] = sess.run(decoder_weights['h1'])
decoder_weights_np['h2'] = sess.run(decoder_weights['h2'])
decoder_weights_np['mu'] = sess.run(decoder_weights['mu'])
decoder_weights_np['sigma'] = sess.run(decoder_weights['sigma'])
decoder_weights_np['b1'] = sess.run(decoder_weights['b1'])
decoder_weights_np['b2'] = sess.run(decoder_weights['b2'])
decoder_weights_np['bias_mu'] = sess.run(decoder_weights['bias_mu'])
decoder_weights_np['bias_sigma'] = sess.run(decoder_weights['bias_sigma'])

In [8]:
(mnist, n_samples) = mnist_loader()


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz

Tensorflow


In [15]:
def forward_tf(x, encoder_weights, decoder_weights, batch_size):
    """
    forward step in vae
    """
    layer_1 = tf.nn.softplus(tf.add(tf.matmul(x, encoder_weights['h1']), encoder_weights['b1']))
    layer_2 = tf.nn.softplus(tf.add(tf.matmul(layer_1, encoder_weights['h2']), encoder_weights['b2']))
    z_mean = tf.add(tf.matmul(layer_2, encoder_weights['mu']), encoder_weights['bias_mu'])
    z_sigma = tf.add(tf.matmul(layer_2, encoder_weights['sigma']), encoder_weights['bias_sigma'])
    
    # compute z by drawing sample from normal distribution
    eps = tf.random_normal((batch_size, 20), 0, 1, dtype=tf.float32)
    z_val = tf.add(z_mean, tf.multiply(tf.sqrt(tf.exp(z_sigma)), eps))
    
    layer_1 = tf.nn.softplus(tf.add(tf.matmul(z_val, decoder_weights['h1']), decoder_weights['b1']))
    layer_2 = tf.nn.softplus(tf.add(tf.matmul(layer_1, decoder_weights['h2']), decoder_weights['b2']))
    x_prime = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, decoder_weights['mu']), decoder_weights['bias_mu']))
    
    return x_prime

In [16]:
batch_size = 100
x_sample, _ = mnist.train.next_batch(batch_size)
x_sample_tf = tf.constant(x_sample)

In [17]:
%timeit -n10 -r3 sess.run(forward_tf(x_sample_tf, encoder_weights, decoder_weights, batch_size))


10 loops, best of 3: 30.2 ms per loop

Numpy Only


In [20]:
def forward_numpy(x, encoder_weights_np, decoder_weights_np, batch_size):
    """
    forward step in vae
    """
    layer_1 = np.log(np.exp(x_sample @ encoder_weights_np['h1'] + encoder_weights_np['b1']) + 1)
    layer_2 = np.log(np.exp(layer_1 @ encoder_weights_np['h2'] + encoder_weights_np['b2']) + 1)
    z_mean = (layer_2 @ encoder_weights_np['mu'] + encoder_weights_np['bias_mu'])
    z_sigma = (layer_2 @ encoder_weights_np['sigma'] + encoder_weights_np['bias_sigma'])
    
    # compute z by drawing sample from normal distribution
    eps = np.random.normal(0, 1, (batch_size, 20))
    z_val = z_mean + np.multiply(np.sqrt(np.exp(z_sigma)), eps)
    
    layer_1 = np.log(np.exp(z_val @ decoder_weights_np['h1'] + decoder_weights_np['b1']) + 1)
    layer_2 = np.log(np.exp(layer_1 @ decoder_weights_np['h2'] + decoder_weights_np['b2']) + 1)
    x_prime = 1/(1 + np.exp(-layer_2 @ decoder_weights_np['mu'] - decoder_weights_np['bias_mu']))
    
    return(x_prime)

In [21]:
%timeit -n10 -r3 forward_numpy(x_sample, encoder_weights_np, decoder_weights_np, batch_size)


10 loops, best of 3: 15 ms per loop

Numpy with Numba


In [36]:
import numba
from numba import jit, vectorize, float32, float64

@jit('float32[:,:](float64[:,:],float64[:,:])')
def mat_mul(A, B):
    m, n = A.shape
    n, p = B.shape
    C = np.zeros((m, p))
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i,j] += A[i,k] * B[k,j]
    return C

@vectorize([float64(float64)])
def soft_plus(x):
    """
    Vectorize version of numba
    """
    return np.log(np.exp(x) + 1)

@jit('float64[:,:](float64[:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:,:],int32)')
def forward_numba(x, encoder_weights_h1, encoder_weights_h2, encoder_weights_b1, encoder_weights_b2, encoder_weights_mu, 
                    encoder_weights_bias_mu, encoder_weights_sigma, encoder_weights_bias_sigma, decoder_weights_h1,
                    decoder_weights_h2, decoder_weights_b1, decoder_weights_b2, decoder_weights_mu,
                    decoder_weights_bias_mu, batch_size):
    """
    Compute mean and sigma of z using numpy without any optimization
    """
    layer_1 = soft_plus(mat_mul(x, encoder_weights_h1) + encoder_weights_b1)
    layer_2 = soft_plus(mat_mul(layer_1, encoder_weights_h2) + encoder_weights_b2)
    z_mean = (mat_mul(layer_2, encoder_weights_mu) + encoder_weights_bias_mu)
    z_sigma = (mat_mul(layer_2, encoder_weights_sigma) + encoder_weights_bias_sigma)
    
    # compute z by drawing sample from normal distribution
    eps = np.random.normal(0, 1, (batch_size, 20))
    z_val = z_mean + np.multiply(np.sqrt(np.exp(z_sigma)), eps)
    
    layer_1 = soft_plus(mat_mul(z_val, decoder_weights_h1) + decoder_weights_b1)
    layer_2 = soft_plus(mat_mul(layer_1, decoder_weights_h2) + decoder_weights_b2)
    x_prime = 1/(1 + np.exp(-mat_mul(layer_2, decoder_weights_mu) - decoder_weights_bias_mu))
    
    return(x_prime)

In [37]:
%%timeit -n10 -r3 
forward_numba(x_sample, encoder_weights_np['h1'], encoder_weights_np['h2'], encoder_weights_np['b1'], 
              encoder_weights_np['b2'], encoder_weights_np['mu'], encoder_weights_np['bias_mu'], 
              encoder_weights_np['sigma'], encoder_weights_np['bias_sigma'], decoder_weights_np['h1'], 
              decoder_weights_np['h2'], decoder_weights_np['b1'], decoder_weights_np['b2'], 
              decoder_weights_np['mu'], decoder_weights_np['bias_mu'], batch_size)


10 loops, best of 3: 358 ms per loop

Numpy with Cython


In [38]:
%load_ext Cython

In [54]:
%%cython -a
cimport cython
import numpy as np
from libc.math cimport exp, log, sqrt

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] mat_mul_cython(double[:,:] A, double[:,:] B):
    """Matrix multiply function. Cythonize"""
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int p = B.shape[1]
    cdef int i,j,k
    cdef double[:,:] C = np.zeros((m, p))
    for i in range(m):
        for j in range(p):
            for k in range(n):
                C[i,j] += A[i,k] * B[k,j]
    return C

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] mat_mul_ele_cython(double[:,:] A, double[:,:] B):
    """Matrix element-wise multiply function. Cythonize"""
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int i,j
    cdef double[:,:] C = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            C[i,j] = A[i,j] * B[i,j]
    return C

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] mat_add_cython(double[:,:] A, double[:] B):
    """Matrix multiply function. Cythonize"""
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int i,j
    cdef double[:,:] C = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            C[i,j] = A[i,j] + B[j]
    return C

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] mat_add_cython_2d(double[:,:] A, double[:,:] B):
    """Matrix multiply function. Cythonize"""
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cdef int i,j
    cdef double[:,:] C = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            C[i,j] = A[i,j] + B[i,j]
    return C

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] sqrt_cython(double[:,:] x):
    cdef int m = x.shape[0]
    cdef int n = x.shape[1]
    cdef double[:,:] y = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            y[i,j] = sqrt(x[i,j])
    return y

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] exp_cython(double[:,:] x):
    cdef int m = x.shape[0]
    cdef int n = x.shape[1]
    cdef double[:,:] y = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            y[i,j] = exp(x[i,j])
    return y

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] soft_plus_cython(double[:,:] x):
    cdef int m = x.shape[0]
    cdef int n = x.shape[1]
    cdef double[:,:] y = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            y[i,j] = log(exp(x[i,j])+1)
    return y

@cython.wraparound(False)
@cython.boundscheck(False)
cdef double[:,:] sigmoid_cython(double[:,:] x):
    cdef int m = x.shape[0]
    cdef int n = x.shape[1]
    cdef double[:,:] y = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            y[i,j] = 1/(1+log(-x[i,j]))
    return y

@cython.wraparound(False)
@cython.boundscheck(False)
def forward_cython(double[:,:] x, double[:,:] encoder_weights_h1, double[:,:] encoder_weights_h2, 
                     double[:] encoder_weights_b1, double[:] encoder_weights_b2, double [:,:] encoder_weights_mu, 
                     double[:] encoder_weights_bias_mu, double[:,:] encoder_weights_sigma, 
                     double[:] encoder_weights_bias_sigma, double[:,:] decoder_weights_h1,
                     double[:,:] decoder_weights_h2, double[:] decoder_weights_b1, 
                     double[:] decoder_weights_b2, double[:,:] decoder_weights_mu,
                     double[:] decoder_weights_bias_mu, int batch_size):
    """
    Compute mean and sigma of z using numpy with cython optimization
    """
    cdef double[:,:] layer_1 = soft_plus_cython(mat_add_cython(mat_mul_cython(x, encoder_weights_h1), encoder_weights_b1))
    cdef double[:,:] layer_2 = soft_plus_cython(mat_add_cython(mat_mul_cython(layer_1, encoder_weights_h2), encoder_weights_b2))
    cdef double[:,:] z_mean = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_mu), encoder_weights_bias_mu)
    cdef double[:,:] z_sigma = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_sigma), encoder_weights_bias_sigma)
    
    # compute z by drawing sample from normal distribution
    cdef double[:,:] eps = np.random.normal(0, 1, (batch_size, 20))
    cdef double[:,:] z_val = mat_add_cython_2d(mat_mul_ele_cython(sqrt_cython(exp_cython(z_sigma)), eps), z_mean)
    
    cdef double[:,:] layer_3 = soft_plus_cython(mat_add_cython(mat_mul_cython(z_val, decoder_weights_h1), decoder_weights_b1))
    cdef double[:,:] layer_4 = soft_plus_cython(mat_add_cython(mat_mul_cython(layer_3, decoder_weights_h2), decoder_weights_b2))
    cdef double[:,:] x_prime = sigmoid_cython(mat_add_cython(mat_mul_cython(layer_4, decoder_weights_mu), decoder_weights_bias_mu))
    
    return(np.array(x_prime))


Out[54]:
Cython: _cython_magic_59b83aa3533e7e2cea5b630075ccd99a.pyx

Generated by Cython 0.23.5

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+001: cimport cython
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+002: import numpy as np
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 003: from libc.math cimport exp, log, sqrt
 004: 
 005: @cython.wraparound(False)
 006: @cython.boundscheck(False)
+007: cdef double[:,:] mat_mul_cython(double[:,:] A, double[:,:] B):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__Pyx_memviewslice __pyx_v_A, __Pyx_memviewslice __pyx_v_B) {
  int __pyx_v_m;
  int __pyx_v_n;
  int __pyx_v_p;
  int __pyx_v_i;
  int __pyx_v_j;
  int __pyx_v_k;
  __Pyx_memviewslice __pyx_v_C = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("mat_mul_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.mat_mul_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_C, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
 008:     """Matrix multiply function. Cythonize"""
+009:     cdef int m = A.shape[0]
  __pyx_v_m = (__pyx_v_A.shape[0]);
+010:     cdef int n = A.shape[1]
  __pyx_v_n = (__pyx_v_A.shape[1]);
+011:     cdef int p = B.shape[1]
  __pyx_v_p = (__pyx_v_B.shape[1]);
 012:     cdef int i,j,k
+013:     cdef double[:,:] C = np.zeros((m, p))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_p); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_C = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+014:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+015:         for j in range(p):
    __pyx_t_9 = __pyx_v_p;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+016:             for k in range(n):
      __pyx_t_11 = __pyx_v_n;
      for (__pyx_t_12 = 0; __pyx_t_12 < __pyx_t_11; __pyx_t_12+=1) {
        __pyx_v_k = __pyx_t_12;
+017:                 C[i,j] += A[i,k] * B[k,j]
        __pyx_t_13 = __pyx_v_i;
        __pyx_t_14 = __pyx_v_k;
        __pyx_t_15 = __pyx_v_k;
        __pyx_t_16 = __pyx_v_j;
        __pyx_t_17 = __pyx_v_i;
        __pyx_t_18 = __pyx_v_j;
        *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_C.data + __pyx_t_17 * __pyx_v_C.strides[0]) ) + __pyx_t_18 * __pyx_v_C.strides[1]) )) += ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_A.data + __pyx_t_13 * __pyx_v_A.strides[0]) ) + __pyx_t_14 * __pyx_v_A.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_B.data + __pyx_t_15 * __pyx_v_B.strides[0]) ) + __pyx_t_16 * __pyx_v_B.strides[1]) ))));
      }
    }
  }
+018:     return C
  __PYX_INC_MEMVIEW(&__pyx_v_C, 0);
  __pyx_r = __pyx_v_C;
  goto __pyx_L0;
 019: 
 020: @cython.wraparound(False)
 021: @cython.boundscheck(False)
+022: cdef double[:,:] mat_mul_ele_cython(double[:,:] A, double[:,:] B):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_ele_cython(__Pyx_memviewslice __pyx_v_A, __Pyx_memviewslice __pyx_v_B) {
  int __pyx_v_m;
  int __pyx_v_n;
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_v_C = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("mat_mul_ele_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.mat_mul_ele_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_C, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
 023:     """Matrix element-wise multiply function. Cythonize"""
+024:     cdef int m = A.shape[0]
  __pyx_v_m = (__pyx_v_A.shape[0]);
+025:     cdef int n = A.shape[1]
  __pyx_v_n = (__pyx_v_A.shape[1]);
 026:     cdef int i,j
+027:     cdef double[:,:] C = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_C = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+028:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+029:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+030:             C[i,j] = A[i,j] * B[i,j]
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_i;
      __pyx_t_14 = __pyx_v_j;
      __pyx_t_15 = __pyx_v_i;
      __pyx_t_16 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_C.data + __pyx_t_15 * __pyx_v_C.strides[0]) ) + __pyx_t_16 * __pyx_v_C.strides[1]) )) = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_A.data + __pyx_t_11 * __pyx_v_A.strides[0]) ) + __pyx_t_12 * __pyx_v_A.strides[1]) ))) * (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_B.data + __pyx_t_13 * __pyx_v_B.strides[0]) ) + __pyx_t_14 * __pyx_v_B.strides[1]) ))));
    }
  }
+031:     return C
  __PYX_INC_MEMVIEW(&__pyx_v_C, 0);
  __pyx_r = __pyx_v_C;
  goto __pyx_L0;
 032: 
 033: @cython.wraparound(False)
 034: @cython.boundscheck(False)
+035: cdef double[:,:] mat_add_cython(double[:,:] A, double[:] B):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__Pyx_memviewslice __pyx_v_A, __Pyx_memviewslice __pyx_v_B) {
  int __pyx_v_m;
  int __pyx_v_n;
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_v_C = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("mat_add_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.mat_add_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_C, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
 036:     """Matrix multiply function. Cythonize"""
+037:     cdef int m = A.shape[0]
  __pyx_v_m = (__pyx_v_A.shape[0]);
+038:     cdef int n = A.shape[1]
  __pyx_v_n = (__pyx_v_A.shape[1]);
 039:     cdef int i,j
+040:     cdef double[:,:] C = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_C = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+041:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+042:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+043:             C[i,j] = A[i,j] + B[j]
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_j;
      __pyx_t_14 = __pyx_v_i;
      __pyx_t_15 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_C.data + __pyx_t_14 * __pyx_v_C.strides[0]) ) + __pyx_t_15 * __pyx_v_C.strides[1]) )) = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_A.data + __pyx_t_11 * __pyx_v_A.strides[0]) ) + __pyx_t_12 * __pyx_v_A.strides[1]) ))) + (*((double *) ( /* dim=0 */ (__pyx_v_B.data + __pyx_t_13 * __pyx_v_B.strides[0]) ))));
    }
  }
+044:     return C
  __PYX_INC_MEMVIEW(&__pyx_v_C, 0);
  __pyx_r = __pyx_v_C;
  goto __pyx_L0;
 045: 
 046: @cython.wraparound(False)
 047: @cython.boundscheck(False)
+048: cdef double[:,:] mat_add_cython_2d(double[:,:] A, double[:,:] B):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython_2d(__Pyx_memviewslice __pyx_v_A, __Pyx_memviewslice __pyx_v_B) {
  int __pyx_v_m;
  int __pyx_v_n;
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_v_C = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("mat_add_cython_2d", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.mat_add_cython_2d", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_C, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
 049:     """Matrix multiply function. Cythonize"""
+050:     cdef int m = A.shape[0]
  __pyx_v_m = (__pyx_v_A.shape[0]);
+051:     cdef int n = A.shape[1]
  __pyx_v_n = (__pyx_v_A.shape[1]);
 052:     cdef int i,j
+053:     cdef double[:,:] C = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_C = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+054:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+055:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+056:             C[i,j] = A[i,j] + B[i,j]
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_i;
      __pyx_t_14 = __pyx_v_j;
      __pyx_t_15 = __pyx_v_i;
      __pyx_t_16 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_C.data + __pyx_t_15 * __pyx_v_C.strides[0]) ) + __pyx_t_16 * __pyx_v_C.strides[1]) )) = ((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_A.data + __pyx_t_11 * __pyx_v_A.strides[0]) ) + __pyx_t_12 * __pyx_v_A.strides[1]) ))) + (*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_B.data + __pyx_t_13 * __pyx_v_B.strides[0]) ) + __pyx_t_14 * __pyx_v_B.strides[1]) ))));
    }
  }
+057:     return C
  __PYX_INC_MEMVIEW(&__pyx_v_C, 0);
  __pyx_r = __pyx_v_C;
  goto __pyx_L0;
 058: 
 059: @cython.wraparound(False)
 060: @cython.boundscheck(False)
+061: cdef double[:,:] sqrt_cython(double[:,:] x):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_sqrt_cython(__Pyx_memviewslice __pyx_v_x) {
  int __pyx_v_m;
  int __pyx_v_n;
  __Pyx_memviewslice __pyx_v_y = { 0, 0, { 0 }, { 0 }, { 0 } };
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sqrt_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.sqrt_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_y, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
+062:     cdef int m = x.shape[0]
  __pyx_v_m = (__pyx_v_x.shape[0]);
+063:     cdef int n = x.shape[1]
  __pyx_v_n = (__pyx_v_x.shape[1]);
+064:     cdef double[:,:] y = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_y = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+065:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+066:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+067:             y[i,j] = sqrt(x[i,j])
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_i;
      __pyx_t_14 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_y.data + __pyx_t_13 * __pyx_v_y.strides[0]) ) + __pyx_t_14 * __pyx_v_y.strides[1]) )) = sqrt((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_x.data + __pyx_t_11 * __pyx_v_x.strides[0]) ) + __pyx_t_12 * __pyx_v_x.strides[1]) ))));
    }
  }
+068:     return y
  __PYX_INC_MEMVIEW(&__pyx_v_y, 0);
  __pyx_r = __pyx_v_y;
  goto __pyx_L0;
 069: 
 070: @cython.wraparound(False)
 071: @cython.boundscheck(False)
+072: cdef double[:,:] exp_cython(double[:,:] x):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_exp_cython(__Pyx_memviewslice __pyx_v_x) {
  int __pyx_v_m;
  int __pyx_v_n;
  __Pyx_memviewslice __pyx_v_y = { 0, 0, { 0 }, { 0 }, { 0 } };
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("exp_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.exp_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_y, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
+073:     cdef int m = x.shape[0]
  __pyx_v_m = (__pyx_v_x.shape[0]);
+074:     cdef int n = x.shape[1]
  __pyx_v_n = (__pyx_v_x.shape[1]);
+075:     cdef double[:,:] y = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_y = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+076:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+077:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+078:             y[i,j] = exp(x[i,j])
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_i;
      __pyx_t_14 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_y.data + __pyx_t_13 * __pyx_v_y.strides[0]) ) + __pyx_t_14 * __pyx_v_y.strides[1]) )) = exp((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_x.data + __pyx_t_11 * __pyx_v_x.strides[0]) ) + __pyx_t_12 * __pyx_v_x.strides[1]) ))));
    }
  }
+079:     return y
  __PYX_INC_MEMVIEW(&__pyx_v_y, 0);
  __pyx_r = __pyx_v_y;
  goto __pyx_L0;
 080: 
 081: @cython.wraparound(False)
 082: @cython.boundscheck(False)
+083: cdef double[:,:] soft_plus_cython(double[:,:] x):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_soft_plus_cython(__Pyx_memviewslice __pyx_v_x) {
  int __pyx_v_m;
  int __pyx_v_n;
  __Pyx_memviewslice __pyx_v_y = { 0, 0, { 0 }, { 0 }, { 0 } };
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("soft_plus_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.soft_plus_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_y, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
+084:     cdef int m = x.shape[0]
  __pyx_v_m = (__pyx_v_x.shape[0]);
+085:     cdef int n = x.shape[1]
  __pyx_v_n = (__pyx_v_x.shape[1]);
+086:     cdef double[:,:] y = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_y = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+087:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+088:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+089:             y[i,j] = log(exp(x[i,j])+1)
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = __pyx_v_i;
      __pyx_t_14 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_y.data + __pyx_t_13 * __pyx_v_y.strides[0]) ) + __pyx_t_14 * __pyx_v_y.strides[1]) )) = log((exp((*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_x.data + __pyx_t_11 * __pyx_v_x.strides[0]) ) + __pyx_t_12 * __pyx_v_x.strides[1]) )))) + 1.0));
    }
  }
+090:     return y
  __PYX_INC_MEMVIEW(&__pyx_v_y, 0);
  __pyx_r = __pyx_v_y;
  goto __pyx_L0;
 091: 
 092: @cython.wraparound(False)
 093: @cython.boundscheck(False)
+094: cdef double[:,:] sigmoid_cython(double[:,:] x):
static __Pyx_memviewslice __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_sigmoid_cython(__Pyx_memviewslice __pyx_v_x) {
  int __pyx_v_m;
  int __pyx_v_n;
  __Pyx_memviewslice __pyx_v_y = { 0, 0, { 0 }, { 0 }, { 0 } };
  int __pyx_v_i;
  int __pyx_v_j;
  __Pyx_memviewslice __pyx_r = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("sigmoid_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __PYX_XDEC_MEMVIEW(&__pyx_t_6, 1);
  __pyx_r.data = NULL;
  __pyx_r.memview = NULL;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.sigmoid_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);

  goto __pyx_L2;
  __pyx_L0:;
  if (unlikely(!__pyx_r.memview)) {
    PyErr_SetString(PyExc_TypeError,"Memoryview return value is not initialized");
  }
  __pyx_L2:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_y, 1);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
+095:     cdef int m = x.shape[0]
  __pyx_v_m = (__pyx_v_x.shape[0]);
+096:     cdef int n = x.shape[1]
  __pyx_v_n = (__pyx_v_x.shape[1]);
+097:     cdef double[:,:] y = np.zeros((m, n))
  __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_n); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_4);
  PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4);
  __pyx_t_2 = 0;
  __pyx_t_4 = 0;
  __pyx_t_4 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  if (!__pyx_t_4) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_5); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_4); __pyx_t_4 = NULL;
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_5);
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_1);
  if (unlikely(!__pyx_t_6.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_y = __pyx_t_6;
  __pyx_t_6.memview = NULL;
  __pyx_t_6.data = NULL;
+098:     for i in range(m):
  __pyx_t_7 = __pyx_v_m;
  for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
    __pyx_v_i = __pyx_t_8;
+099:         for j in range(n):
    __pyx_t_9 = __pyx_v_n;
    for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
      __pyx_v_j = __pyx_t_10;
+100:             y[i,j] = 1/(1+log(-x[i,j]))
      __pyx_t_11 = __pyx_v_i;
      __pyx_t_12 = __pyx_v_j;
      __pyx_t_13 = (1.0 + log((-(*((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_x.data + __pyx_t_11 * __pyx_v_x.strides[0]) ) + __pyx_t_12 * __pyx_v_x.strides[1]) ))))));
      if (unlikely(__pyx_t_13 == 0)) {
        PyErr_SetString(PyExc_ZeroDivisionError, "float division");
        {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
      }
      __pyx_t_14 = __pyx_v_i;
      __pyx_t_15 = __pyx_v_j;
      *((double *) ( /* dim=1 */ (( /* dim=0 */ (__pyx_v_y.data + __pyx_t_14 * __pyx_v_y.strides[0]) ) + __pyx_t_15 * __pyx_v_y.strides[1]) )) = (1.0 / __pyx_t_13);
    }
  }
+101:     return y
  __PYX_INC_MEMVIEW(&__pyx_v_y, 0);
  __pyx_r = __pyx_v_y;
  goto __pyx_L0;
 102: 
 103: @cython.wraparound(False)
 104: @cython.boundscheck(False)
+105: def forward_cython(double[:,:] x, double[:,:] encoder_weights_h1, double[:,:] encoder_weights_h2,
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_1forward_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static char __pyx_doc_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_forward_cython[] = "\n    Compute mean and sigma of z using numpy with cython optimization\n    ";
static PyMethodDef __pyx_mdef_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_1forward_cython = {"forward_cython", (PyCFunction)__pyx_pw_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_1forward_cython, METH_VARARGS|METH_KEYWORDS, __pyx_doc_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_forward_cython};
static PyObject *__pyx_pw_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_1forward_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  __Pyx_memviewslice __pyx_v_x = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_h1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_h2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_b1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_b2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_mu = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_bias_mu = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_sigma = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_encoder_weights_bias_sigma = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_decoder_weights_h1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_decoder_weights_h2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_decoder_weights_b1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_decoder_weights_b2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_decoder_weights_mu = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_decoder_weights_bias_mu = { 0, 0, { 0 }, { 0 }, { 0 } };
  int __pyx_v_batch_size;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("forward_cython (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_x,&__pyx_n_s_encoder_weights_h1,&__pyx_n_s_encoder_weights_h2,&__pyx_n_s_encoder_weights_b1,&__pyx_n_s_encoder_weights_b2,&__pyx_n_s_encoder_weights_mu,&__pyx_n_s_encoder_weights_bias_mu,&__pyx_n_s_encoder_weights_sigma,&__pyx_n_s_encoder_weights_bias_sigma,&__pyx_n_s_decoder_weights_h1,&__pyx_n_s_decoder_weights_h2,&__pyx_n_s_decoder_weights_b1,&__pyx_n_s_decoder_weights_b2,&__pyx_n_s_decoder_weights_mu,&__pyx_n_s_decoder_weights_bias_mu,&__pyx_n_s_batch_size,0};
    PyObject* values[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case 16: values[15] = PyTuple_GET_ITEM(__pyx_args, 15);
        case 15: values[14] = PyTuple_GET_ITEM(__pyx_args, 14);
        case 14: values[13] = PyTuple_GET_ITEM(__pyx_args, 13);
        case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12);
        case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11);
        case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10);
        case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_x)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        case  1:
        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_h1)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  2:
        if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_h2)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  3:
        if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_b1)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  4:
        if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_b2)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  5:
        if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_mu)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  6:
        if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_bias_mu)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  7:
        if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_sigma)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  8:
        if (likely((values[8] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_encoder_weights_bias_sigma)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 8); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case  9:
        if (likely((values[9] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_decoder_weights_h1)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 9); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case 10:
        if (likely((values[10] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_decoder_weights_h2)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 10); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case 11:
        if (likely((values[11] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_decoder_weights_b1)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 11); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case 12:
        if (likely((values[12] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_decoder_weights_b2)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 12); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case 13:
        if (likely((values[13] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_decoder_weights_mu)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 13); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case 14:
        if (likely((values[14] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_decoder_weights_bias_mu)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 14); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
        case 15:
        if (likely((values[15] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_batch_size)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, 15); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "forward_cython") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 16) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
      values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
      values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
      values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
      values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
      values[10] = PyTuple_GET_ITEM(__pyx_args, 10);
      values[11] = PyTuple_GET_ITEM(__pyx_args, 11);
      values[12] = PyTuple_GET_ITEM(__pyx_args, 12);
      values[13] = PyTuple_GET_ITEM(__pyx_args, 13);
      values[14] = PyTuple_GET_ITEM(__pyx_args, 14);
      values[15] = PyTuple_GET_ITEM(__pyx_args, 15);
    }
    __pyx_v_x = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[0]); if (unlikely(!__pyx_v_x.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_h1 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[1]); if (unlikely(!__pyx_v_encoder_weights_h1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_h2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[2]); if (unlikely(!__pyx_v_encoder_weights_h2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_b1 = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[3]); if (unlikely(!__pyx_v_encoder_weights_b1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_b2 = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[4]); if (unlikely(!__pyx_v_encoder_weights_b2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_mu = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[5]); if (unlikely(!__pyx_v_encoder_weights_mu.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_bias_mu = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[6]); if (unlikely(!__pyx_v_encoder_weights_bias_mu.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_sigma = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[7]); if (unlikely(!__pyx_v_encoder_weights_sigma.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_encoder_weights_bias_sigma = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[8]); if (unlikely(!__pyx_v_encoder_weights_bias_sigma.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_decoder_weights_h1 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[9]); if (unlikely(!__pyx_v_decoder_weights_h1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_decoder_weights_h2 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[10]); if (unlikely(!__pyx_v_decoder_weights_h2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_decoder_weights_b1 = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[11]); if (unlikely(!__pyx_v_decoder_weights_b1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_decoder_weights_b2 = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[12]); if (unlikely(!__pyx_v_decoder_weights_b2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_decoder_weights_mu = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(values[13]); if (unlikely(!__pyx_v_decoder_weights_mu.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_decoder_weights_bias_mu = __Pyx_PyObject_to_MemoryviewSlice_ds_double(values[14]); if (unlikely(!__pyx_v_decoder_weights_bias_mu.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
    __pyx_v_batch_size = __Pyx_PyInt_As_int(values[15]); if (unlikely((__pyx_v_batch_size == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("forward_cython", 1, 16, 16, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.forward_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_forward_cython(__pyx_self, __pyx_v_x, __pyx_v_encoder_weights_h1, __pyx_v_encoder_weights_h2, __pyx_v_encoder_weights_b1, __pyx_v_encoder_weights_b2, __pyx_v_encoder_weights_mu, __pyx_v_encoder_weights_bias_mu, __pyx_v_encoder_weights_sigma, __pyx_v_encoder_weights_bias_sigma, __pyx_v_decoder_weights_h1, __pyx_v_decoder_weights_h2, __pyx_v_decoder_weights_b1, __pyx_v_decoder_weights_b2, __pyx_v_decoder_weights_mu, __pyx_v_decoder_weights_bias_mu, __pyx_v_batch_size);
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_forward_cython(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_x, __Pyx_memviewslice __pyx_v_encoder_weights_h1, __Pyx_memviewslice __pyx_v_encoder_weights_h2, __Pyx_memviewslice __pyx_v_encoder_weights_b1, __Pyx_memviewslice __pyx_v_encoder_weights_b2, __Pyx_memviewslice __pyx_v_encoder_weights_mu, __Pyx_memviewslice __pyx_v_encoder_weights_bias_mu, __Pyx_memviewslice __pyx_v_encoder_weights_sigma, __Pyx_memviewslice __pyx_v_encoder_weights_bias_sigma, __Pyx_memviewslice __pyx_v_decoder_weights_h1, __Pyx_memviewslice __pyx_v_decoder_weights_h2, __Pyx_memviewslice __pyx_v_decoder_weights_b1, __Pyx_memviewslice __pyx_v_decoder_weights_b2, __Pyx_memviewslice __pyx_v_decoder_weights_mu, __Pyx_memviewslice __pyx_v_decoder_weights_bias_mu, int __pyx_v_batch_size) {
  __Pyx_memviewslice __pyx_v_layer_1 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_layer_2 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_z_mean = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_z_sigma = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_eps = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_z_val = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_layer_3 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_layer_4 = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_x_prime = { 0, 0, { 0 }, { 0 }, { 0 } };
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("forward_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_AddTraceback("_cython_magic_59b83aa3533e7e2cea5b630075ccd99a.forward_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_layer_1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_layer_2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_z_mean, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_z_sigma, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_eps, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_z_val, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_layer_3, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_layer_4, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_x_prime, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_x, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_h1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_h2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_b1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_b2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_mu, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_bias_mu, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_sigma, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_encoder_weights_bias_sigma, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_decoder_weights_h1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_decoder_weights_h2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_decoder_weights_b1, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_decoder_weights_b2, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_decoder_weights_mu, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_decoder_weights_bias_mu, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__14 = PyTuple_Pack(25, __pyx_n_s_x, __pyx_n_s_encoder_weights_h1, __pyx_n_s_encoder_weights_h2, __pyx_n_s_encoder_weights_b1, __pyx_n_s_encoder_weights_b2, __pyx_n_s_encoder_weights_mu, __pyx_n_s_encoder_weights_bias_mu, __pyx_n_s_encoder_weights_sigma, __pyx_n_s_encoder_weights_bias_sigma, __pyx_n_s_decoder_weights_h1, __pyx_n_s_decoder_weights_h2, __pyx_n_s_decoder_weights_b1, __pyx_n_s_decoder_weights_b2, __pyx_n_s_decoder_weights_mu, __pyx_n_s_decoder_weights_bias_mu, __pyx_n_s_batch_size, __pyx_n_s_layer_1, __pyx_n_s_layer_2, __pyx_n_s_z_mean, __pyx_n_s_z_sigma, __pyx_n_s_eps, __pyx_n_s_z_val, __pyx_n_s_layer_3, __pyx_n_s_layer_4, __pyx_n_s_x_prime); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_tuple__14);
  __Pyx_GIVEREF(__pyx_tuple__14);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_1forward_cython, NULL, __pyx_n_s_cython_magic_59b83aa3533e7e2cea); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_forward_cython, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(16, 0, 25, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_jovyan_cache_ipython_cytho, __pyx_n_s_forward_cython, 105, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
 106:                      double[:] encoder_weights_b1, double[:] encoder_weights_b2, double [:,:] encoder_weights_mu,
 107:                      double[:] encoder_weights_bias_mu, double[:,:] encoder_weights_sigma,
 108:                      double[:] encoder_weights_bias_sigma, double[:,:] decoder_weights_h1,
 109:                      double[:,:] decoder_weights_h2, double[:] decoder_weights_b1,
 110:                      double[:] decoder_weights_b2, double[:,:] decoder_weights_mu,
 111:                      double[:] decoder_weights_bias_mu, int batch_size):
 112:     """
 113:     Compute mean and sigma of z using numpy with cython optimization
 114:     """
+115:     cdef double[:,:] layer_1 = soft_plus_cython(mat_add_cython(mat_mul_cython(x, encoder_weights_h1), encoder_weights_b1))
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__pyx_v_x, __pyx_v_encoder_weights_h1); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__pyx_t_1, __pyx_v_encoder_weights_b1); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_soft_plus_cython(__pyx_t_2); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_v_layer_1 = __pyx_t_1;
  __pyx_t_1.memview = NULL;
  __pyx_t_1.data = NULL;
+116:     cdef double[:,:] layer_2 = soft_plus_cython(mat_add_cython(mat_mul_cython(layer_1, encoder_weights_h2), encoder_weights_b2))
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__pyx_v_layer_1, __pyx_v_encoder_weights_h2); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__pyx_t_1, __pyx_v_encoder_weights_b2); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_soft_plus_cython(__pyx_t_2); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_v_layer_2 = __pyx_t_1;
  __pyx_t_1.memview = NULL;
  __pyx_t_1.data = NULL;
+117:     cdef double[:,:] z_mean = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_mu), encoder_weights_bias_mu)
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__pyx_v_layer_2, __pyx_v_encoder_weights_mu); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__pyx_t_1, __pyx_v_encoder_weights_bias_mu); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_v_z_mean = __pyx_t_2;
  __pyx_t_2.memview = NULL;
  __pyx_t_2.data = NULL;
+118:     cdef double[:,:] z_sigma = mat_add_cython(mat_mul_cython(layer_2, encoder_weights_sigma), encoder_weights_bias_sigma)
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__pyx_v_layer_2, __pyx_v_encoder_weights_sigma); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__pyx_t_2, __pyx_v_encoder_weights_bias_sigma); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_v_z_sigma = __pyx_t_1;
  __pyx_t_1.memview = NULL;
  __pyx_t_1.data = NULL;
 119: 
 120:     # compute z by drawing sample from normal distribution
+121:     cdef double[:,:] eps = np.random.normal(0, 1, (batch_size, 20))
  __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_normal); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_batch_size); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  __Pyx_GIVEREF(__pyx_t_5);
  PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_5);
  __Pyx_INCREF(__pyx_int_20);
  __Pyx_GIVEREF(__pyx_int_20);
  PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_int_20);
  __pyx_t_5 = 0;
  __pyx_t_5 = NULL;
  __pyx_t_7 = 0;
  if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_4))) {
    __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
    if (likely(__pyx_t_5)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
      __Pyx_INCREF(__pyx_t_5);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_4, function);
      __pyx_t_7 = 1;
    }
  }
  __pyx_t_8 = PyTuple_New(3+__pyx_t_7); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_8);
  if (__pyx_t_5) {
    __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_5); __pyx_t_5 = NULL;
  }
  __Pyx_INCREF(__pyx_int_0);
  __Pyx_GIVEREF(__pyx_int_0);
  PyTuple_SET_ITEM(__pyx_t_8, 0+__pyx_t_7, __pyx_int_0);
  __Pyx_INCREF(__pyx_int_1);
  __Pyx_GIVEREF(__pyx_int_1);
  PyTuple_SET_ITEM(__pyx_t_8, 1+__pyx_t_7, __pyx_int_1);
  __Pyx_GIVEREF(__pyx_t_6);
  PyTuple_SET_ITEM(__pyx_t_8, 2+__pyx_t_7, __pyx_t_6);
  __pyx_t_6 = 0;
  __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_8, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_1 = __Pyx_PyObject_to_MemoryviewSlice_dsds_double(__pyx_t_3);
  if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_v_eps = __pyx_t_1;
  __pyx_t_1.memview = NULL;
  __pyx_t_1.data = NULL;
+122:     cdef double[:,:] z_val = mat_add_cython_2d(mat_mul_ele_cython(sqrt_cython(exp_cython(z_sigma)), eps), z_mean)
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_exp_cython(__pyx_v_z_sigma); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_sqrt_cython(__pyx_t_1); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_ele_cython(__pyx_t_2, __pyx_v_eps); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython_2d(__pyx_t_1, __pyx_v_z_mean); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_v_z_val = __pyx_t_2;
  __pyx_t_2.memview = NULL;
  __pyx_t_2.data = NULL;
 123: 
+124:     cdef double[:,:] layer_3 = soft_plus_cython(mat_add_cython(mat_mul_cython(z_val, decoder_weights_h1), decoder_weights_b1))
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__pyx_v_z_val, __pyx_v_decoder_weights_h1); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__pyx_t_2, __pyx_v_decoder_weights_b1); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_soft_plus_cython(__pyx_t_1); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_v_layer_3 = __pyx_t_2;
  __pyx_t_2.memview = NULL;
  __pyx_t_2.data = NULL;
+125:     cdef double[:,:] layer_4 = soft_plus_cython(mat_add_cython(mat_mul_cython(layer_3, decoder_weights_h2), decoder_weights_b2))
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__pyx_v_layer_3, __pyx_v_decoder_weights_h2); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__pyx_t_2, __pyx_v_decoder_weights_b2); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_soft_plus_cython(__pyx_t_1); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_v_layer_4 = __pyx_t_2;
  __pyx_t_2.memview = NULL;
  __pyx_t_2.data = NULL;
+126:     cdef double[:,:] x_prime = sigmoid_cython(mat_add_cython(mat_mul_cython(layer_4, decoder_weights_mu), decoder_weights_bias_mu))
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_mul_cython(__pyx_v_layer_4, __pyx_v_decoder_weights_mu); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 126; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_1 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_mat_add_cython(__pyx_t_2, __pyx_v_decoder_weights_bias_mu); if (unlikely(!__pyx_t_1.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 126; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_2, 1);
  __pyx_t_2 = __pyx_f_46_cython_magic_59b83aa3533e7e2cea5b630075ccd99a_sigmoid_cython(__pyx_t_1); if (unlikely(!__pyx_t_2.memview)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 126; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __PYX_XDEC_MEMVIEW(&__pyx_t_1, 1);
  __pyx_v_x_prime = __pyx_t_2;
  __pyx_t_2.memview = NULL;
  __pyx_t_2.data = NULL;
 127: 
+128:     return(np.array(x_prime))
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_array); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_8);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = __pyx_memoryview_fromslice(__pyx_v_x_prime, 2, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_6 = NULL;
  if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_8))) {
    __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_8);
    if (likely(__pyx_t_6)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8);
      __Pyx_INCREF(__pyx_t_6);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_8, function);
    }
  }
  if (!__pyx_t_6) {
    __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_8, __pyx_t_4); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_GOTREF(__pyx_t_3);
  } else {
    __pyx_t_5 = PyTuple_New(1+1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_5);
    __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_6); __pyx_t_6 = NULL;
    __Pyx_GIVEREF(__pyx_t_4);
    PyTuple_SET_ITEM(__pyx_t_5, 0+1, __pyx_t_4);
    __pyx_t_4 = 0;
    __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_5, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  }
  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  __pyx_r = __pyx_t_3;
  __pyx_t_3 = 0;
  goto __pyx_L0;

In [53]:
x_sample_cython = x_sample.astype(np.float64)
encoder_weights_cython = {}
encoder_weights_cython['h1'] = encoder_weights_np['h1'].astype(np.float64)
encoder_weights_cython['h2'] = encoder_weights_np['h2'].astype(np.float64)
encoder_weights_cython['b1'] = encoder_weights_np['b1'].astype(np.float64)
encoder_weights_cython['b2'] = encoder_weights_np['b2'].astype(np.float64)
encoder_weights_cython['mu'] = encoder_weights_np['mu'].astype(np.float64)
encoder_weights_cython['bias_mu'] = encoder_weights_np['bias_mu'].astype(np.float64)
encoder_weights_cython['sigma'] = encoder_weights_np['sigma'].astype(np.float64)
encoder_weights_cython['bias_sigma'] = encoder_weights_np['bias_sigma'].astype(np.float64)

decoder_weights_cython = {}
decoder_weights_cython['h1'] = decoder_weights_np['h1'].astype(np.float64)
decoder_weights_cython['h2'] = decoder_weights_np['h2'].astype(np.float64)
decoder_weights_cython['b1'] = decoder_weights_np['b1'].astype(np.float64)
decoder_weights_cython['b2'] = decoder_weights_np['b2'].astype(np.float64)
decoder_weights_cython['mu'] = decoder_weights_np['mu'].astype(np.float64)
decoder_weights_cython['bias_mu'] = decoder_weights_np['bias_mu'].astype(np.float64)

In [56]:
%%timeit -n10 -r3 
forward_cython(x_sample_cython, encoder_weights_cython['h1'], encoder_weights_cython['h2'], encoder_weights_cython['b1'], 
              encoder_weights_cython['b2'], encoder_weights_cython['mu'], encoder_weights_cython['bias_mu'], 
              encoder_weights_cython['sigma'], encoder_weights_cython['bias_sigma'], decoder_weights_cython['h1'],
              decoder_weights_cython['h2'], decoder_weights_cython['b1'], decoder_weights_cython['b2'],
              decoder_weights_cython['mu'], decoder_weights_cython['bias_mu'], batch_size)


10 loops, best of 3: 179 ms per loop

Test on Different Batch Sizes


In [57]:
batch_size = np.arange(50, 1000, 50)
run_time_best = np.zeros((4, batch_size.shape[0]))
run_time_worst = np.zeros((4, batch_size.shape[0]))

In [59]:
for cnt, s in enumerate(batch_size):
    print('Evaluating at: %d' % s)
    
    x_sample, _ = mnist.train.next_batch(s)
    x_sample_tf = tf.constant(x_sample)
    x_sample_cython = x_sample.astype(np.float64)
    
    result_1 = %timeit -o -n10 -r3 sess.run(forward_tf(x_sample_tf, encoder_weights, decoder_weights, s))
    run_time_best[0,cnt] = result_1.best
    run_time_worst[0, cnt] = result_1.worst
    
    result_2 = %timeit -o -n10 -r3 forward_numpy(x_sample, encoder_weights_np, decoder_weights_np, s)
    run_time_best[1,cnt] = result_2.best
    run_time_worst[1, cnt] = result_2.worst
    
    result_3 = %timeit -o -n10 -r3 forward_numba(x_sample, encoder_weights_np['h1'], encoder_weights_np['h2'], encoder_weights_np['b1'], encoder_weights_np['b2'], encoder_weights_np['mu'], encoder_weights_np['bias_mu'], encoder_weights_np['sigma'], encoder_weights_np['bias_sigma'], decoder_weights_np['h1'], decoder_weights_np['h2'], decoder_weights_np['b1'], decoder_weights_np['b2'], decoder_weights_np['mu'], decoder_weights_np['bias_mu'], s)
    run_time_best[2,cnt] = result_3.best
    run_time_worst[2, cnt] = result_3.worst
    
    result_4 = %timeit -o -n10 -r3 forward_cython(x_sample_cython, encoder_weights_cython['h1'], encoder_weights_cython['h2'], encoder_weights_cython['b1'], encoder_weights_cython['b2'], encoder_weights_cython['mu'], encoder_weights_cython['bias_mu'], encoder_weights_cython['sigma'], encoder_weights_cython['bias_sigma'], decoder_weights_cython['h1'], decoder_weights_cython['h2'], decoder_weights_cython['b1'], decoder_weights_cython['b2'], decoder_weights_cython['mu'], decoder_weights_cython['bias_mu'], s)
    run_time_best[3,cnt] = result_4.best
    run_time_worst[3, cnt] = result_4.worst


Evaluating at: 50s
10 loops, best of 3: 49.7 ms per loop
10 loops, best of 3: 13 ms per loop
10 loops, best of 3: 187 ms per loop
10 loops, best of 3: 104 ms per loop
Evaluating at: 100s
10 loops, best of 3: 63 ms per loop
10 loops, best of 3: 19 ms per loop
10 loops, best of 3: 396 ms per loop
10 loops, best of 3: 202 ms per loop
Evaluating at: 150s
10 loops, best of 3: 74.8 ms per loop
10 loops, best of 3: 24.9 ms per loop
10 loops, best of 3: 598 ms per loop
10 loops, best of 3: 301 ms per loop
Evaluating at: 200s
10 loops, best of 3: 90.9 ms per loop
10 loops, best of 3: 32.3 ms per loop
10 loops, best of 3: 796 ms per loop
10 loops, best of 3: 406 ms per loop
Evaluating at: 250s
10 loops, best of 3: 118 ms per loop
10 loops, best of 3: 38.4 ms per loop
10 loops, best of 3: 996 ms per loop
10 loops, best of 3: 501 ms per loop
Evaluating at: 300s
10 loops, best of 3: 133 ms per loop
10 loops, best of 3: 48.2 ms per loop
10 loops, best of 3: 1.2 s per loop
10 loops, best of 3: 599 ms per loop
Evaluating at: 350s
10 loops, best of 3: 145 ms per loop
10 loops, best of 3: 56.6 ms per loop
10 loops, best of 3: 1.39 s per loop
10 loops, best of 3: 706 ms per loop
Evaluating at: 400s
10 loops, best of 3: 172 ms per loop
10 loops, best of 3: 55.7 ms per loop
10 loops, best of 3: 1.59 s per loop
10 loops, best of 3: 804 ms per loop
Evaluating at: 450s
10 loops, best of 3: 177 ms per loop
10 loops, best of 3: 65.1 ms per loop
10 loops, best of 3: 1.8 s per loop
10 loops, best of 3: 928 ms per loop
Evaluating at: 500s
10 loops, best of 3: 212 ms per loop
10 loops, best of 3: 76.2 ms per loop
10 loops, best of 3: 1.99 s per loop
10 loops, best of 3: 996 ms per loop
Evaluating at: 550s
10 loops, best of 3: 217 ms per loop
10 loops, best of 3: 78.7 ms per loop
10 loops, best of 3: 2.19 s per loop
10 loops, best of 3: 1.09 s per loop
Evaluating at: 600s
10 loops, best of 3: 253 ms per loop
10 loops, best of 3: 105 ms per loop
10 loops, best of 3: 2.4 s per loop
10 loops, best of 3: 1.21 s per loop
Evaluating at: 650s
10 loops, best of 3: 258 ms per loop
10 loops, best of 3: 97.4 ms per loop
10 loops, best of 3: 2.6 s per loop
10 loops, best of 3: 1.31 s per loop
Evaluating at: 700s
10 loops, best of 3: 314 ms per loop
10 loops, best of 3: 109 ms per loop
10 loops, best of 3: 2.77 s per loop
10 loops, best of 3: 1.43 s per loop
Evaluating at: 750s
10 loops, best of 3: 322 ms per loop
10 loops, best of 3: 108 ms per loop
10 loops, best of 3: 2.99 s per loop
10 loops, best of 3: 1.52 s per loop
Evaluating at: 800s
10 loops, best of 3: 347 ms per loop
10 loops, best of 3: 125 ms per loop
10 loops, best of 3: 3.23 s per loop
10 loops, best of 3: 1.57 s per loop
Evaluating at: 850s
10 loops, best of 3: 368 ms per loop
10 loops, best of 3: 132 ms per loop
10 loops, best of 3: 3.4 s per loop
10 loops, best of 3: 1.73 s per loop
Evaluating at: 900s
10 loops, best of 3: 390 ms per loop
10 loops, best of 3: 131 ms per loop
10 loops, best of 3: 3.62 s per loop
10 loops, best of 3: 1.81 s per loop
Evaluating at: 950s
10 loops, best of 3: 410 ms per loop
10 loops, best of 3: 131 ms per loop
10 loops, best of 3: 3.77 s per loop
10 loops, best of 3: 1.88 s per loop

In [60]:
plt.fill_between(batch_size, run_time_worst[0,:], run_time_best[0,:], facecolor='blue', color='none', label='tf')
plt.fill_between(batch_size, run_time_worst[1,:], run_time_best[1,:], facecolor='green', color='none', label='numpy')
plt.fill_between(batch_size, run_time_worst[2,:], run_time_best[2,:], facecolor='red', color='none', label='numba')
plt.fill_between(batch_size, run_time_worst[3,:], run_time_best[3,:], facecolor='yellow', color='none', label='cython')

plt.xlabel('batch size')
plt.ylabel('time:s')
plt.legend(loc='upper left')

plt.show()



In [ ]: