Obtain the SELU parameters for arbitrary fixed points

Author: Guenter Klambauer, 2017

tested under Python 3.5


In [1]:
import numpy as np
from scipy.special import erf,erfc
from sympy import Symbol, solve, nsolve

Function to obtain the parameters for the SELU with an arbitrary fixed point (mean, variance)


In [2]:
def getSeluParameters(fixedpointMean=0,fixedpointVar=1):
    """ Finding the parameters of the SELU activation function. The function returns alpha and lambda for the desired fixed point. """
    
    import sympy
    from sympy import Symbol, solve, nsolve

    aa = Symbol('aa')
    ll = Symbol('ll')
    nu = fixedpointMean 
    tau = fixedpointVar 

    mean =  0.5*ll*(nu + np.exp(-nu**2/(2*tau))*np.sqrt(2/np.pi)*np.sqrt(tau) + \
                        nu*erf(nu/(np.sqrt(2*tau))) - aa*erfc(nu/(np.sqrt(2*tau))) + \
                        np.exp(nu+tau/2)*aa*erfc((nu+tau)/(np.sqrt(2*tau))))

    var = 0.5*ll**2*(np.exp(-nu**2/(2*tau))*np.sqrt(2/np.pi*tau)*nu + (nu**2+tau)* \
                          (1+erf(nu/(np.sqrt(2*tau)))) + aa**2 *erfc(nu/(np.sqrt(2*tau))) \
                          - aa**2 * 2 *np.exp(nu+tau/2)*erfc((nu+tau)/(np.sqrt(2*tau)))+ \
                          aa**2*np.exp(2*(nu+tau))*erfc((nu+2*tau)/(np.sqrt(2*tau))) ) - mean**2

    eq1 = mean - nu
    eq2 = var - tau

    res = nsolve( (eq2, eq1), (aa,ll), (1.67,1.05))
    return float(res[0]),float(res[1])

In [3]:
### To recover the parameters of the SELU with mean zero and unit variance
getSeluParameters(0,1)


Out[3]:
(1.6732632423543774, 1.0507009873554802)

In [22]:
### To obtain new parameters, e.g. for a fixed point with mean -0.1 and variance 2
myFixedPointMean = -0.1
myFixedPointVar = 2.0
myAlpha, myLambda = getSeluParameters(myFixedPointMean,myFixedPointVar)
myAlpha, myLambda


Out[22]:
(1.9769021954241999, 1.073851239616047)
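
As a quick sanity check, the returned parameters can be plugged back into the same mean/variance expressions used inside getSeluParameters; a correct solution reproduces the requested fixed point. A minimal sketch in plain NumPy (the helper name checkFixedPoint is only for illustration):

def checkFixedPoint(alpha, lam, nu, tau):
    # Evaluate the mapped mean and variance of the SELU applied to N(nu, tau),
    # reusing the closed-form expressions from getSeluParameters with numeric parameters.
    mean = 0.5*lam*(nu + np.exp(-nu**2/(2*tau))*np.sqrt(2/np.pi)*np.sqrt(tau) +
                    nu*erf(nu/(np.sqrt(2*tau))) - alpha*erfc(nu/(np.sqrt(2*tau))) +
                    np.exp(nu+tau/2)*alpha*erfc((nu+tau)/(np.sqrt(2*tau))))
    var = 0.5*lam**2*(np.exp(-nu**2/(2*tau))*np.sqrt(2/np.pi*tau)*nu + (nu**2+tau)*
                      (1+erf(nu/(np.sqrt(2*tau)))) + alpha**2*erfc(nu/(np.sqrt(2*tau))) -
                      alpha**2*2*np.exp(nu+tau/2)*erfc((nu+tau)/(np.sqrt(2*tau))) +
                      alpha**2*np.exp(2*(nu+tau))*erfc((nu+2*tau)/(np.sqrt(2*tau)))) - mean**2
    return mean, var

checkFixedPoint(myAlpha, myLambda, myFixedPointMean, myFixedPointVar)
# should return values close to (-0.1, 2.0)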

Adjust the SELU function and the dropout variant to your new parameters


In [23]:
def selu(x):
    """SELU activation with the custom parameters myAlpha and myLambda."""
    with ops.name_scope('selu') as scope:
        alpha = myAlpha
        scale = myLambda
        # identity for x >= 0, alpha*(exp(x)-1) for x < 0, everything scaled by lambda
        return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))
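
For reference, the same activation without TensorFlow; a minimal NumPy sketch (the helper name selu_numpy is only for illustration and assumes myAlpha and myLambda are set as above):

def selu_numpy(x):
    # Identity for x >= 0, alpha*(exp(x)-1) for x < 0, everything scaled by lambda
    x = np.asarray(x, dtype=np.float64)
    return myLambda * np.where(x >= 0.0, x, myAlpha * (np.exp(x) - 1.0))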

In [24]:
def dropout_selu(x, rate, alpha= -myAlpha*myLambda, fixedPointMean=myFixedPointMean, fixedPointVar=myFixedPointVar, 
                 noise_shape=None, seed=None, name=None, training=False):
    """Dropout to a value with rescaling."""

    def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):
        keep_prob = 1.0 - rate
        x = ops.convert_to_tensor(x, name="x")
        if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
            raise ValueError("keep_prob must be a scalar tensor or a float in the "
                                             "range (0, 1], got %g" % keep_prob)
        keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name="keep_prob")
        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name="alpha")
        alpha.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        if tensor_util.constant_value(keep_prob) == 1:
            return x

        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
        # Binary keep mask: floor(keep_prob + U[0,1)) equals 1 with probability keep_prob
        random_tensor = keep_prob
        random_tensor += random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype)
        binary_tensor = math_ops.floor(random_tensor)
        # Dropped units are set to alpha (the SELU's negative saturation value), not to zero
        ret = x * binary_tensor + alpha * (1-binary_tensor)

        # Affine correction a*ret + b restores the fixed-point mean and variance
        a = tf.sqrt(fixedPointVar / (keep_prob *((1-keep_prob) * tf.pow(alpha-fixedPointMean,2) + fixedPointVar)))
        b = fixedPointMean - a * (keep_prob * fixedPointMean + (1 - keep_prob) * alpha)
        ret = a * ret + b
        ret.set_shape(x.get_shape())
        return ret

    with ops.name_scope(name, "dropout", [x]) as name:
        return utils.smart_cond(training,
            lambda: dropout_selu_impl(x, rate, alpha, noise_shape, seed, name),
            lambda: array_ops.identity(x))
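
The idea behind the function above: dropped units are set to alpha' = -myAlpha*myLambda, the negative saturation value of the SELU, rather than to zero, and the affine correction a*ret + b is chosen so that the output keeps the desired fixed-point mean and variance. A minimal NumPy sketch of the same correction (the helper name alpha_dropout_numpy is only for illustration):

def alpha_dropout_numpy(x, rate, mean=myFixedPointMean, var=myFixedPointVar, rng=None):
    # Reference version of dropout_selu above, NumPy only
    rng = np.random.RandomState() if rng is None else rng
    alpha_p = -myAlpha * myLambda              # negative saturation value of the SELU
    keep_prob = 1.0 - rate
    mask = rng.binomial(1, keep_prob, size=x.shape).astype(x.dtype)
    dropped = x * mask + alpha_p * (1 - mask)  # dropped units take the value alpha'
    a = np.sqrt(var / (keep_prob * ((1 - keep_prob) * (alpha_p - mean)**2 + var)))
    b = mean - a * (keep_prob * mean + (1 - keep_prob) * alpha_p)
    return a * dropped + b                     # restores the fixed-point mean/variance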

In [25]:
from __future__ import absolute_import, division, print_function

import numbers
import numpy as np
import tensorflow as tf

# The framework modules below are referenced inside selu() and dropout_selu() above
from tensorflow.contrib import layers
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.layers import utils


x = tf.Variable(tf.random_normal([10000],mean=myFixedPointMean, stddev=np.sqrt(myFixedPointVar)))
w = selu(x)
y = dropout_selu(w,0.2,training=True)
init = tf.global_variables_initializer()
                
gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    z,zz, zzz = sess.run([x, w, y]) 
    #print(z)
    #print(zz)
    print("mean/var should be at:", myFixedPointMean, "/", myFixedPointVar)
    print("Input data mean/var:  ", "{:.12f}".format(np.mean(z)), "/", "{:.12f}".format(np.var(z)))    
    print("After selu:           ", "{:.12f}".format(np.mean(zz)), "/", "{:.12f}".format(np.var(zz)))
    print("After dropout mean/var", "{:.12f}".format(np.mean(zzz)), "/", "{:.12f}".format(np.var(zzz)))


mean/var should be at: -0.1 / 2.0
Input data mean/var:   -0.081669516861 / 1.985695838928
After selu:            -0.086086399853 / 2.011691331863
After dropout mean/var -0.080544397235 / 2.031569242477

For completeness: these are the exact closed-form expressions for alpha and lambda at the fixed point with mean zero and unit variance


In [8]:
myAlpha = -np.sqrt(2/np.pi) / (np.exp(0.5) * erfc(1/np.sqrt(2))-1 )  
myLambda = (1-np.sqrt(np.exp(1))*erfc(1/np.sqrt(2)))  *  \
            np.sqrt( 2*np.pi/ (2 + np.pi -2*np.sqrt(np.exp(1))*(2+np.pi)*erfc(1/np.sqrt(2)) + \
            np.exp(1)*np.pi*erfc(1/np.sqrt(2))**2 + 2*np.exp(2)*erfc(np.sqrt(2))))

In [9]:
print("Alpha parameter of the SELU: ", myAlpha)
print("Lambda parameter of the SELU: ", myLambda)


Alpha parameter of the SELU:  1.67326324235
Lambda parameter of the SELU:  1.05070098736
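
These closed-form values can be cross-checked against the numerical solver; a small sketch (assuming the cells above have been run):

numAlpha, numLambda = getSeluParameters(0, 1)
print(np.allclose((numAlpha, numLambda), (myAlpha, myLambda)))  # expected to print True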