In [4]:
from __future__ import print_function, division
import numpy as np
import theano
import theano.tensor as T

from neuralnilm.layers import MixtureDensityLayer
from lasagne.layers import InputLayer

In [2]:
mixing = np.array([0.5, 0.5])
sigma = np.array([0.1, 0.1])
mu = np.array([-1, 2])
t = np.array([-1., 2])
n = len(t)

# Rough first pass at a Gaussian log-likelihood; note that mixing is added
# un-logged here, whereas the later cells use log(mixing).
log_likelihood = (
    - 0.5 * n * np.log(2 * np.pi)
    - 0.5 * n * np.log(sigma)
    - 0.5 * (1.0 / sigma) * np.sum((t - np.tile(mu, (n, 1)))**2, axis=0)
    + mixing
)

-log_likelihood


Out[2]:
array([-0.96470803, -0.96470803])
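
A quick cross-check against scipy helps guard against dropped constants or sign slips in these hand-rolled likelihood cells. This is a minimal sketch of the standard single-Gaussian log density (variable names are illustrative):

from scipy.stats import norm

mu_, sigma_, t_ = 0.0, 1.0, 1.0
manual = -0.5 * np.log(2 * np.pi) - np.log(sigma_) - 0.5 * ((t_ - mu_) / sigma_)**2
print(manual, norm.logpdf(t_, loc=mu_, scale=sigma_))  # both -1.4189...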

In [8]:
arr = theano.shared(np.random.randn(2,3,4,5))

In [13]:
arr.flatten(2).shape.eval()


Out[13]:
array([ 2, 60])
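
flatten(ndim) keeps the first ndim - 1 axes and collapses everything after them into one, so on the (2, 3, 4, 5) array above the trailing three axes merge into 60. A quick illustrative check:

print(arr.flatten(2).shape.eval())  # [ 2 60]
print(arr.flatten(3).shape.eval())  # [ 2  3 20]
print(arr.flatten(1).shape.eval())  # [120]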

In [24]:
T.concatenate([arr, arr], axis=3).shape.eval()


Out[24]:
array([ 2,  3,  4, 10])

In [36]:
arr.flatten(1).dimshuffle('x',0).eval()


Out[36]:
array([[ -9.24148613e-01,  -5.73637324e-01,   2.99802132e-01,
         -1.85171038e-01,  -6.37483579e-01,  -2.94061795e+00,
         -2.22821661e-01,  -1.06999928e+00,  -8.55054985e-01,
          1.19949253e+00,  -8.08247875e-01,  -6.12313843e-01,
         -1.19242190e+00,   1.73176434e-01,   1.31739943e+00,
         -2.92787259e+00,  -6.43583284e-01,   2.54871351e-01,
         -4.53157892e-01,   1.77625607e+00,   4.09646239e-01,
         -7.14927891e-01,  -2.64463746e-01,   2.68219258e-01,
         -1.48696838e-01,  -2.77845021e-01,  -2.22491007e-01,
         -1.70205439e+00,  -8.81109073e-02,   7.70872889e-01,
         -4.30556157e-01,  -1.47446973e+00,  -1.35885521e+00,
          1.11411817e+00,  -2.90778500e-02,  -5.75589972e-02,
         -1.34574343e+00,  -3.01578678e-01,   4.93330734e-01,
          2.31859305e-03,  -1.69471784e-01,  -1.27965633e+00,
         -2.39504623e+00,   4.19813636e-01,  -1.65213771e-01,
          5.65819000e-01,  -5.07072638e-01,  -3.06731756e-01,
         -5.67809339e-01,  -9.84835689e-01,   2.44289288e-01,
         -2.60249167e-01,  -1.49897459e+00,  -8.96275382e-01,
         -1.11356148e+00,  -5.37669968e-01,   1.40052282e+00,
         -1.97524391e-01,  -9.18473385e-02,   8.53766118e-01,
         -1.46818409e+00,  -2.52042520e+00,  -7.61641016e-01,
          2.10546053e-01,  -1.48260867e+00,  -1.54835987e-01,
         -1.18045351e+00,   1.39501590e+00,  -1.32934876e+00,
         -8.06690534e-01,   3.94083719e-01,  -4.27963699e-01,
         -9.88124270e-03,   6.10088741e-01,   2.53802263e-01,
          4.15084704e-01,  -6.53665720e-01,   2.57121200e-01,
          8.38647004e-01,  -8.36748694e-02,   1.40443651e+00,
          4.54911246e-01,  -9.41950580e-01,   1.35472057e+00,
          1.84895411e+00,  -2.37960428e-01,  -4.12642229e-01,
          3.09626814e+00,   1.74439702e-01,  -3.32648778e-01,
         -1.65707947e-01,   3.78938907e-01,   1.70763998e+00,
          6.60122403e-01,   1.51485907e+00,   5.94977001e-02,
          2.12502027e+00,  -7.57695000e-01,   1.48680401e-02,
          2.20355157e-02,  -1.02231774e-01,   1.72460814e+00,
          5.27602671e-01,  -9.00757587e-01,  -2.87797480e-01,
         -2.31649129e+00,  -3.23947949e-01,  -1.28837534e+00,
         -2.97323005e-01,  -1.36509387e+00,  -5.66829979e-01,
         -7.07379720e-01,  -1.11894148e+00,  -1.03882029e+00,
         -6.12277883e-01,  -2.19157333e+00,   5.82078719e-01,
          4.47165778e-01,   2.62643687e-01,  -7.52425132e-01]])

In [37]:
b = theano.shared(np.random.randn(3,))

In [43]:
b.dimshuffle('x', 0).eval()


Out[43]:
array([[-0.38564482, -0.21376647,  0.18451497]])
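
dimshuffle('x', 0) prepends a broadcastable axis, the Theano analogue of NumPy's b[np.newaxis, :]; putting the 'x' last appends one instead. A small illustrative check:

print(b.dimshuffle('x', 0).shape.eval())  # [1 3]
print(b.dimshuffle(0, 'x').shape.eval())  # [3 1]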

In [2]:
inp = theano.shared(np.random.randn(5,10), name='inp')
W = theano.shared(np.random.randn(10, 3), name='W')

In [3]:
T.dot(inp, W).shape.eval()


Out[3]:
array([5, 3])

In [12]:
W.reshape?

In [4]:
i_l = InputLayer(shape=(5,10))

In [14]:
mdn = MixtureDensityLayer(i_l, num_units=1, num_components=2)

In [15]:
mdn.get_output_shape()


Out[15]:
(5, 1, 2, 3)
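
The layer output is (batch, num_units, num_components, 3), and the cells below assume the last axis packs the three mixture parameters in the order (mu, sigma, mixing). A minimal slicing sketch under that assumption:

output = mdn.get_output_for(inp)   # shape (5, 1, 2, 3)
mu     = output[:, :, :, 0]
sigma  = output[:, :, :, 1]
mixing = output[:, :, :, 2]
print(mu.shape.eval())             # [5 1 2]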

In [16]:
mdn.get_output_for(inp).shape.eval()


Out[16]:
array([5, 1, 2, 3])

In [19]:
mdn.get_output_for(inp).eval()[:,:,:,2].shape


Out[19]:
(5, 1, 2)

In [8]:
test = theano.shared(np.random.randn(4, 3, 2), name='test')

In [17]:
T.concatenate((T.shape_padright(test), T.shape_padright(test)), axis=3).eval()


Out[17]:
array([[[[ 0.35023724,  0.35023724],
         [ 0.88620884,  0.88620884]],

        [[ 2.20136729,  2.20136729],
         [ 0.13552071,  0.13552071]],

        [[ 0.98597707,  0.98597707],
         [ 0.95432965,  0.95432965]]],


       [[[ 0.08782344,  0.08782344],
         [-0.7312077 , -0.7312077 ]],

        [[ 0.76797193,  0.76797193],
         [ 0.21512934,  0.21512934]],

        [[-0.63539776, -0.63539776],
         [-1.02058442, -1.02058442]]],


       [[[-0.03939722, -0.03939722],
         [ 1.25695408,  1.25695408]],

        [[-0.50788745, -0.50788745],
         [ 0.97470263,  0.97470263]],

        [[ 0.83620002,  0.83620002],
         [-0.02224405, -0.02224405]]],


       [[[ 0.35969195,  0.35969195],
         [ 0.61311065,  0.61311065]],

        [[-0.92769775, -0.92769775],
         [ 1.11723909,  1.11723909]],

        [[ 0.45246166,  0.45246166],
         [ 1.66835887,  1.66835887]]]])
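
T.shape_padright appends a broadcastable trailing axis, so each (4, 3, 2) copy becomes (4, 3, 2, 1) and the two can be concatenated along the new axis 3. It is equivalent to a dimshuffle with a trailing 'x' (illustrative check):

print(T.shape_padright(test).shape.eval())         # [4 3 2 1]
print(test.dimshuffle(0, 1, 2, 'x').shape.eval())  # [4 3 2 1]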

In [40]:
BATCH_SIZE = 10
N_INPUTS = 5
N_OUTPUTS = 1
N_COMPONENTS = 1

i_l = InputLayer(shape=(BATCH_SIZE, N_INPUTS))
inp = theano.shared(np.random.randn(BATCH_SIZE, N_INPUTS), name='inp')
mdn = MixtureDensityLayer(i_l, num_units=N_OUTPUTS, num_components=N_COMPONENTS)
y = mdn.get_output_for(inp)
t = theano.shared(np.random.randn(BATCH_SIZE, N_OUTPUTS), name='t')

In [69]:
mu = y[:,:,:,0]
sigma = y[:,:,:,1]
mixing = y[:,:,:,2]
mu.name = 'mu'
sigma.name = 'sigma'
mixing.name = 'mixing'
n = t.shape[0]

In [70]:
mu.shape.eval()


Out[70]:
array([10,  1,  1])

In [46]:
log_likelihood = (
    - 0.5 * n * T.log(2 * np.pi)
    - 0.5 * n * T.log(sigma)
    # - 0.5 * (1.0 / sigma) * T.sum((t.dimshuffle(0, 1) - mu)**2, axis=1)
    #+ mixing
)

In [47]:
log_likelihood.shape.eval()


Out[47]:
array([10,  1])

In [50]:
T.sum((t.dimshuffle(0, 1) - mu)**2, axis=0).shape.eval()


Out[50]:
array([1])

In [52]:
def normal_pdf(x, mu, sigma):
    exponent = -((x - mu)**2) / (2 * sigma**2)
    normaliser = sigma * T.sqrt(2 * np.pi)
    return T.exp(exponent) / normaliser

In [121]:
normal_pdf(1, 0, np.sqrt(1)).eval()


Out[121]:
array(0.24197072451914337)

In [95]:
np.log(normal_pdf(1, 0, np.sqrt(1)).eval())


Out[95]:
-1.4189385332046727

In [105]:
from scipy.stats import norm
norm.logpdf(1, loc=0, scale=np.sqrt(1))


Out[105]:
-1.4189385332046727

In [127]:
MINUS_HALF_LOG_2PI = np.float64(- 0.5 * np.log(2 * np.pi))
def log_likelihood(t, mu, sigma, mixing=1):
    return (
        MINUS_HALF_LOG_2PI
        - T.log(sigma)
        - 0.5 * T.inv(sigma**2) * (t - mu)**2
        + T.log(mixing)
    )

(T.exp(log_likelihood(1, 0, np.sqrt(1), 0.5)) + T.exp(log_likelihood(1, 0, np.sqrt(1), 0.5))).eval()


Out[127]:
array(0.24197072405827277)
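
The value matches the single-Gaussian density N(1; 0, 1) = 0.24197... because the two identical half-weighted components simply sum back to it. A quick illustrative numpy check:

single = np.exp(-0.5 * np.log(2 * np.pi) - 0.5 * 1.0**2)  # N(1; mu=0, sigma=1)
print(single, 0.5 * single + 0.5 * single)                # both 0.24197...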

In [131]:
(T.exp(log_likelihood(1, 0, np.sqrt(1), 0.5) * log_likelihood(1, 0, np.sqrt(1), 0.5))).eval()


Out[131]:
array(86.56590837486551)

In [129]:
log_likelihood(1, 0, np.sqrt(1), 0.5).eval() * log_likelihood(1, 0, np.sqrt(1), 0.5).eval()


Out[129]:
4.4609060703341816

In [97]:
def log_likelihood(t, mu, sigma):
    return (
        - 0.5 * T.log(2 * np.pi)
        - 2.0 * T.log(t - mu)
        + T.log(2.0)
        + T.log(sigma)
    )

log_likelihood(1, 0, np.sqrt(1)).eval()


Out[97]:
array(-0.22579135074007306)

In [122]:
def normal_gmm(x, mu, sigma, mixing):
    pdf_cumulator = 0.0
    n_components = len(mu)
    for component_i in range(n_components):
        pdf = normal_pdf(x, mu[component_i], sigma[component_i])
        pdf *= mixing[component_i]
        pdf_cumulator += pdf
    return pdf_cumulator

In [123]:
normal_gmm(1, [0,0], [np.sqrt(1),np.sqrt(1)], [.5, .5]).eval()


Out[123]:
array(0.24197072451914337)

In [139]:
def list_to_shared(lst):
    return theano.shared(np.array(lst))

def normal_gmm_vectorised(x, mu, sigma, mixing):
    # mu = list_to_shared(mu)
    # sigma = list_to_shared(sigma)
    # mixing = list_to_shared(mixing)
    normal_pdfs = normal_pdf(x, mu, sigma)
    return T.dot(normal_pdfs, mixing)

In [137]:
normal_gmm_vectorised(1, [0,0], [np.sqrt(1),np.sqrt(1)], [.5, .5]).eval()


Out[137]:
array(0.24197072451914337)

In [148]:
t.shape.eval()


Out[148]:
array([10,  1])

In [149]:
mu.shape.eval()


Out[149]:
array([10,  1,  1])

In [150]:
(t.dimshuffle(0,1,'x') - mu).shape.eval()


Out[150]:
array([10,  1,  1])

In [155]:
normal_pdf(t.dimshuffle(0,1,'x'), mu, sigma).reshape((10, 1)).eval()


Out[155]:
array([[  1.11667743e-01],
       [  1.63015763e-05],
       [  1.15920473e-82],
       [  3.56634647e-04],
       [  6.40570802e-02],
       [  1.75931391e-01],
       [  8.25700714e-02],
       [  1.38029897e-01],
       [  0.00000000e+00],
       [  4.22842335e-01]])

In [88]:
def nll():
    log_likelihood = 0.0
    for batch_i in range(BATCH_SIZE):
        for output_i in range(N_OUTPUTS):
            lh = normal_gmm(
                t[batch_i, output_i], 
                mu[batch_i, output_i, :],
                sigma[batch_i, output_i, :],
                mixing[batch_i, output_i, :]
            )
            print(lh.eval())
            log_likelihood += T.log(lh)
            print(log_likelihood.eval())
            print()
    return -log_likelihood

In [89]:
neglogl = nll()


0.111667743339
-2.19222739403

1.63015763021e-05
-13.2164761432

1.1592047326e-82
-201.880719574

0.000356634646821
-209.819518273

0.0640570802263
-212.567498987

0.175931391122
-214.30516017

0.0825700714066
-216.799268166

0.1380298973
-218.779553136

0.0
-inf

0.422842334908
-inf


In [83]:
neglogl.eval()


Out[83]:
array(inf)
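
The -inf comes from the sample whose mixture pdf underflows to exactly 0.0 (the 0.0 printed above), so its log is -inf. Staying in the log domain avoids ever forming those tiny pdfs: compute each component's log density (plus log mixing weight) and reduce over the component axis with a numerically stable log-sum-exp. A minimal sketch under these assumptions (the helper names are illustrative, not necessarily the neuralnilm.objectives implementation):

MINUS_HALF_LOG_2PI = np.float64(-0.5 * np.log(2 * np.pi))

def component_log_likelihood(x, mu, sigma, mixing):
    # Per-component Gaussian log density plus log mixing weight.
    return (MINUS_HALF_LOG_2PI
            - T.log(sigma)
            - 0.5 * T.inv(sigma**2) * (x - mu)**2
            + T.log(mixing))

def log_sum_exp(x, axis=None):
    # Stable log(sum(exp(x))): subtract the max before exponentiating.
    x_max = T.max(x, axis=axis, keepdims=True)
    return T.log(T.sum(T.exp(x - x_max), axis=axis, keepdims=True)) + x_max

def stable_nll(t, mu, sigma, mixing):
    # t: (batch, outputs); mu, sigma, mixing: (batch, outputs, components).
    x = t.dimshuffle(0, 1, 'x')
    log_likelihoods = component_log_likelihood(x, mu, sigma, mixing)
    return -log_sum_exp(log_likelihoods, axis=2)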

In [156]:
from neuralnilm.objectives import mdn_nll

In [205]:
nll = mdn_nll(y, t)

In [195]:
normal_pdfs, mixing = mdn_nll(y, t)

In [206]:
mixing.shape.eval()


Out[206]:
array([10,  1,  1])

In [207]:
nll.shape.eval()


Out[207]:
array([10,  1])

In [208]:
nll.eval()


Out[208]:
array([[   2.19222739],
       [  11.02424875],
       [ 188.66424343],
       [   7.9387987 ],
       [   2.74798071],
       [   1.73766118],
       [   2.494108  ],
       [   1.98028497],
       [          inf],
       [   0.8607559 ]])

In [204]:
T.batched_tensordot(normal_pdfs, mixing, axes=1).shape.eval()


Out[204]:
array([10,  1,  1])
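
A quicker, hedged workaround for the inf rows above is to clamp the mixture pdf away from zero before taking the log; the epsilon value here is illustrative, and the log-domain route is the more principled fix:

EPSILON = 1e-37  # illustrative floor, chosen to suit the float dtype
mixture_pdf = T.batched_tensordot(normal_pdfs, mixing, axes=1)
safe_nll = -T.log(T.maximum(mixture_pdf, EPSILON))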

In [163]:
y.shape.eval()


Out[163]:
array([10,  1,  1,  3])

In [164]:
mu.shape.eval()


Out[164]:
array([10,  1,  1])

In [165]:
t.shape.eval()


Out[165]:
array([10,  1])

In [169]:
T.tensordot?

In [2]:
from neuralnilm.objectives import normal_log_likelihood, log_sum_exp

In [5]:
BATCH_SIZE = 10
N_INPUTS = 5
N_OUTPUTS = 2
N_COMPONENTS = 3

i_l = InputLayer(shape=(BATCH_SIZE, N_INPUTS))
inp = theano.shared(np.random.randn(BATCH_SIZE, N_INPUTS), name='inp')
mdn = MixtureDensityLayer(i_l, num_units=N_OUTPUTS, num_components=N_COMPONENTS)
y = mdn.get_output_for(inp)
t = theano.shared(np.random.randn(BATCH_SIZE, N_OUTPUTS), name='t')

In [8]:
mu = y[:,:,:,0]
sigma = y[:,:,:,1]
mixing = y[:,:,:,2]
mu.name = 'mu'
sigma.name = 'sigma'
mixing.name = 'mixing'
n = t.shape[0]

In [6]:
x = t.dimshuffle(0, 1, 'x')

In [7]:
x.shape.eval()


Out[7]:
array([10,  2,  1])

In [9]:
log_likelihood = normal_log_likelihood(x, mu, sigma, mixing)

In [10]:
log_likelihood.shape.eval()


Out[10]:
array([10,  2,  3])

In [15]:
summed = log_sum_exp(log_likelihood, axis=2)

In [16]:
summed.shape.eval()


Out[16]:
array([10,  2,  1])
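
With the component axis reduced, the scalar training cost is just the negated mean of the summed log-likelihoods; a minimal sketch, assuming the usual mean-over-batch-and-outputs reduction:

nll = -summed        # shape (batch, outputs, 1)
cost = T.mean(nll)   # scalar cost to hand to the optimiser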

In [223]:
summed.eval()


Out[223]:
array([[[-0.00453883]]])

In [224]:
T.mean(log_likelihood).eval()


Out[224]:
array(-174.06960658757058)
