In [4]:
from __future__ import print_function, division
import numpy as np
import theano
import theano.tensor as T
from neuralnilm.layers import MixtureDensityLayer
from lasagne.layers import InputLayer
In [2]:
mixing = np.array([0.5, 0.5])
sigma = np.array([0.1, 0.1])
mu = np.array([-1, 2])
t = np.array([-1., 2])
n = len(t)
log_likelihood = (
    - 0.5 * n * np.log(2 * np.pi)
    - 0.5 * n * np.log(sigma)
    - 0.5 * (1.0 / sigma) * np.sum((t - np.tile(mu, (n, 1)))**2, axis=0)
    + mixing
)
-log_likelihood
Out[2]:
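For comparison (added as a sketch, not part of the original run): the same toy mixture log-likelihood can be evaluated directly with SciPy. The mixing coefficients belong inside the log of a weighted sum of component densities, rather than being added to the log-density as above.
In [ ]:
from scipy.stats import norm
# log( sum_k pi_k * N(t_i | mu_k, sigma_k) ) for each target value t_i
component_pdfs = norm.pdf(t[:, None], loc=mu[None, :], scale=sigma[None, :])
np.log(component_pdfs.dot(mixing))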
In [8]:
arr = theano.shared(np.random.randn(2,3,4,5))
In [13]:
arr.flatten(2).shape.eval()
Out[13]:
In [24]:
T.concatenate([arr, arr], axis=3).shape.eval()
Out[24]:
In [36]:
arr.flatten(1).dimshuffle('x',0).eval()
Out[36]:
In [37]:
b = theano.shared(np.random.randn(3,))
In [43]:
b.dimshuffle('x', 0).eval()
Out[43]:
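A quick shape check (added as a sketch): dimshuffle('x', 0) prepends a broadcastable axis, analogous to b[np.newaxis, :] in NumPy, so the (3,) vector becomes a (1, 3) row.
In [ ]:
# The prepended 'x' axis is broadcastable, giving shape (1, 3).
b.dimshuffle('x', 0).shape.eval()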
In [2]:
inp = theano.shared(np.random.randn(5,10), name='inp')
W = theano.shared(np.random.randn(10, 3), name='W')
In [3]:
T.dot(inp, W).shape.eval()
Out[3]:
In [12]:
W.reshape?
In [4]:
i_l = InputLayer(shape=(5,10))
In [14]:
mdn = MixtureDensityLayer(i_l, num_units=1, num_components=2)
In [15]:
mdn.get_output_shape()
Out[15]:
In [16]:
mdn.get_output_for(inp).shape.eval()
Out[16]:
In [19]:
mdn.get_output_for(inp).eval()[:,:,:,2].shape
Out[19]:
In [8]:
test = theano.shared(np.random.randn(4, 3, 2), name='test')
In [17]:
T.concatenate((T.shape_padright(test), T.shape_padright(test)), axis=3).eval()
Out[17]:
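Checking the resulting shape (added as a sketch): shape_padright turns the (4, 3, 2) tensor into (4, 3, 2, 1), so concatenating two copies along axis 3 stacks them into (4, 3, 2, 2), the same trailing-axis layout used for the MDN parameters below.
In [ ]:
# (4, 3, 2) -> shape_padright -> (4, 3, 2, 1); concatenating along the new
# last axis gives (4, 3, 2, 2).
T.concatenate((T.shape_padright(test), T.shape_padright(test)), axis=3).shape.eval()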
In [40]:
BATCH_SIZE = 10
N_INPUTS = 5
N_OUTPUTS = 1
N_COMPONENTS = 1
i_l = InputLayer(shape=(BATCH_SIZE, N_INPUTS))
inp = theano.shared(np.random.randn(BATCH_SIZE, N_INPUTS), name='inp')
mdn = MixtureDensityLayer(i_l, num_units=N_OUTPUTS, num_components=N_COMPONENTS)
y = mdn.get_output_for(inp)
t = theano.shared(np.random.randn(BATCH_SIZE, N_OUTPUTS), name='t')
In [69]:
mu = y[:,:,:,0]
sigma = y[:,:,:,1]
mixing = y[:,:,:,2]
mu.name = 'mu'
sigma.name = 'sigma'
mixing.name = 'mixing'
n = t.shape[0]
In [70]:
mu.shape.eval()
Out[70]:
In [46]:
log_likelihood = (
    - 0.5 * n * T.log(2 * np.pi)
    - 0.5 * n * T.log(sigma)
    # - 0.5 * (1.0 / sigma) * T.sum((t.dimshuffle(0, 1) - mu)**2, axis=1)
    # + mixing
)
In [47]:
log_likelihood.shape.eval()
Out[47]:
In [50]:
T.sum((t.dimshuffle(0, 1) - mu)**2, axis=0).shape.eval()
Out[50]:
In [52]:
def normal_pdf(x, mu, sigma):
    exponent = -((x - mu)**2) / (2 * sigma**2)
    normaliser = sigma * T.sqrt(2 * np.pi)
    return T.exp(exponent) / normaliser
In [121]:
normal_pdf(1, 0, np.sqrt(1)).eval()
Out[121]:
In [95]:
np.log(normal_pdf(1, 0, np.sqrt(1)).eval())
Out[95]:
In [105]:
from scipy.stats import norm
norm.logpdf(1, loc=0, scale=np.sqrt(1))
Out[105]:
In [127]:
MINUS_HALF_LOG_2PI = np.float64(- 0.5 * np.log(2 * np.pi))

def log_likelihood(t, mu, sigma, mixing=1):
    return (
        MINUS_HALF_LOG_2PI
        - T.log(sigma)
        - 0.5 * T.inv(sigma**2) * (t - mu)**2
        + T.log(mixing)
    )
(T.exp(log_likelihood(1, 0, np.sqrt(1), 0.5)) + T.exp(log_likelihood(1, 0, np.sqrt(1), 0.5))).eval()
Out[127]:
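As a sanity check (added, assuming SciPy is available): two identical components weighted 0.5 each should reproduce a single standard normal density, so the sum above can be compared against scipy.stats.norm.
In [ ]:
from scipy.stats import norm
# The two 0.5-weighted identical components should sum to N(1 | 0, 1).
norm.pdf(1, loc=0, scale=1)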
In [131]:
(T.exp(log_likelihood(1, 0, np.sqrt(1), 0.5) * log_likelihood(1, 0, np.sqrt(1), 0.5))).eval()
Out[131]:
In [129]:
log_likelihood(1, 0, np.sqrt(1), 0.5).eval() * log_likelihood(1, 0, np.sqrt(1), 0.5).eval()
Out[129]:
In [97]:
def log_likelihood(t, mu, sigma):
    return (
        - 0.5 * T.log(2 * np.pi)
        - 2.0 * T.log(t - mu)
        + T.log(2.0)
        + T.log(sigma)
    )
log_likelihood(1, 0, np.sqrt(1)).eval()
Out[97]:
In [122]:
def normal_gmm(x, mu, sigma, mixing):
    pdf_cumulator = 0.0
    n_components = len(mu)
    for component_i in range(n_components):
        pdf = normal_pdf(x, mu[component_i], sigma[component_i])
        pdf *= mixing[component_i]
        pdf_cumulator += pdf
    return pdf_cumulator
In [123]:
normal_gmm(1, [0,0], [np.sqrt(1),np.sqrt(1)], [.5, .5]).eval()
Out[123]:
In [139]:
def list_to_shared(lst):
    return theano.shared(np.array(lst))

def normal_gmm_vectorised(x, mu, sigma, mixing):
    # mu = list_to_shared(mu)
    # sigma = list_to_shared(sigma)
    # mixing = list_to_shared(mixing)
    normal_pdfs = normal_pdf(x, mu, sigma)
    return T.dot(normal_pdfs, mixing)
In [137]:
normal_gmm_vectorised(1, [0,0], [np.sqrt(1),np.sqrt(1)], [.5, .5]).eval()
Out[137]:
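A quick agreement check between the looped and vectorised mixtures (added as a sketch; NumPy arrays are passed here so the broadcasting inside normal_pdf works without the commented-out list_to_shared calls):
In [ ]:
loop_val = normal_gmm(1, np.array([0., 0.]), np.array([1., 1.]), np.array([.5, .5])).eval()
vec_val = normal_gmm_vectorised(1, np.array([0., 0.]), np.array([1., 1.]), np.array([.5, .5])).eval()
np.allclose(loop_val, vec_val)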
In [148]:
t.shape.eval()
Out[148]:
In [149]:
mu.shape.eval()
Out[149]:
In [150]:
(t.dimshuffle(0,1,'x') - mu).shape.eval()
Out[150]:
In [155]:
normal_pdf(t.dimshuffle(0,1,'x'), mu, sigma).reshape((10, 1)).eval()
Out[155]:
In [88]:
def nll():
    log_likelihood = 0.0
    for batch_i in range(BATCH_SIZE):
        for output_i in range(N_OUTPUTS):
            lh = normal_gmm(
                t[batch_i, output_i],
                mu[batch_i, output_i, :],
                sigma[batch_i, output_i, :],
                mixing[batch_i, output_i, :]
            )
            print(lh.eval())
            log_likelihood += T.log(lh)
            print(log_likelihood.eval())
            print()
    return -log_likelihood
In [89]:
neglogl = nll()
In [83]:
neglogl.eval()
Out[83]:
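For comparison, the same negative log-likelihood can be written without the Python loops (a vectorised sketch; the actual mdn_nll in neuralnilm.objectives may be organised differently):
In [ ]:
# Broadcast targets over the component axis, weight each component density by
# its mixing coefficient, sum over components, then take -sum(log(.)).
x = t.dimshuffle(0, 1, 'x')                  # (batch, outputs, 1)
component_pdfs = normal_pdf(x, mu, sigma)    # (batch, outputs, components)
mixture_pdf = T.sum(component_pdfs * mixing, axis=2)
(-T.sum(T.log(mixture_pdf))).eval()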
In [156]:
from neuralnilm.objectives import mdn_nll
In [205]:
nll = mdn_nll(y, t)
In [195]:
normal_pdfs, mixing = mdn_nll(y, t)  # mdn_nll presumably modified here to return its intermediates for inspection
In [206]:
mixing.shape.eval()
Out[206]:
In [207]:
nll.shape.eval()
Out[207]:
In [208]:
nll.eval()
Out[208]:
In [204]:
T.batched_tensordot(normal_pdfs, mixing, axes=1).shape.eval()
Out[204]:
In [163]:
y.shape.eval()
Out[163]:
In [164]:
mu.shape.eval()
Out[164]:
In [165]:
t.shape.eval()
Out[165]:
In [169]:
T.tensordot?
In [2]:
from neuralnilm.objectives import normal_log_likelihood, log_sum_exp
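log_sum_exp is used below to combine the per-component log-likelihoods; a numerically stable version typically subtracts the per-axis maximum before exponentiating (the sketch below is illustrative and may differ from the imported implementation):
In [ ]:
def log_sum_exp_sketch(x, axis=None):
    # Subtract the max before exponentiating to avoid overflow, then add it
    # back outside the log.  keepdims keeps the result broadcastable.
    x_max = T.max(x, axis=axis, keepdims=True)
    return T.log(T.sum(T.exp(x - x_max), axis=axis, keepdims=True)) + x_max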
In [5]:
BATCH_SIZE = 10
N_INPUTS = 5
N_OUTPUTS = 2
N_COMPONENTS = 3
i_l = InputLayer(shape=(BATCH_SIZE, N_INPUTS))
inp = theano.shared(np.random.randn(BATCH_SIZE, N_INPUTS), name='inp')
mdn = MixtureDensityLayer(i_l, num_units=N_OUTPUTS, num_components=N_COMPONENTS)
y = mdn.get_output_for(inp)
t = theano.shared(np.random.randn(BATCH_SIZE, N_OUTPUTS), name='t')
In [8]:
mu = y[:,:,:,0]
sigma = y[:,:,:,1]
mixing = y[:,:,:,2]
mu.name = 'mu'
sigma.name = 'sigma'
mixing.name = 'mixing'
n = t.shape[0]
In [6]:
x = t.dimshuffle(0, 1, 'x')
In [7]:
x.shape.eval()
Out[7]:
In [9]:
log_likelihood = normal_log_likelihood(x, mu, sigma, mixing)
In [10]:
log_likelihood.shape.eval()
Out[10]:
In [15]:
summed = log_sum_exp(log_likelihood, axis=2)
In [16]:
summed.shape.eval()
Out[16]:
In [223]:
summed.eval()
Out[223]:
In [224]:
T.mean(log_likelihood).eval()
Out[224]:
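Putting the pieces together (a sketch of the final cost; the exact reduction used in neuralnilm may differ): take the log-sum-exp over the component axis to get per-sample mixture log-likelihoods, then the negative mean over batch and outputs.
In [ ]:
cost = -T.mean(log_sum_exp(log_likelihood, axis=2))
cost.eval()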
In [ ]: