In [ ]:
require 'nn'

In [ ]:
-- Two-branch network: the ConcatTable applies every branch to the same input
-- and returns a table holding one output per branch.
model = nn.Sequential()
cat = nn.ConcatTable()
-- Branch 1: a single linear map, 1 -> 2.
inner1 = nn.Sequential():add(nn.Linear(1,2))
-- Branch 2: 1 -> 5, in-place ReLU, 5 -> 1, then SoftMax.
-- NOTE(review): SoftMax over a single output unit looks like it always yields 1 -- confirm intent.
inner2 = nn.Sequential()
    :add(nn.Linear(1,5))
    :add(nn.ReLU(true))
    :add(nn.Linear(5,1))
    :add(nn.SoftMax())
cat:add(inner1):add(inner2)
model:add(cat)

In [ ]:
-- Redefine `model` (overwriting the previous one) as a ConcatTable with two
-- linear branches fed by the same 1-feature input: 1 -> 3 and 1 -> 4.
model = nn.Sequential()
cat = nn.ConcatTable():add(nn.Linear(1,3)):add(nn.Linear(1,4))
model:add(cat)

In [ ]:
-- Push a random 10x1 batch through `model`; `k` keeps the forward output.
k = model:forward(torch.rand(10,1))

In [ ]:
-- Display the second entry of the forward output `k`.
k[2]

In [ ]:
-- Scratch tensors with uniform random entries: x is 10x1, v is 10x3.
x = torch.rand(10,1)
v = torch.rand(10,3)

In [ ]:
-- Element-wise multiply x by the first column of v; the method form of cmul
-- writes the result back into x.
x:cmul(v[{{},1}])

In [ ]:
-- Scratch check: build (and display) a 3x1 tensor of zeros.
torch.zeros(3,1)

In [ ]:
-- Smoke test of GMMKLDCriterion on 1-d (unbatched) tensors of length 3.
-- The criterion is called with input {pmu, plogv, pi} and target {mu, logv}.
require 'criteria/GMMKLDCriterion'
-- Construct an instance: without the call parentheses `gkld` would be the
-- class table itself and :forward would run with the class as `self`.
gkld = nn.GMMKLDCriterion()
pmu = torch.zeros(3)
plogv = torch.zeros(3)
mu = torch.randn(3)
-- Square before taking the log so the argument of log() is never negative.
logv = torch.randn(3):pow(2):log()
pi = torch.rand(3)
input = {pmu, plogv, pi}
target = {mu, logv}
-- Reuse the tables built above instead of re-listing their contents.
gkld:forward(input, target)

In [ ]:
-- Evaluate the GMM criterion and the plain KLD criterion on the same 3x2
-- tensors, with pi set to a 3x1 column of ones, and print both losses.
-- NOTE(review): presumably the two printed values are expected to agree -- confirm.
require 'criteria/GMMKLDCriterion'
require 'criteria/KLDCriterion'
gkld = nn.GMMKLDCriterion()
kld = nn.KLDCriterion()

pmu = torch.zeros(3,2)
plogv = torch.zeros(3,2)
pi = torch.ones(3,1)

mu = torch.randn(3,2)
-- log of a squared normal sample, so the log argument is non-negative
logv = torch.randn(3,2):pow(2):log()

input = {pmu, plogv, pi}
target = {mu, logv}
print(gkld:forward(input, target))
print(kld:forward({pmu, plogv}, target))

In [26]:
-- Two-component test: compare the GMM criterion on {pmu1, pmu2, plogv1,
-- plogv2, pi} with the plain KLD criterion on the first component alone.
require 'criteria/GMMKLDCriterion'
require 'criteria/KLDCriterion'
gkld = nn.GMMKLDCriterion()
kld = nn.KLDCriterion()

-- Tensor sizes used below: N rows, D columns, K mixture weights per row.
N = 1
D = 3
K = 2

pmu1 = torch.zeros(N,D)
pmu2 = torch.ones(N,D)*40

plogv1 = torch.zeros(N,D)
plogv2 = torch.ones(N,D)

-- Uniform weights: every entry of pi is 1/K.
pi = torch.ones(N,K):div(K)

mu = torch.ones(N,D)*.1
logv = torch.ones(N,D)

-- Build the input from this cell's tensors. The previous `{pmu, plogv, pi}`
-- pointed at leftovers from an earlier cell and was never passed on.
input = {pmu1, pmu2, plogv1, plogv2, pi}
target = {mu, logv}
print(gkld:forward(input, target))
print(kld:forward({pmu1, plogv1}, target))


Out[26]:
1.7855699232485	
1.0924227426886	

In [ ]:
-- compare with direct KLD:
-- Print the plain KLD loss of each component against the same target.
print(kld:forward({pmu1, plogv1}, {mu, logv}))
print(kld:forward({pmu2, plogv2}, {mu, logv}))
-- pmu3/plogv3 only exist once a three-component setup cell has been run;
-- skip the third comparison otherwise rather than handing nil to the criterion.
if pmu3 and plogv3 then
    print(kld:forward({pmu3, plogv3}, {mu, logv}))
end

Gradient Check


In [12]:
-- Setup for the single-component (K = 1) gradient comparison: random N x D
-- tensors for both criterion arguments and a weight column of ones.
require 'criteria/GMMKLDCriterion'
require 'criteria/KLDCriterion'
gkld = nn.GMMKLDCriterion()
kld = nn.KLDCriterion()

-- Sizes used below: N rows, D columns. K is set but not read in this cell.
N = 5
D = 3
K = 1

pmu1 = torch.randn(N,D)
-- N x 1 column of ones; expand repeats the single entry without copying it.
pi = torch.Tensor({{1}}):expand(N,1)
plogv1 = torch.randn(N,D)

mu = torch.randn(N,D)
logv = torch.randn(N,D)

-- Small step size; not used in this cell.
h = 1e-4

In [13]:
-- Forward pass, then backward on the same input/target pair.
gkld:forward({pmu1, plogv1, pi}, {mu, logv})
-- backward's result is unpacked into one global per gradient.
-- NOTE(review): the order (input gradients first, then target gradients) is
-- assumed from the variable names -- confirm against GMMKLDCriterion.
dpmu1, dplogv1, dpi, dmu, dlogv = unpack(gkld:backward({pmu1, plogv1, pi}, {mu, logv}))

In [21]:
-- Display `b`, the second value unpacked from kld:backward in cell In [14]
-- (that cell appears further down in this export).
b


Out[21]:
  0.4285   0.3246  -5.9314
  0.0432   0.3800   0.1303
 -1.3696  -6.0556   0.2066
 -2.2196   0.3846  -0.1272
 -4.6368 -17.9626   0.2407
[torch.DoubleTensor of size 5x3]


In [14]:
-- Unpack the plain KLD criterion's backward result into four globals.
-- NOTE(review): presumably a, b are the gradients for pmu1, plogv1 and c, d
-- those for mu, logv -- confirm against KLDCriterion.
a, b, c, d = unpack(kld:backward({pmu1, plogv1}, {mu, logv}))

In [1]:
-- Setup for the three-component (K = 3) gradient check.
require 'criteria/GMMKLDCriterion'
require 'criteria/KLDCriterion'
gkld = nn.GMMKLDCriterion()
kld = nn.KLDCriterion()

-- Sizes used below: N rows, D columns, K mixture weights per row.
N = 5
D = 3
K = 3

pmu1 = torch.zeros(N,D)
pmu2 = torch.randn(N,D)
pmu3 = torch.randn(N,D)
-- Every row of pi is {1, 2, 3} / 6. The row is sized from N and K rather than
-- the literals 5 and 3, so changing N keeps pi aligned with the other tensors;
-- the weight row itself is still written out for K = 3.
pi = torch.Tensor({{1,2,3}}):expand(N,K)/6
plogv1 = torch.zeros(N,D)
-- log of a squared normal sample, so the log argument is non-negative
plogv2 = torch.randn(N,D):pow(2):log()
plogv3 = torch.randn(N,D):pow(2):log()

mu = torch.zeros(N,D)
logv = torch.zeros(N,D)

-- Step size for the finite-difference gradient check.
h = 1e-4

In [ ]:
-- Forward pass, then backward on the same three-component input and target.
gkld:forward({pmu1, pmu2, pmu3, plogv1, plogv2, plogv3, pi}, {mu, logv})
-- Unpack backward's result into one global per gradient (nine in total).
-- NOTE(review): the ordering is assumed to mirror the input list followed by
-- the target list -- confirm against GMMKLDCriterion.
dpmu1, dpmu2, dpmu3, dplogv1, dplogv2, dplogv3, dpi, dmu, dlogv = unpack(gkld:backward({pmu1, pmu2, pmu3, plogv1, plogv2, plogv3, pi}, {mu, logv}))

In [ ]:
-- Display `dmu`, the eighth value unpacked from gkld:backward.
dmu

In [ ]:
-- Central finite-difference check of `dmu` against gkld:forward.
-- `x` is the same tensor as `mu` (no copy), so writing x[{i,j}] changes the
-- target passed to gkld:forward. For every entry the loop prints
-- (f(mu + h) - f(mu - h)) / (2h) minus the corresponding entry of `dmu`.
x = mu
y = dmu
print("Error:")
for i=1,x:size(1) do
    for j=1,x:size(2) do
        -- Save the entry so it can be restored exactly; undoing the steps with
        -- +h, -h-h, +h arithmetic can leave rounding drift behind in mu.
        local orig = x[{i,j}]
        x[{i,j}] = orig + h
        fph = gkld:forward({pmu1, pmu2, pmu3, plogv1, plogv2, plogv3, pi}, {mu, logv})
        x[{i,j}] = orig - h
        fmh = gkld:forward({pmu1, pmu2, pmu3, plogv1, plogv2, plogv3, pi}, {mu, logv})
        x[{i,j}] = orig
        print((fph - fmh)/2/h - y[{i,j}])
    end
end

Gradient


In [ ]:
-- Fetch the gradient for pi. The nine-way unpack of this same backward call
-- earlier in the notebook places dpi seventh (after dpmu1..3 and dplogv1..3).
-- Assigning unpack(...) to a single name keeps only the first value, which
-- would be dpmu1, so index the returned table instead.
local grads = gkld:backward({pmu1, pmu2, pmu3, plogv1, plogv2, plogv3, pi}, {mu, logv})
dpi = grads[7]
print(dpi)

Math


In [22]:
require 'nn'


Out[22]:
{
  VolumetricMaxUnpooling : table: 0x41ce8e50
  ConcatTable : table: 0x41d16708
  SpatialAveragePooling : table: 0x41bdb718
  BCECriterion : table: 0x40933360
  Reshape : table: 0x41cebc90
  Jacobian : 
    {
      forward : function: 0x401f9240
      testAllUpdate : function: 0x401eff80
      testDiagHessianInput : function: 0x41308cf8
      testDiagHessianWeight : function: 0x41308d18
      testDiagHessianBias : function: 0x41308d38
      testDiagHessian : function: 0x41e349d0
      testJacobian : function: 0x401f0048
      testIO : function: 0x401eff60
      testJacobianUpdateParameters : function: 0x41e349b0
      backwardDiagHessian : function: 0x41308d60
      testJacobianParameters : function: 0x4092d8f0
      backwardUpdate : function: 0x401f9220
      forwardUpdate : function: 0x401f0028
      backward : function: 0x401f9200
      linearModuleDiagHessian : function: 0x41308d80
    }
  SparseLinear : table: 0x41ce9400
  SpatialCrossMapLRN : table: 0x401f1310
  CAddTable : table: 0x41e3d838
  TemporalConvolution : table: 0x41ce8970
  PairwiseDistance : table: 0x41e4d138
  WeightedMSECriterion : table: 0x41306850
  SmoothL1Criterion : table: 0x40923b40
  SpatialLPPooling : table: 0x41d8aa00
  TanhShrink : table: 0x40935860
  MixtureTable : table: 0x413098f0
  MSECriterion : table: 0x40933b28
  LogSoftMax : table: 0x4092da40
  Identity : table: 0x40cf4e88
  Exp : table: 0x40927b30
  Add : table: 0x41e36320
  SpatialConvolutionLocal : table: 0x40cf06d0
  BatchNormalization : table: 0x401f7da0
  AbsCriterion : table: 0x402097a8
  MultiCriterion : table: 0x40205350
  Max : table: 0x40200950
  MulConstant : table: 0x41e34368
  NarrowTable : table: 0x401ec140
  View : table: 0x401eba20
  VolumetricConvolution : table: 0x40942b38
  SpatialSubSampling : table: 0x41d2ab68
  HardTanh : table: 0x4092a1b8
  DistKLDivCriterion : table: 0x40923d98
  SplitTable : table: 0x41cd6020
  DotProduct : table: 0x41e4e748
  HingeEmbeddingCriterion : table: 0x401ed980
  SpatialBatchNormalization : table: 0x41d7cb00
  DepthConcat : table: 0x41cdf778
  CMulTable : table: 0x41e410f8
  SpatialAdaptiveMaxPooling : table: 0x4092f350
  Parallel : table: 0x41cdac20
  SoftShrink : table: 0x4093ccd8
  SpatialSubtractiveNormalization : table: 0x41dd2a78
  Log : table: 0x40928eb0
  SpatialDropout : table: 0x41e3b4f8
  LeakyReLU : table: 0x40942ae8
  VolumetricMaxPooling : table: 0x41663678
  hessian : 
    {
      enable : function: 0x41cd69e8
    }
  Linear : table: 0x41ce22d0
  Euclidean : table: 0x41e43390
  CriterionTable : table: 0x40c52160
  SpatialMaxPooling : table: 0x412fcf28
  MultiMarginCriterion : table: 0x409373a8
  ELU : table: 0x40ce5b90
  CSubTable : table: 0x41e42890
  MultiLabelMarginCriterion : table: 0x41e3e840
 
Out[22]:
 Copy : table: 0x401fc1f0
  L1HingeEmbeddingCriterion : table: 0x40925448
  VolumetricAveragePooling : table: 0x40849f60
  StochasticGradient : table: 0x40ceaba8
  SpatialContrastiveNormalization : table: 0x41bc9d70
  Bilinear : table: 0x41ce5d48
  CosineEmbeddingCriterion : table: 0x41e477f0
  Padding : table: 0x401f92f0
  Container : table: 0x41cd3d48
  MarginRankingCriterion : table: 0x401f49e8
  Module : table: 0x41ccebf8
  VolumetricFullConvolution : table: 0x416627c8
  Concat : table: 0x41cd77e0
  CrossEntropyCriterion : table: 0x41dcdb18
  LookupTable : table: 0x40ce7728
  MarginCriterion : table: 0x41e34b90
  HardShrink : table: 0x4093b7a8
  Abs : table: 0x40936cb8
  SparseJacobian : 
    {
      forward : function: 0x41bc6738
      testJacobian : function: 0x41bd1db8
      testIO : function: 0x41bc6778
      testAllUpdate : function: 0x41bd1d30
      testJacobianParameters : function: 0x41dcfe68
      testJacobianUpdateParameters : function: 0x41bc6758
      forwardUpdate : function: 0x41bd1d98
      backward : function: 0x41dcfe48
      backwardUpdate : function: 0x41bc6718
    }
  SoftMin : table: 0x40930e20
  WeightedEuclidean : table: 0x41e47010
  Contiguous : table: 0x401ee170
  L1Cost : table: 0x40c405f0
  PReLU : table: 0x409412c8
  utils : 
    {
      recursiveType : function: 0x41668b30
      recursiveResizeAs : function: 0x41668ba8
      recursiveAdd : function: 0x41668b70
      addSingletonDimension : function: 0x41665ed8
      recursiveFill : function: 0x41665cb0
    }
  JoinTable : table: 0x41d255f0
  ClassNLLCriterion : table: 0x41d0dc40
  CMul : table: 0x40206df0
  CosineDistance : table: 0x41e4e898
  Index : table: 0x401f1f98
  Mean : table: 0x40202950
  Dropout : table: 0x41e38ff0
  SoftPlus : table: 0x40932070
  SpatialDivisiveNormalization : table: 0x41bdaf98
  L1Penalty : table: 0x41e401d8
  Power : table: 0x409381b8
  Sqrt : table: 0x4093a618
  Sequential : table: 0x41cddfb0
  Square : table: 0x409393b0
  AddConstant : table: 0x41e38760
  GMMKLDCriterion : table: 0x41141ec8
  test : function: 0x41144818
  MM : table: 0x41ccd3b8
  SoftMax : table: 0x4092faa0
  ParallelCriterion : table: 0x412fb480
  Cosine : table: 0x40925da0
  Clamp : table: 0x4092b410
  SpatialConvolutionMM : table: 0x40cf7bf8
  Sigmoid : table: 0x4092ea38
  LogSigmoid : table: 0x4092c548
  TemporalMaxPooling : table: 0x41d833a8
  Threshold : table: 0x4093db28
  Sum : table: 0x40205690
  SoftSign : table: 0x40933508
  ParallelTable : table: 0x401f6330
  Min : table: 0x401fe450
  KLDCriterion : table: 0x40af9350
  Replicate : table: 0x401f3ae0
  Tanh : table: 0x409344b0
  CDivTable : table: 0x41e3f4b8
  Mul : table: 0x402098e0
  Select : table: 0x401ef570
  ReLU : table: 0x4093f5c0
  SpatialFullConvolutionMap : table: 0x40cf6238
  GradientReversal : table: 0x401fb648
  SpatialConvolution : table: 0x40cea750
  Criterion : table: 0x41e4bab0
  SpatialConvolutionMap : table: 0x40cfa878
  tables : 
    {
      full : function: 0x40cfa8f0
      oneToOne : function: 0x40cfd2e8
      random : function: 0x40cfd6f0
    }
  SpatialMaxUnpooling : table: 0x41dc9690
  TemporalSubSampling : table: 0x412f7f00
  Transpose : table: 0x401f4d80
  SpatialFullConvolution : table: 0x40cf1f68
  SpatialUpSamplingNearest : table: 0x4093ae30
  RReLU : table: 0x40ce3a70
  SpatialZeroPadding : table: 0x41e3ea50
  FlattenTable : table: 0x41cd8228
  Narrow : table: 0x401f0338
  Normalize : table: 0x41e53598
  SpatialSoftMax : table: 0x40ce2b88
  SelectTable : table: 0x4092dea8
  SpatialFractionalMaxPooling : table: 0x40850558
}

In [23]:
-- Small feed-forward stack: 2 -> 3, in-place ReLU, 3 -> 4, SoftMax, 4 -> 1.
model = nn.Sequential()
    :add(nn.Linear(2,3))
    :add(nn.ReLU(true))
    :add(nn.Linear(3,4))
    :add(nn.SoftMax())
    :add(nn.Linear(4,1))

In [28]:
-- Forward a random 10x2 batch through `model`; `out` keeps the result.
x = torch.randn(10,2)
out = model:forward(x)

In [30]:
-- Random 10x1 tensor used as the output gradient passed to model:backward.
dout = torch.randn(10, 1)

In [31]:
-- Backpropagate `dout` through the model for input `x`; the displayed result
-- is the 10x2 gradient with respect to the input.
model:backward(x, dout)


Out[31]:
0.01 *
 -0.0470 -0.0789
 -0.1024  1.5457
  3.3797  5.5302
 -0.0277  0.4179
 -0.0052  0.0786
 -2.3183 -3.8346
  0.0474 -0.7152
  0.1209 -1.8247
  0.4824  0.7931
 -0.2984 -0.5001
[torch.DoubleTensor of size 10x2]

GMM Sampler


In [31]:
-- Display the 5x3 weight tensor `pi` (each row is 1/6, 2/6, 3/6).
pi


Out[31]:
 0.1667  0.3333  0.5000
 0.1667  0.3333  0.5000
 0.1667  0.3333  0.5000
 0.1667  0.3333  0.5000
 0.1667  0.3333  0.5000
[torch.DoubleTensor of size 5x3]


In [49]:
-- Draw one index per row of `pi`, using that row's entries as the sampling
-- weights; the result is a 5x1 LongTensor of indices.
a = torch.multinomial(pi, 1)
print(a)


Out[49]:
 3
 1
 1
 3
 3
[torch.LongTensor of size 5x1]


In [3]:
-- Display `a`.
-- NOTE(review): the size-1 output recorded below does not match the 5x1 sample
-- drawn in cell In [49]; it presumably comes from a different run -- confirm.
a


Out[3]:
 2
[torch.LongTensor of size 1]