In [1]:
using NPGM, BSpline
using ExpFamilyGM

In [2]:
using PyPlot


INFO: Loading help data...

In [3]:
using Distributions, Cubature

These functions generate functions for BSpline basis.


In [4]:
function createNodeFunction(M, xmin, xmax, numKnots)
   
    bs = BSpline.BSplineT(M, xmin, xmax, numKnots)
    K = BSpline.num_basis(bs)
    
    f_node(x, k)      = BSpline.bspline_basis(k-1, bs, x)
    f_node_der(x, k)  = BSpline.derivative_bspline_basis(k-1, bs, x)
    f_node_der2(x, k) = BSpline.derivative2_bspline_basis(k-1, bs, x)
    
    (f_node, f_node_der, f_node_der2, K)
end

function createEdgeFunction(M, xmin, xmax, numKnots)
   
    bs1 = BSpline.BSplineT(M, xmin[1], xmax[1], numKnots[1])
    bs2 = BSpline.BSplineT(M, xmin[2], xmax[2], numKnots[2])
    L1 = BSpline.num_basis(bs1) 
    L2 = BSpline.num_basis(bs2)
    
    function f_edge(x, y, l) 
        r, c = ind2sub((L1, L2), l)
        r -= 1
        c -= 1
        return BSpline.bspline_basis(r, bs1, x) * BSpline.bspline_basis(c, bs2, y)
    end    
    function f_edge_der(x, y, l, whichArgument) 
        r, c = ind2sub((L1, L2), l)
        r -= 1
        c -= 1
        if whichArgument == 1
            return BSpline.derivative_bspline_basis(r, bs1, x) * BSpline.bspline_basis(c, bs2, y)
        else
            return BSpline.bspline_basis(r, bs1, x) * BSpline.derivative_bspline_basis(c, bs2, y)
        end
    end
    function f_edge_der2(x, y, l, whichArgument) 
        r, c = ind2sub((L1, L2), l)
        r -= 1
        c -= 1
        if whichArgument == 1
            return BSpline.derivative2_bspline_basis(r, bs1, x) * BSpline.bspline_basis(c, bs2, y)
        else
            return BSpline.bspline_basis(r, bs1, x) * BSpline.derivative2_bspline_basis(c, bs2, y)
        end
    end
    
    (f_edge, f_edge_der, f_edge_der2, L1*L2)
end


Out[4]:
createEdgeFunction (generic function with 1 method)

Univariate Gaussian distribution

We first try fitting a well specified model.


In [43]:
n = 1000
X = randn(n,1);

K = 2
L = 1
DD = getDD(X, K, L, gm_node_der_f, gm_edge_der_f)
E = getE(X, K, L, gm_node_der_2_f, gm_edge_der_2_f);

hat_beta = - DD \ E;
testX = linspace(-4, 4, 1000)
psiX = zeros(length(testX), 2)
for i=1:length(testX)
    for j=1:2
        psiX[i, j] = gm_node_f(testX[i], j)
    end
end
f = exp( psiX * hat_beta )
c0 = ExpFamilyGM.trapz(f, -4., 4.)

plot(testX, f/c0);


Out[43]:
1-element Array{Any,1}:
 PyObject <matplotlib.lines.Line2D object at 0x7f56b0a70e90>

In [44]:
n = 1000
X = randn(n,1)/5 + 1;

K = 2
L = 1
DD = getDD(X, K, L, gm_node_der_f, gm_edge_der_f)
E = getE(X, K, L, gm_node_der_2_f, gm_edge_der_2_f);

hat_beta = - DD \ E;
testX = linspace(-4, 4, 1000)
psiX = zeros(length(testX), 2)
for i=1:length(testX)
    for j=1:2
        psiX[i, j] = gm_node_f(testX[i], j)
    end
end
f = exp( psiX * hat_beta )
c0 = ExpFamilyGM.trapz(f, -4., 4.)

plot(testX, f/c0);


Next, we use BSpline basis


In [52]:
n = 1000
X = randn(n,1);

DD = getDD(X, K, L, phi_node_der, phi_edge_der)
E = getE(X, K, L, phi_node_der2, phi_edge_der2);

hat_beta = - DD \ E;
testX = linspace(-4, 4, 1000)
psiX = zeros(length(testX), K)
for i=1:length(testX)
    for j=1:K
        psiX[i, j] = phi_node(testX[i], j)
    end
end
f = exp( psiX * hat_beta )
c0 = ExpFamilyGM.trapz(f, -4., 4.)

plot(testX, f/c0);



In [55]:
n = 1000
X = randn(n,1)/5 + 1;

DD = getDD(X, K, L, phi_node_der, phi_edge_der)
E = getE(X, K, L, phi_node_der2, phi_edge_der2);

hat_beta = - (DD+1e-12*eye(K)) \ E;
testX = linspace(-4, 4, 1000)
psiX = zeros(length(testX), K)
for i=1:length(testX)
    for j=1:K
        psiX[i, j] = phi_node(testX[i], j)
    end
end
f = exp( psiX * hat_beta )
c0 = ExpFamilyGM.trapz(f, -4., 4.)

plot(testX, f/c0);


Bivariate densities


In [6]:
n = 1000
p = 2
covM = [1 0.8; 0.8 1]
X = randn(n, p) * sqrtm(covM)

K = 2
L = 1
DD = getDD(X, K, L, gm_node_der_f, gm_edge_der_f)
E = getE(X, K, L, gm_node_der_2_f, gm_edge_der_2_f);

hat_beta = - DD \ E;

function estimF(eval_x)
    exp(gm_node_f(eval_x[1], 1) * hat_beta[1] + 
        gm_node_f(eval_x[1], 2) * hat_beta[2] + 
        gm_node_f(eval_x[2], 1) * hat_beta[3] + 
        gm_node_f(eval_x[2], 2) * hat_beta[4] + 
        gm_edge_f(eval_x[1], eval_x[2], 1) * hat_beta[5])
end
c0, _ = hcubature(estimF, (-4.,-4.), (4.,4.); abstol=1e-4)


numEvalP = 500
X1 = linspace(-4, 4, numEvalP)
X2 = linspace(-4, 4, numEvalP)

xgrid = repmat(X1', numEvalP, 1)
ygrid = repmat(X2,  1, numEvalP)

z = zeros(numEvalP, numEvalP)
for i in 1:numEvalP
    for j in 1:numEvalP
        z[i,j] = estimF([X1[i],X2[j]])
    end
end

mu = zeros(2)
d = MvNormal(mu, covM)
zt = zeros(numEvalP, numEvalP)
for i in 1:numEvalP
    for j in 1:numEvalP
        pt = [X1[i], X2[j]]    
        zt[i,j] = pdf(d, pt)
    end
end


fig = figure("pyplot_surfaceplot",figsize=(5,5))
ax = fig[:add_subplot](2,1,1, ) 
cp = ax[:contour](xgrid, ygrid, zt, colors="black", linewidth=2.0) 
ax[:clabel](cp, inline=1, fontsize=10) 
xlabel("X1") 
ylabel("X2")
title("Contour Plot -- TRUE")

subplot(212) 
ax = fig[:add_subplot](2,1,2) 
cp = ax[:contour](xgrid, ygrid, z, colors="black", linewidth=2.0) 
ax[:clabel](cp, inline=1, fontsize=10) 
xlabel("X1") 
ylabel("X2")
title("Contour Plot")
tight_layout()


/home/mkolar/anaconda/lib/python2.7/site-packages/matplotlib/text.py:52: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  if rotation in ('horizontal', None):
/home/mkolar/anaconda/lib/python2.7/site-packages/matplotlib/text.py:54: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  elif rotation == 'vertical':

In [113]:
(phi_node, phi_node_der, phi_node_der2, K) = createNodeFunction(4, -4., 4., 5)
(phi_edge, phi_edge_der, phi_edge_der2, L) = createEdgeFunction(4, (-4., -4.), (4., 4.), (5,5))

n = 10000
p = 2
covM = [1 0.8; 0.8 1]
X = randn(n, p) * sqrtm(covM)

DD = getDD(X, K, L, phi_node_der, phi_edge_der)
E = getE(X, K, L, phi_node_der2, phi_edge_der2);
indC = 10
D, U, _ = eigs(DD/n; nev=indC)
# @show indC = findfirst((x) -> x < 5e-5, D)
# plot(D)
@show D
# D[indC:end] = 0
for j=1:indC
    D[j] = 1. / D[j]
end
hat_beta = - U * diagm(D) * U' * (E/n)

hat_beta1 = - (DD/n + 1e-2 * eye(2*K+L)) \ (E/n);
@show hat_beta'


D => [0.12830042812868891,0.11272508568447949,0.10633437665538749,0.09285101321066079,0.012091222095737139,0.008178273060318866,0.007997518748933587,0.007247575913085534,0.006442933000835401,0.003967895977801033]
hat_beta' => [-0.13584780663155172 -3.3987603067918735 1.6187912735830343 3.564143490148042 1.644358218922692 -3.156016697537513 -0.1366680065628408 -0.09515896550334267 -2.583355982859968 1.1951800763735208 2.834207923368479 1.1641272456586549 -2.3871824711537735 -0.12782166163597472 -0.002686882184670316 -0.03164050665433403 -0.04843093831878066 -0.012159140779149695 -0.00024149756641086886 -3.6870198880695767e-16 7.724837653734136e-16 -0.0415488882671135 -0.7712544072119243 -0.7943808284758597 -0.7629855574286595 -0.21202428224236275 -0.0011620192340458441 6.52211429495207e-16 -0.07340995980266278 -1.2952388019970367 6.446037692289855 0.18159523322065552 -3.867705131693244 -0.19588705825956435 -0.00021189738446240213 -0.01788122487296589 -1.0675767049666993 0.23186116094578327 4.450922967040248 0.16044767868749854 -0.907132708578635 -0.016433244886919867 -0.000317617619470309 -0.23148553004365488 -4.000457948024105 0.3582227983861937 6.293051357920247 -1.183149930426218 -0.07173587147235015 5.951616491204761e-18 -0.0015557203831057128 -0.2156332483129039 -0.639563881193517 -0.665623674062855 -0.8203281015368636 -0.04447774268921938 3.204061696273456e-16 -1.6104161005001205e-14 -0.00020744421280997788 -0.011889122089518355 -0.06355190995241773 -0.048365465702786915 -0.003807670878695915]
Out[113]:
1x63 Array{Float64,2}:
 -0.135848  -3.39876  1.61879  …  -0.0635519  -0.0483655  -0.00380767

In [114]:
function estimF1(eval_x)
    tmp = 0.
    indBeta = 0
    for j=1:p
        for k=1:K
            indBeta += 1
            tmp += phi_node(eval_x[j], k) * hat_beta[indBeta]
        end
    end
    for l=1:L
        indBeta += 1
        tmp += phi_edge(eval_x[1], eval_x[2], l) * hat_beta[indBeta]
    end
    return exp(tmp)
end
c0, _ = hcubature(estimF1, (-4.,-4.), (4.,4.); abstol=1e-4)


numEvalP = 100
X1 = linspace(-4, 4, numEvalP)
X2 = linspace(-4, 4, numEvalP)

xgrid = repmat(X1', numEvalP, 1)
ygrid = repmat(X2,  1, numEvalP)

z = zeros(numEvalP, numEvalP)
for i in 1:numEvalP
    for j in 1:numEvalP
        z[i,j] = estimF1([X1[i],X2[j]]) / c0
    end
end

mu = zeros(2)
d = MvNormal(mu, covM)
zt = zeros(numEvalP, numEvalP)
for i in 1:numEvalP
    for j in 1:numEvalP
        pt = [X1[i], X2[j]]    
        zt[i,j] = pdf(d, pt)
    end
end


fig = figure("pyplot_surfaceplot",figsize=(5,5))
ax = fig[:add_subplot](2,1,1, ) 
cp = ax[:contour](xgrid, ygrid, zt, colors="black", linewidth=2.0) 
ax[:clabel](cp, inline=1, fontsize=10) 
xlabel("X1") 
ylabel("X2")
title("Contour Plot -- TRUE")

subplot(212) 
ax = fig[:add_subplot](2,1,2) 
cp = ax[:contour](xgrid, ygrid, z, colors="black", linewidth=2.0) 
ax[:clabel](cp, inline=1, fontsize=10) 
xlabel("X1") 
ylabel("X2")
title("Contour Plot")
tight_layout()


3-dim Gaussian, plotting conditional $X_1\mid X_2,X_3$


In [26]:
n = 1000
p = 3
covM = [1 0.95 0; 0.95 1 0; 0 0 1]
X = randn(n, p) * sqrtm(covM)

K = 2
L = 1
DD = getDD(X, K, L, gm_node_der_f, gm_edge_der_f)
E = getE(X, K, L, gm_node_der_2_f, gm_edge_der_2_f);
hat_beta = - DD \ E;

function estimF3(x)
    tmp = 0.
    for a=1:p
        for k=1:K
            tmp += gm_node_f(x[a], k) * hat_beta[(a-1)*K+k]
        end
    end
    indEdge = 0
    for a=1:p
        for b=a+1:p
            indEdge += 1
            for l=1:L
                tmp += gm_edge_f(x[a], x[b], l) * hat_beta[K*p + (indEdge-1)*L + l]    
            end
        end
    end
    exp(tmp)
end

x0 = [-0.5 0]

numEvalP = 500
X1 = linspace(-4, 4, numEvalP)
f = (eltype(Float64))[estimF3([x, x0[1], x0[2]]) for x in X1]
c0 = ExpFamilyGM.trapz(f, -4., 4.)

plot(X1, f/c0)

mu = zeros(3)
d = MvNormal(mu, covM)
ft = (eltype(Float64))[pdf(d, [x, x0[1], x0[2]]) for x in X1]
c0t = ExpFamilyGM.trapz(ft, -4., 4.)

plot(X1, ft/c0t)


Out[26]:
1-element Array{Any,1}:
 PyObject <matplotlib.lines.Line2D object at 0x7f72743e0150>

In [22]:
(phi_node, phi_node_der, phi_node_der2, K) = createNodeFunction(4, -4., 4., 5)
(phi_edge, phi_edge_der, phi_edge_der2, L) = createEdgeFunction(4, (-4., -4.), (4., 4.), (2,2))

n = 200
p = 3
covM = [1 0.95 0; 0.95 1 0; 0 0 1]
X = randn(n, p) * sqrtm(covM)

DD = getDD(X, K, L, phi_node_der, phi_edge_der)
E = getE(X, K, L, phi_node_der2, phi_edge_der2);
U, D, Vt = svd(full(DD) / n)
indC = findfirst((x) -> x < 1e-5, D)
D[indC:end] = 0
for j=1:indC-1
    D[j] = 1. / D[j]
end
hat_beta = - Vt * diagm(D) * U' * E / n;


elapsed time: 0.003639338 seconds (197512 bytes allocated)

In [24]:
function estimF(x)
    tmp = 0.
    for a=1:p
        for k=1:K
            tmp += phi_node(x[a], k) * hat_beta[(a-1)*K+k]
        end
    end
    indEdge = 0
    for a=1:p
        for b=a+1:p
            indEdge += 1
            for l=1:L
                tmp += phi_edge(x[a], x[b], l) * hat_beta[K*p + (indEdge-1)*L + l]    
            end
        end
    end
    exp(tmp)
end

x0 = [2.1 0]

lb = -4.
ub = 4.
numEvalP = 500
X1 = linspace(lb, ub, numEvalP)
f = (eltype(Float64))[estimF([x, x0[1], x0[2]]) for x in X1]
c0 = ExpFamilyGM.trapz(f, lb, ub)

plot(X1, f/c0)

mu = zeros(3)
d = MvNormal(mu, covM)
ft = (eltype(Float64))[pdf(d, [x, x0[1], x0[2]]) for x in X1]
c0t = ExpFamilyGM.trapz(ft, lb, ub)

plot(X1, ft/c0t)


Out[24]:
1-element Array{Any,1}:
 PyObject <matplotlib.lines.Line2D object at 0x7f727527eb90>

How well BSpline approximate x*y function


In [82]:
N = 100
t1 = rand(Uniform(-4, 4), N)
t2 = rand(Uniform(-4, 4), N)
nX = zeros(N, L)
nY = zeros(N)
for i=1:N
    for j=1:L
        nX[i,j] = phi_edge(t1[i], t2[i], j)
    end
    nY[i] = t1[i] * t2[i]
end

In [83]:
hb = nX \ nY

numEvalP = 200
X1 = linspace(-4, 4, numEvalP)
X2 = linspace(-4, 4, numEvalP)
xgrid = repmat(X1', numEvalP, 1)
ygrid = repmat(X2,  1, numEvalP)

function f1(x,y) 
    tmp = 0.
    for j=1:L
        tmp += phi_edge(x, y, j) * hb[j]
    end
    tmp
end
z = zeros(numEvalP, numEvalP)
for i in 1:numEvalP
    for j in 1:numEvalP
        z[i,j] = f1(X1[i],X2[j]) 
    end
end
zt = zeros(numEvalP, numEvalP)
for i in 1:numEvalP
    for j in 1:numEvalP
        zt[i,j] = X1[i] * X2[j]
    end
end

In [84]:
fig = figure("pyplot_surfaceplot",figsize=(10,10))
ax = fig[:add_subplot](1,1,1, projection = "3d")
ax[:plot_surface](xgrid, ygrid, zt-z, rstride=2, edgecolors="k", cstride=2, cmap=ColorMap("gray"), alpha=0.8, linewidth=0.25)
xlabel("X")
ylabel("Y")
title("Surface Plot")


Out[84]:
PyObject <matplotlib.text.Text object at 0x7f43b26c0610>