We work through two more examples in this worksheet: the mixture of Gaussians and MAXCUT. See the basic worksheet for setup, references, and simpler examples.
In [1]:
# we depend on numpy, sympy, and cvxopt
import numpy as np
import sympy as sp
import cvxopt
import mompy as mp

# basic settings and setup
mp.cvxsolvers.options['show_progress'] = False
from IPython.display import display, Markdown, Math, display_markdown
sp.init_printing()

def print_problem(obj, constraints=None, moment_constraints=None):
    display_markdown(mp.problem_to_str(obj, constraints, moment_constraints, False), raw=True)
This is the original problem considered by Pearson (1894), which we will solve as an optimization problem.
K. Pearson. Contributions to the mathematical theory of evolution. Philosophical Transactions of the Royal Society of London. A, 185:71–110, 1894.
We have a mixture of two Gaussians with means $\xi_1, \xi_2$, variances $c_1, c_2$, and mixing coefficients $\pi_1, \pi_2$, so that $x \sim \pi_1\, p(x; \xi_1, c_1) + \pi_2\, p(x; \xi_2, c_2)$, where $p$ is the density of the normal distribution.
More details can be found in our paper: Estimating mixture models via mixtures of polynomials.
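The constraints below use the moments of a single Gaussian, $E[x^n]$ for $x \sim \mathcal{N}(\xi, c)$, written as polynomials in $\xi$ and $c$. As a sanity check of our own (the helper `gaussian_moment` below is not part of mompy), these expressions can be rederived symbolically by expanding $(\xi + \sqrt{c}\, z)^n$ for standard normal $z$ and substituting $E[z^k] = (k-1)!!$ for even $k$, $0$ for odd $k$:
import sympy as sp
xi, c, z = sp.symbols('xi c z')

def gaussian_moment(n):
    # expand (xi + sqrt(c)*z)^n and take expectations term by term in z
    expanded = sp.expand((xi + sp.sqrt(c) * z) ** n)
    total = 0
    for k in range(n + 1):
        # E[z^k] = (k-1)!! for even k, 0 for odd k
        Ez = 1 if k == 0 else (sp.factorial2(k - 1) if k % 2 == 0 else 0)
        total += expanded.coeff(z, k) * Ez
    return sp.expand(total)

print([gaussian_moment(n) for n in range(1, 5)])
# [xi, c + xi**2, 3*c*xi + xi**3, 3*c**2 + 6*c*xi**2 + xi**4]
The output matches the hardcoded moment_exprs in the next cell.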
In [2]:
xi, c = sp.symbols('xi, c')

K = 2 # number of clusters
xi0 = [1, -0.9] # true parameters
c0 = [0.4, 0.6]
pi0 = [0.4, 0.6]

# the first seven moments of N(xi, c) as polynomials in xi and c
moment_exprs = [xi, xi**2 + c, xi**3 + 3*xi*c, xi**4 + 6*xi**2*c + 3*c**2,
                xi**5 + 10*xi**3*c + 15*xi*c**2,
                xi**6 + 15*xi**4*c + 45*xi**2*c**2 + 15*c**3,
                xi**7 + 21*xi**5*c + 105*xi**3*c**2 + 105*xi*c**3]
moment_exprs = moment_exprs[0:6] # use only the first six moments
display(moment_exprs)

# construct the exact moment constraints: each mixture moment expression
# must equal its value under the true parameters
hs = []
for expr in moment_exprs:
    val = 0
    for k in range(K):
        val += pi0[k]*expr.subs({xi: xi0[k], c: c0[k]})
    hs += [expr - val]
hs_true = hs

# we will minimize a trace-like objective
f = 1 + xi**2 + c + c**2 + xi**4 + c*xi**2
gs = [c >= 0]

print_problem(f, gs, hs)
sol = mp.solvers.solve_GMP(f, gs, hs, rounds=2, slack=1e-3)
display(mp.extractors.extract_solutions_lasserre(sol['MM'], sol['x'], 2, tol=1e-5, maxdeg=2))
print('the truth: ' + str({c: c0, xi: xi0}))
In [3]:
# instantiate the moment matrix numerically at the recovered solution
sol['MM'].numeric_instance(sol['x'], 1)
Out[3]:
In [4]:
# draw some samples from the true mixture
numsample = 100000 # must be an integer for np.random.rand
np.random.seed(1)
z = (np.random.rand(numsample) < pi0[0]).astype('int8')
means = xi0[0]*z + xi0[1]*(1 - z)
stds = np.sqrt(c0[0]*z + c0[1]*(1 - z))
Xs = means + stds * np.random.randn(numsample)

import matplotlib.pyplot as plt
%matplotlib inline
plt.hist(Xs, 50);
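Before re-solving the GMP with sampled data, it is worth checking that the sample moments are close to the analytic mixture moments that went into hs_true above (a quick check of our own, not part of the worksheet's pipeline):
for d, expr in enumerate(moment_exprs):
    # analytic mixture moment under the true parameters
    analytic = sum(pi0[k] * expr.subs({xi: xi0[k], c: c0[k]}) for k in range(K))
    # empirical moment of the same order from the sample
    empirical = np.mean(np.power(Xs, d + 1))
    print('moment %d: analytic %.4f, empirical %.4f' % (d + 1, float(analytic), empirical))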
In [5]:
# construct the empirical constraints: match each moment expression
# to the corresponding sample moment
hs = []
for d, expr in enumerate(moment_exprs):
    val = np.mean(np.power(Xs, d + 1))
    hs += [expr - val]

# the same trace-like objective as before
f = 1 + xi**2 + c + c**2 + xi**4 + c*xi**2
gs = [c >= 0.1]

print_problem(f, gs, hs)
sol = mp.solvers.solve_GMP(f, gs, hs, rounds=4, slack=1e-5)
display(mp.extractors.extract_solutions_lasserre(sol['MM'], sol['x'], 2, tol=1e-5, maxdeg=2))
print('the truth: ' + str({c: c0, xi: xi0}))
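The second example is MAXCUT with random weights: we minimize the quadratic form $f(x) = \sum_{i,j} W_{ij}\, x_i x_j$ over $x \in \{-1, +1\}^5$, encoding each integrality constraint $x_i^2 = 1$ as the pair of inequalities $x_i^2 \geq 1$ and $x_i^2 \leq 1$.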
In [6]:
size = 5
np.random.seed(1)
xs = sp.symbols('x1:' + str(size + 1))

# a random symmetric weight matrix: the elementwise product Wh*Wh.T
# is symmetric by construction
Wh = np.random.randn(size, size)
W = -Wh * Wh.T

# x_i^2 >= 1 and x_i^2 <= 1 together force x_i in {-1, +1}
gs = [x**2 >= 1 for x in xs] + [x**2 <= 1 for x in xs]
fs = [w * xs[ij[0]] * xs[ij[1]] for ij, w in np.ndenumerate(W)]
f = sum(fs)

print_problem(f, gs)
sol = mp.solvers.solve_GMP(f, gs, rounds=3)
mp.extractors.extract_solutions_lasserre(sol['MM'], sol['x'], 2, maxdeg=2)
Out[6]:
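Since size = 5, the feasible set has only $2^5 = 32$ sign vectors, so the relaxation can be verified by brute force. This check is our own addition (best_val and best_s are hypothetical names, not mompy API):
import itertools
# enumerate all x in {-1, +1}^size and evaluate f(x) = x^T W x exactly
vals = [(s.dot(W).dot(s), s) for s in
        (np.array(t) for t in itertools.product([-1, 1], repeat=size))]
best_val, best_s = min(vals, key=lambda p: p[0])
print('exact minimum %.4f at x = %s' % (best_val, best_s))
The exact minimum should agree with the value returned by solve_GMP when the relaxation is tight.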