``````

In [1]:

import ipyparallel
_clients = ipyparallel.Client()
print("Kernels available: {}".format(len(_clients)))

``````
``````

Kernels available: 4

``````
``````

In [40]:

import numpy as np
import matplotlib.pyplot as pl
import cvxpy as cvx

import sys
sys.path.append('/Users/dsuess/Code/Pythonlibs/')
from tools.helpers import Progress
from tools.plot import matshow

``````
``````

In [3]:

def create_01_matrix(rows, cols, p=.5, rgen=np.random):
mat = rgen.choice([0, 1], size=(rows, cols), p=(1 - p, p))
return mat

def random_sparse_vector(dim, nnz, rgen=np.random):
"""Create a random sparse vector of dimension `dim`
with `nnz` nonzero elements """
idx = rgen.choice(np.arange(dim), size=nnz, replace=False)
result = np.zeros(dim)
result[idx] = rgen.randn(nnz)
return result

``````
``````

In [4]:

def l2_recover(A, y, sparsity=None):
dim = A.shape[1]
x_hat = cvx.Variable(dim)

objective = cvx.Minimize(cvx.norm2(y - A*x_hat)**2)
constraints = [x_hat >= 0]
if sparsity is not None:
constraints += [np.ones(dim) * x_hat <= sparsity]
problem = cvx.Problem(objective, constraints)
r = problem.solve()
return r, np.ravel(x_hat.value)

``````
``````

In [5]:

def likelihood(a, x_obs, y_obs):
rho = norm(scale=SIGMA).pdf
return rho(vec_to_poly(a)(x_obs) - y_obs)

def aic(x, y, a):
L = likelihood(a, x, y)
return 2 * (len(a) - sum(np.log(L)))

def aicc(x, y, a):
n = len(x)
k = len(a)
return aic(x, y, a) + 2*k*(k+1) / (n - k - 1)

def bic(x, y, a):
return len(a) * np.log(len(x)) - 2 * sum(np.log(likelihood(a, x, y)))

``````
``````

In [123]:

N = 200
R = 10
SIGMA = .1

x = np.abs(random_sparse_vector(N, R))
A = create_01_matrix(N, N)

x_hats = []
for M in Progress(range(1, N)):
y = A[:M] @ x + SIGMA * np.random.randn(M)

_, x_hat = l2_recover(A[:M], y)
x_hats.append(x_hat)

``````
``````

100% (199 of 199) |#######################| Elapsed Time: 0:00:11 Time: 0:00:11

``````
``````

In [129]:

import scipy.stats as st

M = 80
N = 400
R = 5
SIGMA = .001 / M
SAMPLES = 100

for R in [5, 10, 15]:

for _ in range(5):
A = create_01_matrix(M, N)
x = np.abs(random_sparse_vector(N, R))
x /= np.linalg.norm(x)
rs = np.array([l2_recover(A, A @ x + SIGMA * np.random.randn(M))[0]
for _ in Progress(range(SAMPLES))])
rs /= SIGMA**2

for r in sorted(rss, key=lambda x: np.mean(x)):
print(np.mean(r), end=" ")

print("")

for r in sorted(rss, key=lambda x: np.mean(x)):
mean = np.mean(r)
var = np.var(r)
print(kstest(r, st.norm(mean, np.sqrt(var)).cdf).pvalue, end=" ")

``````
``````

100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:07 Time: 0:00:07
100% (100 of 100) |#######################| Elapsed Time: 0:00:07 Time: 0:00:07
3% (  3 of 100) |                       | Elapsed Time: 0:00:00 ETA:  0:00:08

48.5113743687 51.3440275998 53.8765617674 54.4901647635 54.869022547
0.786890253531 0.995640545674 0.478063457174 0.91663567182 0.996917134532

100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
3% (  3 of 100) |                       | Elapsed Time: 0:00:00 ETA:  0:00:07

28.109851131 28.6336676763 34.2365062698 37.7603679228 38.7434200702
0.77023730706 0.855154277533 0.293671098353 0.79325786839 0.411490776185

100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08
100% (100 of 100) |#######################| Elapsed Time: 0:00:07 Time: 0:00:07
99% ( 99 of 100) |###################### | Elapsed Time: 0:00:08 ETA:  0:00:06

17.1175817894 21.3877618633 21.6120452005 24.8845188099 27.856031028
0.834912013208 0.319978286995 0.83159794608 0.208219856821 0.402564892739

100% (100 of 100) |#######################| Elapsed Time: 0:00:08 Time: 0:00:08

``````
``````

In [35]:

from scipy.stats import chi2, kstest

def model_to_f(model):
pdf = lambda x: np.zeros_like(x) + sum(w * chi2(k).pdf(x) for k, w in model.items() if k > 0)
cdf = lambda x: np.zeros_like(x) + sum(w * chi2(k).cdf(x) for k, w in model.items() if k > 0) \
+ model.get(0, 0)
return pdf, cdf

def test(data, model, bins=100):
pdf, cdf = model_to_f(model)

weights = np.ones_like(data) / len(data)

pl.subplot(121)
pl.hist(data, bins=bins, cumulative=False, normed=True)
xs = np.linspace(*pl.xlim(), 1000)
pl.plot(xs, pdf(xs))

pl.subplot(122)
pl.hist(data, bins=bins, cumulative=True, normed=True)
xs = np.linspace(*pl.xlim(), 1000)
pl.plot(xs, cdf(xs))

print(kstest(y, cdf))

``````
``````

In [30]:

from scipy.special import binom

pl.figure(0, figsize=(16, 5))
binomc = lambda m: np.array([binom(m, k) / 2**m for k in range(m + 1)])
ws = binomc(M)

test(rs, {M - k: w for k, w in enumerate(binomc(R))},
bins=50)

``````
``````

KstestResult(statistic=0.99999935542096663, pvalue=0.0)

``````
``````

In [17]:

from scipy.special import binom

pl.figure(0, figsize=(16, 5))
binomc = lambda m: np.array([binom(m, k) / 2**m for k in range(m + 1)])
ws = binomc(M)

test(rs, {M - k: w for k, w in enumerate(binomc(R))})

``````
``````

KstestResult(statistic=0.99999935542096663, pvalue=0.0)

``````
``````

In [15]:

from scipy.special import binom

pl.figure(0, figsize=(16, 5))
binomc = lambda m: np.array([binom(m, k) / 2**m for k in range(m + 1)])
ws = binomc(M)

test(rs, {M - k: w for k, w in enumerate(binomc(R))})

``````
``````

KstestResult(statistic=1.0, pvalue=0.0)

``````
``````

In [46]:

model = {M - R - k: w for k, w in enumerate(binomc(M - R))}
pdf, cdf = model_to_f(model)

``````
``````

In [52]:

x = np.linspace(0, 40, 10000)[1:]
pl.plot(x, np.cumsum(pdf(x)) * 40 / 10000)
pl.plot(x, cdf(x))

``````
``````

Out[52]:

[<matplotlib.lines.Line2D at 0x1112a02e8>]

``````
``````

In [48]:

``````
``````

Out[48]:

array([             inf,   5.03791636e-07,   5.26209216e-07, ...,
3.26118366e-04,   3.25730132e-04,   3.25342338e-04])

``````
``````

In [ ]:

``````