In [1]:
# %load /Users/dsuess/Code/Pythonlibs/parallel.ipy
# Connect to a running ipyparallel cluster and report how many engines
# (kernels) are available.
# NOTE(review): hardcoded absolute user path above is machine-specific.
import ipyparallel
_clients = ipyparallel.Client()
# Load-balanced scheduler view; not used in any cell visible in this file.
_view = _clients.load_balanced_view()
print("Kernels available: {}".format(len(_clients)))
In [40]:
import numpy as np
import matplotlib.pyplot as pl
import cvxpy as cvx
import sys
sys.path.append('/Users/dsuess/Code/Pythonlibs/')
from tools.helpers import Progress
from tools.plot import matshow
In [3]:
def create_01_matrix(rows, cols, p=.5, rgen=np.random):
    """Return a random `rows` x `cols` matrix with entries in {0, 1}.

    Each entry is 1 with probability `p` (and 0 with probability 1 - p),
    drawn independently using the random generator `rgen`.
    """
    return rgen.choice([0, 1], size=(rows, cols), p=(1 - p, p))
def random_sparse_vector(dim, nnz, rgen=np.random):
    """Draw a random `dim`-dimensional vector with `nnz` nonzero entries.

    The support is chosen uniformly at random without replacement and the
    nonzero values are i.i.d. standard normal, both drawn from `rgen`.
    """
    support = rgen.choice(np.arange(dim), size=nnz, replace=False)
    vec = np.zeros(dim)
    vec[support] = rgen.randn(nnz)
    return vec
In [4]:
def l2_recover(A, y, sparsity=None):
    """Nonnegative least-squares recovery of x from measurements y ~ A x.

    Solves  min_x ||y - A x||_2^2  subject to  x >= 0, and, when
    `sparsity` is given, additionally  sum(x) <= sparsity.

    Returns
    -------
    (float, ndarray)
        The optimal objective value and the recovered vector (flattened).
    """
    dim = A.shape[1]
    x_hat = cvx.Variable(dim)
    objective = cvx.Minimize(cvx.norm2(y - A*x_hat)**2)
    constraints = [x_hat >= 0]
    if sparsity is not None:
        # NOTE: despite the name, this bounds the l1 mass sum(x), which
        # equals the l1 norm here since x is constrained nonnegative.
        constraints.append(np.ones(dim) * x_hat <= sparsity)
    opt_val = cvx.Problem(objective, constraints).solve()
    return opt_val, np.ravel(x_hat.value)
In [5]:
def likelihood(a, x_obs, y_obs):
    """Pointwise Gaussian likelihood of the residuals between the model
    evaluated at `x_obs` and the observed targets `y_obs`.

    NOTE(review): relies on names not defined anywhere in this notebook
    view: `norm` (presumably scipy.stats.norm), `vec_to_poly` (presumably
    maps a coefficient vector to a callable polynomial), and the global
    `SIGMA` — confirm these are defined before this cell runs.
    """
    # Density of N(0, SIGMA) applied elementwise to the residuals.
    rho = norm(scale=SIGMA).pdf
    return rho(vec_to_poly(a)(x_obs) - y_obs)
def aic(x, y, a):
    """Akaike information criterion, 2k - 2 log L, for coefficient
    vector `a` given observations (`x`, `y`)."""
    log_lik = sum(np.log(likelihood(a, x, y)))
    return 2 * (len(a) - log_lik)
def aicc(x, y, a):
    """Small-sample corrected AIC: AIC + 2k(k+1) / (n - k - 1)."""
    n, k = len(x), len(a)
    correction = 2*k*(k+1) / (n - k - 1)
    return aic(x, y, a) + correction
def bic(x, y, a):
    """Bayesian information criterion: k log n - 2 log L."""
    log_lik = sum(np.log(likelihood(a, x, y)))
    return len(a) * np.log(len(x)) - 2 * log_lik
In [123]:
# Recovery experiment: fix one sparse nonnegative signal x and recover it
# from an increasing number M of noisy 0/1 measurements.
N = 200        # ambient dimension of the signal
R = 10         # number of nonzero entries in the planted signal
SIGMA = .1     # additive noise level
x = np.abs(random_sparse_vector(N, R))  # nonnegative sparse ground truth
A = create_01_matrix(N, N)              # random 0/1 measurement matrix
x_hats = []
for M in Progress(range(1, N)):
    # Use only the first M rows of A as the measurement operator.
    y = A[:M] @ x + SIGMA * np.random.randn(M)
    _, x_hat = l2_recover(A[:M], y)
    x_hats.append(x_hat)
# NOTE(review): no random seed is set anywhere — results differ per run.
In [129]:
import scipy.stats as st
# Study the distribution of the rescaled optimal residual
# r = ||y - A x_hat||^2 / SIGMA^2 for several sparsity levels R,
# repeated over 5 random (A, x) instances with SAMPLES noise draws each.
M = 80
N = 400
R = 5            # overwritten by the loop below
SIGMA = .001 / M
SAMPLES = 100
for R in [5, 10, 15]:
    rss = []
    for _ in range(5):
        A = create_01_matrix(M, N)
        x = np.abs(random_sparse_vector(N, R))
        x /= np.linalg.norm(x)  # normalize the planted signal
        rs = np.array([l2_recover(A, A @ x + SIGMA * np.random.randn(M))[0]
                       for _ in Progress(range(SAMPLES))])
        # Rescale residuals by the noise variance (chi-squared scaling).
        rs /= SIGMA**2
        rss.append(rs)
    # Print per-instance mean residuals, sorted by mean.
    for r in sorted(rss, key=lambda x: np.mean(x)):
        print(np.mean(r), end=" ")
    print("")
    # KS test of each instance's residuals against a moment-matched normal.
    # NOTE(review): `kstest` is only imported in a later cell (In [35]) —
    # this cell fails under Restart & Run All unless that cell ran first.
    for r in sorted(rss, key=lambda x: np.mean(x)):
        mean = np.mean(r)
        var = np.var(r)
        print(kstest(r, st.norm(mean, np.sqrt(var)).cdf).pvalue, end=" ")
In [35]:
from scipy.stats import chi2, kstest
def model_to_f(model):
    """Build (pdf, cdf) callables for a mixture of chi-squared laws.

    `model` maps degrees of freedom k to mixture weights w.  A weight on
    k == 0 is treated as a point mass at zero: it cannot appear in the
    density, so it is skipped in the pdf and added as a constant offset
    to the cdf.
    """
    def pdf(x):
        dens = sum(w * chi2(k).pdf(x) for k, w in model.items() if k > 0)
        return np.zeros_like(x) + dens

    def cdf(x):
        bulk = sum(w * chi2(k).cdf(x) for k, w in model.items() if k > 0)
        return np.zeros_like(x) + bulk + model.get(0, 0)

    return pdf, cdf
def test(data, model, bins=100):
    """Plot empirical vs. model pdf/cdf and print a KS goodness-of-fit test.

    Left panel: density-normalized histogram of `data` overlaid with the
    model pdf.  Right panel: cumulative histogram overlaid with the model
    cdf.  Finally prints the Kolmogorov-Smirnov test of `data` against
    the model cdf.

    Parameters
    ----------
    data : array-like
        Samples to compare against the model.
    model : dict
        Degrees-of-freedom -> weight mapping; see `model_to_f`.
    bins : int
        Number of histogram bins.
    """
    pdf, cdf = model_to_f(model)
    pl.subplot(121)
    # `density=True` replaces `normed=True`, which was removed in
    # matplotlib 3.1 and would crash on modern versions.
    pl.hist(data, bins=bins, cumulative=False, density=True)
    xs = np.linspace(*pl.xlim(), 1000)
    pl.plot(xs, pdf(xs))
    pl.subplot(122)
    pl.hist(data, bins=bins, cumulative=True, density=True)
    xs = np.linspace(*pl.xlim(), 1000)
    pl.plot(xs, cdf(xs))
    # BUG FIX: was `kstest(y, cdf)` — `y` is an unrelated notebook global;
    # the function's own argument `data` is what should be tested.
    print(kstest(data, cdf))
In [30]:
from scipy.special import binom
# Compare the empirical residuals `rs` against a binomial-weighted mixture
# of chi-squared distributions with M - k degrees of freedom.
# NOTE(review): this cell is duplicated (with different bin counts) in the
# two cells below — consider one parameterized cell instead.
pl.figure(0, figsize=(16, 5))
# Symmetric binomial weights: C(m, k) / 2^m for k = 0..m.
binomc = lambda m: np.array([binom(m, k) / 2**m for k in range(m + 1)])
ws = binomc(M)  # NOTE(review): computed but unused in this cell
test(rs, {M - k: w for k, w in enumerate(binomc(R))},
     bins=50)
In [17]:
from scipy.special import binom
# NOTE(review): duplicate of the previous cell except for the bin count
# (default 100 here) — candidate for removal / parameterization.
pl.figure(0, figsize=(16, 5))
binomc = lambda m: np.array([binom(m, k) / 2**m for k in range(m + 1)])
ws = binomc(M)  # NOTE(review): computed but unused in this cell
test(rs, {M - k: w for k, w in enumerate(binomc(R))})
In [15]:
from scipy.special import binom
# NOTE(review): identical to the cell above — stale duplicate from
# out-of-order editing; candidate for removal.
pl.figure(0, figsize=(16, 5))
binomc = lambda m: np.array([binom(m, k) / 2**m for k in range(m + 1)])
ws = binomc(M)  # NOTE(review): computed but unused in this cell
test(rs, {M - k: w for k, w in enumerate(binomc(R))})
In [46]:
# Alternative model: chi-squared mixture with M - R - k degrees of freedom,
# weighted by the binomial coefficients of M - R (relies on `binomc`, `M`,
# and `R` from earlier cells).
model = {M - R - k: w for k, w in enumerate(binomc(M - R))}
pdf, cdf = model_to_f(model)
In [52]:
# Sanity check: numerically integrating the model pdf (Riemann sum) should
# reproduce the model cdf, up to any point mass at 0 that the pdf omits.
x = np.linspace(0, 40, 10000)[1:]  # drop x = 0 from the grid
# NOTE(review): the actual grid spacing of linspace(0, 40, 10000) is
# 40/9999, not 40/10000 — the integral below is off by a factor ~1.0001.
pl.plot(x, np.cumsum(pdf(x)) * 40 / 10000)
pl.plot(x, cdf(x))
Out[52]:
In [48]:
Out[48]:
In [ ]: