In [13]:
import numpy as np
from scipy.stats import norm as gaussian
from sklearn.linear_model import LogisticRegression

logistic = lambda x: 1/(1+np.exp(-x))

Simulate binary outcomes $y$ from the model

$$p = \text{logistic}\left(a x_1 + b x_2 + \mathcal{N}(0, \epsilon)\right)$$

or equivalently

$$\log\left(\frac{p}{1-p}\right) = a x_1 + b x_2 + \mathcal{N}(0, \epsilon)$$

where $p = \Pr(y = 1)$
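
As a quick sanity check (my addition, reusing the logistic helper defined in the cell above), the log-odds form really is just the logistic form inverted:


In [ ]:
z = np.linspace(-5, 5, 11)
p_check = logistic(z)
# the log-odds of logistic(z) should recover z
np.allclose(np.log(p_check / (1 - p_check)), z)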


In [2]:
N = int(1e5)  # keep sizes as ints for numpy/scipy size arguments

a = 2
b = -3
epsilon = 1e-1

Randomly assign binary values to $x_1$ and $x_2$ for each data point, and represent them as the design matrix $X$:


In [5]:
X = np.random.randint(0, 2, size=(N, 2))
X[:10]


Out[5]:
array([[1, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [1, 0],
       [0, 0],
       [1, 1],
       [0, 1],
       [1, 1],
       [1, 0]])

Compute $a x_1 + b x_2$ for each data point:


In [9]:
mu = X.dot([a, b])
mu[:10]


Out[9]:
array([ 2,  0,  0,  0,  2,  0, -1, -3, -1,  2])

In [10]:
p = logistic(mu + gaussian.rvs(0, epsilon, size=N))
p[:5]


Out[10]:
array([ 0.8851939 ,  0.50328963,  0.50195543,  0.46800914,  0.89607792])

In [11]:
y = np.random.rand(N) < p
y[:10]


Out[11]:
array([ True,  True, False, False,  True,  True, False, False, False,  True], dtype=bool)

The fitted intercept should be close to 0, and the coefficients close to $[a, b] = [2, -3]$:


In [12]:
lr = LogisticRegression(C=1e16)
lr.fit(X, y)
print('intercept', lr.intercept_[0])
print('coefficients', lr.coef_[0])


intercept 0.000415162260143
coefficients [ 1.99138227 -2.97568567]

Regularization

$p = \text{logistic}\left(w^T x + \mathcal{N}(0, \epsilon)\right)$

$w \sim \mathcal{N}(0, \sigma)$


In [ ]:
N = int(1e5)
num_features = int(1e3)
sigma = .3
epsilon = 1e-1

w = gaussian.rvs(0, sigma, size=num_features)
w[:5]

In [160]:
X = np.random.randint(0, 2, size=(N, num_features))
mu = X.dot(w)
p = logistic(mu + gaussian.rvs(0, epsilon, size=N))
y = np.random.rand(N) < p

In [161]:
lr = LogisticRegression(C=1e16)
lr.fit(X, y)
print('intercept', lr.intercept_[0])
print('coefficient error', np.linalg.norm(lr.coef_[0] - w))


intercept 0.0471153112658
coefficient error 0.948455902104

In [ ]:
lr = LogisticRegression(C=sigma)
lr.fit(X, y)
print('intercept', lr.intercept_[0])
print('coefficient error', np.linalg.norm(lr.coef_[0] - w))

In [162]:
lr.C = sigma
lr.fit(X, y)
print('intercept', lr.intercept_[0])
print('coefficient error', np.linalg.norm(lr.coef_[0] - w))


intercept 0.150561744534
coefficient error 0.859556511327

In [163]:
lr.C = sigma**2
lr.fit(X, y)
print('intercept', lr.intercept_[0])
print('coefficient error', np.linalg.norm(lr.coef_[0] - w))


intercept 0.0311773739001
coefficient error 0.830650894553

In [164]:
lr.C = 2*sigma
lr.fit(X, y)
print('intercept', lr.intercept_[0])
print('coefficient error', np.linalg.norm(lr.coef_[0] - w))


intercept 0.0463168029646
coefficient error 0.898627263513

What is $C$ anyway?

$\alpha = \frac{1}{2C}$
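
As I understand scikit-learn's conventions (my summary; worth checking against the current docs), the two L2-penalized objectives are roughly

$$\text{LogisticRegression:}\quad \min_w \; \tfrac{1}{2} w^T w + C \sum_i \log\left(1 + e^{-y_i w^T x_i}\right), \qquad y_i \in \{-1, +1\}$$

$$\text{Ridge:}\quad \min_w \; \|y - Xw\|_2^2 + \alpha \|w\|_2^2$$

Dividing the logistic objective by $C$ puts a weight of $\frac{1}{2C}$ on the penalty relative to the data term, which is where $\alpha = \frac{1}{2C}$ comes from.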


In [211]:
from sklearn.linear_model import Ridge, RidgeCV

In [247]:
N = int(3e5)
num_features = int(3e2)
sigma = .1
invsigma = 1/sigma
epsilon = 1e-1

w = gaussian.rvs(0, sigma, size=num_features)
X = np.random.randint(0, 2, size=(N, num_features))
mu = X.dot(w)
y = mu + gaussian.rvs(0, epsilon, size=N)

In [244]:
lm = Ridge(alpha=0)
lm.fit(X, y)
print('coefficient error', np.linalg.norm(lm.coef_ - w))


coefficient error 0.0067882001884

In [248]:
alphas = np.linspace(.8, 1.2, 20)
lm = RidgeCV(alphas=alphas)
lm.fit(X, y);

In [249]:
lm.alpha_


Out[249]:
1.0105263157894737

In [250]:
alphas


Out[250]:
array([ 0.8       ,  0.82105263,  0.84210526,  0.86315789,  0.88421053,
        0.90526316,  0.92631579,  0.94736842,  0.96842105,  0.98947368,
        1.01052632,  1.03157895,  1.05263158,  1.07368421,  1.09473684,
        1.11578947,  1.13684211,  1.15789474,  1.17894737,  1.2       ])

In [252]:
2*.1**.5


Out[252]:
0.6324555320336759

In [253]:
2*.3**.5


Out[253]:
1.0954451150103321
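
One candidate explanation (my addition, based on the standard MAP interpretation of ridge regression rather than anything derived above): with prior $w \sim \mathcal{N}(0, \sigma)$ and noise $\mathcal{N}(0, \epsilon)$, the posterior mode is the ridge solution with $\alpha = \epsilon^2 / \sigma^2$, which for $\sigma = \epsilon = 0.1$ gives $\alpha = 1$, close to the $\alpha \approx 1.01$ that RidgeCV selected.


In [ ]:
# hypothetical check, assuming alpha_MAP = (epsilon / sigma)**2
(epsilon / sigma)**2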