Gradient Check

I want to check whether the analytic gradients I have implemented match the gradient of the optimisation target $Q$. (#130)

Recall that the derivative is the limit of the central difference,

$$Q'(x) = \lim_{h \to 0^+} \frac{Q(x + h) - Q(x - h)}{2h},$$

so we can sample a small $h$, evaluate the finite-difference quotient, and compare it to the analytic gradient returned by the implementation.
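
As a self-contained illustration of the idea (separate from crowdastro), here is a minimal central-difference gradient check on a toy function; `numeric_grad`, `f`, `grad_f`, the step size `eps`, and the tolerance are all illustrative choices, not part of the code under test.

import numpy

def numeric_grad(f, x, eps=1e-6):
    """Central-difference estimate of the gradient of f at x."""
    grad = numpy.zeros_like(x)
    for i in range(x.size):
        h = numpy.zeros_like(x)
        h[i] = eps
        grad[i] = (f(x + h) - f(x - h)) / (2 * eps)
    return grad

# Toy check: f(x) = x . x has gradient 2x.
f = lambda x: x.dot(x)
grad_f = lambda x: 2 * x

x0 = numpy.random.random(size=(5,))
print(numpy.allclose(numeric_grad(f, x0), grad_f(x0), atol=1e-4))  # expect True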


In [1]:
import numpy, sys

sys.path.insert(1, '..')  # make the crowdastro package importable from this notebook
import crowdastro.active_learning.passive_crowd as pc

In [4]:
def softmax(x):
    """Compute softmax values for each sets of scores in x. (From Udacity)"""
    return numpy.exp(x) / numpy.sum(numpy.exp(x), axis=0)

def check(trials=10):
    T = 4   # number of annotators
    D = 5   # number of features
    N = 20  # number of data points

    x = numpy.random.random(size=(N, D))            # instance features
    y = numpy.random.binomial(1, 0.5, size=(T, N))  # binary annotator labels

    # Posterior label probabilities p(z = 1) for each instance (random for this check),
    # and their complement.
    posteriors = numpy.random.random(size=(N,))
    posteriors_0 = 1 - posteriors

    # Parameters packed as a (D), b (1), w (T x D), g (T).
    params = numpy.random.normal(scale=0.5, size=(D + 1 + T * D + T,))

    # Analytic gradient as computed by the implementation.
    print(pc.Q(params, D, T, N, posteriors, posteriors_0, x, y)[1])
    
    # Direct re-implementation of Q for reference (the finite-difference check below calls pc.Q).
    def Q(params):
        a, b, w, g = pc.unpack(params, D, T)
        return (posteriors.dot((numpy.log(pc.annotator_model(w, g, x, y, 1)) +
                                numpy.log(pc.logistic_regression(a, b, x))).T) +
                posteriors_0.dot((numpy.log(pc.annotator_model(w, g, x, y, 0)) +
                                  numpy.log(1 - pc.logistic_regression(a, b, x))).T)
                ).sum()

    # Finite-difference estimate of the derivative of pc.Q's objective with respect to
    # params[D + 1] (the first element of w under the a, b, w, g packing), averaged
    # over 100 random step sizes.
    def approx_grad(params):
        grads = []
        for _ in range(100):
            h = numpy.zeros(params.shape)
            # Step size scaled to the magnitude of the parameter vector.
            h[D + 1] = abs(numpy.random.normal(scale=numpy.linalg.norm(params) * 1e-10))
            grads.append(
                (pc.Q(params + h, D, T, N, posteriors, posteriors_0, x, y)[0] -
                 pc.Q(params, D, T, N, posteriors, posteriors_0, x, y)[0]) / h[D + 1])
        return numpy.mean(grads, axis=0)

    # Hand-derived gradient of Q with respect to b.
    def grad_b(params):
        a, b, w, g = pc.unpack(params, D, T)
        return T*(posteriors.dot(pc.logistic_regression(-a, -b, x)) +
                  posteriors_0.dot(pc.logistic_regression(-a, -b, x) - 1))

    # Hand-derived gradient of Q with respect to a[0].
    def grad_a0(params):
        a, b, w, g = pc.unpack(params, D, T)
        return T*((x[:, 0] * posteriors).dot(pc.logistic_regression(-a, -b, x)) +
                  (x[:, 0] * posteriors_0).dot(pc.logistic_regression(-a, -b, x) - 1))

    # Hand-derived gradient of Q with respect to γ_0 (the first annotator's g parameter).
    def grad_γ0(params):
        a, b, w, g = pc.unpack(params, D, T)
        return sum(posteriors[i] * (pc.logistic_regression(-w[0], -g[0], x[i]) - abs(y[0, i] - 1)) +
                   posteriors_0[i] * (pc.logistic_regression(-w[0], -g[0], x[i]) - abs(y[0, i] - 0))
                   for i in range(N))

    # Hand-derived gradient of Q with respect to w (all annotators).
    def grad_w0(params):
        a, b, w, g = pc.unpack(params, D, T)
        ddw = numpy.zeros(w.shape)
        for t in range(T):
            ddw[t] += sum(x[i] * posteriors[i] * (pc.logistic_regression(-w[t], -g[t], x[i]) - abs(y[t, i] - 1)) +
                          x[i] * posteriors_0[i] * (pc.logistic_regression(-w[t], -g[t], x[i]) - abs(y[t, i] - 0))
                          for i in range(N))
        return ddw
    
    # Compare the numerical estimate against the hand-derived gradient with respect to w.
    return approx_grad(params), grad_w0(params)

In [5]:
check()


[ 5.6965961   2.62526152  5.44163006  4.55768567  0.75906086  7.94502106
  2.38303711  1.74267135  1.93973346  1.70254263  2.0052662  -2.4560358
 -1.9239815  -1.90423456 -1.10316555 -1.87786511 -0.33029406 -1.11499397
 -1.40680896 -0.91570391 -0.86608039  0.07425844 -0.04972941 -0.15018714
 -0.43148124  0.43932629  3.72774373 -3.58546661 -1.84149477  0.25027849]
Out[5]:
(2.384527463426005,
 array([[-2.38303711, -1.74267135, -1.93973346, -1.70254263, -2.0052662 ],
        [ 2.4560358 ,  1.9239815 ,  1.90423456,  1.10316555,  1.87786511],
        [ 0.33029406,  1.11499397,  1.40680896,  0.91570391,  0.86608039],
        [-0.07425844,  0.04972941,  0.15018714,  0.43148124, -0.43932629]]))
