I want to check if the analytic gradients I have match the gradient of the optimisation target. #130
The derivative is
$$Q'(x) = \lim_{h \to 0^+} \frac{Q(x + h) - Q(x - h)}{2h},$$
so we can sample a small $h$ and compare this numerical approximation to the analytic gradients in the implementation.
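As a toy illustration of the idea (nothing here comes from crowdastro; the target and its gradient are made up for the example), a symmetric-difference check of an analytic gradient looks like this:

import numpy

def central_difference(f, x, i, h=1e-6):
    # Symmetric difference quotient for the i-th partial derivative of f at x.
    e = numpy.zeros_like(x)
    e[i] = h
    return (f(x + e) - f(x - e)) / (2 * h)

# Toy target with a known analytic gradient: f(x) = sum(x ** 2), so grad f(x) = 2 x.
f = lambda x: (x ** 2).sum()
grad_f = lambda x: 2 * x

x0 = numpy.random.random(size=(5,))
numerical = numpy.array([central_difference(f, x0, i) for i in range(5)])
print(numpy.allclose(numerical, grad_f(x0), atol=1e-5))  # Expect True.

The notebook below does the same thing for the crowd model's objective Q.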
In [1]:
    
import numpy, sys
sys.path.insert(1, '..')
import crowdastro.active_learning.passive_crowd as pc
    
In [4]:
    
def softmax(x):
    """Compute softmax values for each set of scores in x. (From Udacity.)"""
    return numpy.exp(x) / numpy.sum(numpy.exp(x), axis=0)
def check(trials=10):
    T = 4   # Number of annotators.
    D = 5   # Number of features.
    N = 20  # Number of data points.

    x = numpy.random.random(size=(N, D))            # Features.
    y = numpy.random.binomial(1, 0.5, size=(T, N))  # Annotator labels.

    posteriors = numpy.random.random(size=(N,))     # Randomised posteriors p(z = 1).
    posteriors_0 = 1 - posteriors                   # p(z = 0).
    params = numpy.random.normal(scale=0.5, size=(D + 1 + T * D + T,))  # Packed (a, b, w, g).

    # pc.Q appears to return (value, gradient); print the analytic gradient for reference.
    print(pc.Q(params, D, T, N, posteriors, posteriors_0, x, y)[1])
    
    # Re-implementation of the objective Q, for comparison with pc.Q.
    def Q(params):
        a, b, w, g = pc.unpack(params, D, T)
        return (posteriors.dot((numpy.log(pc.annotator_model(w, g, x, y, 1)) +
                                numpy.log(pc.logistic_regression(a, b, x))).T) +
                posteriors_0.dot((numpy.log(pc.annotator_model(w, g, x, y, 0)) +
                                  numpy.log(1 - pc.logistic_regression(a, b, x))).T)
                ).sum()
    # Forward-difference approximation of dQ/dw[0, 0] (params index D + 1 under
    # pc.unpack's packing), averaged over randomly sampled step sizes.
    def approx_grad(params):
        grads = []
        for _ in range(100):
            h = numpy.zeros(params.shape)
            h[D + 1] = abs(numpy.random.normal(scale=numpy.linalg.norm(params) * 1e-10))
            q_h = pc.Q(params + h, D, T, N, posteriors, posteriors_0, x, y)[0]
            q_0 = pc.Q(params, D, T, N, posteriors, posteriors_0, x, y)[0]
            grads.append((q_h - q_0) / h[D + 1])
        return numpy.mean(grads, axis=0)
    # Analytic gradient of Q with respect to b.
    def grad_b(params):
        a, b, w, g = pc.unpack(params, D, T)
        return T*(posteriors.dot(pc.logistic_regression(-a, -b, x)) +
                  posteriors_0.dot(pc.logistic_regression(-a, -b, x) - 1))
    # Analytic gradient of Q with respect to a[0].
    def grad_a0(params):
        a, b, w, g = pc.unpack(params, D, T)
        return T*((x[:, 0] * posteriors).dot(pc.logistic_regression(-a, -b, x)) +
                  (x[:, 0] * posteriors_0).dot(pc.logistic_regression(-a, -b, x) - 1))
    # Analytic gradient of Q with respect to g[0] (the first annotator's γ).
    def grad_γ0(params):
        a, b, w, g = pc.unpack(params, D, T)
        return sum(posteriors[i] * (pc.logistic_regression(-w[0], -g[0], x[i]) - abs(y[0, i] - 1)) +
                   posteriors_0[i] * (pc.logistic_regression(-w[0], -g[0], x[i]) - abs(y[0, i] - 0))
                   for i in range(N))
    # Analytic gradient of Q with respect to w (computed for every annotator t).
    def grad_w0(params):
        a, b, w, g = pc.unpack(params, D, T)
        ddw = numpy.zeros(w.shape)
        for t in range(T):
            ddw[t] += sum(x[i] * posteriors[i] * (pc.logistic_regression(-w[t], -g[t], x[i]) - abs(y[t, i] - 1)) +
                          x[i] * posteriors_0[i] * (pc.logistic_regression(-w[t], -g[t], x[i]) - abs(y[t, i] - 0))
                          for i in range(N))
        return ddw
    
    # Numerical dQ/dw[0, 0] alongside the analytic dQ/dw for comparison.
    return approx_grad(params), grad_w0(params)
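The check above only perturbs one coordinate (index D + 1, i.e. the first entry of w). Below is a minimal sketch of the same forward-difference idea applied to every entry of params, written as a standalone function so it can be called from inside check(); like the code above, it assumes pc.Q returns the objective value as its first element.

def approx_grad_full(params, D, T, N, posteriors, posteriors_0, x, y, eps=1e-6):
    # Forward-difference approximation of the full gradient of pc.Q at params.
    q0 = pc.Q(params, D, T, N, posteriors, posteriors_0, x, y)[0]
    grad = numpy.zeros_like(params)
    for j in range(len(params)):
        h = numpy.zeros_like(params)
        h[j] = eps
        grad[j] = (pc.Q(params + h, D, T, N, posteriors, posteriors_0, x, y)[0] - q0) / eps
    return grad

Comparing approx_grad_full(params, D, T, N, posteriors, posteriors_0, x, y) entry by entry against the analytic gradient printed above would exercise all of the gradients at once.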
    
In [5]:
    
check()
    
    
    Out[5]: