In [28]:
import random
import numpy as np

In [29]:
def sigmoid(x):
    """ Sigmoid function """
    ###################################################################
    # Compute the sigmoid function for the input here.                #
    ###################################################################
    
    ### YOUR CODE HERE
    x = 1 / (1 + np.exp(-x))   
    ### END YOUR CODE
    
    return x

def sigmoid_grad(f):
    """ Sigmoid gradient function """
    ###################################################################
    # Compute the gradient for the sigmoid function here. Note that   #
    # for this implementation, the input f should be the sigmoid      #
    # function value of your original input x.                        #
    ###################################################################
    
    ### YOUR CODE HERE
    f = (1 - f) * f
    ### END YOUR CODE
    
    return f
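
A quick, illustrative check of the two helpers above (an added example, not part of the original skeleton): sigmoid(0) = 0.5, and since the gradient is f * (1 - f) in terms of the sigmoid output f, sigmoid_grad(0.5) = 0.25.

In [ ]:
# Added sanity check for sigmoid / sigmoid_grad (illustrative, not original code)
x_check = np.array([[1, 2], [-1, -2]], dtype=float)
f_check = sigmoid(x_check)
print f_check                 # every entry lies in (0, 1)
print sigmoid_grad(f_check)   # elementwise f * (1 - f)
print sigmoid(np.array([0.])), sigmoid_grad(np.array([0.5]))   # expect [ 0.5] [ 0.25]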

In [30]:
def softmax(x):
    """ Softmax function """
    ###################################################################
    # Compute the softmax function for the input here.                #
    # It is crucial that this function is optimized for speed because #
    # it will be used frequently in later code.                       #
    # You might find numpy functions np.exp, np.sum, np.reshape,      #
    # np.max, and numpy broadcasting useful for this task. (numpy     #
    # broadcasting documentation:                                     #
    # http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)  #
    # You should also make sure that your code works for one          #
    # dimensional inputs (treat the vector as a row); you might find  #
    # it helpful for your later problems.                             #
    ###################################################################
    
    ### YOUR CODE HERE
    # Shift by the row-wise max for numerical stability; using axis=-1 with
    # keepdims makes the same two lines work for 1-D and 2-D inputs, and
    # avoids modifying the caller's array in place.
    x = x - np.max(x, axis=-1, keepdims=True)
    x = np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)
    ### END YOUR CODE
    
    return x
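
Softmax is invariant to adding a constant to every entry of a row, which is why subtracting the row-wise max above is numerically safe; the implementation also accepts a 1-D vector, treating it as a single row. A small illustrative check (added, not part of the skeleton):

In [ ]:
# Added check of shift invariance and of the 1-D case (illustrative, not original code)
print softmax(np.array([[1001., 1002.], [3., 4.]]))   # same rows as the line below
print softmax(np.array([[1., 2.], [3., 4.]]))
print softmax(np.array([1., 2.]))                     # 1-D input treated as one row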

In [31]:
# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x):
    """ 
    Gradient check for a function f 
    - f should be a function that takes a single argument and outputs the cost and its gradients
    - x is the point (numpy array) to check the gradient at
    """ 

    rndstate = random.getstate()
    random.setstate(rndstate)  
    fx, grad = f(x) # Evaluate function value at original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
    
    ### YOUR CODE HERE: try modifying x[ix] with h defined above to compute numerical gradients.
    ### Make sure you call random.setstate(rndstate) before calling f(x) each time; this makes it
    ### possible to test cost functions with built-in randomness later.
        x[ix] += h 
        random.setstate(rndstate)
        fxph = f(x)[0]
        x[ix] -= 2 * h
        random.setstate(rndstate)
        fxmh = f(x)[0]
        x[ix] += h
        numgrad = (fxph - fxmh) / (2 * h)   
    
        ### END YOUR CODE

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print "Gradient check failed."
            print "First gradient error found at index %s" % str(ix)
            print "Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad)
            return
    
        it.iternext() # Step to next dimension

    print "Gradient check passed!"

In [32]:
N = 20
dimensions = [10, 5, 10]
data = np.random.randn(N, dimensions[0])   # each row will be a datum
labels = np.zeros((N, dimensions[2]))
for i in xrange(N):
    labels[i,random.randint(0,dimensions[2]-1)] = 1

params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], )
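
With dimensions = [10, 5, 10], this flat params vector packs W1 (10 x 5 = 50 entries), b1 (5), W2 (5 x 10 = 50) and b2 (10), i.e. (10 + 1) * 5 + (5 + 1) * 10 = 115 parameters in total; forward_backward_prop below unpacks them in that order.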

In [44]:
def forward_backward_prop(data, labels, params):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and the cross entropy cost, and #
    # the backward propagation for the gradients of all parameters.   #
    ###################################################################
    
    ### Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t:t+dimensions[0]*dimensions[1]], (dimensions[0], dimensions[1]))
    t += dimensions[0]*dimensions[1]
    b1 = np.reshape(params[t:t+dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t:t+dimensions[1]*dimensions[2]], (dimensions[1], dimensions[2]))
    t += dimensions[1]*dimensions[2]
    b2 = np.reshape(params[t:t+dimensions[2]], (1, dimensions[2]))
    
    ### YOUR CODE HERE: forward propagation
    N = data.shape[0]                                  # number of examples in the batch
    h = sigmoid(data.dot(W1) + b1)                     # hidden-layer activations
    scores = softmax(h.dot(W2) + b2)                   # predicted class probabilities
    cost = np.sum(-np.log(scores[labels == 1])) / N    # average cross-entropy cost
    ### END YOUR CODE
    
    ### YOUR CODE HERE: backward propagation
    dscores = (scores - labels) / N        # gradient of the cost w.r.t. the pre-softmax scores

    gradW2 = np.dot(h.T, dscores)
    gradb2 = np.sum(dscores, axis=0)

    grad_h = np.dot(dscores, W2.T)         # backpropagate into the hidden layer...
    grad_h = sigmoid_grad(h) * grad_h      # ...and through the sigmoid nonlinearity

    gradW1 = np.dot(data.T, grad_h)
    gradb1 = np.sum(grad_h, axis=0)
    ### END YOUR CODE
    
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), gradW2.flatten(), gradb2.flatten()))

    return cost, grad
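
The backward pass relies on the standard identity that, for scores = softmax(z) with one-hot labels and a cross-entropy cost, the gradient of the cost with respect to z is scores - labels; dividing by N averages it over the batch, matching the averaged cost, and the remaining gradients follow from the chain rule through the two affine layers and the sigmoid (whose derivative is recovered from its output by sigmoid_grad).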

In [45]:
print "=== For autograder ==="
gradcheck_naive(lambda params: forward_backward_prop(data, labels, params), params)


=== For autograder ===
