In [28]:
import random
import numpy as np
In [29]:
def sigmoid(x):
    """ Sigmoid function """
    ###################################################################
    # Compute the sigmoid function for the input here.                #
    ###################################################################
    ### YOUR CODE HERE
    x = 1 / (1 + np.exp(-x))
    ### END YOUR CODE
    return x

def sigmoid_grad(f):
    """ Sigmoid gradient function """
    ###################################################################
    # Compute the gradient for the sigmoid function here. Note that   #
    # for this implementation, the input f should be the sigmoid      #
    # function value of your original input x.                        #
    ###################################################################
    ### YOUR CODE HERE
    f = (1 - f) * f
    ### END YOUR CODE
    return f
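A quick sanity check (added here as an illustration, not part of the original scaffold): since the sigmoid derivative satisfies sigma'(x) = sigma(x) * (1 - sigma(x)), sigmoid_grad applied to the sigmoid values should reproduce it.
In [ ]:
# Hedged sanity check for sigmoid / sigmoid_grad on a small array.
x_check = np.array([[1, 2], [-1, -2]], dtype=float)
f_check = sigmoid(x_check)
print f_check                 # expected approx [[0.731, 0.881], [0.269, 0.119]]
print sigmoid_grad(f_check)   # expected approx [[0.197, 0.105], [0.197, 0.105]]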
In [30]:
def softmax(x):
    """ Softmax function """
    ###################################################################
    # Compute the softmax function for the input here.                #
    # It is crucial that this function is optimized for speed because #
    # it will be used frequently in later code.                       #
    # You might find numpy functions np.exp, np.sum, np.reshape,      #
    # np.max, and numpy broadcasting useful for this task. (numpy     #
    # broadcasting documentation:                                     #
    # http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)  #
    # You should also make sure that your code works for one          #
    # dimensional inputs (treat the vector as a row), you might find  #
    # it helpful for your later problems.                             #
    ###################################################################
    ### YOUR CODE HERE
    orig_shape = x.shape
    if x.ndim == 1:
        x = x.reshape(1, -1)                # treat a 1-D input as a single row
    # Subtract the row-wise max for numerical stability; softmax is invariant
    # to shifting every entry of a row by a constant. Using x = x - ... (not
    # x -= ...) avoids modifying the caller's array in place.
    x = x - np.max(x, axis=1, keepdims=True)
    x = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
    x = x.reshape(orig_shape)
    ### END YOUR CODE
    return x
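A brief check (illustrative, not from the original notebook): softmax is invariant to adding a constant to every entry of a row, and a 1-D input is treated as a single row.
In [ ]:
# Hedged sanity check: shift invariance and 1-D input handling.
print softmax(np.array([[1001, 1002], [3, 4]]))   # both rows approx [0.269, 0.731]
print softmax(np.array([1, 2]))                   # approx [0.269, 0.731]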
In [31]:
# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x):
    """
    Gradient check for a function f
    - f should be a function that takes a single argument and outputs the cost and its gradients
    - x is the point (numpy array) to check the gradient at
    """

    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        ### YOUR CODE HERE: try modifying x[ix] with h defined above to compute numerical gradients
        ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it
        ### possible to test cost functions with built in randomness later
        # Central difference: (f(x + h) - f(x - h)) / (2h), restoring x[ix] afterwards.
        x[ix] += h
        random.setstate(rndstate)
        fxph = f(x)[0]
        x[ix] -= 2 * h
        random.setstate(rndstate)
        fxmh = f(x)[0]
        x[ix] += h
        numgrad = (fxph - fxmh) / (2 * h)
        ### END YOUR CODE

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print "Gradient check failed."
            print "First gradient error found at index %s" % str(ix)
            print "Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad)
            return

        it.iternext()  # Step to next dimension

    print "Gradient check passed!"
In [32]:
N = 20
dimensions = [10, 5, 10]
data = np.random.randn(N, dimensions[0]) # each row will be a datum
labels = np.zeros((N, dimensions[2]))
for i in xrange(N):
    labels[i, random.randint(0, dimensions[2] - 1)] = 1
params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], )
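A small consistency check (illustrative): the packed parameter vector holds W1 (10x5), b1 (5), W2 (5x10), and b2 (10), i.e. (10 + 1) * 5 + (5 + 1) * 10 = 115 entries.
In [ ]:
# Hedged check of the packed parameter layout: (D + 1) * H + (H + 1) * C entries.
print params.shape    # expected (115,) for dimensions = [10, 5, 10]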
In [44]:
def forward_backward_prop(data, labels, params):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and the cross entropy cost,     #
    # and the backward propagation for the gradients of all           #
    # parameters.                                                     #
    ###################################################################

    ### Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t:t+dimensions[0]*dimensions[1]], (dimensions[0], dimensions[1]))
    t += dimensions[0]*dimensions[1]
    b1 = np.reshape(params[t:t+dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t:t+dimensions[1]*dimensions[2]], (dimensions[1], dimensions[2]))
    t += dimensions[1]*dimensions[2]
    b2 = np.reshape(params[t:t+dimensions[2]], (1, dimensions[2]))

    ### YOUR CODE HERE: forward propagation
    N, D = data.shape
    h = sigmoid(data.dot(W1) + b1)                     # hidden activations, N x H
    scores = softmax(h.dot(W2) + b2)                   # class probabilities, N x C
    cost = np.sum(-np.log(scores[labels == 1])) / N    # average cross-entropy cost
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    dscores = (scores - labels) / N                    # gradient w.r.t. softmax inputs
    gradb2 = np.sum(dscores, axis=0)
    gradW2 = np.dot(h.T, dscores)
    grad_h = np.dot(dscores, W2.T)                     # backprop into hidden layer
    grad_h = sigmoid_grad(h) * grad_h
    gradb1 = np.sum(grad_h, axis=0)
    gradW1 = np.dot(data.T, grad_h)
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), gradW2.flatten(), gradb2.flatten()))
    return cost, grad
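Before running the full gradient check, a lightweight check (illustrative, not part of the assignment scaffold) is that the stacked gradient has the same shape as the packed parameter vector.
In [ ]:
# Hedged shape check: the returned gradient must align with params.
cost_chk, grad_chk = forward_backward_prop(data, labels, params)
print cost_chk
print grad_chk.shape == params.shape   # expected True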
In [45]:
print "=== For autograder ==="
forward_backward_prop(data, labels, params)
#gradcheck_naive(lambda params: forward_backward_prop(data, labels, params), params)