In [10]:
import numpy as np

In [11]:
class Node(object):
    """
    Base class for nodes in the network.

    Arguments:

        `inbound_nodes`: A list of nodes with edges into this node.
    """
    def __init__(self, inbound_nodes=None):
        """
        Node's constructor (runs when the object is instantiated). Sets
        properties that all nodes need.
        """
        # Avoid a mutable default argument.
        if inbound_nodes is None:
            inbound_nodes = []
        # A list of nodes with edges into this node.
        self.inbound_nodes = inbound_nodes
        # The eventual value of this node. Set by running
        # the forward() method.
        self.value = None
        # A list of nodes that this node outputs to.
        self.outbound_nodes = []
        # Gradients for this node. Keys are the inbound nodes and
        # the values are the partials of this node with respect
        # to that input.
        self.gradients = {}
        # Sets this node as an outbound node for all of
        # this node's inputs.
        for node in inbound_nodes:
            node.outbound_nodes.append(self)

    def forward(self):
        """
        Every node that uses this class as a base class will
        need to define its own `forward` method.
        """
        raise NotImplementedError

    def backward(self):
        """
        Every node that uses this class as a base class will
        need to define its own `backward` method.
        """
        raise NotImplementedError

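Although `Node` itself never computes anything (`forward` and `backward` raise `NotImplementedError`), its constructor already wires the graph edges. A minimal sketch, not part of the original notebook, that checks the wiring with two throwaway nodes `a` and `b`:

In [ ]:
# Hypothetical check: constructing `b` with `a` as an inbound node should
# register `b` as one of `a`'s outbound nodes.
a = Node()
b = Node([a])
print(b.inbound_nodes == [a])   # True
print(a.outbound_nodes == [b])  # True
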
In [12]:
class Input(Node):
    """
    A generic input into the network.
    """
    def __init__(self):
        # The base class constructor has to run to set all
        # the properties here.
        #
        # The most important property on an Input is value.
        # self.value is set during `topological_sort` later.
        Node.__init__(self)

    def forward(self):
        # Do nothing because nothing is calculated.
        pass

    def backward(self):
        # An Input node has no inputs, so the gradient (derivative)
        # starts at zero.
        # The key, `self`, is a reference to this object.
        self.gradients = {self: 0}
        # Weights and biases are Input nodes, so their gradient is
        # accumulated from the gradients of the outbound nodes.
        for n in self.outbound_nodes:
            self.gradients[self] += n.gradients[self]

In [13]:
class Linear(Node):
    """
    Represents a node that performs a linear transform.
    """
    def __init__(self, X, W, b):
        # The base class (Node) constructor. Weights and bias
        # are treated like inbound nodes.
        Node.__init__(self, [X, W, b])

    def forward(self):
        """
        Performs the math behind a linear transform.
        """
        X = self.inbound_nodes[0].value
        W = self.inbound_nodes[1].value
        b = self.inbound_nodes[2].value
        self.value = np.dot(X, W) + b

    def backward(self):
        """
        Calculates the gradient based on the output values.
        """
        # Initialize a partial for each of the inbound_nodes.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_nodes}
        # Cycle through the outputs. The gradient will change depending
        # on each output, so the gradients are summed over all outputs.
        for n in self.outbound_nodes:
            # Get the partial of the cost with respect to this node.
            grad_cost = n.gradients[self]
            # Set the partial of the loss with respect to this node's inputs.
            self.gradients[self.inbound_nodes[0]] += np.dot(grad_cost, self.inbound_nodes[1].value.T)
            # Set the partial of the loss with respect to this node's weights.
            self.gradients[self.inbound_nodes[1]] += np.dot(self.inbound_nodes[0].value.T, grad_cost)
            # Set the partial of the loss with respect to this node's bias.
            self.gradients[self.inbound_nodes[2]] += np.sum(grad_cost, axis=0, keepdims=False)

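A quick sanity check on the forward pass (a sketch with made-up numbers, not part of the original notebook): a 1x2 input times a 2x1 weight matrix plus a bias of 0.5 should give 1*1 + 2*1 + 0.5 = 3.5.

In [ ]:
# Hypothetical example values; any shapes where X @ W is defined would do.
X_in, W_in, b_in = Input(), Input(), Input()
f = Linear(X_in, W_in, b_in)

X_in.value = np.array([[1., 2.]])    # shape (1, 2)
W_in.value = np.array([[1.], [1.]])  # shape (2, 1)
b_in.value = np.array([0.5])         # shape (1,)

f.forward()
print(f.value)  # [[ 3.5]]
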
In [14]:
class Sigmoid(Node):
    """
    Represents a node that performs the sigmoid activation function.
    """
    def __init__(self, node):
        # The base class constructor.
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        """
        This method is separate from `forward` because it
        will be used with `backward` as well.

        `x`: A numpy array-like object.
        """
        return 1. / (1. + np.exp(-x))

    def forward(self):
        """
        Perform the sigmoid function and set the value.
        """
        input_value = self.inbound_nodes[0].value
        self.value = self._sigmoid(input_value)

    def backward(self):
        """
        Calculates the gradient using the derivative of
        the sigmoid function.
        """
        # Initialize the gradients to 0.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_nodes}
        # Sum the partial with respect to the input over all the outputs.
        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]
            sigmoid = self.value
            self.gradients[self.inbound_nodes[0]] += sigmoid * (1 - sigmoid) * grad_cost

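`backward` relies on the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)). A quick numerical spot check of that derivative (a sketch, not part of the original notebook):

In [ ]:
# Compare the analytic sigmoid derivative against a central finite difference.
s = Sigmoid(Input())
x = np.array([-2., 0., 3.])

sig = s._sigmoid(x)
analytic = sig * (1 - sig)

eps = 1e-6
numeric = (s._sigmoid(x + eps) - s._sigmoid(x - eps)) / (2 * eps)
print(np.allclose(analytic, numeric))  # True
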
In [15]:
class MSE(Node):
    def __init__(self, y, a):
        """
        The mean squared error cost function.
        Should be used as the last node for a network.
        """
        # Call the base class' constructor.
        Node.__init__(self, [y, a])

    def forward(self):
        """
        Calculates the mean squared error.
        """
        # NOTE: We reshape these to avoid possible matrix/vector broadcast
        # errors.
        #
        # For example, if we subtract an array of shape (3,) from an array
        # of shape (3,1) we get an array of shape (3,3) as the result when
        # we want an array of shape (3,1) instead.
        #
        # Making both arrays (3,1) ensures the result is (3,1) and does
        # an elementwise subtraction as expected.
        y = self.inbound_nodes[0].value.reshape(-1, 1)
        a = self.inbound_nodes[1].value.reshape(-1, 1)

        self.m = self.inbound_nodes[0].value.shape[0]
        # Save the computed output for backward.
        self.diff = y - a
        self.value = np.mean(self.diff**2)

    def backward(self):
        """
        Calculates the gradient of the cost.
        """
        self.gradients[self.inbound_nodes[0]] = (2 / self.m) * self.diff
        self.gradients[self.inbound_nodes[1]] = (-2 / self.m) * self.diff

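A small worked example (numbers made up, not from the original notebook): with y = [1, 2, 3] and a = [1.5, 2, 3], the cost is mean([0.25, 0, 0]), roughly 0.083, and the gradient with respect to `a` is -(2/m)(y - a).

In [ ]:
# Hypothetical values to exercise MSE.forward and MSE.backward directly.
y_in, a_in = Input(), Input()
cost = MSE(y_in, a_in)

y_in.value = np.array([1., 2., 3.])
a_in.value = np.array([1.5, 2., 3.])

cost.forward()
cost.backward()
print(cost.value)              # ~0.0833
print(cost.gradients[a_in].T)  # [[ 0.3333  0.      0.    ]]
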
In [16]:
def topological_sort(feed_dict):
    """
    Sort the nodes in topological order using Kahn's Algorithm.

    `feed_dict`: A dictionary where the key is an `Input` node and the value is the respective value fed to that node.

    Returns a list of sorted nodes.
    """

    input_nodes = [n for n in feed_dict.keys()]

    G = {}
    nodes = [n for n in input_nodes]
    while len(nodes) > 0:
        n = nodes.pop(0)
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outbound_nodes:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
            G[n]['out'].add(m)
            G[m]['in'].add(n)
            nodes.append(m)

    L = []
    S = set(input_nodes)
    while len(S) > 0:
        n = S.pop()

        if isinstance(n, Input):
            n.value = feed_dict[n]

        L.append(n)
        for m in n.outbound_nodes:
            G[n]['out'].remove(m)
            G[m]['in'].remove(n)
            # if no other incoming edges add to S
            if len(G[m]['in']) == 0:
                S.add(m)
    return L

In [17]:
def forward_and_backward(graph):
    """
    Performs a forward pass and a backward pass through a list of sorted Nodes.

    Arguments:

        `graph`: The result of calling `topological_sort`.
    """
    # Forward pass
    for n in graph:
        n.forward()

    # Backward pass
    # see: https://docs.python.org/2.3/whatsnew/section-slices.html
    for n in graph[::-1]:
        n.backward()

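Putting the pieces together on a toy graph (a sketch with arbitrary values, not part of the original notebook): `topological_sort` places the `Input` nodes before the nodes that consume them, and after `forward_and_backward` every node holds its gradients, including each trainable under its own key.

In [ ]:
# Hypothetical two-feature example: Linear -> Sigmoid -> MSE.
X_t, y_t = Input(), Input()
W_t, b_t = Input(), Input()

lin = Linear(X_t, W_t, b_t)
act = Sigmoid(lin)
err = MSE(y_t, act)

feed = {
    X_t: np.array([[-1., -2.], [-1., -2.]]),
    y_t: np.array([1., 2.]),
    W_t: np.array([[2.], [3.]]),
    b_t: np.array([-3.]),
}

g = topological_sort(feed)
forward_and_backward(g)

print(err.value)           # the cost for these values
print(W_t.gradients[W_t])  # partial of the cost w.r.t. the weights
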
In [20]:
def sgd_update(trainables, learning_rate=1e-2):
    """
    Updates the value of each trainable with SGD.

    Arguments:

        `trainables`: A list of `Input` Nodes representing weights/biases.
        `learning_rate`: The learning rate.
    """
    # Each trainable exposes its current value through the `value`
    # attribute and its gradient through `gradients`, which is all
    # SGD needs.
    for t in trainables:
        # Change the trainable's value by subtracting the learning rate
        # multiplied by the partial of the cost with respect to this
        # trainable.
        partial = t.gradients[t]
        t.value -= learning_rate * partial

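A minimal check of the update rule (a sketch, not part of the original notebook): with the default learning rate of 1e-2, a gradient of [0.5, -0.5] should move a value of [1, 2] to [0.995, 2.005].

In [ ]:
# Hypothetical trainable with a hand-set gradient.
t = Input()
t.value = np.array([1., 2.])
t.gradients = {t: np.array([0.5, -0.5])}

sgd_update([t])
print(t.value)  # [ 0.995  2.005]
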
In [22]:
if __name__ == "__main__":
    """
    Check out the new network architecture and dataset!

    Notice that the weights and biases are
    generated randomly.

    No need to change anything, but feel free to tweak
    to test your network, play around with the epochs, batch size, etc!
    """

    #import numpy as np
    # NOTE: `load_boston` was removed in scikit-learn 1.2, so this cell
    # needs an older scikit-learn release to run.
    from sklearn.datasets import load_boston
    from sklearn.utils import resample
    #from miniflow import *

    # Load data
    data = load_boston()
    X_ = data['data']
    y_ = data['target']

    # Normalize data
    X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

    n_features = X_.shape[1]
    n_hidden = 10
    W1_ = np.random.randn(n_features, n_hidden)
    b1_ = np.zeros(n_hidden)
    W2_ = np.random.randn(n_hidden, 1)
    b2_ = np.zeros(1)

    # Neural network
    X, y = Input(), Input()
    W1, b1 = Input(), Input()
    W2, b2 = Input(), Input()

    l1 = Linear(X, W1, b1)
    s1 = Sigmoid(l1)
    l2 = Linear(s1, W2, b2)
    cost = MSE(y, l2)

    feed_dict = {
        X: X_,
        y: y_,
        W1: W1_,
        b1: b1_,
        W2: W2_,
        b2: b2_
    }

    epochs = 100
    # Total number of examples
    m = X_.shape[0]
    batch_size = 10
    steps_per_epoch = m // batch_size

    graph = topological_sort(feed_dict)
    trainables = [W1, b1, W2, b2]

    print("Total number of examples = {}".format(m))

    # Step 4: repeat steps 1-3 below for a fixed number of epochs.
    for i in range(epochs):
        loss = 0
        for j in range(steps_per_epoch):
            # Step 1: randomly sample a batch of examples.
            X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

            # Reset value of X and y Inputs
            X.value = X_batch
            y.value = y_batch

            # Step 2: run the forward and backward passes.
            forward_and_backward(graph)

            # Step 3: update the trainable parameters with SGD.
            sgd_update(trainables)

            loss += graph[-1].value

        print("Epoch: {}, Loss: {:.3f}".format(i+1, loss/steps_per_epoch))


Total number of examples = 506
Epoch: 1, Loss: 123.825
Epoch: 2, Loss: 42.457
Epoch: 3, Loss: 32.192
Epoch: 4, Loss: 23.800
Epoch: 5, Loss: 22.923
Epoch: 6, Loss: 24.116
Epoch: 7, Loss: 21.650
Epoch: 8, Loss: 16.218
Epoch: 9, Loss: 22.868
Epoch: 10, Loss: 15.636
Epoch: 11, Loss: 15.865
Epoch: 12, Loss: 19.587
Epoch: 13, Loss: 16.375
Epoch: 14, Loss: 14.429
Epoch: 15, Loss: 11.867
Epoch: 16, Loss: 14.157
Epoch: 17, Loss: 10.818
Epoch: 18, Loss: 10.327
Epoch: 19, Loss: 12.713
Epoch: 20, Loss: 12.505
Epoch: 21, Loss: 11.484
Epoch: 22, Loss: 11.438
Epoch: 23, Loss: 13.863
Epoch: 24, Loss: 10.978
Epoch: 25, Loss: 10.375
Epoch: 26, Loss: 12.806
Epoch: 27, Loss: 9.414
Epoch: 28, Loss: 8.957
Epoch: 29, Loss: 10.376
Epoch: 30, Loss: 8.995
Epoch: 31, Loss: 10.323
Epoch: 32, Loss: 8.733
Epoch: 33, Loss: 9.311
Epoch: 34, Loss: 10.067
Epoch: 35, Loss: 8.211
Epoch: 36, Loss: 10.174
Epoch: 37, Loss: 9.837
Epoch: 38, Loss: 8.276
Epoch: 39, Loss: 8.089
Epoch: 40, Loss: 7.627
Epoch: 41, Loss: 9.952
Epoch: 42, Loss: 7.967
Epoch: 43, Loss: 7.228
Epoch: 44, Loss: 9.272
Epoch: 45, Loss: 8.208
Epoch: 46, Loss: 9.076
Epoch: 47, Loss: 9.954
Epoch: 48, Loss: 6.102
Epoch: 49, Loss: 8.428
Epoch: 50, Loss: 8.720
Epoch: 51, Loss: 6.860
Epoch: 52, Loss: 8.193
Epoch: 53, Loss: 7.883
Epoch: 54, Loss: 8.274
Epoch: 55, Loss: 8.986
Epoch: 56, Loss: 6.787
Epoch: 57, Loss: 8.215
Epoch: 58, Loss: 7.557
Epoch: 59, Loss: 7.798
Epoch: 60, Loss: 7.415
Epoch: 61, Loss: 6.647
Epoch: 62, Loss: 6.761
Epoch: 63, Loss: 7.117
Epoch: 64, Loss: 5.865
Epoch: 65, Loss: 6.205
Epoch: 66, Loss: 7.735
Epoch: 67, Loss: 5.422
Epoch: 68, Loss: 6.805
Epoch: 69, Loss: 8.060
Epoch: 70, Loss: 6.639
Epoch: 71, Loss: 5.402
Epoch: 72, Loss: 5.592
Epoch: 73, Loss: 6.530
Epoch: 74, Loss: 7.273
Epoch: 75, Loss: 6.947
Epoch: 76, Loss: 8.749
Epoch: 77, Loss: 8.076
Epoch: 78, Loss: 8.428
Epoch: 79, Loss: 7.837
Epoch: 80, Loss: 6.462
Epoch: 81, Loss: 6.372
Epoch: 82, Loss: 5.514
Epoch: 83, Loss: 6.947
Epoch: 84, Loss: 5.753
Epoch: 85, Loss: 6.127
Epoch: 86, Loss: 6.787
Epoch: 87, Loss: 6.898
Epoch: 88, Loss: 6.314
Epoch: 89, Loss: 7.177
Epoch: 90, Loss: 6.255
Epoch: 91, Loss: 5.862
Epoch: 92, Loss: 5.935
Epoch: 93, Loss: 6.686
Epoch: 94, Loss: 5.625
Epoch: 95, Loss: 6.508
Epoch: 96, Loss: 5.768
Epoch: 97, Loss: 6.648
Epoch: 98, Loss: 6.549
Epoch: 99, Loss: 7.118
Epoch: 100, Loss: 6.052

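The notebook stops at the training loop. One possible follow-up, assuming the training cell above has just been run (a hedged sketch, not part of the original): reuse the sorted graph for a forward-only pass over the full normalized dataset and read the network's predictions off the last `Linear` node, `l2`.

In [ ]:
# Forward pass only; no backward pass or parameter update.
X.value = X_
y.value = y_
for n in graph:
    n.forward()

predictions = l2.value.flatten()
print("MSE on the full dataset: {:.3f}".format(graph[-1].value))
print("First five predictions:", predictions[:5])
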
In [ ]: