In [39]:
from __future__ import division

In [40]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import math

In [41]:
import random
import string
import seaborn as sns
from scipy import optimize
from scipy.interpolate import griddata

In [42]:
import pandas as pd
from scipy.ndimage import convolve
import scipy.ndimage as nd

In [43]:
%matplotlib inline
import matplotlib.pyplot as plt
from ipywidgets import interact

In [44]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

In [45]:
from sklearn.datasets import load_digits
digits = load_digits()
print(digits.data.shape)


(1797, 64)
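
Each row of digits.data is a flattened 8x8 image with integer pixel values from 0 to 16, and digits.target holds the matching labels; a quick check (nothing beyond what load_digits already provides):

In [ ]:
# images keeps the same pixels as 8x8 arrays; target holds integer labels 0-9
print(digits.images.shape, digits.target.shape, digits.data.max())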

In [46]:
def show_digit(i):
    # display the i-th 8x8 digit image
    plt.matshow(digits.images[i])

In [47]:
interact(show_digit, i=(0, 1796));



In [94]:
class Network:

    def __init__(self, sizes):
        # sizes, e.g. [64, 10, 10], gives the number of units per layer;
        # weights and biases are drawn from a standard normal distribution
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        # propagate a column-vector activation through every layer
        for b, w in zip(self.biases, self.weights):
            a = sigmoid_vec(np.dot(w, a)+b)
        return a

    def SGD(self, train_data, epochs, mini_batch_size, eta, test_data=None):
        # stochastic gradient descent: shuffle the data, split it into
        # mini-batches, and take one gradient step per mini-batch
        if test_data:
            n_test = len(test_data)
        n = len(train_data)
        for j in range(epochs):
            random.shuffle(train_data)
            mini_batches = [
                train_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))


    def update_mini_batch(self, mini_batch, eta):
        # accumulate the backprop gradient over the mini-batch, then take
        # one averaged gradient-descent step (the original subtracted the
        # zero-initialized gradients without ever calling backprop)
        new_b = [np.zeros(b.shape) for b in self.biases]
        new_w = [np.zeros(w.shape) for w in self.weights]
        for t, q in mini_batch:
            delta_b, delta_w = self.backprop(t, q)
            new_b = [nb+db for nb, db in zip(new_b, delta_b)]
            new_w = [nw+dw for nw, dw in zip(new_w, delta_w)]
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, new_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, new_b)]

    def backprop(self, t, q):
        # gradient of the cost for a single (input t, target q) pair
        new_b = [np.zeros(b.shape) for b in self.biases]
        new_w = [np.zeros(w.shape) for w in self.weights]
        # forward pass: store every weighted input z and every activation
        act = t
        acts = [t]
        p = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, act)+b
            p.append(z)
            act = sigmoid_vec(z)
            acts.append(act)
        # backward pass: error at the output layer ...
        delta = self.cost_derivative(acts[-1], q) * sigmoid_prime_vec(p[-1])
        new_b[-1] = delta
        new_w[-1] = np.dot(delta, acts[-2].transpose())
        # ... propagated back through the hidden layers
        for j in range(2, self.num_layers):
            z = p[-j]
            spv = sigmoid_prime_vec(z)
            delta = np.dot(self.weights[-j+1].transpose(), delta) * spv
            new_b[-j] = delta
            new_w[-j] = np.dot(delta, acts[-j-1].transpose())
        return (new_b, new_w)

    def evaluate(self, test_data):
        # count correct predictions; test_data pairs each input with an
        # integer class label, and the prediction is the most active output
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_acts, y):
        # derivative of the quadratic cost w.r.t. the output activations
        return (output_acts-y)

#### Miscellaneous functions
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

# np.exp already broadcasts over arrays, so both functions are vectorized
# as written; keep the *_vec aliases used by the class above
sigmoid_vec = sigmoid
sigmoid_prime_vec = sigmoid_prime
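
A quick shape check on a freshly initialized network (purely illustrative, not part of the training run):

In [ ]:
# a 64-input net maps a 64x1 column vector to a 10x1 column of activations
print(Network([64, 10, 10]).feedforward(np.zeros((64, 1))).shape)  # expect (10, 1)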

In [95]:
# the digits inputs have 64 features and there are 10 classes,
# so the input and output layers must be 64 and 10 units wide
net = Network([64, 10, 10])
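
SGD and evaluate expect lists of (input, target) pairs: inputs as 64x1 column vectors, training targets one-hot encoded, and test targets left as integer labels. A minimal preparation sketch; the names one_hot, training_data, and test_pairs are ours, and the 75/25 split is an arbitrary choice:

In [ ]:
# scale pixels to [0, 1] and hold out a test set
X_train, X_test, y_train, y_test = train_test_split(
    digits.data / 16.0, digits.target, test_size=0.25, random_state=0)

def one_hot(label, n_classes=10):
    # hypothetical helper: a 10x1 column vector with a 1 at the label's index
    v = np.zeros((n_classes, 1))
    v[label] = 1.0
    return v

training_data = [(x.reshape(-1, 1), one_hot(y)) for x, y in zip(X_train, y_train)]
test_pairs = [(x.reshape(-1, 1), y) for x, y in zip(X_test, y_test)]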

In [96]:
# eta = 3.0 is a reasonable starting learning rate for a sigmoid net;
# SGD returns None, so there is nothing to assign
net.SGD(training_data, 30, 10, 3.0, test_data=test_pairs)
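
After training, the held-out accuracy is the fraction of test_pairs that evaluate counts as correct:

In [ ]:
# fraction of test digits classified correctly
print(net.evaluate(test_pairs) / len(test_pairs))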