In [26]:
import numpy as np

trace = False
trace_forward = False

class FC:
    '''
    This class is not thread safe.
    '''
    def __init__(self, in_num, out_num, lr = 0.1):
        self._in_num = in_num
        self._out_num = out_num
        self.w = np.random.randn(in_num, out_num)
        self.b = np.zeros((out_num, 1))
        self.lr = lr
    def _sigmoid(self, in_data):
        return 1 / (1 + np.exp(-in_data))
    def forward(self, in_data):
        
        self.topVal = self._sigmoid(np.dot(self.w.T, in_data) + self.b)
        if trace_forward:
            print '=== topVal {0} ==='.format(self.topVal.shape)
            print self.topVal
        self.bottomVal = in_data
        return self.topVal
    def backward(self, loss):
        residual_z = loss * self.topVal * (1 - self.topVal)
        grad_w = np.dot(self.bottomVal, residual_z.T)
        grad_b = np.sum(residual_z)
        self.w -= self.lr * grad_w
        self.b -= self.lr * grad_b
        residual_x = np.dot(self.w, residual_z)
        if trace:
            print '=== z {0}==='.format(residual_z.shape)
            print residual_z
            print '=== grad_w {0}==='.format(grad_w.shape)
            print grad_w
            print '=== grad_b {0}==='.format(grad_b.shape)
            print grad_b
            print '=== self.w {0}==='.format(self.w.shape)
            print self.w
            print '=== self.b {0} ==='.format(self.b.shape)
            print self.b
            print '=== residual {0} ==='.format(residual_x.shape)
            print residual_x
        return residual_x

class SquareLoss:
    '''
    Same as above, not thread safe
    '''
    def forward(self, y, t):
        self.loss = y - t
        if trace:
            print '=== Loss ==='.format(self.loss.shape)
            print self.loss
        return np.sum(self.loss * self.loss) /  self.loss.shape[1] / 2
    def backward(self):
        if trace:
            print '=== loss {0} ==='.format(self.loss.shape)
            print self.loss
        return self.loss

class Net:
    def __init__(self, input_num=2, hidden_num=4, out_num=1, lr=0.1):
        self.fc1 = FC(input_num, hidden_num, lr)
        self.fc2 = FC(hidden_num, out_num, lr)
        self.loss = SquareLoss()
    def train(self, X, y): # X are arranged by col
        for i in range(10000):
            # forward step
            layer1out = self.fc1.forward(X)
            layer2out = self.fc2.forward(layer1out)
            loss = self.loss.forward(layer2out, y)
            if i % 1000 == 0:
                print 'iter = {0}, loss ={1}'.format(i, loss)
                print '=== Label vs Prediction ==='
                print 't={0}'.format(y)
                print 'y={0}'.format(layer2out)
            # backward step
            layer2loss = self.loss.backward()
            layer1loss = self.fc2.backward(layer2loss)
            saliency = self.fc1.backward(layer1loss)
        layer1out = self.fc1.forward(X)
        layer2out = self.fc2.forward(layer1out)
        print '=== Final ==='
        print 'X={0}'.format(X)
        print 't={0}'.format(y)
        print 'y={0}'.format(layer2out)

In [25]:
# example from https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
X = np.array([[0.05, 0.1]]).T
y = np.array([[0.01, 0.99]]).T

net = Net(2,2,2,0.5)
net.fc1.w = np.array([[.15,.25], [.2, .3]])
net.fc1.b = np.array([[.35], [.35]])
net.fc2.w = np.array([[.4,.5], [.45,.55]])
net.fc2.b = np.array([[.6], [.6]])
net.train(X,y)


iter = 0, loss =0.29837110876
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.75136507]
 [ 0.77292847]]
iter = 1000, loss =0.00034637971269
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.02873778]
 [ 0.97151609]]
iter = 2000, loss =0.000119529215903
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.0210022 ]
 [ 0.97913675]]
iter = 3000, loss =6.0258461253e-05
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01781127]
 [ 0.98228631]]
iter = 4000, loss =3.54968683217e-05
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01599555]
 [ 0.98407994]]
iter = 5000, loss =2.27849062666e-05
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01480398]
 [ 0.98525747]]
iter = 6000, loss =1.5445159009e-05
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01395575]
 [ 0.98609585]]
iter = 7000, loss =1.08721488321e-05
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01331933]
 [ 0.98672489]]
iter = 8000, loss =7.86715065902e-06
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01282401]
 [ 0.98721445]]
iter = 9000, loss =5.81352036324e-06
=== Label vs Prediction ===
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01242797]
 [ 0.98760584]]
=== Final ===
X=[[ 0.05]
 [ 0.1 ]]
t=[[ 0.01]
 [ 0.99]]
y=[[ 0.01210474]
 [ 0.98792522]]

In [27]:
# and operation
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
y = np.array([[0],[0],[0],[1]]).T

net = Net(2,4,1,0.1)
net.train(X,y)


iter = 0, loss =0.105256639066
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.40930536  0.4617139   0.36923076  0.4299025 ]]
iter = 1000, loss =0.0229368486589
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.04445123  0.22684496  0.17747671  0.68605373]]
iter = 2000, loss =0.00657594469044
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.01057127  0.11332809  0.11016211  0.83411794]]
iter = 3000, loss =0.00322081318498
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00517544  0.07831654  0.07871461  0.88419737]]
iter = 4000, loss =0.00201059297485
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00336374  0.06171018  0.0624756   0.90855558]]
iter = 5000, loss =0.00142205310651
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00249895  0.05189239  0.05257126  0.92309992]]
iter = 6000, loss =0.00108341055769
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00200067  0.04532728  0.04585262  0.93287134]]
iter = 7000, loss =0.000866734887908
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00167856  0.04058314  0.04096262  0.9399489 ]]
iter = 8000, loss =0.000717647908313
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00145369  0.03696819  0.0372232   0.94534786]]
iter = 9000, loss =0.000609513241467
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00128784  0.03410575  0.03425751  0.94962473]]
=== Final ===
X=[[0 0 1 1]
 [0 1 0 1]]
t=[[0 0 0 1]]
y=[[ 0.00116042  0.03177232  0.03183889  0.95311123]]