In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [64]:
# X = (hours sleeping, hours studying), y = Score on test
X = np.array(([3,5], [5,1], [10,2]), dtype=float)
y = np.array(([75], [82], [93]), dtype=float)

# Normalize
X = X/np.amax(X, axis=0)
y = y/100 #Max test score is 100

In [65]:
class NeuralNetwork:
    def __init__(self):
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        
        #initialize the random weights
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
    
    def forward(self,X):
        #propagate inputs through the network
        self.z2 = np.dot(X,self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2,self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat
        
    def sigmoid(self,Z):
        #apply the sigmoid activation function
        return (1/(1+np.exp(-Z)))
    
    def sigmoidPrime(self,Z):
        #derivative of the sigmoid activation function
        return (np.exp(-Z)/((1+np.exp(-Z))**2))
    
    def costFunction(self,X,y):
        self.yHat = self.forward(X)
        J = 0.5*np.sum((y-self.yHat)**2)
        return J

    def costFunctionPrime(self, X, y):
        #Compute derivatives with respect to W1 and W2 for a given X and y:
        self.yHat = self.forward(X)
        
        delta3 = np.multiply(-(y-self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)
        
        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)  
        
        return dJdW1, dJdW2
    
    def getParams(self):
        params = np.concatenate((self.W1.ravel(),self.W2.ravel()))
        return params
    
    def setParams(self, params):
        #Set W1 and W2 using a single parameter vector.
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize , self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end], (self.hiddenLayerSize, self.outputLayerSize))
        
    def computeGradients(self,X,Y):
        djdw1,djdw2 = self.costFunctionPrime(X,Y)
        return np.concatenate((djdw1.ravel(),djdw2.ravel()))

def computeNumericalGradient(N,X,Y):
    #Estimate the gradient numerically with central differences
    params = N.getParams()
    numgrad = np.zeros(params.shape)
    perturb = np.zeros(params.shape)
    e = 1e-4
    for p in range(len(params)):
        #Perturb one parameter at a time and measure the change in cost
        perturb[p] = e
        N.setParams(params+perturb)
        loss2 = N.costFunction(X,Y)
        N.setParams(params-perturb)
        loss1 = N.costFunction(X,Y)

        numgrad[p] = (loss2-loss1)/(2*e)
        perturb[p] = 0
    #Restore the original parameters
    N.setParams(params)
    return numgrad

In [72]:
NN = NeuralNetwork()
grad = NN.computeGradients(X,y)
grad


Out[72]:
array([ 0.00266876,  0.03650722, -0.0373582 ,  0.00216782,  0.02967355,
       -0.03335717, -0.18597606, -0.17562522, -0.16134461])

In [71]:
NN = NeuralNetwork()
numGrad = computeNumericalGradient(NN,X,y)
numGrad


Out[71]:
array([ 0.00168613,  0.00214985,  0.00064884,  0.00564361,  0.00770778,
        0.00203744,  0.00784743,  0.0081341 ,  0.00850076])
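
Note that grad and numGrad above were computed on two separate NeuralNetwork() instances, each with its own random weights, so the two vectors are not expected to match. A gradient check has to use the same instance for both passes (a sketch, not part of the original run):

In [ ]:
NN = NeuralNetwork()
grad = NN.computeGradients(X,y)
numGrad = computeNumericalGradient(NN,X,y)
#Relative difference should be very small (roughly 1e-8) when backprop is correct
np.linalg.norm(grad-numGrad)/np.linalg.norm(grad+numGrad)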

In [26]:
NN.costFunction(X,y)


Out[26]:
array([ 0.00667323,  0.04546069,  0.20898335])

In [ ]:
NN.sigmoid(np.random.randn(3,3))

In [48]:
NN.costFunctionPrime(X,y)


Out[48]:
(array([[ 0.01098352,  0.03332509,  0.03305924],
        [ 0.00821445,  0.02884472,  0.02919704]]),
 array([[ 0.05907989,  0.04177399,  0.01030872],
        [ 0.03339024,  0.02361718,  0.00584798],
        [ 0.02866239,  0.0202368 ,  0.00491755]]))
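
costFunctionPrime returns the gradients, but the notebook never uses them to actually train this numpy network. A minimal gradient-descent sketch (the learning rate and iteration count are arbitrary choices for illustration, not from the original):

In [ ]:
NN = NeuralNetwork()
print(NN.costFunction(X,y))   #cost before training
learning_rate = 3.0           #arbitrary step size for illustration
for i in range(1000):
    dJdW1, dJdW2 = NN.costFunctionPrime(X,y)
    NN.W1 = NN.W1 - learning_rate*dJdW1
    NN.W2 = NN.W2 - learning_rate*dJdW2
print(NN.costFunction(X,y))   #cost after training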

In [136]:
from keras.models import Sequential
from keras.layers import Dense

X = np.array(([5,2], [5,1], [5,3]), dtype=float)
y = np.array(([75], [62], [93]), dtype=float)

# Normalize
X = X/np.amax(X, axis=0)
y = y/100 #Max test score is 100
model = Sequential()
model.add(Dense(3, input_dim=2, activation='relu'))
model.add(Dense(3,  activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam',metrics=['accuracy'])
model.fit(X,y)
model.predict(np.array([[10,2]]))


Epoch 1/10
3/3 [==============================] - 0s - loss: 0.6039 - acc: 0.0000e+00
Epoch 2/10
3/3 [==============================] - 0s - loss: 0.6008 - acc: 0.0000e+00
Epoch 3/10
3/3 [==============================] - 0s - loss: 0.5976 - acc: 0.0000e+00
Epoch 4/10
3/3 [==============================] - 0s - loss: 0.5945 - acc: 0.0000e+00
Epoch 5/10
3/3 [==============================] - 0s - loss: 0.5913 - acc: 0.0000e+00
Epoch 6/10
3/3 [==============================] - 0s - loss: 0.5882 - acc: 0.0000e+00
Epoch 7/10
3/3 [==============================] - 0s - loss: 0.5850 - acc: 0.0000e+00
Epoch 8/10
3/3 [==============================] - 0s - loss: 0.5818 - acc: 0.0000e+00
Epoch 9/10
3/3 [==============================] - 0s - loss: 0.5787 - acc: 0.0000e+00
Epoch 10/10
3/3 [==============================] - 0s - loss: 0.5755 - acc: 0.0000e+00
Out[136]:
array([[ 0.02088054]], dtype=float32)
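
With the default 10 epochs and only three samples, the model has barely moved from its random initial weights, which is why the prediction above is so far from the targets. A sketch of training longer and normalizing the query the same way as the training data (epochs= is the Keras 2 argument name; older Keras 1 releases used nb_epoch=):

In [ ]:
model.fit(X, y, epochs=1000, verbose=0)
#scale the query by the same column maxima (5 and 3) used to normalize the training data
model.predict(np.array([[10, 2]])/np.array([5., 3.]))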

In [1]:
import numpy as np
X = np.array([ [0,0,1],[0,1,1],[1,0,1],[1,1,1] ])
y = np.array([[0,1,1,0]]).T

In [2]:
X


Out[2]:
array([[0, 0, 1],
       [0, 1, 1],
       [1, 0, 1],
       [1, 1, 1]])

In [3]:
y


Out[3]:
array([[0],
       [1],
       [1],
       [0]])

In [4]:
syn0 = np.random.randn(3,4)
syn1 = np.random.randn(4,1)

In [5]:
print(syn0)
print(syn1)


[[ 1.00304255 -0.13323203  0.05601549  0.07343647]
 [-0.4067525  -1.21630025 -0.86916172 -1.41188771]
 [ 0.2567302  -2.03621163  0.00615706  0.21373147]]
[[-0.54676742]
 [-1.06529476]
 [-0.46787434]
 [-0.50851757]]

In [7]:
for i in range(1000):
    l1 = 1/(1+np.exp(-np.dot(X,syn0)))
    l2 = 1/(1+np.exp(-np.dot(l1,syn1)))
    l2_delta = (y-l2)*(l2*(1-l2))
    l1_delta = l2_delta.dot(syn1.T)*(l1*(1-l1))
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)


In [6]:
syn0


Out[6]:
array([[ 1.00304255, -0.13323203,  0.05601549,  0.07343647],
       [-0.4067525 , -1.21630025, -0.86916172, -1.41188771],
       [ 0.2567302 , -2.03621163,  0.00615706,  0.21373147]])

In [8]:
import numpy as np

# sigmoid function
def nonlin(x,deriv=False):
    if(deriv==True):
        return x*(1-x)
    return 1/(1+np.exp(-x))
    
# input dataset
X = np.array([  [0,0,1],
                [0,1,1],
                [1,0,1],
                [1,1,1] ])
    
# output dataset            
y = np.array([[0,0,1,1]]).T

# seed random numbers to make calculation
# deterministic (just a good practice)
np.random.seed(1)

# initialize weights randomly with mean 0
syn0 = 2*np.random.random((3,1)) - 1

for iter in range(10000):

    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(l0,syn0))

    # how much did we miss?
    l1_error = y - l1

    # multiply how much we missed by the 
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * nonlin(l1,True)

    # update weights
    syn0 += np.dot(l0.T,l1_delta)

print "Output After Training:"
print l1


Output After Training:
[[ 0.00966449]
 [ 0.00786506]
 [ 0.99358898]
 [ 0.99211957]]
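
Since nonlin and the trained syn0 are still in scope, the single-layer network can also be queried on an input row it was not trained on (an illustrative sketch, not part of the original run):

In [ ]:
#forward pass through the trained single-layer network for a new input pattern
nonlin(np.dot(np.array([[1,1,0]]), syn0))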

In [9]:
import numpy as np

def nonlin(x,deriv=False):
    #when deriv=True, x is assumed to already be the sigmoid output
    if(deriv==True):
        return x*(1-x)

    return 1/(1+np.exp(-x))

X = np.array([[0,0,1],
              [0,1,1],
              [1,0,1],
              [1,1,1]])

y = np.array([[0],
              [1],
              [1],
              [0]])

np.random.seed(1)

# randomly initialize our weights with mean 0
syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1

for j in range(60000):

    # Feed forward through layers 0, 1, and 2
    l0 = X
    l1 = nonlin(np.dot(l0,syn0))
    l2 = nonlin(np.dot(l1,syn1))

    # how much did we miss the target value?
    l2_error = y - l2
    
    if (j % 10000) == 0:
        print("Error:" + str(np.mean(np.abs(l2_error))))
        
    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    l2_delta = l2_error*nonlin(l2,deriv=True)

    # how much did each l1 value contribute to the l2 error (according to the weights)?
    l1_error = l2_delta.dot(syn1.T)
    
    # in what direction is the target l1?
    # were we really sure? if so, don't change too much.
    l1_delta = l1_error * nonlin(l1,deriv=True)

    syn1 += l1.T.dot(l2_delta)
    syn0 += l0.T.dot(l1_delta)


Error:0.496410031903
Error:0.00858452565325
Error:0.00578945986251
Error:0.00462917677677
Error:0.00395876528027
Error:0.00351012256786
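
The loop only prints the running error; to inspect the final predictions of the two-layer network against the targets, the variables left in scope can be printed (a small sketch added for completeness):

In [ ]:
print("Output After Training:")
print(l2)
print("Targets:")
print(y)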