In [249]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
The sigmoid function:
In [103]:
x = [i for i in range(-10, 10)]
#print(x)
def sigmoid(num):
    return 1.0 / (1.0 + np.exp(-num))
plt.plot(x, [sigmoid(i) for i in x])
plt.show()
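The sigmoid's derivative has the convenient closed form σ′(x) = σ(x)(1 − σ(x)), which the gradient descent code below relies on. A quick numerical check of that identity (a minimal sketch using a central difference):
In [ ]:
# Check sigmoid'(x) == sigmoid(x) * (1 - sigmoid(x)) against a central difference
eps = 1e-6
for x0 in [-2.0, 0.0, 3.0]:
    numeric = (sigmoid(x0 + eps) - sigmoid(x0 - eps)) / (2 * eps)
    analytic = sigmoid(x0) * (1 - sigmoid(x0))
    print(x0, numeric, analytic)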
In [108]:
from IPython.display import YouTubeVideo
YouTubeVideo('29PmNG7fuuM', width="560")
Out[108]:
In [113]:
# Defining the sigmoid function for activations
def sigmoid(x):
    return 1/(1+np.exp(-x))

# Derivative of the sigmoid function
def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

x = np.array([0.1, 0.3])
y = 0.2
weights = np.array([-0.8, 0.5])

# The learning rate, eta in the weight step equation
learnrate = 0.5

# The neural network output
nn_output = sigmoid(x[0]*weights[0] + x[1]*weights[1])
# or nn_output = sigmoid(np.dot(x, weights))

# Output error
error = y - nn_output

# Error gradient
error_grad = error * sigmoid_prime(np.dot(x, weights))

# Gradient descent step
del_w = [learnrate * error_grad * x[0],
         learnrate * error_grad * x[1]]
# or del_w = learnrate * error_grad * x
Below, you'll calculate one gradient descent step for the weights of a simple network with two inputs and one output unit with a sigmoid activation function.
Your goal here is to calculate the correct weight step using gradient descent. Remember that the weight step is the learning rate times the error term times the input values, where the error term is the output error scaled by the derivative of the activation at the unit's input:
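$$\delta = (y - \hat{y}) \, f'(h), \qquad \Delta w_i = \eta \, \delta \, x_i$$
Here $f$ is the sigmoid, $h = \sum_i w_i x_i$ is the unit's input, and $\eta$ is the learning rate, matching `learnrate`, `error`, and `del_w` in the cell below.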
In [119]:
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1/(1+np.exp(-x))
learnrate = 0.5
x = np.array([1, 2])
y = np.array(0.5)
# Initial weights
w = np.array([0.5, -0.5])
# Calculate one gradient descent step for each weight
# TODO: Calculate output of neural network
nn_output = sigmoid(np.dot(x,w))
# TODO: Calculate error of neural network
error = y - nn_output
# TODO: Calculate change in weights
del_w = learnrate * error * nn_output * (1 - nn_output) * x
print('Neural Network output:')
print(nn_output)
print('Amount of Error:')
print(error)
print('Change in Weights:')
print(del_w)
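A single step only moves the weights a little; gradient descent repeats it. A short sketch (iterating the same update on a copy of the weights) shows the error shrinking:
In [ ]:
# Repeat the gradient step a few times on a copy of the weights (sketch)
w_iter = w.copy()
for step in range(5):
    out = sigmoid(np.dot(x, w_iter))
    err = y - out
    w_iter = w_iter + learnrate * err * out * (1 - out) * x
    print('step', step, 'error', err)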
Next, use gradient descent to train a network on graduate school admissions data (found at http://www.ats.ucla.edu/stat/data/binary.csv). This dataset has three input features: GRE score, GPA, and the rank of the undergraduate school (numbered 1 through 4). Institutions with rank 1 have the highest prestige, those with rank 4 have the lowest.
In [271]:
admissions = pd.read_csv('data/binary.csv')
admissions.head()
Out[271]:
In [272]:
data = pd.concat([admissions, pd.get_dummies(admissions['rank'], prefix='rank')], axis=1)
data = data.drop('rank', axis=1)
data.head()
Out[272]:
In [273]:
# Standardize features
for field in ['gre', 'gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:, field] = (data[field] - mean) / std
data.head()
Out[273]:
In [274]:
# Split off random 10% of the data for testing
np.random.seed(42)
sample = np.random.choice(data.index, size=int(len(data)*0.9), replace=False)
data, test_data = data.loc[sample], data.drop(sample)
data.shape, test_data.shape
Out[274]:
In [275]:
# Split into features and targets
features, targets = data.drop('admit', axis=1), data['admit']
features_test, targets_test = test_data.drop('admit', axis=1), test_data['admit']
Now the actual implementation, using the dataset prepared above: `features`, `targets`, `features_test`, `targets_test`.
In [222]:
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

# Use the same seed to make debugging easier
np.random.seed(42)

n_records, n_features = features.shape
last_loss = None

# Initialize weights
weights = np.random.normal(scale=1 / n_features**.5, size=n_features)

# Neural network hyperparameters
epochs = 10000
learnrate = 0.015

for e in range(epochs):
    del_w = np.zeros(weights.shape)
    # Loop through all records, x is the input, y is the target
    for x, y in zip(features.values, targets):
        # TODO: Calculate the output
        output = sigmoid(np.dot(x, weights))
        # TODO: Calculate the error
        error = y - output
        # TODO: Calculate change in weights
        del_w += error * output * (1 - output) * x
    # TODO: Update weights
    weights += learnrate * del_w / n_records

    # Printing out the mean squared error on the training set
    if e % (epochs / 10) == 0:
        out = sigmoid(np.dot(features, weights))
        loss = np.mean((out - targets) ** 2)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, " WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

# Calculate accuracy on test data
test_out = sigmoid(np.dot(features_test, weights))
predictions = test_out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))
In [255]:
# Number of records and input units
n_records, n_inputs = features.shape
# Number of hidden units
n_hidden = 2
weights = np.random.normal(0, scale=1 / n_inputs**.5, size=(n_inputs, n_hidden))
weights
Out[255]:
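The 1/√n scale keeps each hidden unit's input roughly unit-variance when the inputs themselves are standardized, so the sigmoids start out near their linear regime rather than saturated. A quick check of that (a sketch; the one-hot rank columns aren't standardized, so the values will only be near 1):
In [ ]:
# Standard deviation of each hidden unit's input over the training data
hidden_in = np.dot(features.values, weights)
print(hidden_in.std(axis=0))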
In [256]:
import numpy as np
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1/(1+np.exp(-x))
# Network size
N_input = 4
N_hidden = 3
N_output = 2
np.random.seed(42)
# Make some fake data
X = np.random.randn(4)
weights_in_hidden = np.random.normal(0, scale=0.1, size=(N_input, N_hidden))
weights_hidden_out = np.random.normal(0, scale=0.1, size=(N_hidden, N_output))
# TODO: Make a forward pass through the network
hidden_layer_in = np.dot(X, weights_in_hidden)
hidden_layer_out = sigmoid(hidden_layer_in)
print('Hidden-layer Output:')
print(hidden_layer_out)
output_layer_in = np.dot(hidden_layer_out, weights_hidden_out)
output_layer_out = sigmoid(output_layer_in)
print('Output-layer Output:')
print(output_layer_out)
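It's easy to get the matrix orientations wrong in a forward pass; a quick shape check (a sketch) confirms the data flows (N_input,) → (N_hidden,) → (N_output,):
In [ ]:
# Each dot product should reduce to the next layer's size
print(X.shape, hidden_layer_out.shape, output_layer_out.shape)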
In [290]:
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

x = np.array([0.5, 0.1, -0.2])
target = 0.6
learnrate = 0.5

weights_input_hidden = np.array([[0.5, -0.6],
                                 [0.1, -0.2],
                                 [0.1, 0.7]])
weights_hidden_output = np.array([0.1, -0.3])

## Forward pass
hidden_layer_input = np.dot(x, weights_input_hidden)
hidden_layer_output = sigmoid(hidden_layer_input)
output_layer_in = np.dot(hidden_layer_output, weights_hidden_output)
output = sigmoid(output_layer_in)

## Backwards pass
## TODO: Calculate error
error = target - output
# TODO: Calculate error gradient for output layer
del_err_output = error * output * (1 - output)
# TODO: Calculate error gradient for hidden layer
del_err_hidden = np.dot(del_err_output, weights_hidden_output) * \
                 hidden_layer_output * (1 - hidden_layer_output)
# TODO: Calculate change in weights for hidden layer to output layer
delta_w_h_o = learnrate * del_err_output * hidden_layer_output
# TODO: Calculate change in weights for input layer to hidden layer
delta_w_i_h = learnrate * del_err_hidden * x[:, None]

print('Change in weights for hidden layer to output layer:')
print(delta_w_h_o)
print('Change in weights for input layer to hidden layer:')
print(delta_w_i_h)
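A standard way to verify these updates is a finite-difference gradient check. The sketch below defines a hypothetical `loss_fn` helper for the squared-error loss and compares its central-difference gradient with the analytic one implied by `delta_w_h_o` (the weight step is −η times the loss gradient, so the signs flip):
In [ ]:
# Gradient check for the hidden->output weights (loss_fn is a helper
# defined only for this check, not part of the exercise)
def loss_fn(w_ho):
    hidden = sigmoid(np.dot(x, weights_input_hidden))
    out = sigmoid(np.dot(hidden, w_ho))
    return 0.5 * (target - out) ** 2

eps = 1e-6
numeric_grad = np.zeros_like(weights_hidden_output)
for i in range(len(weights_hidden_output)):
    w_plus, w_minus = weights_hidden_output.copy(), weights_hidden_output.copy()
    w_plus[i] += eps
    w_minus[i] -= eps
    numeric_grad[i] = (loss_fn(w_plus) - loss_fn(w_minus)) / (2 * eps)

# These two should agree to several decimal places
print(numeric_grad)
print(-delta_w_h_o / learnrate)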
Now implement the full training loop, backprop.py, on the admissions data:
In [291]:
import numpy as np
#from data_prep import features, targets, features_test, targets_test
np.random.seed(42)
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

# Hyperparameters
n_hidden = 3  # number of hidden units
epochs = 500
learnrate = 0.02

n_records, n_features = features.shape
last_loss = None

# Initialize weights
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)

for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x, y in zip(features.values, targets):
        ## Forward pass ##
        # TODO: Calculate the output
        hidden_input = np.dot(x, weights_input_hidden)
        hidden_activations = sigmoid(hidden_input)
        output = sigmoid(np.dot(hidden_activations, weights_hidden_output))

        ## Backward pass ##
        # TODO: Calculate the error
        error = y - output
        # TODO: Calculate error gradient in output unit
        output_error = error * output * (1 - output)
        # TODO: Propagate errors to hidden layer
        hidden_error = np.dot(output_error, weights_hidden_output) \
                       * hidden_activations * (1 - hidden_activations)
        # TODO: Update the change in weights
        del_w_hidden_output += output_error * hidden_activations
        del_w_input_hidden += hidden_error * x[:, None]

    # TODO: Update weights
    weights_input_hidden += learnrate * del_w_input_hidden / n_records
    weights_hidden_output += learnrate * del_w_hidden_output / n_records

    # Printing out the mean squared error on the training set
    if e % (epochs / 10) == 0:
        hidden_activations = sigmoid(np.dot(features, weights_input_hidden))
        out = sigmoid(np.dot(hidden_activations,
                             weights_hidden_output))
        loss = np.mean((out - targets) ** 2)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, " WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

# Calculate accuracy on test data
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))
In [ ]: