In [1]:
import random
import numpy as np
from metu.data_utils import load_nextchar_dataset, plain_text_file_to_dataset
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
In [2]:
# Create a small net and some toy data to check your implementations.
# Note that we set the random seed for repeatable experiments.
from cs231n.classifiers.mneural_net_for_regression import NLayerNet
input_size = 4
hidden_size = 10
num_classes = 3
num_inputs = 5
sizes = [input_size+1, 11, num_classes]
def init_toy_model():
    np.random.seed(0)
    return NLayerNet(sizes, std=1e-1)
def init_toy_data():
    np.random.seed(1)
    X = 10 * np.random.randn(num_inputs, input_size+1)
    X[:,input_size] = 1
    y = np.array([[0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 1, 4], [2, 1, 4]])
    return X, y
net = init_toy_model()
X, y = init_toy_data()
In [3]:
scores = net.loss(X)
print 'Your scores:'
print scores
print
In [4]:
loss, _ = net.loss(X, y, reg=0.1)
print loss
In [5]:
# Load the TEXT data
# If your memory turns out to be sufficient, try the following:
#def get_nextchar_data(training_ratio=0.6, val_ratio=0.1):
def get_nextchar_data(training_ratio=0.1, test_ratio=0.06, val_ratio=0.01):
    # Load the nextchar training data
    X, y = load_nextchar_dataset(nextchar_datafile)
    # Subsample the data
    length = len(y)
    num_training = int(length*training_ratio)
    num_val = int(length*val_ratio)
    num_test = min((length-num_training-num_val), int(length*test_ratio))
    mask = range(num_training)
    X_train = X[mask]
    y_train = y[mask]
    mask = range(num_training, num_training+num_test)
    X_test = X[mask]
    y_test = y[mask]
    mask = range(num_training+num_test, num_training+num_test+num_val)
    X_val = X[mask]
    y_val = y[mask]
    return X_train, y_train, X_val, y_val, X_test, y_test
nextchar_datafile = 'metu/dataset/nextchar_data.pkl'
input_size = 5 # Size of the input of the network
#plain_text_file_to_dataset("metu/dataset/ince_memed_1.txt", nextchar_datafile, input_size)
plain_text_file_to_dataset("metu/dataset/shakespeare.txt", nextchar_datafile, input_size)
X_train, y_train, X_val, y_val, X_test, y_test = get_nextchar_data()
#convert labels
#y_train = np.c_[X_train[:,1:], y_train].astype(int)
#y_val = np.c_[X_val[:,1:], y_val].astype(int)
#y_test = np.c_[X_test[:,1:], y_test].astype(int)
#add bias columns
X_train = np.c_[X_train, np.ones(X_train.shape[0])].astype(int)
X_val = np.c_[X_val, np.ones(X_val.shape[0])].astype(int)
X_test = np.c_[X_test, np.ones(X_test.shape[0])].astype(int)
print "Number of instances in the training set: ", len(X_train)
print "Number of instances in the validation set: ", len(X_val)
print "Number of instances in the testing set: ", len(X_test)
In [6]:
# We have loaded the dataset. That wasn't difficult, was it? :)
# Let's look at a few samples
#
from metu.data_utils import int_list_to_string, int_to_charstr
print "Input - Next char to be predicted"
for i in range(1,10):
    print int_list_to_string(X_train[i]) + " - " + int_list_to_string(y_train[i])
In [7]:
# Now, let's train a neural network
input_size = 5
hidden_size = 5000
num_classes = 1
sizes = [input_size+1, hidden_size/100, hidden_size, num_classes]
net = NLayerNet(sizes)
# Train the network
stats = net.train(X_train, y_train, X_val, y_val,
                  num_iters=5000, batch_size=32*4,
                  learning_rate=6e-7, learning_rate_decay=0.97,
                  reg=10, verbose=True)
# Predict on the validation set
val_err = np.sum(np.square(net.predict(X_val) - y_val), axis=1).mean()
print 'Validation error: ', val_err
test_err = np.sum(np.square(net.predict(X_test) - y_test), axis=1).mean()
print 'Test error: ', test_err
In [8]:
# Plot the loss function and train / validation errors
plt.subplot(2, 1, 1)
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
train = plt.plot(stats['train_err_history'], label='train')
val = plt.plot(stats['val_err_history'], label='val')
plt.legend(loc='upper right', shadow=True)
plt.title('Classification error history')
plt.xlabel('Epoch')
plt.ylabel('Classification error')
plt.show()
It seems like we are stuck in a local minimum that is too narrow and steep. I reached that conclusion by trying different learning rates, architectures, regularization strengths and batch sizes: most of those runs missed that optimum and failed to converge to any parameter set better than our original guess.
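For reference, here is a minimal sketch of the kind of sweep behind that conclusion; the grids and iteration count below are illustrative placeholders, not the exact values that were tried.
In [ ]:
# Illustrative hyperparameter sweep (placeholder grids; shorter runs than the real experiments)
best_err, best_cfg = None, None
for lr in [6e-8, 6e-7, 6e-6]:
    for reg in [1, 10, 100]:
        sweep_net = NLayerNet(sizes)
        sweep_net.train(X_train, y_train, X_val, y_val,
                        num_iters=1000, batch_size=32*4,
                        learning_rate=lr, learning_rate_decay=0.97,
                        reg=reg, verbose=False)
        err = np.sum(np.square(sweep_net.predict(X_val) - y_val), axis=1).mean()
        print "lr=%g, reg=%g, val_err=%g" % (lr, reg, err)
        if best_err is None or err < best_err:
            best_err, best_cfg = err, (lr, reg)
print "Best configuration (lr, reg): ", best_cfg, " with validation error ", best_err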
Next, we will change the targets from just the next character to the previous characters plus the character to be predicted. With that approach I am trying to achieve more overfitting on the data, which I believe is necessary: if we think about how the next character can be predicted from the previous ones, it essentially comes down to memorizing the training text.
In [ ]:
# Load the TEXT data
# If your memory turns out to be sufficient, try the following:
#def get_nextchar_data(training_ratio=0.6, val_ratio=0.1):
def get_nextchar_data(training_ratio=0.1, test_ratio=0.06, val_ratio=0.01):
    # Load the nextchar training data
    X, y = load_nextchar_dataset(nextchar_datafile)
    # Subsample the data
    length = len(y)
    num_training = int(length*training_ratio)
    num_val = int(length*val_ratio)
    num_test = min((length-num_training-num_val), int(length*test_ratio))
    mask = range(num_training)
    X_train = X[mask]
    y_train = y[mask]
    mask = range(num_training, num_training+num_test)
    X_test = X[mask]
    y_test = y[mask]
    mask = range(num_training+num_test, num_training+num_test+num_val)
    X_val = X[mask]
    y_val = y[mask]
    return X_train, y_train, X_val, y_val, X_test, y_test
nextchar_datafile = 'metu/dataset/nextchar_data.pkl'
input_size = 5 # Size of the input of the network
#plain_text_file_to_dataset("metu/dataset/ince_memed_1.txt", nextchar_datafile, input_size)
plain_text_file_to_dataset("metu/dataset/shakespeare.txt", nextchar_datafile, input_size)
X_train, y_train, X_val, y_val, X_test, y_test = get_nextchar_data()
#convert labels
y_train = np.c_[X_train, y_train].astype(int)
y_val = np.c_[X_val, y_val].astype(int)
y_test = np.c_[X_test, y_test].astype(int)
#add bias columns
X_train = np.c_[X_train, np.ones(X_train.shape[0])].astype(int)
X_val = np.c_[X_val, np.ones(X_val.shape[0])].astype(int)
X_test = np.c_[X_test, np.ones(X_test.shape[0])].astype(int)
print "Number of instances in the training set: ", len(X_train)
print "Number of instances in the validation set: ", len(X_val)
print "Number of instances in the testing set: ", len(X_test)
# We have loaded the dataset. That wasn't difficult, was it? :)
# Let's look at a few samples
#
from metu.data_utils import int_list_to_string, int_to_charstr
print "Input - Next char to be predicted"
for i in range(1,10):
    print int_list_to_string(X_train[i]) + " - " + int_list_to_string(y_train[i])
In [ ]:
# Now, let's train a neural network
input_size = 5
hidden_size = 7000
num_classes = 6 #this time we try to keep 5 input characters also
sizes = [input_size+1, hidden_size, num_classes]
net = NLayerNet(sizes)
# Train the network
stats = net.train(X_train, y_train, X_val, y_val,
                  num_iters=5000, batch_size=32*4,
                  learning_rate=.1e-6, learning_rate_decay=0.95,
                  reg=5, verbose=True)
# Predict on the validation set
val_err = np.sum(np.square(net.predict(X_val) - y_val), axis=1).mean()
print 'Validation error: ', val_err
test_err = np.sum(np.square(net.predict(X_test) - y_test), axis=1).mean()
print 'Test error: ', test_err
# Plot the loss function and train / validation errors
plt.subplot(2, 1, 1)
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
train = plt.plot(stats['train_err_history'], label='train')
val = plt.plot(stats['val_err_history'], label='val')
plt.legend(loc='upper right', shadow=True)
plt.title('Classification error history')
plt.xlabel('Epoch')
plt.ylabel('Classification error')
plt.show()
In [ ]:
# Show some sample outputs:
print "Input - predicted chars - true chars"
for i in range(0,100):
    print int_list_to_string(X_test[i]) + " - " \
        + int_list_to_string([int(x) for x in net.predict(X_test[i])]) \
        + " - " + int_list_to_string(y_test[i])
It seems like we again hit a local minimum, but this time it is wider than in the previous case: various hyperparameter settings land in the same region. Still, we cannot reach a better minimum than in approach 1. There we had a mean error of around 900 per sample, while here it is about 9000, even though the first 5 outputs are simply copies of the input; the previous approach definitely performed a lot better.
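One way to make that comparison concrete is to split the squared error over the six output positions: if most of it comes from the five positions that merely copy the input, the error on the actually predicted character is even worse than the totals suggest. A minimal sketch, assuming net, X_test and y_test are still those of the second approach:
In [ ]:
# Mean squared error per output position (6 outputs: 5 copied input chars + the predicted char)
per_output_err = np.square(net.predict(X_test) - y_test).mean(axis=0)
print "Per-output mean squared error: ", per_output_err
print "Error on the predicted character alone: ", per_output_err[-1]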
In [2]:
# Load the TEXT data
# If your memory turns out to be sufficient, try the following:
#def get_nextchar_data(training_ratio=0.6, val_ratio=0.1):
def get_nextchar_data(training_ratio=0.1, test_ratio=0.01, val_ratio=0.01):
    # Load the nextchar training data
    X, y = load_nextchar_dataset(nextchar_datafile)
    # Subsample the data
    length = len(y)
    num_training = int(length*training_ratio)
    num_val = int(length*val_ratio)
    num_test = min((length-num_training-num_val), int(length*test_ratio))
    mask = range(num_training)
    X_train = X[mask]
    y_train = y[mask]
    mask = range(num_training, num_training+num_test)
    X_test = X[mask]
    y_test = y[mask]
    mask = range(num_training+num_test, num_training+num_test+num_val)
    X_val = X[mask]
    y_val = y[mask]
    return X_train, y_train, X_val, y_val, X_test, y_test
nextchar_datafile = 'metu/dataset/nextchar_data.pkl'
input_size = 5 # Size of the input of the network
#plain_text_file_to_dataset("metu/dataset/ince_memed_1.txt", nextchar_datafile, input_size)
plain_text_file_to_dataset("metu/dataset/shakespeare.txt", nextchar_datafile, input_size)
X_train, y_train, X_val, y_val, X_test, y_test = get_nextchar_data()
#add bias columns
X_train = np.c_[X_train, np.ones(X_train.shape[0])].astype(int)
X_val = np.c_[X_val, np.ones(X_val.shape[0])].astype(int)
X_test = np.c_[X_test, np.ones(X_test.shape[0])].astype(int)
print "Number of instances in the training set: ", len(X_train)
print "Number of instances in the validation set: ", len(X_val)
print "Number of instances in the testing set: ", len(X_test)
# We have loaded the dataset. That wasn't difficult, was it? :)
# Let's look at a few samples
#
from metu.data_utils import int_list_to_string, int_to_charstr
print "Input - Next char to be predicted"
for i in range(1,10):
    print int_list_to_string(X_train[i]) + " - " + int_list_to_string(y_train[i])
In [22]:
# Now, let's train a neural network
from cs231n.classifiers.neural_net import TwoLayerNet
input_size = 6
hidden_size = 300
num_classes = 128
net = TwoLayerNet(input_size, hidden_size, num_classes)
# Train the network
stats = net.train(X_train, y_train, X_val, y_val,
                  num_iters=5000, batch_size=32,
                  learning_rate=3.8e-4, learning_rate_decay=0.95,
                  reg=1e3, verbose=True)
# Predict on the validation set
# predict() returns a 1-D array of class labels here; flatten y so the shapes match
val_err = np.mean(np.square(net.predict(X_val) - np.ravel(y_val)))
print 'Validation error: ', val_err
test_err = np.mean(np.square(net.predict(X_test) - np.ravel(y_test)))
print 'Test error: ', test_err
# Plot the loss function and train / validation errors
plt.subplot(2, 1, 1)
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
train = plt.plot(stats['train_acc_history'], label='train')
val = plt.plot(stats['val_acc_history'], label='val')
plt.legend(loc='upper right', shadow=True)
plt.title('Classification accuracy history')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.show()
In [23]:
# Show some sample outputs:
print "Input - predicted chars - true chars"
X_print = X_test[:100]
res = net.predict(X_print)
for i in range(0,20):
    print int_list_to_string(X_print[i]) + " - " \
        + int_list_to_string([res[i]]) \
        + " - " + int_list_to_string(y_test[i])
It seems like this classification approach did not work at all.
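To put a number on that impression, a simple exact-match accuracy over the test set can be computed (a minimal sketch, assuming predict returns integer class labels as in the cell above):
In [ ]:
# Fraction of test characters predicted exactly right
test_acc = (net.predict(X_test) == np.ravel(y_test)).mean()
print "Test accuracy: ", test_acc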