In [39]:
import gzip
import itertools
import pickle
import os
import sys
import numpy as np
import lasagne
import theano
import theano.tensor as T
import time
In [40]:
import pandas as pd
import random
In [41]:
from preprocess_data_lib import *
In [42]:
(X_train, y_train, X_valid, y_valid, X_test, y_test) = getData2(pct=0, cast=False)
In [43]:
(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape, X_test.shape, y_test.shape)
Out[43]:
In [44]:
BATCH_SIZE = 500
NUM_HIDDEN_UNITS = 1024
LEARNING_RATE = 0.02
MOMENTUM = 0.9
In [45]:
data_dic = dict(
    X_train=theano.shared(lasagne.utils.floatX(X_train)),
    y_train=T.cast(theano.shared(y_train), 'int32'),
    X_valid=theano.shared(lasagne.utils.floatX(X_valid)),
    y_valid=T.cast(theano.shared(y_valid), 'int32'),
    X_test=theano.shared(lasagne.utils.floatX(X_test)),
    y_test=T.cast(theano.shared(y_test), 'int32'),
    num_examples_train=X_train.shape[0],
    num_examples_valid=X_valid.shape[0],
    num_examples_test=X_test.shape[0],
    input_dim=X_train.shape[1],
    output_dim=10,
)
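The arrays are wrapped in Theano shared variables so mini-batches can be sliced on the compute device rather than copied from host memory at every step; the labels are cast to int32 because the categorical cross-entropy objective expects integer targets. A quick sanity check (a hypothetical, unexecuted cell using only names defined above):
In [ ]:
# Shared variables hold the full arrays; .get_value() copies back to host.
# Note that y_train in data_dic is a cast *expression*, not a shared
# variable, so only the X_* entries support .get_value().
assert data_dic['X_train'].get_value().shape[0] == data_dic['num_examples_train']
assert data_dic['X_train'].get_value().dtype == theano.config.floatX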
In [46]:
def build_model(input_dim, output_dim,
                batch_size=BATCH_SIZE, num_hidden_units=NUM_HIDDEN_UNITS):
    """Create a symbolic representation of a neural network with `input_dim`
    input nodes, `output_dim` output nodes and `num_hidden_units` per hidden
    layer.
    The training function of this model must have a mini-batch size of
    `batch_size`.
    The output layer of the network is returned.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, input_dim),
    )
    l_hidden1 = lasagne.layers.DenseLayer(
        l_in,
        num_units=num_hidden_units,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    l_hidden1_dropout = lasagne.layers.DropoutLayer(
        l_hidden1,
        p=0.5,
    )
    l_hidden2 = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=num_hidden_units,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    l_hidden2_dropout = lasagne.layers.DropoutLayer(
        l_hidden2,
        p=0.5,
    )
    l_out = lasagne.layers.DenseLayer(
        l_hidden2_dropout,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.softmax,
    )
    return l_out
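The stack is input → 1024 ReLU → dropout(0.5) → 1024 ReLU → dropout(0.5) → 10-way softmax. As a sketch of how big that is, the learnable parameters can be counted from the shared variables returned by `get_all_params` (the 784 input size below is purely illustrative; the real value comes from `data_dic['input_dim']`):
In [ ]:
# Illustrative only: count learnable parameters (weights plus biases of the
# three DenseLayers; dropout layers have none) for a hypothetical
# 784-dimensional input.
dbg_out = build_model(input_dim=784, output_dim=10)
n_params = sum(int(np.prod(p.get_value().shape))
               for p in lasagne.layers.get_all_params(dbg_out))
print(n_params)  # 784*1024 + 1024 + 1024*1024 + 1024 + 1024*10 + 10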
In [47]:
def create_iter_functions(dataset, output_layer,
                          X_tensor_type=T.matrix,
                          batch_size=BATCH_SIZE,
                          learning_rate=LEARNING_RATE, momentum=MOMENTUM):
    """Create functions for training, validation and testing to iterate one
    epoch.
    """
    batch_index = T.iscalar('batch_index')
    X_batch = X_tensor_type('x')
    y_batch = T.ivector('y')
    batch_slice = slice(batch_index * batch_size,
                        (batch_index + 1) * batch_size)
    # Categorical cross-entropy on the softmax output (old-style Lasagne
    # Objective API); `deterministic=True` disables dropout at evaluation.
    objective = lasagne.objectives.Objective(
        output_layer,
        loss_function=lasagne.objectives.categorical_crossentropy)
    loss_train = objective.get_loss(X_batch, target=y_batch)
    loss_eval = objective.get_loss(X_batch, target=y_batch,
                                   deterministic=True)
    pred = T.argmax(
        output_layer.get_output(X_batch, deterministic=True), axis=1)
    accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)
    all_params = lasagne.layers.get_all_params(output_layer)
    updates = lasagne.updates.nesterov_momentum(
        loss_train, all_params, learning_rate, momentum)
    # `givens` substitutes a slice of the shared dataset for the symbolic
    # batch, so each compiled function takes only a batch index.
    iter_train = theano.function(
        [batch_index], loss_train,
        updates=updates,
        givens={
            X_batch: dataset['X_train'][batch_slice],
            y_batch: dataset['y_train'][batch_slice],
        },
    )
    iter_valid = theano.function(
        [batch_index], [loss_eval, accuracy],
        givens={
            X_batch: dataset['X_valid'][batch_slice],
            y_batch: dataset['y_valid'][batch_slice],
        },
    )
    iter_test = theano.function(
        [batch_index], [pred],
        givens={
            X_batch: dataset['X_test'][batch_slice],
        },
    )
    return dict(
        train=iter_train,
        valid=iter_valid,
        test=iter_test,
    )
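Each compiled function takes a single batch index; the `givens` slices select the corresponding rows of the shared arrays, so one call runs one mini-batch. For example (a sketch, valid only once `iter_funcs` has been built as in the training cell below):
In [ ]:
# Sketch: one SGD step on mini-batch 0, then one validation pass on batch 0.
# Assumes iter_funcs = create_iter_functions(data_dic, output_layer) has run.
loss0 = iter_funcs['train'](0)
valid_loss0, valid_acc0 = iter_funcs['valid'](0)
print(loss0, valid_loss0, valid_acc0)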
In [48]:
def train(iter_funcs, dataset, batch_size=BATCH_SIZE):
    """Train the model on `dataset` with mini-batch training. Each
    mini-batch has `batch_size` examples.
    """
    num_batches_train = dataset['num_examples_train'] // batch_size
    num_batches_valid = dataset['num_examples_valid'] // batch_size
    for epoch in itertools.count(1):
        batch_train_losses = []
        for b in range(num_batches_train):
            batch_train_loss = iter_funcs['train'](b)
            batch_train_losses.append(batch_train_loss)
        avg_train_loss = np.mean(batch_train_losses)
        batch_valid_losses = []
        batch_valid_accuracies = []
        for b in range(num_batches_valid):
            batch_valid_loss, batch_valid_accuracy = iter_funcs['valid'](b)
            batch_valid_losses.append(batch_valid_loss)
            batch_valid_accuracies.append(batch_valid_accuracy)
        avg_valid_loss = np.mean(batch_valid_losses)
        avg_valid_accuracy = np.mean(batch_valid_accuracies)
        yield {
            'number': epoch,
            'train_loss': avg_train_loss,
            'valid_loss': avg_valid_loss,
            'valid_accuracy': avg_valid_accuracy,
        }
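`train` yields epoch summaries forever (`itertools.count`), so the caller decides when to stop; the training cell below breaks manually once `NUM_EPOCHS` is reached. An equivalent bounded loop, sketched with `itertools.islice` (assumes `iter_funcs` and `NUM_EPOCHS` are defined as in the cells below):
In [ ]:
# Sketch: cap the infinite epoch generator without a manual break.
for epoch in itertools.islice(train(iter_funcs, data_dic), NUM_EPOCHS):
    print(epoch['number'], epoch['train_loss'], epoch['valid_accuracy'])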
In [49]:
def test(iter_funcs, dataset, batch_size=BATCH_SIZE):
    """Yield test-set predictions one mini-batch at a time."""
    num_batches_test = dataset['num_examples_test'] // batch_size
    for b in range(num_batches_test):
        yield iter_funcs['test'](b)
    # One extra call picks up the final partial batch when the test set
    # size is not an exact multiple of batch_size.
    if dataset['num_examples_test'] % batch_size != 0:
        yield iter_funcs['test'](num_batches_test)
In [50]:
NUM_EPOCHS = 100
In [51]:
print("Loading data...")
dataset = data_dic
print("Building model and compiling functions...")
output_layer = build_model(
input_dim=dataset['input_dim'],
output_dim=dataset['output_dim'],
)
iter_funcs = create_iter_functions(dataset, output_layer)
print("Starting training...")
now = time.time()
try:
for epoch in train(iter_funcs, dataset):
print("Epoch {} of {} took {:.3f}s".format(
epoch['number'], NUM_EPOCHS, time.time() - now))
now = time.time()
print(" training loss:\t\t{:.6f}".format(epoch['train_loss']))
print(" validation loss:\t\t{:.6f}".format(epoch['valid_loss']))
print(" validation accuracy:\t\t{:.2f} %%".format(
epoch['valid_accuracy'] * 100))
if epoch['number'] >= NUM_EPOCHS:
break
except KeyboardInterrupt:
pass
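`pickle` and `gzip` are imported at the top but never used; a natural follow-up here is persisting the trained weights. A minimal sketch (the output path is hypothetical), using only `get_all_params`, which is already used in `create_iter_functions`:
In [ ]:
# Sketch: persist the learned parameter values (path is hypothetical).
param_values = [p.get_value() for p in lasagne.layers.get_all_params(output_layer)]
with gzip.open('../../data/intermediate/la4_params.pkl.gz', 'wb') as f:
    pickle.dump(param_values, f)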
In [52]:
print("Loading data...")
dataset = data_dic
iter_funcs = create_iter_functions(dataset, output_layer)
print("Starting training...")
now = time.time()
try:
preds = []
for pred in test(iter_funcs, dataset):
preds.extend(pred[0])
except KeyboardInterrupt:
pass
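Before writing the predictions out, it is worth asserting that there is exactly one prediction per test example; the partial-batch yield in `test` covers any remainder:
In [ ]:
assert len(preds) == data_dic['num_examples_test']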
In [53]:
pd.DataFrame(preds).to_csv('../../data/intermediate/la4_pred.csv', index=False, header=False)