In [1]:
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning, module='.*/IPython/.*')
In [2]:
####################
# necessary imports -- run `pip install mxnet` first if MXNet is not installed
####################
from __future__ import print_function
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning, module='.*/IPython/.*')
import mxnet as mx # !! pip install mxnet if mxnet not installed !!
from mxnet import nd, autograd
import numpy as np
from collections import defaultdict
import copy
mx.random.seed(1234)
# ctx = mx.gpu(0)
ctx = mx.cpu(0)
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.fftpack
from scipy import stats
try:
    from pandas import plotting
    from pandas.plotting import autocorrelation_plot
except ImportError:  # pandas < 0.20
    from pandas.tools import plotting
    from pandas.tools.plotting import autocorrelation_plot
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from numpy import polyfit
from datetime import datetime
sns.set_style('whitegrid')
#sns.set_context('notebook')
sns.set_context('poster')
# Make inline plots vector graphics instead of raster graphics
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf', 'png')
In [3]:
# ######################################
# # MAKE SURE data.csv IS AVAILABLE AT THE PATH USED IN read_csv BELOW
# ######################################
# original_data = pd.read_csv("../data/timeseries/data.csv", index_col=0)
# dict_days_to_int = {'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 'Thursday': 4, 'Friday': 5, 'Saturday': 6, 'Sunday': 7}
# original_data.sort_index(inplace=True)
# original_data['date_']=original_data.index
# original_data['current_month'] = original_data['date_'].apply(lambda x: pd.Timestamp(x).month)
# original_data['current_year'] = original_data['date_'].apply(lambda x: pd.Timestamp(x).year-2009)
# original_data['day_of_week_int'] = original_data['day.of.week'].apply(lambda x: dict_days_to_int[x])
# original_data['cloudy_or_not_cloudy'] = original_data['cloud.indicator'].apply(lambda x: 0 if x=='clear' else 1)
# # For the Random Forest, we bin the car # in small bins of 2 cars. This increases training statistics at the cost of a negligible drop in precision
# BIN_SIZE = 2
# original_data['bucketed_car_count'] = original_data['car.count'].apply(lambda x: np.floor(x/BIN_SIZE))
# full_data = original_data.dropna()
# # Split data in cloudy data and non-cloudy data
# data_no_clouds = original_data[original_data['cloudy_or_not_cloudy']==0].dropna()
# data_clouds = original_data[original_data['cloudy_or_not_cloudy']==1].dropna()
# data_no_clouds['previous_bucketed_car_count'] = data_no_clouds['bucketed_car_count'].shift(1).dropna()
# # plotting.scatter_matrix(full_data.sample(300)[['car.count', 'weather']])
# # plotting.scatter_matrix(full_data.sample(300)[['car.count', 'day_of_week_int']])
# # plt.show()
# data_no_clouds['car.count.previous'] = data_no_clouds['car.count'].shift(1)
# # computes correlation coeffs, normalization is done before their computation
# # data_no_clouds.loc[:, ['car.count', 'car.count.previous' ,'weather', 'day_of_week_int']].corr()
# data_no_clouds = data_no_clouds.dropna()
# full_data = data_no_clouds
# # print(full_data.head())
In [4]:
######################################
# LOAD DATA
######################################
# columns_subset = ['car.count', 'day_of_week_int', 'cloudy_or_not_cloudy', 'weather', 'current_month']
# original_data = pd.read_csv("../data/timeseries/data.csv", index_col=0)
# original_data = pd.read_csv("../data/timeseries/daily_returns_stratifi/saroop.csv", index_col=0)
original_data = pd.read_csv("../data/timeseries/daily_returns_stratifi/all_post_2015.csv", index_col=0)
print('Columns: ', original_data.columns.tolist(), '\n')
# original_data = pd.read_csv("../data/timeseries/daily_returns_stratifi/saroop.csv", index_col=0)
original_data.index = pd.to_datetime(original_data.index)
original_data.sort_index(inplace=True)
# original_data['date_']=original_data.index
# original_data['current_month'] = original_data['date_'].apply(lambda x: pd.Timestamp(x).month)
# original_data['current_year'] = original_data['date_'].apply(lambda x: pd.Timestamp(x).year-2009)
full_data = original_data
full_data['underlying_returns'] *= 100.
full_data['value'] *= 100.
if np.median(original_data['underlying_returns'].sample(30)) != 0:
    already_overlay_only = True
else:
    overlay_notional = 1.0  # to be grabbed from the DB
    already_overlay_only = False
if already_overlay_only:
    full_data['overlay_only_returns'] = full_data['value']
else:
    full_data['overlay_only_returns'] = (full_data['value'] - full_data['underlying_returns']) / overlay_notional
late_data = full_data.iloc[-len(full_data)//10:]
early_data = full_data.iloc[:-len(full_data)//10]
# full_data = full_data[full_data['strategy_id']==1]
# group by (strategy_id, finfolio_account_id) so that each element of early_data / late_data is a single
# time series: one DataFrame per strategy/account pair, indexed by date
early_data = [i[1] for i in list(early_data.groupby(['strategy_id', 'finfolio_account_id']))]
late_data = [i[1] for i in list(late_data.groupby(['strategy_id', 'finfolio_account_id']))]
print(early_data[0].head(5))
print('\n -------- \n', late_data[0].head(5))
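# The overlay-only return above is derived as (total return - underlying return) / overlay notional.
# Minimal numeric sanity check of that decomposition, using made-up numbers (illustrative only, not taken from the data):
_example_value, _example_underlying, _example_notional = 1.25, 0.75, 1.0  # total %, underlying %, notional factor
_example_overlay_only = (_example_value - _example_underlying) / _example_notional
print('Example overlay-only return: {0:.2f}%'.format(_example_overlay_only))  # 0.50%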
In [5]:
#############################
# Utility functions for data preparation
#############################
def get_list_unique_block_indices(len_data=100, seq_length=5, n_samples=10):
    """ Returns a list of unique random integers, each the index of the first element of a
    non-overlapping block of data.
    args:
        len_data (int): length of the data set
        seq_length (int): length of the blocks to extract
        n_samples (int): number of blocks to extract
    """
    # print(len_data // seq_length, n_samples, ' --')
    set1 = set(np.random.randint(len_data // seq_length, size=n_samples)*seq_length)
    full_set = set1
    while len(full_set) < n_samples:
        # keep drawing until we have n_samples distinct block start indices
        set2 = set(np.random.randint(len_data // seq_length, size=n_samples)*seq_length)
        full_set = full_set | set2
    returned_list = list(full_set)[0:n_samples]
    assert(len(returned_list) == n_samples)
    return returned_list

def extract_random_sequence(data, seq_length=5, columns_subset=['value'], block_start_index=None):
    if block_start_index is None:
        block_start_index = np.random.randint(len(data)-seq_length)
    data_subset = data.reset_index().loc[block_start_index:block_start_index+seq_length-1, columns_subset]
    assert(len(data_subset) == (seq_length))
    out_data = [list(i) for i in data_subset.values]
    return out_data

def create_batch_ND_time_series(full_data, column_subset=[], seq_length=10, num_samples=4):
    out_data = []
    # get a list of non-overlapping random sequence start indices
    all_samples_start_indices = get_list_unique_block_indices(len(full_data), seq_length, num_samples)
    assert(len(all_samples_start_indices) == num_samples)
    for one_random_start_index in all_samples_start_indices:
        out_data.append(extract_random_sequence(full_data, seq_length, column_subset, one_random_start_index))
        assert(len(out_data[-1]) == (seq_length))
    return out_data
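# Quick sanity check of the block sampler on a toy DataFrame (illustrative only; the toy
# column name 'value' is arbitrary, not a column of the real data set):
_toy_df = pd.DataFrame({'value': np.arange(100, dtype=float)})
_toy_blocks = create_batch_ND_time_series(_toy_df, column_subset=['value'], seq_length=5, num_samples=3)
print(np.array(_toy_blocks).shape)  # expected: (3, 5, 1) -- 3 non-overlapping blocks of 5 consecutive values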
In [6]:
#############################
# Data Preparation
#############################
SEQ_LENGTH = 3 # we use the last 3 days as inputs
NUM_FEATURES = 2 # we use NUM_FEATURES features (underlying_return, strategy_id etc.)
BATCH_SIZE = 32
BATCH_SIZE_TEST = 1
# columns_subset = ['car.count', 'day_of_week_int', 'weather', 'current_month', 'current_year']
columns_subset = ['overlay_only_returns', 'underlying_returns', 'strategy_id']
def create_random_sequences(data, columns_subset, seq_length):
    all_random_sequences = []
    n_data_sets = len(data)
    if not isinstance(data, list):
        data = [data]
    for i, one_data_batch in enumerate(data):
        # print('--> {0} / {1}'.format(i, n_data_sets))
        # each extracted block has length seq_length + 1 so that the labels can be
        # obtained by shifting the block by one time step
        one_data_batch_length = len(one_data_batch)
        total_num_of_sequences = one_data_batch_length // (seq_length+1) - 1
        if total_num_of_sequences < 2:
            continue
        one_batch_ND_time_series = create_batch_ND_time_series(one_data_batch,
                                                               columns_subset,
                                                               seq_length+1, total_num_of_sequences)
        all_random_sequences += one_batch_ND_time_series
    return all_random_sequences
all_random_sequences = create_random_sequences(early_data, columns_subset, SEQ_LENGTH)
late_data_random_sequences = create_random_sequences(late_data, columns_subset, SEQ_LENGTH)
# actual total number of sequences
total_num_of_sequences = len(all_random_sequences)
percent_train, percent_dev = 1.0, 0. # we used dev set to try a few sets of parameters
n_seq_train = int(total_num_of_sequences*percent_train)
n_seq_dev = int(total_num_of_sequences*percent_dev)
# n_seq_test = len(all_random_sequences) - int(total_num_of_sequences*percent_dev)
n_seq_test = len(late_data_random_sequences)
data_train = np.array(all_random_sequences[0:n_seq_train])
# data_dev = np.array(all_random_sequences[n_seq_train:n_seq_train+n_seq_dev])
# data_test = np.array(all_random_sequences[n_seq_train+n_seq_dev:])
# we are going to use the late (most recent) data for testing
data_test = np.array(late_data_random_sequences)
print('SHAPES of ALL, TRAIN, TEST:')
print(np.array(all_random_sequences).shape)
print(np.array(data_train).shape)
# print(np.array(data_dev).shape)
print(np.array(data_test).shape)
assert(data_train.shape == (n_seq_train, SEQ_LENGTH+1, NUM_FEATURES+1)) # +1 for the target value (overlay_only_returns)
# assert(data_dev.shape == (n_seq_dev, SEQ_LENGTH+1, NUM_FEATURES+1))
assert(data_test.shape == (n_seq_test, SEQ_LENGTH+1, NUM_FEATURES+1))
In [7]:
# print(len(full_data[28]))
In [8]:
num_batches_train = data_train.shape[0] // BATCH_SIZE
num_batches_test = data_test.shape[0] // BATCH_SIZE_TEST
dim_inputs = NUM_FEATURES + 1 # dimension of the input vector for a given time stamp
# for labels, we keep only the time-series target, which has index 0; we don't keep the features.
# See Remarks Below
indices_outputs = [0]
dim_outputs = len(indices_outputs) # same comment
# inputs run from t_0 to t_{seq_length-1}; the last point of each extracted block is only used
# as the label ("output") of the penultimate point
data_train_inputs = data_train[:, :-1, :]
data_train_labels = data_train[:, 1:, indices_outputs]
data_test_inputs = data_test[:, :-1, :]
data_test_labels = data_test[:, 1:, indices_outputs]
# creates 'bad' data for testing
bad_data_test_labels = copy.deepcopy(data_test_labels)
blurring_factors = 10. * (np.random.rand(*bad_data_test_labels[:, -1, :].shape) + 0.1)
bad_data_test_labels[:, -1, :] *= blurring_factors
train_data_inputs = nd.array(data_train_inputs).reshape((num_batches_train, BATCH_SIZE, SEQ_LENGTH, dim_inputs))
train_data_labels = nd.array(data_train_labels).reshape((num_batches_train, BATCH_SIZE, SEQ_LENGTH, dim_outputs))
test_data_inputs = nd.array(data_test_inputs).reshape((num_batches_test, BATCH_SIZE_TEST, SEQ_LENGTH, dim_inputs))
test_data_labels = nd.array(data_test_labels).reshape((num_batches_test, BATCH_SIZE_TEST, SEQ_LENGTH, dim_outputs))
test_bad_data_labels = nd.array(bad_data_test_labels).reshape((num_batches_test, BATCH_SIZE_TEST, SEQ_LENGTH, dim_outputs))
train_data_inputs = nd.swapaxes(train_data_inputs, 1, 2)
train_data_labels = nd.swapaxes(train_data_labels, 1, 2)
test_data_inputs = nd.swapaxes(test_data_inputs, 1, 2)
test_data_labels = nd.swapaxes(test_data_labels, 1, 2)
test_bad_data_labels = nd.swapaxes(test_bad_data_labels, 1, 2)
print('num_mini-batches_train={0} | seq_length={2} | mini-batch_size={1} | dim_input={3} | dim_output={4}'.format(num_batches_train, BATCH_SIZE, SEQ_LENGTH, dim_inputs, dim_outputs))
print('train_data_inputs shape: ', train_data_inputs.shape)
print('train_data_labels shape: ', train_data_labels.shape)
# print(train_data_labels)
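# Toy illustration of the one-step shift between inputs and labels used above (illustrative only):
# for a sequence [x0, x1, x2, x3], the inputs are [x0, x1, x2] and the labels are [x1, x2, x3],
# so each time step is trained to predict the next value of the target column.
_toy_seq = np.arange(4).reshape(1, 4, 1)   # one sequence of length SEQ_LENGTH+1 = 4, one feature
print(_toy_seq[:, :-1, :].flatten())       # inputs:  [0 1 2]
print(_toy_seq[:, 1:, [0]].flatten())      # labels:  [1 2 3]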
In [9]:
num_hidden_units = [64, 16]  # number of hidden units in each hidden LSTM layer
num_hidden_layers = len(num_hidden_units)  # number of hidden LSTM layers
num_units_layers = [dim_inputs] + num_hidden_units
norm_factor = 0.001  # scale factor for weight initialization; 0.001 works well when predicting small values (<1)
########################
# Weights connecting the inputs to the hidden layer
########################
Wxg, Wxi, Wxf, Wxo, Whg, Whi, Whf, Who, bg, bi, bf, bo = {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
for i_layer in range(1, num_hidden_layers+1):
    num_inputs = num_units_layers[i_layer-1]
    n_hidden = num_units_layers[i_layer]  # per-layer width (named so the num_hidden_units list above is not shadowed)
    Wxg[i_layer] = nd.random_normal(shape=(num_inputs, n_hidden), ctx=ctx) * norm_factor
    Wxi[i_layer] = nd.random_normal(shape=(num_inputs, n_hidden), ctx=ctx) * norm_factor
    Wxf[i_layer] = nd.random_normal(shape=(num_inputs, n_hidden), ctx=ctx) * norm_factor
    Wxo[i_layer] = nd.random_normal(shape=(num_inputs, n_hidden), ctx=ctx) * norm_factor
    ########################
    # Recurrent weights connecting the hidden layer across time steps
    ########################
    Whg[i_layer] = nd.random_normal(shape=(n_hidden, n_hidden), ctx=ctx) * norm_factor
    Whi[i_layer] = nd.random_normal(shape=(n_hidden, n_hidden), ctx=ctx) * norm_factor
    Whf[i_layer] = nd.random_normal(shape=(n_hidden, n_hidden), ctx=ctx) * norm_factor
    Who[i_layer] = nd.random_normal(shape=(n_hidden, n_hidden), ctx=ctx) * norm_factor
    ########################
    # Bias vectors for the hidden layer
    ########################
    bg[i_layer] = nd.random_normal(shape=n_hidden, ctx=ctx) * norm_factor
    bi[i_layer] = nd.random_normal(shape=n_hidden, ctx=ctx) * norm_factor
    bf[i_layer] = nd.random_normal(shape=n_hidden, ctx=ctx) * norm_factor
    bo[i_layer] = nd.random_normal(shape=n_hidden, ctx=ctx) * norm_factor
########################
# Weights to the output nodes
########################
Why = nd.random_normal(shape=(num_units_layers[-1], dim_outputs), ctx=ctx) * norm_factor
by = nd.random_normal(shape=dim_outputs, ctx=ctx) * norm_factor
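# Sanity check of the per-layer parameter shapes (uses only the dicts defined above in this cell):
for _i_layer in range(1, num_hidden_layers+1):
    print('layer {0}: Wx* {1} | Wh* {2} | b* {3}'.format(_i_layer, Wxg[_i_layer].shape, Whg[_i_layer].shape, bg[_i_layer].shape))
print('output layer: Why {0} | by {1}'.format(Why.shape, by.shape))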
In [10]:
params = []
for i_layer in range(1, num_hidden_layers+1):
    params += [Wxg[i_layer], Wxi[i_layer], Wxf[i_layer], Wxo[i_layer], Whg[i_layer], Whi[i_layer], Whf[i_layer], Who[i_layer], bg[i_layer], bi[i_layer], bf[i_layer], bo[i_layer]]
params += [Why, by]  # add the output layer
for param in params:
    param.attach_grad()
In [11]:
def rmse(yhat, y):
    # per-sample L2 distance between prediction and target, averaged over the mini-batch
    return nd.mean(nd.sqrt(nd.sum(nd.power(y - yhat, 2), axis=0, exclude=True)))

def average_rmse_loss(outputs, labels, last_element_only=False):
    # assert(len(outputs) == len(labels))
    total_loss = 0.
    zipped_elements = list(zip(outputs, labels))  # list() so that [-1] also works under Python 3
    if last_element_only:
        # only the prediction of the last time step contributes to the loss
        output, label = zipped_elements[-1]
        total_loss = rmse(output, label)
        return total_loss
    for (output, label) in zipped_elements:
        total_loss = total_loss + rmse(output, label)
    return total_loss / len(outputs)
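# Minimal sanity check of the loss on dummy data (illustrative only): two "time steps",
# each a batch of 2 one-dimensional predictions, compared against slightly shifted labels.
_toy_outputs = [nd.array([[1.0], [2.0]]), nd.array([[3.0], [4.0]])]
_toy_labels = nd.array([[[1.5], [2.5]], [[3.0], [4.0]]])
print(average_rmse_loss(_toy_outputs, _toy_labels))                          # averaged over both time steps
print(average_rmse_loss(_toy_outputs, _toy_labels, last_element_only=True))  # only the last time step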
In [12]:
def SGD(params, learning_rate):
    for param in params:
        param[:] = param - learning_rate * param.grad

def adam(params, learning_rate, M, R, index_adam_call, beta1, beta2, eps):
    k = -1
    for param in params:
        k += 1
        M[k] = beta1 * M[k] + (1. - beta1) * param.grad
        R[k] = beta2 * R[k] + (1. - beta2) * (param.grad)**2
        # bias correction: since M & R are initialized to zeros, they are biased toward zero on the first few iterations
        m_k_hat = M[k] / (1. - beta1**(index_adam_call))
        r_k_hat = R[k] / (1. - beta2**(index_adam_call))
        if (np.isnan(M[k].asnumpy())).any() or (np.isnan(R[k].asnumpy())).any():
            raise ValueError('NaNs in the Adam moment estimates!')
        param[:] = param - learning_rate * m_k_hat / (nd.sqrt(r_k_hat) + eps)
    return params, M, R
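# Minimal single-step sanity check of the adam() update on a throwaway parameter (illustrative only):
# the gradient of sum(p*p) is 2p, so one Adam step should move each entry toward zero by roughly the learning rate.
_toy_param = nd.array([1.0, -2.0], ctx=ctx)
_toy_param.attach_grad()
with autograd.record():
    _toy_loss = nd.sum(_toy_param * _toy_param)
_toy_loss.backward()
_toy_params = [_toy_param]
_toy_M = {0: nd.zeros_like(_toy_param)}
_toy_R = {0: nd.zeros_like(_toy_param)}
_toy_params, _toy_M, _toy_R = adam(_toy_params, 0.001, _toy_M, _toy_R, 1, .9, .999, 1e-8)
print(_toy_params[0])  # approximately [0.999, -1.999]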
In [13]:
def single_lstm_unit_calcs(X, c, Wxg, h, Whg, bg, Wxi, Whi, bi, Wxf, Whf, bf, Wxo, Who, bo):
    # standard LSTM cell: candidate g, input gate i, forget gate f, output gate o
    g = nd.tanh(nd.dot(X, Wxg) + nd.dot(h, Whg) + bg)
    i = nd.sigmoid(nd.dot(X, Wxi) + nd.dot(h, Whi) + bi)
    f = nd.sigmoid(nd.dot(X, Wxf) + nd.dot(h, Whf) + bf)
    o = nd.sigmoid(nd.dot(X, Wxo) + nd.dot(h, Who) + bo)
    #######################
    c = f * c + i * g
    h = o * nd.tanh(c)
    return c, h

def deep_lstm_rnn(inputs, h, c, temperature=1.0):
    """
    h: dict of nd.arrays, one key per hidden layer (from 1 to num_hidden_layers).
       Key 0, if present, holds the input to the first layer.
    """
    outputs = []
    # inputs is one BATCH of sequences, so its shape is (seq_length, batch_size, features_dim)
    # (the last dimension is 1 for a single time series, vocab_size for characters, n for n different time series)
    for X in inputs:
        # X is the batch for one time step: e.g. if the batch holds 37 sequences, X holds the 37 values
        # of those sequences at that time step, so each iteration over inputs advances by one time step
        h[0] = X  # the first hidden layer takes the input X as its input
        for i_layer in range(1, num_hidden_layers+1):
            # each LSTM unit now has the 2 following inputs:
            # i) h_t from the previous layer (playing the role of the input X of a single-layer LSTM),
            # ii) h_{t-1} from the current layer (as in a single-layer LSTM)
            c[i_layer], h[i_layer] = single_lstm_unit_calcs(h[i_layer-1], c[i_layer], Wxg[i_layer], h[i_layer], Whg[i_layer], bg[i_layer], Wxi[i_layer], Whi[i_layer], bi[i_layer], Wxf[i_layer], Whf[i_layer], bf[i_layer], Wxo[i_layer], Who[i_layer], bo[i_layer])
        yhat_linear = nd.dot(h[num_hidden_layers], Why) + by
        # yhat is a batch of predictions for one time step; over the whole loop the outputs reproduce
        # the input sequence shifted by one step, i.e. each step predicts the next value
        # yhat = nd.sigmoid(yhat_linear)
        yhat = yhat_linear  # no bounded activation: the target (a return, in percent) is not confined to [0, 1]
        outputs.append(yhat)  # outputs has the same layout as inputs: a list of per-time-step batches
    return (outputs, h, c)
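# Shape sanity check of the stacked LSTM on the first training mini-batch
# (assumes the data-preparation and weight-initialization cells above have been run):
_h0, _c0 = {}, {}
for _i_layer in range(1, num_hidden_layers+1):
    _h0[_i_layer] = nd.zeros(shape=(BATCH_SIZE, num_units_layers[_i_layer]), ctx=ctx)
    _c0[_i_layer] = nd.zeros(shape=(BATCH_SIZE, num_units_layers[_i_layer]), ctx=ctx)
_outputs, _, _ = deep_lstm_rnn(train_data_inputs[0], _h0, _c0)
print(len(_outputs), _outputs[0].shape)  # expected: SEQ_LENGTH outputs, each of shape (BATCH_SIZE, dim_outputs)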
In [14]:
INDEX_TARGET_VALUE = 0
def test_prediction(one_input_seq, one_label_seq, temperature=1.0):
    # WE ASSUME the first value in the input vector is the variable of interest
    #####################################
    # Set the initial hidden and cell states to zero vectors
    #####################################  # would a better initialization help?
    h, c = {}, {}
    for i_layer in range(1, num_hidden_layers+1):
        h[i_layer] = nd.zeros(shape=(BATCH_SIZE_TEST, num_units_layers[i_layer]), ctx=ctx)
        c[i_layer] = nd.zeros(shape=(BATCH_SIZE_TEST, num_units_layers[i_layer]), ctx=ctx)
    outputs, h, c = deep_lstm_rnn(one_input_seq, h, c, temperature=temperature)
    return outputs[-1][0].asnumpy()[INDEX_TARGET_VALUE], one_label_seq.asnumpy()[-1].flatten()[INDEX_TARGET_VALUE], outputs, one_label_seq

def check_prediction(index, test_inputs=None, test_labels=None):
    if test_inputs is None:
        test_inputs = test_data_inputs
    if test_labels is None:
        test_labels = test_data_labels
    if index >= len(test_inputs):
        index = np.random.randint(len(test_inputs))
    o, label, outputs, labels = test_prediction(test_inputs[index], test_labels[index], temperature=1.0)
    prediction = round(o, 3)
    true_label = round(label, 3)
    # this float() only works when BATCH_SIZE_TEST == 1 (a single prediction per time step)
    outputs = [float(i.asnumpy().flatten()[INDEX_TARGET_VALUE]) for i in outputs]
    true_labels = list(test_labels[index].asnumpy()[:, :, INDEX_TARGET_VALUE].flatten())
    df = pd.DataFrame([outputs, true_labels]).transpose()
    df.columns = ['predicted', 'true']
    return df

def compute_test_errors(bias_correction=1., labels=None, test_inputs=None):
    if labels is None:
        labels = test_data_labels
    list_abs_rel_error, list_rel_error, list_abs_error, list_abs_diff = [], [], [], []
    for e in range(0, labels.shape[0]):
        data_prediction_df = check_prediction(index=e, test_inputs=test_inputs, test_labels=labels)
        prediction = round(data_prediction_df.tail(1)['predicted'].values.flatten()[-1] * bias_correction, 3)
        true_label = round(data_prediction_df.tail(1)['true'].values.flatten()[-1], 3)
        if true_label != 0:
            list_abs_rel_error.append(round(100. * np.abs(prediction / (true_label) - 1.0), 2))
            list_rel_error.append(round(100. * (prediction / (true_label) - 1.0), 2))
            list_abs_error.append(round(prediction - true_label, 0))
            list_abs_diff.append(round(abs(prediction - true_label), 2))
        else:
            continue
    return list_abs_rel_error, list_rel_error, list_abs_error, list_abs_diff
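# Example usage of check_prediction(): it returns a small DataFrame with a 'predicted' and a 'true'
# column, one row per time step of the chosen test sequence. Until the training cell below has run,
# the weights are still random, so the 'predicted' values here are meaningless; this only shows the format.
print(check_prediction(index=0).columns.tolist())  # ['predicted', 'true']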
In [21]:
# @hidden_cell
epochs = 100  # one epoch is one pass over the entire training set
moving_loss = 0.
moving_rel_error = 0.  # initialized here so the first epoch cannot reference it before assignment
learning_rate = 0.0015  # no more than 0.002, even with Adam; otherwise convergence is likely to saturate
# Adam optimizer state
beta1 = .9
beta2 = .999
index_adam_call = 0
# M & R keep track of the Adam momenta; params is the list of all parameter ndarrays
M = {k: nd.zeros_like(v) for k, v in enumerate(params)}
R = {k: nd.zeros_like(v) for k, v in enumerate(params)}
df_moving_loss = pd.DataFrame(columns=['Loss', 'Error'])
df_moving_loss.index.name = 'Epoch'
# needed to update plots on the fly
%matplotlib notebook
fig, axes_fig1 = plt.subplots(1, 1, figsize=(6, 3))
fig2, axes_fig2 = plt.subplots(1, 1, figsize=(6, 3))
for e in range(epochs):
    ############################
    # Attenuate the learning rate by a factor of 2 every 1000 epochs
    ############################
    if ((e+1) % 1000 == 0):
        learning_rate = learning_rate / 2.0  # TODO check whether it is OK to decay the learning rate when using the Adam optimizer
    h, c = {}, {}
    for i_layer in range(1, num_hidden_layers+1):
        h[i_layer] = nd.zeros(shape=(BATCH_SIZE, num_units_layers[i_layer]), ctx=ctx)
        c[i_layer] = nd.zeros(shape=(BATCH_SIZE, num_units_layers[i_layer]), ctx=ctx)
    for i in range(num_batches_train):
        data_one_hot = train_data_inputs[i]
        label_one_hot = train_data_labels[i]
        with autograd.record():
            outputs, h, c = deep_lstm_rnn(data_one_hot, h, c)
            loss = average_rmse_loss(outputs, label_one_hot, last_element_only=True)
        loss.backward()
        # SGD(params, learning_rate)
        index_adam_call += 1  # needed for bias correction in the Adam optimizer
        params, M, R = adam(params, learning_rate, M, R, index_adam_call, beta1, beta2, 1e-8)
        ##########################
        # Keep a moving average of the losses
        ##########################
        if (i == 0) and (e == 0):
            moving_loss = nd.mean(loss).asscalar()
        else:
            moving_loss = .99 * moving_loss + .01 * nd.mean(loss).asscalar()
    df_moving_loss.loc[e] = round((moving_loss), 5)
    ############################
    # Predictions and plots
    ############################
    data_prediction_df = check_prediction(index=e)
    if not (e % 10):
        axes_fig1.clear()
        data_prediction_df.plot(ax=axes_fig1)
        fig.canvas.draw()
    prediction = round(1.*data_prediction_df.tail(1)['predicted'].values.flatten()[-1], 3)
    true_label = round(1.*data_prediction_df.tail(1)['true'].values.flatten()[-1], 3)
    if true_label != 0:
        rel_error = round(100. * np.abs(prediction / (true_label+1e-5) - 1.0), 2)
    else:
        rel_error = moving_rel_error
    if not (e % 50):
        print("Epoch = {0} | Loss = {1} | Prediction = {2} True = {3} Error = {4}".format(e, (moving_loss), prediction, true_label, rel_error))
    if not (e % 50):
        axes_fig2.clear()
    if e == 0:
        moving_rel_error = rel_error
    else:
        moving_rel_error = .99 * moving_rel_error + .01 * rel_error
    df_moving_loss.loc[e, ['Error']] = moving_rel_error
    if not (e % 50):
        axes_loss_plot = df_moving_loss.plot(ax=axes_fig2, secondary_y='Loss', color=['r', 'b'])
        axes_loss_plot.right_ax.grid(False)
        # axes_loss_plot.right_ax.set_yscale('log')
        fig2.canvas.draw()
%matplotlib inline
In [16]:
list_abs_rel_error, list_rel_error, _, _ = compute_test_errors(bias_correction=1.0,
                                                               labels=test_data_labels)
bias_correction = 100. / (100. + np.mean(list_rel_error))  # estimate of the (multiplicative) bias-correction factor
# recompute the errors with the "de-biased" model on the genuine test labels
list_abs_rel_error, list_rel_error, list_abs_error, list_abs_diff = compute_test_errors(bias_correction=bias_correction,
                                                                                        labels=test_data_labels)
# same "de-biased" model, but evaluated on the deliberately corrupted ("bad") test labels
bad_data_list_abs_rel_error, bad_data_list_rel_error, bad_data_list_abs_error, bad_data_list_abs_diff = compute_test_errors(
    bias_correction=bias_correction,
    labels=test_bad_data_labels)
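# Worked numeric illustration of the bias-correction factor (made-up number, illustrative only):
# if the mean signed relative error on the test set were +5%, predictions are on average 5% too high,
# and scaling them by 100 / (100 + 5) ~= 0.952 removes that systematic bias.
print('Example correction factor for a +5% mean relative error:', round(100. / (100. + 5.), 3))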
In [17]:
# print(test_bad_data_labels)
# print(test_data_labels)
In [18]:
%matplotlib inline
fig_errors, axes_fig_errors = plt.subplots(1,1, figsize=(10,8))
# plt.hist(list_abs_rel_error, 20, alpha=0.4, label='abs percent error')
# plt.hist(list_rel_error, 20, alpha=0.4, label='percent error')
# plt.hist(list_abs_error, 20, alpha=0.4, label='abs error')
plt.hist(list_abs_diff, 100, range=[0., 5.], alpha=0.5, label='good data', color='green')
plt.hist(bad_data_list_abs_diff, 100, range=[0., 5.], alpha=0.5, label='bad data', color='red')
plt.legend(loc='upper right')
plt.xlabel('error')
# plt.gca().set_xlim([-5, 5])
plt.show()
# bias_correction is a multiplicative factor; the estimated bias in percent is (1/bias_correction - 1) * 100
print('Estimated bias: {0}% | Median Abs Rel Error (de-biased): {1}%'.format(round((1. / bias_correction - 1.) * 100., 1), round(np.median(list_abs_rel_error), 1)))
In [ ]: