In [37]:
from __future__ import print_function
import shutil
from collections import defaultdict

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import metrics
from tensorflow.contrib import rnn
import tensorflow.contrib.learn as tflearn
from tensorflow.contrib.learn import learn_runner
from tensorflow.contrib.learn import Experiment, Estimator, MetricSpec
from tensorflow.contrib.metrics import streaming_root_mean_squared_error
In [38]:
SEQ_LEN = 10
def create_time_series():
    freq = (np.random.random() * 0.5) + 0.1   # 0.1 to 0.6
    ampl = np.random.random() + 0.5           # 0.5 to 1.5
    x = np.sin(np.arange(0, SEQ_LEN) * freq) * ampl
    return x
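In [ ]:
# Added sanity check (a quick sketch): each call to create_time_series() returns one
# length-SEQ_LEN numpy array -- a sine wave with a random frequency and amplitude.
print(create_time_series())        # 10 values, roughly between -1.5 and 1.5
print(len(create_time_series()))   # 10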
In [39]:
with open('train.csv', 'w') as train_file:
    for counter in range(0, 100):
        for item in create_time_series():
            train_file.write("%s " % round(item, 3))
        train_file.write("\n")

with open('test.csv', 'w') as test_file:
    for counter in range(0, 100):
        for item in create_time_series():
            test_file.write("%s " % round(item, 3))
        test_file.write("\n")
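In [ ]:
# Added peek (a quick sketch): each line of train.csv should hold one complete
# series, i.e. SEQ_LEN space-separated values.
with open('train.csv') as f:
    for line in f.readlines()[:3]:
        print(line.strip())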
In [40]:
DEFAULTS = [[0.0] for x in range(0, SEQ_LEN)]
BATCH_SIZE = 20
TIMESERIES_COL = 'rawdata'
N_OUTPUTS = 2  # in each sequence, values 1-8 are the features and values 9-10 are the labels
N_INPUTS = SEQ_LEN - N_OUTPUTS

# read the data and convert it to the needed format
def read_dataset(filename, mode=tf.contrib.learn.ModeKeys.TRAIN):
    def _input_fn():
        num_epochs = 100 if mode == tf.contrib.learn.ModeKeys.TRAIN else 1
        # filename could be a path to one file or a file pattern
        input_file_names = tf.train.match_filenames_once(filename)
        filename_queue = tf.train.string_input_producer(
            input_file_names, num_epochs=num_epochs, shuffle=True)
        reader = tf.TextLineReader()
        akey, value = reader.read_up_to(filename_queue, num_records=BATCH_SIZE)
        value_column = tf.expand_dims(value, -1)
        # decode_csv returns one tensor per column -> a list of SEQ_LEN tensors
        all_data = tf.decode_csv(value_column, record_defaults=DEFAULTS)
        # split into features and labels (the same logic is explored in
        # get_dict_features_and_labels below)
        inputs = all_data[:len(all_data) - N_OUTPUTS]   # first N_INPUTS values
        label = all_data[len(all_data) - N_OUTPUTS:]    # last N_OUTPUTS values
        # from a list of tensors to a single tensor with one more dimension
        inputs = tf.concat(inputs, axis=1)
        label = tf.concat(label, axis=1)
        return {TIMESERIES_COL: inputs}, label  # dict of features, label
    return _input_fn
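In [ ]:
# Usage sketch (added): read_dataset() returns an input_fn; calling that input_fn
# builds the reading ops in the current graph. Kept commented out here so no extra
# queue ops are added before training.
# input_fn = read_dataset('train.csv', mode=tf.contrib.learn.ModeKeys.TRAIN)
# features, labels = input_fn()
# print(features[TIMESERIES_COL])   # Tensor of shape (?, N_INPUTS), up to BATCH_SIZE rows
# print(labels)                     # Tensor of shape (?, N_OUTPUTS)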
In [41]:
## peek at the raw lines and decoded columns
## (a top-level copy of the ops built inside _input_fn, so they can be inspected here)
input_file_names = tf.train.match_filenames_once('train.csv')
filename_queue = tf.train.string_input_producer(input_file_names, num_epochs=1, shuffle=True)
reader = tf.TextLineReader()
akey, value = reader.read_up_to(filename_queue, num_records=BATCH_SIZE)
value_column = tf.expand_dims(value, -1)
print('value: ', value)

## to peek at the file names that got matched:
# sess = tf.Session()
# init_op = tf.local_variables_initializer()
# sess.run(init_op)
# print(sess.run(input_file_names))

all_data = tf.decode_csv(value_column, record_defaults=DEFAULTS)
print('all_data: ', all_data)
# sess.run(all_data) would hang here without queue runners -- see the cells below
print(len(all_data))  # one tensor per column -> SEQ_LEN tensors
In [42]:
# sess = tf.Session()
In [43]:
# with tf.Session() as sess:
#     init_op = tf.local_variables_initializer()
#     sess.run(init_op)
#     tf.global_variables_initializer().run()
#     # adding these two lines (a Coordinator plus queue runners) fixed the hang-forever problem
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#     sess.run(all_data)
In [44]:
# all_data is the list of column tensors produced by tf.decode_csv above
def get_dict_features_and_labels(all_data):
    inputs = all_data[:len(all_data) - N_OUTPUTS]   # first N_INPUTS values
    label = all_data[len(all_data) - N_OUTPUTS:]    # last N_OUTPUTS values
    # from a list of tensors to a single tensor with one more dimension
    inputs = tf.concat(inputs, axis=1)
    label = tf.concat(label, axis=1)
    print('inputs={}'.format(inputs))
    return {TIMESERIES_COL: inputs}, label  # dict of features, label

d, l = get_dict_features_and_labels(all_data)
print('d: ', d, '\nl:', l)
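In [ ]:
# Added sketch: actually fetch one batch of features and labels from the ops above.
# As noted in the commented-out cell earlier, sess.run() on these tensors hangs
# forever unless a Coordinator and the queue runners are started first.
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    features_batch, labels_batch = sess.run([d[TIMESERIES_COL], l])
    print(features_batch.shape, labels_batch.shape)   # expect (BATCH_SIZE, 8) and (BATCH_SIZE, 2)
    coord.request_stop()
    coord.join(threads)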
In [45]:
LSTM_SIZE = 3

# create the inference model
def simple_rnn(features, targets, mode):
    # 0. Reformat the input shape to become a sequence: a list of N_INPUTS tensors
    x = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)
    # print('x={}'.format(x))

    # 1. Configure the RNN
    lstm_cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
    outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # slice to keep only the output of the last cell of the RNN
    outputs = outputs[-1]
    # print('last outputs={}'.format(outputs))

    # the prediction is a linear transform of the last RNN output
    weight = tf.Variable(tf.random_normal([LSTM_SIZE, N_OUTPUTS]))
    bias = tf.Variable(tf.random_normal([N_OUTPUTS]))
    predictions = tf.matmul(outputs, weight) + bias

    # 2. Define the loss function for training/evaluation
    # print('targets={}'.format(targets))
    # print('preds={}'.format(predictions))
    loss = tf.losses.mean_squared_error(targets, predictions)
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(targets, predictions)
    }

    # 3. Define the training operation/optimizer
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.01,
        optimizer="SGD")

    # 4. Create predictions
    predictions_dict = {"predicted": predictions}

    # 5. Return ModelFnOps
    return tflearn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
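In [ ]:
# Added sketch of step 0 above: tf.split(tensor, N_INPUTS, 1) turns a
# (batch, N_INPUTS) feature tensor into a list of N_INPUTS tensors of shape
# (batch, 1), which is the sequence format rnn.static_rnn expects.
example = tf.placeholder(tf.float32, [None, N_INPUTS])
split_example = tf.split(example, N_INPUTS, 1)
print(len(split_example), split_example[0].shape)   # 8 (?, 1)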
In [46]:
def get_train():
    return read_dataset('train.csv', mode=tf.contrib.learn.ModeKeys.TRAIN)

def get_valid():
    # evaluate on the held-out series written to test.csv above
    return read_dataset('test.csv', mode=tf.contrib.learn.ModeKeys.EVAL)

def experiment_fn(output_dir):
    # run the experiment
    return Experiment(
        Estimator(model_fn=simple_rnn, model_dir=output_dir),
        train_input_fn=get_train(),
        eval_input_fn=get_valid(),
        eval_metrics={
            'rmse': MetricSpec(
                metric_fn=streaming_root_mean_squared_error,
                prediction_key='predicted'
            )
        }
    )
In [47]:
shutil.rmtree('outputdir', ignore_errors=True) # start fresh each time
learn_runner.run(experiment_fn, 'outputdir')
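In [ ]:
# Added sketch (an assumption, not run above): after training, the checkpoints in
# 'outputdir' could be loaded into a fresh Estimator to generate predictions for
# the held-out series. Kept commented out.
# estimator = Estimator(model_fn=simple_rnn, model_dir='outputdir')
# for pred in estimator.predict(input_fn=get_valid()):
#     print(pred)
#     break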