In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

import src.misc.evaluation as evaluation

In [2]:
# read data
#training_files = "../../dataset/training/"
#trajectories_file = "trajectories(table 5)_training.csv"
#volume_file = "volume(table 6)_training.csv"
#trajectories_df = pd.read_csv(training_files+trajectories_file)
#volume_df = pd.read_csv(training_files+volume_file)

#import src.vector_gen.generateCurrentSituationWithTime as gcswt
#import src.vector_gen.generate_VectorY as gvy
#y_df = gvy.generate_VectorY_df(trajectories_df)


# pre-split training/test data
training_Y = pd.read_csv("src/misc/train_Y.csv", index_col=0)
testing_Y = pd.read_csv("src/misc/test_Y.csv", index_col=0)
training_X = pd.read_csv("src/misc/train_X.csv", index_col=0)
testing_X = pd.read_csv("src/misc/test_X.csv", index_col=0)
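
The four CSVs above were prepared offline. As a hypothetical sketch (the actual split script is not shown here), a chronological split that keeps test rows strictly after training rows could look like this, with x_df/y_df standing in for the generated feature and target frames:

def chronological_split(df, train_frac=0.8):
    # Split a time-ordered DataFrame without shuffling,
    # so the test rows lie strictly after the training rows.
    cut = int(len(df) * train_frac)
    return df.iloc[:cut], df.iloc[cut:]

#train_X, test_X = chronological_split(x_df)
#train_Y, test_Y = chronological_split(y_df)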

In [3]:
print(len(training_X), len(testing_Y))  # training rows, test rows
print(training_X.shape, training_Y.shape)
x_dim = len(training_X.columns)
y_dim = len(training_Y.columns)
print('x: ', x_dim)
print('y: ', y_dim)


1020 48
(1020, 147) (1020, 36)
x:  147
y:  36

In [4]:
# http://radiostud.io/beat-rush-hour-traffic-with-tensorflow-machine-learning/
# https://www.youtube.com/watch?v=PwAGxqrXSCs
# https://www.tensorflow.org/get_started/mnist/beginners

In [5]:
# model
x = tf.placeholder(tf.float32, [None, x_dim], name="x")
y = tf.placeholder(tf.float32, [None, y_dim], name="y")


# single linear layer:
# y_pred = x * weights + biases
weights = tf.Variable(tf.ones([x_dim, y_dim], dtype=tf.float32), name="weight")
biases = tf.Variable(tf.zeros([y_dim], dtype=tf.float32), name="bias")

y_pred = tf.add(tf.matmul(x, weights), biases)

# optional activation function: ReLU (rectified linear unit)
# https://www.tensorflow.org/api_guides/python/nn
#y_pred = tf.nn.relu(y_pred)


# cost
with tf.name_scope("cost_func"):
    # cost/loss function: mean absolute percentage error (without the *100 factor)
    #cost_func = tf.reduce_mean(evaluation.mape2(y_pred=y_pred, y_true=y))
    #cost_func = tf.metrics.mean_absolute_error(y_pred, y)
    #cost_func = tf.reduce_mean(tf.metrics.mean_absolute_error(y_pred, y))
    #cost_func = -tf.reduce_sum(y*tf.log(y_pred))
    cost_func = tf.reduce_mean(tf.div(tf.abs(y_pred - y), y))

# train
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(cost_func)
    #optimizer = tf.train.AdamOptimizer().minimize(cost_func)
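
The active cost is a MAPE-style error, mean(|y_pred - y| / y), i.e. MAPE without the usual *100 factor; note that it is undefined whenever a true value is 0. A quick sanity check of the same formula in plain NumPy (illustrative values only):

y_true_np = np.array([[10., 20.], [30., 40.]])
y_hat_np = np.array([[12., 18.], [27., 44.]])

# elementwise ratios are (0.2, 0.1, 0.1, 0.1), so the mean is 0.125
print(np.mean(np.abs(y_hat_np - y_true_np) / y_true_np))  # 0.125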

In [6]:
# start session and train
epochs = 30
batch_size = 1  # batch size of 1 -> plain stochastic gradient descent


sess = tf.Session()
sess.run(tf.global_variables_initializer())

tf.summary.scalar('cost', cost_func)
tf.summary.histogram('weights', weights)
tf.summary.histogram('biases', biases)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter('./tftrain', sess.graph)
#test_writer = tf.summary.FileWriter('./tftest')

for epoch in range(epochs):
    epoch_loss = 0
    for batch in range(len(training_X) // batch_size):
        x_batch = training_X[batch*batch_size: (batch+1)*batch_size]
        y_batch = training_Y[batch*batch_size: (batch+1)*batch_size]
        
        # Occasionally report accuracy
        #if batch % 100 == 0:
        #    [train_accuracy] = sess.run([cost_func], feed_dict={x: x_batch, y: y_batch})
        #    print("epoch %d, batchstep %d, training accuracy %g" % (epoch, batch, train_accuracy))
            

        #run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        #run_metadata = tf.RunMetadata()
        
        # train
        _, c = sess.run([optimizer, cost_func], feed_dict={x: x_batch, y: y_batch})
        
        epoch_loss += c
        
    # cost on the final batch of the epoch (printed as "training accuracy")
    [train_accuracy] = sess.run([cost_func], feed_dict={x: x_batch, y: y_batch})
    print("epoch %d, loss %d, training accuracy %g" % (epoch, epoch_loss, train_accuracy))
    s = sess.run(merged_summary, feed_dict={x: x_batch, y: y_batch})
    writer.add_summary(s, epoch)

print('Epoch', epoch, 'loss', epoch_loss)

# TODO: WRONG!!!??? (verify this evaluation step)
prediction = y_pred.eval(feed_dict={x: testing_X}, session=sess)
mape = evaluation.mape(prediction, testing_Y)

print('mean MAPE\n', np.mean(mape))

print('MAPE\n', mape)


epoch 0, loss 23323, training accuracy 27.1096
epoch 1, loss 7615, training accuracy 4.50463
epoch 2, loss 1828, training accuracy 0.74091
epoch 3, loss 1051, training accuracy 0.482714
epoch 4, loss 931, training accuracy 0.419372
epoch 5, loss 844, training accuracy 0.431106
epoch 6, loss 783, training accuracy 0.384041
epoch 7, loss 738, training accuracy 0.33008
epoch 8, loss 702, training accuracy 0.364702
epoch 9, loss 671, training accuracy 0.330244
epoch 10, loss 644, training accuracy 0.288913
epoch 11, loss 622, training accuracy 0.303864
epoch 12, loss 602, training accuracy 0.290959
epoch 13, loss 584, training accuracy 0.280861
epoch 14, loss 568, training accuracy 0.27843
epoch 15, loss 553, training accuracy 0.26571
epoch 16, loss 540, training accuracy 0.265181
epoch 17, loss 528, training accuracy 0.248774
epoch 18, loss 516, training accuracy 0.243498
epoch 19, loss 506, training accuracy 0.271226
epoch 20, loss 497, training accuracy 0.27626
epoch 21, loss 488, training accuracy 0.252426
epoch 22, loss 480, training accuracy 0.263913
epoch 23, loss 472, training accuracy 0.248076
epoch 24, loss 465, training accuracy 0.244745
epoch 25, loss 459, training accuracy 0.256228
epoch 26, loss 452, training accuracy 0.266013
epoch 27, loss 447, training accuracy 0.24262
epoch 28, loss 442, training accuracy 0.246797
epoch 29, loss 437, training accuracy 0.244983
Epoch 29 loss 437.313999414
mean MAPE
 0.5673510600607816
MAPE
 (0, 'A2')    0.310303
(0, 'A3')    0.259794
(0, 'B1')    0.763500
(0, 'B3')    0.328908
(0, 'C1')    1.350998
(0, 'C3')    0.584215
(1, 'A2')    0.322280
(1, 'A3')    0.317750
(1, 'B1')    0.633285
(1, 'B3')    0.384761
(1, 'C1')    0.676494
(1, 'C3')    1.141455
(2, 'A2')    0.255639
(2, 'A3')    0.340932
(2, 'B1')    0.680145
(2, 'B3')    0.367110
(2, 'C1')    0.452015
(2, 'C3')    1.213254
(3, 'A2')    0.318896
(3, 'A3')    0.260254
(3, 'B1')    0.676212
(3, 'B3')    0.321978
(3, 'C1')    0.665480
(3, 'C3')    0.702309
(4, 'A2')    0.343952
(4, 'A3')    0.368121
(4, 'B1')    0.646947
(4, 'B3')    0.386585
(4, 'C1')    0.833131
(4, 'C3')    0.691485
(5, 'A2')    0.324430
(5, 'A3')    0.309331
(5, 'B1')    0.695929
(5, 'B3')    0.412945
(5, 'C1')    0.972528
(5, 'C3')    1.111288
dtype: float64
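
With batch_size = 1, the loop above performs plain SGD over the rows in the same fixed order every epoch. A common variant (not what produced the run above) is to reshuffle mini-batches each epoch; a sketch assuming the same training_X/training_Y DataFrames:

def iterate_minibatches(X_df, Y_df, batch_size, rng):
    # Yield shuffled mini-batches; the last partial batch is dropped.
    idx = rng.permutation(len(X_df))
    for start in range(0, len(idx) - batch_size + 1, batch_size):
        sel = idx[start:start + batch_size]
        yield X_df.iloc[sel], Y_df.iloc[sel]

#rng = np.random.RandomState(42)
#for x_batch, y_batch in iterate_minibatches(training_X, training_Y, 32, rng):
#    sess.run([optimizer, cost_func], feed_dict={x: x_batch, y: y_batch})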

In [7]:
#pd.DataFrame(prediction, index=testing_Y.index, columns=testing_Y.columns)

In [8]:
#testing_Y
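
Housekeeping the notebook omits (assuming the session and summary writer from cell 6 are still open): flush the TensorBoard events and release the session.

writer.close()  # flush pending summaries to ./tftrain
sess.close()    # release TensorFlow session resources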