In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from datetime import datetime
In [2]:
'''Data loading'''
f_names_national = [
    '2012 Conso Prod.csv',
    '2013 Conso Prod.csv',
    '2014 Conso Prod.csv',
    '2015 Conso Prod.csv'
]

pd.set_option('display.max_columns', 100)
datas = []
for f_name in f_names_national:
    data = pd.read_csv('data/' + f_name, delimiter='\t', encoding='ISO-8859-1')
    # Drop rows with missing consumption or date values
    data = data[data.Consommation.notnull()]
    data = data[data.Date.notnull()]
    # Build a single datetime column from the separate Date and Heures fields
    data['timestamp'] = [str(d) + ' ' + str(t) for d, t in zip(data['Date'].values, data['Heures'].values)]
    data['timestamp'] = pd.to_datetime(data['timestamp'], format='%Y-%m-%d %H:%M')
    datas.append(data)
data_final = pd.concat(datas).reset_index()
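In [ ]:
# Sanity check (not in the original notebook): inspect the concatenated frame.
# The column names are taken from the loading code above.
print(data_final.shape)
data_final[['Date', 'Heures', 'Consommation', 'timestamp']].head()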
In [3]:
# Sort values: the first element of the dataset is the most recent:
# y(t), y(t-1), etc.
data_final = data_final.sort_values(by=['Date', 'Heures'], ascending=[False, False])

'''tau: periodicity parameter'''
tau = 48  # 48 half-hourly steps, i.e. a 24 h correlation window. One could instead
# take tau = 1 year to correlate each point with the data from the previous year.
def data_labels(dataframe=data_final, field='Consommation', tau=tau):
    X = dataframe[field].values
    # With the newest-first ordering, np.roll(X, -i) pulls in the value from i steps
    # earlier, so row t reads [y(t), y(t-1), ..., y(t-tau)]. Note that np.roll wraps
    # around, so the last tau rows mix in values from the start of the series.
    X_ = np.stack([np.roll(X, -i) for i in range(tau + 1)], axis=1)
    labels = X_[:, :1]  # y(t)
    data = X_[:, 1:]    # y(t-1), ..., y(t-tau)
    return data, labels
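In [ ]:
# Illustration (not in the original notebook): the same lag construction on a
# tiny newest-first toy series, here with tau = 2.
X_toy = np.array([5, 4, 3, 2, 1, 0])  # y(5), y(4), ..., y(0), newest first
print(np.stack([np.roll(X_toy, -i) for i in range(3)], axis=1))
# row 0 is [y(5), y(4), y(3)]; the last two rows wrap around and would be
# discarded in a stricter implementation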
In [12]:
# Creating the training set and the validation set:
# three years (2012-2014) for training, one year (2015) held out.
n_samples = data_final.Consommation.values.shape[0]
data_train, labels_train = data_labels(dataframe = data_final[data_final['Date'] <= '2014-12-31'])
data_test, labels_test = data_labels(dataframe = data_final[data_final['Date'] > '2014-12-31'])
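In [ ]:
# Quick check (not in the original notebook): the two splits should add up to
# n_samples rows, with tau = 48 feature columns each.
print(data_train.shape, labels_train.shape)
print(data_test.shape, labels_test.shape)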
In [5]:
from keras.models import Sequential
from keras.layers import Dense
In [6]:
# fix random seed for reproducibility
np.random.seed(7)
In [37]:
# create and fit Multilayer Perceptron model
model = Sequential()
model.add(Dense(20, input_dim=tau, activation='relu'))
model.add(Dense(20, activation='relu'))  # input_dim is only needed on the first layer
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(data_train, labels_train, epochs=20, batch_size=100, verbose=2)
In [38]:
# Estimate model performance
trainScore = model.evaluate(data_train, labels_train, verbose=0)
print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore, np.sqrt(trainScore)))
testScore = model.evaluate(data_test, labels_test, verbose=0)
print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore, np.sqrt(testScore)))
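In [ ]:
# For context (not in the original notebook): a naive persistence baseline that
# predicts y(t) with y(t-1), i.e. the first feature column.
baselineScore = np.mean((data_test[:, :1] - labels_test) ** 2)
print('Persistence: %.2f MSE (%.2f RMSE)' % (baselineScore, np.sqrt(baselineScore)))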
In [39]:
# generate predictions for the training and test sets
trainPredict = model.predict(data_train)
testPredict = model.predict(data_test)
# shift train predictions by tau steps so they line up with the raw series when plotted
trainPredictPlot = np.full((n_samples, 1), np.nan)
trainPredictPlot[tau:len(trainPredict), :] = trainPredict[:-tau, :]
# shift test predictions past the training region
testPredictPlot = np.full((n_samples, 1), np.nan)
testPredictPlot[len(trainPredict) + tau + 1:n_samples, :] = testPredict[:-tau - 1, :]
# plot the raw series against the shifted train and test predictions
sl = slice(20000, 21000)
plt.figure(figsize=(20, 5))
plt.plot(data_final['Consommation'].values[sl], label='actual')
plt.plot(trainPredictPlot[sl], label='train prediction')
plt.plot(testPredictPlot[sl], label='test prediction')
plt.legend()
plt.show()
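In [ ]:
# Optional sketch (not in the original notebook): plot against the real timestamps
# built during loading instead of raw row indices, using the mdates import from
# the first cell.
ts = data_final['timestamp'].values[sl]
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(ts, data_final['Consommation'].values[sl], label='actual')
ax.plot(ts, trainPredictPlot[sl], label='train prediction')
ax.plot(ts, testPredictPlot[sl], label='test prediction')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.legend()
plt.show()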