In [1]:
from tensorflow import keras
In [2]:
import pandas as pd
import numpy as np
df_raw = pd.read_csv("datasets/NAB-art_daily_small_noise.csv")
In [3]:
df_raw.head()
Out[3]:
In [4]:
df_raw.isna().sum()
Out[4]:
In [5]:
df = df_raw.dropna()
df["timestamp"] = pd.to_datetime(df["timestamp"])
In [6]:
df.describe()
Out[6]:
In [7]:
df.columns
Out[7]:
In [8]:
df["timestamp"]
Out[8]:
In [9]:
%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [9.5, 6]
In [10]:
figure = plt.figure()
plt.plot(df["timestamp"], df["value"])
Out[10]:
In [11]:
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
In [12]:
# scaler.fit(df["value"].values.reshape(-1,1))
In [13]:
# scaled_data = scaler.transform(df["value"].values.reshape(-1,1))
In [14]:
# scaled_data
In [15]:
# df["scaled"] = scaled_data
In [16]:
# df
In [17]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
In [18]:
input_length = 40
batch_size = 10
X_input = df['value']
generator = TimeseriesGenerator(X_input, X_input, length=input_length, batch_size=batch_size)
len(generator)
Out[18]:
In [19]:
generator[0]
Out[19]:
In [20]:
generator[1]
Out[20]:
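For reference, TimeseriesGenerator just slides a window over the data: each sample is a window of length consecutive values and its target is the value immediately after that window. A tiny illustration with a toy array (added here for clarity, not part of the original session):

toy = np.arange(6).reshape(-1, 1)
toy_gen = TimeseriesGenerator(toy, toy, length=3, batch_size=2)
x, y = toy_gen[0]
# x[0] is [[0], [1], [2]] and y[0] is [3] -- the window and the value that follows it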
In [21]:
input_length = 200
batch_size = 20
num_features = 1
X = X_input.values.reshape(X_input.shape[0], 1)
generator = TimeseriesGenerator(X, X, length=input_length, batch_size=batch_size)
len(generator)
Out[21]:
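A quick shape check (added for clarity, not in the original run) confirms what the reshaped generator now feeds the network:

x_batch, y_batch = generator[0]
print(x_batch.shape, y_batch.shape)  # expect (20, 200, 1) for the windows and (20, 1) for the targets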
In [22]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, TimeDistributed
from tensorflow.keras.optimizers import RMSprop, Adam
In [23]:
model = Sequential()
model.add(LSTM(100, input_shape=(input_length, num_features), return_sequences=True))
model.add(LSTM(100))  # input shape is inferred from the previous layer
model.add(Dense(num_features, activation='relu'))
optimizer = Adam()
model.compile(loss='mse', optimizer=optimizer)
model.summary()
In [24]:
model.fit(generator, epochs=5)  # fit() accepts the generator directly; fit_generator() is deprecated
Out[24]:
In [25]:
steps_to_predict = 300
start_values = X_input.iloc[-input_length:].copy()
start_times = df['timestamp'].iloc[-input_length:].copy()
for i in range(steps_to_predict):
    # Predict the next point from the most recent input_length values
    window = start_values.iloc[-input_length:].values.reshape(1, input_length, 1)
    next_value = model.predict(window, verbose=0)
    start_values = pd.concat([start_values, pd.Series([next_value[0, 0]])], ignore_index=True)
    # The data is sampled every 5 minutes, so step the timestamp forward by one interval
    next_time = start_times.iloc[-1] + pd.Timedelta(minutes=5)
    start_times = pd.concat([start_times, pd.Series([next_time])], ignore_index=True)
In [26]:
start_values
Out[26]:
In [27]:
start_times
Out[27]:
In [28]:
forecast = plt.figure()
plt.plot(start_times, start_values, color='green')
# plt.plot(start_times, scaler.inverse_transform(start_values), color='green')
plt.plot(df["timestamp"].iloc[-400:], X_input.iloc[-400:])
# plt.plot(next_time, next_value, color='green')
Out[28]:
Scaling the data didn't help, and the LSTM doesn't seem to handle this square-wave-like signal well. Since the series is so strongly periodic, I should probably difference the data first.
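As a rough sketch of that next step (not run here), the series could be differenced before building the generator, and the forecasted differences cumulatively summed back onto the last observed value to recover the original scale; predicted_diffs below is a placeholder for whatever array of forecasted steps a retrained model would produce:

# Sketch only: difference the series so the model predicts changes rather than levels
diff_values = df['value'].diff().dropna()
X_diff = diff_values.values.reshape(-1, 1)
diff_generator = TimeseriesGenerator(X_diff, X_diff, length=input_length, batch_size=batch_size)
# ... fit the model on diff_generator as above ...
# forecast_levels = df['value'].iloc[-1] + np.cumsum(predicted_diffs)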