In [50]:
# imports
import sys, os, argparse, logging  # NOQA
import time
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pprint import pprint
from tqdm import tqdm
from IPython.core.debugger import Tracer
#Tracer()()
### prevent the dying jupyter notebook
stdout = sys.stdout
#sys.stdout = sys.__stdout__ # did not work to restore print -> console
#sys.stdout = open('keras_output.txt', 'a+')
#sys.stdout = stdout
import utils
importlib.reload(utils)
import twBase
importlib.reload(twBase)
%matplotlib inline
np.random.seed(42)
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
Out[50]:
Out[50]:
In [48]:
import PrognoseModel as Model
importlib.reload(Model)
import PrognoseRun as Run
importlib.reload(Run)
#from twQuoraRun import * # NOQA
args = Run.process_command_line(["train"])
P = Run.get_parameters(args)
Out[48]:
Out[48]:
In [1]:
df = pd.read_csv(P.DATA.DATA_FN, nrows=None, sep=';', decimal=',', parse_dates=[0])
df.info()
In [177]:
raw_cols = ['Datum', 'KProg [MWh]', 'Ist [MWh]', '[%]', '[Lux]', '[W/m²]',
'[Lux].1', '[hPa]', '[°C]', '[°C].1', '[°C].2', '[m/s]', '[°]']
cols = ['Ist [MWh]', '[%]', '[Lux]', '[W/m²]',
'[Lux].1', '[hPa]', '[°C]', '[°C].1', '[°C].2', '[m/s]', '[°]']
ts = df[cols].values
ts = ts.astype('float32')
ts.shape
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
ts = scaler.fit_transform(ts)
utils.plot1(ts[:10,0])
Out[177]:
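Note that the scaler above is fit on the full series, so min/max statistics from the later validation period leak into training. A minimal sketch of the stricter variant, fitting on a training prefix only (ts_raw is a hypothetical name for the unscaled float32 array):
In [ ]:
# sketch only: fit the scaler on the training prefix to avoid leakage;
# ts_raw is a hypothetical name for the unscaled float32 array from above
split = int(len(ts_raw) * 0.8)
scaler_tr = MinMaxScaler(feature_range=(0, 1))
ts_train = scaler_tr.fit_transform(ts_raw[:split])
ts_valid = scaler_tr.transform(ts_raw[split:])  # transform only, no fit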
In [178]:
ts.shape
ts[:2,:]
Out[178]:
Out[178]:
In [231]:
# create a flattened 'history' vector of shape (hist_len, n_features) per timestep
seq_len = 100
hist_len = seq_len - 1
step = 1
def get_precedingSeqs(ts, time_index, seq_len=1):
    '''
    takes ts
    return: flattened array of the preceding ts vectors, from t-seq_len to t-1
    '''
    vec = []
    if len(ts) < seq_len+1:
        return 0
    # note: for time_index < seq_len this wraps around via negative indexing
    for j in range(seq_len, 0, -1):
        vec.append(ts[time_index-j])
    vec = np.array(vec)
    return vec.reshape((vec.shape[0]*vec.shape[1]))
vec = get_precedingSeqs(ts, 10, hist_len)
vec.shape
Out[231]:
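A quick sanity check on the layout of the flattened history vector: reshaped to (hist_len, n_features), it should reproduce the rows ts[t-hist_len:t], oldest first (a sketch; note the wrap-around for t < seq_len mentioned above):
In [ ]:
# sanity-check sketch: hist_len rows of ts, oldest first, features contiguous
t = 10
hist = vec.reshape(hist_len, ts.shape[1])
assert np.allclose(hist[0], ts[t - hist_len])  # oldest entry (wraps for small t)
assert np.allclose(hist[-1], ts[t - 1])        # most recent entry
hist.shape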
In [232]:
def plot_precSeqsVec(vec, seq_len):
'''
plots the n preceding time steps for the Energy variable
'''
y = []
for i in range(0, len(vec), int(len(vec)/seq_len)):
y.append(vec[i])
plt.plot(y)
plot_precSeqsVec(vec, hist_len)
In [233]:
def shift_precedingSeqsVec(vec, new_vec):
x = vec[len(new_vec):] # pop oldest
vec = np.concatenate((x, new_vec))
return vec
new_vec = shift_precedingSeqsVec(vec, ts[11])
new_vec.shape
Out[233]:
In [234]:
plot_precSeqsVec(new_vec, seq_len-1)
In [237]:
arr = np.zeros((ts.shape[0], ts.shape[1]*hist_len))
for i in range(seq_len, len(ts), 1):  # rows before seq_len keep an all-zero history
    arr[i,:] = get_precedingSeqs(ts, i, hist_len)
data = np.concatenate((ts, arr), axis=1)
data.shape
Out[237]:
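The loop above calls get_precedingSeqs once per timestep; the same matrix can be built without a Python loop. A sketch, assuming NumPy >= 1.20 for sliding_window_view:
In [ ]:
# vectorized alternative (sketch, assumes NumPy >= 1.20)
from numpy.lib.stride_tricks import sliding_window_view
win = sliding_window_view(ts, hist_len, axis=0)         # (n-hist_len+1, n_feat, hist_len)
win = win.transpose(0, 2, 1).reshape(win.shape[0], -1)  # flatten each window row-major
arr2 = np.zeros_like(arr)
arr2[seq_len:] = win[seq_len - hist_len:len(ts) - hist_len]
np.allclose(arr, arr2)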
In [238]:
X = data[:,1:]
y = data[:,0]
y.shape, X.shape
plt.plot(y[:hist_len])
Out[238]:
Out[238]:
In [239]:
h5_file = P.DATA.H5DATA_FN
if os.path.exists(h5_file):
os.remove(h5_file)
f = utils.h5py.File(h5_file, 'w')
f['X'] = X
f['y'] = y
f.close()
log.info("Saved", data=h5_file)
In [5]:
y = df[cols[0]]
utils.autocorrelation_plot2d(y)
Out[5]:
In [6]:
utils.autocorrelation_plot_all(y);
In [28]:
print(utils.test_corr(y))
In [30]:
utils.test_stationary(y)
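test_stationary is a project helper whose internals are not shown here; as a rough sketch of what such a check typically does, an augmented Dickey-Fuller test via statsmodels (the exact implementation inside utils is an assumption):
In [ ]:
# ADF stationarity check (sketch; assumption about what utils.test_stationary does)
from statsmodels.tsa.stattools import adfuller
adf_stat, p_value, _, _, crit, _ = adfuller(y.dropna())
print('ADF statistic: {:.3f}, p-value: {:.4f}'.format(adf_stat, p_value))
for level, value in crit.items():
    print('critical value ({}): {:.3f}'.format(level, value))
# p-value < 0.05 -> reject the unit-root hypothesis (series likely stationary)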
In [240]:
def load(P, isTest=False):
h5data_fn = P.DATA.H5DATA_FN
assert os.path.exists(h5data_fn), "{} does not exist".format(h5data_fn)
    f = utils.h5py.File(h5data_fn, 'r')
    X = f['X'][:]
    y = f['y'][:]
f.close()
if P.DATA.isSample:
limit = int(len(y) * 0.01)
y = y[:limit]
X = X[:limit]
# shuffle data
if not isTest:
indices = np.arange(y.shape[0])
np.random.shuffle(indices)
y = y[indices]
X = X[indices]
# split into train/valid
nb_validation_samples = int(P.TRAINING.VALIDATION_SPLIT * y.shape[0])
y_train = y[:-nb_validation_samples]
X_train = X[:-nb_validation_samples]
y_val = y[-nb_validation_samples:]
X_valid = X[-nb_validation_samples:]
return (X_train, y_train, X_valid, y_val)
else:
return (X)
X_train, y_train, X_val, y_val = load(P)
X_train.shape, X_val.shape
Out[240]:
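Since load() shuffles before splitting, validation samples are drawn from inside the training period, which is optimistic for a time series. A sketch of the chronological alternative on the same arrays:
In [ ]:
# sketch: chronological split, validation block strictly after the training block
n_val = int(P.TRAINING.VALIDATION_SPLIT * len(y))
X_tr_chrono, y_tr_chrono = X[:-n_val], y[:-n_val]
X_va_chrono, y_va_chrono = X[-n_val:], y[-n_val:]
X_tr_chrono.shape, X_va_chrono.shape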
In [242]:
def preprocess(x):
return x
class BaseModel():
classtype = 'dense' # class variable
classname = 'BaseModel'
def __init__(self, input_shape, layers):
#self.FILE_PATH = 'http://www.platform.ai/models/'
dpath = P.OUTPUT.MODEL_DIR
self.modelPath = dpath+self.classname+'.h5'
self.checkpoint = utils.ModelCheckpoint(filepath= dpath + 'checkpoint-{epoch:02d}-{loss:.3f}-{val_loss:.3f}.hdf5')
self.csvLogger = utils.CSVLogger(dpath+'trainingLog.csv', separator=';', append=True)
self.create(input_shape, layers)
def create(self, input_shape, layers):
print('> Create Model')
start = time.time()
dropout = 0.2
model = self.model = utils.Sequential()
model.add(utils.Dense(
input_shape=(input_shape,),
units=layers[0],
activation='relu'))
model.add(utils.Dropout(dropout))
model.add(utils.Dense(
units=layers[1],
activation='relu'))
model.add(utils.Dropout(dropout))
model.add(utils.Dense(
units=layers[1],
activation='relu'))
model.add(utils.Dropout(dropout))
model.add(utils.Dense(
units=layers[1],
activation='relu'))
model.add(utils.Dropout(dropout))
model.add(utils.Dense(
units=layers[1],
activation='relu'))
model.add(utils.Dropout(dropout))
model.add(utils.Dense(
units=1,
activation='linear'))
#model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mape'])
model.compile(optimizer='adam', loss='mse')
print("Compilation Time : ", time.time() - start)
    def save(self):
        self.model.save(self.modelPath)
        print("model saved to: ", self.modelPath)
    def load(self):
        from keras.models import load_model
        self.model = load_model(self.modelPath)
        print("model loaded from: ", self.modelPath)
    def train(self, X_train, y_train, epochs=1, batch_size=64, val_split=0.05, verbose=1):
        # note: val_split is unused; validation runs against the global X_val/y_val
        global_start_time = time.time()
        history = None
        try:
            history = self.model.fit(
                X_train, y_train,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(X_val, y_val),
                verbose=verbose,
                callbacks=[self.checkpoint, self.csvLogger])
        except KeyboardInterrupt:
            pass
        print('Training duration (s) : ', time.time() - global_start_time)
        return self.model, history
m = BaseModel(X_train.shape[1], [256, 512]) #bias added
m.classtype
m.modelPath
m.model.summary()
Out[242]:
Out[242]:
In [ ]:
class BaseLSTM(BaseModel):
    classtype = 'lstm'  # class variable
    classname = 'BaseLSTM'
    def __init__(self, layers):
        # BaseModel.__init__ expects (input_shape, layers) and would build the
        # dense graph, so set up paths/callbacks here and build the LSTM directly
        dpath = P.OUTPUT.MODEL_DIR
        self.modelPath = dpath + self.classname + '.h5'
        self.checkpoint = utils.ModelCheckpoint(filepath=dpath + 'checkpoint-{epoch:02d}-{loss:.3f}-{val_loss:.3f}.hdf5')
        self.csvLogger = utils.CSVLogger(dpath + 'trainingLog.csv', separator=';', append=True)
        self.create(layers)
    def create(self, layers):
        from keras.layers import Input, LSTM, Dense
        from keras.models import Model as KerasModel  # `Model` is shadowed by `import PrognoseModel as Model`
        print('> Create Model', self.classname, ' type: ', self.classtype)
        start = time.time()
        inputs = Input(shape=(seq_len, layers[0]))
        x = LSTM(layers[1], activation='relu', return_sequences=True)(inputs)
        #x = Dropout(0.1)(x)
        x = LSTM(layers[2], activation='relu', return_sequences=False)(x)
        #x = Dropout(0.1)(x)
        preds = Dense(layers[3], activation='linear')(x)
        self.model = KerasModel(inputs=inputs, outputs=preds)
        self.model.compile(optimizer='rmsprop', loss='mse', metrics=['mse'])
        print("Compilation Time : ", time.time() - start)
#m = BaseLSTM([1, seq_len+1, (seq_len+1)*2, 1]) #bias added
m = BaseLSTM([1, 50, 50, 1]) #bias added
m.modelPath
m.model.summary()
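As configured ([1, 50, 50, 1]) the LSTM expects input of shape (seq_len, 1), whereas X_train holds flattened vectors. One way to bridge this, sketched under the column layout built above (10 current-step features followed by hist_len blocks of ts.shape[1] features), is to unflatten the history block back to 3D:
In [ ]:
# sketch: unflatten the history part of X_train to (samples, timesteps, features);
# with layers[0] = ts.shape[1] the Input shape would be (hist_len, 11), not (seq_len, 1)
n_feat = ts.shape[1]
X_seq = X_train[:, 10:].reshape(-1, hist_len, n_feat)
X_seq.shape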
In [243]:
%%capture output
# note: only one cell magic is allowed per cell, so %%time is omitted here;
# train() prints its own wall-clock duration
sys.stdout = open('keras_output.txt', 'a+')
model, hist = m.train(X_train, y_train, epochs=100, verbose=1)
sys.stdout = stdout
hist
#m.save()
Out[243]:
In [227]:
output.show()
In [244]:
plt.plot(hist.history['loss'], label='loss')
plt.plot(hist.history['val_loss'], label='val_loss')
plt.legend()
Out[244]:
Out[244]:
Out[244]:
In [ ]:
from keras.models import load_model
# returns a compiled model
# identical to the previous one
model = load_model(m.modelPath)  # assumes the model was saved via m.save()
In [245]:
offset = 200
length = 300
X_new = np.copy(X)
Y_pred = []
for i in range(offset, offset+length):
y_pred = m.model.predict(X_new[i,:].reshape((1, X_new[i].shape[0])))
y_pred = np.squeeze(y_pred)
Y_pred.append(y_pred)
#print("delta: ", y_pred, y[i] - y_pred)
# split up the input vector into v0 and vHist
x0 = X_new[i, :10]
x0 = np.insert(x0, 0, y_pred)
vec0 = X_new[i, 10:]
#plot_precSeqsVec(vec0, hist_len)
# shift the sliding windows by the new vector to left
new_vec = shift_precedingSeqsVec(vec0, x0)
#plot_precSeqsVec(new_vec, hist_len)
# construct the new input vector for predict:
# take the weather data of t+1 and concat with the last prediction
#x1 = X_new[i+1, :10]
X_new[i+1, 10:] = new_vec
In [246]:
x = [i for i in range(offset, offset+length)]
plt.plot(x, y[offset:offset+length], label="true")
plt.plot(x, Y_pred, label="pred")
plt.legend()
Out[246]:
Out[246]:
Out[246]:
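mean_squared_error was imported at the top but never used; a sketch that scores the recursive forecast with it, on the scaled values and mapped back to MWh via the fitted scaler's per-feature attributes (data_min_, data_range_):
In [ ]:
# sketch: RMSE of the recursive forecast, scaled and in MWh (column 0 is 'Ist [MWh]')
y_true = data[offset:offset + length, 0].astype('float32')
y_hat = np.asarray(Y_pred, dtype='float32')
rmse_scaled = np.sqrt(mean_squared_error(y_true, y_hat))
to_mwh = lambda v: v * scaler.data_range_[0] + scaler.data_min_[0]
rmse_mwh = np.sqrt(mean_squared_error(to_mwh(y_true), to_mwh(y_hat)))
print("RMSE scaled: {:.4f}   RMSE [MWh]: {:.3f}".format(rmse_scaled, rmse_mwh))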
In [ ]: