Prognose Werk LPZ

Variables/Functions ^


In [50]:
# imports
import sys, os, argparse, logging  # NOQA
from pprint import pprint
from tqdm import tqdm
import importlib

from IPython.core.debugger import Tracer
#Tracer()()

### prevent the dying jupyter notebook: keep a handle on the original stdout
stdout = sys.stdout
#sys.stdout = sys.__stdout__  # did not work to restore print -> console
#sys.stdout = open('keras_output.txt', 'a+')
#sys.stdout = stdout

import utils
importlib.reload(utils)

import twBase
importlib.reload(twBase)

%matplotlib inline

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

np.random.seed(42)

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error


Out[50]:
<module 'utils' from '/home/tw/dev/py/module/twBase/utils.py'>
Out[50]:
<module 'twBase' from '/home/tw/dev/py/module/twBase/twBase.py'>

In [48]:
import PrognoseModel as Model
importlib.reload(Model)

import PrognoseRun as Run
importlib.reload(Run)
#from twQuoraRun import *  # NOQA

args = Run.process_command_line(["train"])
P = Run.get_parameters(args)


Out[48]:
<module 'PrognoseModel' from '/home/tw/nbs/Prognose/PrognoseModel.py'>
/home/tw/nbs/Prognose/PrognoseRun.py
Out[48]:
<module 'PrognoseRun' from '/home/tw/nbs/Prognose/PrognoseRun.py'>

Data ^


In [1]:
df = pd.read_csv(P.DATA.DATA_FN, nrows=None, sep=';', decimal=',', parse_dates=[0])
df.info()



In [177]:
raw_cols = ['Datum', 'KProg [MWh]', 'Ist [MWh]', '[%]', '[Lux]', '[W/m²]',
       '[Lux].1', '[hPa]', '[°C]', '[°C].1', '[°C].2', '[m/s]', '[°]']

cols = ['Ist [MWh]', '[%]', '[Lux]', '[W/m²]',
       '[Lux].1', '[hPa]', '[°C]', '[°C].1', '[°C].2', '[m/s]', '[°]']

ts = df[cols].values
ts = ts.astype('float32')
ts.shape

# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
ts = scaler.fit_transform(ts)

utils.plot1(ts[:10,0])


Out[177]:
(8760, 11)

In [178]:
ts.shape
ts[:2,:]


Out[178]:
(8760, 11)
Out[178]:
array([[  2.4660e-01,   9.9700e-01,   2.6667e-05,   5.0177e-03,   2.8000e-03,   9.9316e-01,
          1.8764e-01,   1.5883e-01,   3.3641e-01,   1.5233e-01,   7.0748e-01],
       [  2.4612e-01,   9.9700e-01,   0.0000e+00,   4.5109e-03,   2.5333e-03,   9.9316e-01,
          1.8764e-01,   1.6510e-01,   3.3641e-01,   1.5067e-01,   7.0481e-01]], dtype=float32)

In [231]:
# create a flattened 'history' vector (hist_len * n_features) per time step
seq_len = 100
hist_len = seq_len - 1
step = 1

def get_precedingSeqs(ts, time_index, seq_len=1):
    '''
    collects the seq_len time steps preceding time_index,
    i.e. ts[time_index-seq_len] ... ts[time_index-1],
    and returns them flattened into one vector
    '''
    if time_index < seq_len:
        return None  # not enough history; would wrap around via negative indexing
    vec = []
    for j in range(seq_len, 0, -1):
        vec.append(ts[time_index - j])
    vec = np.array(vec)
    return vec.reshape(vec.shape[0] * vec.shape[1])

vec = get_precedingSeqs(ts, seq_len, hist_len)  # first index with a full history
vec.shape


Out[231]:
(1089,)
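
The flattened history has hist_len × n_features = 99 × 11 = 1,089 entries per time step, which matches the shape above.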

In [232]:
def plot_precSeqsVec(vec, seq_len):
    '''
    plots the energy variable (feature 0) over the seq_len preceding
    time steps contained in a flattened history vector
    '''
    n_features = int(len(vec) / seq_len)  # stride across the flattened vector
    y = []
    for i in range(0, len(vec), n_features):
        y.append(vec[i])
    plt.plot(y)

plot_precSeqsVec(vec, hist_len)



In [233]:
def shift_precedingSeqsVec(vec, new_vec):
    '''shifts the history window: drops the oldest time step, appends the newest'''
    x = vec[len(new_vec):]  # pop the oldest time step
    vec = np.concatenate((x, new_vec))
    return vec

new_vec = shift_precedingSeqsVec(vec, ts[seq_len])  # slide the window one step forward
new_vec.shape


Out[233]:
(1089,)
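
To make the window semantics concrete, a tiny sketch (editor's illustration with toy numbers, not part of the original run), using a 3-step history of 2 features each:

In [ ]:
v = np.array([1, 1, 2, 2, 3, 3])   # flattened history: steps t-3, t-2, t-1
new = np.array([4, 4])             # the features of step t
shift_precedingSeqsVec(v, new)
# -> array([2, 2, 3, 3, 4, 4]): oldest step dropped, newest appended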

In [234]:
plot_precSeqsVec(new_vec, hist_len)



In [237]:
# build the flattened history matrix; the first seq_len rows keep a zero history
arr = np.zeros((ts.shape[0], ts.shape[1]*hist_len))

for i in range(seq_len, len(ts), 1):
    arr[i,:] = get_precedingSeqs(ts, i, hist_len)

# current time-step features plus flattened history per row
data = np.concatenate((ts, arr), axis=1)

data.shape


Out[237]:
(8760, 1100)
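
The row-by-row loop above makes one Python call per time step; a hedged alternative (editor's sketch, same result with one slice assignment per lag, given the definitions above):

In [ ]:
n, f = ts.shape
arr2 = np.zeros((n, f * hist_len), dtype=ts.dtype)
for k in range(hist_len):
    lag = hist_len - k                  # history block k holds ts[i - lag]
    arr2[seq_len:, k*f:(k+1)*f] = ts[seq_len - lag : n - lag]
np.allclose(arr, arr2)                  # should be True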

In [238]:
# target y is the energy column; X holds the remaining features plus the history
X = data[:,1:]
y = data[:,0]
y.shape, X.shape
plt.plot(y[:hist_len])


Out[238]:
((8760,), (8760, 1099))
Out[238]:
[<matplotlib.lines.Line2D at 0x7f98e36c56d8>]

In [239]:
h5_file = P.DATA.H5DATA_FN

if os.path.exists(h5_file):
    os.remove(h5_file)

f = utils.h5py.File(h5_file, 'w')
f['X'] = X
f['y'] = y
f.close()
log.info("Saved", data=h5_file)


data='./data/data.h5' event='Saved'

Correlation


In [5]:
y = df[cols[0]]
utils.autocorrelation_plot2d(y)


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f996c8b7ac8>

In [6]:
utils.autocorrelation_plot_all(y);



In [28]:
print(utils.test_corr(y))


         t-1      t+1
t-1  1.00000  0.95355
t+1  0.95355  1.00000
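
The lag-1 autocorrelation of 0.95 shows strong hour-to-hour persistence, which is exactly what the history-window features exploit.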

In [30]:
utils.test_stationary(y)


ADF Statistic: -14.900494
p-value: 0.000000
Critical Values:
	1%: -3.431
	5%: -2.862
	10%: -2.567

The ADF statistic lies well below even the 1% critical value, so the unit-root null hypothesis is rejected: the series can be treated as stationary.
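
utils.test_stationary is a project helper; assuming it wraps statsmodels' adfuller, a minimal equivalent looks like this (the wrapper below is the editor's sketch, only adfuller itself is the standard API):

In [ ]:
from statsmodels.tsa.stattools import adfuller

def test_stationary_sketch(series):
    # Augmented Dickey-Fuller test; H0: the series has a unit root
    result = adfuller(series)
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])
    print('Critical Values:')
    for level, value in result[4].items():
        print('\t%s: %.3f' % (level, value))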

Load Data


In [240]:
def load(P, isTest=False):
    h5data_fn = P.DATA.H5DATA_FN
    assert os.path.exists(h5data_fn), "{} does not exist".format(h5data_fn)

    f = utils.h5py.File(h5data_fn, 'r')
    X = f['X'][:]  # h5py: dataset[:] replaces the deprecated .value
    y = f['y'][:]
    f.close()

    if P.DATA.isSample:
        limit = int(len(y) * 0.01)
        y = y[:limit]
        X = X[:limit]

    # shuffle data
    if not isTest:
        indices = np.arange(y.shape[0])

        np.random.shuffle(indices)
        y = y[indices]
        X = X[indices]

        # split into train/valid
        nb_validation_samples = int(P.TRAINING.VALIDATION_SPLIT * y.shape[0])
        y_train = y[:-nb_validation_samples]
        X_train = X[:-nb_validation_samples]

        y_val = y[-nb_validation_samples:]
        X_val = X[-nb_validation_samples:]

        return (X_train, y_train, X_val, y_val)
    else:
        return X

X_train, y_train, X_val, y_val = load(P)
X_train.shape, X_val.shape


Out[240]:
((7008, 1099), (1752, 1099))

Model ^

Abstract Model

Model Sequential


In [242]:
def preprocess(x):
    return x  # identity placeholder for optional feature preprocessing

class BaseModel():
    
    classtype = 'dense'  # class variable
    classname = 'BaseModel'
    def __init__(self, input_shape, layers):
        #self.FILE_PATH = 'http://www.platform.ai/models/'
        dpath = P.OUTPUT.MODEL_DIR
        self.modelPath = dpath+self.classname+'.h5'
        self.checkpoint = utils.ModelCheckpoint(filepath= dpath + 'checkpoint-{epoch:02d}-{loss:.3f}-{val_loss:.3f}.hdf5')
        self.csvLogger = utils.CSVLogger(dpath+'trainingLog.csv', separator=';', append=True)
        self.create(input_shape, layers)

    def create(self, input_shape, layers):
        print('> Create Model')
        start = time.time()
        dropout = 0.2
        
        model = self.model = utils.Sequential()

        model.add(utils.Dense(
            input_shape=(input_shape,),
            units=layers[0],
            activation='relu'))
        model.add(utils.Dropout(dropout))

        model.add(utils.Dense(
            units=layers[1],
            activation='relu'))
        model.add(utils.Dropout(dropout))

        model.add(utils.Dense(
            units=layers[1],
            activation='relu'))
        model.add(utils.Dropout(dropout))
        
        model.add(utils.Dense(
            units=layers[1],
            activation='relu'))
        model.add(utils.Dropout(dropout))
        
        model.add(utils.Dense(
            units=layers[1],
            activation='relu'))
        model.add(utils.Dropout(dropout))

        model.add(utils.Dense(
            units=1,
            activation='linear'))

        #model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mape'])
        model.compile(optimizer='adam', loss='mse')
        print("Compilation Time : ", time.time() - start)
        
    def save(self):
        self.model.save(self.modelPath)
        print("model saved to: ", self.modelPath)

    def load(self):
        from keras.models import load_model
        self.model = load_model(self.modelPath)
        print("model loaded from: ", self.modelPath)
        
    def train(self, X_train, y_train, epochs=1, batch_size=64, verbose=1):
        # NOTE: validation data comes from X_val/y_val in the notebook scope
        global_start_time = time.time()
        try:
            history = self.model.fit(
                X_train, y_train,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(X_val, y_val),
                verbose=verbose,
                callbacks=[self.checkpoint, self.csvLogger])
        except KeyboardInterrupt:
            print('Training interrupted after (s): ', time.time() - global_start_time)
            return self.model, None

        print('Training duration (s) : ', time.time() - global_start_time)
        return self.model, history
        
m = BaseModel(X_train.shape[1], [256, 512])  #bias added
m.classtype
m.modelPath
m.model.summary()


> Create Model
Compilation Time :  0.15849709510803223
Out[242]:
'dense'
Out[242]:
'./data/out/<twBase.Struct.NoneStruct object at 0x7f99663ff198>.<twBase.Struct.NoneStruct object at 0x7f99663e0ef0>.<twBase.Struct.NoneStruct object at 0x7f99663e0eb8>/Model.01BaseModel.h5'
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_42 (Dense)             (None, 256)               281600    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_43 (Dense)             (None, 512)               131584    
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_44 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_8 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_45 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_9 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_46 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_10 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_47 (Dense)             (None, 1)                 513       
=================================================================
Total params: 1,201,665.0
Trainable params: 1,201,665.0
Non-trainable params: 0.0
_________________________________________________________________
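
A quick sanity check on the parameter counts: a Dense layer has (n_in + 1) × n_units weights (the +1 is the bias term), so the first layer contributes (1099 + 1) × 256 = 281,600 and the second (256 + 1) × 512 = 131,584, matching the table.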

Model Functional

Dense

LSTM


In [ ]:
from keras.layers import Input, LSTM, Dense
from keras.models import Model as KerasModel  # plain 'Model' is shadowed by PrognoseModel above

class BaseLSTM(BaseModel):

    classtype = 'lstm'  # class variable
    classname = 'BaseLSTM'

    def __init__(self, layers):
        # BaseModel.__init__ expects (input_shape, layers), so replicate its
        # path/callback setup here and build the LSTM graph directly
        dpath = P.OUTPUT.MODEL_DIR
        self.modelPath = dpath + self.classname + '.h5'
        self.checkpoint = utils.ModelCheckpoint(filepath=dpath + 'checkpoint-{epoch:02d}-{loss:.3f}-{val_loss:.3f}.hdf5')
        self.csvLogger = utils.CSVLogger(dpath + 'trainingLog.csv', separator=';', append=True)
        self.create(layers)

    def create(self, layers):
        print('> Create Model', self.classname, ' type: ', self.classtype)
        start = time.time()

        # NOTE: expects 3D input (samples, seq_len, features); the flattened
        # 2D matrix X built above would have to be reshaped accordingly
        inputs = Input(shape=(seq_len, layers[0]))

        x = LSTM(layers[1], activation='relu', return_sequences=True)(inputs)
        #x = Dropout(0.1)(x)
        x = LSTM(layers[2], activation='relu', return_sequences=False)(x)
        #x = Dropout(0.1)(x)
        preds = Dense(layers[3], activation='linear')(x)

        self.model = KerasModel(inputs=inputs, outputs=preds)
        self.model.compile(optimizer='rmsprop', loss='mse', metrics=['mse'])
        print("Compilation Time : ", time.time() - start)

#m = BaseLSTM([1, seq_len+1, (seq_len+1)*2, 1])
m = BaseLSTM([1, 50, 50, 1])
m.modelPath
m.model.summary()

Training ^


In [243]:
%%time
%%capture output
sys.stdout = open('keras_output.txt', 'a+')
model, hist = m.train(X_train, y_train, epochs=100, verbose=1)
sys.stdout = stdout
hist

#m.save()


Out[243]:
<keras.callbacks.History at 0x7f98e32755f8>
CPU times: user 2min 40s, sys: 10.7 s, total: 2min 51s
Wall time: 1min 55s

In [227]:
output.show()

In [244]:
plt.plot(hist.history['loss'], label='loss')
plt.plot(hist.history['val_loss'], label='val_loss')
plt.legend()


Out[244]:
[<matplotlib.lines.Line2D at 0x7f98e30356d8>]
Out[244]:
[<matplotlib.lines.Line2D at 0x7f98da490ac8>]
Out[244]:
<matplotlib.legend.Legend at 0x7f98e3035e48>

In [ ]:
from keras.models import load_model
# returns a compiled model, identical to the previous one
model = load_model(m.modelPath)

Predictions ^


In [245]:
offset = 200
length = 300

X_new = np.copy(X)
Y_pred = []

for i in range(offset, offset+length):
    y_pred = m.model.predict(X_new[i,:].reshape((1, X_new[i].shape[0])))
    y_pred = np.squeeze(y_pred)
    Y_pred.append(y_pred)
    #print("delta: ", y_pred, y[i] - y_pred)

    # split the input row into the current features and the history part
    x0 = X_new[i, :10]             # the 10 weather/sensor features of step t
    x0 = np.insert(x0, 0, y_pred)  # prepend the prediction -> full 11-feature vector
    vec0 = X_new[i, 10:]           # flattened history of step t
    #plot_precSeqsVec(vec0, hist_len)

    # shift the sliding window left by one time step
    new_vec = shift_precedingSeqsVec(vec0, x0)
    #plot_precSeqsVec(new_vec, hist_len)

    # construct the next input row: keep the measured weather data of t+1,
    # replace its history with the window that now contains the prediction
    X_new[i+1, 10:] = new_vec

In [246]:
x = [i for i in range(offset, offset+length)]
plt.plot(x, y[offset:offset+length], label="true")
plt.plot(x, Y_pred, label="pred")
plt.legend()


Out[246]:
[<matplotlib.lines.Line2D at 0x7f98e2bb0c18>]
Out[246]:
[<matplotlib.lines.Line2D at 0x7f98e2be3390>]
Out[246]:
<matplotlib.legend.Legend at 0x7f98e2bb9358>
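
mean_squared_error was imported above but never used; a hedged evaluation sketch for the recursive forecast (values are still in the MinMaxScaler's [0, 1] range; recovering MWh would require rebuilding a full 11-column array for scaler.inverse_transform):

In [ ]:
rmse = np.sqrt(mean_squared_error(y[offset:offset+length], Y_pred))
print("RMSE (scaled units): %.4f" % rmse)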
