In [ ]:
import tensorflow as tf
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
In [ ]:
# load the feature matrix and regression targets from the headerless CSV
all_data = tf.contrib.learn.datasets.base.load_csv_without_header(
    filename='micro_data.csv',
    target_dtype=np.float32,
    features_dtype=np.float32)
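The loader above relies on tf.contrib, which exists only in TensorFlow 1.x. A minimal replacement sketch with pandas, assuming micro_data.csv is headerless with the target in the last column (the load_csv_without_header default); the function name here is ours, not part of any library:
In [ ]:
import pandas as pd
from collections import namedtuple

Dataset = namedtuple('Dataset', ['data', 'target'])

def load_csv_last_column_target(filename, target_dtype=np.float32, features_dtype=np.float32):
    # assumption: headerless CSV, all columns except the last are features, the last is the target
    df = pd.read_csv(filename, header=None)
    target = df.iloc[:, -1].values.astype(target_dtype)
    data = df.iloc[:, :-1].values.astype(features_dtype)
    return Dataset(data=data, target=target)

# all_data = load_csv_last_column_target('micro_data.csv')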
In [ ]:
def get_sample(data, sample_size=20000):
    # draw a random sample of rows from the dataset
    h = np.random.permutation(data.target.shape[0])
    X_sample = data.data[h[:sample_size], :]
    target_sample = data.target[h[:sample_size]]
    return X_sample, target_sample
In [ ]:
X, y = get_sample(all_data, sample_size=all_data.target.shape[0])
In [ ]:
# drop feature columns 15 and 16
X = np.delete(X, [15, 16], 1)
In [ ]:
# standardize each feature to zero mean and unit variance
X = (X - np.mean(X, axis=0, keepdims=True)) / np.std(X, axis=0, keepdims=True)
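A feature column with zero variance would divide by zero here and fill that column with NaNs; the original does not guard against this, but a quick check is cheap:
In [ ]:
# sanity check: standardization produced no NaNs (i.e. no constant feature columns)
assert not np.isnan(X).any(), "a constant feature column produced NaNs during standardization"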
In [ ]:
# shift and scale the targets so the minimum maps to 1, which keeps the
# MAPE loss used below well defined (no division by values near zero)
y_std = np.std(y)
y_min = np.min(y)
y = (y - y_min) / y_std + 1
In [ ]:
def build_model(no_layers=2, no_units=100, dropout=0.6):
    # dropout rate ramps up linearly from dropout / no_layers to dropout
    initial_dropout = dropout / no_layers
    model = Sequential()
    model.add(Dense(no_units, input_dim=X.shape[1], activation='relu'))
    model.add(Dropout(initial_dropout))
    model.add(BatchNormalization())
    for i in range(no_layers - 1):
        model.add(Dense(no_units, activation='relu'))
        model.add(Dropout(initial_dropout * (i + 2)))
        model.add(BatchNormalization())
    model.add(Dense(1))
    model.compile(loss='mape', metrics=[], optimizer=Adam(lr=0.001))
    return model
In [ ]:
model = build_model(no_layers=3)
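To inspect the resulting layer stack and parameter counts before training:
In [ ]:
model.summary()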
In [ ]:
def train_test_split(X, y, train_ratio):
    h = np.random.permutation(X.shape[0])
    n_train = int(train_ratio * X.shape[0])
    X_train = X[h[:n_train], :]
    X_test = X[h[n_train:], :]
    y_train = y[h[:n_train]]
    y_test = y[h[n_train:]]
    return X_train, X_test, y_train, y_test
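This hand-rolled split behaves like scikit-learn's helper of the same name; if scikit-learn is available, the commented call below is an interchangeable (and seedable) alternative to the split in the next cell:
In [ ]:
from sklearn.model_selection import train_test_split as sk_train_test_split

# X_train, X_test, y_train, y_test = sk_train_test_split(X, y, train_size=0.8, random_state=42)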
In [ ]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 0.8)
In [ ]:
history = model.fit(X_train,
                    y_train,
                    validation_data=(X_test, y_test),
                    epochs=100000,
                    batch_size=10240,
                    verbose=0,
                    callbacks=[EarlyStopping(monitor='val_loss', patience=10000)])
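With patience=10000, training stops only after 10000 consecutive epochs without a val_loss improvement, and the model keeps the weights from the final epoch. If the installed Keras supports it (2.2.3 and later), restore_best_weights=True on EarlyStopping makes the evaluation below use the best validation epoch instead; a sketch of that variant:
In [ ]:
early_stop = EarlyStopping(monitor='val_loss',
                           patience=10000,
                           restore_best_weights=True)  # Keras >= 2.2.3

# history = model.fit(X_train, y_train,
#                     validation_data=(X_test, y_test),
#                     epochs=100000, batch_size=10240,
#                     verbose=0, callbacks=[early_stop])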
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(history.history['loss'], c='red', label='train')
plt.plot(history.history['val_loss'], c='blue', label='validation')
plt.legend()
In [ ]:
plt.plot(history.history['loss'][200:], c='red', label='train')
plt.plot(history.history['val_loss'][200:], c='blue', label='validation')
plt.legend()
In [ ]:
def get_errors(actual, predicted):
    actual = actual.flatten()
    predicted = predicted.flatten()
    # undo the target scaling applied above to get errors in the original units
    actual = (actual - 1) * y_std + y_min
    predicted = (predicted - 1) * y_std + y_min
    error = np.abs(actual - predicted)
    rel_error = np.abs(actual - predicted) / actual
    # returns: max abs error, mean abs error, max relative error, mean relative error
    return np.max(error), np.mean(error), np.max(rel_error), np.mean(rel_error)
In [ ]:
predicted = model.predict(X_test)
get_errors(y_test, predicted)
In [ ]:
predicted_train = model.predict(X_train)
get_errors(y_train, predicted_train)