In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
import keras.backend as K
from datetime import datetime
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import RMSprop
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.decomposition import PCA, FastICA
from sklearn.metrics import r2_score
In [2]:
# Load the raw train/test CSVs. "ID" becomes the index so it is excluded
# from the feature matrix but still available (as test.index) when the
# submission file is built at the end of the notebook.
train = pd.read_csv("data/train.csv", index_col="ID")
test = pd.read_csv("data/test.csv", index_col="ID")
In [3]:
# Integer-encode every categorical (object-dtype) column.
# Each encoder is fitted on the union of train and test values so that
# categories that appear only in the test set still get a valid label.
for col in train.columns:
    if train[col].dtype != "object":
        continue
    encoder = LabelEncoder()
    combined_values = list(train[col].values) + list(test[col].values)
    encoder.fit(combined_values)
    train[col] = encoder.transform(list(train[col].values))
    test[col] = encoder.transform(list(test[col].values))
In [4]:
# Dimensionality-reduction features: project the design matrix onto its
# first n_comp principal (PCA) and independent (ICA) components.
n_comp = 10

# Feature matrix without the target; computed once and reused for both
# decompositions (the original dropped "y" twice).
features_train = train.drop(["y"], axis=1)

# PCA — fitted on train, applied to both train and test.
pca = PCA(n_components=n_comp, random_state=42)
pca2_results_train = pca.fit_transform(features_train)
pca2_results_test = pca.transform(test)

# ICA — same fit-on-train / transform-both pattern.
ica = FastICA(n_components=n_comp, random_state=42)
ica2_results_train = ica.fit_transform(features_train)
ica2_results_test = ica.transform(test)
In [5]:
#train = train.iloc[:, :9].copy()
#test = test.iloc[:, :8].copy()
In [6]:
# Append each PCA / ICA component as an extra column; column names are
# 1-indexed ("pca_1" ... "pca_10"), matching the original convention.
for comp_idx in range(n_comp):
    suffix = str(comp_idx + 1)
    train["pca_" + suffix] = pca2_results_train[:, comp_idx]
    test["pca_" + suffix] = pca2_results_test[:, comp_idx]
    train["ica_" + suffix] = ica2_results_train[:, comp_idx]
    test["ica_" + suffix] = ica2_results_test[:, comp_idx]

# Target vector and its mean.
# NOTE(review): y_mean is never used later in this notebook chunk —
# possibly intended as an xgboost base_score; confirm before removing.
y_train = train["y"]
y_mean = np.mean(y_train)
In [7]:
# Convert the engineered DataFrames to dense float32 matrices for Keras;
# the target column "y" is dropped from the training features.
x_train = train.drop("y", axis=1).values.astype(np.float32)
x_test = test.values.astype(np.float32)
In [8]:
# Sanity check: both matrices must have the same number of columns.
x_train.shape, x_test.shape
Out[8]:
In [9]:
# Scale every feature into [0, 1].
# BUG FIX: the original called fit_transform on x_test as well, re-fitting
# the scaler on the test distribution. That makes train and test inhabit
# different scales (and leaks test statistics). The test set must be
# transformed with the min/max learned from the training set only.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
In [10]:
def r2_keras(y_true, y_pred):
    """Coefficient of determination (R^2) as a Keras metric.

    R^2 = 1 - SS_res / SS_tot. K.epsilon() in the denominator guards
    against division by zero when the targets are constant.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    total_ss = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - residual_ss / (total_ss + K.epsilon())
In [32]:
# RMSprop with a conservative learning rate; rho/epsilon/decay are the
# library defaults, written out explicitly.
rmsprop = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-08, decay=0.0)

def model():
    """Build and compile the feed-forward regression network.

    BUG FIX: the original had two layers written as ``Dense(units=, ...)``,
    which is a SyntaxError (the unit count was left blank). Both are set
    to 512 to match the surrounding hidden layers.

    Returns:
        A compiled keras.models.Sequential ending in a single linear
        output unit, with MSE loss and r2_keras tracked as a metric
        (so EarlyStopping can monitor "val_r2_keras").
    """
    model = Sequential()
    # Input width is taken from the feature matrix built above.
    model.add(Dense(units=512, activation="relu", input_dim=x_train.shape[1]))
    model.add(Dense(units=396, activation="relu"))
    model.add(Dense(units=512, activation="relu"))
    model.add(Dense(units=512, activation="relu"))
    model.add(Dense(units=512, activation="relu"))
    model.add(Dense(units=512, activation="relu"))
    # Single linear unit for scalar regression.
    model.add(Dense(units=1, activation="linear"))
    model.compile(loss="mse", optimizer=rmsprop, metrics=[r2_keras])
    #model.summary()
    return model
In [12]:
# BUG FIX: EarlyStopping's default mode ('auto') falls back to MINIMISING
# any monitor whose name does not contain 'acc'. val_r2_keras must be
# MAXIMISED, so the original callback stopped training in the wrong
# direction. mode="max" fixes that.
callbacks = [
    EarlyStopping(monitor="val_r2_keras", mode="max", patience=20)
    #ModelCheckpoint("weights.{epoch:02d}-{val_loss:.2f}.hdf5")
]

# BUG FIX: `nb_epoch` is the long-deprecated Keras 1 spelling; use
# `epochs`, consistent with the explicit epochs= arguments passed to the
# fit calls below (which override this default anyway).
estimator = KerasRegressor(
    build_fn=model,
    epochs=100,
    batch_size=32,
    verbose=0
)
In [ ]:
# Single training run with a very small (2%) validation split; the
# EarlyStopping callback defined above can end training before 200 epochs.
estimator.fit(x_train_scaled, y_train, batch_size=32, epochs=200, verbose=2, callbacks=callbacks, validation_split=0.02)
In [ ]:
# In-sample predictions, used below for the training-set R^2.
y_pred_train = estimator.predict(x_train_scaled)
In [ ]:
# Predictions on the (scaled) test matrix.
prediction = estimator.predict(x_test_scaled)
In [ ]:
# Bare expression: rich display of the raw test predictions.
prediction
In [ ]:
# NOTE(review): this is the TRAINING R^2 and will be optimistic; the
# validation metric printed during fit is the more honest figure.
print("the R2 score is : {}".format(r2_score(y_train, y_pred_train)))
In [33]:
# Ensemble loop: retrain the network and collect test predictions from
# each run for later averaging. range(1) means a single run here; raise
# it for a real ensemble.
r2_score_list = []
prediction_list = []
for run_idx in range(1):
    estimator.fit(x_train_scaled, y_train, batch_size=32,
                  epochs=50, verbose=2, callbacks=callbacks,
                  validation_split=0.1, shuffle=False)
    # Training-set fit quality for this run.
    y_pred_train = estimator.predict(x_train_scaled)
    prediction = estimator.predict(x_test_scaled)
    prediction_list.append(prediction)
    r2_value = r2_score(y_train, y_pred_train)
    print("Number: {}, R^2: {}".format(run_idx, r2_value))
    r2_score_list.append((run_idx, r2_value))
In [34]:
# Element-wise sum of the per-run prediction vectors. Starting the sum
# from float64 zeros reproduces the original accumulation loop exactly
# (including the resulting dtype).
value = sum(prediction_list, np.zeros(len(prediction_list[0])))
In [35]:
# Mean prediction across runs — the ensemble output used for submission.
average_prediction = value / len(prediction_list)
In [36]:
# Bare expression: display the averaged predictions as a sanity check.
average_prediction
Out[36]:
In [ ]:
# Build the submission frame from the preserved test index and the
# ensemble-averaged predictions.
# NOTE(review): the column is lower-case "id" while the data was read with
# index_col="ID" — confirm the submission format actually expects "id".
output = pd.DataFrame({"id": test.index, "y": average_prediction})
In [ ]:
# Write the submission without the DataFrame's positional index.
output.to_csv("submission_neural_network_average.csv", index=False)
In [ ]:
In [ ]:
In [ ]:
In [ ]: