In [1]:
    
import numpy as np
import pandas as pd 
import xgboost as xgb
import keras.backend as K
from datetime import datetime
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import RMSprop
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.decomposition import PCA, FastICA
from sklearn.metrics import r2_score
    
    
In [2]:
    
# Read the train and test CSV files, using each file's "ID" column as the index.
train, test = (
    pd.read_csv(csv_path, index_col="ID")
    for csv_path in ("data/train.csv", "data/test.csv")
)
    
In [3]:
    
# Integer-encode every object-typed column of `train` (and the same column of
# `test`). Each encoder is fitted on the train and test values together, so
# both frames share one label mapping per column.
object_columns = [name for name in train.columns if train[name].dtype == "object"]
for name in object_columns:
    encoder = LabelEncoder()
    train_values = list(train[name].values)
    test_values = list(test[name].values)
    encoder.fit(train_values + test_values)
    train[name] = encoder.transform(train_values)
    test[name] = encoder.transform(test_values)
    
In [4]:
    
# Number of components kept by each decomposition.
n_comp = 10

# Both decompositions are fitted on the training features (every column except
# the target "y") and then applied to the test frame.
train_features = train.drop(["y"], axis=1)

# PCA
pca = PCA(n_components=n_comp, random_state=42)
pca2_results_train = pca.fit_transform(train_features)
pca2_results_test = pca.transform(test)

# ICA
ica = FastICA(n_components=n_comp, random_state=42)
ica2_results_train = ica.fit_transform(train_features)
ica2_results_test = ica.transform(test)
    
    
In [5]:
    
# Disabled: would keep only the first 9 columns of train and the first 8 of test.
#train = train.iloc[:, :9].copy()
#test = test.iloc[:, :8].copy()
    
In [6]:
    
# Append the decomposition outputs as extra feature columns named
# pca_1 .. pca_<n_comp> and ica_1 .. ica_<n_comp> on both frames.
for comp_idx in range(n_comp):
    label = str(comp_idx + 1)

    train["pca_" + label] = pca2_results_train[:, comp_idx]
    test["pca_" + label] = pca2_results_test[:, comp_idx]

    train["ica_" + label] = ica2_results_train[:, comp_idx]
    test["ica_" + label] = ica2_results_test[:, comp_idx]

# Regression target and its mean over the training rows.
y_train = train["y"]
y_mean = np.mean(y_train)
    
In [7]:
    
# Feature matrices as float32 arrays; the target column "y" is left out of the
# training matrix.
train_feature_frame = train.drop("y", axis=1)
x_train = train_feature_frame.values.astype(np.float32)
x_test = test.values.astype(np.float32)
    
In [8]:
    
# Display the shapes of the train and test feature arrays side by side.
x_train.shape, x_test.shape
    
    Out[8]:
In [9]:
    
# Min-max scale the features. The scaler is fitted on the training matrix only;
# the test matrix is then transformed with those same per-feature minima and
# maxima so that both splits share one feature scale. Re-fitting on the test
# matrix would map the same raw value to different scaled values in train and
# test.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
    
In [10]:
    
def r2_keras(y_true, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Returns ``1 - SS_res / (SS_tot + K.epsilon())`` where ``SS_res`` is the sum
    of squared differences between ``y_true`` and ``y_pred`` and ``SS_tot`` is
    the sum of squared deviations of ``y_true`` from its own mean.
    """
    residual_sum_sq = K.sum(K.square(y_true - y_pred))
    centered_truth = y_true - K.mean(y_true)
    total_sum_sq = K.sum(K.square(centered_truth))
    return 1 - residual_sum_sq / (total_sum_sq + K.epsilon())
    
In [32]:
    
# Optimizer instance passed to compile() by every network that model() builds.
rmsprop = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-08, decay=0.0)


def model():
    """Build and compile the fully connected regression network.

    The network is a stack of ReLU ``Dense`` layers followed by a single
    linear output unit. It is compiled with mean squared error as the loss,
    the module-level ``rmsprop`` optimizer and ``r2_keras`` as a metric. The
    input width is taken from ``x_train.shape[1]``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): the widths of the first and third hidden layers were
    # missing in the original cell (``units=`` with no value, which is a
    # syntax error). 512 is a placeholder chosen to match the other wide
    # layers — confirm the intended sizes.
    hidden_units = (512, 396, 512, 512, 512, 512)

    net = Sequential()
    # Only the first layer declares the input width.
    net.add(Dense(units=hidden_units[0], activation="relu", input_dim=x_train.shape[1]))
    for width in hidden_units[1:]:
        net.add(Dense(units=width, activation="relu"))
    net.add(Dense(units=1, activation="linear"))
    net.compile(loss="mse", optimizer=rmsprop, metrics=[r2_keras])
    #net.summary()
    return net
    
In [12]:
    
# Early stopping watches the validation value of the r2_keras metric with a
# patience of 20 epochs. R^2 is a "higher is better" score, so mode="max" is
# set explicitly: with the default mode="auto" Keras infers the direction from
# the metric name and treats a name like "val_r2_keras" as a quantity to
# minimise, which would count a rising R^2 as "no improvement".
callbacks = [
    EarlyStopping(monitor="val_r2_keras", mode="max", patience=20)
    #ModelCheckpoint("weights.{epoch:02d}-{val_loss:.2f}.hdf5")
]
# scikit-learn style wrapper around model(). The keyword arguments below are
# defaults; the fit calls in this notebook pass their own batch_size, epochs
# and verbose values. `epochs` replaces the Keras 1 spelling `nb_epoch` so the
# default uses the same keyword as those fit calls.
estimator = KerasRegressor(
    build_fn=model,
    epochs=100,
    batch_size=32,
    verbose=0
)
    
In [ ]:
    
# Fit on the scaled training matrix for up to 200 epochs, holding out a
# validation_split of 0.02 for the early-stopping callback.
estimator.fit(
    x_train_scaled,
    y_train,
    batch_size=32,
    epochs=200,
    verbose=2,
    callbacks=callbacks,
    validation_split=0.02,
)
    
In [ ]:
    
# Predictions of the fitted estimator on the scaled training matrix.
y_pred_train = estimator.predict(x_train_scaled)
    
In [ ]:
    
# Predictions of the fitted estimator on the scaled test matrix.
prediction = estimator.predict(x_test_scaled)
    
In [ ]:
    
# Display the test-set predictions.
prediction
    
In [ ]:
    
# R^2 of the predictions on the training rows against the training target.
train_r2 = r2_score(y_train, y_pred_train)
print("the R2 score is : {}".format(train_r2))
    
In [33]:
    
# Fit the estimator once per loop iteration, collecting each run's test
# prediction and its R^2 on the training rows.
r2_score_list = []
prediction_list = []
fit_options = dict(
    batch_size=32,
    epochs=50,
    verbose=2,
    callbacks=callbacks,
    validation_split=0.1,
    shuffle=False,
)
for run_idx in range(1):
    estimator.fit(x_train_scaled, y_train, **fit_options)
    y_pred_train = estimator.predict(x_train_scaled)
    prediction = estimator.predict(x_test_scaled)
    prediction_list.append(prediction)
    r2_value = r2_score(y_train, y_pred_train)
    print("Number: {}, R^2: {}".format(run_idx, r2_value))
    r2_score_list.append((run_idx, r2_value))
    
    
In [34]:
    
# Element-wise sum of all collected test predictions, accumulated in place in
# a zero-initialised array as long as the first prediction.
value = np.zeros(len(prediction_list[0]))
for run_prediction in prediction_list:
    np.add(value, run_prediction, out=value)
    
In [35]:
    
# Mean prediction: the accumulated sum divided by the number of collected predictions.
average_prediction = value / len(prediction_list)
    
In [36]:
    
# Display the averaged test-set predictions.
average_prediction
    
    Out[36]:
In [ ]:
    
# Submission table: the test frame's index values next to the averaged predictions.
# NOTE(review): the column is written as "id" while the input files are indexed
# by a column named "ID" — confirm which header the submission format expects.
output = pd.DataFrame({"id": test.index, "y": average_prediction})
    
In [ ]:
    
# Write the submission table to CSV without the DataFrame's own row index.
output.to_csv("submission_neural_network_average.csv", index=False)
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]: