In [1]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential,Model
from keras.layers import Dense, Dropout,BatchNormalization,Input
from keras.optimizers import RMSprop
from keras.regularizers import l2,l1
from keras.optimizers import Adam

from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping


Using TensorFlow backend.

In [2]:
df = pd.read_csv("../../out_data/MLDB.csv")

In [3]:
first_gene_index = df.columns.get_loc("rrrD")

In [4]:
X, Y = np.split(df, [first_gene_index], axis=1)
X = X.values
X = X-0.5
Y1 = Y.values[:,1]
Y2 = Y.values[:,1]

In [5]:
X.shape


Out[5]:
(178, 84)

In [6]:
import collections
Model_setting = collections.namedtuple('Model_setting','num_layers num_node alpha drop_rate act_method lr regularization \
patience')

In [7]:
setting_ = [1,100, 0.5, 0.2, 'tanh', 0.01, 'l2', 3]
setting = Model_setting(*setting_)
setting = setting._asdict()

In [8]:
setting


Out[8]:
OrderedDict([('num_layers', 1),
             ('num_node', 100),
             ('alpha', 0.5),
             ('drop_rate', 0.2),
             ('act_method', 'tanh'),
             ('lr', 0.01),
             ('regularization', 'l2'),
             ('patience', 3)])

In [9]:
def getModel(setting,num_input=84):
    regularizer = l1(setting['alpha']) if setting['regularization']=='l1' else l2(setting['alpha'])
    
    model = Sequential()
    for i in range(setting['num_layers']):
        if i==0:
            model.add(Dense(setting['num_node'], input_shape=(num_input,), activation=setting['act_method'],\
                            kernel_regularizer = regularizer))
            model.add(Dropout(setting['drop_rate']))
        else:
            model.add(Dense(setting['num_node']//(2**i), activation=setting['act_method']))
            model.add(Dropout(setting['drop_rate']))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=setting['lr']), metrics=['accuracy'])
    return model

In [10]:
num_output_ = 3
def create_model(num_input = 84,num_output = num_output_):
    X_input = Input(shape=(num_input,))

    X = Dense(64)(X_input)
    X = Dropout(0.2)(X)
    X = Dense(32)(X)
    Ys= []
    for i in range(num_output):
        Ys.append(Dense(1, activation = 'sigmoid')(X))
    model = Model(inputs=[X_input],outputs = Ys)
    model.compile(loss=['binary_crossentropy']*num_output,loss_weights=[1.]*num_output,optimizer=Adam(lr=setting['lr']), metrics=['accuracy'])
    return model

In [14]:
model = create_model()
callbacks = [EarlyStopping(monitor='loss',min_delta=0,patience=setting['patience'])]

In [15]:
ys = [*((Y.values).T[:num_output_])]

In [16]:
model.fit(X,ys,epochs = 50, verbose = 1,callbacks =callbacks)


Epoch 1/50
178/178 [==============================] - 0s - loss: 1.1467 - dense_8_loss: 0.5328 - dense_9_loss: 0.2510 - dense_10_loss: 0.3629 - dense_8_acc: 0.8258 - dense_9_acc: 0.8876 - dense_10_acc: 0.8371     
Epoch 2/50
178/178 [==============================] - 0s - loss: 0.6560 - dense_8_loss: 0.3028 - dense_9_loss: 0.1767 - dense_10_loss: 0.1766 - dense_8_acc: 0.8820 - dense_9_acc: 0.9775 - dense_10_acc: 0.9775     
Epoch 3/50
178/178 [==============================] - 0s - loss: 0.6066 - dense_8_loss: 0.3507 - dense_9_loss: 0.1281 - dense_10_loss: 0.1278 - dense_8_acc: 0.9213 - dense_9_acc: 0.9775 - dense_10_acc: 0.9719     
Epoch 4/50
178/178 [==============================] - 0s - loss: 0.4402 - dense_8_loss: 0.1729 - dense_9_loss: 0.1714 - dense_10_loss: 0.0959 - dense_8_acc: 0.9382 - dense_9_acc: 0.9775 - dense_10_acc: 0.9775     
Epoch 5/50
178/178 [==============================] - 0s - loss: 0.3534 - dense_8_loss: 0.1642 - dense_9_loss: 0.1119 - dense_10_loss: 0.0773 - dense_8_acc: 0.9270 - dense_9_acc: 0.9719 - dense_10_acc: 0.9831     
Epoch 6/50
178/178 [==============================] - 0s - loss: 0.3680 - dense_8_loss: 0.1766 - dense_9_loss: 0.0904 - dense_10_loss: 0.1010 - dense_8_acc: 0.9494 - dense_9_acc: 0.9775 - dense_10_acc: 0.9775     
Epoch 7/50
178/178 [==============================] - 0s - loss: 0.3434 - dense_8_loss: 0.1552 - dense_9_loss: 0.0953 - dense_10_loss: 0.0929 - dense_8_acc: 0.9551 - dense_9_acc: 0.9775 - dense_10_acc: 0.9775     
Epoch 8/50
178/178 [==============================] - 0s - loss: 0.3200 - dense_8_loss: 0.1422 - dense_9_loss: 0.0737 - dense_10_loss: 0.1041 - dense_8_acc: 0.9438 - dense_9_acc: 0.9775 - dense_10_acc: 0.9775     
Epoch 9/50
178/178 [==============================] - 0s - loss: 0.2566 - dense_8_loss: 0.1182 - dense_9_loss: 0.0744 - dense_10_loss: 0.0640 - dense_8_acc: 0.9607 - dense_9_acc: 0.9775 - dense_10_acc: 0.9888     
Epoch 10/50
178/178 [==============================] - 0s - loss: 0.2773 - dense_8_loss: 0.1246 - dense_9_loss: 0.0789 - dense_10_loss: 0.0738 - dense_8_acc: 0.9607 - dense_9_acc: 0.9775 - dense_10_acc: 0.9775     
Epoch 11/50
178/178 [==============================] - 0s - loss: 0.2452 - dense_8_loss: 0.1198 - dense_9_loss: 0.0804 - dense_10_loss: 0.0451 - dense_8_acc: 0.9663 - dense_9_acc: 0.9775 - dense_10_acc: 0.9719     
Epoch 12/50
178/178 [==============================] - 0s - loss: 0.2215 - dense_8_loss: 0.1145 - dense_9_loss: 0.0667 - dense_10_loss: 0.0403 - dense_8_acc: 0.9607 - dense_9_acc: 0.9719 - dense_10_acc: 0.9944     
Epoch 13/50
178/178 [==============================] - 0s - loss: 0.2623 - dense_8_loss: 0.1333 - dense_9_loss: 0.0784 - dense_10_loss: 0.0507 - dense_8_acc: 0.9494 - dense_9_acc: 0.9775 - dense_10_acc: 0.9775     
Epoch 14/50
178/178 [==============================] - 0s - loss: 0.2615 - dense_8_loss: 0.0934 - dense_9_loss: 0.0827 - dense_10_loss: 0.0854 - dense_8_acc: 0.9719 - dense_9_acc: 0.9775 - dense_10_acc: 0.9551     
Epoch 15/50
178/178 [==============================] - 0s - loss: 0.2276 - dense_8_loss: 0.1153 - dense_9_loss: 0.0632 - dense_10_loss: 0.0492 - dense_8_acc: 0.9663 - dense_9_acc: 0.9831 - dense_10_acc: 0.9775     
Epoch 16/50
178/178 [==============================] - 0s - loss: 0.2404 - dense_8_loss: 0.1128 - dense_9_loss: 0.0941 - dense_10_loss: 0.0335 - dense_8_acc: 0.9438 - dense_9_acc: 0.9775 - dense_10_acc: 0.9831         
Out[16]:
<keras.callbacks.History at 0x7fe5eedf2400>

In [ ]:
history = final_model.fit(X_train, [Y_train, Y_train2], 
              nb_epoch = 100, 
              batch_size = 256, 
              verbose=1, 
              validation_data=(X_test, [Y_test, Y_test2]),
              callbacks=[reduce_lr, checkpointer],
              shuffle=True)

In [164]:
callbacks = [EarlyStopping(monitor='loss',min_delta=0,patience=setting['patience'])]

In [165]:
def cross_validation(X,Y,setting,num_input):
    model = getModel(setting,num_input)
    preds = []
    for train, test in LeaveOneOut().split(X, Y):
        model.fit(X[train,:],Y[train],epochs=20,verbose=0, callbacks =callbacks)
        probas_ = model.predict(X[test,:])
        preds.append(probas_[0][0])
        # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(Y, preds)
    roc_auc = auc(fpr, tpr)
    if roc_auc < 0.5:
        roc_auc = 1 - roc_auc
    return roc_auc

In [175]:
def backward_selection(X,Y,setting):
    survive_index=[i for i in range(X.shape[1])]
    best_perf=0   
    for i in range(len(survive_index)-1):
        perfs = []

        print(survive_index)
        for index in survive_index:
            print(index)
            survive_index_copy = [i for i in survive_index if i!=index]
            perfs.append(cross_validation(X[:,survive_index_copy],Y,setting,num_input = len(survive_index)-1))
        print("best_perf",best_perf)
        max_index = np.argmax(perfs)
        current_best = np.max(perfs)

        print("current_best",current_best)
        if current_best > best_perf:
            best_perf = current_best
            survive_index.remove(survive_index[max_index])
        else:
            break
    return (survive_index,best_perf)

In [ ]:
backward_selection(X[:,0:10],Y,setting)


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
0
1
2
3
4
5
6
7
8
9
best_perf 0
current_best 1.0
[1, 2, 3, 4, 5, 6, 7, 8, 9]
1

In [146]:
fpr, tpr, thresholds = roc_curve(Y, preds)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, alpha=0.3)
plt.title('(AUC = %0.2f)' % (roc_auc))
plt.show()



In [27]:
def cross_validation(X=X,Y=Y,epochs_=20,num_input_ = 84):
    model = getModel(num_input=num_input_)
    preds = []
    for train, test in LeaveOneOut().split(X, Y):
        model.fit(X,Y,epochs=epochs_,verbose=0)
    #     print(test)
        probas_ = model.predict(X[test,:])
        preds.append(probas_[0][0])
        # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(Y, preds)
    roc_auc = auc(fpr, tpr)
    return roc_auc

In [36]:
survive_index=[i for i in range(4)]
def backward_selection(survive_index):
    for i in range(len(survive_index)-1):
        perfs = []
        best_perf=0
        for index in survive_index:
            print(index,"\n")
            survive_index_copy = [i for i in survive_index if i!=index]
            perfs.append(cross_validation(X=X[:,survive_index_copy],Y=Y,epochs_=20,num_input_ = len(survive_index)-1))
        
        max_index = np.argmax(perfs)
        current_best = np.max(perfs)
        print(current_best)
        if current_best > best_perf:
            best_perf = current_best
            survive_index.remove(survive_index[max_index])
        else:
            break
    return survive_index

In [37]:
backward_selection(survive_index)


0 

1 

2 

3 

0.807926829268
0 

1 

2 

0.81881533101
0 

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-37-dcc3cb13baba> in <module>()
----> 1 backward_selection(survive_index)

<ipython-input-36-438bce7b19a2> in backward_selection(survive_index)
      7             print(index,"\n")
      8             survive_index_copy = [i for i in survive_index if i!=index]
----> 9             perfs.append(cross_validation(X=X[:,survive_index_copy],Y=Y,epochs_=20,num_input_ = len(survive_index)-1))
     10 
     11         max_index = np.argmax(perfs)

<ipython-input-27-e51115ec81ba> in cross_validation(X, Y, epochs_, num_input_)
      3     preds = []
      4     for train, test in LeaveOneOut().split(X, Y):
----> 5         model.fit(X,Y,epochs=epochs_,verbose=0)
      6     #     print(test)
      7         probas_ = model.predict(X[test,:])

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    865                               class_weight=class_weight,
    866                               sample_weight=sample_weight,
--> 867                               initial_epoch=initial_epoch)
    868 
    869     def evaluate(self, x, y, batch_size=32, verbose=1,

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1593                               initial_epoch=initial_epoch,
   1594                               steps_per_epoch=steps_per_epoch,
-> 1595                               validation_steps=validation_steps)
   1596 
   1597     def evaluate(self, x, y,

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1180                     batch_logs['size'] = len(batch_ids)
   1181                     callbacks.on_batch_begin(batch_index, batch_logs)
-> 1182                     outs = f(ins_batch)
   1183                     if not isinstance(outs, list):
   1184                         outs = [outs]

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2268         updated = session.run(self.outputs + [self.updates_op],
   2269                               feed_dict=feed_dict,
-> 2270                               **self.session_kwargs)
   2271         return updated[:len(self.outputs)]
   2272 

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    787     try:
    788       result = self._run(None, fetches, feed_dict, options_ptr,
--> 789                          run_metadata_ptr)
    790       if run_metadata:
    791         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    995     if final_fetches or final_targets:
    996       results = self._do_run(handle, final_targets, final_fetches,
--> 997                              feed_dict_string, options, run_metadata)
    998     else:
    999       results = []

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1130     if handle is None:
   1131       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1132                            target_list, options, run_metadata)
   1133     else:
   1134       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1137   def _do_call(self, fn, *args):
   1138     try:
-> 1139       return fn(*args)
   1140     except errors.OpError as e:
   1141       message = compat.as_text(e.message)

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1119         return tf_session.TF_Run(session, options,
   1120                                  feed_dict, fetch_list, target_list,
-> 1121                                  status, run_metadata)
   1122 
   1123     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [33]:
max_index = np.argmax(perfs)
survive_index[max_index]


Out[33]:
11

In [ ]:


In [59]:
fpr, tpr, thresholds = roc_curve(Y, preds)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, alpha=0.3)
plt.title('(AUC = %0.2f)' % (roc_auc))
plt.show()



In [ ]:


In [ ]: