In [1]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout,BatchNormalization
from keras.optimizers import RMSprop
from keras.regularizers import l2,l1
from keras.optimizers import Adam,RMSprop

from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping

import collections


Using TensorFlow backend.

In [26]:
df = pd.read_csv("../../out_data/MLDB.csv")
first_gene_index = df.columns.get_loc("rrrD")

In [28]:
X, Y = np.split(df, [first_gene_index], axis=1)
X = X.values
X = X-0.5
Y = Y.values[:,1]

In [29]:
Model_setting = collections.namedtuple('Model_setting','num_layers num_node alpha drop_rate act_method lr regularization \
patience opt_method')

In [38]:
def get_setting(file_path,line_num):
    digits = tuple([str(i) for i in range(9)])
    with open(file_path) as fp:
        for i, line in enumerate(fp):
            if i == line_num:
                setting = [float(x)  if x.startswith(digits ) else x for x in line.strip().split(" ")] 
                break
    return setting
setting_ = get_setting("../../out_data/FFN_setting",0)

In [40]:
# setting_ = [1,100, 0.5, 0.2, 'tanh', 0.01, 'l2', 3, "adam"]
setting = Model_setting(*setting_)
setting = setting._asdict()

In [41]:
setting


Out[41]:
OrderedDict([('num_layers', 1.0),
             ('num_node', 100.0),
             ('alpha', 0.5),
             ('drop_rate', 0.2),
             ('act_method', 'tanh'),
             ('lr', 0.01),
             ('regularization', 'l2'),
             ('patience', 3.0),
             ('opt_method', 'adam')])

In [42]:
def getModel(setting,num_input=84):
    regularizer = l1(setting['alpha']) if setting['regularization']=='l1' else l2(setting['alpha'])
    optimizer = Adam(lr=setting['lr']) if setting['opt_method'] == 'adam' else RMSprop(lr=setting['lr'])
    model = Sequential()
    for i in range(int(setting['num_layers'])):
        if i==0:
            model.add(Dense(int(setting['num_node']), input_shape=(num_input,), activation=setting['act_method'],\
                            kernel_regularizer = regularizer))
            model.add(Dropout(setting['drop_rate']))
        else:
            model.add(Dense(int(setting['num_node'])//(2**i), activation=setting['act_method']))
            model.add(Dropout(setting['drop_rate']))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=setting['lr']), metrics=['accuracy'])
    return model

In [48]:
callbacks = [EarlyStopping(monitor='acc',min_delta=0,patience=setting['patience'])]

model = getModel(setting,84) model.fit(X,Y,epochs=20,verbose=1)


In [49]:
def cross_validation(X,Y,setting,num_input):
    model = getModel(setting,num_input)
    preds = []
    for train, test in LeaveOneOut().split(X, Y):
        model.fit(X[train,:],Y[train],epochs=20,verbose=0, callbacks =callbacks)
        probas_ = model.predict(X[test,:])
        preds.append(probas_[0][0])
        # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(Y, preds)
    roc_auc = auc(fpr, tpr)
    if roc_auc < 0.5:
        roc_auc = 1 - roc_auc
    return roc_auc

In [76]:
def backward_selection(X,Y,setting):
    survive_index=[i for i in range(X.shape[1])]
    best_perf=0
    best_perfs =[]
    exclude_index = []
    for i in range(len(survive_index)-1):
        perfs = []

        print(survive_index)
        for index in survive_index:

            survive_index_copy = [i for i in survive_index if i!=index]
            perfs.append(cross_validation(X[:,survive_index_copy],Y,setting,num_input = len(survive_index)-1))
        print("best_perf",best_perf)
        max_index = np.argmax(perfs)
        current_best = np.max(perfs)

        print("current_best",current_best)
        if current_best > best_perf:
            best_perf = current_best
            print("to_exclude: ",survive_index[max_index])
            best_perfs.append(best_perf)
            exclude_index.append(survive_index[max_index])
            
            survive_index.remove(survive_index[max_index])
        else:
            break
    return (best_perfs,exclude_index)

In [77]:
res = backward_selection(X[:,0:9],Y,setting)


[0, 1, 2, 3, 4, 5, 6, 7, 8]
('best_perf', 0)
('current_best', 1.0)
('to_exclude: ', 0)
[1, 2, 3, 4, 5, 6, 7, 8]
('best_perf', 1.0)
('current_best', 0.9985632183908046)

In [78]:
res


Out[78]:
([1.0], [0])

In [70]:
import  csv
with open("../../out_data/feature_selection/ANN/test.csv","w") as f:
    wr = csv.writer(f)
    wr.writerows(res)

In [64]:
f = open('','w') #open a file in write mode

f.write(" ".join(res)) #write the tuple into a file

f.close() #close the file


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-64-ea006f5dcaf0> in <module>()
      1 f = open('../../out_data/feature_selection/ANN/test.txt','w') #open a file in write mode
      2 
----> 3 f.write(" ".join(res)) #write the tuple into a file
      4 
      5 f.close() #close the file

TypeError: sequence item 0: expected string, list found

In [146]:
fpr, tpr, thresholds = roc_curve(Y, preds)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, alpha=0.3)
plt.title('(AUC = %0.2f)' % (roc_auc))
plt.show()



In [27]:
def cross_validation(X=X,Y=Y,epochs_=20,num_input_ = 84):
    model = getModel(num_input=num_input_)
    preds = []
    for train, test in LeaveOneOut().split(X, Y):
        model.fit(X,Y,epochs=epochs_,verbose=0)
    #     print(test)
        probas_ = model.predict(X[test,:])
        preds.append(probas_[0][0])
        # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(Y, preds)
    roc_auc = auc(fpr, tpr)
    return roc_auc

In [36]:
survive_index=[i for i in range(4)]
def backward_selection(survive_index):
    for i in range(len(survive_index)-1):
        perfs = []
        best_perf=0
        for index in survive_index:
            print(index,"\n")
            survive_index_copy = [i for i in survive_index if i!=index]
            perfs.append(cross_validation(X=X[:,survive_index_copy],Y=Y,epochs_=20,num_input_ = len(survive_index)-1))
        
        max_index = np.argmax(perfs)
        current_best = np.max(perfs)
        print(current_best)
        if current_best > best_perf:
            best_perf = current_best
            survive_index.remove(survive_index[max_index])
        else:
            break
    return survive_index

In [37]:
backward_selection(survive_index)


0 

1 

2 

3 

0.807926829268
0 

1 

2 

0.81881533101
0 

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-37-dcc3cb13baba> in <module>()
----> 1 backward_selection(survive_index)

<ipython-input-36-438bce7b19a2> in backward_selection(survive_index)
      7             print(index,"\n")
      8             survive_index_copy = [i for i in survive_index if i!=index]
----> 9             perfs.append(cross_validation(X=X[:,survive_index_copy],Y=Y,epochs_=20,num_input_ = len(survive_index)-1))
     10 
     11         max_index = np.argmax(perfs)

<ipython-input-27-e51115ec81ba> in cross_validation(X, Y, epochs_, num_input_)
      3     preds = []
      4     for train, test in LeaveOneOut().split(X, Y):
----> 5         model.fit(X,Y,epochs=epochs_,verbose=0)
      6     #     print(test)
      7         probas_ = model.predict(X[test,:])

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    865                               class_weight=class_weight,
    866                               sample_weight=sample_weight,
--> 867                               initial_epoch=initial_epoch)
    868 
    869     def evaluate(self, x, y, batch_size=32, verbose=1,

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1593                               initial_epoch=initial_epoch,
   1594                               steps_per_epoch=steps_per_epoch,
-> 1595                               validation_steps=validation_steps)
   1596 
   1597     def evaluate(self, x, y,

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1180                     batch_logs['size'] = len(batch_ids)
   1181                     callbacks.on_batch_begin(batch_index, batch_logs)
-> 1182                     outs = f(ins_batch)
   1183                     if not isinstance(outs, list):
   1184                         outs = [outs]

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2268         updated = session.run(self.outputs + [self.updates_op],
   2269                               feed_dict=feed_dict,
-> 2270                               **self.session_kwargs)
   2271         return updated[:len(self.outputs)]
   2272 

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    787     try:
    788       result = self._run(None, fetches, feed_dict, options_ptr,
--> 789                          run_metadata_ptr)
    790       if run_metadata:
    791         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    995     if final_fetches or final_targets:
    996       results = self._do_run(handle, final_targets, final_fetches,
--> 997                              feed_dict_string, options, run_metadata)
    998     else:
    999       results = []

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1130     if handle is None:
   1131       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1132                            target_list, options, run_metadata)
   1133     else:
   1134       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1137   def _do_call(self, fn, *args):
   1138     try:
-> 1139       return fn(*args)
   1140     except errors.OpError as e:
   1141       message = compat.as_text(e.message)

/home/wxk/anaconda2/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1119         return tf_session.TF_Run(session, options,
   1120                                  feed_dict, fetch_list, target_list,
-> 1121                                  status, run_metadata)
   1122 
   1123     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [33]:
max_index = np.argmax(perfs)
survive_index[max_index]


Out[33]:
11

In [ ]:


In [59]:
fpr, tpr, thresholds = roc_curve(Y, preds)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, alpha=0.3)
plt.title('(AUC = %0.2f)' % (roc_auc))
plt.show()



In [ ]:


In [ ]: