In [1]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout,BatchNormalization
from keras.optimizers import RMSprop
from keras.regularizers import l2,l1
from keras.optimizers import Adam,RMSprop
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
import collections
In [26]:
# Load the dataset, then look up the position of the "rrrD" column; the next
# cell uses that position as the column split point.
df = pd.read_csv(filepath_or_buffer="../../out_data/MLDB.csv")
first_gene_index = df.columns.get_loc(key="rrrD")
In [28]:
# Split the frame column-wise at `first_gene_index`: everything to the left
# becomes the feature matrix, and the label vector is taken from the right part.
# Positional slicing replaces np.split(df, ...), which reaches into
# DataFrame.swapaxes (deprecated in pandas 2.1) and therefore warns or stops
# returning DataFrames on newer pandas.
# The features are shifted by -0.5 (presumably to centre 0/1-coded inputs around
# zero -- TODO confirm the encoding of these columns).
X = df.iloc[:, :first_gene_index].values - 0.5
# Label = the second column of the right-hand part (index 1 after the split).
Y = df.iloc[:, first_gene_index:].values[:, 1]
In [29]:
# Record type for one hyper-parameter configuration. The field order matters:
# values are unpacked into it positionally.
Model_setting = collections.namedtuple(
    'Model_setting',
    [
        'num_layers',
        'num_node',
        'alpha',
        'drop_rate',
        'act_method',
        'lr',
        'regularization',
        'patience',
        'opt_method',
    ],
)
In [38]:
def get_setting(file_path, line_num):
    """Read one whitespace-separated configuration line from a text file.

    Parameters
    ----------
    file_path : str
        Path of the settings file.
    line_num : int
        Zero-based index of the line to read.

    Returns
    -------
    list
        One entry per token on the requested line. Tokens that start with a
        decimal digit are converted to ``float``; all other tokens are kept
        as strings.

    Raises
    ------
    IndexError
        If the file has no line with index ``line_num``.
    ValueError
        If a token starts with a digit but is not a valid float literal.
    """
    # All ten decimal digits. Using range(9) would leave out "9", so a token
    # such as "9" or "90" would silently stay a string.
    digits = tuple(str(d) for d in range(10))
    with open(file_path) as fp:
        for i, line in enumerate(fp):
            if i == line_num:
                # split() with no argument ignores repeated blanks and the
                # trailing newline, so no empty tokens are produced.
                return [
                    float(token) if token.startswith(digits) else token
                    for token in line.split()
                ]
    # Reaching this point means the requested line does not exist; fail with a
    # clear message instead of an unbound-variable error.
    raise IndexError(
        "line %r not found in settings file %r" % (line_num, file_path)
    )
# Raw hyper-parameter values: the first line (index 0) of the settings file.
setting_ = get_setting(file_path="../../out_data/FFN_setting", line_num=0)
In [40]:
# Example of the layout expected in `setting_`:
#   [1, 100, 0.5, 0.2, 'tanh', 0.01, 'l2', 3, "adam"]
# Attach the field names to the raw values and expose the result as a mapping,
# so later cells can look hyper-parameters up by key.
setting = Model_setting._make(setting_)._asdict()
In [41]:
# Show the active hyper-parameter mapping.
setting
Out[41]:
In [42]:
def getModel(setting, num_input=84):
    """Build and compile a feed-forward binary classifier.

    Parameters
    ----------
    setting : mapping
        Hyper-parameters. Keys read here: 'num_layers', 'num_node', 'alpha',
        'drop_rate', 'act_method', 'lr', 'regularization' and 'opt_method'.
    num_input : int, optional
        Number of input features (default 84).

    Returns
    -------
    model
        A compiled ``Sequential`` model made of ``num_layers`` hidden Dense
        layers, each followed by Dropout, and one sigmoid output unit. It is
        compiled with binary cross-entropy loss and the accuracy metric.
    """
    # 'l1' selects an L1 penalty; any other value falls back to L2.
    if setting['regularization'] == 'l1':
        regularizer = l1(setting['alpha'])
    else:
        regularizer = l2(setting['alpha'])

    # 'adam' selects Adam; any other value falls back to RMSprop.
    if setting['opt_method'] == 'adam':
        optimizer = Adam(lr=setting['lr'])
    else:
        optimizer = RMSprop(lr=setting['lr'])

    base_width = int(setting['num_node'])
    model = Sequential()
    for depth in range(int(setting['num_layers'])):
        if depth == 0:
            # Only the first layer declares the input shape and carries the
            # weight penalty.
            model.add(Dense(base_width,
                            input_shape=(num_input,),
                            activation=setting['act_method'],
                            kernel_regularizer=regularizer))
        else:
            # Each deeper layer has half as many units as the one before it.
            model.add(Dense(base_width // (2 ** depth),
                            activation=setting['act_method']))
        model.add(Dropout(setting['drop_rate']))
    model.add(Dense(1, activation='sigmoid'))

    # Compile with the optimizer selected above. Previously a new Adam instance
    # was always passed here, so 'opt_method' had no effect.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
In [48]:
# Early stopping on the training accuracy: training halts once the monitored
# value has not improved for `patience` epochs.
# NOTE(review): the logged name for metrics=['accuracy'] is 'acc' in older Keras
# releases and 'accuracy' in newer ones -- confirm against the installed version.
callbacks = [EarlyStopping(monitor='acc', min_delta=0, patience=setting['patience'])]

# Fit once on the full data set as a smoke test. The two statements were fused
# on a single line, which is a syntax error. The input width is taken from X
# itself instead of the literal 84 so the model always matches the data.
# This fit does not pass `callbacks`.
model = getModel(setting, X.shape[1])
model.fit(X, Y, epochs=20, verbose=1)
In [49]:
def cross_validation(X, Y, setting, num_input):
    """Leave-one-out estimate of the ROC AUC for one hyper-parameter setting.

    Parameters
    ----------
    X : array
        Feature matrix, indexed as ``X[rows, :]``.
    Y : array
        Labels, one per row of ``X``.
    setting : mapping
        Hyper-parameters forwarded to ``getModel``.
    num_input : int
        Number of input features forwarded to ``getModel``.

    Returns
    -------
    float
        AUC of the held-out predictions. A value below 0.5 is reflected to
        ``1 - AUC``, so the result is never below 0.5.

    Notes
    -----
    Uses the module-level ``callbacks`` list for every fit.
    """
    preds = []
    for train, test in LeaveOneOut().split(X, Y):
        # Build a fresh model for every fold. Reusing one model across folds
        # keeps the weights learned in earlier folds, which were trained on the
        # sample that is held out now, so the prediction would not be
        # out-of-sample.
        model = getModel(setting, num_input)
        model.fit(X[train, :], Y[train], epochs=20, verbose=0, callbacks=callbacks)
        probas_ = model.predict(X[test, :])
        # One held-out row and one output unit -> a single probability.
        preds.append(probas_[0][0])
    # Compute the ROC curve and the area under it over all held-out predictions.
    fpr, tpr, thresholds = roc_curve(Y, preds)
    roc_auc = auc(fpr, tpr)
    if roc_auc < 0.5:
        roc_auc = 1 - roc_auc
    return roc_auc
In [76]:
def backward_selection(X, Y, setting):
    """Greedy backward feature elimination scored by ``cross_validation``.

    In each round every remaining column is left out in turn and the reduced
    feature set is scored. The column whose removal gives the highest score is
    dropped, provided that score beats the best score seen so far; otherwise
    the search stops. At most ``X.shape[1] - 1`` rounds are run.

    Parameters
    ----------
    X : array
        Feature matrix, indexed as ``X[:, columns]``.
    Y : array
        Labels forwarded to ``cross_validation``.
    setting : mapping
        Hyper-parameters forwarded to ``cross_validation``.

    Returns
    -------
    tuple
        ``(best_perfs, exclude_index)``: the winning score of each accepted
        round and the column index removed in that round, in order.
    """
    remaining = list(range(X.shape[1]))
    top_score = 0
    score_history = []
    dropped = []
    # The number of rounds is fixed before any column is removed.
    for _ in range(len(remaining) - 1):
        print(remaining)
        n_kept = len(remaining) - 1
        scores = []
        for candidate in remaining:
            kept = [col for col in remaining if col != candidate]
            scores.append(cross_validation(X[:, kept], Y, setting, num_input=n_kept))
        print("best_perf", top_score)
        winner = np.argmax(scores)
        round_best = np.max(scores)
        print("current_best", round_best)
        if not round_best > top_score:
            # No removal improves on the best score so far: stop searching.
            break
        top_score = round_best
        print("to_exclude: ", remaining[winner])
        score_history.append(top_score)
        # Column indices are unique, so popping by position removes exactly
        # the winning column.
        dropped.append(remaining.pop(winner))
    return (score_history, dropped)
In [77]:
# Run the elimination on the first nine feature columns only.
res = backward_selection(X[:, :9], Y, setting)
In [78]:
# Show the selection result.
res
Out[78]:
In [70]:
import csv

# Persist the selection result; each element of `res` becomes one CSV row.
# newline="" is what the csv module requires for files handed to csv.writer;
# without it, platforms that translate "\n" write an extra blank line after
# every row.
with open("../../out_data/feature_selection/ANN/test.csv", "w", newline="") as f:
    wr = csv.writer(f)
    wr.writerows(res)
In [64]:
# NOTE(review): the path below is an empty string, so open() cannot succeed as
# written -- fill in the intended output file before running this cell.
f = open('','w') #open a file in write mode
# NOTE(review): str.join only accepts string items; if `res` is the tuple of
# lists returned by backward_selection, its items must be converted first --
# TODO confirm what `res` holds when this cell runs.
f.write(" ".join(res)) #write the space-joined items of res into the file
f.close() #close the file
In [146]:
# Plot the ROC curve of `preds` against the labels and show the AUC in the title.
# NOTE(review): in the visible cells `preds` is only ever assigned inside
# cross_validation, never at top level -- confirm where this cell gets it from,
# otherwise it fails on a fresh kernel.
fpr, tpr, thresholds = roc_curve(Y, preds)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, alpha=0.3)
plt.title('(AUC = %0.2f)' % (roc_auc))
plt.show()
In [27]:
def cross_validation(X=X, Y=Y, epochs_=20, num_input_=84):
    """Leave-one-out estimate of the ROC AUC using the module-level ``setting``.

    NOTE(review): this rebinds ``cross_validation`` with a signature that differs
    from the four-argument definition in an earlier cell. Running the notebook
    top to bottom leaves this version active.

    Parameters
    ----------
    X : array, optional
        Feature matrix; defaults to the module-level ``X`` as it was when this
        cell ran.
    Y : array, optional
        Labels; defaults to the module-level ``Y`` as it was when this cell ran.
    epochs_ : int, optional
        Number of training epochs per fold (default 20).
    num_input_ : int, optional
        Number of input features forwarded to ``getModel`` (default 84).

    Returns
    -------
    float
        AUC of the held-out predictions.
    """
    preds = []
    for train, test in LeaveOneOut().split(X, Y):
        # getModel requires the hyper-parameter mapping as its first argument;
        # the module-level `setting` is used because this signature has no slot
        # for it. A fresh model per fold keeps earlier folds from leaking in.
        model = getModel(setting, num_input=num_input_)
        # Train on the fold's training rows only. Fitting on all of X and Y
        # would include the held-out sample and inflate the score.
        model.fit(X[train, :], Y[train], epochs=epochs_, verbose=0)
        probas_ = model.predict(X[test, :])
        preds.append(probas_[0][0])
    # Compute the ROC curve and the area under it over all held-out predictions.
    fpr, tpr, thresholds = roc_curve(Y, preds)
    roc_auc = auc(fpr, tpr)
    return roc_auc
In [36]:
# Start the search from the first four column indices.
survive_index = list(range(4))
def backward_selection(survive_index):
    """Greedy backward elimination over the column indices in ``survive_index``.

    In each round every remaining column is left out in turn and the reduced
    set is scored with ``cross_validation`` on the module-level ``X`` and ``Y``.
    The column whose removal scores highest is dropped if that score beats the
    best score seen so far; otherwise the search stops.

    NOTE(review): this rebinds ``backward_selection`` with a signature that
    differs from the three-argument definition in an earlier cell.

    Parameters
    ----------
    survive_index : list of int
        Column indices to start from. The list is modified in place.

    Returns
    -------
    list of int
        The same list object, holding the indices that survived.
    """
    # The best score must persist across rounds. It was reset to 0 inside the
    # loop, which made every round count as an improvement, so the search never
    # stopped early.
    best_perf = 0
    # The number of rounds is fixed before any index is removed.
    for _ in range(len(survive_index) - 1):
        perfs = []
        for index in survive_index:
            print(index, "\n")
            survive_index_copy = [i for i in survive_index if i != index]
            perfs.append(cross_validation(X=X[:, survive_index_copy], Y=Y, epochs_=20,
                                          num_input_=len(survive_index) - 1))
        max_index = np.argmax(perfs)
        current_best = np.max(perfs)
        print(current_best)
        if current_best > best_perf:
            best_perf = current_best
            survive_index.remove(survive_index[max_index])
        else:
            break
    return survive_index
In [37]:
# Run the elimination on the seed list and display the surviving indices.
backward_selection(survive_index)
In [33]:
# Look up which surviving index had the highest score.
# NOTE(review): in the visible cells `perfs` is only assigned inside
# backward_selection, never at top level -- confirm where this cell gets it from.
max_index = np.argmax(perfs)
survive_index[max_index]
Out[33]:
In [ ]:
In [59]:
# Plot the ROC curve of `preds` against the labels and show the AUC in the title.
# NOTE(review): this repeats an earlier plotting cell verbatim and, like it,
# relies on a top-level `preds` that no visible cell assigns -- confirm or remove.
fpr, tpr, thresholds = roc_curve(Y, preds)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, alpha=0.3)
plt.title('(AUC = %0.2f)' % (roc_auc))
plt.show()
In [ ]:
In [ ]: