notebook.community

Edit and run



In [19]:

    
from sklearn.svm import SVR
from minepy import MINE
import pandas as pd
import numpy as np



In [20]:

    
filename = "./data/nci60.csv"
df = pd.read_csv(filename)
X = df.iloc[:,2:]
y = df.iloc[:,1]



In [21]:

    
# Define the rfe-svm function
def rfe_svm_mic(X, y, num_feature_to_selected):
    import time
    clf = SVR()
    model = clf.set_params(kernel='linear')
    X_new = X
    record = {}
    del_ = X.shape[1]-num_feature_to_selected
    start_time = time.time()
    for i in range(del_):
        # get the column name
        names = list(X_new)
        # fit the svm and get the weights of features
        fit = model.fit(X_new, y)
        # compute the mic scores
        mine = MINE()
        mic_scores = []
        for i in range(X_new.shape[1]):
            mine.compute_score(X_new.iloc[:,i], y)
            m = mine.mic()
            mic_scores.append(m)
        sum_cor = {}
        # sum_cor = (1-0.3)*svm_coef + 0.3 * mic_scores
        for i in range(X_new.shape[1]):
            sum_cor[names[i]] = 0.5 * fit.coef_[0][i] + 0.5 * mic_scores[i]
        # get the column name with the samllest value
        min_colname = min(sum_cor, key=sum_cor.get)
        # print(min_colname)
        # get the column number with the samllest value
        min_colnum = X.columns.get_loc(min_colname)
        # print(min_colnum)
        # print(sum_cor[min_colname])
        # remove the feature with the smallest weight
        X_new = X_new.drop(min_colname, axis=1)
        # record the index and value of the deleted feature
        # record.update({min_colname:sum_cor[min_colname]})
        record[min_colname] = sum_cor[min_colname]
    elapsed_time = time.time() - start_time
    return X_new, record, elapsed_time



In [22]:

    
# Do the feature selecltion and get the selected features
X_new, record, elapsed_time = rfe_svm_mic(X, y, 50)
#for key,value in record.items():
#    print(key,':',value)
len(record)
elapsed_time









    Out[22]:





20999.64202952385



In [23]:

    
# combine the label and write to the local destination
X_new.insert(0, "IC50", y)
X_new.to_csv("./selected_features/rfe_svm_mic_out_50_with_50percent.csv")