In [2]:
import os
import time

import numpy as np
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
In [3]:
# Read the NCI60 table from disk and split it positionally into the
# feature matrix and the regression target.
# NOTE(review): column 0 is skipped — presumably a row identifier; confirm.
df = pd.read_csv("./data/nci60.csv")
X, y = df.iloc[:, 2:], df.iloc[:, 1]  # y is the column later saved as "IC50"
In [4]:
# Recursive feature elimination driven by a linear-kernel SVR.
# A linear kernel is required here: RFE ranks features by the fitted
# estimator's coefficients, which non-linear kernels do not expose.
clf = SVR(kernel='linear')
estimator = clf  # same object; both names are kept for any later cells

# n_features_to_select must be passed by keyword: it is keyword-only in
# current scikit-learn releases, so the positional form raises TypeError.
# step=1 drops a single feature per elimination round.
selector = RFE(estimator, n_features_to_select=50, step=1)

# perf_counter() is monotonic, so the measured duration cannot be skewed
# by wall-clock adjustments during this long-running fit.
start_time = time.perf_counter()
selector = selector.fit(X, y)
elapsed_time = time.perf_counter() - start_time
elapsed_time  # seconds spent fitting, shown as the cell output
Out[4]:
In [24]:
# RandomizedLasso is intentionally no longer imported: it was removed from
# scikit-learn (0.21+), so importing it raised ImportError and stopped the
# cell before any work was done. It was not used in this cell.
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# Recursive feature elimination driven by ridge regression.
# NOTE(review): alpha=7 is kept from the original; its rationale is not
# recorded in this notebook — TODO confirm how it was chosen.
ridge = Ridge(alpha=7)

# n_features_to_select must be passed by keyword (keyword-only in current
# scikit-learn releases). This rebinds `selector`, replacing the SVR-based
# selector fitted earlier; the cells below therefore use the ridge result.
selector = RFE(ridge, n_features_to_select=50, step=1)

# perf_counter() is monotonic, which makes it the right clock for durations.
start_time = time.perf_counter()
selector = selector.fit(X, y)
elapsed_time = time.perf_counter() - start_time
elapsed_time  # seconds spent fitting, shown as the cell output
Out[24]:
In [13]:
# Print the per-feature elimination ranking, then the estimator that RFE
# fitted on the surviving features, one per line.
print(selector.ranking_, selector.estimator_, sep="\n")
In [25]:
# Keep only the columns RFE retained. `support_` is the boolean keep-mask;
# `ranking_` holds rank values rather than column positions, so it must not
# be used as a positional indexer here.
kept_positions = np.flatnonzero(selector.support_)
X_new = X.iloc[:, kept_positions]
In [26]:
# Prepend the target as the first column and write the reduced table to disk.
# DataFrame.insert raises ValueError when the column already exists, so the
# guard makes this cell safe to re-run without re-creating X_new first.
if "IC50" not in X_new.columns:
    X_new.insert(0, "IC50", y)

# Create the output directory up front: to_csv does not create missing
# directories, and failing here would waste the long RFE fit above.
os.makedirs("./selected_features", exist_ok=True)

# The file name (including its "sklean" spelling) is left unchanged because
# other code may read this exact path.
X_new.to_csv("./selected_features/rfe_sklean_ridge_50.csv")