In [ ]:
import numpy as np
from few import FEW
from sklearn.model_selection import cross_val_score, StratifiedKFold, KFold
import time
from tqdm import tqdm 

def compare_configs(estimators,X,y,classification=False):
    """routine that compares a list of estimators evaluated on a set of data"""
    if classification:
        cv = StratifiedKFold(n_splits=3,shuffle=True)
    else:
        cv = KFold(n_splits=5,shuffle=True)

    trials = 10
    scores = np.ndarray((len(estimators),trials))
    times = np.zeros((len(estimators),trials))
    
    for e,est in tqdm(enumerate(estimators)):
        for t in np.arange(trials):
            t0 = time.time()
            scores[e,t] = np.mean(cross_val_score(est,X,y,cv=cv,n_jobs=-1))
            times[e,t] = time.time() - t0
    
    return scores, times

In [ ]:
# define FEW configurations to compare
from sklearn.linear_model import LassoLarsCV, LogisticRegressionCV
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
%matplotlib inline

# stall_count options
ms = np.arange(1,102,step=10)
estimators = {}
estimators['lasso'] = []
estimators['dt'] = []
for m in ms:
    estimators['lasso'].append(FEW(ml=LassoLarsCV(),generations=100,max_stall=m))
    estimators['dt'].append(FEW(ml=DecisionTreeRegressor(),generations=100,max_stall=m))
problems = ['concrete','enc','housing','uball5d','yacht']
# problems = ['enc','housing']
###################################################################################################### lasso
print('--- lasso ---')
h,ax = plt.subplots(len(problems),sharex=True)
for i,p in enumerate(problems):
    print('problem:',p)
    input_data = pd.read_csv('data/d_' + p + '.txt', sep=None, engine='python')
    X = StandardScaler().fit_transform(input_data.drop('label',axis=1).values)
    y = input_data['label'].values
    scores,times = compare_configs(estimators['lasso'],X,y)
    norm_scores = scores - np.median(scores[0,:])
    # plot results
    ax[i].boxplot(list(norm_scores),positions=ms,widths=5)
    ax[i].plot([0,np.max(ms)+1],[0,0],'-k')
    ax[i].set_xticks(ms)
    ax[i].set_title(p)
################################################################################################## decision tree
print('--- decision tree ---')
h2,ax2 = plt.subplots(len(problems),sharex=True)
for i,p in enumerate(problems):
    print('problem:',p)
    input_data = pd.read_csv('data/d_' + p + '.txt', sep=None, engine='python')
    X = StandardScaler().fit_transform(input_data.drop('label',axis=1).values)
    y = input_data['label'].values
    scores,times = compare_configs(estimators['dt'],X,y)
    norm_scores = scores - np.median(scores[0,:])
    # plot results
    ax2[i].boxplot(list(norm_scores),positions=ms,widths=5)
    ax2[i].plot([0,np.max(ms)+1],[0,0],'-k')
    ax2[i].set_xticks(ms)
    ax2[i].set_title(p)

plt.show()


0it [00:00, ?it/s]
--- lasso ---
problem: concrete
11it [32:58, 279.21s/it]
0it [00:00, ?it/s]
problem: enc
11it [35:21, 359.66s/it]
0it [00:00, ?it/s]
problem: housing
11it [1:18:18, 588.74s/it]
0it [00:00, ?it/s]
problem: uball5d
3it [14:12, 242.96s/it]Process ForkPoolWorker-29259:
Process ForkPoolWorker-29258:
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
Process ForkPoolWorker-29257:
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Process ForkPoolWorker-29256:
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-29255:
Process ForkPoolWorker-29254:
Traceback (most recent call last):
Process ForkPoolWorker-29251:
Process ForkPoolWorker-29250:
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
Traceback (most recent call last):
Process ForkPoolWorker-29247:
Process ForkPoolWorker-29249:
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Process ForkPoolWorker-29246:
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
Traceback (most recent call last):
Process ForkPoolWorker-29243:
Process ForkPoolWorker-29252:
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
KeyboardInterrupt
Process ForkPoolWorker-29260:
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Process ForkPoolWorker-29253:
Traceback (most recent call last):
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Process ForkPoolWorker-29248:
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 362, in get
    return recv()
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
Process ForkPoolWorker-29241:
Traceback (most recent call last):
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/bill/anaconda3/lib/python3.5/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/home/bill/anaconda3/lib/python3.5/site-packages/sklearn/externals/joblib/pool.py", line 360, in get
    racquire()
KeyboardInterrupt

In [ ]:
# define FEW configurations to compare
# CLASSIFICATION
from sklearn.linear_model import LassoLarsCV, LogisticRegressionCV
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
%matplotlib inline

# stall_count options
ms = np.arange(1,102,step=10)
estimators = {}
estimators['lasso'] = []
estimators['dt'] = []
for m in ms:
    estimators['lasso'].append(FEW(ml=LassoLarsCV(),generations=100,max_stall=m))
    estimators['dt'].append(FEW(ml=DecisionTreeRegressor(),generations=100,max_stall=m))
problems = ['concrete','enc','housing','uball5d','yacht']
# problems = ['enc','housing']
###################################################################################################### lasso
print('--- lasso ---')
h,ax = plt.subplots(len(problems),sharex=True)
for i,p in enumerate(problems):
    print('problem:',p)
    input_data = pd.read_csv('data/d_' + p + '.txt', sep=None, engine='python')
    X = StandardScaler().fit_transform(input_data.drop('label',axis=1).values)
    y = input_data['label'].values
    scores,times = compare_configs(estimators['lasso'],X,y)
    norm_scores = scores - np.median(scores[0,:])
    # plot results
    ax[i].boxplot(list(norm_scores),positions=ms,widths=5)
    ax[i].plot([0,np.max(ms)+1],[0,0],'-k')
    ax[i].set_xticks(ms)
    ax[i].set_title(p)
################################################################################################## decision tree
print('--- decision tree ---')
h2,ax2 = plt.subplots(len(problems),sharex=True)
for i,p in enumerate(problems):
    print('problem:',p)
    input_data = pd.read_csv('data/d_' + p + '.txt', sep=None, engine='python')
    X = StandardScaler().fit_transform(input_data.drop('label',axis=1).values)
    y = input_data['label'].values
    scores,times = compare_configs(estimators['dt'],X,y)
    norm_scores = scores - np.median(scores[0,:])
    # plot results
    ax2[i].boxplot(list(norm_scores),positions=ms,widths=5)
    ax2[i].plot([0,np.max(ms)+1],[0,0],'-k')
    ax2[i].set_xticks(ms)
    ax2[i].set_title(p)

plt.show()##