In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Default canvas size for every figure created below.
plt.rcParams['figure.figsize'] = (20.0, 20.0)

# Tab-separated results table. The plotting cells filter it on the
# 'ml', 'dataset' and 'max_stall' columns and plot the 'score' column.
df = pd.read_csv("FEW-config-compare-ms:0-10-100.csv", sep="\t")

# Values matched against df['ml']; one figure is drawn per entry.
estimators_reg = [
    "LassoLarsCV",
    "DecisionTreeRegressor",
]
estimators_clf = [
    "LogisticRegression",
    "DecisionTreeClassifier",
]

# Values matched against df['dataset']; one panel is drawn per entry.
problems_reg = ['concrete', 'enc', 'housing', 'uball5d', 'yacht']
problems_clf = ['heart', 'bc_clean', 'yeast', 'seg', 'wav']

# Regression estimators: one figure per estimator, one panel per dataset.
# Each panel holds three box plots (max_stall == 0, 10, 100). Every score is
# shifted by the median score of the max_stall == 0 runs for the same
# estimator/dataset, so y = 0 marks the stall-count-0 median.
for est in estimators_reg:
    # Panel count follows the dataset list instead of a hard-coded 5;
    # squeeze=False keeps `axes` a 2-D array even when there is one dataset.
    fig, axes = plt.subplots(len(problems_reg), sharex=True, squeeze=False)
    fig.suptitle(est, size=18)
    df_est = df.loc[df['ml'] == est]
    for ax, p in zip(axes.ravel(), problems_reg):
        df_prb = df_est.loc[df_est['dataset'] == p]
        scores_by_stall = [
            df_prb.loc[df_prb['max_stall'] == stall, 'score']
            for stall in (0, 10, 100)
        ]
        # Baseline is computed once from the max_stall == 0 group.
        baseline = scores_by_stall[0].median()
        ax.boxplot([(scores - baseline).values for scores in scores_by_stall])
        # axhline spans the full axes width and leaves the x-limits alone;
        # the previous plot([0, 3], [0, 0]) ended at the centre of the last
        # box (boxes sit at x = 1, 2, 3) and stretched the x-axis down to 0.
        ax.axhline(0, color='k', linestyle='-')
        ax.set_xticklabels(['stall count 0', 'stall count 10', 'stall count 100'])
        ax.set_title(p)
plt.show()



In [2]:
# Classification estimators: one figure per estimator, one panel per dataset.
# Each panel holds three box plots (max_stall == 0, 10, 100). Every score is
# shifted by the median score of the max_stall == 0 runs for the same
# estimator/dataset, so y = 0 marks the stall-count-0 median.
for est in estimators_clf:
    # Panel count follows the dataset list instead of a hard-coded 5;
    # squeeze=False keeps `axes` a 2-D array even when there is one dataset.
    fig, axes = plt.subplots(len(problems_clf), sharex=True, squeeze=False)
    fig.suptitle(est, size=18)
    df_est = df.loc[df['ml'] == est]
    for ax, p in zip(axes.ravel(), problems_clf):
        df_prb = df_est.loc[df_est['dataset'] == p]
        scores_by_stall = [
            df_prb.loc[df_prb['max_stall'] == stall, 'score']
            for stall in (0, 10, 100)
        ]
        # Baseline is computed once from the max_stall == 0 group.
        baseline = scores_by_stall[0].median()
        ax.boxplot([(scores - baseline).values for scores in scores_by_stall])
        # axhline spans the full axes width and leaves the x-limits alone;
        # the previous plot([0, 3], [0, 0]) ended at the centre of the last
        # box (boxes sit at x = 1, 2, 3) and stretched the x-axis down to 0.
        ax.axhline(0, color='k', linestyle='-')
        ax.set_xticklabels(['stall count 0', 'stall count 10', 'stall count 100'])
        ax.set_title(p)
plt.show()



In [ ]: