In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Notebook-wide default figure size (width, height in inches); each figure
# below stacks one box-plot panel per dataset, so it needs to be tall.
plt.rcParams['figure.figsize'] = (20.0, 20.0)

# Estimator names, matched against the 'ml' column of `df` when plotting.
estimators_reg = [
    "LassoLarsCV",
    "DecisionTreeRegressor",
]
estimators_clf = [
    "LogisticRegression",
    "DecisionTreeClassifier",
]

# Tab-separated results table. The plotting cells read its 'ml', 'dataset',
# 'max_stall' and 'score' columns.
df = pd.read_csv("FEW-config-compare-ms:0-10-100.csv", sep="\t")

# Dataset names, matched against the 'dataset' column of `df` when plotting.
problems_reg = [
    'concrete',
    'enc',
    'housing',
    'uball5d',
    'yacht',
]
problems_clf = [
    'heart',
    'bc_clean',
    'yeast',
    'seg',
    'wav',
]
# Regression results: one figure per estimator, one box-plot panel per dataset.
# Within a panel, scores for each max_stall setting are shifted by the median
# score of the max_stall == 0 runs, so the y = 0 line marks that baseline.
stall_counts = [0, 10, 100]  # first entry is the baseline setting

for est in estimators_reg:
    # One panel per dataset; squeeze=False keeps `axes` two-dimensional so the
    # indexing below also works when problems_reg holds a single dataset.
    fig, axes = plt.subplots(len(problems_reg), sharex=True, squeeze=False)
    fig.suptitle(est, size=18)
    df_est = df.loc[df['ml'] == est]

    for ax, p in zip(axes[:, 0], problems_reg):
        df_prb = df_est.loc[df_est['dataset'] == p]
        # Score series for each stall setting, in the order of stall_counts.
        scores = [df_prb.loc[df_prb['max_stall'] == sc, 'score']
                  for sc in stall_counts]
        # Compute the baseline median once instead of once per box.
        baseline = scores[0].median()

        ax.boxplot([(s - baseline).values for s in scores])
        # axhline spans the full panel width; a data-space segment from x=0 to
        # x=3 would end at the centre of the last box (boxes sit at 1, 2, 3)
        # and pull x=0 into the data range.
        ax.axhline(0, color='k', linestyle='-')
        ax.set_xticklabels(['stall count {}'.format(sc) for sc in stall_counts])
        ax.set_title(p)

    # Render this estimator's figure before building the next one.
    plt.show()
In [2]:
# Classification results: one figure per estimator, one box-plot panel per
# dataset. Within a panel, scores for each max_stall setting are shifted by the
# median score of the max_stall == 0 runs, so the y = 0 line marks that baseline.
stall_counts = [0, 10, 100]  # first entry is the baseline setting

for est in estimators_clf:
    # One panel per dataset; squeeze=False keeps `axes` two-dimensional so the
    # indexing below also works when problems_clf holds a single dataset.
    fig, axes = plt.subplots(len(problems_clf), sharex=True, squeeze=False)
    fig.suptitle(est, size=18)
    df_est = df.loc[df['ml'] == est]

    for ax, p in zip(axes[:, 0], problems_clf):
        df_prb = df_est.loc[df_est['dataset'] == p]
        # Score series for each stall setting, in the order of stall_counts.
        scores = [df_prb.loc[df_prb['max_stall'] == sc, 'score']
                  for sc in stall_counts]
        # Compute the baseline median once instead of once per box.
        baseline = scores[0].median()

        ax.boxplot([(s - baseline).values for s in scores])
        # axhline spans the full panel width; a data-space segment from x=0 to
        # x=3 would end at the centre of the last box (boxes sit at 1, 2, 3)
        # and pull x=0 into the data range.
        ax.axhline(0, color='k', linestyle='-')
        ax.set_xticklabels(['stall count {}'.format(sc) for sc in stall_counts])
        ax.set_title(p)

    # Render this estimator's figure before building the next one.
    plt.show()
In [ ]: