In [1]:
import sys
import timeit

import numpy as np
import pandas as pd
import seaborn as sns
sns.set(font_scale=1.75)

# Integer codes that stand in for the estimator / sampler names on the plot axes.
CLASSIFIER_IDS = {
    'GradientBoosting': 0,
    'RandomForest': 1,
    'GaussianNB': 2,
    'DecisionTree': 3,
    'LogisticRegression': 4,
    'MLP': 5,
    'AdaBoost': 6,
    'KNN': 7,
}
SAMPLER_IDS = {
    'RandomOverSampler': 0,
    'SMOTE': 1,
    'DummySampler': 2,
    'SMOTEENN': 3,
    'SMOTETomek': 4,
    'ADASYN': 5,
}


def encode_labels(labels, codes):
    """Translate a Series of string labels into integer ids.

    Parameters
    ----------
    labels : pandas.Series
        Column of label names.
    codes : dict
        Mapping from label name to integer id.

    Returns
    -------
    pandas.Series
        Integer ids aligned with ``labels``.

    Raises
    ------
    ValueError
        If any label (including a missing value) has no entry in ``codes``.
        Without this check the failure only shows up as an opaque
        "cannot convert NaN to integer" error from ``astype``.
    """
    encoded = labels.map(codes)
    unknown = labels[encoded.isna()].unique()
    if len(unknown):
        raise ValueError(
            'no integer id defined for label(s): {}'.format(sorted(map(str, unknown))))
    return encoded.astype(int)


# Tab-separated results file; the first row holds the column names.
predictions = pd.read_csv(
    '../multicore_results/roc_data/summit_1_12_24_ml_predictions.csv',
    header=0, delimiter='\t')
predictions['classifier_id'] = encode_labels(predictions.classifier, CLASSIFIER_IDS)
predictions['sampler_id'] = encode_labels(predictions.sampler, SAMPLER_IDS)

# The name columns are now redundant with the *_id columns, and the *_systems
# columns are not used anywhere below.
predictions = predictions.drop(
    ['classifier', 'sampler', 'training_systems', 'testing_systems'], axis=1)

# info() prints its summary itself and returns None, so it must not be wrapped in print().
predictions.info()
In [2]:
# Restrict the analysis to the rows whose `split` column equals 'avg'.
new_pred = predictions[predictions['split'] == 'avg']

# ROC-AUC against the integer classifier id; the low alpha keeps stacked points legible.
# The notebook renders the figure when the cell finishes, so no explicit show() is
# required -- and `sns.plt` is not an attribute of current seaborn releases.
a = sns.jointplot(x="classifier_id", y="roc-auc", data=new_pred, alpha=.15, color='k')
In [3]:
# Re-derive the 'avg' subset so this cell does not depend on an earlier plotting cell.
new_pred = predictions[predictions['split'] == 'avg']

# ROC-AUC per sampler id, jittered so coincident points stay visible.
# The notebook renders the figure itself (`sns.plt` is not available in current seaborn);
# the trailing semicolon keeps the Axes repr out of the cell output.
sns.stripplot(x="sampler_id", y="roc-auc", data=new_pred, jitter=True, color='k');
In [4]:
# numpy rejects NaN as a print threshold; sys.maxsize is the documented value for
# "never summarise".
np.set_printoptions(threshold=sys.maxsize)
pd.options.display.max_rows = 4000

# Mean of every remaining numeric column for each (training_numprocs, testing_numprocs)
# pair. numeric_only=True keeps the aggregation from raising on pandas releases that
# no longer drop non-numeric columns silently.
slim = (
    new_pred
    .drop(['split', 'time'], axis=1)
    .groupby(['training_numprocs', 'testing_numprocs'])
    .mean(numeric_only=True)
)
print(slim)
In [5]:
# Distribution of ROC-AUC for every classifier id in the 'avg' subset.
a = sns.boxplot(x='classifier_id', y='roc-auc', data=new_pred)
a.set_xlabel("Classifier ID")
# The notebook renders the figure itself (`sns.plt` is not available in current seaborn);
# the trailing semicolon suppresses the Text repr of the last call.
a.set_ylabel("AUROC");
In [6]:
# Keep classifier ids 0, 1, 3, 6 and 7 -- the same rows the original
# `> 5 | < 2 | == 3` test selected, since the ids are the integers 0..7.
good_classifiers = new_pred[new_pred['classifier_id'].isin([0, 1, 3, 6, 7])]

# Horizontal violins: one per retained classifier id, ROC-AUC along the x axis.
# The notebook renders the figure itself (`sns.plt` is not available in current seaborn);
# the trailing semicolon keeps the Axes repr out of the cell output.
sns.violinplot(y='classifier_id', x='roc-auc', data=good_classifiers, orient='h');
In [7]:
# Same box plot as for the full 'avg' subset, limited to the selected classifiers.
a = sns.boxplot(x='classifier_id', y='roc-auc', data=good_classifiers)
a.set_xlabel("Classifier ID")
# The notebook renders the figure itself (`sns.plt` is not available in current seaborn);
# the trailing semicolon suppresses the Text repr of the last call.
a.set_ylabel("AUROC");
In [8]:
# One box-plot panel per selected classifier id, with sampler id on the x axis.
# seaborn renamed `factorplot` to `catplot`; prefer the new name and fall back to the
# old one only on releases that predate the rename.
facet_plot = getattr(sns, "catplot", None) or sns.factorplot
# The notebook renders the figure itself (`sns.plt` is not available in current seaborn).
a = facet_plot(x="sampler_id", y="roc-auc", col="classifier_id", data=good_classifiers, kind="box")
In [9]:
# Classifier id 1 is the code assigned to 'RandomForest' when the data was loaded.
classifier_1 = new_pred[new_pred['classifier_id'] == 1]

# ROC-AUC of that classifier broken down by sampler id.
a = sns.boxplot(x="sampler_id", y="roc-auc", data=classifier_1)
a.set_xlabel("Sampler ID")
# The notebook renders the figure itself (`sns.plt` is not available in current seaborn);
# the trailing semicolon suppresses the Text repr of the last call.
a.set_ylabel("AUROC");
In [ ]: