In [5]:
import pandas as pd

# Benchmark results for the LogisticRegression classifier: a tab-separated
# file (gzip-compressed, judging by the .gz suffix).
LOGREG_RESULTS_PATH = 'preprocessing_benchmark_subsets/LogisticRegression.tsv.gz'

# `data` is kept as the notebook-level name because later cells read it.
data = pd.read_csv(LOGREG_RESULTS_PATH, sep='\t')

# Preview the first rows to confirm the columns
# (dataset, classifier, parameters, accuracy, macrof1, bal_accuracy, preprocessor).
data.head()
Out[5]:
dataset
classifier
parameters
accuracy
macrof1
bal_accuracy
preprocessor
0
GAMETES_Epistasis_2-Way_1000atts_0.4H_EDM-1_ED...
LogisticRegression
C=0.5,penalty=l1,fit_intercept=False,dual=False,
0.496250
0.496243
0.496250
Binarizer
1
GAMETES_Epistasis_2-Way_20atts_0.1H_EDM-1_1
LogisticRegression
C=0.5,penalty=l1,fit_intercept=False,dual=False,
0.474375
0.473090
0.474375
Binarizer
2
GAMETES_Epistasis_2-Way_20atts_0.4H_EDM-1_1
LogisticRegression
C=0.5,penalty=l1,fit_intercept=False,dual=False,
0.476875
0.476858
0.476875
Binarizer
3
GAMETES_Epistasis_3-Way_20atts_0.2H_EDM-1_1
LogisticRegression
C=0.5,penalty=l1,fit_intercept=False,dual=False,
0.530625
0.530572
0.530625
Binarizer
4
GAMETES_Heterogeneity_20atts_1600_Het_0.4_0.2_...
LogisticRegression
C=0.5,penalty=l1,fit_intercept=False,dual=False,
0.493750
0.493747
0.493750
Binarizer
In [17]:
# Rank preprocessors for the currently loaded classifier results:
#   1. for every (dataset, preprocessor) pair keep the best accuracy reached
#      across all parameter settings;
#   2. take the median of those best accuracies per preprocessor;
#   3. sort from best to worst.
#
# The explicit [['accuracy']] selection matters: after reset_index() the frame
# still carries the string-valued `dataset` column, and calling .median() on the
# whole grouped frame raises TypeError on pandas >= 2.0 (numeric_only now
# defaults to False) instead of silently dropping that column.
(
    data.groupby(['dataset', 'preprocessor'])['accuracy'].max()
    .reset_index()
    .groupby('preprocessor')[['accuracy']]
    .median()
    .sort_values('accuracy', ascending=False)
)
Out[17]:
accuracy
preprocessor
PolynomialFeatures
0.846442
SelectKBest
0.843472
SelectFwe
0.840338
VarianceThreshold
0.827116
PCA
0.826923
MinMaxScaler
0.826043
RobustScaler
0.825516
MaxAbsScaler
0.824932
StandardScaler
0.821782
FastICA
0.820484
RFE
0.819778
SelectFromModel
0.815152
Normalizer
0.795652
Nystroem
0.791004
SelectPercentile
0.742253
Binarizer
0.690476
FeatureAgglomeration
0.672866
RBFSampler
0.650943
In [20]:
import pandas as pd
from glob import glob


def median_best_accuracy(results):
    """Rank preprocessors by the median of their best per-dataset accuracy.

    Parameters
    ----------
    results : pandas.DataFrame
        Benchmark rows with at least the columns ``dataset``,
        ``preprocessor`` and ``accuracy``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``preprocessor`` with a single ``accuracy`` column,
        sorted from highest to lowest median accuracy.
    """
    # Best accuracy each preprocessor reached on each dataset, over all
    # parameter settings that were tried.
    best_per_dataset = (
        results.groupby(['dataset', 'preprocessor'])['accuracy'].max().reset_index()
    )
    # Select the numeric column explicitly: `dataset` holds strings, and
    # median() over the whole grouped frame raises TypeError on pandas >= 2.0.
    return (
        best_per_dataset.groupby('preprocessor')[['accuracy']]
        .median()
        .sort_values('accuracy', ascending=False)
    )


# Print one ranking per classifier results file. sorted() makes the report
# order reproducible, since glob() returns paths in an OS-dependent order.
for filename in sorted(glob('preprocessing_benchmark_subsets/*gz')):
    # The SGDClassifier results are deliberately left out of this report.
    if 'SGDClassifier' in filename:
        continue

    # NOTE: this rebinds the notebook-level `data`; after the loop it holds
    # the results of the last file processed.
    data = pd.read_csv(filename, sep='\t')

    # Use the first classifier name found in the file as the section heading.
    print(data['classifier'].unique()[0])
    print(median_best_accuracy(data))
    print('')
AdaBoostClassifier
accuracy
preprocessor
SelectFwe 0.842697
PolynomialFeatures 0.840313
VarianceThreshold 0.840198
StandardScaler 0.838587
RobustScaler 0.838587
SelectKBest 0.836735
MaxAbsScaler 0.835719
MinMaxScaler 0.834732
SelectFromModel 0.831764
RFE 0.823365
PCA 0.822222
Normalizer 0.815643
FastICA 0.803479
SelectPercentile 0.740385
Nystroem 0.709239
Binarizer 0.700603
FeatureAgglomeration 0.694203
RBFSampler 0.633399
BernoulliNB
accuracy
preprocessor
SelectFwe 0.783206
StandardScaler 0.782240
MaxAbsScaler 0.780489
MinMaxScaler 0.777300
PCA 0.766667
RobustScaler 0.757936
SelectKBest 0.753137
Normalizer 0.750817
PolynomialFeatures 0.745192
VarianceThreshold 0.738437
FastICA 0.730885
SelectFromModel 0.729375
RFE 0.727273
Nystroem 0.716783
Binarizer 0.697603
SelectPercentile 0.658000
RBFSampler 0.630435
FeatureAgglomeration 0.581159
DecisionTreeClassifier
accuracy
preprocessor
PolynomialFeatures 0.858796
SelectFwe 0.856884
MaxAbsScaler 0.854458
MinMaxScaler 0.854458
StandardScaler 0.854324
VarianceThreshold 0.852041
RobustScaler 0.852041
SelectKBest 0.846995
SelectFromModel 0.833333
Normalizer 0.832708
RFE 0.829932
PCA 0.817935
FastICA 0.790200
SelectPercentile 0.752381
Nystroem 0.725000
FeatureAgglomeration 0.702703
Binarizer 0.693340
RBFSampler 0.639456
ExtraTreesClassifier
accuracy
preprocessor
PolynomialFeatures 0.886792
SelectFwe 0.884615
MaxAbsScaler 0.881188
MinMaxScaler 0.881188
VarianceThreshold 0.880102
StandardScaler 0.879906
RobustScaler 0.879906
Normalizer 0.873789
PCA 0.871207
SelectKBest 0.869565
SelectFromModel 0.855072
FastICA 0.854620
RFE 0.848000
Nystroem 0.785366
SelectPercentile 0.773196
FeatureAgglomeration 0.706747
Binarizer 0.706000
RBFSampler 0.692749
GaussianNB
accuracy
preprocessor
SelectFromModel 0.816316
RFE 0.804392
VarianceThreshold 0.777219
SelectFwe 0.774727
SelectKBest 0.772356
PCA 0.758170
SelectPercentile 0.756361
RobustScaler 0.720313
MaxAbsScaler 0.720296
MinMaxScaler 0.720296
StandardScaler 0.720280
FastICA 0.718750
Normalizer 0.700950
PolynomialFeatures 0.684919
FeatureAgglomeration 0.654825
RBFSampler 0.586580
Nystroem 0.528986
Binarizer 0.501521
GradientBoostingClassifier
accuracy
preprocessor
VarianceThreshold 0.891304
MaxAbsScaler 0.890097
MinMaxScaler 0.888889
RobustScaler 0.887177
StandardScaler 0.884058
SelectKBest 0.881159
PolynomialFeatures 0.879710
RFE 0.876812
SelectFwe 0.876812
SelectFromModel 0.872464
Normalizer 0.868841
PCA 0.868116
FastICA 0.857410
SelectPercentile 0.788462
FeatureAgglomeration 0.771231
Nystroem 0.769231
RBFSampler 0.739318
Binarizer 0.727794
KNeighborsClassifier
accuracy
preprocessor
StandardScaler 0.856232
RobustScaler 0.853000
SelectFwe 0.851598
MaxAbsScaler 0.849800
MinMaxScaler 0.848185
SelectFromModel 0.844928
SelectKBest 0.835224
PCA 0.834582
VarianceThreshold 0.833050
Normalizer 0.831144
FastICA 0.822496
RFE 0.819572
PolynomialFeatures 0.814433
Nystroem 0.783883
SelectPercentile 0.759525
Binarizer 0.701000
FeatureAgglomeration 0.696040
RBFSampler 0.665584
LinearSVC
accuracy
preprocessor
PolynomialFeatures 0.850931
SelectKBest 0.844198
SelectFwe 0.841584
RobustScaler 0.828804
PCA 0.828749
MinMaxScaler 0.824717
VarianceThreshold 0.824531
MaxAbsScaler 0.824477
StandardScaler 0.824341
FastICA 0.819495
RFE 0.817575
SelectFromModel 0.814079
Normalizer 0.792288
Nystroem 0.787649
SelectPercentile 0.736240
Binarizer 0.694805
FeatureAgglomeration 0.669967
RBFSampler 0.650351
LogisticRegression
accuracy
preprocessor
PolynomialFeatures 0.846442
SelectKBest 0.843472
SelectFwe 0.840338
VarianceThreshold 0.827116
PCA 0.826923
MinMaxScaler 0.826043
RobustScaler 0.825516
MaxAbsScaler 0.824932
StandardScaler 0.821782
FastICA 0.820484
RFE 0.819778
SelectFromModel 0.815152
Normalizer 0.795652
Nystroem 0.791004
SelectPercentile 0.742253
Binarizer 0.690476
FeatureAgglomeration 0.672866
RBFSampler 0.650943
MultinomialNB
accuracy
preprocessor
RobustScaler 0.859954
MinMaxScaler 0.738267
MaxAbsScaler 0.736790
SelectFwe 0.729037
SelectKBest 0.710884
Normalizer 0.708771
PolynomialFeatures 0.680206
VarianceThreshold 0.679365
SelectFromModel 0.679113
RFE 0.670884
Binarizer 0.657238
SelectPercentile 0.645870
Nystroem 0.639208
FeatureAgglomeration 0.630363
PassiveAggressiveClassifier
accuracy
preprocessor
StandardScaler 0.813977
RobustScaler 0.813977
MaxAbsScaler 0.794922
MinMaxScaler 0.792000
FastICA 0.787539
Nystroem 0.752577
SelectFwe 0.752577
Normalizer 0.727273
PolynomialFeatures 0.726619
SelectFromModel 0.717014
SelectKBest 0.714286
RFE 0.708255
PCA 0.703704
VarianceThreshold 0.700000
Binarizer 0.671875
SelectPercentile 0.666667
RBFSampler 0.639456
FeatureAgglomeration 0.578207
RandomForestClassifier
accuracy
preprocessor
MinMaxScaler 0.885907
RobustScaler 0.884058
MaxAbsScaler 0.883629
StandardScaler 0.882609
PolynomialFeatures 0.881355
VarianceThreshold 0.873641
PCA 0.872111
Normalizer 0.867048
SelectKBest 0.862772
SelectFwe 0.861400
FastICA 0.853330
SelectFromModel 0.845181
RFE 0.834983
SelectPercentile 0.754085
Nystroem 0.753740
Binarizer 0.714000
FeatureAgglomeration 0.708241
RBFSampler 0.681628
SVC
accuracy
preprocessor
MaxAbsScaler 0.878000
MinMaxScaler 0.872957
StandardScaler 0.872000
RobustScaler 0.870400
FastICA 0.865979
Normalizer 0.845161
SelectFromModel 0.832708
SelectFwe 0.831081
SelectKBest 0.816479
PCA 0.814780
VarianceThreshold 0.802027
RFE 0.786763
Nystroem 0.762664
SelectPercentile 0.752451
Binarizer 0.727794
FeatureAgglomeration 0.690104
RBFSampler 0.672726
PolynomialFeatures 0.666771
XGBClassifier
accuracy
preprocessor
VarianceThreshold 0.883061
StandardScaler 0.881159
RobustScaler 0.881159
MaxAbsScaler 0.880584
MinMaxScaler 0.880435
SelectKBest 0.876087
SelectFwe 0.875067
PolynomialFeatures 0.874618
PCA 0.869948
RFE 0.864130
Normalizer 0.860870
SelectFromModel 0.858491
FastICA 0.856561
Nystroem 0.791384
SelectPercentile 0.764751
Binarizer 0.727533
FeatureAgglomeration 0.713275
RBFSampler 0.682540
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Content source: rhiever/sklearn-benchmarks
Similar notebooks: