In [1]:
import seldon.pipeline.auto_transforms as auto
import seldon.pipeline.util as sutl
import seldon.pipeline.cross_validation as cf
from sklearn.pipeline import Pipeline
import pandas as pd
from seldon import xgb
import seldon.pipeline.basic_transforms as bt
import logging
import warnings

# Emit INFO-and-above log records, each prefixed with a timestamp and level.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s : %(levelname)s : %(message)s')
# Install a blanket "ignore" filter for all warnings raised from here on.
warnings.filterwarnings("ignore")

# Source of the credit-screening data and the labels given to its sixteen
# columns: A1 .. A15 followed by "target".
CRX_URL = "http://archive.ics.uci.edu/ml/machine-learning-databases/credit-screening/crx.data"
CRX_COLUMNS = list("A%d" % k for k in range(1, 16)) + ["target"]

# header=None: the first line of the file is treated as data, so the column
# names are supplied explicitly.
# NOTE(review): A14 is displayed as "00202" by df.head(), so it appears to be
# loaded as text rather than as a number -- confirm this is intended.
df = pd.read_csv(CRX_URL, header=None, names=CRX_COLUMNS)
/home/clive/tools/scikit-learn/sklearn/cross_validation.py:42: DeprecationWarning: This module has been deprecated in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
In [2]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
Out[2]:
A1
A2
A3
A4
A5
A6
A7
A8
A9
A10
A11
A12
A13
A14
A15
target
0
b
30.83
0.000
u
g
w
v
1.25
t
t
1
f
g
00202
0
+
1
a
58.67
4.460
u
g
q
h
3.04
t
t
6
f
g
00043
560
+
2
a
24.50
0.500
u
g
q
h
1.50
t
f
0
f
g
00280
824
+
3
b
27.83
1.540
u
g
w
v
3.75
t
t
5
t
g
00100
3
+
4
b
20.17
5.625
u
g
w
v
1.71
t
f
0
f
s
00120
0
+
In [3]:
# Stage 1: Feature_id_transform reads "target" and writes "targetId"
# (presumably a zero-based integer id per class label -- confirm against the
# seldon.pipeline.basic_transforms documentation).
tTargetId = bt.Feature_id_transform(
    zero_based=True,
    input_feature="target",
    output_feature="targetId",
)

# Stage 2: automatic feature transforms; both label columns are excluded so
# they are passed through untouched.
t_auto = auto.Auto_transform(exclude=["target", "targetId"])

# Stage 3: XGBoost classifier trained against "targetId", with the raw
# "target" column excluded from its inputs.
xg = xgb.XGBoostClassifier(target="targetId", excluded=["target"])

# Wrap the classifier in Seldon's k-fold helper; the second argument (10) is
# presumably the number of folds -- confirm against Seldon_KFold's signature.
cv = cf.Seldon_KFold(xg, 10)

transformers = [("tId", tTargetId), ("tAuto", t_auto), ("xgb", cv)]
p = Pipeline(transformers)
p.fit_transform(df)

# A single-argument print(...) produces the same text as the former
# Python-2-only `print "...", value` statement (label, separating space, then
# str(value)) while also being valid syntax on Python 3.
print("%s %s" % ("avg cross validation accuracy ", cv.get_score()))
avg cross validation accuracy 0.860869565217
In [4]:
from seldon.pipeline import bayes_optimize as bopt

# Hyper-parameter search space: each entry is a (lower, upper) pair. The
# optimiser log for this cell only ever samples values inside these ranges.
search_space = {
    'learning_rate': (0.0001, 0.1),
    'n_estimators': (10, 1000),
    'max_depth': (3, 20),
}

# Optimise the classifier `xg` created in the cross-validation cell.
# param_int names the parameters that are presumably cast to integers before
# being handed to the classifier (the log shows them sampled as floats) --
# confirm against BayesOptimizer. cv_folds=5 is passed for scoring each
# candidate.
e_opt = bopt.BayesOptimizer(
    xg,
    search_space,
    param_int=['n_estimators', 'max_depth'],
    cv_folds=5,
)

# Same preprocessing stages (tTargetId, t_auto) as the cross-validation
# pipeline, with the optimiser as the final step. `transformers` and `p` are
# rebound here, replacing the earlier pipeline objects.
transformers = [("tId", tTargetId), ("tAuto", t_auto), ("bopt", e_opt)]
p = Pipeline(transformers)
p.fit_transform(df)

# A single-argument print(...) produces the same text as the former
# Python-2-only `print "...", value` statement (label, separating space, then
# str(value)) while also being valid syntax on Python 3.
print("%s %s" % ("Best accuracy is ", e_opt.get_best_score()))
Initializing function at point: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799} | result: 0.833333
Initializing function at point: {'n_estimators': 809.62014847325509, 'learning_rate': 0.059786507579839755, 'max_depth': 18.652239686420895} | result: 0.821739
Initializing function at point: {'n_estimators': 234.61512357766219, 'learning_rate': 0.026478710189946773, 'max_depth': 6.368707132444162} | result: 0.791304
Initializing function at point: {'n_estimators': 990.19559680689167, 'learning_rate': 0.070816139750618817, 'max_depth': 12.430558275918466} | result: 0.826087
Initializing function at point: {'n_estimators': 120.48141815266844, 'learning_rate': 0.062368361965343309, 'max_depth': 8.3352903838553125} | result: 0.781159
Iteration: 1 | Last sampled value: 0.776812 | with parameters: {'n_estimators': 1000.0, 'learning_rate': 0.034517538483140806, 'max_depth': 7.8948236756820274}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 15.610965 seconds
Iteration: 2 | Last sampled value: 0.782609 | with parameters: {'n_estimators': 812.57595048397127, 'learning_rate': 0.065281104065071385, 'max_depth': 3.0}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 8.343149 seconds
Iteration: 3 | Last sampled value: 0.756522 | with parameters: {'n_estimators': 97.356089809422144, 'learning_rate': 0.030571810051815865, 'max_depth': 19.266194641382786}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 4.359313 seconds
Iteration: 4 | Last sampled value: 0.739130 | with parameters: {'n_estimators': 1000.0, 'learning_rate': 0.059143077608004288, 'max_depth': 15.025603024751947}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 9.74062 seconds
Iteration: 5 | Last sampled value: 0.753623 | with parameters: {'n_estimators': 444.44461639340267, 'learning_rate': 0.10000000000000001, 'max_depth': 20.0}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 6.685784 seconds
Iteration: 6 | Last sampled value: 0.833333 | with parameters: {'n_estimators': 898.11551114754445, 'learning_rate': 0.0001, 'max_depth': 3.0}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 10.840565 seconds
Iteration: 7 | Last sampled value: 0.749275 | with parameters: {'n_estimators': 450.24096015896401, 'learning_rate': 0.056713420171400034, 'max_depth': 15.597410911628616}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 7.778307 seconds
Iteration: 8 | Last sampled value: 0.757971 | with parameters: {'n_estimators': 876.96222411514532, 'learning_rate': 0.10000000000000001, 'max_depth': 16.450594995368096}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 7.38782 seconds
Iteration: 9 | Last sampled value: 0.827536 | with parameters: {'n_estimators': 9.9999999999999432, 'learning_rate': 0.0001, 'max_depth': 3.0}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 1.93751 seconds
Iteration: 10 | Last sampled value: 0.831884 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.072441362994008301, 'max_depth': 20.0}
| Current maximum: 0.833333 | with parameters: {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
| Time taken: 0 minutes and 2.480383 seconds
Iteration: 11 | Last sampled value: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 2.001273 seconds
Iteration: 12 | Last sampled value: 0.850725 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.10000000000000001, 'max_depth': 3.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 1.905338 seconds
Iteration: 13 | Last sampled value: 0.742029 | with parameters: {'n_estimators': 397.46125515446755, 'learning_rate': 0.0001, 'max_depth': 20.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 11.601091 seconds
Iteration: 14 | Last sampled value: 0.839130 | with parameters: {'n_estimators': 1000.0, 'learning_rate': 0.046147449096975691, 'max_depth': 3.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 9.972134 seconds
Iteration: 15 | Last sampled value: 0.820290 | with parameters: {'n_estimators': 66.822342468251151, 'learning_rate': 0.0001, 'max_depth': 10.55278044795609}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 3.223813 seconds
Iteration: 16 | Last sampled value: 0.728986 | with parameters: {'n_estimators': 1000.0, 'learning_rate': 0.097754508376174595, 'max_depth': 3.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 10.451882 seconds
Iteration: 17 | Last sampled value: 0.776812 | with parameters: {'n_estimators': 602.28292862050444, 'learning_rate': 0.0001, 'max_depth': 5.1782258338608367}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 9.278974 seconds
Iteration: 18 | Last sampled value: 0.771014 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.10000000000000001, 'max_depth': 20.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 1.62759 seconds
Iteration: 19 | Last sampled value: 0.740580 | with parameters: {'n_estimators': 749.53610377769803, 'learning_rate': 0.07904806758164519, 'max_depth': 20.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 7.91712 seconds
Iteration: 20 | Last sampled value: 0.747826 | with parameters: {'n_estimators': 855.02778780541553, 'learning_rate': 0.0001, 'max_depth': 20.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 25.510986 seconds
Iteration: 21 | Last sampled value: 0.823188 | with parameters: {'n_estimators': 688.30095863851602, 'learning_rate': 0.10000000000000001, 'max_depth': 9.2479711782399932}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 7.006607 seconds
Iteration: 22 | Last sampled value: 0.776812 | with parameters: {'n_estimators': 988.62173128520772, 'learning_rate': 0.10000000000000001, 'max_depth': 9.9887084838701306}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 9.060015 seconds
Iteration: 23 | Last sampled value: 0.836232 | with parameters: {'n_estimators': 675.29328358650321, 'learning_rate': 0.016453461800807232, 'max_depth': 19.746484467931388}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 21.261127 seconds
Iteration: 24 | Last sampled value: 0.765217 | with parameters: {'n_estimators': 210.16554947521249, 'learning_rate': 0.090076931366369128, 'max_depth': 3.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 3.277976 seconds
Iteration: 25 | Last sampled value: 0.805797 | with parameters: {'n_estimators': 199.86966672717645, 'learning_rate': 0.0001, 'max_depth': 3.0}
| Current maximum: 0.852174 | with parameters: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
| Time taken: 0 minutes and 3.424776 seconds
Optimization finished with maximum: 0.852174, at position: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}.
Time taken: 3 minutes and 52.452671 seconds.
Best accuracy is 0.852173913043
In [ ]:
In [ ]:
Content source: SeldonIO/seldon-server
Similar notebooks: