In [1]:
# Notebook setup: bring in the Seldon pipeline helpers, configure logging and
# warning display, then download the UCI credit-screening data into `df`.
from seldon.pipeline import auto_transforms as auto
from seldon.pipeline import util as sutl
from seldon.pipeline import cross_validation as cf
from sklearn.pipeline import Pipeline
import pandas as pd
from seldon import xgb
from seldon.pipeline import basic_transforms as bt
import logging
logging.basicConfig(
    format='%(asctime)s : %(levelname)s : %(message)s',
    level=logging.INFO,
)
import warnings
# Deliberately registered after the imports above; keep this ordering.
warnings.filterwarnings("ignore")

# The file ships without a header row, so the column names are supplied here:
# fifteen attributes "A1".."A15" followed by the label column "target".
# NOTE(review): the preview of `df` shows A14 with leading zeros (e.g. 00202),
# which suggests it is being parsed as text rather than as a number -- confirm
# whether the file uses a non-numeric missing-value marker.
df = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/credit-screening/crx.data",
    header=None,
    names=list("A%d" % n for n in range(1, 16)) + ["target"],
)


/home/clive/tools/scikit-learn/sklearn/cross_validation.py:42: DeprecationWarning: This module has been deprecated in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
# Preview the first five rows to sanity-check how the columns were parsed.
df.head(n=5)


Out[2]:
A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13 A14 A15 target
0 b 30.83 0.000 u g w v 1.25 t t 1 f g 00202 0 +
1 a 58.67 4.460 u g q h 3.04 t t 6 f g 00043 560 +
2 a 24.50 0.500 u g q h 1.50 t f 0 f g 00280 824 +
3 b 27.83 1.540 u g w v 3.75 t t 5 t g 00100 3 +
4 b 20.17 5.625 u g w v 1.71 t f 0 f s 00120 0 +

In [3]:
# Baseline: an XGBoost classifier evaluated through Seldon's K-fold wrapper.
#
# Pipeline stages, in order:
#   "tId"   - Feature_id_transform reading "target" and writing "targetId"
#             (constructed with zero_based=True).
#   "tAuto" - Auto_transform applied to everything except the two label columns.
#   "xgb"   - Seldon_KFold wrapping the classifier; the second argument (10) is
#             presumably the number of folds -- TODO confirm against the
#             Seldon_KFold signature.
tTargetId = bt.Feature_id_transform(
    zero_based=True,
    input_feature="target",
    output_feature="targetId",
)
t_auto = auto.Auto_transform(exclude=["target", "targetId"])
# The raw "target" column is excluded so the classifier only sees "targetId"
# as its label.
xg = xgb.XGBoostClassifier(target="targetId", excluded=["target"])
cv = cf.Seldon_KFold(xg, 10)
transformers = [("tId", tTargetId), ("tAuto", t_auto), ("xgb", cv)]
p = Pipeline(transformers)
p.fit_transform(df)
# A single-argument print(...) is valid on both Python 2 and Python 3, unlike
# the Python 2-only `print "text", value` statement. The two spaces before the
# value reproduce the original output (trailing space + print's separator).
print("avg cross validation accuracy  %s" % (cv.get_score(),))


avg cross validation accuracy  0.860869565217

In [4]:
# Hyperparameter search: wrap the same classifier (`xg`) in Seldon's
# BayesOptimizer and run it as the final pipeline stage in place of plain K-fold.
from seldon.pipeline import bayes_optimize as bopt

# Each entry maps a hyperparameter name to a (lower, upper) pair of bounds.
param_bounds = {
    'learning_rate': (0.0001, 0.1),
    'n_estimators': (10, 1000),
    'max_depth': (3, 20),
}
# `param_int` names the parameters that are presumably cast to integers before
# being handed to the classifier -- TODO confirm against BayesOptimizer.
e_opt = bopt.BayesOptimizer(
    xg,
    param_bounds,
    param_int=['n_estimators', 'max_depth'],
    cv_folds=5,
)
# Reuses the already-constructed id and auto transforms from the baseline cell.
transformers = [("tId", tTargetId), ("tAuto", t_auto), ("bopt", e_opt)]
p = Pipeline(transformers)
p.fit_transform(df)
# A single-argument print(...) is valid on both Python 2 and Python 3, unlike
# the Python 2-only `print "text", value` statement. The two spaces before the
# value reproduce the original output (trailing space + print's separator).
print("Best accuracy is  %s" % (e_opt.get_best_score(),))


Initializing function at point:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799} | result: 0.833333
Initializing function at point:  {'n_estimators': 809.62014847325509, 'learning_rate': 0.059786507579839755, 'max_depth': 18.652239686420895} | result: 0.821739
Initializing function at point:  {'n_estimators': 234.61512357766219, 'learning_rate': 0.026478710189946773, 'max_depth': 6.368707132444162} | result: 0.791304
Initializing function at point:  {'n_estimators': 990.19559680689167, 'learning_rate': 0.070816139750618817, 'max_depth': 12.430558275918466} | result: 0.826087
Initializing function at point:  {'n_estimators': 120.48141815266844, 'learning_rate': 0.062368361965343309, 'max_depth': 8.3352903838553125} | result: 0.781159
Iteration:   1 | Last sampled value:    0.776812 | with parameters:  {'n_estimators': 1000.0, 'learning_rate': 0.034517538483140806, 'max_depth': 7.8948236756820274}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 15.610965 seconds

Iteration:   2 | Last sampled value:    0.782609 | with parameters:  {'n_estimators': 812.57595048397127, 'learning_rate': 0.065281104065071385, 'max_depth': 3.0}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 8.343149 seconds

Iteration:   3 | Last sampled value:    0.756522 | with parameters:  {'n_estimators': 97.356089809422144, 'learning_rate': 0.030571810051815865, 'max_depth': 19.266194641382786}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 4.359313 seconds

Iteration:   4 | Last sampled value:    0.739130 | with parameters:  {'n_estimators': 1000.0, 'learning_rate': 0.059143077608004288, 'max_depth': 15.025603024751947}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 9.74062 seconds

Iteration:   5 | Last sampled value:    0.753623 | with parameters:  {'n_estimators': 444.44461639340267, 'learning_rate': 0.10000000000000001, 'max_depth': 20.0}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 6.685784 seconds

Iteration:   6 | Last sampled value:    0.833333 | with parameters:  {'n_estimators': 898.11551114754445, 'learning_rate': 0.0001, 'max_depth': 3.0}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 10.840565 seconds

Iteration:   7 | Last sampled value:    0.749275 | with parameters:  {'n_estimators': 450.24096015896401, 'learning_rate': 0.056713420171400034, 'max_depth': 15.597410911628616}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 7.778307 seconds

Iteration:   8 | Last sampled value:    0.757971 | with parameters:  {'n_estimators': 876.96222411514532, 'learning_rate': 0.10000000000000001, 'max_depth': 16.450594995368096}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 7.38782 seconds

Iteration:   9 | Last sampled value:    0.827536 | with parameters:  {'n_estimators': 9.9999999999999432, 'learning_rate': 0.0001, 'max_depth': 3.0}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 1.93751 seconds

Iteration:  10 | Last sampled value:    0.831884 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.072441362994008301, 'max_depth': 20.0}
               | Current maximum:       0.833333 | with parameters:  {'n_estimators': 911.78093095347504, 'learning_rate': 0.058005156981055211, 'max_depth': 11.087767061828799}
               | Time taken: 0 minutes and 2.480383 seconds

Iteration:  11 | Last sampled value:    0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 2.001273 seconds

Iteration:  12 | Last sampled value:    0.850725 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.10000000000000001, 'max_depth': 3.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 1.905338 seconds

Iteration:  13 | Last sampled value:    0.742029 | with parameters:  {'n_estimators': 397.46125515446755, 'learning_rate': 0.0001, 'max_depth': 20.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 11.601091 seconds

Iteration:  14 | Last sampled value:    0.839130 | with parameters:  {'n_estimators': 1000.0, 'learning_rate': 0.046147449096975691, 'max_depth': 3.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 9.972134 seconds

Iteration:  15 | Last sampled value:    0.820290 | with parameters:  {'n_estimators': 66.822342468251151, 'learning_rate': 0.0001, 'max_depth': 10.55278044795609}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 3.223813 seconds

Iteration:  16 | Last sampled value:    0.728986 | with parameters:  {'n_estimators': 1000.0, 'learning_rate': 0.097754508376174595, 'max_depth': 3.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 10.451882 seconds

Iteration:  17 | Last sampled value:    0.776812 | with parameters:  {'n_estimators': 602.28292862050444, 'learning_rate': 0.0001, 'max_depth': 5.1782258338608367}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 9.278974 seconds

Iteration:  18 | Last sampled value:    0.771014 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.10000000000000001, 'max_depth': 20.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 1.62759 seconds

Iteration:  19 | Last sampled value:    0.740580 | with parameters:  {'n_estimators': 749.53610377769803, 'learning_rate': 0.07904806758164519, 'max_depth': 20.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 7.91712 seconds

Iteration:  20 | Last sampled value:    0.747826 | with parameters:  {'n_estimators': 855.02778780541553, 'learning_rate': 0.0001, 'max_depth': 20.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 25.510986 seconds

Iteration:  21 | Last sampled value:    0.823188 | with parameters:  {'n_estimators': 688.30095863851602, 'learning_rate': 0.10000000000000001, 'max_depth': 9.2479711782399932}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 7.006607 seconds

Iteration:  22 | Last sampled value:    0.776812 | with parameters:  {'n_estimators': 988.62173128520772, 'learning_rate': 0.10000000000000001, 'max_depth': 9.9887084838701306}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 9.060015 seconds

Iteration:  23 | Last sampled value:    0.836232 | with parameters:  {'n_estimators': 675.29328358650321, 'learning_rate': 0.016453461800807232, 'max_depth': 19.746484467931388}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 21.261127 seconds

Iteration:  24 | Last sampled value:    0.765217 | with parameters:  {'n_estimators': 210.16554947521249, 'learning_rate': 0.090076931366369128, 'max_depth': 3.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 3.277976 seconds

Iteration:  25 | Last sampled value:    0.805797 | with parameters:  {'n_estimators': 199.86966672717645, 'learning_rate': 0.0001, 'max_depth': 3.0}
               | Current maximum:       0.852174 | with parameters:  {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}
               | Time taken: 0 minutes and 3.424776 seconds

Optimization finished with maximum: 0.852174, at position: {'n_estimators': 10.0, 'learning_rate': 0.041151080254732048, 'max_depth': 3.0}.
Time taken: 3 minutes and 52.452671 seconds.
Best accuracy is  0.852173913043

In [ ]:


In [ ]: