In [1]:
from sklearn import datasets
from sklearn.utils.validation import check_random_state
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from rgf.sklearn import RGFClassifier
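Note: RGFClassifier comes from the rgf_python package, a scikit-learn-compatible wrapper around the original C++ RGF implementation. A minimal setup sketch, assuming the PyPI package name rgf_python (not part of the original notebook):

# One-time setup from a shell (assumption: installing from PyPI):
#   pip install rgf_python
# The scikit-learn-style estimator then imports as in the cell above:
#   from rgf.sklearn import RGFClassifier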
In [2]:
iris = datasets.load_iris()
rng = check_random_state(0)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
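The permutation above shuffles the 150 Iris samples with a fixed seed so the cross-validation folds below are reproducible. An equivalent sketch using sklearn.utils.shuffle (not used in the original notebook):

from sklearn.utils import shuffle

# Shuffle features and labels in unison, with the same fixed seed as above.
iris.data, iris.target = shuffle(iris.data, iris.target, random_state=0)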
In [3]:
rgf = RGFClassifier(max_leaf=400,
                    algorithm="RGF_Sib",
                    test_interval=100,
                    verbose=True)
gb = GradientBoostingClassifier(n_estimators=20,
                                learning_rate=0.01,
                                subsample=0.6,
                                random_state=rng)
n_folds = 3
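Here max_leaf=400 caps the total number of leaves in the forest, test_interval=100 makes RGF checkpoint a model every 100 leaves (the seq#=1..4 lines in the logs below), algorithm="RGF_Sib" selects RGF with sibling (min-penalty) regularization, and verbose=True produces the training logs that follow. Both estimators implement the standard scikit-learn interface, so hyperparameters can be inspected or changed with the usual get_params/set_params; a quick sketch (not shown in the original notebook):

# Inspect the full hyperparameter dict of either estimator.
print(rgf.get_params())   # e.g. {'algorithm': 'RGF_Sib', 'max_leaf': 400, ...}
# Hypothetical change: grow a larger forest before cross-validating.
# rgf.set_params(max_leaf=800)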
In [4]:
rgf_scores = cross_val_score(rgf,
                             iris.data,
                             iris.target,
                             cv=StratifiedKFold(n_folds))
gb_scores = cross_val_score(gb,
                            iris.data,
                            iris.target,
                            cv=StratifiedKFold(n_folds))
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c0
--------------------
Sat May 27 12:13:09 2017: Reading training data ...
Sat May 27 12:13:09 2017: Start ... #train=99
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:09 2017: Calling optimizer with 50 trees and 100 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=1
Sat May 27 12:13:09 2017: Calling optimizer with 100 trees and 200 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=2
Sat May 27 12:13:09 2017: Calling optimizer with 150 trees and 300 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=3
Sat May 27 12:13:09 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:09 2017: Calling optimizer with 200 trees and 400 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c0-01
temp/rgf_classifier_c0-02
temp/rgf_classifier_c0-03
temp/rgf_classifier_c0-04
Sat May 27 12:13:09 2017: Done ...
elapsed: 0.109
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c1
--------------------
Sat May 27 12:13:09 2017: Reading training data ...
Sat May 27 12:13:09 2017: Start ... #train=99
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:09 2017: Calling optimizer with 29 trees and 101 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=1
Sat May 27 12:13:09 2017: Calling optimizer with 57 trees and 200 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=2
Sat May 27 12:13:09 2017: Calling optimizer with 85 trees and 300 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=3
Sat May 27 12:13:09 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:09 2017: Calling optimizer with 115 trees and 401 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c1-01
temp/rgf_classifier_c1-02
temp/rgf_classifier_c1-03
temp/rgf_classifier_c1-04
Sat May 27 12:13:09 2017: Done ...
elapsed: 0.094
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c2
--------------------
Sat May 27 12:13:09 2017: Reading training data ...
Sat May 27 12:13:09 2017: Start ... #train=99
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:09 2017: Calling optimizer with 34 trees and 100 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=1
Sat May 27 12:13:09 2017: Calling optimizer with 71 trees and 200 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=2
Sat May 27 12:13:09 2017: Calling optimizer with 112 trees and 300 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=3
Sat May 27 12:13:09 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:09 2017: Calling optimizer with 152 trees and 400 leaves
Sat May 27 12:13:09 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c2-01
temp/rgf_classifier_c2-02
temp/rgf_classifier_c2-03
temp/rgf_classifier_c2-04
Sat May 27 12:13:09 2017: Done ...
elapsed: 0.109
None
"predict":
model_fn=temp\rgf_classifier_c0-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:09 2017: Reading test data ...
Sat May 27 12:13:09 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c0-04,#leaf=400,#tree=200
Sat May 27 12:13:10 2017: Done ...
None
"predict":
model_fn=temp\rgf_classifier_c1-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:10 2017: Reading test data ...
Sat May 27 12:13:10 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c1-04,#leaf=401,#tree=115
Sat May 27 12:13:10 2017: Done ...
None
"predict":
model_fn=temp\rgf_classifier_c2-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:10 2017: Reading test data ...
Sat May 27 12:13:10 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c2-04,#leaf=400,#tree=152
Sat May 27 12:13:10 2017: Done ...
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c0
--------------------
Sat May 27 12:13:10 2017: Reading training data ...
Sat May 27 12:13:10 2017: Start ... #train=99
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:10 2017: Calling optimizer with 50 trees and 100 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=1
Sat May 27 12:13:10 2017: Calling optimizer with 100 trees and 200 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=2
Sat May 27 12:13:10 2017: Calling optimizer with 150 trees and 300 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=3
Sat May 27 12:13:10 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:10 2017: Calling optimizer with 200 trees and 400 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c0-01
temp/rgf_classifier_c0-02
temp/rgf_classifier_c0-03
temp/rgf_classifier_c0-04
Sat May 27 12:13:10 2017: Done ...
elapsed: 0.109
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c1
--------------------
Sat May 27 12:13:10 2017: Reading training data ...
Sat May 27 12:13:10 2017: Start ... #train=99
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:10 2017: Calling optimizer with 28 trees and 100 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=1
Sat May 27 12:13:10 2017: Calling optimizer with 57 trees and 200 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=2
Sat May 27 12:13:10 2017: Calling optimizer with 86 trees and 300 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=3
Sat May 27 12:13:10 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:10 2017: Calling optimizer with 115 trees and 400 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c1-01
temp/rgf_classifier_c1-02
temp/rgf_classifier_c1-03
temp/rgf_classifier_c1-04
Sat May 27 12:13:10 2017: Done ...
elapsed: 0.062
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c2
--------------------
Sat May 27 12:13:10 2017: Reading training data ...
Sat May 27 12:13:10 2017: Start ... #train=99
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:10 2017: Calling optimizer with 41 trees and 101 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=1
Sat May 27 12:13:10 2017: Calling optimizer with 83 trees and 200 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=2
Sat May 27 12:13:10 2017: Calling optimizer with 124 trees and 300 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=3
Sat May 27 12:13:10 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:10 2017: Calling optimizer with 165 trees and 400 leaves
Sat May 27 12:13:10 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c2-01
temp/rgf_classifier_c2-02
temp/rgf_classifier_c2-03
temp/rgf_classifier_c2-04
Sat May 27 12:13:10 2017: Done ...
elapsed: 0.094
None
"predict":
model_fn=temp\rgf_classifier_c0-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:10 2017: Reading test data ...
Sat May 27 12:13:10 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c0-04,#leaf=400,#tree=200
Sat May 27 12:13:10 2017: Done ...
None
"predict":
model_fn=temp\rgf_classifier_c1-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:10 2017: Reading test data ...
Sat May 27 12:13:10 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c1-04,#leaf=400,#tree=115
Sat May 27 12:13:10 2017: Done ...
None
"predict":
model_fn=temp\rgf_classifier_c2-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:11 2017: Reading test data ...
Sat May 27 12:13:11 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c2-04,#leaf=400,#tree=165
Sat May 27 12:13:11 2017: Done ...
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c0
--------------------
Sat May 27 12:13:11 2017: Reading training data ...
Sat May 27 12:13:11 2017: Start ... #train=102
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x102, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:11 2017: Calling optimizer with 50 trees and 100 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=1
Sat May 27 12:13:11 2017: Calling optimizer with 100 trees and 200 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=2
Sat May 27 12:13:11 2017: Calling optimizer with 150 trees and 300 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=3
Sat May 27 12:13:11 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:11 2017: Calling optimizer with 200 trees and 400 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c0-01
temp/rgf_classifier_c0-02
temp/rgf_classifier_c0-03
temp/rgf_classifier_c0-04
Sat May 27 12:13:11 2017: Done ...
elapsed: 0.078
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c1
--------------------
Sat May 27 12:13:11 2017: Reading training data ...
Sat May 27 12:13:11 2017: Start ... #train=102
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x102, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:11 2017: Calling optimizer with 33 trees and 100 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=1
Sat May 27 12:13:11 2017: Calling optimizer with 63 trees and 200 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=2
Sat May 27 12:13:11 2017: Calling optimizer with 94 trees and 301 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=3
Sat May 27 12:13:11 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:11 2017: Calling optimizer with 125 trees and 400 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c1-01
temp/rgf_classifier_c1-02
temp/rgf_classifier_c1-03
temp/rgf_classifier_c1-04
Sat May 27 12:13:11 2017: Done ...
elapsed: 0.062
None
"train":
algorithm=RGF_Sib
train_x_fn=temp/train.data.x
train_y_fn=temp/train.data.y
Log:ON
model_fn_prefix=temp/rgf_classifier_c2
--------------------
Sat May 27 12:13:11 2017: Reading training data ...
Sat May 27 12:13:11 2017: Start ... #train=102
--------------------
Forest-level:
loss=Log
max_leaf_forest=400
max_tree=200
opt_interval=100
test_interval=100
num_tree_search=1
Verbose:ON
memory_policy=Generous
Turning on Force_to_refresh_all
-------------
Training data: 4x102, nonzero_ratio=1; managed as dense data.
-------------
Optimization:
loss=Log
num_iteration_opt=5
reg_L2=0.1
opt_stepsize=0.5
max_delta=1
Tree-level: min_pop=10
Node split: reg_L2=0.1
--------------------
Sat May 27 12:13:11 2017: Calling optimizer with 45 trees and 101 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=1
Sat May 27 12:13:11 2017: Calling optimizer with 86 trees and 200 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=2
Sat May 27 12:13:11 2017: Calling optimizer with 127 trees and 301 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=3
Sat May 27 12:13:11 2017: AzRgforest: #leaf reached max
Sat May 27 12:13:11 2017: Calling optimizer with 169 trees and 401 leaves
Sat May 27 12:13:11 2017: Writing model: seq#=4
Generated 4 model file(s):
temp/rgf_classifier_c2-01
temp/rgf_classifier_c2-02
temp/rgf_classifier_c2-03
temp/rgf_classifier_c2-04
Sat May 27 12:13:11 2017: Done ...
elapsed: 0.078
None
"predict":
model_fn=temp\rgf_classifier_c0-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:11 2017: Reading test data ...
Sat May 27 12:13:11 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c0-04,#leaf=400,#tree=200
Sat May 27 12:13:11 2017: Done ...
None
"predict":
model_fn=temp\rgf_classifier_c1-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:11 2017: Reading test data ...
Sat May 27 12:13:11 2017: Predicting ...
elapsed: 0.015
temp/predictions.txt: temp\rgf_classifier_c1-04,#leaf=400,#tree=125
Sat May 27 12:13:11 2017: Done ...
None
"predict":
model_fn=temp\rgf_classifier_c2-04
test_x_fn=temp/test.data.x
prediction_fn=temp/predictions.txt
Log:ON
--------------------
Sat May 27 12:13:11 2017: Reading test data ...
Sat May 27 12:13:11 2017: Predicting ...
elapsed: 0
temp/predictions.txt: temp\rgf_classifier_c2-04,#leaf=401,#tree=169
Sat May 27 12:13:11 2017: Done ...
None
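The verbose output above is what cross_val_score triggers under the hood: for each of the 3 stratified folds, rgf_python trains one one-vs-rest binary RGF model per Iris class (the rgf_classifier_c0/_c1/_c2 prefixes) and predicts with the final checkpoint (-04, the model written when the 400-leaf cap was reached). A sketch of the equivalent explicit fold loop, assuming accuracy scoring as cross_val_score uses by default for classifiers:

import numpy as np
from sklearn.base import clone
from sklearn.metrics import accuracy_score

# Reproduce cross_val_score's fold loop by hand.
scores = []
for train_idx, test_idx in StratifiedKFold(n_folds).split(iris.data, iris.target):
    model = clone(rgf).fit(iris.data[train_idx], iris.target[train_idx])
    scores.append(accuracy_score(iris.target[test_idx],
                                 model.predict(iris.data[test_idx])))
print(np.mean(scores))  # should match the averaged rgf_scores below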
In [5]:
rgf_score = sum(rgf_scores)/n_folds
print('RGF Classifier score: {0:.5f}'.format(rgf_score))
gb_score = sum(gb_scores)/n_folds
print('Gradient Boosting Classifier score: {0:.5f}'.format(gb_score))
RGF Classifier score: 0.95997
Gradient Boosting Classifier score: 0.95997
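Because cross_val_score returns a NumPy array, the manual sum(...)/n_folds above can also be written with .mean(); printing the per-fold scores is useful when two models tie on the average, as they do here. A short sketch:

print('RGF per-fold scores:', rgf_scores)            # three accuracies, one per fold
print('RGF mean: {0:.5f}'.format(rgf_scores.mean()))
print('GB mean:  {0:.5f}'.format(gb_scores.mean()))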
Content source: StrikerRUS/rgf_python