In [1]:
import os
from tools import benchmark, calc_auc

# Evaluate the xgboost tree booster (settings come from xgb_tree.conf):
# train a model, predict on the test set, report AUC, then remove the
# temporary model/prediction files.
# Parameter reference: https://github.com/dmlc/xgboost/blob/master/doc/parameter.md

xgb = '/usr/local/src/xgboost/xgboost'   # xgboost command-line binary
conf = 'xgb_tree.conf'                   # config file passed to both runs
model = 'model.xgb'                      # temporary: trained model
pred = 'test.pred'                       # temporary: predictions on the test set
test_svm = '/notebook/data/test.svm'     # test file handed to calc_auc

try:
    # train: writes the fitted model to `model`
    train_cmd = '{xgb} {conf} model_out={model}'.format(xgb=xgb, conf=conf, model=model)
    x = benchmark(train_cmd, print_results=True)

    # test: loads `model` and writes predictions to `pred`
    test_cmd = '{xgb} {conf} task=pred model_in={model} name_pred={pred}'.format(xgb=xgb, conf=conf, model=model, pred=pred)
    x = benchmark(test_cmd, print_results=True)

    auc = calc_auc(test_file=test_svm, pred_file=pred)
    # Parenthesised single-argument print gives the same output on Python 2 and 3.
    print('AUC: {}'.format(auc))
finally:
    # cleanup: always runs, so a failed step cannot leave a stale model or
    # prediction file behind for a later run to pick up by accident.
    for f in [model, pred]:
        # A step that failed early may never have created the file; skipping
        # it keeps the cleanup from masking the original error.
        if os.path.exists(f):
            os.remove(f)


command: /usr/local/src/xgboost/xgboost xgb_tree.conf model_out=model.xgb
stdout:
100000x653 matrix with 800000 entries is loaded from /notebook/data/train-0.1m.svm.buffer
100000x652 matrix with 796342 entries is loaded from /notebook/data/test.svm.buffer
boosting round 0, 0 sec elapsed
tree prunning end, 1 roots, 1670 extra nodes, 292 pruned nodes ,max_depth=16
boosting round 1, 0 sec elapsed
tree prunning end, 1 roots, 1522 extra nodes, 228 pruned nodes ,max_depth=16
boosting round 2, 0 sec elapsed
tree prunning end, 1 roots, 1500 extra nodes, 304 pruned nodes ,max_depth=16
boosting round 3, 1 sec elapsed
tree prunning end, 1 roots, 1736 extra nodes, 362 pruned nodes ,max_depth=16
boosting round 4, 1 sec elapsed
tree prunning end, 1 roots, 1526 extra nodes, 312 pruned nodes ,max_depth=16
boosting round 5, 1 sec elapsed
tree prunning end, 1 roots, 1576 extra nodes, 306 pruned nodes ,max_depth=16
boosting round 6, 1 sec elapsed
tree prunning end, 1 roots, 1548 extra nodes, 294 pruned nodes ,max_depth=16
boosting round 7, 1 sec elapsed
tree prunning end, 1 roots, 1630 extra nodes, 336 pruned nodes ,max_depth=16
boosting round 8, 2 sec elapsed
tree prunning end, 1 roots, 1700 extra nodes, 320 pruned nodes ,max_depth=16
boosting round 9, 2 sec elapsed
tree prunning end, 1 roots, 1600 extra nodes, 294 pruned nodes ,max_depth=16

updating end, 2 sec in all

stderr:
[0]	test-error:0.220420
[1]	test-error:0.208740
[2]	test-error:0.206130
[3]	test-error:0.205320
[4]	test-error:0.204010
[5]	test-error:0.203110
[6]	test-error:0.203240
[7]	test-error:0.202500
[8]	test-error:0.202360
[9]	test-error:0.202100

duration: 2.512 s
max memory: 0.781 MB

command: /usr/local/src/xgboost/xgboost xgb_tree.conf task=pred model_in=model.xgb name_pred=test.pred
stdout:
100000x652 matrix with 796342 entries is loaded from /notebook/data/test.svm.buffer
start prediction...
writing prediction to test.pred

stderr:
duration: 0.505 s
max memory: 0.004 MB

AUC: 0.718997431483

In [ ]: