In [117]:
import os
from tools import benchmark, calc_auc

# Evaluate xgblinear by driving the xgboost command-line binary:
# train a model, write predictions for the test set, score them with AUC,
# then delete the temporary artifacts.
# Parameter reference:
# https://github.com/dmlc/xgboost/blob/master/doc/parameter.md

xgb = '/usr/local/src/xgboost/xgboost'   # path to the xgboost CLI binary
conf = 'xgb_linear.conf'                 # config file passed as first CLI argument
model = 'model.xgb'                      # temporary model file (removed below)
pred = 'test.pred'                       # temporary prediction file (removed below)
test_svm = '/notebook/data/test.svm'     # labelled test set handed to calc_auc

try:
    # train: extra key=value arguments are appended after the config file
    train_cmd = '{xgb} {conf} model_out={model}'.format(xgb=xgb, conf=conf, model=model)
    x = benchmark(train_cmd, print_results=True)

    # test: reload the trained model and write predictions to `pred`
    test_cmd = '{xgb} {conf} task=pred model_in={model} name_pred={pred}'.format(xgb=xgb, conf=conf, model=model, pred=pred)
    x = benchmark(test_cmd, print_results=True)

    auc = calc_auc(test_file=test_svm, pred_file=pred)
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('AUC: {}'.format(auc))
finally:
    # cleanup: runs even when a step above fails, so stale artifacts are not
    # left behind; skip files that were never created so a missing file does
    # not hide the original error.
    for f in [model, pred]:
        if os.path.exists(f):
            os.remove(f)


command: /usr/local/src/xgboost/xgboost xgb_linear.conf model_out=model.xgb
stdout:
100000x653 matrix with 800000 entries is loaded from /notebook/data/train-0.1m.svm.buffer
100000x652 matrix with 796342 entries is loaded from /notebook/data/test.svm.buffer
boosting round 0, 0 sec elapsed
boosting round 1, 0 sec elapsed
boosting round 2, 0 sec elapsed
boosting round 3, 0 sec elapsed
boosting round 4, 0 sec elapsed
boosting round 5, 0 sec elapsed
boosting round 6, 0 sec elapsed
boosting round 7, 0 sec elapsed
boosting round 8, 0 sec elapsed
boosting round 9, 0 sec elapsed
boosting round 10, 0 sec elapsed
boosting round 11, 0 sec elapsed
boosting round 12, 0 sec elapsed
boosting round 13, 0 sec elapsed
boosting round 14, 0 sec elapsed
boosting round 15, 0 sec elapsed
boosting round 16, 0 sec elapsed
boosting round 17, 0 sec elapsed
boosting round 18, 0 sec elapsed
boosting round 19, 0 sec elapsed

updating end, 0 sec in all

stderr:
[0]	test-error:0.216460
[1]	test-error:0.216290
[2]	test-error:0.215980
[3]	test-error:0.215660
[4]	test-error:0.215320
[5]	test-error:0.215030
[6]	test-error:0.214770
[7]	test-error:0.214320
[8]	test-error:0.214090
[9]	test-error:0.213800
[10]	test-error:0.213520
[11]	test-error:0.213340
[12]	test-error:0.213150
[13]	test-error:0.213010
[14]	test-error:0.213010
[15]	test-error:0.212720
[16]	test-error:0.212690
[17]	test-error:0.212620
[18]	test-error:0.212650
[19]	test-error:0.212560

duration: 0.521 s
max memory: 0.004 MB

command: /usr/local/src/xgboost/xgboost xgb_linear.conf task=pred model_in=model.xgb name_pred=test.pred
stdout:
100000x652 matrix with 796342 entries is loaded from /notebook/data/test.svm.buffer
start prediction...
writing prediction to test.pred

stderr:
duration: 0.512 s
max memory: 0.004 MB

AUC: 0.699485000611

In [ ]: