In [15]:
from sklearn.datasets import load_svmlight_file
from sklearn_vw import VWClassifier
from sklearn import metrics
import pickle

# Load the feature count that is passed to load_svmlight_file below.
# Pickle data is a byte stream, so the file must be opened in binary mode:
# text mode ('r') can corrupt the data on Windows and fails on Python 3.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# produced by a trusted source.
with open('/notebook/data/n_features.pkl', 'rb') as f:
    n_features = pickle.load(f)
    
train = '/notebook/data/train-0.1m.svm'
test = '/notebook/data/test.svm'

X_train, y_train = load_svmlight_file(train, n_features=n_features)
# convert to labels from 0.0, 1.0 to -1, 1
y_train = (y_train.astype(int) * 2 - 1)

model = VWClassifier()
print 'train model:'
%time model.fit(X_train, y_train)

X_test, y_test = load_svmlight_file(test, n_features=n_features)

print 'evaluate model:'
%time y_pred = model.decision_function(X_test)
print 'AUC: {}'.format(metrics.roc_auc_score(y_test, y_pred))


train model:
CPU times: user 6.7 s, sys: 40 ms, total: 6.74 s
Wall time: 6.88 s
evaluate model:
CPU times: user 8.9 s, sys: 30 ms, total: 8.93 s
Wall time: 9.06 s
AUC: 0.698787322498

In [ ]: