In [15]:
from sklearn.datasets import load_svmlight_file
from sklearn_vw import VWClassifier
from sklearn import metrics
import pickle
with open('/notebook/data/n_features.pkl', 'r') as f:
n_features = pickle.load(f)
train = '/notebook/data/train-0.1m.svm'
test = '/notebook/data/test.svm'
X_train, y_train = load_svmlight_file(train, n_features=n_features)
# convert to labels from 0.0, 1.0 to -1, 1
y_train = (y_train.astype(int) * 2 - 1)
model = VWClassifier()
print 'train model:'
%time model.fit(X_train, y_train)
X_test, y_test = load_svmlight_file(test, n_features=n_features)
print 'evaluate model:'
%time y_pred = model.decision_function(X_test)
print 'AUC: {}'.format(metrics.roc_auc_score(y_test, y_pred))
In [ ]: