In [3]:
from redis import Redis
from tools import csv_to_libsvm
import pickle
In [4]:
# Open the Redis connection handed to csv_to_libsvm below.
redis_settings = dict(host='redis', port=6379, db=0)
r = Redis(**redis_settings)

# Wipe every database on the server so this run starts from an empty store.
# NOTE: FLUSHALL is destructive -- it is not limited to db 0.
r.flushall()

# Have HGET replies converted with int() instead of being returned raw.
r.set_response_callback('HGET', int)
In [5]:
# Conversion settings shared by the training and testing files.
# The meanings noted below are inferred from the argument names of
# tools.csv_to_libsvm -- confirm against that helper.
logistic = True                # presumably selects binary (logistic) labels
truth_idx = 8                  # presumably the column index of the label
pos_val = 'Y'                  # presumably the label value treated as positive
cat_idx = [0, 1, 2, 4, 5, 6]   # presumably the categorical column indices
num_idx = [3, 7]               # presumably the numerical column indices

# Both conversions must use exactly the same settings, so the common keyword
# arguments are collected once instead of being repeated per call.
shared_args = dict(logistic=logistic,
                   truth_idx=truth_idx,
                   pos_val=pos_val,
                   cat_idx=cat_idx,
                   num_idx=num_idx)

# convert training file
train_csv = '/notebook/data/train-0.1m.csv'
train_svm = '/notebook/data/train-0.1m.svm'
csv_to_libsvm(r=r,
              csv_file=train_csv,
              out_file=train_svm,
              **shared_args)

# convert testing file
# use_new=False is passed only for the test set; presumably it stops the
# helper from registering features not seen in training -- TODO confirm.
test_csv = '/notebook/data/test.csv'
test_svm = '/notebook/data/test.svm'
csv_to_libsvm(r=r,
              csv_file=test_csv,
              out_file=test_svm,
              use_new=False,
              **shared_args)

# store total number of features
# Reads field '_index' of the Redis hash 'current'; the HGET response callback
# registered on `r` converts the reply to int.
n_features = r.hget('current', '_index')

# Pickle streams must be written in binary mode: text mode ('w') raises a
# TypeError on Python 3 and can corrupt the stream where newlines are
# translated.
with open('/notebook/data/n_features.pkl', 'wb') as f:
    pickle.dump(n_features, f)

# Single-argument call form prints the same text on Python 2 and Python 3.
print('Number of features: {}'.format(n_features))
In [ ]: