In [3]:
from redis import Redis
from tools import csv_to_libsvm
import pickle

In [4]:
# Set up the Redis handle that the conversion steps below share.
redis_settings = dict(host='redis', port=6379, db=0)
r = Redis(**redis_settings)

# Start from a clean slate. NOTE: FLUSHALL removes the keys of *every*
# database on the server, not only db 0 -- do not point this at a shared
# Redis instance.
r.flushall()

# Have the client run HGET replies through int() before returning them.
r.set_response_callback('HGET', int)

In [5]:
# Convert the training CSV to libsvm format.
#
# The settings below describe the CSV layout as passed to csv_to_libsvm;
# they are kept as module-level names because the test conversion reuses
# them. (Exact semantics live in tools.csv_to_libsvm -- presumably
# truth_idx is the label column and pos_val its positive value; confirm
# against that helper.)
logistic = True
truth_idx = 8
pos_val = 'Y'
cat_idx = [0, 1, 2, 4, 5, 6]
num_idx = [3, 7]

# Input CSV and the libsvm file produced from it.
train_csv = '/notebook/data/train-0.1m.csv'
train_svm = '/notebook/data/train-0.1m.svm'

train_args = dict(
    r=r,
    csv_file=train_csv,
    out_file=train_svm,
    logistic=logistic,
    truth_idx=truth_idx,
    pos_val=pos_val,
    cat_idx=cat_idx,
    num_idx=num_idx,
)
csv_to_libsvm(**train_args)

# Convert the testing CSV to libsvm format, reusing the column settings
# defined for the training conversion.
test_csv = '/notebook/data/test.csv'
test_svm = '/notebook/data/test.svm'

test_args = dict(
    r=r,
    csv_file=test_csv,
    out_file=test_svm,
    logistic=logistic,
    truth_idx=truth_idx,
    pos_val=pos_val,
    cat_idx=cat_idx,
    num_idx=num_idx,
    # The only difference from the training call. Presumably this stops
    # test-only values from being registered as new features -- confirm in
    # tools.csv_to_libsvm.
    use_new=False,
)
csv_to_libsvm(**test_args)

# Persist the total number of features.
# The count is read from field '_index' of the Redis hash 'current'; the
# HGET response callback registered on `r` converts the reply to an int.
n_features = r.hget('current', '_index')

# Pickle data is a byte stream, so the file must be opened in binary mode:
# text mode can corrupt it on platforms that translate newlines and fails
# outright on Python 3.
with open('/notebook/data/n_features.pkl', 'wb') as f:
    pickle.dump(n_features, f)

# Single-argument call form: prints the same text on Python 2 and is also
# valid on Python 3.
print('Number of features: {}'.format(n_features))


Number of features: 652

In [ ]: