In [36]:
import itertools
import json
In [1]:
# Read every line of the LIWC file into memory. The `with` block closes the
# file handle as soon as the read finishes instead of leaving it open for the
# rest of the session; `liwc_fd` stays bound (as a closed file) afterwards.
with open('/usr/local/data/liwc_2007.csv') as liwc_fd:
    liwc_lines = liwc_fd.readlines()
In [28]:
# Notebook inspection cell: display the last three lines read from the file.
liwc_lines[-3:]
Out[28]:
In [30]:
def iter_matches(category, lines=None):
    """Yield the match pattern of every line tagged with *category*.

    Each non-blank line is split at its first tab: the text before the tab
    is the match pattern, the text after it is a comma-separated list of
    category names.

    Args:
        category: Category name to look for (e.g. ``'posemo'``).
        lines: Optional iterable of raw lines to scan. When omitted, the
            module-level ``liwc_lines`` is used.

    Yields:
        The match pattern of each line whose category list contains
        *category*, in input order.

    Raises:
        ValueError: If a non-blank line contains no tab separator.
    """
    if lines is None:
        lines = liwc_lines
    for raw_line in lines:
        # readlines() keeps line terminators; without stripping them the
        # last category on every row would carry a trailing newline and
        # could never compare equal to `category`.
        line = raw_line.rstrip('\r\n')
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no entry.
            continue
        match, categories_csv = line.split('\t', 1)
        if category in categories_csv.split(','):
            yield match
In [38]:
# Categories whose match patterns are extracted below.
categories = ['posemo', 'negemo']
for category in categories:
    # Count lazily rather than materialising a list only to take its length.
    match_count = sum(1 for _ in iter_matches(category))
    # A single pre-formatted string prints the same text ("<n> <category>
    # matches") under both the Python 2 print statement and the Python 3
    # print function.
    print('%d %s matches' % (match_count, category))
In [43]:
# Map each category name to the alphabetically sorted list of its match
# patterns; the resulting dict contains only strings and lists, so it can be
# passed straight to json.dumps.
liwc_json = {
    name: sorted(iter_matches(name))
    for name in categories
}
# print json.dumps(liwc_json)