notebook.community

Edit and run



In [36]:

    
import itertools
import json



In [1]:

    
# Open the LIWC 2007 CSV file in read-only text mode; a later cell reads
# its lines from this handle.
liwc_fd = open('/usr/local/data/liwc_2007.csv', 'r')



In [2]:

    
# Read every line into memory (line terminators are kept by readlines()),
# then close the handle so the file descriptor is not leaked for the life
# of the kernel. Re-running this cell without re-opening the file now raises
# ValueError instead of silently producing an empty list from a handle that
# is already positioned at end-of-file.
with liwc_fd:
    liwc_lines = liwc_fd.readlines()



In [28]:

    
# Peek at the last three raw lines to check the file layout; the recorded
# output shows "pattern<TAB>comma-separated categories<newline>".
liwc_lines[-3:]









    Out[28]:





['bottom*\tspace,relativ\n',
 'revenge*\tanger,negemo,affect\n',
 'tast*\tingest,percept,bio\n']



In [30]:

    
def iter_matches(category, lines=None):
    """Yield the match pattern of every line tagged with `category`.

    Each line is expected to look like
    ``pattern<TAB>cat1,cat2,...<newline>``.

    Parameters
    ----------
    category : str
        Category name to look for, e.g. 'posemo'.
    lines : iterable of str, optional
        Lines to scan. Defaults to the notebook-level `liwc_lines`.

    Yields
    ------
    str
        The text before the first tab of each matching line.

    Raises
    ------
    ValueError
        If a non-blank line contains no tab separator.
    """
    if lines is None:
        lines = liwc_lines
    for line in lines:
        # readlines() keeps the line terminator; without stripping it the
        # last category on a line would be e.g. 'affect\n' and never match.
        line = line.rstrip('\r\n')
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line in the file).
            continue
        match, categories_csv = line.split('\t', 1)
        if category in categories_csv.split(','):
            yield match



In [38]:

    
# Categories whose match patterns are extracted from the LIWC lines.
categories = ['posemo', 'negemo']
for category in categories:
    # Count matches without materialising a throwaway list.
    match_count = sum(1 for _ in iter_matches(category))
    # A single pre-formatted string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('%d %s matches' % (match_count, category))









    



406 posemo matches
495 negemo matches

Results

The JSON dump below is commented out so that the LIWC word lists do not get pushed up to GitHub.



In [43]:

    
# Map each category name to its alphabetically sorted list of match patterns.
liwc_json = dict(
    (name, sorted(iter_matches(name)))
    for name in categories
)
# The dump stays disabled so the word lists are not written to the output:
# print json.dumps(liwc_json)