In [89]:
import glob
import json

import pandas as pd

In [39]:
# Folder holding the Mechanical Turk result files. The trailing slash matters:
# later cells build paths by plain string concatenation (turkfolder + 'results_...').
# NOTE(review): the recorded output of the disagreement loop shows
# 'helpers/aggregation_maps/for mechanical turk/' instead, so the stored outputs
# appear to predate this value -- confirm which folder exists on disk.
turkfolder = 'helpers/aggregation_maps/mechanical_turk/'

In [3]:
# Collect the raw results files only. A bare `results*` pattern also matches the
# `<results file>.disagreements.csv` files that make_disagreement_files writes
# next to its input, so re-running the notebook would feed those derived files
# back in (and fail, since they have no 'Agreement' column). Sorted to keep the
# same ordering `ls` produced.
results_files = sorted(
    path for path in glob.glob(turkfolder + 'results*')
    if not path.endswith('.disagreements.csv')
)

In [45]:
def make_disagreement_files(filename):
    """Extract the rows of a results CSV that are marked as disagreements.

    Reads ``filename``, keeps the rows whose ``Agreement`` column equals
    ``'No'``, prints how many there are and their share of all rows, and
    writes the ``qid``, ``en_label``, ``Answer1``, ``Answer2`` and ``Answer``
    columns of those rows to ``filename + '.disagreements.csv'``.

    Parameters
    ----------
    filename : str
        Path of the results CSV. Its first column is used as the index and
        is not written to the output file.

    Returns
    -------
    None
    """
    print(filename)
    # DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0
    # reads the same columns. The index is discarded on write (index=False).
    adf = pd.read_csv(filename, index_col=0)
    disagreements = adf[adf['Agreement'] == 'No']

    # A header-only file would otherwise raise ZeroDivisionError.
    total = len(adf)
    share = len(disagreements) / float(total) if total else 0.0

    # Single pre-formatted strings print identically under Python 2 and 3.
    # The wording is kept as-is so new output matches earlier recorded runs;
    # note the second value is a fraction in [0, 1], not a percentage.
    print('%d  disgareements' % len(disagreements))
    print('%s  as a percentage' % share)

    disagreements[['qid', 'en_label', 'Answer1', 'Answer2', 'Answer']].to_csv(
        filename + '.disagreements.csv', index=False)

In [46]:
# Run the extraction once per results file; each call prints its counts and
# writes a '<results file>.disagreements.csv' file alongside the input.
for results_file in results_files:
    make_disagreement_files(results_file)


helpers/aggregation_maps/for mechanical turk//results_citizenships.csv
229  disgareements
0.3357771261  as a percentage
helpers/aggregation_maps/for mechanical turk//results_ethnic_groups.csv
284  disgareements
0.387978142077  as a percentage

In [8]:
# Load the country aggregation map; its 'qid' column is what the disagreement
# QIDs are checked against further down.
country_map = pd.read_csv('helpers/aggregation_maps/country_maps.csv')

In [31]:
# Drop the Wikidata URL prefix from every 'qid' value, keeping what follows it.
# NOTE(review): `disagreements` is not assigned at top level in the cells
# visible here -- confirm it exists when running from a fresh kernel.
dq = disagreements['qid'].map(
    lambda url: url.split('http://wikidata.org/wiki/')[1]
)

In [34]:
# Print every disagreement QID that also appears in the country map.
# The lookup set is built once instead of rebuilding a list per iteration.
country_qids = set(country_map['qid'])
for d in dq:
    if d in country_qids:
        print(d)


Q191
Q33
Q224
Q262
Q1183
Q37
Q902

In [96]:
def make_cutlure_map(param, results_name=None):
    """Build a ``{QID: answer}`` map for one category and save it as JSON.

    Rows of ``results_<results_name>.csv`` whose ``Agreement`` is ``'Yes'``
    contribute their ``Answer``; every row of the matching
    ``.disagreements.csv`` file contributes its ``Answer`` too and wins when
    the same ``qid`` appears in both. The Wikidata URL prefix is stripped
    from each ``qid`` and the result is written to ``<param>_map.json``.
    All files live in ``turkfolder``.

    Parameters
    ----------
    param : str
        Category name; used for the output file name ``<param>_map.json``.
    results_name : str, optional
        Name used in the input file ``results_<results_name>.csv``.
        Defaults to ``param``.
    """
    if results_name is None:
        results_name = param
    agreements_path = turkfolder + 'results_%s.csv' % results_name
    disagreements_path = agreements_path + '.disagreements.csv'

    # Load the files for *this* category. The previous version read the
    # notebook-level frames ethnic_df / ethnic_disagreements_df, so every
    # category was written with the same data.
    results_df = pd.read_csv(agreements_path)
    disagreements_df = pd.read_csv(disagreements_path)

    agreed_rows = results_df[results_df['Agreement'] == 'Yes']
    agree = agreed_rows.set_index('qid')['Answer'].to_dict()
    disagree = disagreements_df.set_index('qid')['Answer'].to_dict()

    # Disagreement answers are applied last so they override agreed ones.
    cultures_map = dict(agree)
    cultures_map.update(disagree)

    qid_map = {url.split('http://wikidata.org/wiki/')[1]: culture
               for url, culture in cultures_map.items()}

    # Context manager so the JSON file is flushed and closed deterministically.
    with open(turkfolder + '%s_map.json' % param, 'w') as map_file:
        json.dump(qid_map, map_file)


# NOTE(review): the recorded file listing shows the citizenship results as
# 'results_citizenships.csv' (plural) while the map is saved under
# 'citizenship' -- the input name is passed explicitly so the output file name
# stays 'citizenship_map.json'. Confirm the file name on disk.
for param, results_name in [('ethnic_groups', 'ethnic_groups'),
                            ('citizenship', 'citizenships')]:
    make_cutlure_map(param, results_name)

In [ ]: