In [89]:
import glob
import json
import os

import pandas as pd
In [39]:
# Folder holding the Mechanical Turk files. The trailing slash matters: other
# cells build file paths by plain string concatenation onto this value.
turkfolder = 'helpers/aggregation_maps/mechanical_turk/'
In [3]:
# Collect the Mechanical Turk results files in pure Python instead of `!ls`.
# The '.disagreements.csv' files written next to each results file also start
# with 'results', so they are filtered out; otherwise re-running this cell
# after the disagreement files exist would feed them back into the pipeline.
# sorted() approximates the alphabetical listing that `ls` produced.
results_files = sorted(
    path
    for path in glob.glob(os.path.join(turkfolder, 'results*'))
    if not path.endswith('.disagreements.csv')
)
In [45]:
def make_disagreement_files(filename):
    """Write the rows of a results CSV on which the two answers disagree.

    Reads ``filename``, keeps the rows whose 'Agreement' column equals 'No',
    prints how many there are (and their share of all rows), and writes the
    'qid', 'en_label', 'Answer1', 'Answer2' and 'Answer' columns of those rows
    to ``filename + '.disagreements.csv'`` without the index.

    Parameters
    ----------
    filename : str
        Path of the results CSV. Its first column is consumed as the index and
        it must contain the 'Agreement' column plus the five columns above.

    Raises
    ------
    KeyError
        If any of the required columns is missing from the file.
    """
    print(filename)
    # DataFrame.from_csv no longer exists in pandas; read_csv with index_col=0
    # keeps its "first column is the index" behaviour. from_csv also tried to
    # parse that index as dates, but the index is never used below.
    adf = pd.read_csv(filename, index_col=0)
    disagreements = adf[adf['Agreement'] == 'No']

    n_total = len(adf)
    n_disagree = len(disagreements)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('%d disagreements' % n_disagree)
    if n_total:
        # Report a real percentage; skip it for an empty file rather than
        # dividing by zero.
        print('%.1f%% of rows' % (100.0 * n_disagree / n_total))

    output_columns = ['qid', 'en_label', 'Answer1', 'Answer2', 'Answer']
    disagreements[output_columns].to_csv(filename + '.disagreements.csv', index=False)
In [46]:
# Produce a '<results file>.disagreements.csv' for every Turk results file.
for results_file in results_files:
    make_disagreement_files(results_file)
In [8]:
# Country lookup table; another cell checks ids against its 'qid' column.
country_map = pd.read_csv('helpers/aggregation_maps/country_maps.csv')
In [31]:
# Keep only the part of each 'qid' value that follows the Wikidata URL prefix.
# NOTE(review): `disagreements` is only ever assigned inside
# make_disagreement_files in the visible cells, so this cell depends on kernel
# state from elsewhere; confirm which frame is meant before a fresh re-run.
dq = disagreements['qid'].apply(
    lambda url: url.split('http://wikidata.org/wiki/')[1]
)
In [34]:
# Print every disagreement id that also appears in the country table.
# One vectorised isin() filter replaces rebuilding list(country_map['qid'])
# on every iteration; single-argument print(...) works on Python 2 and 3.
for d in dq[dq.isin(country_map['qid'])]:
    print(d)
In [96]:
def make_cutlure_map(param):
    """Build and save the id -> answer map for one Mechanical Turk task.

    Reads ``results_<param>.csv`` and ``results_<param>.csv.disagreements.csv``
    from ``turkfolder``, both indexed by their 'qid' column. The map takes the
    'Answer' of every results row whose 'Agreement' is 'Yes', then adds every
    'Answer' from the disagreements file (which wins on a shared key). Keys are
    reduced to the part after 'http://wikidata.org/wiki/' and the result is
    written as JSON to ``<param>_map.json`` in ``turkfolder``.

    Parameters
    ----------
    param : str
        Task name used to build the three file names, e.g. 'ethnic_groups'.

    Raises
    ------
    IndexError
        If a 'qid' value does not contain the Wikidata URL prefix.
    """
    wikidata_prefix = 'http://wikidata.org/wiki/'
    agreements_path = turkfolder + 'results_%s.csv' % param
    disagreements_path = turkfolder + 'results_%s.csv.disagreements.csv' % param

    # Load the files for *this* param. The previous body computed these paths
    # but then read notebook-global frames, so every call wrote the same map.
    agreements_df = pd.read_csv(agreements_path, index_col='qid')
    disagreements_df = pd.read_csv(disagreements_path, index_col='qid')

    agree = agreements_df.loc[agreements_df['Agreement'] == 'Yes', 'Answer'].to_dict()
    disagree = disagreements_df['Answer'].to_dict()

    # Copy-then-update keeps the "disagreements override agreements" order and
    # runs on both Python 2 and 3 (adding .items() results is Python 2 only).
    cultures_map = dict(agree)
    cultures_map.update(disagree)

    qid_map = {
        url.split(wikidata_prefix)[1]: culture
        for url, culture in cultures_map.items()
    }

    # `with` closes the output file so the JSON is flushed to disk.
    with open(turkfolder + '%s_map.json' % param, 'w') as map_file:
        json.dump(qid_map, map_file)
# Write one JSON map per Mechanical Turk task.
for param in ('ethnic_groups', 'citizenship'):
    make_cutlure_map(param)
In [ ]: