In [104]:
import json
from os import listdir
from os.path import isfile, join
import csv
In [90]:
# Directory that is scanned for the task-run JSON files.
mypath = "jsons"
# Keep only regular files whose name ends in ".json"; a substring test would
# also pick up leftovers such as "task.json.swp". os.listdir returns entries
# in arbitrary order, so sort them to make jsons[0] and the order of the
# results reproducible from one run to the next.
json_files = sorted(f for f in listdir(mypath)
                    if isfile(join(mypath, f)) and f.endswith(".json"))
# change this based on the project
pybossa_url = "http://crowd.globalfishingwatch.org/project/maptests2/task/"
In [16]:
# Usernames addressed by user_id (the list index); slot 0 is left empty.
users = [
    "",                     # 0
    'davidkroodsma',        # 1
    'bjornbergman',         # 2
    'davidkroodsmathe2nd',  # 3
    'alexwilson',           # 4
    'chris',                # 5
    'enriquetuya',          # 6
    'kristinaboerder',      # 7
    'vaiduke2',             # 8
    'katepepler',           # 9
    'stephanielewis',       # 10
    'AlexCerra',            # 11
    'juliecharbonneau',     # 12
    'ninagalle',            # 13
    'sidneyblack-rotchin',  # 14
    'daivdtest',            # 15
    'cailinburmaster',      # 16
    'elizabethnagel',       # 17
    'isabelfleisher',       # 18
    'ciarawillis',          # 19
    'clairechristie',       # 20
]
# One counter per slot of `users`, all starting at zero.
user_count = [0] * len(users)
In [96]:
# Decode every listed file: jsons[i] holds the parsed content of json_files[i].
jsons = []
for j in json_files:
    # The context manager closes each handle as soon as it has been parsed.
    # The directory comes from mypath, so it always matches the listing that
    # produced json_files.
    with open(join(mypath, j)) as f:
        jsons.append(json.load(f))
In [98]:
# Quick look at the loaded data: the task_id of the first record in the first file.
jsons[0][0]['task_id']
Out[98]:
In [99]:
# Tally, for each loaded file, how the students classified the vessel.
# Every entry appended to `results` is a dict with:
#   responses      number of answers that were counted
#   vessel         {label: votes} for every label in vessel_responses
#   othertext      answers that matched no label (they are counted as "other")
#   websitesfound  the search_url values the students supplied
#   theusers       usernames of the students whose answers were counted
#   mmsi           taken from the counted answers (absent if none was counted)
#   url            pybossa_url + the task_id of the file's first record
#   findyes, findno, didntsearch   initialised to 0 and not updated here
# othertext, websitesfound and theusers are comma-separated strings.

# Labels an answer may resolve to. Built once, outside the loop; the export
# cell further down iterates over this same list.
# NOTE(review): 'muti_gear' next to 'multigear' looks like a misspelling, but it
# is kept because stored answers may really carry that value -- confirm.
vessel_responses = ['longliner','purse_seine','trawler','reefer','multigear',
                    'baddata','otherfish','not_fishing','muti_gear','other','not_known']
# Alternative spellings -> label used in vessel_responses.
vessel_aliases = {
    'other fishing': 'otherfish',
    'other_fishing': 'otherfish',
    'bad_data': 'baddata',
    'not_enough_data': 'baddata',
}
# Records whose raw 'info' string is not longer than this are skipped.
# NOTE(review): presumably those are empty or aborted submissions -- confirm.
min_info_length = 20

# Restart the per-user tally so that re-running this cell does not double count.
user_count[:] = [0] * len(user_count)

results = []
for j in jsons:
    r = {}  # the results for this vessel
    v = dict.fromkeys(vessel_responses, 0)  # counts of the vessel types
    r['responses'] = 0
    r['findyes'] = 0
    r['findno'] = 0
    r['didntsearch'] = 0
    other_answers = []
    websites = []
    usernames = []
    for k in j:
        if len(k['info']) > min_info_length and k['user_id'] != 1:  # ignore david's results
            r['responses'] += 1
            user_response = json.loads(k['info'].replace("\n",""))
            r['mmsi'] = user_response["mmsi"]
            # Compare answers without regard to case or surrounding blanks,
            # then fold known alternative spellings into their label.
            vt = user_response['vesselType'].lower().strip()
            vt = vessel_aliases.get(vt, vt)
            if vt not in vessel_responses:
                # Keep the unrecognised text for inspection, count it as "other".
                other_answers.append(vt)
                vt = "other"
            v[vt] += 1
            if user_response['search_url']:
                websites.append(user_response['search_url'])
            usernames.append(users[k['user_id']])
            user_count[k['user_id']] += 1
    r['othertext'] = ",".join(other_answers)
    r['websitesfound'] = ",".join(websites)
    r['theusers'] = ",".join(usernames)
    r['url'] = pybossa_url + str(j[0]['task_id'])
    r['vessel'] = v
    results.append(r)
In [143]:
# Combine the student tallies in `results` with the model's scores and write
# one CSV row per vessel, highest student confidence first.
# NOTE(review): model_scores is not defined in any cell shown here; it has to be
# in the kernel already (it is indexed by mmsi below) -- confirm where it is built.

# CSV column order. Assigned before the loop so that the writer below still has
# its header row when `results` is empty.
headers = ['mmsi','model_label','student_label','student_model_agreement',
           'model_score','type','type_score','student_confidence','pybossa_url']
# When students disagree their confidence is scaled by this factor, so it maxes
# out here; agreement with the model requires a confidence strictly above it.
disagreement_cap = .3


def _student_consensus(votes, labels):
    """Reduce one vessel's vote counts to a (student_label, student_confidence) pair.

    votes  -- dict mapping each label to the number of students who chose it;
              at least one count must be non-zero (the shares divide by the total)
    labels -- the labels to look at, in reporting order

    Four cases:
      1) total agreement on an answer            -> (that label, 1)
      2) total agreement on "not_known"          -> ("not_known", 0)
      3) one answer plus some "not_known" votes  -> (that label, share of votes
                                                     that are not "not_known")
      4) anything else (disagreement)            -> ("label:count,..." for every
                                                     chosen label, that same share
                                                     times disagreement_cap)
    """
    picked = [label for label in labels if votes[label]]
    if len(picked) == 1:
        return picked[0], (0 if picked[0] == 'not_known' else 1)
    total = sum(votes.values())
    known_votes = sum(n for label, n in votes.items() if label != 'not_known')
    known_share = float(known_votes)/total
    if len(picked) == 2 and 'not_known' in picked:
        label = picked[1] if picked[0] == 'not_known' else picked[0]
        return label, known_share
    tally = ",".join(label + ":" + str(votes[label]) for label in picked)
    return tally, known_share*disagreement_cap


outputs = []
for r in results:
    m_scores = model_scores[r['mmsi']]
    # now figure out what the students thought it was
    student_label, student_confidence = _student_consensus(r['vessel'], vessel_responses)
    output = {
        'mmsi': r['mmsi'],
        'model_label': m_scores['label'],
        'model_score': round(float(m_scores['label_score']),2),
        'type': m_scores['type'],
        'type_score': m_scores['type_score'],
        'pybossa_url': r['url'],
        'student_label': student_label,
        'student_confidence': student_confidence,
    }
    # Agreement needs a confident student label that, with underscores turned
    # into spaces, equals the lower-cased model label.
    agrees = (student_confidence > disagreement_cap and
              student_label.replace("_"," ") == m_scores['label'].lower())
    output['student_model_agreement'] = 1 if agrees else 0
    outputs.append(output)

# Highest confidence first; among equal confidences, rows where students and
# model agree come first.
outputs = sorted(outputs,
                 key=lambda k: (-k['student_confidence'], -k['student_model_agreement']))

outdir = "../"
filename = "FishingVesselsV2_HighConfidenceStudents_20160314_agreement.csv"
# NOTE(review): under Python 3 the csv documentation asks for newline='' when
# opening the file; the mode is left untouched because the kernel's Python
# version is not visible here.
with open(join(outdir, filename),'w') as f:
    f_csv = csv.DictWriter(f, headers)
    f_csv.writeheader()
    f_csv.writerows(outputs)
In [115]:
Out[115]:
In [ ]: