Notebook Name: BuildConsolidatedFeaturesFile.ipynb

Created date : Sunday, 27th March

Author : Sreejith Menon

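Description : buildFeatureFl(input file, output file) reads a consolidated HIT results CSV file (input file) and writes a copy of it, extended with features looked up in the IBEIS dataset, to a new CSV file (output file).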

Extracts the below features from the IBEIS dataset:

  1. species_texts
  2. sex_texts
  3. age_months_est
  4. exemplar_flags
  5. quality_texts

Consolidated HIT results contain the number of "share" and "not share" responses per image from the Mechanical Turk jobs. The input file is expected to have the following columns: [GID,SHARE,NOT_SHARE,TOTAL]


In [1]:
import csv
import GetPropertiesAPI as GP
import importlib
importlib.reload(GP) # re-executes GetPropertiesAPI so that any changes made to the API module since it was first imported are picked up


Out[1]:
<module 'GetPropertiesAPI' from '/Users/sreejithmenon/Google Drive/CodeBase/AWESOME/script/GetPropertiesAPI.py'>

Logic for reading data from the consolidatedHITResults file, looking up the IBEIS features for each image, and writing the extended CSV file


In [5]:
def buildFeatureFl(inFL,outFL):
    """Extend a consolidated HIT results CSV with features fetched through GP.

    Reads `inFL` (a CSV whose first row is a header and whose first column is
    the image GID), looks up the annotation ID(s) of every GID with
    `GP.getAnnotID`, fetches five features for them with `GP.getImageFeature`
    and writes the original columns plus the new ones to `outFL`.

    Parameters:
        inFL: path of the input CSV file.
        outFL: path of the CSV file to create (overwritten if it exists).

    Returns:
        None. The result is written to `outFL`.

    Raises:
        ValueError: if a GID in the first column cannot be converted to int.

    Notes:
        * Blank rows in the input are skipped.
        * If a GID appears more than once, the last row wins.
        * When `GP.getAnnotID` returns None, the annotation ID cell is written
          empty and every feature column is filled with 'NULL'.
    """
    # Feature keys requested from GP, in the same order as the output columns.
    feature_names = (
        "species_texts",
        "sex_texts",
        "age_months_est",
        "exemplar_flags",
        "quality_texts",
    )

    # newline="" is what the csv module expects for files it reads and writes;
    # the with-block guarantees the handle is closed even on errors.
    with open(inFL, "r", newline="") as readFL:
        reader = csv.reader(readFL)
        head = next(reader, [])  # an empty file yields an empty header instead of StopIteration
        data = {row[0]: row[1:] for row in reader if row}

    # Look up the annotation ID(s) and then the features of every image.
    for gid, values in data.items():
        aid = GP.getAnnotID(int(gid))
        values.append(aid)
        if aid is not None:
            values.extend(GP.getImageFeature(aid, name) for name in feature_names)
        else:
            values.extend(['NULL'] * len(feature_names))

    # Write all the extracted info to a CSV file.
    head = head + ['ANNOTATION_ID','SPECIES','SEX','AGE_MONTHS','EXEMPLAR_FLAG','IMAGE_QUALITY']
    with open(outFL, "w", newline="") as writeFL:
        writer = csv.writer(writeFL)
        writer.writerow(head)
        writer.writerows([gid] + values for gid, values in data.items())

In [6]:
def __main__():
    """Build the extended features file from the default consolidated HIT results paths."""
    buildFeatureFl("../data/consolidatedHITResults.csv","../data/consolidatedHITResultsWithInfo1.csv")

# Compare against the string "__main__". The previous guard compared __name__
# with the function object __main__, which is never equal, so the build was
# silently skipped.
if __name__ == "__main__":
    __main__()

In [3]:
# Spot check: fetch the annotation IDs for the single image with GID 5381.
GP.getAnnotID(5381)


Out[3]:
[6679, 6680, 6681, 6682]

In [4]:
# Map every image GID from 1 through 5383 to the value GP.getAnnotID returns
# for it (one lookup per GID).
gid_aid_map = {gid: GP.getAnnotID(gid) for gid in range(1, 5384)}

In [6]:
import json

In [7]:
# Save the GID -> annotation IDs map as indented JSON.
with open("../data/flickr_zebra_gid_aid_map.json","w") as gid_aid_file:
    json.dump(gid_aid_map, gid_aid_file, indent=4)

In [11]:
# Flatten the per-image annotation ID lists into one flat list of annotation
# IDs. Images with an empty list contribute nothing, so no extra length
# filter is needed.
aids = [aid for aid_list in gid_aid_map.values() for aid in aid_list]

In [17]:
# Fetch the species values first, then build the map. In the original cell
# order `features` was used before the cell that defines it, which fails on a
# fresh kernel (Restart & Run All).
features = GP.getImageFeature(aids, 'species/text')

# Pair each annotation ID with the feature at the same position.
# presumably GP.getImageFeature returns one value per annotation ID, in order — TODO confirm
aid_species_map = {aid: features[i] for i, aid in enumerate(aids)}

In [19]:
# Save the annotation ID -> species map as indented JSON.
with open("../data/flickr_zebra_aid_species_map.json", "w") as species_map_file:
    json.dump(aid_species_map, species_map_file, indent=4)

In [20]:
import UploadAndDetectIBEIS as UD

In [21]:
# Ask the UploadAndDetectIBEIS helper for the status of job 'jobid-5388'.
UD.check_job_status('jobid-5388')


Out[21]:
False

In [29]:
# Call the 'api/engine/job/status' endpoint through UD.get for job
# 'jobid-5388' and keep the reply for inspection.
data_dict = {'jobid': 'jobid-5388'}
response = UD.get('api/engine/job/status', data_dict)

In [30]:
# Display the reply received from the job-status request.
response


Out[30]:
{'jobid': 'jobid-5388', 'jobstatus': 'unknown', 'status': 'ok'}

In [ ]: