Processing data from .results files

Author: Sreejith Menon
Date: May 25, 2016

List of all dataframes:
- resultsPerJobDf['GID','Album','Shared','Not Shared','Proportion']
- aidGidDf['AID','GID']
- aidFeaturesDf['AID',[FEATURES]]
- rankListImgsDf['GID','Shared','Not Shared','Proportion']
- resultsAIDGIDDf['AID' + [resultsPerJobDf]]
- gidAidResultsFeaturesDf['GID','AID',[FEATURES],[resultsPerJobDf]]

In [28]:
import importlib
import JobsMapResultsFilesToContainerObjs as ImageMap
import pandas as pd
import statistics as s
import re
import json
import DeriveFinalResultSet as drs
import GetPropertiesAPI as GP
import csv
# Paths to the experiment-2 input files. None of them is read by the cells
# visible in this notebook -- presumably they are meant for the project helper
# modules imported above (TODO confirm, or remove if unused).
gidAidMapFl = "../data/experiment2_gid_aid_map.json"
aidFeatureMapFl = "../data/experiment2_aid_features.json"
imgJobMap = "../data/imageGID_job_map_expt2_corrected.csv"
# Re-execute the ImageMap module so that edits made to its source after the
# first import are picked up without restarting the kernel.
importlib.reload(ImageMap)
# NOTE(review): -1 was the historical "do not truncate" value for this option;
# recent pandas releases expect None instead -- confirm against the installed
# pandas version before changing.
pd.set_option('display.max_colwidth', -1)

In [100]:
from ast import literal_eval
# Load the ranked image list, drop the URL, Album and INDIVIDUAL_NAME columns,
# and write the trimmed table to the scratch CSV that the next cell reads.
#
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0. The
# documented replacement is `read_csv(..., index_col=0, parse_dates=True)`,
# which reproduces from_csv's defaults (first column as index, date parsing on).
df = pd.read_csv("../FinalResults/ImgShrRnkListWithTags.csv", index_col=0, parse_dates=True)
cols = list(df.columns)  # column names as loaded, captured before any drop
# `axis` is passed by keyword: newer pandas releases reject the positional form.
df = df.drop(['URL', 'Album', 'INDIVIDUAL_NAME'], axis=1)
# Turn the index back into an ordinary column so it becomes the first CSV field.
df = df.reset_index()
df.to_csv("/tmp/test.csv", index=False)

In [138]:
# Convert the scratch CSV into a nested dict keyed by the first column:
#     {first_column_value: {column_name: cell_string, ...}, ...}
# and save it as JSON. Every value stays a string, exactly as csv.reader
# yields it.
#
# Both files are opened in `with` blocks so the handles are closed (and the
# JSON output flushed) deterministically instead of being left open.
# `newline=""` is what the csv module documentation asks for when opening a
# file for csv.reader, so quoted fields containing newlines parse correctly.
with open("/tmp/test.csv", "r", newline="") as csvFl:
    reader = csv.reader(csvFl)
    head = next(reader)  # header row: the column names
    data = {}
    for row in reader:
        # Column 0 is the key; the remaining columns form the value dict.
        data[row[0]] = {head[i]: row[i] for i in range(1, len(row))}

with open("/tmp/test.json", "w") as jsonFl:
    json.dump(data, jsonFl, indent=4)

In [139]:
def genHead(dataDict,ftr):
    """Collect the distinct values of one feature across all records.

    Parameters
    ----------
    dataDict : dict
        Maps a record key to a dict of ``feature name -> string value``.
    ftr : str
        Name of the feature to scan. For ``'tags'`` the stored string is
        parsed with ``literal_eval`` into a sequence of tags; for every other
        feature the stored string is split on commas.

    Returns
    -------
    list
        Each distinct value exactly once, in order of first appearance.

    Raises
    ------
    KeyError
        If a record has no entry for ``ftr``.
    """
    uniqueVals = []
    seen = set()
    # Read from the `dataDict` argument. The earlier version indexed the
    # notebook-global `data` here, so the parameter was effectively ignored.
    for ftrDict in dataDict.values():
        rawVal = ftrDict[ftr]
        items = literal_eval(rawVal) if ftr == 'tags' else rawVal.split(',')
        for item in items:
            # Track seen values separately so the result keeps a stable,
            # first-seen order rather than arbitrary set iteration order.
            if item not in seen:
                seen.add(item)
                uniqueVals.append(item)

    return uniqueVals

In [141]:
# Concatenate the per-feature value lists in a fixed feature order. A value that
# occurs under more than one feature appears once per feature in the result.
allAttribs = [
    attrib
    for ftr in ('SPECIES', 'SEX', 'AGE', 'QUALITY', 'VIEW_POINT', 'tags')
    for attrib in genHead(data, ftr)
]
print(allAttribs)


['zebra_plains', 'giraffe_masai', 'Female', 'UNKNOWN NAME', 'UNKNOWN SEX', 'Male', 'infant', 'juveniles- two year old', 'juveniles - one year old', 'adult', 'unknown', 'good', 'junk', 'poor', 'ok', 'excellent', 'right', 'left', 'backright', 'frontleft', 'backleft', 'frontright', 'front', 'back', 'standing', 'zebra', 'high', 'way', 'several', 'dry', 'brush', 'river', 'leaf', 'drinking', 'looking', 'conifer', 'lone', 'grass', 'hill', 'brown', 'wild', 'pond', 'lush', 'plant', 'sandy', 'animal', 'wooded', 'hay', 'field', 'tall', 'green', 'big cat', 'highland', 'cheetah', 'donkey', 'wild dog', 'dessert', 'drink', 'walking', 'laying', 'arthropod', 'forest', 'herd', 'eating', 'antelope', 'shore', 'hillside', 'tree', 'road', 'oak', 'giraffe', 'day', 'mountain', 'pasture', 'hyena', 'aircraft', 'mother', 'llama', 'close', 'sky', 'distance', 'group', 'tiger', 'dirt', 'path', 'lion', 'outdoor object', 'megalith', 'land', 'dune', 'area', 'open', 'lake', 'elephant', 'mammal', 'crossing', 'outdoor', 'grazing', 'baby', 'grassy', 'staring', 'cactus', 'deer', 'running', 'ground', 'plain', 'adult', 'sheep', 'desert', 'bushes', 'water']

In [162]:
from collections import OrderedDict
# Build, for every GID, a 0/1 indicator mapping over all known attributes:
# 1 if the attribute occurs in any of the record's features or tags, else 0.

# Features whose cell is a comma-separated list of values.
ftrs = ['SPECIES','SEX','AGE','QUALITY','VIEW_POINT']

gidAttribDict = {}
for gid, ftrDict in data.items():
    # Start with every attribute switched off, in allAttribs order.
    attribDict = OrderedDict.fromkeys(allAttribs, 0)

    # Switch on each value listed under the comma-separated features.
    for ftr in ftrs:
        for itm in ftrDict[ftr].split(','):
            attribDict[itm] = 1

    # 'tags' is stored as a Python-literal sequence, so parse instead of split.
    for tag in literal_eval(ftrDict['tags']):
        attribDict[tag] = 1

    gidAttribDict[gid] = attribDict

In [163]:
# Persist the per-GID indicator dicts as JSON. The `with` block guarantees the
# file is flushed and closed; the handle was previously opened inline and
# never closed explicitly.
with open("/tmp/gidAttribDict.json", "w") as attribFl:
    json.dump(gidAttribDict, attribFl, indent=4)

# The dict of dicts loads with GIDs as columns; transpose so that each row is a
# GID and each column an attribute.
pd.DataFrame(gidAttribDict).transpose()


Out[163]:
Female Male UNKNOWN NAME UNKNOWN SEX adult aircraft animal antelope area arthropod ... tree unknown walking water way wild wild dog wooded zebra zebra_plains
10 1 0 0 0 1 0 1 0 0 0 ... 0 0 1 0 0 1 0 0 0 0
1005 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
1012 0 1 0 0 1 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 0 0
1016 0 1 0 0 1 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 0 0
1024 0 0 1 0 0 0 1 0 0 0 ... 1 0 0 0 0 0 0 0 1 1
1041 0 0 1 0 0 0 1 0 1 0 ... 1 0 0 0 0 1 0 0 1 1
1045 0 0 1 0 0 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 1 1
1053 0 0 1 0 0 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 1 0
1057 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 1 0 0
1060 0 0 1 0 0 0 1 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
1061 1 0 0 0 1 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 0 0
1062 1 0 1 0 1 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
1071 0 1 0 0 1 0 1 0 0 0 ... 1 0 0 0 0 0 0 0 1 1
1073 0 1 0 0 1 0 1 0 0 0 ... 1 0 1 0 0 0 0 0 1 1
1094 0 0 1 0 0 0 1 0 0 0 ... 1 0 1 0 0 1 0 0 0 0
1099 0 0 1 0 0 0 1 0 0 0 ... 1 0 1 0 0 1 0 0 0 0
1102 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
1105 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
1152 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1155 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
1157 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
1159 1 0 0 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
1161 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
1164 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
1167 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
1168 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
1170 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
1171 0 1 0 0 1 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
1172 0 0 1 0 0 0 1 0 1 0 ... 1 0 0 0 0 0 0 0 1 1
1173 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
927 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 0 0 1 0 0 0 0
928 0 0 1 0 0 0 1 0 0 0 ... 0 0 1 0 0 1 0 0 0 0
9306 0 0 0 1 0 0 1 0 0 0 ... 1 0 0 0 0 1 0 1 1 1
9307 0 1 0 0 1 0 1 0 1 0 ... 1 0 0 0 0 0 0 0 1 1
9313 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 1 0 0 0 0
9323 1 0 0 0 1 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
9332 0 1 0 0 0 0 1 0 1 0 ... 1 0 0 0 0 0 0 1 0 0
9333 0 1 0 1 1 0 1 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
9368 0 1 0 0 1 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 0 0
9382 1 0 0 0 1 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
9384 1 0 0 0 1 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
952 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
954 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
955 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
956 0 0 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
962 0 1 0 0 1 0 1 0 1 0 ... 1 0 0 0 0 1 0 0 0 0
963 0 1 0 0 1 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 0 0
965 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 1 0 0 1 1
967 1 0 0 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
968 1 0 0 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 1
975 0 0 0 1 0 0 0 0 0 0 ... 1 0 0 0 0 1 0 1 0 1
976 0 0 1 0 0 0 1 0 0 0 ... 1 0 1 0 0 1 0 0 0 0
977 0 0 1 0 0 0 1 0 0 0 ... 1 0 0 0 0 0 0 0 1 1
978 0 0 1 0 0 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 1 1
979 0 0 1 0 0 0 0 0 1 0 ... 1 0 0 0 0 1 0 1 0 0
980 0 0 1 0 0 0 1 0 0 0 ... 1 0 0 0 0 1 0 0 0 0
981 0 0 1 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
982 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
983 0 0 1 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
999 0 0 0 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 1

1151 rows × 110 columns


In [123]:
# NOTE(review): `l` is not assigned in any cell visible in this notebook, so
# this cell raises NameError on a fresh kernel. Judging by the recorded output
# it presumably held one record's 'tags' string -- TODO define it here or
# delete this scratch cell.
literal_eval(l)


Out[123]:
['zebra',
 'grass',
 'outdoor',
 'sky',
 'field',
 'animal',
 'tree',
 'mammal',
 'standing',
 'group',
 'grassy',
 'tall']

In [ ]: