Processing data from .results files
Author: Sreejith Menon
Date: May 25, 2016

List of all dataframes:
- resultsPerJobDf['GID','Album','Shared','Not Shared','Proportion']
- aidGidDf['AID','GID']
- aidFeaturesDf['AID',[FEATURES]]
- rankListImgsDf['GID','Shared','Not Shared','Proportion']
- resultsAIDGIDDf['AID' + [resultsPerJobDf]]
- gidAidResultsFeaturesDf['GID','AID',[FEATURES],[resultsPerJobDf]]

In [28]:
import importlib
import JobsMapResultsFilesToContainerObjs as ImageMap
import pandas as pd
import statistics as s
import re
import json
import DeriveFinalResultSet as drs
import GetPropertiesAPI as GP
import csv
# Paths to input data files, relative to the notebook's working directory.
# None of these three paths is read by the cells visible in this notebook dump.
gidAidMapFl = "../data/experiment2_gid_aid_map.json"
aidFeatureMapFl = "../data/experiment2_aid_features.json"
imgJobMap = "../data/imageGID_job_map_expt2_corrected.csv"
# Re-import the already-imported project module so that edits made to it on
# disk take effect without restarting the session.
importlib.reload(ImageMap)
# Display option controlling how wide a cell value may be when a frame is shown.
# NOTE(review): -1 is the legacy "no truncation" value; newer pandas releases
# expect None instead — confirm against the installed pandas version.
pd.set_option('display.max_colwidth', -1)

In [100]:
from ast import literal_eval
# Load the ranked image list with tags, drop the URL, Album and
# INDIVIDUAL_NAME columns, and write the remaining table to a scratch CSV.
#
# DataFrame.from_csv is deprecated and absent from current pandas releases;
# read_csv with index_col=0 and parse_dates=True is its documented equivalent
# and is accepted by old and new pandas alike.
df = pd.read_csv(
    "../FinalResults/ImgShrRnkListWithTags.csv",
    index_col=0,
    parse_dates=True,
)
# Column names as loaded, i.e. before any column is dropped.
cols = list(df.columns)
# axis is passed by keyword: the positional form is rejected by newer pandas.
df.drop(['URL', 'Album', 'INDIVIDUAL_NAME'], axis=1, inplace=True)
# Move the index back into an ordinary leading column so that it is written
# out even though to_csv is called with index=False.
df.reset_index(inplace=True)
df.to_csv("/tmp/test.csv", index=False)

In [138]:
# Convert the scratch CSV into a nested dict of the form
#   {first-column value: {header name: cell value, ...}}
# and save that dict as JSON.
#
# Both files are opened in `with` blocks so the handles are closed (and the
# JSON output flushed) deterministically instead of relying on garbage
# collection. newline="" is what the csv module asks for when reading.
with open("/tmp/test.csv", "r", newline="") as csvFl:
    reader = csv.reader(csvFl)
    head = next(reader)  # header row supplies the inner dict keys
    data = {}
    for row in reader:
        # Field 0 is the outer key; every later field is stored under the
        # header name at the same position. A row longer than the header
        # still raises IndexError, as before, rather than being truncated.
        data[row[0]] = {
            head[i]: value for i, value in enumerate(row[1:], start=1)
        }

with open("/tmp/test.json", "w") as jsonFl:
    json.dump(data, jsonFl, indent=4)

In [139]:
def genHead(dataDict,ftr):
    """Return the distinct values found under feature ``ftr`` in ``dataDict``.

    Parameters
    ----------
    dataDict : dict
        Maps a record key to a dict of feature name -> string value.
    ftr : str
        Name of the feature to collect. For ``'tags'`` each value is parsed
        with ``literal_eval`` into a sequence of items; for any other feature
        the value is split on commas.

    Returns
    -------
    list
        Every distinct item exactly once, in first-seen order. An empty
        ``dataDict`` yields an empty list.

    Raises
    ------
    KeyError
        If a record has no entry for ``ftr``.
    """
    # A dict is used as an insertion-ordered set so the result is
    # reproducible between runs (plain set ordering of strings is not).
    seen = {}
    # Read from the dataDict argument; the earlier version looked the records
    # up in the module-level `data` and so ignored what the caller passed.
    for record in dataDict.values():
        raw = record[ftr]
        items = literal_eval(raw) if ftr == 'tags' else raw.split(',')
        for item in items:
            seen[item] = None

    return list(seen)

In [165]:
# Full list of attribute names: the distinct values of each feature, gathered
# feature by feature in the order listed, with the tag vocabulary last.
allAttribs = [
    attrib
    for ftrName in ('SPECIES', 'SEX', 'AGE', 'QUALITY', 'VIEW_POINT', 'tags')
    for attrib in genHead(data, ftrName)
]

In [162]:
from collections import OrderedDict
# Build, for every key in `data`, an indicator row over allAttribs: each
# attribute starts at 0 and is set to 1 when it occurs in that record's
# comma-separated feature values or in its tag list. The result is saved
# both as JSON and as a CSV with one row per key.

# Comma-separated features; defined once instead of on every iteration.
ftrs = ['SPECIES','SEX','AGE','QUALITY','VIEW_POINT']

gidAttribDict = {}
for gid, ftrDict in data.items():
    # Fresh all-zero row whose keys follow the order of allAttribs.
    attribDict = OrderedDict.fromkeys(allAttribs,0)

    for ftr in ftrs:
        for itm in ftrDict[ftr].split(','):
            attribDict[itm] = 1

    # Tags are stored as the text of a Python list literal.
    for tag in literal_eval(ftrDict['tags']):
        attribDict[tag] = 1

    gidAttribDict[gid] = attribDict

# Use a context manager so the JSON file is flushed and closed right away
# rather than whenever the anonymous handle happens to be collected.
with open("/tmp/gidAttribDict.json","w") as jsonFl:
    json.dump(gidAttribDict,jsonFl,indent=4)

# The frame is built with one column per key, then transposed so each key
# becomes a row.
pd.DataFrame(gidAttribDict).transpose().to_csv("/tmp/gidAttribDict.csv")

In [164]:


In [123]:
# Scratch check: parse the text bound to `l` with literal_eval and show it.
# NOTE(review): `l` is not assigned in any cell visible here — presumably a
# raw 'tags' string taken from one record; confirm before re-running.
literal_eval(l)


Out[123]:
['zebra',
 'grass',
 'outdoor',
 'sky',
 'field',
 'animal',
 'tree',
 'mammal',
 'standing',
 'group',
 'grassy',
 'tall']

In [ ]: