In [28]:
import importlib
import JobsMapResultsFilesToContainerObjs as ImageMap
import pandas as pd
import statistics as s
import re
import json
import DeriveFinalResultSet as drs
import GetPropertiesAPI as GP
import csv
# Relative paths to the data files named by the variables below.
gidAidMapFl = "../data/experiment2_gid_aid_map.json"
aidFeatureMapFl = "../data/experiment2_aid_features.json"
imgJobMap = "../data/imageGID_job_map_expt2_corrected.csv"

# Re-import the helper module so edits made to it since the kernel started
# are picked up without restarting.
importlib.reload(ImageMap)

# Do not truncate long cell values when DataFrames are displayed.
# Current pandas spells "unlimited" as None and rejects negative widths;
# older releases only accept an int and used -1 for the same meaning, so
# fall back to that when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
In [100]:
from ast import literal_eval
# Load the ranked image list. DataFrame.from_csv no longer exists in current
# pandas; read_csv with index_col=0 and parse_dates=True is its documented
# equivalent, so the first CSV column still becomes the index.
df = pd.read_csv(
    "../FinalResults/ImgShrRnkListWithTags.csv",
    index_col=0,
    parse_dates=True,
)

# Column names as loaded, captured before any column is dropped.
cols = list(df.columns)

# Drop the columns that are not carried into the intermediate file, then turn
# the index back into an ordinary first column so it is written out with
# index=False below.
df = df.drop(columns=['URL', 'Album', 'INDIVIDUAL_NAME']).reset_index()

df.to_csv("/tmp/test.csv", index=False)
In [138]:
# Re-read the intermediate CSV into a nested dict:
#   data[<first column value>] = {<header name>: <cell text>, ...}
# Every value stays a string, exactly as the csv module yields it.
# newline="" is what the csv module expects so that it can handle line
# endings (including newlines inside quoted fields) itself.
with open("/tmp/test.csv", "r", newline="") as csvFl:
    reader = csv.reader(csvFl)
    head = next(reader)  # header row
    data = {}
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; there is no key to use.
            continue
        # Column 0 is the key; the remaining cells are stored under their
        # header names.
        data[row[0]] = {head[i]: row[i] for i in range(1, len(row))}

# Persist the same mapping as JSON; the with-block guarantees the file is
# flushed and closed.
with open("/tmp/test.json", "w") as jsonFl:
    json.dump(data, jsonFl, indent=4)
In [139]:
def genHead(dataDict,ftr):
    """Return the distinct values that occur under feature `ftr` in `dataDict`.

    Parameters
    ----------
    dataDict : dict
        Mapping of record key -> {feature name: string value}.
    ftr : str
        Feature to collect. For every feature except 'tags' the stored string
        is split on ','. For 'tags' the stored string is parsed with
        ast.literal_eval and the resulting iterable supplies the values.

    Returns
    -------
    list
        Each distinct value once, in the order it is first encountered while
        iterating `dataDict`, so the result is the same on every run.

    Raises
    ------
    KeyError
        If a record has no entry for `ftr`.
    ValueError, SyntaxError
        If a 'tags' string is not a valid Python literal.
    """
    # Read from the dataDict argument, not from a notebook-level global, so
    # the function works for whatever mapping the caller passes in.
    if ftr != 'tags':
        blocks = (dataDict[gid][ftr].split(',') for gid in dataDict)
    else:
        blocks = (literal_eval(dataDict[gid][ftr]) for gid in dataDict)
    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # give an arbitrary order that can change between kernel sessions.
    return list(dict.fromkeys(item for block in blocks for item in block))
In [165]:
# Master attribute list: the values genHead reports for each feature,
# concatenated in this fixed feature order.
allAttribs = [
    attrib
    for ftrName in ('SPECIES', 'SEX', 'AGE', 'QUALITY', 'VIEW_POINT', 'tags')
    for attrib in genHead(data, ftrName)
]
In [162]:
from collections import OrderedDict
# Build a 0/1 indicator row per record: one entry for every name in
# allAttribs, set to 1 when the record carries that attribute value.
# The feature list is the same for every record, so it is defined once.
ftrs = ['SPECIES','SEX','AGE','QUALITY','VIEW_POINT']

gidAttribDict = {}
for gid in data.keys():
    ftrDict = data[gid]
    # Start with every known attribute switched off, in allAttribs order.
    attribDict = OrderedDict.fromkeys(allAttribs,0)
    for ftr in ftrs:
        # These features hold comma-separated values in a single string.
        spcs = ftrDict[ftr].split(',')
        for itm in spcs:
            attribDict[itm] = 1
    # 'tags' holds the text of a Python literal, so it is parsed rather
    # than split.
    tgs = literal_eval(ftrDict['tags'])
    for tag in tgs:
        attribDict[tag] = 1
    gidAttribDict[gid] = attribDict

# Write the indicator table as JSON; the with-block guarantees the file is
# flushed and closed.
with open("/tmp/gidAttribDict.json","w") as jsonFl:
    json.dump(gidAttribDict,jsonFl,indent=4)

# DataFrame(dict of dicts) puts the outer keys on the columns; transpose so
# each record becomes a row before writing the CSV.
pd.DataFrame(gidAttribDict).transpose().to_csv("/tmp/gidAttribDict.csv")
In [164]:
In [123]:
# NOTE(review): `l` is not assigned in any cell visible here, so this looks
# like a leftover scratch cell that would raise NameError on a fresh
# Restart & Run All - confirm and remove.
literal_eval(l)
Out[123]:
In [ ]: