In [2]:
import importlib
import JobsMapResultsFilesToContainerObjs as ImageMap
import pandas as pd
import statistics as s
import re
import GetPropertiesAPI as GP
import matplotlib.pyplot as plt
import csv
gidAidMapFl = "../data/experiment2_gid_aid_map.json"
aidFeatureMapFl = "../data/experiment2_aid_features.json"
imgJobMap = "../data/imageGID_job_map_expt2_corrected.csv"
importlib.reload(ImageMap)
importlib.reload(GP)
Out[2]:
In [3]:
def genTotCnts(ovrCnts):
dSum = {}
#dStddev = {}
for key in ovrCnts:
dSum[key] = sum(ovrCnts[key])
#dStddev[key] = s.stdev(ovrCnts[key])
return dSum#,dStddev
In [4]:
def getShrProp(ovrAggCnts) :
totCnt = genTotCnts(ovrAggCnts)
shareKeys = list(filter(lambda x : 'share' in x,totCnt.keys()))
totKeys = list(filter(lambda x : 'total' in x,totCnt.keys()))
shareKeys = sorted(shareKeys,key=lambda x: (x[:len(x)-1]))
totKeys = sorted(totKeys,key=lambda x: (x[:len(x)-1]))
lenKey = len(shareKeys[0])-1
propDict = {}
for i in range(len(shareKeys)):
propDict[shareKeys[i][:lenKey]] = totCnt[shareKeys[i]] * 100 / totCnt[totKeys[i]]
return propDict
In [6]:
# verified
def ovrallShrCntsByFtr(gidAidMapFl,aidFeatureMapFl,feature,imgJobMap,resSetStrt,resSetEnd):
countLogic = getCountingLogic(gidAidMapFl,aidFeatureMapFl,feature)
imgAlbumDict = ImageMap.genImgAlbumDictFromMap(imgJobMap)
master = ImageMap.createResultDict(resSetStrt,resSetEnd)
imgShareNotShareList,noResponse = ImageMap.imgShareCountsPerAlbum(imgAlbumDict,master)
answerSet = {}
for tup in imgShareNotShareList:
if tup[0] not in countLogic.keys(): # where the image has no associated annotation, tup[0] = GID
answerSet[('other' , 'share')] = answerSet.get(('other' , 'share'),[]) + [tup[2]]
answerSet[('other' , 'not_share')] = answerSet.get(('other' , 'not_share'),[]) + [tup[3]]
answerSet[('other', 'total')] = answerSet.get(('other' , 'total'),[]) + [tup[2] + tup[3]]
else:
logic = countLogic[tup[0]]
for countForEle in logic[1]:
varNameShare = (countForEle , "share")
varNameNotShare = (countForEle , "not_share")
varNameTot = (countForEle , "total")
answerSet[varNameShare] = answerSet.get(varNameShare,[]) + [tup[2]]
answerSet[varNameNotShare] = answerSet.get(varNameNotShare,[]) + [tup[3]]
answerSet[varNameTot] = answerSet.get(varNameTot,[]) + [tup[2] + tup[3]]
return answerSet
In [7]:
def shrCntsByFtrPrAlbm(gidAidMapFl,aidFeatureMapFl,feature,imgJobMap,resSetStrt,resSetEnd):
countLogic = getCountingLogic(gidAidMapFl,aidFeatureMapFl,feature)
imgAlbumDict = ImageMap.genImgAlbumDictFromMap(imgJobMap)
master = ImageMap.createResultDict(resSetStrt,resSetEnd)
imgShareNotShareList,noResponse = ImageMap.imgShareCountsPerAlbum(imgAlbumDict,master)
answerSet = {}
for tup in imgShareNotShareList:
if tup[0] not in countLogic.keys(): # where the image has no associated annotation, tup[0] = GID
answerSet[('other' , 'share', tup[1])] = answerSet.get(('other' , 'share', tup[1]),[]) + [tup[2]]
answerSet[('other' , 'not_share', tup[1])] = answerSet.get(('other' , 'not_share', tup[1]),[]) + [tup[3]]
answerSet[('other', 'total', tup[1])] = answerSet.get(('other' , 'total', tup[1]),[]) + [tup[2] + tup[3]]
else:
logic = countLogic[tup[0]]
for countForEle in logic[1]:
varNameShare = (countForEle , tup[1], "share")
varNameNotShare = (countForEle , tup[1], "not_share")
varNameTot = (countForEle , tup[1], "total")
answerSet[varNameShare] = answerSet.get(varNameShare,[]) + [tup[2]]
answerSet[varNameNotShare] = answerSet.get(varNameNotShare,[]) + [tup[3]]
answerSet[varNameTot] = answerSet.get(varNameTot,[]) + [tup[2] + tup[3]]
return answerSet
In [8]:
def ovrallShrCntsByTwoFtrs(gidAidMapFl,aidFeatureMapFl,ftr1,ftr2,imgJobMap,resSetStrt,resSetEnd):
countLogic1 = getCountingLogic(gidAidMapFl,aidFeatureMapFl,ftr1)
countLogic2 = getCountingLogic(gidAidMapFl,aidFeatureMapFl,ftr2)
imgAlbumDict = ImageMap.genImgAlbumDictFromMap(imgJobMap)
master = ImageMap.createResultDict(resSetStrt,resSetEnd)
imgShareNotShareList,noResponse = ImageMap.imgShareCountsPerAlbum(imgAlbumDict,master)
answerSet = {}
unEvnFtrsTups =[]
for tup in imgShareNotShareList:
if tup[0] not in countLogic1.keys(): # where the image has no associated annotation, tup[0] = GID
pass
answerSet[('other' , None,'share')] = answerSet.get(('other' ,None, 'share'),[]) + [tup[2]]
answerSet[('other' , None, 'not_share')] = answerSet.get(('other' , None, 'not_share'),[]) + [tup[3]]
answerSet[('other' , None, 'total')] = answerSet.get(('other' , None, 'total'),[]) + [tup[2]+tup[3]]
else:
logic1 = countLogic1[tup[0]]
logic2 = countLogic2[tup[0]]
for i in range(len(logic1[1])):
if len(logic1[1]) == len(logic2[1]): # there are two individuals with matching features
varNameShare = (logic1[1][i] , logic2[1][i], "share")
varNameNotShare = (logic1[1][i] , logic2[1][i], "not_share")
varNameTot = (logic1[1][i] , logic2[1][i], "total")
# there are more logic1 features than logic2 features
elif len(logic1[1]) == 1 or len(logic2[1]) == 1: # one of the logic has just 1 feature
if len(logic1[1]) == 1:
varNameShare = (logic1[1][0] , logic2[1][i], "share")
varNameNotShare = (logic1[1][0] , logic2[1][i], "not_share")
varNameTot = (logic1[1][0] , logic2[1][i], "total")
else:
varNameShare = (logic1[1][i] , logic2[1][0], "share")
varNameNotShare = (logic1[1][i] , logic2[1][0], "not_share")
varNameTot = (logic1[1][i] , logic2[1][0], "total")
else: # uneven features in logic1 and logic2
unEvnFtrsTups.append(tup)
answerSet[varNameShare] = answerSet.get(varNameShare,[]) + [tup[2]]
answerSet[varNameNotShare] = answerSet.get(varNameNotShare,[]) + [tup[3]]
answerSet[varNameTot] = answerSet.get(varNameTot,[]) + [tup[2] + tup[3]]
# handling un-even features
unEvnFtrsTups = list(set(unEvnFtrsTups))
for tup in unEvnFtrsTups:
aidList = GP.getAnnotID(tup[0])
for aid in aidList:
feature1 = GP.getImageFeature(aid,GP.ftrNms[ftr1])
feature2 = GP.getImageFeature(aid,GP.ftrNms[ftr2])
if ftr2 == 'AGE':
feature2 = GP.getAgeFeatureReadableFmt(feature2)
varNameShare = (feature1[0],feature2[0],"share")
varNameNotShare = (feature1[0],feature2[0],"not_share")
varNameTot = (feature1[0],feature2[0],"total")
answerSet[varNameShare] = answerSet.get(varNameShare,[]) + [tup[2]]
answerSet[varNameNotShare] = answerSet.get(varNameNotShare,[]) + [tup[3]]
answerSet[varNameTot] = answerSet.get(varNameTot,[]) + [tup[2] + tup[3]]
return answerSet
In [95]:
'''
resultsPerJobDf > Gives you shares/not shares per image per album (Python Object of .results file converted to DF)
resultsPerJobDf['GID','Album','Shared','Not Shared','Proportion']
'''
imgAlbumDict = ImageMap.genImgAlbumDictFromMap("../data/imageGID_job_map_expt2_corrected.csv")
master = ImageMap.createResultDict(1,100)
imgShareNotShareList,noResponse = ImageMap.imgShareCountsPerAlbum(imgAlbumDict,master)
resultsPerJobDf = pd.DataFrame(imgShareNotShareList,columns = ['GID','Album','Shared','Not Shared','Proportion'])
'''
Code for reading from json files into data frames
aidGidDf['AID','GID']
aidFeaturesDf['AID',[FEATURES]]
'''
aidGidDict = ImageMap.genAidGidTupListFromMap('../data/experiment2_gid_aid_map.json')
aidGidDf= pd.DataFrame(aidGidDict,columns = ['AID','GID'])
aidFeaturesDf = pd.DataFrame(ImageMap.genAidFeatureDictList('../data/experiment2_aid_features.json'))
aidFeaturesDf['AID'] = aidFeaturesDf['AID'].astype('int32')
'''
rankListImgsDf > Gives you the results of number of times each image was shared overall
rankListImgsDf['GID','Shared','Not Shared','Proportion']
'''
rankListImgsDf = resultsPerJobDf.groupby(['GID'])['Shared','Not Shared'].sum()
rankListImgsDf['Total'] = rankListImgsDf['Shared'] + rankListImgsDf['Not Shared']
rankListImgsDf['Proportion'] = rankListImgsDf['Shared'] * 100 / rankListImgsDf['Total']
rankListImgsDf = rankListImgsDf.sort_values(by = ['Proportion'],ascending = False)
rankListImgsDf.to_csv('../data/rankListImages.csv')
'''
resultsAIDGIDDf > Merged data frame that add's AID info to the results data
resultsAIDGIDDf['AID' + [resultsPerJobDf]]
gidAidResultsFeaturesDf > A master data frame that has results data merged along with all the image features
gidAidResultsFeaturesDf['GID','AID',[FEATURES],[resultsPerJobDf]]
'''
resultsAIDGIDDf = pd.merge(aidGidDf,resultsPerJobDf,left_on='GID',right_on = 'GID',how="right")
gidAidResultsFeaturesDf = pd.merge(resultsAIDGIDDf,aidFeaturesDf,left_on = 'AID',right_on = 'AID') # most important data frame with all the info
gidAidResultsFeaturesDf.to_csv("../data/resultsFeaturesComb.csv",index=False)
In [18]:
# no. of individuals per image
countLogic = getCountingLogic(gidAidMapFl,aidFeatureMapFl,"SPECIES")
imgAlbumDict = ImageMap.genImgAlbumDictFromMap(imgJobMap)
master = ImageMap.createResultDict(1,100)
imgShareNotShareList,noResponse = ImageMap.imgShareCountsPerAlbum(imgAlbumDict,master)
totOfIndsPerImg = {}
for key in countLogic:
totOfIndsPerImg[countLogic[key][0]] = totOfIndsPerImg.get(countLogic[key][0],0) + 1
# Rank list by number of images
noOfIndsPerImgSharesRnkLst = {}
noOfIndsPerImgNotSharesRnkLst = {}
for tup in imgShareNotShareList:
if tup[0] in countLogic.keys():
noOfIndsPerImgSharesRnkLst[countLogic[tup[0]][0]] = noOfIndsPerImgSharesRnkLst.get(countLogic[tup[0]][0],0) + tup[2]
noOfIndsPerImgNotSharesRnkLst[countLogic[tup[0]][0]] = noOfIndsPerImgNotSharesRnkLst.get(countLogic[tup[0]][0],0) + tup[3]
In [ ]: