In [85]:
import csv
import json
import JobsMapResultsFilesToContainerObjs as ImageMap
import importlib
importlib.reload(ImageMap)
Out[85]:
In [6]:
jsonObj = json.load(open("../data/experiment2_gid_aid_features.json"))
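The indexing in the next cell (`dct[aid][2][0]`) relies on a particular nesting inside `experiment2_gid_aid_features.json`. As an optional, purely illustrative sanity check (not part of the original pipeline), one entry can be inspected before looping:
In [ ]:
# Peek at a single gid entry to confirm the nested aid -> feature-list structure
sample_gid = next(iter(jsonObj))
print(sample_gid, jsonObj[sample_gid])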
In [7]:
# Collect, per image (gid), one feature value per annotation (aid); downstream
# cells treat this value as the species label of that annotation
gidSpeciesList = []
for gid in jsonObj.keys():
    if jsonObj[gid] is not None:
        gidSpecies = {}
        for dct in jsonObj[gid]:
            for aid in dct.keys():
                gidSpecies[gid] = gidSpecies.get(gid, []) + [dct[aid][2][0]]
        gidSpeciesList.append(gidSpecies)
In [12]:
# Print the gids of images whose annotations do not all share the same species label
for dct in gidSpeciesList:
    for speciesLst in dct.values():
        firstEle = speciesLst[0]
        for ele in speciesLst:
            if ele != firstEle:
                print(dct.keys())
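The same mixed-species images can also be found with a compact set-based test; the following is an equivalent sketch (the name `mixedSpeciesGids` is illustrative), not part of the original notebook:
In [ ]:
# An image is mixed-species iff its per-annotation species list has more than one distinct value
mixedSpeciesGids = [gid
                    for dct in gidSpeciesList
                    for gid, speciesLst in dct.items()
                    if len(set(speciesLst)) > 1]
print(mixedSpeciesGids)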
In [69]:
def extractImageFeaturesFromMap(gidAidMapFl, aidFtrMapFl, feature):
    """Return, per image (gid), the requested feature value of each of its annotations (aids)."""
    aidFeatureDict = ImageMap.genAidFeatureDictDict(aidFtrMapFl)
    gidAidDict = ImageMap.genAidGidDictFromMap(gidAidMapFl)
    gidFeatureLst = []
    for gid in gidAidDict:
        if gidAidDict[gid] is not None:
            gidFtr = {}
            for aid in gidAidDict[gid]:
                gidFtr[gid] = gidFtr.get(gid, []) + [aidFeatureDict[str(aid)][feature]]
            gidFeatureLst.append(gidFtr)
    return gidFeatureLst
In [95]:
aidFeatureDict = ImageMap.genAidFeatureDictDict("../data/experiment2_aid_features.json")
gidAidJson = ImageMap.genAidGidDictFromMap("../data/experiment2_gid_aid_map.json")
featuresPerImg = ImageMap.extractImageFeaturesFromMap("../data/experiment2_gid_aid_map.json","../data/experiment2_aid_features.json","SPECIES")
In [96]:
# For every image, record: number of individuals, whether all annotations share one
# species, and (if so) which species the image should be counted towards
shareCountLogic = {}
for gid in featuresPerImg.keys():
    numInds = len(featuresPerImg[gid])
    isHomogeneous = True
    firstEle = featuresPerImg[gid][0]
    for species in featuresPerImg[gid]:
        if species != firstEle:
            isHomogeneous = False
    if isHomogeneous:
        countFor = firstEle
    else:
        countFor = None
    shareCountLogic[gid] = [numInds, isHomogeneous, countFor]
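For easier inspection, the per-image summary can be viewed as a pandas DataFrame; this is a hedged convenience step (the column names are illustrative), not part of the original analysis:
In [ ]:
import pandas as pd

# Tabular view: individuals per image, homogeneity flag, and the species the image
# would be counted towards (None when the annotations mix species)
dfShareCount = pd.DataFrame.from_dict(shareCountLogic, orient='index')
dfShareCount.columns = ['num_individuals', 'is_homogeneous', 'count_for']
dfShareCount.head()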
In [97]:
# Images whose annotations mix species (no single species to count the image towards)
list(filter(lambda x: not x[2], shareCountLogic.values()))
Out[97]:
In [21]:
import PopulationEstimatorFromClf as PE
import pandas as pd
import json
import DeriveFinalResultSet as DRS
import DataStructsHelperAPI as DS
In [4]:
clfTypes = ['bayesian', 'logistic', 'svm', 'dtree', 'random_forests', 'ada_boost']
attribTypes = ['sparse', 'non_sparse', 'non_zero', 'abv_mean']
# Restrict to a single classifier/attribute-selection combination for this run
clfTypes = ['bayesian']
attribTypes = ['sparse']
for clf in clfTypes:
    for attrib in attribTypes:
        print("Starting to run %s classifier on test data\nAttribute Selection Method : %s" % (clf, attrib))
        clfObj, predResults = PE.trainTestClf("../FinalResults/ImgShrRnkListWithTags.csv",
                                              "../data/full_gid_aid_ftr_agg.csv",
                                              clf,
                                              attrib,
                                              "../data/infoGainsExpt2.csv")
In [12]:
dfPredRes = pd.DataFrame(predResults,index=['share']).transpose().reset_index()
dfPredRes.columns = ['GID','share']
dfPredRes.head()
Out[12]:
In [24]:
gidMarkRecapSet = genNidMarkRecapDict("../data/imgs_exif_data_full.json",
                                      "../data/full_gid_aid_map.json",
                                      "../data/full_aid_features.json",
                                      "../FinalResults/rankListImages_expt2.csv",
                                      days,
                                      filterBySpecies='giraffe_masai',
                                      shareData=None)
In [27]:
dfGidDays = pd.DataFrame(gidMarkRecapSet,index=['day']).transpose().reset_index()
dfGidDays.columns = ['GID','day']
dfGidDays.head()
Out[27]:
In [29]:
# Join predicted share labels with per-image sighting days and dump for auditing
dfPredRes.merge(dfGidDays, on='GID').to_csv("/tmp/audit.dump.csv")
In [ ]:
# Bind k at definition time via a default argument so each callable returns its own k
fixedK = {k: kSharesPerContributor(prediction_probabs, inExifFl, inGidAidMapFl, inAidFtrFl, lambda k=k: k)
          for k in range(1, 11)}