Recognize individuals that appeared on day 1 and then on day 2
Individuals that appear on day 1 are marks.
If the same individuals appear on day 2 then these are recaptures
Appeared means the individuals who were photographed on day 1 as well as day 2
To change the behavior of the script, change only the values of the dictionary `days`. Changing the `days` dict restricts the images to those clicked on the listed days.
The first level calculations are based on what pictures were clicked, applying the Lincoln-Petersen index calculations.
The second level calculations will filter out only the images that were shared (only highly shared images with proportion >= 80).
In [33]:
import json
from datetime import datetime
import DataStructsHelperAPI as DS
import JobsMapResultsFilesToContainerObjs as J
import importlib
importlib.reload(J)
import pandas as pd
import cufflinks as cf # this is necessary to link pandas to plotly
cf.go_online()
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import MarkRecapHelper as MR
import importlib
importlib.reload(MR)
import DeriveFinalResultSet as DRS
from collections import Counter
In [ ]:
# Every rally date of interest, mapped to itself.
days = {
    rallyDate: rallyDate
    for rallyDate in (
        '2015-02-18',
        '2015-02-19',
        '2015-02-20',
        '2015-02-25',
        '2015-02-26',
        '2015-03-01',
        '2015-03-02',
    )
}
# NOTE(review): the other calls in this notebook pass a rank-list CSV path
# before `days`; confirm this four-argument form matches MarkRecapHelper.
nidMarkRecapSet = MR.genNidMarkRecapDict(
    "../data/imgs_exif_data_full.json",
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    days,
)
Visualizations of how individuals were identified across the different days of the Great Zebra Count (GZC) rally. The visuals show how many individuals were identified on each day, how many individuals were seen only on that day, and how many individuals were first seen on that day.
In [ ]:
# Number of individuals identified on each day: every day listed for an
# individual contributes one to that day's tally.
indsPerDay = dict(
    Counter(day for nid in nidMarkRecapSet for day in nidMarkRecapSet[nid])
)
# One row per day, single column 'IndsIdentified'.
df1 = pd.DataFrame(indsPerDay, index=['IndsIdentified']).T
fig1 = df1.iplot(
    kind='bar',
    filename='Individuals seen per day',
    title='Individuals seen per day',
)
iframe1 = fig1.embed_code
In [ ]:
# Number of individuals seen on exactly one day, tallied under that day.
# Individuals with more than one recorded day are ignored here.
uniqIndsPerDay = dict(
    Counter(
        nidMarkRecapSet[nid][0]
        for nid in nidMarkRecapSet
        if len(nidMarkRecapSet[nid]) == 1
    )
)
df2 = pd.DataFrame(uniqIndsPerDay, index=['IndsIdentifiedOnlyOnce']).T
fig2 = df2.iplot(
    kind='bar',
    filename='Individuals seen only that day',
    title='Individuals seen only that day',
)
iframe2 = fig2.embed_code
In [ ]:
# Number of individuals first seen on each day: each individual is counted
# once, under the smallest day value recorded for it.
# Author's note: the total across all days equals the number of unique
# individuals in the database (1997 identified individuals).
indsSeenFirst = dict(
    Counter(min(nidMarkRecapSet[nid]) for nid in nidMarkRecapSet)
)
df3 = pd.DataFrame(indsSeenFirst, index=['FirstTimeInds']).T
fig3 = df3.iplot(
    kind='bar',
    filename='Individuals first seen on that day',
    title='Individuals first seen on that day',
)
iframe3 = fig3.embed_code
In [ ]:
# Pull the two per-day series into df1 so all three counts share one chart.
for sourceFrame, columnName in (
    (df2, 'IndsIdentifiedOnlyOnce'),
    (df3, 'FirstTimeInds'),
):
    df1[columnName] = sourceFrame[columnName]
# Replace the internal column names with readable chart labels.
df1.columns = [
    'Total inds seen today',
    'Inds seen only today',
    'Inds first seen today',
]
fig4 = df1.iplot(
    kind='bar',
    filename='Distribution of sightings',
    title='Distribution of sightings',
)
iframe4 = fig4.embed_code
In [4]:
# Two sessions: the first listed date is labelled 1, the second is labelled 2.
days = {
    sessionDate: sessionNum
    for sessionNum, sessionDate in enumerate(('2015-03-01', '2015-03-02'), start=1)
}
In [15]:
# Entire population estimate (includes giraffes and zebras): no species
# argument is passed and shareData is None.
nidMarkRecapSet = MR.genNidMarkRecapDict(
    "../data/imgs_exif_data_full.json",
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "../FinalResults/rankListImages_expt2.csv",
    days,
    shareData=None,
)
marks, recaptures, population = MR.applyMarkRecap(nidMarkRecapSet)
print("Population of all animals = %f" % population)
# Trailing expression so the notebook displays the raw counts.
marks, recaptures
Out[15]:
In [16]:
# Population estimate with 'zebra_plains' passed as the species argument.
nidMarkRecapSet_Zebras = MR.genNidMarkRecapDict(
    "../data/imgs_exif_data_full.json",
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "../FinalResults/rankListImages_expt2.csv",
    days,
    'zebra_plains',
    shareData=None,
)
marks, recaptures, population = MR.applyMarkRecap(nidMarkRecapSet_Zebras)
print("Population of zebras = %f" % population)
# Trailing expression so the notebook displays the raw counts.
marks, recaptures
Out[16]:
In [17]:
# Population estimate with 'giraffe_masai' passed as the species argument.
nidMarkRecapSet_Giraffes = MR.genNidMarkRecapDict(
    "../data/imgs_exif_data_full.json",
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "../FinalResults/rankListImages_expt2.csv",
    days,
    'giraffe_masai',
    shareData=None,
)
marks, recaptures, population = MR.applyMarkRecap(nidMarkRecapSet_Giraffes)
print("Population of giraffes = %f" % population)
# Trailing expression so the notebook displays the raw counts.
marks, recaptures
Out[17]:
In [25]:
# Estimate for all animals with shareData='proportion' and no species
# filter (None is passed as the species argument).
nidMarkRecapSet_share = MR.genNidMarkRecapDict(
    "../data/imgs_exif_data_full.json",
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "../FinalResults/rankListImages_expt2.csv",
    days,
    None,
    shareData='proportion',
)
# Unpack into marks/recaptures (not mark/recapture) so the tuple displayed
# below belongs to THIS estimate instead of stale values from an earlier cell.
marks, recaptures, population = MR.applyMarkRecap(nidMarkRecapSet_share)
print("Population of all animals = %f" % population)
marks, recaptures
Out[25]:
In [26]:
# Estimate with shareData='proportion' and 'zebra_plains' passed as the
# species argument.
nidMarkRecapSet_share = MR.genNidMarkRecapDict(
    "../data/imgs_exif_data_full.json",
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "../FinalResults/rankListImages_expt2.csv",
    days,
    'zebra_plains',
    shareData='proportion',
)
# Unpack into marks/recaptures (not mark/recapture) so the tuple displayed
# below belongs to THIS estimate instead of stale values from an earlier cell.
marks, recaptures, population = MR.applyMarkRecap(nidMarkRecapSet_share)
print("Population of zebras = %f" % population)
marks, recaptures
Out[26]:
In [27]:
# Estimate with shareData='proportion' and 'giraffe_masai' passed as the
# species argument.
nidMarkRecapSet_share = MR.genNidMarkRecapDict(
    "../data/imgs_exif_data_full.json",
    "../data/full_gid_aid_map.json",
    "../data/full_aid_features.json",
    "../FinalResults/rankListImages_expt2.csv",
    days,
    'giraffe_masai',
    shareData='proportion',
)
# Unpack into marks/recaptures (not mark/recapture) so the tuple displayed
# below belongs to THIS estimate instead of stale values from an earlier cell.
marks, recaptures, population = MR.applyMarkRecap(nidMarkRecapSet_share)
print("Population of giraffes = %f" % population)
marks, recaptures
Out[27]:
In [31]:
# Consecutive-year pairs: the earlier year is session 1, the following year
# is session 2. Covers 2004-2011 plus the 2014/2015 pair.
days = [
    {str(firstYear): 1, str(firstYear + 1): 2}
    for firstYear in (*range(2004, 2011), 2014)
]
# `i` is kept as the loop index because a later cell reads days[i].
for i, yearPair in enumerate(days):
    nidMarkRecapSet = MR.genNidMarkRecapDict(
        "../data/Flickr_Giraffe_EXIF.json",
        "../data/Flickr_IBEIS_Ftrs_gid_aid_map.json",
        "../data/Flickr_IBEIS_Giraffe_Ftrs_aid_features.json",
        "../FinalResults/rankListImages_expt2.csv",  # this is useless
        yearPair,
        shareData='other',
        filterBySpecies='giraffe_reticulated',
    )
    print(len(nidMarkRecapSet))
    # `confidence` is unpacked only to match the 4-value return; it is not used.
    marks, recaps, population, confidence = MR.applyMarkRecap(nidMarkRecapSet)
    print("Estimate for the year : " + ' & '.join(yearPair))
    print("Number of marks : %i" % marks)
    print("Number of recaptures : %i" % recaps)
    print("Estimated population : %f" % population)
    print()
In [3]:
# Input files for the Flickr data set.
inGidAidMapFl = "../data/Flickr_IBEIS_Ftrs_gid_aid_map.json"
inAidFtrFl = "../data/Flickr_IBEIS_Ftrs_aid_features.json"
gidNid = DRS.getCountingLogic(inGidAidMapFl, inAidFtrFl, "NID", False)
# Flatten the per-image NID collections into a single list (duplicates kept).
flickr_nids = [nid for nidsOfImage in gidNid.values() for nid in nidsOfImage]
print("Number of unique individuals identified : %i" % len(set(flickr_nids)))
In [14]:
# Tally how many times each NID appears in the flattened flickr_nids list.
occurence = Counter(flickr_nids)
In [2]:
# Load the Flickr EXIF JSON into `obj`; later cells index it by gid and read
# the 'lat' / 'long' fields.
inExifFl = "../data/Flickr_EXIF_full.json"
with open(inExifFl) as fl:
    obj = json.load(fl)
In [19]:
'''
lat in between -1.50278 and 1.504953
long in between 35.174045 and 38.192836
'''
# Only gids below 1702 are considered in both filters.
# Geotagged: latitude is non-zero.
gids_geoTagged = [
    gid for gid, exif in obj.items()
    if int(gid) < 1702 and exif['lat'] != 0
]
# Inside the lat/long bounding box quoted above.
gids_nairobi = [
    gid for gid, exif in obj.items()
    if int(gid) < 1702
    and -1.50278 <= exif['lat'] <= 1.504953
    and 35.174045 <= exif['long'] <= 38.192836
]
# Geotagged images that fall outside the bounding box.
gids_zoo = list(set(gids_geoTagged) - set(gids_nairobi))
In [6]:
import DeriveFinalResultSet as DRS, DataStructsHelperAPI as DS
In [4]:
# Rebuild the gid -> NID mapping from the Flickr feature files.
inGidAidMapFl = "../data/Flickr_IBEIS_Ftrs_gid_aid_map.json"
inAidFtrFl = "../data/Flickr_IBEIS_Ftrs_aid_features.json"
gidNid = DRS.getCountingLogic(inGidAidMapFl, inAidFtrFl, "NID", False)
In [25]:
# Collect one (lat, long) pair per NID for every selected image.
# NOTE(review): `gids` is not defined in any visible cell - confirm which gid
# collection (e.g. gids_geoTagged or gids_nairobi) was intended.
locs = []
for gid in gidNid:
    if gid not in gids:
        continue
    # Repeat the image location once for each individual found in it.
    locs.extend((obj[gid]['lat'], obj[gid]['long']) for _ in gidNid[gid])
In [10]:
# Invert gidNid via the project helper; later cells index the result by NID
# and treat each value as a collection of gids.
nid_gid = DS.flipKeyValue(gidNid)
In [22]:
# NIDs of every individual appearing in a zoo image (duplicates kept, in
# gidNid iteration order).
# gids_zoo is a list; test membership against a set built once, rather than
# scanning the list for every gid.
zooGidSet = set(gids_zoo)
nids_zoo = [
    nid
    for gid in gidNid
    if gid in zooGidSet
    for nid in gidNid[gid]
]
In [26]:
# Display the number of zoo images and the number of NID entries collected from them.
len(gids_zoo), len(nids_zoo)
Out[26]:
In [32]:
# removing all nids that are in zoos, with it you will also remove the other occurences of images in which that individual occurs.
nids_only_wild_gid = {nid : nid_gid[nid] for nid in nid_gid.keys() if nid not in nids_zoo}
nids_zoo_wild_gid = {nid : nid_gid[nid] for nid in nid_gid.keys() if nid in nids_zoo}
In [34]:
# Display the number of wild-only individuals and of zoo-seen individuals.
len(nids_only_wild_gid), len(nids_zoo_wild_gid)
Out[34]:
In [39]:
# Display the number of distinct images containing wild-only individuals.
len(set().union(*nids_only_wild_gid.values()))
Out[39]:
In [40]:
# Display the number of distinct images containing zoo-seen individuals.
len(set().union(*nids_zoo_wild_gid.values()))
Out[40]:
In [42]:
# Display the largest gid in gidNid, comparing the keys as integers.
max(map(int, gidNid))
Out[42]:
In [45]:
# Spot check: display the entry stored under gid '110'.
gidNid['110']
Out[45]:
In [47]:
# Scratch cell: five 12s followed by 1, 2, 3.
l = [12] * 5
l += [1, 2, 3]
In [21]:
# Scratch cell: check %-style integer formatting.
a = 5
print("a = %d" % (a,))
In [34]:
# Re-run the helper for a single year pair and unpack two results.
# `i` is whatever index the last executed loop left behind, and `days` must
# still be the list of year-pair dicts.
# NOTE(review): the other calls in this notebook treat the return value as a
# single mapping; confirm the helper returns two values in this version.
gidsDayNumFull, gidSpecies = MR.genNidMarkRecapDict(
    "../data/Flickr_Giraffe_EXIF.json",
    "../data/Flickr_IBEIS_Ftrs_gid_aid_map.json",
    "../data/Flickr_IBEIS_Giraffe_Ftrs_aid_features.json",
    "../FinalResults/rankListImages_expt2.csv",  # this is useless
    days[i],
    shareData='other',
    filterBySpecies='giraffe_reticulated',
)
Out[34]:
In [ ]: