In [11]:
import json
import importlib
import DataStructsHelperAPI as DS
# Re-import the helper module so that edits made to DataStructsHelperAPI
# are picked up without restarting the kernel.
importlib.reload(DS)
Out[11]:
In [5]:
# Image id (gid) -> annotation ids (aid), for every image.
gid_aid_map = DS.json_loader("giraffe_gid_aid_map.json")
# Keyed by annotation id (aid); presumably aid -> feature details -- TODO confirm schema.
aid_ftr_map = DS.json_loader("aid_details_map.json")

# The values of the gid->filename maps are turned into strings so that they can be
# compared against the (string) keys of the maps loaded above.
flickr_gids = [str(gid) for gid in DS.json_loader("gid_filename_flickr.json").values()]
bing_gids = [str(gid) for gid in DS.json_loader("gid_filename_bing.json").values()]
In [7]:
def filter_by_src(source_list, full_map):
    """Return the subset of ``full_map`` whose keys appear in ``source_list``.

    Parameters
    ----------
    source_list : iterable of hashable
        Keys to keep. May be any iterable (list, set, generator, ...).
    full_map : dict
        Mapping to filter. It is not modified.

    Returns
    -------
    dict
        New dict holding the ``key: value`` pairs of ``full_map`` whose key is in
        ``source_list``, in ``full_map``'s iteration order.
    """
    # Build the lookup set once. Rebuilding it for every key made the filter
    # O(len(full_map) * len(source_list)) and also broke on one-shot iterators,
    # which are exhausted after the first pass.
    wanted = set(source_list)
    return {key: value for key, value in full_map.items() if key in wanted}
In [12]:
# Split the full gid->aid map by image source and persist each part.
flickr_gid_aid_map = filter_by_src(flickr_gids, gid_aid_map)
DS.json_dumper(flickr_gid_aid_map, "flickr_giraffe_gid_aid_map_soc_m.json")

bing_gid_aid_map = filter_by_src(bing_gids, gid_aid_map)
# Output name previously ended in ".json.json"; corrected to match the flickr file's
# naming. NOTE(review): update any consumer that still reads the old doubled-extension name.
DS.json_dumper(bing_gid_aid_map, "bing_giraffe_gid_aid_map_soc_m.json")
In [18]:
'''
ideally,
len(gid_aid_map) = len(flickr_gids) + len(bing_gids) -- violated
no overlap between flickr and bing images -- violated
'''
# Do the two sources together account for every image in the map?
print(len(flickr_gids) + len(bing_gids) == len(gid_aid_map))

# Size of the gap between the map and the two source lists.
# Observed: 698 -- images that are attributed to neither flickr nor bing; origin unknown.
print(len(gid_aid_map) - len(flickr_gids) - len(bing_gids))

# Number of gids claimed by both sources; ideally 0.
# Observed: 2 overlapping images -- cause not yet understood.
print(len(set(flickr_gids) & set(bing_gids)))
In [25]:
# Flatten the annotation ids of every image into one list of strings per source.
# The first element of each map value is the iterable of annotation ids.
flickr_aids = [str(aid) for entry in flickr_gid_aid_map.values() for aid in entry[0]]
bing_aids = [str(aid) for entry in bing_gid_aid_map.values() for aid in entry[0]]
In [32]:
# Split the aid-keyed feature map by image source and persist each part.
flickr_aid_ftr_map = filter_by_src(flickr_aids, aid_ftr_map)
print(len(flickr_aid_ftr_map))
# Write the filtered feature map itself; this previously dumped flickr_gid_aid_map by mistake.
DS.json_dumper(flickr_aid_ftr_map, "flickr_giraffe_aid_ftr_map_soc_m.json")

bing_aid_ftr_map = filter_by_src(bing_aids, aid_ftr_map)  # why is the length 1?
print(len(bing_aid_ftr_map))
# Write the bing feature map; this previously dumped flickr_gid_aid_map by mistake.
DS.json_dumper(bing_aid_ftr_map, "bing_giraffe_aid_ftr_map_soc_m.json")
In [36]:
'''
ideally, len(aid_ftr_map) = len(flickr_aids) + len(bing_aids) -- violated
no overlap between flickr annotations and bing annotations
'''
# Do the two sources together account for every annotation in the feature map?
print(len(flickr_aids) + len(bing_aids) == len(aid_ftr_map))

# Number of annotation ids that show up under both sources; ideally 0.
print(len(set(flickr_aids) & set(bing_aids)))
In [45]:
# Note the discrepancy between these counts. A possible explanation is that features
# were never extracted for the bing images (maybe identification did not complete).
# Displayed as: (flickr + bing total, flickr count, bing count, feature-map size)
(
    len(flickr_aids) + len(bing_aids),
    len(flickr_aids),
    len(bing_aids),
    len(aid_ftr_map),
)
Out[45]:
In [ ]: