In [11]:
import json
import importlib

import DataStructsHelperAPI as DS
# Reload the helper module so that edits made to DataStructsHelperAPI.py after
# its first import are picked up without restarting the kernel.
importlib.reload(DS)


Out[11]:
<module 'DataStructsHelperAPI' from '/Users/sreejithmenon/CodeBase/AWESOME/script/DataStructsHelperAPI.py'>

In [5]:
# Full, unfiltered maps covering every image source.
gid_aid_map = DS.json_loader("giraffe_gid_aid_map.json")  # all gid-aid (keyed by GID)
aid_ftr_map = DS.json_loader("aid_details_map.json")  # all aid-ftrs (keyed by AID)

# The values of each gid_filename_* file, converted to strings, are used as the
# GIDs belonging to that source.
flickr_gids = [str(gid) for gid in DS.json_loader("gid_filename_flickr.json").values()]
bing_gids = [str(gid) for gid in DS.json_loader("gid_filename_bing.json").values()]

In [7]:
# Restrict full_map to the entries whose key is contained in source_list.
def filter_by_src(source_list, full_map):
    """Return a new dict holding only the items of ``full_map`` keyed by a member of ``source_list``.

    Parameters
    ----------
    source_list : iterable of hashable
        Keys to keep. Any iterable works, including one-shot iterators,
        because it is consumed exactly once.
    full_map : dict
        Mapping to filter; it is not modified.

    Returns
    -------
    dict
        The selected ``key -> value`` pairs, in ``full_map``'s iteration
        order. Keys in ``source_list`` that are absent from ``full_map`` are
        ignored.
    """
    # Build the lookup set once: rebuilding it for every key made the loop
    # O(len(full_map) * len(source_list)).
    wanted = set(source_list)
    return {key: value for key, value in full_map.items() if key in wanted}

Extract the Flickr/Bing subsets of the data - for the GID-AID map


In [12]:
# Keep only the GID -> AID entries whose GID belongs to each source and write
# each subset to its own JSON file.
flickr_gid_aid_map = filter_by_src(flickr_gids, gid_aid_map)
DS.json_dumper(flickr_gid_aid_map, "flickr_giraffe_gid_aid_map_soc_m.json")

bing_gid_aid_map = filter_by_src(bing_gids, gid_aid_map)
# File name fixed: it previously ended in ".json.json", unlike the Flickr file.
DS.json_dumper(bing_gid_aid_map, "bing_giraffe_gid_aid_map_soc_m.json")

In [18]:
# Sanity checks on the GID split. Ideally:
#   * len(gid_aid_map) == len(flickr_gids) + len(bing_gids)  -- violated
#   * no overlap between flickr and bing images              -- violated
total_gid_count = len(gid_aid_map)
sourced_gid_count = len(flickr_gids) + len(bing_gids)

print(total_gid_count == sourced_gid_count)

# Recorded run prints 698: that many entries of gid_aid_map are accounted for by
# neither the Flickr nor the Bing list. Where are they coming from?
print(total_gid_count - sourced_gid_count)

# Should be 0, ideally. The recorded run prints 2: two GIDs appear in both
# lists. Why?
print(len(set(flickr_gids) & set(bing_gids)))


False
698
2

Extract the Flickr/Bing subsets of the data - for the AID-features map


In [25]:
# Flatten the AID lists (element 0 of every map value) into one list of string
# AIDs per source.
flickr_aids = [str(aid) for gid_entry in flickr_gid_aid_map.values() for aid in gid_entry[0]]
bing_aids = [str(aid) for gid_entry in bing_gid_aid_map.values() for aid in gid_entry[0]]

In [32]:
# Keep only the AID -> feature entries whose AID belongs to each source, report
# how many survived, and write each subset to its own JSON file.
flickr_aid_ftr_map = filter_by_src(flickr_aids, aid_ftr_map)
print(len(flickr_aid_ftr_map))
# Fixed: this used to dump flickr_gid_aid_map instead of the filtered feature map.
DS.json_dumper(flickr_aid_ftr_map, "flickr_giraffe_aid_ftr_map_soc_m.json")

bing_aid_ftr_map = filter_by_src(bing_aids, aid_ftr_map) # why is the length 1? 
print(len(bing_aid_ftr_map))
# Fixed: this used to dump flickr_gid_aid_map instead of the Bing feature map.
DS.json_dumper(bing_aid_ftr_map, "bing_giraffe_aid_ftr_map_soc_m.json")


2381
1

In [36]:
# Sanity checks on the AID split. Ideally:
#   * len(aid_ftr_map) == len(flickr_aids) + len(bing_aids)  -- violated
#   * no overlap between flickr annotations and bing annotations
sourced_aid_count = len(flickr_aids) + len(bing_aids)
print(len(aid_ftr_map) == sourced_aid_count)

# Number of AIDs that occur in both the Flickr and the Bing list.
print(len(set(flickr_aids) & set(bing_aids)))


False
2

In [45]:
# Displays (flickr + bing AID count, flickr AID count, bing AID count,
# number of entries in aid_ftr_map).
# Notice the discrepancy: the combined AID count differs from the number of
# entries in aid_ftr_map -- this is possibly because features were not
# extracted for bing images at all. Maybe identification didn't complete.
len(flickr_aids) + len(bing_aids), len(flickr_aids), len(bing_aids), len(aid_ftr_map)


Out[45]:
(5013, 4307, 706, 4711)

In [ ]: