In [1]:
import csv
import numpy as np
import pickle

%matplotlib inline

import copy as cp
import pandas as pd

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path
import re
import sys
import tarfile

from six.moves import urllib
import tensorflow as tf

In [2]:
# Load the precomputed Paul image features.
# Use a context manager so the file handle is closed deterministically
# (the original `pickle.load(open(...))` leaked the handle).
# NOTE(review): pickle.load executes arbitrary code on untrusted files —
# confirm these pickles come from a trusted pipeline.
with open("paul_features.p", "rb") as f:
    paul_features = pickle.load(f)

print(len(paul_features))


10493

In [4]:
# How many Paul feature ids are already present in the filtered master set?
image_ids = paul_features.keys()

# Context manager closes the file (the original open() handle was leaked).
with open("../dataset/image/master_features_filtered.p", "rb") as f:
    master_features = pickle.load(f)
all_ids = master_features.keys()

set1 = set(image_ids)
set2 = set(all_ids)

common = set1.intersection(set2)
print(len(common))


2548

In [5]:
# Split image_ids into those missing from all_ids (still need downloading)
# and a count of those already present.
to_dload = [key for key in image_ids if key not in all_ids]
count = sum(1 for key in image_ids if key in all_ids)

print(len(to_dload))


7945

In [6]:
pickle.dump(to_dload, open( "paul_recipe_to_dload.p", "wb" ) )

In [4]:
# Load the unfiltered master features and the two recipe dictionaries.
# Context managers close each file (the original open() handles were leaked).
with open("../dataset/image/master_features.p", "rb") as f:
    master_features = pickle.load(f)

with open("../dataset/caption/sharath_recipes.p", "rb") as f:
    recipe1 = pickle.load(f)
with open("../dataset/caption/train_recipes_al.p", "rb") as f:
    recipe2 = pickle.load(f)

print(len(recipe1))
print(len(recipe2))


15282
28522

In [7]:
# Merge the two recipe dictionaries; on duplicate ids, recipe2 wins.
one = dict(recipe1)
one.update(recipe2)
print(len(one))

# How many recipe ids appear in both sources?
set1 = set(recipe1)
set2 = set(recipe2)

common = set1 & set2
print(len(common))


38638
5166

In [6]:
# Sanity check: merged recipe count vs. available image-feature count.
print(len(one))
print(len(master_features))


38638
36420

In [11]:
# Keep only master features whose id also has a recipe.
recipe_keys = one.keys()
# Build a set for membership: `key in recipe_keys` can be O(n) per lookup
# (keys() returns a list on Python 2 — this file's six/__future__ imports
# suggest py2/3 compatibility), making the loop quadratic.
recipe_key_set = set(recipe_keys)
count = 0
new_master = dict()

for key in master_features.keys():
    if key in recipe_key_set:
        # .copy() so new_master entries don't alias the originals
        new_master[key] = master_features[key].copy()
    else:
        # feature id with no recipe — report and count it
        print(key)
        count = count + 1
print(len(new_master))


205540 2
16421
242436
244887
24922
244680
0
21321
15641
15810
18897
27739
46609
14676
24878
35361
17743
244831
25285
245085
21256
16690
246726
32513
1
245721
244579
18015
12816
244816
15861
15023
20663
241240
31972
20179
241238
17192
8990
14039
244830
16338
24685
36377

In [12]:
pickle.dump(new_master, open( "../dataset/image/master_features_filtered.p", "wb" ) )

In [16]:
# Keep only recipes whose id also has image features in new_master.
rcount = 0
new_master_recipe = dict()

for key in recipe_keys:
    # Membership directly on the dict is O(1); the original called
    # new_master.keys() on every iteration (a fresh list per pass on Py2).
    if key in new_master:
        new_master_recipe[key] = one[key]
    else:
        # recipe with no image features — just count it
        rcount = rcount + 1
print(len(new_master_recipe))
print(rcount)


36377
2261

In [17]:
pickle.dump(new_master_recipe, open( "../dataset/caption/new_master_recipe.p", "wb" ) )

In [22]:
# Load Paul's features/recipes and merge them into the filtered master sets.
# Context managers close each file (the original open() handles were leaked).
with open("paul_features.p", "rb") as f:
    paul_features = pickle.load(f)
with open("paul_recipes.p", "rb") as f:
    paul_recipe = pickle.load(f)

print(len(paul_features))
print(len(paul_recipe))

# On duplicate ids, the Paul entries win (dict.update overwrites).
image_merge = new_master.copy()
image_merge.update(paul_features)

cap_merge = new_master_recipe.copy()
cap_merge.update(paul_recipe)

print(len(image_merge))
print(len(cap_merge))


10493
11653
44322
46567

In [ ]:
# Persist the final merged feature/recipe dicts; context managers close
# the files (the original open() handles were leaked).
with open("../dataset/caption/final_features.p", "wb") as f:
    pickle.dump(image_merge, f)
with open("../dataset/caption/final_recipes.p", "wb") as f:
    pickle.dump(cap_merge, f)

In [ ]:
# Build the master feature dict from the train and test feature files.
# Context managers close each file (the original open() handles were leaked).
with open("../dataset/image/al_sha_features.p", "rb") as f:
    all_image_features = pickle.load(f)
print(len(all_image_features))
with open("../dataset/image/test_features.p", "rb") as f:
    test_features = pickle.load(f)
print(len(test_features))
# On duplicate ids, test_features wins (dict.update overwrites).
master_features = all_image_features.copy()
master_features.update(test_features)
print(len(master_features))

In [ ]:
pickle.dump(master_features, open( "../dataset/image/master_features.p", "wb" ) )

In [ ]:
# Collect image ids: every filename in the images folder, extension stripped.
list_of_image_ids = [
    os.path.splitext(name)[0] for name in os.listdir("../../FoodImages")
]

print(len(list_of_image_ids))

In [ ]:
# Merge the three recipe sources into z.
# Context managers close each file (the original open() handles were leaked).
with open("../recipe_sharath.p", "rb") as f:
    recs = pickle.load(f)
print(len(recs))

with open("data_processing/all_recipes.p", "rb") as f:
    initial_recipe = pickle.load(f)
print(len(initial_recipe))

with open("hunted_recipes2.p", "rb") as f:
    hunted_recipe = pickle.load(f)
print(len(hunted_recipe))

z = initial_recipe.copy()
# BUG FIX: dict.update() returns None, so the original
# `merged_recipe = z.update(recs)` bound merged_recipe to None.
# The merged dict is z itself; update in place and point merged_recipe at it.
z.update(recs)
merged_recipe = z

print(len(z))

z.update(hunted_recipe)
print(len(z))

In [ ]:
# Ids that have both an image on disk and a recipe in the merged dict z.
imageset = set(list_of_image_ids)
recipeset = set(z)

common_ids = imageset & recipeset
print(len(common_ids))

In [ ]:
# Image ids with no recipe yet — these recipes still need downloading.
# Loop variable renamed: the original shadowed the builtin `id`.
download_recipes_for_these_ids = [
    image_id for image_id in list_of_image_ids if image_id not in common_ids
]

print(len(download_recipes_for_these_ids))

In [ ]:
# Persist the to-download list; context manager closes the file
# (the original open() handle was leaked).
with open("download_recipes_for_these_ids.p", "wb") as f:
    pickle.dump(download_recipes_for_these_ids, f)
print(download_recipes_for_these_ids)

# Image ids that actually have a recipe in recs.
# Membership directly on the dict is O(1); the original called recs.keys()
# on every iteration.
real_image_ids = list()
print(len(recs.items()))
for image_id in list_of_image_ids:
    if image_id in recs:
        real_image_ids.append(image_id)

print("Total Real Images")
print(len(real_image_ids))


# Recipes whose id has an image on disk.
# Use a set: the original tested `recipe[0] in list_of_image_ids` (a list)
# inside the loop — O(n) per recipe, quadratic overall.
image_id_set = set(list_of_image_ids)
real_recipes = dict()
count = 0
for recipe_id, recipe_text in recs.items():
    if recipe_id in image_id_set:
        real_recipes[recipe_id] = recipe_text
    else:
        # recipe with no matching image on disk
        count = count + 1
print(count)
print("Total Real Recipe")
print(len(real_recipes))

In [ ]:
pickle.dump( features, open( "../web_data/data/sharath/features.p", "wb" ) )