In [1]:
import csv
import numpy as np
import pickle

%matplotlib inline

import copy as cp
import pandas as pd

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path
import re
import sys
import tarfile

from six.moves import urllib
import tensorflow as tf

In [2]:
# Load the precomputed Paul image features.
# Use a context manager so the file handle is closed deterministically
# (the original `pickle.load(open(...))` leaked the handle).
# NOTE(review): pickle.load executes arbitrary code on untrusted files —
# confirm these pickles come from a trusted pipeline.
with open("paul_features.p", "rb") as f:
    paul_features = pickle.load(f)

print(len(paul_features))


10493

In [4]:
# How many Paul feature ids are already present in the filtered master set?
image_ids = paul_features.keys()

# Context manager closes the file (the original open() handle was leaked).
with open("../dataset/image/master_features_filtered.p", "rb") as f:
    master_features = pickle.load(f)
all_ids = master_features.keys()

set1 = set(image_ids)
set2 = set(all_ids)

common = set1.intersection(set2)
print(len(common))


2548

In [5]:
# Split image_ids into those missing from all_ids (still need downloading)
# and a count of those already present.
to_dload = [key for key in image_ids if key not in all_ids]
count = sum(1 for key in image_ids if key in all_ids)

print(len(to_dload))


7945

In [6]:
pickle.dump(to_dload, open( "paul_recipe_to_dload.p", "wb" ) )

In [4]:
# Load the unfiltered master features and the two recipe dictionaries.
# Context managers close each file (the original open() handles were leaked).
with open("../dataset/image/master_features.p", "rb") as f:
    master_features = pickle.load(f)

with open("../dataset/caption/sharath_recipes.p", "rb") as f:
    recipe1 = pickle.load(f)
with open("../dataset/caption/train_recipes_al.p", "rb") as f:
    recipe2 = pickle.load(f)

print(len(recipe1))
print(len(recipe2))


15282
28522

In [7]:
# Merge the two recipe dictionaries; on duplicate ids, recipe2 wins.
one = dict(recipe1)
one.update(recipe2)
print(len(one))

# How many recipe ids appear in both sources?
set1 = set(recipe1)
set2 = set(recipe2)

common = set1 & set2
print(len(common))


38638
5166

In [6]:
# Sanity check: merged recipe count vs. available image-feature count.
print(len(one))
print(len(master_features))


38638
36420

In [11]:
# Keep only master features whose id also has a recipe.
recipe_keys = one.keys()
# Build a set for membership: `key in recipe_keys` can be O(n) per lookup
# (keys() returns a list on Python 2 — this file's six/__future__ imports
# suggest py2/3 compatibility), making the loop quadratic.
recipe_key_set = set(recipe_keys)
count = 0
new_master = dict()

for key in master_features.keys():
    if key in recipe_key_set:
        # .copy() so new_master entries don't alias the originals
        new_master[key] = master_features[key].copy()
    else:
        # feature id with no recipe — report and count it
        print(key)
        count = count + 1
print(len(new_master))


205540 2
16421
242436
244887
24922
244680
0
21321
15641
15810
18897
27739
46609
14676
24878
35361
17743
244831
25285
245085
21256
16690
246726
32513
1
245721
244579
18015
12816
244816
15861
15023
20663
241240
31972
20179
241238
17192
8990
14039
244830
16338
24685
36377

In [12]:
pickle.dump(new_master, open( "../dataset/image/master_features_filtered.p", "wb" ) )

In [16]:
# Keep only recipes whose id also has image features in new_master.
rcount = 0
new_master_recipe = dict()

for key in recipe_keys:
    # Membership directly on the dict is O(1); the original called
    # new_master.keys() on every iteration (a fresh list per pass on Py2).
    if key in new_master:
        new_master_recipe[key] = one[key]
    else:
        # recipe with no image features — just count it
        rcount = rcount + 1
print(len(new_master_recipe))
print(rcount)


36377
2261

In [17]:
pickle.dump(new_master_recipe, open( "../dataset/caption/new_master_recipe.p", "wb" ) )

In [22]:
# Load Paul's features/recipes and merge them into the filtered master sets.
# Context managers close each file (the original open() handles were leaked).
with open("paul_features.p", "rb") as f:
    paul_features = pickle.load(f)
with open("paul_recipes.p", "rb") as f:
    paul_recipe = pickle.load(f)

print(len(paul_features))
print(len(paul_recipe))

# On duplicate ids, the Paul entries win (dict.update overwrites).
image_merge = new_master.copy()
image_merge.update(paul_features)

cap_merge = new_master_recipe.copy()
cap_merge.update(paul_recipe)

print(len(image_merge))
print(len(cap_merge))


10493
11653
44322
46567

In [ ]:
# Persist the final merged feature/recipe dicts; context managers close
# the files (the original open() handles were leaked).
with open("../dataset/caption/final_features.p", "wb") as f:
    pickle.dump(image_merge, f)
with open("../dataset/caption/final_recipes.p", "wb") as f:
    pickle.dump(cap_merge, f)

In [ ]:
# Build the master feature dict from the train and test feature files.
# Context managers close each file (the original open() handles were leaked).
with open("../dataset/image/al_sha_features.p", "rb") as f:
    all_image_features = pickle.load(f)
print(len(all_image_features))
with open("../dataset/image/test_features.p", "rb") as f:
    test_features = pickle.load(f)
print(len(test_features))
# On duplicate ids, test_features wins (dict.update overwrites).
master_features = all_image_features.copy()
master_features.update(test_features)
print(len(master_features))

In [ ]:
pickle.dump(master_features, open( "../dataset/image/master_features.p", "wb" ) )

In [ ]:
# Collect image ids: every filename in the images folder, extension stripped.
list_of_image_ids = [
    os.path.splitext(name)[0] for name in os.listdir("../../FoodImages")
]

print(len(list_of_image_ids))

In [ ]:
# Merge the three recipe sources into z.
# Context managers close each file (the original open() handles were leaked).
with open("../recipe_sharath.p", "rb") as f:
    recs = pickle.load(f)
print(len(recs))

with open("data_processing/all_recipes.p", "rb") as f:
    initial_recipe = pickle.load(f)
print(len(initial_recipe))

with open("hunted_recipes2.p", "rb") as f:
    hunted_recipe = pickle.load(f)
print(len(hunted_recipe))

z = initial_recipe.copy()
# BUG FIX: dict.update() returns None, so the original
# `merged_recipe = z.update(recs)` bound merged_recipe to None.
# The merged dict is z itself; update in place and point merged_recipe at it.
z.update(recs)
merged_recipe = z

print(len(z))

z.update(hunted_recipe)
print(len(z))

In [ ]:
# Ids that have both an image on disk and a recipe in the merged dict z.
imageset = set(list_of_image_ids)
recipeset = set(z)

common_ids = imageset & recipeset
print(len(common_ids))

In [ ]:
# Image ids with no recipe yet — these recipes still need downloading.
# Loop variable renamed: the original shadowed the builtin `id`.
download_recipes_for_these_ids = [
    image_id for image_id in list_of_image_ids if image_id not in common_ids
]

print(len(download_recipes_for_these_ids))

In [ ]:
# Persist the to-download list; context manager closes the file
# (the original open() handle was leaked).
with open("download_recipes_for_these_ids.p", "wb") as f:
    pickle.dump(download_recipes_for_these_ids, f)
print(download_recipes_for_these_ids)

# Image ids that actually have a recipe in recs.
# Membership directly on the dict is O(1); the original called recs.keys()
# on every iteration.
real_image_ids = list()
print(len(recs.items()))
for image_id in list_of_image_ids:
    if image_id in recs:
        real_image_ids.append(image_id)

print("Total Real Images")
print(len(real_image_ids))


# Recipes whose id has an image on disk.
# Use a set: the original tested `recipe[0] in list_of_image_ids` (a list)
# inside the loop — O(n) per recipe, quadratic overall.
image_id_set = set(list_of_image_ids)
real_recipes = dict()
count = 0
for recipe_id, recipe_text in recs.items():
    if recipe_id in image_id_set:
        real_recipes[recipe_id] = recipe_text
    else:
        # recipe with no matching image on disk
        count = count + 1
print(count)
print("Total Real Recipe")
print(len(real_recipes))

In [ ]:
pickle.dump( features, open( "../web_data/data/sharath/features.p", "wb" ) )