In [1]:
import csv
import numpy as np
import pickle
%matplotlib inline
import copy as cp
import pandas as pd
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import re
import sys
import tarfile
from six.moves import urllib
import tensorflow as tf
In [2]:
# Load the precomputed Paul image features.
# Fix: use a context manager so the file handle is closed deterministically
# (the original `pickle.load(open(...))` leaked it).
# NOTE(review): pickle.load executes arbitrary code if the file is untrusted.
with open("paul_features.p", "rb") as f:
    paul_features = pickle.load(f)
print(len(paul_features))
In [4]:
# Count how many of Paul's image ids already have features in the filtered
# master set. Fix: close the pickle file deterministically with `with`.
image_ids = paul_features.keys()
with open("../dataset/image/master_features_filtered.p", "rb") as f:
    master_features = pickle.load(f)
all_ids = master_features.keys()
set1 = set(image_ids)
set2 = set(all_ids)
common = set1.intersection(set2)
print(len(common))
In [5]:
# Collect the ids still missing from the master feature set (these need
# downloading); `count` tallies the ones already covered.
to_dload = []
count = 0
for img_id in image_ids:
    if img_id in all_ids:
        count += 1
    else:
        to_dload.append(img_id)
print(len(to_dload))
In [6]:
# Persist the ids whose recipes still need downloading.
# Fix: use `with` so the output file is flushed and closed deterministically.
with open("paul_recipe_to_dload.p", "wb") as f:
    pickle.dump(to_dload, f)
In [4]:
# Load the unfiltered master features and the two recipe sources.
# Fix: context managers close the pickle files (originals leaked handles).
with open("../dataset/image/master_features.p", "rb") as f:
    master_features = pickle.load(f)
with open("../dataset/caption/sharath_recipes.p", "rb") as f:
    recipe1 = pickle.load(f)
with open("../dataset/caption/train_recipes_al.p", "rb") as f:
    recipe2 = pickle.load(f)
print(len(recipe1))
print(len(recipe2))
In [7]:
#Merge the two recipes
# Merge the two recipe dicts (recipe2 wins on duplicate keys) and report
# how many ids the two sources share.
one = {**recipe1, **recipe2}
print(len(one))
set1 = set(recipe1)
set2 = set(recipe2)
common = set1 & set2
print(len(common))
In [6]:
# Sanity check: merged recipe count vs. available image-feature count.
print(len(one))
print(len(master_features))
In [11]:
# Keep only the image features whose id also has a recipe; print (and count)
# the ids that get dropped.
recipe_keys = one.keys()
count = 0
new_master = dict()
for key, feats in master_features.items():
    if key in recipe_keys:
        new_master[key] = feats.copy()
    else:
        print(key)
        count += 1
print(len(new_master))
In [12]:
# Persist the recipe-filtered feature dict.
# Fix: `with` closes the output file deterministically.
with open("../dataset/image/master_features_filtered.p", "wb") as f:
    pickle.dump(new_master, f)
In [16]:
# Keep only recipes whose image features survived the filtering step;
# `rcount` counts the recipes dropped for lack of features.
# Idiom fix: test membership directly on the dict instead of rebuilding
# the `.keys()` view on every iteration.
rcount = 0
new_master_recipe = dict()
for key in recipe_keys:
    if key in new_master:
        new_master_recipe[key] = one[key]
    else:
        rcount = rcount + 1
print(len(new_master_recipe))
print(rcount)
In [17]:
# Persist the feature-filtered recipe dict.
# Fix: `with` closes the output file deterministically.
with open("../dataset/caption/new_master_recipe.p", "wb") as f:
    pickle.dump(new_master_recipe, f)
In [22]:
# Merge Paul's features/recipes into the filtered master dicts.
# Fix: context managers close the pickle files (originals leaked handles).
with open("paul_features.p", "rb") as f:
    paul_features = pickle.load(f)
with open("paul_recipes.p", "rb") as f:
    paul_recipe = pickle.load(f)
print(len(paul_features))
print(len(paul_recipe))
image_merge = new_master.copy()
image_merge.update(paul_features)
cap_merge = new_master_recipe.copy()
cap_merge.update(paul_recipe)
print(len(image_merge))
print(len(cap_merge))
In [ ]:
# Persist the final merged feature and recipe dicts.
# Fix: `with` closes each output file deterministically.
with open("../dataset/caption/final_features.p", "wb") as f:
    pickle.dump(image_merge, f)
with open("../dataset/caption/final_recipes.p", "wb") as f:
    pickle.dump(cap_merge, f)
In [ ]:
# Combine the train (al+sharath) and test feature dicts into one master dict
# (test entries win on duplicate keys).
# Fix: context managers close the pickle files (originals leaked handles).
with open("../dataset/image/al_sha_features.p", "rb") as f:
    all_image_features = pickle.load(f)
print(len(all_image_features))
with open("../dataset/image/test_features.p", "rb") as f:
    test_features = pickle.load(f)
print(len(test_features))
master_features = all_image_features.copy()
master_features.update(test_features)
print(len(master_features))
In [ ]:
# Persist the combined master feature dict.
# Fix: `with` closes the output file deterministically.
with open("../dataset/image/master_features.p", "wb") as f:
    pickle.dump(master_features, f)
In [ ]:
# Image ids are the file names (extension stripped) in the FoodImages folder.
list_of_image_ids = [
    os.path.splitext(name)[0] for name in os.listdir("../../FoodImages")
]
print(len(list_of_image_ids))
In [ ]:
# Merge all three recipe sources into `z` (later sources win on duplicates).
# Fixes:
#  - context managers close the pickle files (originals leaked handles);
#  - BUG: the original did `merged_recipe = z.update(recs)`, but dict.update
#    returns None, so merged_recipe was always None. The merged dict is `z`
#    itself; the dead assignment is removed.
with open("../recipe_sharath.p", "rb") as f:
    recs = pickle.load(f)
print(len(recs))
with open("data_processing/all_recipes.p", "rb") as f:
    initial_recipe = pickle.load(f)
print(len(initial_recipe))
with open("hunted_recipes2.p", "rb") as f:
    hunted_recipe = pickle.load(f)
print(len(hunted_recipe))
z = initial_recipe.copy()
z.update(recs)
print(len(z))
z.update(hunted_recipe)
print(len(z))
In [ ]:
# Ids that have both an image on disk and a merged recipe.
imageset = set(list_of_image_ids)
recipeset = set(z)
common_ids = imageset & recipeset
print(len(common_ids))
In [ ]:
# Ids with an image but no recipe yet — these still need recipe downloads.
# (Loop variable renamed from `id`, which shadowed the builtin.)
download_recipes_for_these_ids = [
    image_id
    for image_id in list_of_image_ids
    if image_id not in common_ids
]
print(len(download_recipes_for_these_ids))
In [ ]:
# Persist the download list, then cross-check images against recipes.
# Fixes:
#  - `with` closes the dump file (original leaked the handle);
#  - idiom: `image_id in recs` instead of `in recs.keys()`;
#  - PERF: the recipe loop tested membership against the id *list*
#    (O(len(recs) * len(ids))); a set makes each lookup O(1).
with open("download_recipes_for_these_ids.p", "wb") as f:
    pickle.dump(download_recipes_for_these_ids, f)
print(download_recipes_for_these_ids)

# Images that have a matching recipe key.
real_image_ids = list()
print(len(recs.items()))
for image_id in list_of_image_ids:
    if image_id in recs:
        real_image_ids.append(image_id)
print("Total Real Images")
print(len(real_image_ids))

# Recipes that have a matching image on disk; `count` = recipes dropped.
image_id_set = set(list_of_image_ids)
real_recipes = dict()
count = 0
for recipe_id, recipe_body in recs.items():
    if recipe_id in image_id_set:
        real_recipes[recipe_id] = recipe_body
    else:
        count = count + 1
print(count)
print("Total Real Recipe")
print(len(real_recipes.items()))
In [ ]:
# NOTE(review): `features` is not defined anywhere in this notebook's visible
# cells — on a fresh Restart & Run All this cell raises NameError. Presumably
# it was built in a since-deleted cell; confirm (or rebuild it) before running.
# Also note the file handle from open(...) is never closed explicitly.
pickle.dump( features, open( "../web_data/data/sharath/features.p", "wb" ) )