In [21]:
import csv
import numpy as np
import pickle
%matplotlib inline
import copy as cp
import pandas as pd
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import re
import sys
import tarfile
from six.moves import urllib
import tensorflow as tf
We get some of the TensorFlow specific items out of the way first:
In [22]:
# Handle to TensorFlow's flag container; NodeLookup reads FLAGS.model_dir from
# it when no explicit lookup paths are passed.
FLAGS = tf.app.flags.FLAGS
# pylint: disable=line-too-long
# Archive that maybe_download_and_extract() downloads and unpacks.
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
# pylint: enable=line-too-long
class NodeLookup(object):
    """Converts integer node ID's to human readable labels.

    The mapping is built once, at construction time, from two text files and
    stored in `self.node_lookup` (dict: integer node ID -> label string).
    """

    def __init__(self,
                 label_lookup_path=None,
                 uid_lookup_path=None):
        """Build the node-ID-to-label table.

        Args:
          label_lookup_path: path of the file mapping string UID to integer
            node ID. Defaults to a file inside FLAGS.model_dir.
          uid_lookup_path: path of the file mapping string UID to a
            human-readable string. Defaults to a file inside FLAGS.model_dir.
        """
        # NOTE(review): FLAGS.model_dir is only read when a path is omitted;
        # no `model_dir` flag is declared in the visible cells -- confirm it is
        # defined elsewhere before relying on the defaults.
        if not label_lookup_path:
            label_lookup_path = os.path.join(
                FLAGS.model_dir, 'imagenet_2012_challenge_label_map_proto.pbtxt')
        if not uid_lookup_path:
            uid_lookup_path = os.path.join(
                FLAGS.model_dir, 'imagenet_synset_to_human_label_map.txt')
        self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

    def load(self, label_lookup_path, uid_lookup_path):
        """Loads a human readable English name for each softmax node.

        Args:
          label_lookup_path: string UID to integer node ID.
          uid_lookup_path: string UID to human-readable string.

        Returns:
          dict from integer node ID to human-readable string.
        """
        if not tf.gfile.Exists(uid_lookup_path):
            tf.logging.fatal('File does not exist %s', uid_lookup_path)
        if not tf.gfile.Exists(label_lookup_path):
            tf.logging.fatal('File does not exist %s', label_lookup_path)

        # Loads mapping from string UID to human-readable string.
        # The file is read inside a `with` so the handle is closed right away.
        with tf.gfile.GFile(uid_lookup_path) as uid_file:
            proto_as_ascii_lines = uid_file.readlines()
        uid_to_human = {}
        p = re.compile(r'[n\d]*[ \S,]*')
        for line in proto_as_ascii_lines:
            parsed_items = p.findall(line)
            # findall() yields the UID first, an empty match at the separator,
            # then the label text.
            uid = parsed_items[0]
            human_string = parsed_items[2]
            uid_to_human[uid] = human_string

        # Loads mapping from string UID to integer node ID.
        node_id_to_uid = {}
        with tf.gfile.GFile(label_lookup_path) as label_file:
            proto_as_ascii = label_file.readlines()
        for line in proto_as_ascii:
            # Match on the left-stripped line so the amount of indentation in
            # the file does not matter.
            field = line.lstrip()
            if field.startswith('target_class:'):
                target_class = int(line.split(': ')[1])
            if field.startswith('target_class_string:'):
                # Drop surrounding whitespace/newline, then the double quotes.
                target_class_string = line.split(': ')[1].strip().strip('"')
                # Pairs the UID with the target_class seen on an earlier line.
                node_id_to_uid[target_class] = target_class_string

        # Loads the final mapping of integer node ID to human-readable string
        node_id_to_name = {}
        for key, val in node_id_to_uid.items():
            if val not in uid_to_human:
                tf.logging.fatal('Failed to locate: %s', val)
            name = uid_to_human[val]
            node_id_to_name[key] = name

        return node_id_to_name

    def id_to_string(self, node_id):
        """Return the label for `node_id`, or '' when the ID is unknown."""
        return self.node_lookup.get(node_id, '')
def create_graph():
    """Load the saved GraphDef file and import it via tf.import_graph_def.

    The serialized graph is read from 'classify_image_graph_def.pb' inside the
    "imagenet" directory. Nothing is returned; the function is called for its
    import side effect only.
    """
    graph_path = os.path.join("imagenet", 'classify_image_graph_def.pb')
    with tf.gfile.FastGFile(graph_path, 'rb') as graph_file:
        serialized_graph = graph_file.read()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized_graph)
    # Imported with an empty name prefix.
    tf.import_graph_def(graph_def, name='')
In [23]:
def maybe_download_and_extract():
    """Download and extract model tar file.

    The archive named by DATA_URL is stored in the "imagenet" directory
    (created if missing). The download is skipped when a file with that name
    already exists there; extraction into the same directory always runs.
    """
    dest_directory = "imagenet"
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # Only print a percentage when the total size is known and
            # non-zero; otherwise the division below would fail or be
            # meaningless.
            if total_size > 0:
                sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                    filename,
                    float(count * block_size) / float(total_size) * 100.0))
            else:
                sys.stdout.write('\r>> Downloading %s' % filename)
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath,
                                                 reporthook=_progress)
        print()
        statinfo = os.stat(filepath)
        print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
    # NOTE(review): extractall() writes whatever member paths the archive
    # contains; only use it with archives from a trusted source.
    # The `with` closes the archive handle once extraction is done.
    with tarfile.open(filepath, 'r:gz') as archive:
        archive.extractall(dest_directory)
# Fetch and unpack the model archive into "imagenet", the directory
# create_graph() later reads the graph definition from.
maybe_download_and_extract()
Functions to generate features
In [24]:
# function to create a generator over images
def dataset_gen(directory):
    """Yield ``(name, contents)`` for each regular file directly in `directory`.

    Arguments:
    directory -- String: path of the directory to scan (not recursive)

    Yields:
    tuple of (file name without its extension, raw bytes of the file).
    Entries that are not regular files are reported with a message and skipped.
    """
    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)
        if os.path.isfile(full_path):
            # Read inside `with` so the handle is closed before the generator
            # suspends at the yield.
            with open(full_path, 'rb') as image_file:
                image_data = image_file.read()
            yield (os.path.splitext(name)[0], image_data)
        else:
            print('Unidentified name %s. It could be a symbolic link' % full_path)
def getImage(data_dir, im):
    """Return the raw bytes of image `im` stored in `data_dir`.

    Arguments:
    data_dir -- String: directory containing the image files
    im -- String: image name without extension

    Looks for '<im>.jpg' first and '<im>.png' second and returns the contents
    of the first one that exists.

    Raises:
    IOError if neither file exists.
    """
    for extension in ('jpg', 'png'):
        candidate = os.path.join(data_dir, '%s.%s' % (im, extension))
        if os.path.exists(candidate):
            # `with` guarantees the handle is closed after reading.
            with open(candidate, 'rb') as image_file:
                return image_file.read()
    raise IOError('No file %s.{jpg, png} found in %s' % (im, data_dir))
In [25]:
# function to generate features
def generateFeatures(layer_name, dataset):
    """Evaluate one tensor for every image in a dataset and collect the results.

    Keyword arguments:
    layer_name -- String: the name of the tensor, ex 'pool_3:0'
    dataset -- Generator: an iterator over (record id, image bytes) pairs

    Returns:
    dict mapping each record id to the tensor value flattened to one dimension.
    Records whose evaluation raises are reported and left out of the result.
    Nothing is written to disk here.
    """
    # NOTE(review): create_graph() imports the graph on every call; calling
    # this function more than once per kernel presumably imports the nodes
    # again -- confirm before reusing it for several layers.
    create_graph()
    all_features = {}
    with tf.Session() as sess:
        layer = sess.graph.get_tensor_by_name(layer_name)
        for (rec_id, image_data) in dataset:
            try:
                features = sess.run(layer, {'DecodeJpeg/contents:0': image_data})
                # Flatten to 1-D; -1 lets numpy infer the length (np.product
                # is no longer available in NumPy 2.0).
                all_features[rec_id] = np.reshape(features, -1)
            except Exception as e:
                # Best effort: keep going, but show what went wrong.
                print("Error for ", rec_id, repr(e))
    return all_features
In [ ]:
# Lazily read every file in the image directory and evaluate the 'pool_3:0'
# tensor for each; `features` maps record id -> flattened tensor value.
dataset = dataset_gen("../../FoodImages/")
features = generateFeatures('pool_3:0',dataset)
In [ ]:
# Persist the extracted features; the `with` block flushes and closes the
# file as soon as the dump finishes.
with open("../web_data/data/sharath/features.p", "wb") as features_file:
    pickle.dump(features, features_file)
In [16]:
#get the captions
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles that come from a trusted source.
with open("../recipe_sharath.p", "rb") as recs_file:
    recs = pickle.load(recs_file)
print(len(recs.keys()))
In [ ]:
# Keep the caption of every record that has features; report the ones
# without a caption entry.
test_caps = {}
for k in features:
    if k not in recs:
        print("not in recs",k)
        continue
    test_caps[k] = recs[k]
In [22]:
# Number of records that have both features and a caption.
print(len(test_caps))
In [23]:
# Features restricted to the records that also have a caption.
test_feats = {k: features[k] for k in test_caps}
In [24]:
# Write the matched features and captions; each file is closed (and therefore
# flushed) as soon as its dump completes.
with open("test_data/features/features.p", "wb") as feats_file:
    pickle.dump(test_feats, feats_file)
with open("test_data/caption/caption.p", "wb") as caps_file:
    pickle.dump(test_caps, caps_file)
In [ ]: