In [21]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy as cp
import csv
import os.path
import pickle
import re
import sys
import tarfile

import numpy as np
import pandas as pd
from six.moves import urllib
import tensorflow as tf

%matplotlib inline

We get some of the TensorFlow-specific items out of the way first:


In [22]:
# Directory where the Inception model is downloaded and extracted.
MODEL_DIR = 'imagenet'

# pylint: disable=line-too-long
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
# pylint: enable=line-too-long

class NodeLookup(object):
  """Converts integer node ID's to human readable labels."""

  def __init__(self,
               label_lookup_path=None,
               uid_lookup_path=None):
    if not label_lookup_path:
      label_lookup_path = os.path.join(
          MODEL_DIR, 'imagenet_2012_challenge_label_map_proto.pbtxt')
    if not uid_lookup_path:
      uid_lookup_path = os.path.join(
          MODEL_DIR, 'imagenet_synset_to_human_label_map.txt')
    self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

  def load(self, label_lookup_path, uid_lookup_path):
    """Loads a human readable English name for each softmax node.

    Args:
      label_lookup_path: string UID to integer node ID.
      uid_lookup_path: string UID to human-readable string.

    Returns:
      dict from integer node ID to human-readable string.
    """
    if not tf.gfile.Exists(uid_lookup_path):
      tf.logging.fatal('File does not exist %s', uid_lookup_path)
    if not tf.gfile.Exists(label_lookup_path):
      tf.logging.fatal('File does not exist %s', label_lookup_path)

    # Loads mapping from string UID to human-readable string
    proto_as_ascii_lines = tf.gfile.GFile(uid_lookup_path).readlines()
    uid_to_human = {}
    p = re.compile(r'[n\d]*[ \S,]*')
    for line in proto_as_ascii_lines:
      parsed_items = p.findall(line)
      uid = parsed_items[0]
      human_string = parsed_items[2]
      uid_to_human[uid] = human_string

    # Loads mapping from string UID to integer node ID.
    node_id_to_uid = {}
    proto_as_ascii = tf.gfile.GFile(label_lookup_path).readlines()
    for line in proto_as_ascii:
      if line.startswith('  target_class:'):
        target_class = int(line.split(': ')[1])
      if line.startswith('  target_class_string:'):
        target_class_string = line.split(': ')[1]
        node_id_to_uid[target_class] = target_class_string[1:-2]

    # Loads the final mapping of integer node ID to human-readable string
    node_id_to_name = {}
    for key, val in node_id_to_uid.items():
      if val not in uid_to_human:
        tf.logging.fatal('Failed to locate: %s', val)
      name = uid_to_human[val]
      node_id_to_name[key] = name

    return node_id_to_name

  def id_to_string(self, node_id):
    if node_id not in self.node_lookup:
      return ''
    return self.node_lookup[node_id]

def create_graph():
  """Creates a graph from saved GraphDef file and returns a saver."""
  # Creates graph from saved graph_def.pb.
  with tf.gfile.FastGFile(os.path.join(
      MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')

In [23]:
def maybe_download_and_extract():
    """Download and extract model tar file."""
    
    dest_directory = MODEL_DIR
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                    filename, float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath,
                                                 reporthook=_progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    
    tarfile.open(filepath, 'r:gz').extractall(dest_directory)

maybe_download_and_extract()
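
With the model files in place, a quick sanity check can be run (it is not needed for the rest of the notebook): the sketch below pushes a single image through the graph and decodes the top predictions with NodeLookup. The 'softmax:0' and 'DecodeJpeg/contents:0' tensor names come from the downloaded Inception graph; the commented example path assumes the cropped_panda.jpg image that ships inside the tarball.


In [ ]:
def classify_one_image(image_path, num_top_predictions=5):
    """Print the top ImageNet labels for a single image (sanity check only)."""
    image_data = tf.gfile.FastGFile(image_path, 'rb').read()
    create_graph()
    with tf.Session() as sess:
        softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
        predictions = np.squeeze(
            sess.run(softmax_tensor, {'DecodeJpeg/contents:0': image_data}))
        node_lookup = NodeLookup()
        for node_id in predictions.argsort()[-num_top_predictions:][::-1]:
            print('%s (score = %.5f)' %
                  (node_lookup.id_to_string(node_id), predictions[node_id]))

# classify_one_image(os.path.join(MODEL_DIR, 'cropped_panda.jpg'))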

Functions to generate features


In [24]:
# Function to create a generator that yields (record id, raw image bytes) for every file in a directory.
def dataset_gen(directory):
    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)
        if os.path.isfile(full_path):
            with open(full_path, 'rb') as f:
                yield (os.path.splitext(name)[0], f.read())
        else:
            print('Skipping %s: not a regular file (e.g. a directory or broken symbolic link)' % full_path)


    
def getImage(data_dir, im):
    """Return the raw bytes for image `im`, trying .jpg then .png."""
    jpg_filepath = os.path.join(data_dir, '%s.jpg' % im)
    png_filepath = os.path.join(data_dir, '%s.png' % im)
    if os.path.exists(jpg_filepath):
        with open(jpg_filepath, 'rb') as f:
            return f.read()
    elif os.path.exists(png_filepath):
        with open(png_filepath, 'rb') as f:
            return f.read()
    else:
        raise IOError('No file %s.{jpg, png} found in %s' % (im, data_dir))
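
getImage is not used below (dataset_gen walks the whole image directory instead), but it is convenient when only a known list of record ids needs to be featurized. A minimal sketch, with placeholder directory and ids:


In [ ]:
# Build a (record id, raw image bytes) generator from an explicit list of ids.
# The directory and ids below are placeholders.
def dataset_from_ids(data_dir, rec_ids):
    for rec_id in rec_ids:
        yield (rec_id, getImage(data_dir, rec_id))

# dataset = dataset_from_ids("../../FoodImages/", ["some_rec_id"])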

In [25]:
# Function to run every image in a dataset through one layer of the Inception graph.
def generateFeatures(layer_name, dataset):
    """Generate features for a particular layer over an image dataset.

    Keyword arguments:
    layer_name -- String: the name of the tensor, e.g. 'pool_3:0'
    dataset -- Generator: an iterator over (record id, raw image bytes) pairs

    Returns:
    dict from record id to a flattened feature vector.
    """
    create_graph()
    all_features = {}
    with tf.Session() as sess:
        layer = sess.graph.get_tensor_by_name(layer_name)
        for (rec_id, image_data) in dataset:
            try:
                features = sess.run(layer, {'DecodeJpeg/contents:0': image_data})
                # Flatten the layer output (2048 values for pool_3) into a 1-D vector.
                all_features[rec_id] = np.reshape(features, (-1,))
            except Exception as e:
                print("Error for", rec_id, ":", e)
    return all_features
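
The features are pickled later in this notebook, but since pandas is already imported, here is a minimal sketch of an alternative CSV export, with a placeholder output path:


In [ ]:
# Optional: write the features as a CSV with one row per image
# (2048 columns for the pool_3 layer). The path is a placeholder.
def save_features_csv(all_features, csv_path):
    df = pd.DataFrame.from_dict(all_features, orient='index')
    df.to_csv(csv_path)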

In [ ]:
dataset = dataset_gen("../../FoodImages/")
features = generateFeatures('pool_3:0', dataset)

In [ ]:
pickle.dump(features, open("../web_data/data/sharath/features.p", "wb"))

In [16]:
# Get the captions (keyed by the same record ids as the images).
recs = pickle.load(open("../recipe_sharath.p", "rb"))
print(len(recs.keys()))


9413

In [ ]:
test_caps = {}
for k in features.keys():
    if k in recs:
        test_caps[k] = recs[k]
    else:
        print("not in recs", k)

In [22]:
print(len(test_caps.keys()))


35

In [23]:
test_feats = {}
for k in test_caps.keys():
    test_feats[k] = features[k]

In [24]:
pickle.dump(test_feats, open("test_data/features/features.p", "wb"))
pickle.dump(test_caps, open("test_data/caption/caption.p", "wb"))
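
As a final check, the two pickles can be reloaded to confirm they round-trip (same paths as above):


In [ ]:
# Reload the saved test features and captions and confirm the counts match.
reloaded_feats = pickle.load(open("test_data/features/features.p", "rb"))
reloaded_caps = pickle.load(open("test_data/caption/caption.p", "rb"))
print(len(reloaded_feats), len(reloaded_caps))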

In [ ]: