Inception

imports


In [1]:
from csv import DictReader
# Transfer Learning using convolutional neural network (Inception) trained over Imagenet
import matplotlib.pylab as plt
%matplotlib inline
import numpy as np
from six.moves import urllib
import tensorflow as tf
import csv

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path
import re
import sys
import tarfile

Inception's functions


In [2]:
# Shared handle to the flag values defined below. Code in this notebook reads
# FLAGS.model_dir (NodeLookup, create_graph) and FLAGS.num_top_predictions
# (the panda classification cell).
FLAGS = tf.app.flags.FLAGS

# Directory that must contain the Inception graph file and the two label-map
# files named in the help text; defaults to /tmp/imagenet.
tf.app.flags.DEFINE_string(
    'model_dir', '/tmp/imagenet',
    """Path to classify_image_graph_def.pb, """
    """imagenet_synset_to_human_label_map.txt, and """
    """imagenet_2012_challenge_label_map_proto.pbtxt.""")
# NOTE(review): 'image_file' is defined but not read anywhere in the visible
# part of this notebook.
tf.app.flags.DEFINE_string('image_file', '',
                           """Absolute path to image file.""")
# Number of highest-scoring classes printed by the classification cell.
tf.app.flags.DEFINE_integer('num_top_predictions', 5,
                            """Display this many predictions.""")

# NOTE(review): DATA_URL is not referenced in the visible cells; nothing here
# downloads or extracts the archive, so model_dir presumably has to be
# populated beforehand -- confirm.
# pylint: disable=line-too-long
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
# pylint: enable=line-too-long

class NodeLookup(object):
  """Converts integer node ID's to human readable labels.

  The table is built once in the constructor from two text files and stored
  in `self.node_lookup` (dict from integer node ID to label string).
  """

  def __init__(self,
               label_lookup_path=None,
               uid_lookup_path=None):
    """Builds the node-ID -> label table.

    Args:
      label_lookup_path: file mapping string UIDs to integer node IDs.
        Falls back to imagenet_2012_challenge_label_map_proto.pbtxt inside
        FLAGS.model_dir when empty or None.
      uid_lookup_path: file mapping string UIDs to human-readable strings.
        Falls back to imagenet_synset_to_human_label_map.txt inside
        FLAGS.model_dir when empty or None.
    """
    if not label_lookup_path:
      label_lookup_path = os.path.join(
          FLAGS.model_dir, 'imagenet_2012_challenge_label_map_proto.pbtxt')
    if not uid_lookup_path:
      uid_lookup_path = os.path.join(
          FLAGS.model_dir, 'imagenet_synset_to_human_label_map.txt')
    self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

  def load(self, label_lookup_path, uid_lookup_path):
    """Loads a human readable English name for each softmax node.

    Args:
      label_lookup_path: string UID to integer node ID.
      uid_lookup_path: string UID to human-readable string.

    Returns:
      dict from integer node ID to human-readable string.
    """
    # NOTE(review): tf.logging.fatal may only log rather than abort, in which
    # case a missing file surfaces as an error from the read below -- confirm.
    if not tf.gfile.Exists(uid_lookup_path):
      tf.logging.fatal('File does not exist %s', uid_lookup_path)
    if not tf.gfile.Exists(label_lookup_path):
      tf.logging.fatal('File does not exist %s', label_lookup_path)

    # Loads mapping from string UID to human-readable string.
    # The file is read inside a `with` block so the handle is closed promptly
    # instead of being left to the garbage collector.
    with tf.gfile.GFile(uid_lookup_path) as uid_file:
      uid_lines = uid_file.readlines()
    uid_to_human = {}
    p = re.compile(r'[n\d]*[ \S,]*')
    for line in uid_lines:
      # For a '<uid><TAB><label>' line findall yields the UID first, an empty
      # match at the tab, then the label text -- hence indices 0 and 2.
      parsed_items = p.findall(line)
      uid_to_human[parsed_items[0]] = parsed_items[2]

    # Loads mapping from string UID to integer node ID.
    with tf.gfile.GFile(label_lookup_path) as label_file:
      label_lines = label_file.readlines()
    node_id_to_uid = {}
    for line in label_lines:
      if line.startswith('  target_class:'):
        target_class = int(line.split(': ')[1])
      if line.startswith('  target_class_string:'):
        target_class_string = line.split(': ')[1]
        # [1:-2] strips the opening quote and the closing quote + newline.
        node_id_to_uid[target_class] = target_class_string[1:-2]

    # Loads the final mapping of integer node ID to human-readable string
    node_id_to_name = {}
    for node_id, uid in node_id_to_uid.items():
      if uid not in uid_to_human:
        tf.logging.fatal('Failed to locate: %s', uid)
      node_id_to_name[node_id] = uid_to_human[uid]

    return node_id_to_name

  def id_to_string(self, node_id):
    """Returns the label for `node_id`, or '' when the ID is unknown."""
    return self.node_lookup.get(node_id, '')

def create_graph():
  """Loads the saved GraphDef and imports it into the default graph.

  The serialized graph is read from classify_image_graph_def.pb inside
  FLAGS.model_dir. Nodes are imported with an empty name prefix, so tensors
  are addressable by their original names. Nothing is returned.
  """
  # NOTE(review): every call imports the graph again into the default graph;
  # confirm repeated calls within one kernel are intended.
  graph_path = os.path.join(FLAGS.model_dir, 'classify_image_graph_def.pb')
  with tf.gfile.FastGFile(graph_path, 'rb') as graph_file:
    serialized_graph = graph_file.read()
  graph_def = tf.GraphDef()
  graph_def.ParseFromString(serialized_graph)
  tf.import_graph_def(graph_def, name='')
    
# list the name of every operation in the graph
def print_ops():
    """Import the Inception graph and print each operation's name, one per line."""
    create_graph()
    with tf.Session() as sess:
        for operation in sess.graph.get_operations():
            print(operation.name)

test with panda


In [3]:
# Sanity check: classify one known image and print the top predictions.
with tf.Session() as sess:
    create_graph()

    # Read the raw JPEG bytes inside a context manager so the file handle is
    # closed as soon as the data is in memory.
    with tf.gfile.FastGFile('datasets/test/panda.jpg', 'rb') as image_file:
        image_data = image_file.read()

    # Feed the encoded bytes to the graph's JPEG input and fetch the softmax
    # output.
    softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
    predictions = sess.run(softmax_tensor, {'DecodeJpeg/contents:0': image_data})

    # Classification
    predictions = np.squeeze(predictions)
    node_lookup = NodeLookup()
    # argsort is ascending: keep the last N indices and reverse them so the
    # highest score comes first.
    top_k = predictions.argsort()[-FLAGS.num_top_predictions:][::-1]
    for node_id in top_k:
        human_string = node_lookup.id_to_string(node_id)
        score = predictions[node_id]
        print('%s (score = %.5f)' % (human_string, score))


giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca (score = 0.89233)
indri, indris, Indri indri, Indri brevicaudatus (score = 0.00859)
lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens (score = 0.00264)
custard apple (score = 0.00141)
earthstar (score = 0.00107)

Feature extraction functions

Save the generated features in CSV format


In [4]:
# function to generate features
def generateFeatures(layer_name ,dataset, name):
    """Generate and save features as csv for a particular layer and dataset.

    Each image is run through the graph, the requested tensor is flattened to
    one row, and all rows are written to features/<name>/<layer_name>.csv.

    Keyword arguments:
    layer_name -- String: the name of the tensor, ex 'pool_3:0'
    dataset -- Generator: an iterator over the image dataset (encoded image
               bytes, fed to 'DecodeJpeg/contents:0')
    name -- String: the name of the dataset
    """
    create_graph()
    all_features = []
    directory = os.path.join('features', name)
    # NOTE(review): layer_name is used verbatim in the file name, so 'pool_3:0'
    # yields a name containing ':' -- confirm this is acceptable on the target
    # file system.
    filepath = os.path.join(directory, layer_name + ".csv")
    with tf.Session() as sess:
        layer = sess.graph.get_tensor_by_name(layer_name)
        for image_data in dataset:
            features = sess.run(layer, {'DecodeJpeg/contents:0': image_data})
            # Flatten to 1-D. reshape(-1) replaces np.product(features.shape),
            # a deprecated alias that newer NumPy releases no longer provide.
            all_features.append(np.reshape(features, -1))
    all_features = np.asarray(all_features)

    # if one wants to see the result without saving
    #labels = []
    #with open("datasets/synthetic/trainLabels.csv",'rU') as f:
    #    rows = csv.DictReader(f)
    #    for row in rows:
    #        labels.append(row['Class'])
    #labels = np.asarray(labels).astype(int)
    #from sklearn.manifold import TSNE
    #tsne_model = TSNE(n_components=2, random_state=0)
    #np.set_printoptions(suppress=True)
    #points = tsne_model.fit_transform(all_features)
    #plt.scatter(points[:,0],points[:,1], c=labels)

    if not os.path.exists(directory):
        os.makedirs(directory)
    np.savetxt(filepath, all_features, delimiter=",")

functions to create iterator for datasets


In [13]:
# function to create image set iterator
def dataset_gen(samplesPath, data_dir):
    """Yield the raw bytes of every image listed in a CSV index file.

    Keyword arguments:
    samplesPath -- String: path to a CSV file that has an 'Id' column
    data_dir -- String: directory containing one '<Id>.jpg' file per row

    Images are read lazily, one per iteration, in CSV row order.
    """
    # The 'U' flag is only needed on Python 2 for universal newlines; Python 3
    # text mode already behaves that way and recent versions reject 'U'.
    csv_mode = 'rU' if sys.version_info[0] < 3 else 'r'
    # Both the index file and every image are opened in `with` blocks so no
    # handle is left open while the generator is being consumed.
    with open(samplesPath, csv_mode) as samples_file:
        for row in DictReader(samples_file):
            filepath = os.path.join(data_dir, row['Id'] + '.jpg')
            if not os.path.exists(filepath):
                # NOTE(review): if tf.logging.fatal only logs, the open()
                # below is what raises for a missing file -- confirm.
                tf.logging.fatal('File does not exist %s', filepath)
            with open(filepath, 'rb') as image_file:
                image_bytes = image_file.read()
            yield image_bytes

Feature Extraction

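List every operation (layer) in the graph. To use one as `layer_name`, append the output index to the operation name (e.g. operation `pool_3` becomes tensor `pool_3:0`).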


In [ ]:
# Print the name of every operation in the Inception graph (print_ops imports
# the graph itself). generateFeatures expects a tensor name such as 'pool_3:0'.
print_ops()

synthetic dataset


In [15]:
# Extract "pool_3:0" features for every image listed in
# datasets/synthetic/trainLabels.csv; generateFeatures writes them to
# features/synthetic/pool_3:0.csv.
layer_name = "pool_3:0"
# dataset_gen is a generator, so images are read as generateFeatures iterates.
image_itor = dataset_gen("datasets/synthetic/trainLabels.csv","datasets/synthetic")
generateFeatures(layer_name, image_itor, "synthetic")

images dataset


In [16]:
# Extract "pool_3:0" features for every image listed in
# datasets/images/trainLabels.csv; generateFeatures writes them to
# features/images/pool_3:0.csv.
layer_name = "pool_3:0"
# dataset_gen is a generator, so images are read as generateFeatures iterates.
image_itor = dataset_gen("datasets/images/trainLabels.csv","datasets/images")
generateFeatures(layer_name, image_itor, "images")

emotion dataset


In [ ]:
# Extract "pool_3:0" features for every image listed in
# datasets/emotion/samples.csv; generateFeatures writes them to
# features/emotion/pool_3:0.csv.
layer_name = "pool_3:0"
# dataset_gen is a generator, so images are read as generateFeatures iterates.
image_itor = dataset_gen("datasets/emotion/samples.csv","datasets/emotion")
generateFeatures(layer_name, image_itor, "emotion")

In [ ]:


In [ ]:


In [ ]: