Since a large CNN is very time-consuming to train (even on a GPU), and requires huge amounts of data, is there any way to use a pre-calculated one instead of retraining the whole thing from scratch?
This notebook shows how this can be done. And it works surprisingly well.
This notebook extracts a vector representation for a set of images using the GoogLeNet (Inception v1) CNN pretrained on ImageNet. It then builds a simple SVM classifier on top of those features, so that new images can be classified directly. No retraining of the original CNN is required.
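In outline, the recipe is just "pre-trained CNN as a fixed feature extractor, plus a linear classifier on top". Here is a minimal conceptual sketch - extract_features() is a made-up placeholder for the Inception v1 forward pass that the cells below define for real :
In [ ]:
# Conceptual sketch only - extract_features() stands in for the CNN forward
# pass built later in this notebook; nothing here is executed as-is
from sklearn import svm

#X_train = [ extract_features(image) for image in training_images ]   # one fixed-length vector per image
#y_train = [ class_index_of(image)   for image in training_images ]   # integer label per image
#classifier = svm.LinearSVC().fit(X_train, y_train)
#classifier.predict([ extract_features(new_image) ])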
In [ ]:
import tensorflow as tf
import numpy as np
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
import time
from urllib.request import urlopen # Python 3+ version (instead of urllib2)
CLASS_DIR='./images/cars'
#CLASS_DIR='./images/seefood' # for HotDog vs NotHotDog
In [ ]:
import os, sys
slim_models_dir = './models/tensorflow_zoo'
if not os.path.exists(slim_models_dir):
    print("Creating models/tensorflow_zoo directory")
    os.makedirs(slim_models_dir)

if not os.path.isfile( os.path.join(slim_models_dir, 'models', 'README.md') ):
    print("Cloning tensorflow model zoo under %s" % (slim_models_dir, ))
    !cd {slim_models_dir}; git clone https://github.com/tensorflow/models.git

sys.path.append(slim_models_dir + "/models/slim")

print("Model Zoo model code installed")
In [ ]:
from datasets import dataset_utils
targz = "inception_v1_2016_08_28.tar.gz"
url = "http://download.tensorflow.org/models/"+targz
checkpoints_dir = './data/tensorflow_zoo/checkpoints'
if not os.path.exists(checkpoints_dir):
    os.makedirs(checkpoints_dir)

if not os.path.isfile( os.path.join(checkpoints_dir, 'inception_v1.ckpt') ):
    tarfilepath = os.path.join(checkpoints_dir, targz)
    if os.path.isfile(tarfilepath):
        import tarfile
        tarfile.open(tarfilepath, 'r:gz').extractall(checkpoints_dir)
    else:
        dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir)

    # Get rid of the tarfile source (the checkpoint itself will remain)
    os.unlink(tarfilepath)
print("Checkpoint available locally")
Build the model and select the layers we need - the features are taken from the final network layer, just before the softmax nonlinearity.
In [ ]:
slim = tf.contrib.slim
from nets import inception
from preprocessing import inception_preprocessing
image_size = inception.inception_v1.default_image_size
image_size
In [ ]:
imagenet_labels_file = './data/imagenet_synset_words.txt'
if os.path.isfile(imagenet_labels_file):
    print("Loading ImageNet synset data locally")
    with open(imagenet_labels_file, 'r') as f:
        imagenet_labels = {0: 'background'}
        for i, line in enumerate(f.readlines()):
            # n01440764 tench, Tinca tinca
            synset, human = line.strip().split(' ', 1)
            imagenet_labels[i+1] = human
else:
    print("Downloading ImageNet synset data from repo")
    from datasets import imagenet
    imagenet_labels = imagenet.create_readable_names_for_imagenet_labels()

print("ImageNet synset labels available")
In [ ]:
tf.reset_default_graph()
# This creates an image 'placeholder'
# input_image = tf.image.decode_jpeg(image_string, channels=3)
input_image = tf.placeholder(tf.uint8, shape=[None, None, 3], name='input_image')
# Define the pre-processing chain within the graph - based on the input 'image' above
processed_image = inception_preprocessing.preprocess_image(input_image, image_size, image_size, is_training=False)
processed_images = tf.expand_dims(processed_image, 0)
# Reverse out some of the transforms, so we can see the area/scaling of the inception input
numpyish_image = tf.multiply(processed_image, 0.5)
numpyish_image = tf.add(numpyish_image, 0.5)
numpyish_image = tf.multiply(numpyish_image, 255.0)
# Create the model - which uses the above pre-processing on image
# it also uses the default arg scope to configure the batch norm parameters.
print("Model builder starting")
# Here is the actual model zoo model being instantiated :
with slim.arg_scope(inception.inception_v1_arg_scope()):
    logits, _ = inception.inception_v1(processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

# Create an operation that loads the pre-trained model from the checkpoint
init_fn = slim.assign_from_checkpoint_fn(
    os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
    slim.get_model_variables('InceptionV1')
)
print("Model defined")
In [ ]:
#writer = tf.summary.FileWriter(logdir='../tensorflow.logdir/', graph=tf.get_default_graph())
#writer.flush()
In [ ]:
if False:
    # Read from the Web
    from io import BytesIO
    url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'
    image_string = urlopen(url).read()
    im = plt.imread(BytesIO(image_string), format='jpg')
In [ ]:
if False:
    # Read from a file via a queue ==> brain damage in jupyter
    #filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once("./images/*.jpg") )
    filename_queue = tf.train.string_input_producer( ['./images/cat-with-tongue_224x224.jpg'] )
    #_ = filename_queue.dequeue() # Ditch the first value
    image_reader = tf.WholeFileReader()
    _, image_string = image_reader.read(filename_queue)
In [ ]:
# Read from a file
im = plt.imread("./images/cat-with-tongue_224x224.jpg")
In [ ]:
print(im.shape, im[0,0]) # (height, width, channels), (uint8, uint8, uint8)
In [ ]:
def crop_middle_square_area(np_image):
    h, w, _ = np_image.shape
    h = int(h/2)
    w = int(w/2)
    if h>w:
        return np_image[ h-w:h+w, : ]
    return np_image[ :, w-h:w+h ]
im_sq = crop_middle_square_area(im)
im_sq.shape
In [ ]:
# Now let's run the pre-trained model
with tf.Session() as sess:
    # This is the loader 'op' we defined above
    init_fn(sess)

    # This is two ops : one merely loads the image from numpy,
    # the other runs the network to get the class probabilities
    np_image, np_probs = sess.run([numpyish_image, probabilities], feed_dict={input_image:im_sq})

# These are regular numpy operations
probs = np_probs[0, :]
sorted_inds = [i[0] for i in sorted(enumerate(-probs), key=lambda x:x[1])]

# And now plot out the results
plt.figure()
plt.imshow(np_image.astype(np.uint8))
plt.axis('off')
plt.show()

for i in range(5):
    index = sorted_inds[i]
    print('Probability %0.2f%% => [%s]' % (probs[index]*100.0, imagenet_labels[index]))  # scale to percent
In [ ]:
import os
classes = sorted( [ d for d in os.listdir(CLASS_DIR) if os.path.isdir("%s/%s" % (CLASS_DIR, d)) ] )
classes # Sorted for consistency
In [ ]:
train = dict(filepath=[], features=[], target=[])
with tf.Session() as sess:
    # This is the loader 'op' we defined above
    init_fn(sess)
    print("Loaded pre-trained model")

    t0 = time.time()
    for class_i, directory in enumerate(classes):
        for filename in os.listdir("%s/%s" % (CLASS_DIR, directory, )):
            filepath = '%s/%s/%s' % (CLASS_DIR, directory, filename, )
            if os.path.isdir(filepath): continue

            im = plt.imread(filepath)
            im_sq = crop_middle_square_area(im)

            # This is two ops : one merely loads the image from numpy,
            # the other runs the network to get the 'logit features'
            rawim, np_logits = sess.run([numpyish_image, logits], feed_dict={input_image:im_sq})

            train['filepath'].append(filepath)
            train['features'].append(np_logits[0])
            train['target'].append( class_i )

            plt.figure()
            plt.imshow(rawim.astype('uint8'))
            plt.axis('off')
            plt.text(320, 50, '{}'.format(filename), fontsize=14)
            plt.text(320, 80, 'Train as class "{}"'.format(directory), fontsize=12)

print("DONE : %6.2f seconds each" % (float(time.time() - t0)/len(train['filepath']),))  # average over images, not dict keys
In [ ]:
#train['features'][0]
In [ ]:
from sklearn import svm
classifier = svm.LinearSVC()
classifier.fit(train['features'], train['target']) # learn from the data
In [ ]:
test_image_files = [f for f in os.listdir(CLASS_DIR) if not os.path.isdir("%s/%s" % (CLASS_DIR, f))]
with tf.Session() as sess:
    # This is the loader 'op' we defined above
    init_fn(sess)
    print("Loaded pre-trained model")

    t0 = time.time()
    for filename in sorted(test_image_files):
        im = plt.imread('%s/%s' % (CLASS_DIR, filename,))
        im_sq = crop_middle_square_area(im)

        # This is two ops : one merely loads the image from numpy,
        # the other runs the network to get the 'logit features' for the SVM
        rawim, np_logits = sess.run([numpyish_image, logits], feed_dict={input_image:im_sq})

        prediction_i = classifier.predict([ np_logits[0] ])
        decision     = classifier.decision_function([ np_logits[0] ])

        plt.figure()
        plt.imshow(rawim.astype('uint8'))
        plt.axis('off')

        prediction = classes[ prediction_i[0] ]
        plt.text(350, 50, '{} : Distance from boundary = {:5.2f}'.format(prediction, decision[0]), fontsize=20)
        plt.text(350, 75, '{}'.format(filename), fontsize=14)

print("DONE : %6.2f seconds each" % (float(time.time() - t0)/len(test_image_files),))
The whole training regime here is based on the way the image directories are structured. So building your own example shouldn't be very difficult.
Suppose you wanted to classify pianos into Upright and Grand :

- Create a pianos directory and point the CLASS_DIR variable at it
- Within the pianos directory, create a subdirectory for each of the classes (i.e. Upright and Grand), and put the training images for each class inside the matching subdirectory. The directory names will be used as the class labels
- Put the un-labelled test images directly into the pianos directory itself (which is logical, since we don't know their classes yet) - see the layout sketch below

Finally, re-run everything - checking that the training images are read in correctly, that there are no errors along the way, and that (finally) the class predictions on the test set come out as expected.
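As a concrete sketch of the layout this implies (all the names below are placeholders for this hypothetical pianos example) :
In [ ]:
# Hypothetical directory layout for the 'pianos' example described above
# (file names are placeholders - only the structure matters) :
#
#   ./images/pianos/
#       Upright/              <- training images for class 'Upright'
#           upright_01.jpg
#           upright_02.jpg
#       Grand/                <- training images for class 'Grand'
#           grand_01.jpg
#           grand_02.jpg
#       mystery_piano_1.jpg   <- un-labelled test images live directly here
#       mystery_piano_2.jpg
#
# ... and then point this notebook at it :
CLASS_DIR = './images/pianos'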
If/when it works - please let everyone know : We can add that as an example for next time...
In [ ]: