Since a large CNN is very time-consuming to train (even on a GPU), and requires huge amounts of data, is there any way to use a pre-calculated one instead of retraining the whole thing from scratch?
This notebook shows how this can be done. And it works surprisingly well.
This notebook extracts a vector representation of a set of images using a CNN created by Google and pretrained on ImageNet. It then builds a 'simple SVM classifier' on those vectors, so that new images can be classified directly. No retraining of the original CNN is required.
In [ ]:
import os
from tensorflow import keras # Works with TF 1.12
#import keras
import numpy as np
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
import time
CLASS_DIR='./images/cars'
#CLASS_DIR='./images/seefood' # for HotDog vs NotHotDog
In [ ]:
# https://www.tensorflow.org/api_docs/python/tf/keras/applications/
#from tensorflow.keras.preprocessing import image as keras_preprocessing_image
from tensorflow.keras.preprocessing import image as keras_preprocessing_image
In [ ]:
#from tensorflow.python.keras.applications.nasnet import NASNetLarge, preprocess_input
#model = NASNetLarge(weights='imagenet', include_top=False) # 343,608,736
from tensorflow.keras.applications.nasnet import NASNetMobile, preprocess_input, decode_predictions
# Instantiate NASNetMobile with the 'imagenet' weights, keeping the classification
# head (include_top=True) so it can be used directly as an ImageNet classifier.
model_imagenet = NASNetMobile(weights='imagenet', include_top=True) # 24,226,656 bytes
print("Model Loaded")
Build the model and select layers we need - the features are taken from the final network layer, before the softmax nonlinearity.
In [ ]:
def image_to_input(model, img_path):
    """Load an image file and convert it into a batch-of-one input for `model`.

    Args:
        model: a Keras model; its `input_shape` decides the size the image
            is resized to on loading.
        img_path: path of the image file to load.

    Returns:
        The image as an array with a leading batch axis of length 1, after
        being passed through `preprocess_input`.
    """
    # load_img documents target_size as (height, width), so take only those
    # two entries rather than everything after the batch axis.
    # NOTE(review): assumes a channels-last input_shape (batch, H, W, C) — confirm.
    target_size = model.input_shape[1:3]
    img = keras_preprocessing_image.load_img(img_path, target_size=target_size)
    x = keras_preprocessing_image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # the network expects a batch, even of one
    return preprocess_input(x)
In [ ]:
def get_single_prediction(img_path, top=5):
    """Classify one image file with the full ImageNet model.

    Args:
        img_path: path of the image file to classify.
        top: how many of the highest-scoring classes to return.

    Returns:
        The decoded predictions for this single image, as produced by
        `decode_predictions` (callers unpack each entry as a 3-tuple).
    """
    batch = image_to_input(model_imagenet, img_path)
    class_scores = model_imagenet.predict(batch)
    # decode_predictions yields one result list per image in the batch;
    # the batch holds exactly one image, so hand back its list.
    return decode_predictions(class_scores, top=top)[0]
In [ ]:
# Display one sample image, then print the model's top guesses for it
# as "<score> <label>" lines.
img_path = './images/cat-with-tongue_224x224.jpg'
im = plt.imread(img_path)
plt.imshow(im)
plt.show()

for _, label, score in get_single_prediction(img_path):
    print("%6.2f %s" % (score, label))
In [ ]:
# Classify every 224x224 png/jpg directly inside image_dir (except the logo)
# and show each image with its top-5 labels written alongside.
image_dir = './images/'
image_files = [
    os.path.join(image_dir, f)
    for f in os.listdir(image_dir)
    if f.lower().endswith(('png', 'jpg')) and f != 'logo.png'
]

t0 = time.time()
n_classified = 0  # only images that actually went through the network
for f in image_files:
    im = plt.imread(f)
    # Only images that are already 224x224 are shown and classified here.
    if im.shape[0] != 224 or im.shape[1] != 224:
        continue

    plt.figure()
    plt.imshow(im.astype('uint8'))

    top5 = get_single_prediction(f)
    # Each entry is unpacked as a 3-tuple; only the label is displayed.
    for n, (_, label, _) in enumerate(top5):
        plt.text(350, 50 + n * 25, '{}. {}'.format(n+1, label), fontsize=14)
    plt.axis('off')
    n_classified += 1

# Average over the images really classified, and do not divide by zero
# when the directory holds no suitable image.
if n_classified:
    print("DONE : %6.2f seconds each" % ((time.time() - t0) / n_classified,))
else:
    print("DONE : no 224x224 images found in %s" % (image_dir,))
In [ ]:
#model_imagenet=None
In [ ]:
# List the network's layers; the layer names shown are what get_layer() takes.
model_imagenet.summary()
In [ ]:
# Re-use the already-loaded network as a feature extractor: build a second Model
# that shares model_imagenet's input but outputs the layer named 'predictions'.
# NOTE(review): in Keras' NASNet, 'predictions' appears to be the final softmax
# Dense layer, so these outputs are probably class probabilities rather than
# pre-softmax logits — confirm against model_imagenet.summary().
# The two commented lines are alternative feature sources (headless model / pooling layer).
#model_logits = NASNetMobile(weights='imagenet', include_top=False, pooling=None) # 19,993,200 bytes
#logits_layer = model_imagenet.get_layer('global_average_pooling2d_1')
logits_layer = model_imagenet.get_layer('predictions')
model_logits = keras.Model(inputs=model_imagenet.input,
outputs=logits_layer.output)
print("Model Loaded")
In [ ]:
#writer = tf.summary.FileWriter(logdir='../tensorflow.logdir/', graph=tf.get_default_graph())
#writer.flush()
In [ ]:
def crop_middle_square_area(np_image):
    """Return the largest centred square crop of an image array.

    Args:
        np_image: array whose first two axes are (height, width). Any
            further axes (e.g. colour channels) are kept untouched, so
            2-D grayscale arrays are accepted as well.

    Returns:
        A slice of `np_image` with shape (side, side, ...), where
        side = min(height, width). The longer axis is trimmed equally at
        both ends (the extra row/column goes at the far end when the
        difference is odd).
    """
    h, w = np_image.shape[:2]
    side = min(h, w)
    # Offsets of the square inside the image; one of them is always 0.
    top = (h - side) // 2
    left = (w - side) // 2
    # Slice BOTH axes so the result is square even for odd dimensions.
    return np_image[top:top + side, left:left + side]
# Try the crop on `im` (the image most recently loaded by an earlier cell)
# and display the resulting shape as the cell output.
im_sq = crop_middle_square_area(im)
im_sq.shape
In [ ]:
def get_logits_from_non_top(np_logits):
    """Pool the first batch item of `np_logits` over its two leading axes.

    Takes `np_logits[0]` and reduces its first two axes with a maximum
    (~ max-pooling), leaving the remaining axes as they are. Summing over
    the same axes instead would give ~ average pooling.
    """
    first_item = np_logits[0]
    return first_item.max(axis=(0, 1))
In [ ]:
# Every sub-directory of CLASS_DIR is treated as one class.
# Sorted for consistency, so each class keeps the same index between runs.
classes = sorted(
    entry
    for entry in os.listdir(CLASS_DIR)
    if os.path.isdir(os.path.join(CLASS_DIR, entry))
)
classes
In [ ]:
# Build the training set: one feature vector per image file found in each
# class sub-directory of CLASS_DIR, labelled with that class's index.
train = dict(filepath=[], features=[], target=[])

t0 = time.time()
for class_i, directory in enumerate(classes):
    class_dir = os.path.join(CLASS_DIR, directory)
    for filename in os.listdir(class_dir):
        filepath = os.path.join(class_dir, filename)
        if os.path.isdir(filepath):
            continue

        # The square crop is used for display only; the network input is
        # loaded (and resized) separately by image_to_input.
        im = plt.imread(filepath)
        im_sq = crop_middle_square_area(im)

        x = image_to_input(model_logits, filepath)
        # One vector per image: row 0 of the batch-of-one output.
        # (With a headless model and pooling=None, the output would be a
        # spatial map to reduce with get_logits_from_non_top() instead.)
        np_logits_pooled = model_logits.predict(x)[0]

        train['filepath'].append(filepath)
        train['features'].append(np_logits_pooled)
        train['target'].append(class_i)

        plt.figure()
        plt.imshow(im_sq.astype('uint8'))
        plt.axis('off')
        plt.text(2*320, 50, '{}'.format(filename), fontsize=14)
        plt.text(2*320, 80, 'Train as class "{}"'.format(directory), fontsize=12)

# len(train) would be the number of dict keys (3), not the number of images.
# The max() guard avoids dividing by zero when no image was found.
n_train = len(train['filepath'])
print("DONE : %6.2f seconds each" % ((time.time() - t0) / max(n_train, 1),))
In [ ]:
from sklearn import svm

# Fit a linear SVM on the extracted feature vectors, with the class indices
# as labels. The fit call stays last so the cell displays the fitted estimator.
X_train = train['features']
y_train = train['target']
classifier = svm.LinearSVC()
classifier.fit(X_train, y_train)  # learn from the data
In [ ]:
# Test images are the plain files sitting directly in CLASS_DIR (the class
# sub-directories hold the training images).
test_image_files = [
    f for f in os.listdir(CLASS_DIR)
    if not os.path.isdir(os.path.join(CLASS_DIR, f))
]

t0 = time.time()
for filename in sorted(test_image_files):
    filepath = os.path.join(CLASS_DIR, filename)

    # The square crop is used for display only.
    im = plt.imread(filepath)
    im_sq = crop_middle_square_area(im)

    # Two steps: load the image as network input, then run the network
    # to get the feature vector the SVM was trained on.
    x = image_to_input(model_logits, filepath)
    np_logits_pooled = model_logits.predict(x)[0]

    prediction_i = classifier.predict([np_logits_pooled])
    decision = classifier.decision_function([np_logits_pooled])

    # With two classes decision[0] is a single signed distance; with more
    # classes it is one score per class, and the predicted class is the one
    # with the highest score. np.max covers both cases and always gives a
    # scalar that the '{:5.2f}' format can handle.
    distance = float(np.max(decision[0]))

    plt.figure()
    plt.imshow(im_sq.astype('uint8'))
    plt.axis('off')

    prediction = classes[prediction_i[0]]
    plt.text(2*320, 50, '{} : Distance from boundary = {:5.2f}'.format(prediction, distance), fontsize=20)
    plt.text(2*320, 75, '{}'.format(filename), fontsize=14)

# The max() guard avoids dividing by zero when CLASS_DIR holds no test images.
print("DONE : %6.2f seconds each" % ((time.time() - t0) / max(len(test_image_files), 1),))
The whole training regime here is based on the way the image directories are structured. So building your own example shouldn't be very difficult.
Suppose you wanted to classify pianos into Upright and Grand :
* Create a `pianos` directory and point the `CLASS_DIR` variable at it
* Within the `pianos` directory, create subdirectories for each of the classes (i.e. `Upright` and `Grand`). The directory names will be used as the class labels
* Put the training images for each class inside its subdirectory
* Put the test images in the `pianos` directory itself (which is logical, since we don't know their classes yet)

Finally, re-run everything - checking that the training images are read in correctly, that there are no errors along the way, and that (finally) the class predictions on the test set come out as expected.
If/when it works - please let everyone know : We can add that as an example for next time...
In [ ]: