In [1]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This notebook demonstrates building a hierarchical image classifier based on an HD-CNN derivative, which uses cascading classifiers to predict a label from coarse to finer classes.
In this demonstration, the hierarchy has two levels: fruits and varieties of fruit. The model first predicts the coarse class (the type of fruit) and then, within that class, the variety. For example, given an image of an Apple Granny Smith, it would first predict 'Apple' (fruit) and then predict 'Apple Granny Smith' (variety).
This derivative of the HD-CNN is designed to demonstrate both the methodology of hierarchical classification and design improvements that were not available in 2014, when the model was first published by Zhicheng Yan et al.
Our HD-CNN derivative architecture consists of:
1. A stem convolutional block.
   - The output from the stem convolutional block is shared by the coarse and finer classifiers
     (referred to as the shared layers in the paper).
2. A coarse classifier.
   - Convolutional and dense layers for classifying the coarse level class.
3. A set of finer classifiers, one per coarse level class.
   - Convolutional and dense layers per coarse level class for classifying the corresponding finer
     level class.
4. A conditional execution step that selects a specific finer classifier based on the output of the
   coarse classifier.
   - The coarse level class is predicted.
   - The index of the prediction is used to select a finer classifier.
   - An in-memory copy of the shared bottleneck layer's output (i.e., the last convolutional layer in the stem) is passed as the
     input to the finer level classifier.
Our HD-CNN derivative is trained as follows:
1. Train the coarse level classifier using the coarse level labels in the dataset.
2. Train a finer level classifier per coarse level class, using the corresponding subset (with finer
   labels) of the dataset. The shared layers are frozen first, so training a finer head does not disturb the coarse weights (see the sketch below).
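Below is a minimal sketch of that freeze step, assuming `model` is the trained stem + coarse classifier; the Bottleneck() helper later in this notebook does the same thing:
In [ ]:
# Freeze all shared layers so that training a finer head does not
# disturb the weights learned during coarse training.
for layer in model.layers:
    layer.trainable = False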
We will be using the Fruits-360 dataset, which was formerly a Kaggle competition. It consists of images of fruit labeled by fruit type and variety.
1. There are a total of 47 types of fruit (e.g., Apple, Orange, Pear, etc.) and 81 varieties.
2. On average, there are 656 images per variety.
3. Each image is 100x100 RGB (the input shape used by the stem model below).
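As a quick sanity check on these numbers, the unzipped `Training/` directory (fruit subdirectories, each containing variety subdirectories of images) can be counted directly; a minimal sketch:
In [ ]:
import os

# Count fruits, varieties, and images from the directory layout
root = 'Training'
n_fruits = n_varieties = n_images = 0
for fruit in os.scandir(root):
    if not fruit.is_dir():
        continue
    n_fruits += 1
    for variety in os.scandir(fruit.path):
        if not variety.is_dir():
            continue
        n_varieties += 1
        n_images += len(os.listdir(variety.path))
print(n_fruits, "fruits,", n_varieties, "varieties,", n_images, "images")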
The objective is to train a hierarchical image classifier (coarse and then finer label) using a cascading layer architecture. First, the shared layers and coarse classifier are trained. Then the cascading finer classifiers are trained.
For prediction, the softmax output of the coarse classifier conditionally selects the corresponding finer classifier, which reuses the feature maps from the shared layers.
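The conditional execution can be sketched with stand-in models. This is a minimal illustration only; `coarse_predict` and `finer_classifiers` are hypothetical stand-ins for the real models built later in this notebook:
In [ ]:
import numpy as np

# Hypothetical stand-ins: a coarse classifier and one finer classifier per
# coarse class, each returning softmax-like probabilities.
def coarse_predict(x):
    return np.array([0.1, 0.7, 0.2])          # 3 coarse classes

finer_classifiers = {
    0: lambda x: np.array([0.9, 0.1]),        # 2 varieties
    1: lambda x: np.array([0.2, 0.5, 0.3]),   # 3 varieties
    2: lambda x: np.array([1.0]),             # 1 variety (coarse == finer)
}

x = np.zeros((1, 100, 100, 3))                # dummy image
coarse = int(np.argmax(coarse_predict(x)))    # 1. predict the coarse class
variety = int(np.argmax(finer_classifiers[coarse](x)))  # 2. route to the finer head
print("coarse class:", coarse, "variety:", variety)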
This notebook requires 17GB of memory. It will not run on a Standard TF JaaS instance (15GB); you will need to select an instance with more than 17GB of memory.
In [1]:
!gsutil cp gs://cloud-samples-data/air/fruits360/fruits360-combined.zip .
!ls
!unzip -qn fruits360-combined.zip
We will be using the following frameworks and Python modules:
1. The Keras framework for building and training models.
2. Keras builtin models (ResNet50).
3. Keras preprocessing for feeding and augmenting the dataset during training.
4. OpenCV (cv2) for reading and decoding the image data.
5. scikit-learn for splitting the dataset into train and test sets.
6. NumPy for general image/matrix manipulation.
In [2]:
import os
import numpy as np
import cv2
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras import Model, Sequential, Input, optimizers
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import (Conv2D, Flatten, MaxPooling2D, Dense, Dropout,
                          BatchNormalization, ReLU, GlobalAveragePooling2D)
from keras.models import load_model
from keras.utils import to_categorical
import keras.layers as layers
In [3]:
def Fruits(root):
    n_label = 0
    images = []
    labels = []
    classes = {}
    os.chdir(root)
    classes_ = os.scandir('./')
    for class_ in classes_:
        print(class_.name)
        os.chdir(class_.name)
        classes[class_.name] = n_label
        # Finer Level Subdirectories per Coarse Level
        subclasses = os.scandir('./')
        for subclass in subclasses:
            os.chdir(subclass.name)
            files = os.listdir('./')
            for file in files:
                image = cv2.imread(file)
                images.append(image)
                labels.append(n_label)
            os.chdir('../')
        os.chdir('../')
        n_label += 1
    os.chdir('../')
    images = np.asarray(images)
    images = (images / 255.0).astype(np.float32)
    labels = to_categorical(labels, n_label)
    print("Images", images.shape, "Labels", labels.shape, "Classes", classes)
    # Split the processed image dataset into training and test data
    x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.20, shuffle=True)
    return x_train, x_test, y_train, y_test, classes
In [4]:
def Varieties(root):
    ''' Generate Cascade (Finer) Level Dataset for Fruit Varieties '''
    datasets = {}
    os.chdir(root)
    fruits = os.scandir('./')
    for fruit in fruits:
        n_label = 0
        images = []
        labels = []
        classes = {}
        print('FRUIT', fruit.name)
        os.chdir(fruit.name)
        varieties = os.scandir('./')
        for variety in varieties:
            print('VARIETY', variety.name)
            classes[variety.name] = n_label
            os.chdir(variety.name)
            files = os.listdir('./')
            for file in files:
                image = cv2.imread(file)
                images.append(image)
                labels.append(n_label)
            os.chdir('../')
            n_label += 1
        images = np.asarray(images)
        images = (images / 255.0).astype(np.float32)
        labels = to_categorical(labels, n_label)
        x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.20, shuffle=True)
        datasets[fruit.name] = (x_train, x_test, y_train, y_test, classes)
        os.chdir('../')
        print("IMAGES", x_train.shape, y_train.shape, "CLASSES", classes)
    os.chdir('../')
    return datasets
In [5]:
!free -m
x_train, x_test, y_train, y_test, fruits_classes = Fruits('Training')
!free -m
In [6]:
# Split out 10% of Train to use for Validation
pivot = int(len(x_train) * 0.9)
x_val = x_train[pivot:]
y_val = y_train[pivot:]
x_train = x_train[:pivot]
y_train = y_train[:pivot]
print("train", x_train.shape, y_train.shape)
print("val ", x_val.shape, y_val.shape)
print("test ", x_test.shape, y_test.shape)
!free -m
In [7]:
def Feeder():
    datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=30)
    return datagen
In [8]:
def Train(model, datagen, x_train, y_train, x_test, y_test, epochs=10, batch_size=32):
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size, shuffle=True),
                        steps_per_epoch=len(x_train) // batch_size, epochs=epochs, verbose=1,
                        validation_data=(x_test, y_test))
    scores = model.evaluate(x_train, y_train, verbose=1)
    print("Train", scores)
We will use this base model as the stem convolutional block of the cascading model:
1. The output of this model is a set of pooled feature maps.
2. The last layer that produces this set of pooled feature maps is referred to as the bottleneck layer.
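For reference, the bottleneck's pooled feature maps can be pulled out with a small feature-extractor model. A minimal sketch, assuming `model` is the trained stem model built below (its last pooling layer is named 'bottleneck') and `x_test` holds test images:
In [ ]:
from keras.models import Model

# Build a feature extractor that stops at the shared bottleneck layer
extractor = Model(inputs=model.input,
                  outputs=model.get_layer('bottleneck').output)
features = extractor.predict(x_test[:1])   # pooled feature maps for one image
print(features.shape)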
The coarse classifier is an independent block of layers for classifying the coarse level label:
1. Input is the bottleneck layer from the stem convolutional block.
2. The block consists of a convolutional layer and a dense layer, where the dense layer is the classifier.
The finer classifiers are a set of independent blocks of layers for classifying the finer label. There is one finer classifier per unique coarse level label.
1. Input is the bottleneck layer from the stem convolutional block.
2. Each block consists of a convolutional layer and a dense layer, where the dense layer is the classifier.
3. The finer classifier is conditionally executed based on the softmax output of the coarse classifier.
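A minimal sketch of a single finer head wired onto the shared bottleneck. Here `n_varieties` is a hypothetical count for one coarse class; the actual heads (which also add batch normalization) are built in the cascade cells later in this notebook:
In [ ]:
from keras import Model
import keras.layers as layers

n_varieties = 5   # hypothetical variety count for one coarse class

# Reuse the shared feature maps; only the head's weights are new.
x = layers.Conv2D(128, (3, 3), padding='same', activation='relu')(
        model.get_layer('bottleneck').output)
x = layers.Flatten()(x)
x = layers.Dense(1024, activation='relu')(x)
out = layers.Dense(n_varieties, activation='softmax')(x)
finer = Model(model.input, out)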
In [9]:
def ResNet(shape=(128, 128, 3), nclasses=47, optimizer='adam', weights=None):
    base_model = ResNet50(weights=weights, include_top=False, input_shape=shape)
    # first: train only the top layers (which were randomly initialized) for Transfer Learning
    if weights is not None:
        for layer in base_model.layers:
            layer.trainable = False
    # label the last convolutional layer in the base model as the bottleneck
    base_model.layers[-1].name = 'bottleneck'
    # Get the last convolutional layer of the ResNet base model
    x = base_model.output
    # add a global spatial average pooling layer
    x = GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    #x = Dense(1024, activation='relu')(x)
    # and a logistic layer
    predictions = Dense(nclasses, activation='softmax')(x)
    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.summary()
    return model
The stem convolutional block is a mini-VGG, which consists of:
1. A convolutional input (stem).
2. Three convolutional groups, each doubling the number of filters.
3. Each convolutional group consists of one convolutional block.
4. A dropout of 50% added to the first convolutional group.
The coarse classifier consists of:
1. A 1024-node dense layer.
2. A 47-node dense layer for classification.
In [10]:
def ConvNet(shape=(128, 128, 3), nclasses=47, optimizer='adam'):
    model = Sequential()
    # stem convolutional group
    model.add(Conv2D(16, (3,3), padding='same', activation='relu', input_shape=shape))
    # conv block - double filters
    model.add(Conv2D(32, (3,3), padding='same'))
    model.add(ReLU())
    model.add(Dropout(0.50))
    model.add(MaxPooling2D((2,2)))
    # conv block - double filters
    model.add(Conv2D(64, (3,3), padding='same'))
    model.add(ReLU())
    model.add(MaxPooling2D((2,2)))
    # conv block - double filters + bottleneck layer
    model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(MaxPooling2D((2,2), name="bottleneck"))
    # dense block
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.25))
    # classifier
    model.add(Dense(nclasses, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.summary()
    return model
In [11]:
# Select the model for the stem convolutional group (shared layers)
stem = 'ConvNet'
if stem == 'ConvNet':
    model = ConvNet(shape=(100, 100, 3))
elif stem == 'ResNet-imagenet':
    model = ResNet(weights='imagenet', optimizer='adagrad')
elif stem == 'ResNet':
    model = ResNet()
# load previously stored model
else:
    model = load_model('model.h5')
In [12]:
datagen = Feeder()
Train(model, datagen, x_train, y_train, x_val, y_val, 5)
scores = model.evaluate(x_test, y_test, verbose=1)
print("Test", scores)
In [13]:
# Save the model and weights
model.save("model-coarse.h5")
In [14]:
def Bottleneck(model):
    for layer in model.layers:
        layer.trainable = False
        if layer.name == 'bottleneck':
            bottleneck = layer
    print("BOTTLENECK", bottleneck.output.shape)
    return bottleneck
In [15]:
# Conserve memory by releasing the training data for the coarse model
import gc
x_train = y_train = x_val = y_val = x_test = y_test = None
gc.collect()
Out[15]:
In [16]:
varieties_datasets = Varieties('Training')
for key, dataset in varieties_datasets.items():
    _x_train, _x_test, _y_train, _y_test, classes = dataset
    # Separate out 10% of train for validation
    pivot = int(len(_x_train) * 0.9)
    _x_val = _x_train[pivot:]
    _y_val = _y_train[pivot:]
    _x_train = _x_train[:pivot]
    _y_train = _y_train[:pivot]
    # save the dataset for this fruit (key)
    varieties_datasets[key] = { 'classes': classes, 'train': (_x_train, _y_train), 'val': (_x_val, _y_val), 'test': (_x_test, _y_test) }
!free -m
In [17]:
bottleneck = Bottleneck(model)
cascades = []
for key, val in varieties_datasets.items():
    classes = val['classes']
    print("KEY", key, classes)
    # if only one subclassifier, then skip (i.e., coarse == finer)
    if len(classes) == 1:
        continue
    x = layers.Conv2D(128, (3,3), padding='same', activation='relu')(bottleneck.output)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(1024, activation='relu')(x)
    x = layers.Dense(len(classes), activation='softmax', name=key.replace(' ', ''))(x)
    cascades.append(x)
In [18]:
classifiers = []
for cascade in cascades:
    _model = Model(model.input, cascade)
    _model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    _model.summary()
    classifiers.append(_model)
In [19]:
for classifier in classifiers:
    # get the output layer for this subclassifier
    last = classifier.layers[-1]
    print(last, last.name)
    # find the corresponding variety dataset (layer names have spaces removed)
    for key, dataset in varieties_datasets.items():
        if key.replace(' ', '') == last.name:
            x_train, y_train = dataset['train']
            x_val, y_val = dataset['val']
            datagen = Feeder()
            Train(classifier, datagen, x_train, y_train, x_val, y_val, 5)
In [20]:
for classifier in classifiers:
    # get the output layer for this subclassifier
    last = classifier.layers[-1]
    print(last, last.name)
    # find the corresponding variety dataset (layer names have spaces removed)
    for key, dataset in varieties_datasets.items():
        if key.replace(' ', '') == last.name:
            x_test, y_test = dataset['test']
            scores = classifier.evaluate(x_test, y_test, verbose=1)
            print("Test", scores)
In [21]:
# Save each finer classifier's model and weights
for n, classifier in enumerate(classifiers):
    classifier.save('model-finer-' + str(n) + '.h5')
We will take one randomly selected image per type of fruit, and:
1. Run the image through the coarse classifier (by fruit).
2. Based on the predicted output, select the corresponding finer classifier (by variety).
3. Run the image through the corresponding finer classifier.
In [22]:
import random
# Let's make a prediction for each type of fruit
for key, dataset in varieties_datasets.items():
    # Get the variety test data for this type of fruit
    x_test, y_test = dataset['test']
    # pick a random image in the variety dataset
    index = random.randint(0, len(x_test) - 1)
    # use the coarse model to predict the type of fruit
    yhat = np.argmax( model.predict(x_test[index:index+1]) )
    # let's find the class name (type of fruit) for this predicted label
    for fruit, label in fruits_classes.items():
        if label == yhat:
            break
    print("Yhat", yhat, "Coarse Prediction", key, "=", fruit)
    # Prediction was correct
    if key == fruit:
        if len(dataset['classes']) == 1:
            print("No Finer Classifier")
            continue
        # find the corresponding finer classifier for this type of fruit
        for classifier in classifiers:
            # get the output layer for this subclassifier (spaces removed from its name)
            last = classifier.layers[-1]
            if last.name == fruit.replace(' ', ''):
                # use the finer model to predict the variety of this type of fruit
                yhat = np.argmax(classifier.predict(x_test[index:index+1]))
                # look up the true and predicted variety names
                for variety, value in dataset['classes'].items():
                    if value == np.argmax(y_test[index]):
                        break
                for yhat_variety, value in dataset['classes'].items():
                    if value == yhat:
                        break
                print("Yhat", yhat, "Finer Prediction", variety, "=", yhat_variety)
                break
In [ ]:
# extractfeatures = Model(inputs=model.input, outputs=model.get_layer('bottleneck').output)