Here we present the process of fine-tuning the ResNet50 network (from keras.applications).
We use ResNet50 (from keras.applications), which is already pretrained on the ImageNet database. Next, we add some additional layers in order to train the network on the CIFAR10 dataset.
We use the Keras Python deep learning library. Namely, we follow the keras.applications tutorial.
One can try to fine-tune any of the pretrained networks available in keras.applications; this notebook supports InceptionV3, ResNet50, VGG16 and VGG19.
All of them can be initialized with weights pretrained on the ImageNet dataset. Here is an example of loading the InceptionV3 CNN with Keras:
from keras.applications.inception_v3 import InceptionV3
model = InceptionV3(include_top=False, weights='imagenet', \
input_tensor=None, input_shape=None, pooling=None, classes=1000)
The most important parameter for transfer learning is
include_top=False
since it builds the CNN model without the last (top) layer, which is responsible for classifying images into the 1000 ImageNet classes.
In [1]:
# Identifiers of the pretrained networks this notebook knows how to build.
network_names = ['incv3', 'resnet50', 'vgg16', 'vgg19']
print("Available networks = ", network_names)
# Hard-coded selection: index 1 is 'resnet50' (InceptionV3 would be index 0).
# To choose interactively instead, use:
#   cnnid = int(input("Please choose the CNN network [0-{n}]: ".format(n=len(network_names)-1)))
cnnid = 1
selected_network = network_names[cnnid]
print("Selected network: ", selected_network)
In [2]:
import time
import myutils
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, AveragePooling2D, GlobalAveragePooling2D
from keras import backend as K
Here we use keras.datasets which is pretty similar to our myutils.load_CIFAR10_dataset() procedure.
In [3]:
# Load CIFAR10 through keras.datasets and record the sizes used by later cells.
from keras.datasets import cifar10
from matplotlib import pyplot as plt

n_classes = 10

(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# flatten() turns the label arrays into 1-D vectors.
y_train, y_test = y_train.flatten(), y_test.flatten()
n_training, n_testing = X_train.shape[0], X_test.shape[0]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Display the first training image as a quick sanity check.
plt.imshow(X_train[0])
plt.show()
In [4]:
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
# Input tensor shape expected by each supported network; only the entry for
# the selected network is kept.
input_shape = dict(
    incv3=(299, 299, 3),
    resnet50=(224, 224, 3),
    vgg16=(224, 224, 3),
    vgg19=(224, 224, 3),
)[selected_network]
def create_model_incv3():
    """Build InceptionV3 without its top layer plus a new classification head.

    Returns:
        A ``(base_model, model)`` tuple. ``base_model`` is the ImageNet-weighted
        InceptionV3 built with ``include_top=False``; ``model`` stacks global
        average pooling, a 1024-unit ReLU layer and an ``n_classes``-way
        softmax on top of it.
    """
    backbone = InceptionV3(
        input_tensor=Input(shape=input_shape),
        weights='imagenet',
        include_top=False,
    )
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    class_probs = Dense(n_classes, activation='softmax')(hidden)
    return backbone, Model(inputs=backbone.input, outputs=class_probs)
def create_model_resnet50():
    """Build ResNet50 without its top layer plus a new classification head.

    The ImageNet weights are requested explicitly, matching
    ``create_model_incv3``, instead of relying on the library default.

    Returns:
        A ``(base_model, model)`` tuple. ``base_model`` is ResNet50 built with
        ``include_top=False``; ``model`` stacks global average pooling, a
        1024-unit ReLU layer and an ``n_classes``-way softmax on top of it.
    """
    tf_input = Input(shape=input_shape)
    base_model = ResNet50(input_tensor=tf_input, weights='imagenet', include_top=False)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return base_model, model
# TODO: build all the rest :-)
def create_model_vgg16():
    """Build VGG16 without its top layer plus a new classification head.

    The notebook calls ``base_model, model = create_model()`` and then trains
    ``model`` against one-hot labels, so this builder returns the same
    ``(base_model, model)`` pair, with the same head, as
    ``create_model_incv3`` and ``create_model_resnet50``.

    Returns:
        A ``(base_model, model)`` tuple. ``base_model`` is the ImageNet-weighted
        VGG16 built with ``include_top=False``; ``model`` stacks global average
        pooling, a 1024-unit ReLU layer and an ``n_classes``-way softmax on
        top of it.
    """
    tf_input = Input(shape=input_shape)
    base_model = VGG16(input_tensor=tf_input, weights='imagenet', include_top=False)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return base_model, model
def create_model_vgg19():
    """Build VGG19 without its top layer plus a new classification head.

    The notebook calls ``base_model, model = create_model()`` and then trains
    ``model`` against one-hot labels, so this builder returns the same
    ``(base_model, model)`` pair, with the same head, as
    ``create_model_incv3`` and ``create_model_resnet50``.

    Returns:
        A ``(base_model, model)`` tuple. ``base_model`` is the ImageNet-weighted
        VGG19 built with ``include_top=False``; ``model`` stacks global average
        pooling, a 1024-unit ReLU layer and an ``n_classes``-way softmax on
        top of it.
    """
    tf_input = Input(shape=input_shape)
    base_model = VGG19(input_tensor=tf_input, weights='imagenet', include_top=False)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return base_model, model
# Pick the builder function that corresponds to the selected network.
create_model = dict(
    incv3=create_model_incv3,
    resnet50=create_model_resnet50,
    vgg16=create_model_vgg16,
    vgg19=create_model_vgg19,
)[selected_network]
In [5]:
# Mini-batch size per network (currently the same for all of them).
batch_size = dict(
    incv3=16,
    resnet50=16,
    vgg16=16,
    vgg19=16,
)[selected_network]

# TensorFlow placeholder for a batch of 32x32x3 uint8 images from CIFAR10.
batch_of_images_placeholder = tf.placeholder(tf.uint8, shape=(None, 32, 32, 3))

# Op that resizes a batch to the first two dimensions of the selected
# network's input_shape.
tf_resize_op = tf.image.resize_images(
    batch_of_images_placeholder,
    input_shape[:2],
    method=0,
)
In [6]:
# data generator for tensorflow session
from keras.applications.inception_v3 import preprocess_input as incv3_preprocess_input
from keras.applications.resnet50 import preprocess_input as resnet50_preprocess_input
from keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
from keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input
# Each network ships its own preprocessing function; keep the one that
# belongs to the selected network.
preprocess_input = dict(
    incv3=incv3_preprocess_input,
    resnet50=resnet50_preprocess_input,
    vgg16=vgg16_preprocess_input,
    vgg19=vgg19_preprocess_input,
)[selected_network]
def data_generator(sess, data, labels):
    """Create a factory for endless (images, labels) batch generators.

    Args:
        sess: session used to run the resize op on each batch.
        data: image array; ``data.shape[0]`` is taken as the sample count.
        labels: label array, sliced with the same indices as ``data``.

    Returns:
        A zero-argument function. Calling it yields
        ``(preprocessed_images, labels)`` tuples of up to ``batch_size``
        samples forever, restarting from the first sample once the end of
        ``data`` has been reached.
    """
    def generator():
        n_samples = data.shape[0]
        lo = 0
        while True:
            hi = lo + batch_size
            resized = sess.run(tf_resize_op, {batch_of_images_placeholder: data[lo:hi]})
            batch_images = preprocess_input(resized)
            batch_labels = labels[lo:hi]
            # Advance to the next window, wrapping around at the end of the data.
            lo = hi if hi < n_samples else 0
            yield (batch_images, batch_labels)
    return generator
In [7]:
# Session shared by the resize op, the Keras backend and the later `.eval()` call.
sess = tf.InteractiveSession()
In [8]:
# Hand the session to the Keras backend.
K.set_session(sess)
# NOTE(review): the learning phase is fixed to 1 (train) here and is never
# switched back in the visible notebook before the model.predict /
# predict_generator calls - confirm this is intended.
K.set_learning_phase(1) # 0 - test, 1 - train
In [9]:
# create_model is the builder chosen for selected_network; it is expected to
# return the headless base network and the full model with the new head.
base_model, model = create_model()
In [10]:
# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
print(i, layer.name)
In [11]:
for i, layer in enumerate(model.layers):
print(i, layer.name)
In [12]:
# Stage 1: train only the newly added (randomly initialized) top layers,
# i.e. freeze every layer of the pretrained base network.
for layer in base_model.layers:
    layer.trainable = False

# Compile only *after* the layers have been marked non-trainable.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
)
In [13]:
# One-hot encode the integer training labels; `.eval()` is called without an
# explicit session, so it relies on a default session being active.
y_train_one_hot = tf.one_hot( y_train, n_classes ).eval()
In [14]:
# Factory for endless (images, one-hot labels) batch generators over the training set.
data_train_gen = data_generator(sess, X_train, y_train_one_hot )
In [15]:
# Train the new top layers on CIFAR10 (no `epochs` argument is given, so the
# library default applies).
# steps_per_epoch is a count of batches, so pass an integer; rounding up makes
# sure a trailing partial batch is still consumed.
model.fit_generator(data_train_gen(), int(np.ceil(n_training / batch_size)), verbose=1)
Out[15]:
In [16]:
# Resize the whole test set in a single session call, then apply the
# preprocessing function of the selected network.
# NOTE(review): this holds every resized test image in memory at once -
# confirm that fits for the chosen input_shape.
images_resized = sess.run(tf_resize_op, {batch_of_images_placeholder: X_test})
images = preprocess_input(images_resized)
In [17]:
result = model.predict(images, verbose=1)
In [18]:
y_pred = [ np.argmax( result[i] ) for i in range(n_testing) ]
In [19]:
np.sum( y_pred == y_test ) / n_testing
Out[19]:
In [20]:
# train the model on the new data for a "few" epochs
# model.fit_generator(data_train_gen(), n_training/batch_size, epochs=5, verbose=1)
In [30]:
# Class scores for every test image.
result = model.predict(images, verbose=1)
# Predicted class = index of the largest score in each row (vectorized
# instead of looping over the rows in Python).
y_pred = np.argmax(result, axis=1)
# Test accuracy: fraction of predictions that equal the true label.
np.sum( y_pred == y_test ) / n_testing
Out[30]:
In [31]:
# at this point, the top layers are well trained and we can start fine-tuning
# the convolutional layers of the pretrained base network. We will freeze the
# bottom N layers and train the remaining top layers.
In [21]:
# Keep the first 16 layers of the full model frozen and make every layer
# from index 16 onwards trainable.
for i, layer in enumerate(model.layers):
    layer.trainable = i >= 16
In [22]:
from keras.optimizers import SGD

# The changed trainable flags only take effect after recompiling.
# Fine-tuning uses SGD with a low learning rate.
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=0.0001, momentum=0.9),
)
In [23]:
# Train again, this time fine-tuning the unfrozen layers of the base network
# alongside the top Dense layers.
# steps_per_epoch is a count of batches, so pass an integer; rounding up makes
# sure a trailing partial batch is still consumed.
model.fit_generator(data_train_gen(), int(np.ceil(n_training / batch_size)), epochs=10, verbose=1)
Out[23]:
In [24]:
# model.fit_generator(data_train_gen(), n_training/batch_size, epochs=3, verbose=1)
In [25]:
# Class scores for every test image after fine-tuning.
result = model.predict(images, verbose=1)
# Predicted class = index of the largest score in each row (vectorized
# instead of looping over the rows in Python).
y_pred = np.argmax(result, axis=1)
# Test accuracy: fraction of predictions that equal the true label.
np.sum( y_pred == y_test ) / n_testing
Out[25]:
So we obtained 92.97% accuracy on the testing dataset.
In [26]:
import os
from keras.models import load_model

# Make sure the output directory exists before writing the HDF5 file.
os.makedirs('model', exist_ok=True)
# Save the base network (without the classification head).
base_model.save('model/{network}-tuned.h5'.format(network=selected_network))
In [27]:
import os
from keras.models import load_model

# Make sure the output directory exists before writing the HDF5 file.
os.makedirs('model', exist_ok=True)
# Save the full model (base network plus the classification head).
model.save('model/{network}-full-tuned.h5'.format(network=selected_network))
In [28]:
# The base network itself is used as the feature extractor below; the variant
# with an extra average-pooling layer is left commented out.
# output_pooled = AveragePooling2D((8, 8), strides=(8, 8))(base_model.output)
model_tuned = base_model # Model(base_model.input, output_pooled)
In [29]:
def Xdata_generator(sess, data):
    """Create a factory for endless image-only batch generators.

    Args:
        sess: session used to run the resize op on each batch.
        data: image array; ``data.shape[0]`` is taken as the sample count.

    Returns:
        A zero-argument function. Calling it yields preprocessed image batches
        of up to ``batch_size`` samples forever, restarting from the first
        sample once the end of ``data`` has been reached.
    """
    def generator():
        n_samples = data.shape[0]
        lo = 0
        while True:
            hi = lo + batch_size
            resized = sess.run(tf_resize_op, {batch_of_images_placeholder: data[lo:hi]})
            batch_images = preprocess_input(resized)
            # Advance to the next window, wrapping around at the end of the data.
            lo = hi if hi < n_samples else 0
            yield batch_images
    return generator
In [30]:
Xdata_train_gen = Xdata_generator(sess, X_train)
In [31]:
ftrs_training = model_tuned.predict_generator(Xdata_train_gen(), n_training/batch_size, verbose=1)
In [32]:
Xdata_test_gen = Xdata_generator(sess, X_test)
In [33]:
ftrs_testing = model_tuned.predict_generator(Xdata_test_gen(), n_testing/batch_size, verbose=1)
In [34]:
# All TensorFlow work is finished; the extracted features are already held in
# ftrs_training / ftrs_testing.
sess.close()
In [35]:
# Shape of the raw training features returned by predict_generator.
ftrs_training.shape
Out[35]:
In [36]:
# Shape of the raw test features returned by predict_generator.
ftrs_testing.shape
Out[36]:
In [ ]:
In [37]:
import os

# Flatten the per-image features so that each image becomes one row vector.
features_training = np.array( [ftrs_training[i].flatten() for i in range(n_training)] )
features_testing = np.array( [ftrs_testing[i].flatten() for i in range(n_testing )] )

# Make sure the output directory exists before writing the archive.
os.makedirs("features", exist_ok=True)
# Store features and labels of both splits in one compressed .npz archive.
np.savez_compressed(
    "features/CIFAR10_{}-tuned-keras_features.npz".format(selected_network),
    features_training=features_training,
    features_testing=features_testing,
    labels_training=y_train,
    labels_testing=y_test,
)
In [38]:
# Shapes of the flattened feature matrices (one row per image).
features_training.shape, features_testing.shape
Out[38]:
In [39]:
# Preview the first ten feature values of the first training image.
print('Ten first features of X_train[0] (see figure above, with the frog)')
features_training[0][0:10]
Out[39]:
Ten first features of X_train[0] (see figure above, with the frog)
'incv3':
array([ 0.29244769, 0.50564754, 0.21346107, 0.3144542 , 0.33176085,
0.3695876 , 0.36724254, 0.24510881, 0.27722129, 0.14876673], dtype=float32)
'vgg16'
In [2]:
import numpy as np
import myutils
# Load the features and labels exported by the fine-tuning notebook.
# NOTE(review): `selected_network` is not defined in the visible part of this
# notebook - confirm it is set before this cell runs.
# The archive is opened in a `with` block so the underlying file is closed
# once the four arrays have been read.
with np.load("features/CIFAR10_{}-tuned-keras_features.npz".format(selected_network)) as data:
    X_training = data['features_training']
    y_training = data['labels_training']
    X_testing = data['features_testing']
    y_testing = data['labels_testing']
# data_training, data_testing = myutils.load_CIFAR_dataset(shuffle=False)
# assert( (np.array( [data_training[i][1] for i in range(len(data_training))] ) == y_training).all() )
# assert( (np.array( [data_testing[i][1] for i in range(len(data_testing))] ) == y_testing).all() )
print( 'X_training size = {}'.format(X_training.shape))
In [3]:
from sklearn import decomposition
# Fit a 2-component PCA on the training features for a 2-D visualization.
pca = decomposition.PCA(n_components=2)
pca.fit(X_training)
Out[3]:
In [4]:
# Share of the total variance captured by each of the two components.
print(pca.explained_variance_ratio_)
In [5]:
# Project the training features onto the two principal components.
X = pca.transform(X_training)
In [6]:
X.shape
Out[6]:
In [7]:
from matplotlib import pyplot as plt

# Scatter plot of the two PCA components, colored by training label.
plt.figure(figsize=(15, 15))
plt.scatter(
    X[:, 0],
    X[:, 1],
    c=y_training,
    cmap='tab10',
)
# plt.colorbar()
plt.show()
In [16]:
from sklearn.manifold import TSNE
# Reduce the training features to 60 principal components before running t-SNE.
# Note that this rebinds `pca`, replacing the 2-component model fitted earlier.
pca = decomposition.PCA(n_components=60)
X_training_reduced = pca.fit_transform(X_training)
Since t-SNE is computationally expensive, it is worth compressing the data with PCA first. Let us say that retaining 55% of the variance is enough.
In [17]:
# Total share of the variance retained by the 60 PCA components.
np.sum( pca.explained_variance_ratio_ )
Out[17]:
In [18]:
# 2-D t-SNE embedding.
# NOTE(review): no random_state is passed, so the embedding may differ
# between runs - confirm whether reproducibility matters here.
tsne = TSNE(n_components=2)
In [19]:
# Embed the PCA-reduced training features.
X_training_reduced_tsne = tsne.fit_transform(X_training_reduced)
In [20]:
X_training_reduced_tsne.shape
Out[20]:
In [21]:
# Scatter plot of the 2-D t-SNE embedding, colored by training label.
plt.figure(figsize=(15, 15))
plt.scatter(
    X_training_reduced_tsne[:, 0],
    X_training_reduced_tsne[:, 1],
    c=y_training,
    cmap='tab10',
)
plt.show()
In [ ]: