Feature Extractor:

Extracts the activations of the second-to-last fully connected layer (fc2) of VGG16 as a (4096,)-dimensional feature vector for every image in the MIT67 dataset.


In [1]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle
import random
import scipy.misc
import pickle
from tqdm import tqdm

from utils import optimistic_restore, save
import layers

import pdb

In [2]:
'''
HYPERPARAMS
'''
DATA_PATH = '/media/red/capstone/data/mit_67/Images/'
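# fc3 keeps the 1000 ImageNet classes so the pretrained fc8 weights load cleanly;
# the 67 MIT classes themselves are not needed for feature extraction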
NUM_CLASSES = 1000
PRETRAINED_WEIGHT_FILE = '/media/red/capstone/pretrained_weights/vgg16_weights.npz'
SAVE_FILE = '/media/red/capstone/data/mit_67/vgg16_features.pickle'

In [3]:
'''
Load MIT_67
'''

classes = os.listdir(DATA_PATH)
data_paths = {}
for c in classes:
    class_dir = os.path.join(DATA_PATH, c)
    images = os.listdir(class_dir)
    data_paths[c] = [fname for fname in images if fname.endswith('.jpg')]

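The dataset is assumed to live under DATA_PATH as Images/<class_name>/<image>.jpg, one folder per class. A quick sanity check on the cell above (not part of the original run) could be:

total = sum(len(v) for v in data_paths.values())
print('%d classes, %d images total' % (len(data_paths), total))  # MIT67 should report 67 classes
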
In [4]:
'''
Declare model
'''
class vgg16:
    '''
    VGG16 Model with ImageNet pretrained weight loader method
    Weights can be downloaded from:
    https://www.cs.toronto.edu/~frossard/vgg16/vgg16_weights.npz
    '''

    def __init__(self, x, phase):
        '''
        Builds the graph far enough to run a forward pass.
        x is expected to be a [batch, 224, 224, 3] tensor scaled to [0, 1];
        phase is accepted for API symmetry but is not used yet.
        '''

        # List used for loading weights from vgg16.npz (if necessary)
        self.parameters = []
        self.CONV_ACTIVATION = 'relu'
        self.FC_ACTIVATION   = 'relu'

        ########
        # Misc #
        ########
        self.IM_SHAPE = [224, 224, 3]

        ####################
        # I/O placeholders #
        ####################
        self.x = x
        self.x.set_shape([None]+self.IM_SHAPE)

        ###############
        # Main Layers #
        ###############
        with tf.variable_scope('conv_layers'):
            self._convlayers()
        with tf.variable_scope('fc_layers'):
            self._fc_layers()

        ######################
        # Define Collections #
        ######################
        self.conv_trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                "conv_layers")
        self.fc_trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                "fc_layers")

    #####################
    # Private Functions #
    #####################
    def _convlayers(self):
        '''
        All conv and pooling layers of VGG16
        '''
        # zero-mean input: x is expected in [0, 1], rescaled to [0, 255] here, then
        # shifted by the per-channel ImageNet RGB mean; resizing has to be done
        # beforehand for a uniform tensor shape
        with tf.variable_scope('preprocess'):
            mean = tf.constant([123.68, 116.779, 103.939],
                    dtype=tf.float32,
                    shape=[1, 1, 1, 3],
                    name='img_mean')
            self.images = self.x*255.0 - mean

        # conv1_1
        self.conv1_1, weights, biases = layers.conv2d(name='conv1_1',
                input=self.images,
                shape=(3,3,3,64),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv1_2
        self.conv1_2, weights, biases = layers.conv2d(name='conv1_2',
                input=self.conv1_1,
                shape=(3,3,64,64),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool1
        self.pool1 = tf.nn.max_pool(self.conv1_2,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool1')

        # conv2_1
        self.conv2_1, weights, biases = layers.conv2d(name='conv2_1',
                input=self.pool1,
                shape=(3,3,64,128),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv2_2
        self.conv2_2, weights, biases = layers.conv2d(name='conv2_2',
                input=self.conv2_1,
                shape=(3,3,128,128),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool2
        self.pool2 = tf.nn.max_pool(self.conv2_2,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool2')

        # conv3_1
        self.conv3_1, weights, biases = layers.conv2d(name='conv3_1',
                input=self.pool2,
                shape=(3,3,128,256),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv3_2
        self.conv3_2, weights, biases = layers.conv2d(name='conv3_2',
                input=self.conv3_1,
                shape=(3,3,256,256),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv3_3
        self.conv3_3, weights, biases = layers.conv2d(name='conv3_3',
                input=self.conv3_2,
                shape=(3,3,256,256),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool3
        self.pool3 = tf.nn.max_pool(self.conv3_3,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool3')

        # conv4_1
        self.conv4_1, weights, biases = layers.conv2d(name='conv4_1',
                input=self.pool3,
                shape=(3,3,256,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv4_2
        self.conv4_2, weights, biases = layers.conv2d(name='conv4_2',
                input=self.conv4_1,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv4_3
        self.conv4_3, weights, biases = layers.conv2d(name='conv4_3',
                input=self.conv4_2,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool4
        self.pool4 = tf.nn.max_pool(self.conv4_3,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool4')

        # conv5_1
        self.conv5_1, weights, biases = layers.conv2d(name='conv5_1',
                input=self.pool4,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv5_2
        self.conv5_2, weights, biases = layers.conv2d(name='conv5_2',
                input=self.conv5_1,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv5_3
        self.conv5_3, weights, biases = layers.conv2d(name='conv5_3',
                input=self.conv5_2,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool5
        self.pool5 = tf.nn.max_pool(self.conv5_3,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool5')

    def _fc_layers(self):
        '''
        All FC layers of VGG16 (+custom layers)
        '''
        # fc1
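        # flattened pool5 is 7*7*512 = 25088-dim for a 224x224 input, matching
        # the fc6_W (25088, 4096) matrix in the pretrained weight file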
        self.fc1, weights, biases = layers.fc(name='fc1',
                input=tf.contrib.layers.flatten(self.pool5),
                units=4096,
                activation=self.FC_ACTIVATION)
        self.parameters += [weights, biases]

        # fc2
        self.fc2, weights, biases = layers.fc(name='fc2',
                input=self.fc1,
                units=4096,
                activation=self.FC_ACTIVATION)
        self.parameters += [weights, biases]

        # fc3
        self.fc3, weights, biases = layers.fc(name='fc3',
                input=self.fc2,
                units=NUM_CLASSES,
                activation='linear')
        self.parameters += [weights, biases]

    def load_pretrained_weights(self, sess):
        '''
        Load Pretrained VGG16 weights from .npz file
        (weights converted from Caffe)
        To be used only when no TensorFlow snapshot is available.
        Assumes layers are properly added to self.parameters.
        '''
        print("Loading Imagenet Weights.")

        weights = np.load(PRETRAINED_WEIGHT_FILE)
        keys = sorted(weights.keys())
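        # sorted() yields conv1_1_W, conv1_1_b, ..., fc8_W, fc8_b, which lines up
        # with the order in which [weights, biases] were appended to self.parameters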
        for i, k in enumerate(keys):
            print(i, k, np.shape(weights[k]))
            try:
                sess.run(self.parameters[i].assign(weights[k]))
            except (IndexError, ValueError) as e:
                print("Could not load %s: %s" % (k, e))

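The conv2d and fc helpers used throughout the class come from the local layers module, which is not shown in this notebook. As a rough sketch only, assuming each helper returns an (output, weights, biases) tuple as self.parameters expects, they might look like:

import tensorflow as tf

def conv2d(name, input, shape, padding='SAME', strides=[1, 1, 1, 1], activation='relu'):
    # convolution + bias, optionally followed by ReLU
    with tf.variable_scope(name):
        weights = tf.get_variable('W', shape=shape,
                initializer=tf.truncated_normal_initializer(stddev=1e-2))
        biases = tf.get_variable('b', shape=[shape[-1]],
                initializer=tf.zeros_initializer())
        out = tf.nn.conv2d(input, weights, strides=strides, padding=padding) + biases
        if activation == 'relu':
            out = tf.nn.relu(out)
        return out, weights, biases

def fc(name, input, units, activation='relu'):
    # fully connected layer: matmul + bias, optionally followed by ReLU
    with tf.variable_scope(name):
        in_dim = int(input.get_shape()[-1])
        weights = tf.get_variable('W', shape=[in_dim, units],
                initializer=tf.truncated_normal_initializer(stddev=1e-2))
        biases = tf.get_variable('b', shape=[units],
                initializer=tf.zeros_initializer())
        out = tf.matmul(input, weights) + biases
        if activation == 'relu':
            out = tf.nn.relu(out)
        return out, weights, biases
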
In [5]:
'''
Model Setup
'''
x = tf.placeholder(dtype=tf.float32, shape=(1, None, None, 3))

with tf.name_scope('preprocess') as scope:
    # resize to 224x224 and scale to [0, 1]; mean subtraction is handled inside the
    # model's own preprocess block, so it is not repeated here
    x_resized = tf.image.resize_images(x, (224, 224))
    x_scaled = x_resized / 255.0

is_training = tf.placeholder(dtype=tf.bool)
net = vgg16(x_scaled, is_training)

'''
Tensorflow Session Setup
'''
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
sess.run(init)

'''
Load Pretrained Weights (ImageNet)
'''
net.load_pretrained_weights(sess)
        
'''
Loop Through Images
'''
features = {}
for c in tqdm(classes):
    class_feat = []
    for im_path in data_paths[c]:
        im = scipy.misc.imread(os.path.join(DATA_PATH,c,im_path), mode='RGB')
        im = np.expand_dims(im, 0)
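        # fc2 is the second-to-last FC layer; its 4096-d activation is the extracted feature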
        fc2_v = sess.run(net.fc2,
                feed_dict={x:im, is_training:False})[0]
        class_feat.append(fc2_v)
    features[c] = np.stack(class_feat)  # n_images x 4096 matrix
# the per-class lists of source image filenames (data_paths) are saved alongside the features
with open(SAVE_FILE, 'wb') as f:
    pickle.dump([features, data_paths], f)


Loading Imagenet Weights.
0 conv1_1_W (3, 3, 3, 64)
1 conv1_1_b (64,)
2 conv1_2_W (3, 3, 64, 64)
3 conv1_2_b (64,)
4 conv2_1_W (3, 3, 64, 128)
5 conv2_1_b (128,)
6 conv2_2_W (3, 3, 128, 128)
7 conv2_2_b (128,)
8 conv3_1_W (3, 3, 128, 256)
9 conv3_1_b (256,)
10 conv3_2_W (3, 3, 256, 256)
11 conv3_2_b (256,)
12 conv3_3_W (3, 3, 256, 256)
13 conv3_3_b (256,)
14 conv4_1_W (3, 3, 256, 512)
15 conv4_1_b (512,)
16 conv4_2_W (3, 3, 512, 512)
17 conv4_2_b (512,)
18 conv4_3_W (3, 3, 512, 512)
19 conv4_3_b (512,)
20 conv5_1_W (3, 3, 512, 512)
21 conv5_1_b (512,)
22 conv5_2_W (3, 3, 512, 512)
23 conv5_2_b (512,)
24 conv5_3_W (3, 3, 512, 512)
25 conv5_3_b (512,)
26 fc6_W (25088, 4096)
27 fc6_b (4096,)
28 fc7_W (4096, 4096)
29 fc7_b (4096,)
30 fc8_W (4096, 1000)
31 fc8_b (1000,)
  0%|          | 0/67 [00:00<?, ?it/s]
100%|██████████| 67/67 [03:15<00:00,  2.72s/it]

In [ ]:
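As a usage sketch (not part of the original notebook, and assuming the SAVE_FILE path defined above), the pickle can be read back as a dict of per-class feature matrices plus the per-class filename lists; 'airport_inside' is just one illustrative MIT67 class name:

import pickle

with open(SAVE_FILE, 'rb') as f:
    features, data_paths = pickle.load(f)

print(len(features))                    # 67 classes
feats = features['airport_inside']      # (n_images, 4096) feature matrix
print(feats.shape, len(data_paths['airport_inside']))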