In [1]:
from keras.applications.vgg19 import VGG19
from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input
from keras.models import Model
import json
import cPickle as pk
import os
import numpy as np
import pandas as pd
from time import time
import cv2
import matplotlib.pyplot as plt
import re
%matplotlib inline


Using Theano backend.

In [56]:
def load_everything():
    """Load the project configuration and the cleaned bounding-box table.

    Reads ``../config.json`` (relative to the working directory) for its
    ``image_dir`` and ``pickle_dir`` entries, unpickles
    ``pDogs-bounding-boxes-clean.pd.pk`` from the pickle directory, drops rows
    with missing values and rewrites ``scrape-ims`` to ``scrape_ims`` in the
    ``path`` column.

    Returns:
        tuple: ``(breeds, bb, mainImPath)`` -- the sorted list of unique
        values of ``bb.breed``, the cleaned table, and the ``image_dir``
        string from the configuration.
    """
    # load configuration
    with open('../config.json', 'rb') as f:
        config = json.load(f)

    mainImPath = config['image_dir']
    pDir = config['pickle_dir']

    # NOTE(review): unpickling can execute arbitrary code -- only load pickle
    # files produced by this project.
    # The context manager closes the handle, which the previous inline
    # open() call never did.
    with open(pDir + 'pDogs-bounding-boxes-clean.pd.pk', 'rb') as pickle_file:
        bb = pk.load(pickle_file)
    bb.dropna(inplace=True)
    bb.path = bb.path.apply(lambda x: re.sub('scrape-ims', 'scrape_ims', x))

    # sorted() already returns a list
    breeds = sorted(bb.breed.unique().tolist())

    return breeds, bb, mainImPath

def create_model(output_layer='block4_pool'):
    """Build a VGG19 (ImageNet weights) truncated at a named layer.

    Args:
        output_layer: name of the VGG19 layer whose output becomes the
            output of the returned model.

    Returns:
        A Keras ``Model`` sharing VGG19's input and ending at ``output_layer``.
    """
    vgg = VGG19(weights='imagenet')
    tap = vgg.get_layer(output_layer)
    return Model(input=vgg.input, output=tap.output)

def extract_feats(img_path):
    """Run one image through the module-level ``model`` and return its output.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and passed through ``preprocess_input`` before prediction.

    Args:
        img_path: path of the image file to load.

    Returns:
        The result of ``model.predict`` for the single-image batch, or
        ``None`` (after printing a message) when loading raises ``IOError``.
    """
    try:
        pil_img = image.load_img(img_path, target_size=(224, 224))
    except IOError:
        print('couldn\'t load file')
        return None

    # add the batch axis, then apply the VGG19 input preprocessing
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    return model.predict(preprocess_input(batch))

def extract_all_feats(timing=True):
    all_feats = None
    for b in breeds:
        print 'on breed:', b
        imDir = mainImPath + breeds[0] + '/'
        ims = os.listdir(imDir)
        for im in ims:
            start = time()
            print 'on image:', im
            b4 = extract_feats(imDir + im)
            if b4 is None:
                continue

            if all_feats is None:
                files = [im]
                all_feats = b4
            else:
                files.append(im)
                all_feats = np.c_[all_feats, all_feats]

            if timing:
                print 'took', time() - start, 'seconds'

    return files, all_feats

def test(output_layer='block4_pool', timing=True, verbose=True):
    start = time()
    testImDir = mainImPath + breeds[0] + '/'
    testIms = os.listdir(testImDir)
    if verbose:
        print 'extracting features for', breeds[0]
        print 'file:', testIms[0]
    b4 = extract_feats(testImDir + testIms[0])
    if timing:
        runtime = time() - start
        if verbose:
            print 'took', runtime, 'seconds'

    return b4, runtime, testImDir + testIms[0]

In [3]:
# Populate the notebook-level names that extract_all_feats() and test() read.
breeds, bb, mainImPath = load_everything()

In [4]:
# Full VGG19 with ImageNet weights; later cells inspect its layer config and weights.
base_model = VGG19(weights='imagenet')

Here are the layers we can use as intermediate outputs:


In [5]:
# Print the name of every layer in the model configuration, in order.
for i in base_model.get_config()['layers']:
    print i['name']


input_1
block1_conv1
block1_conv2
block1_pool
block2_conv1
block2_conv2
block2_pool
block3_conv1
block3_conv2
block3_conv3
block3_conv4
block3_pool
block4_conv1
block4_conv2
block4_conv3
block4_conv4
block4_pool
block5_conv1
block5_conv2
block5_conv3
block5_conv4
block5_pool
flatten
fc1
fc2
predictions

In [59]:
from collections import OrderedDict
testFeats = OrderedDict()
for i in base_model.get_config()['layers']:
    layerName = i['name']
    if layerName in ['input_1', 'predictions']:
        continue
    model = create_model(output_layer=layerName)
    testFeat, runtime, filename = test(verbose=False)
    testFeats[layerName] = testFeat
    print layerName, testFeat[0].shape


block1_conv1 (64, 224, 224)
block1_conv2 (64, 224, 224)
block1_pool (64, 112, 112)
block2_conv1 (128, 112, 112)
block2_conv2 (128, 112, 112)
block2_pool (128, 56, 56)
block3_conv1 (256, 56, 56)
block3_conv2 (256, 56, 56)
block3_conv3 (256, 56, 56)
block3_conv4 (256, 56, 56)
block3_pool (256, 28, 28)
block4_conv1 (512, 28, 28)
block4_conv2 (512, 28, 28)
block4_conv3 (512, 28, 28)
block4_conv4 (512, 28, 28)
block4_pool (512, 14, 14)
block5_conv1 (512, 14, 14)
block5_conv2 (512, 14, 14)
block5_conv3 (512, 14, 14)
block5_conv4 (512, 14, 14)
block5_pool (512, 7, 7)
flatten (25088,)
fc1 (4096,)
fc2 (4096,)

In [64]:
# Collect the model's weight arrays as returned by get_weights().
weights = base_model.get_weights()

In [70]:
# Number of weight arrays in the model.
len(weights)


Out[70]:
38

In [50]:
# Dump each layer's name followed by its full configuration dictionary.
for i in base_model.get_config()['layers']:
    print i['name']
    print i
    # NOTE(review): i['config']['batch_input_shape'] was printed here once; in the
    # output below only the input layer's config has that key -- presumably why it was disabled.


input_1
{'class_name': 'InputLayer', 'config': {'batch_input_shape': (None, 3, 224, 224), 'input_dtype': 'float32', 'sparse': False, 'name': 'input_1'}, 'inbound_nodes': [], 'name': 'input_1'}
block1_conv1
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block1_conv1', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 64, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['input_1', 0, 0]]], 'name': 'block1_conv1'}
block1_conv2
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block1_conv2', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 64, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block1_conv1', 0, 0]]], 'name': 'block1_conv2'}
block1_pool
{'class_name': 'MaxPooling2D', 'config': {'name': 'block1_pool', 'trainable': True, 'dim_ordering': 'th', 'pool_size': (2, 2), 'strides': (2, 2), 'border_mode': 'valid'}, 'inbound_nodes': [[['block1_conv2', 0, 0]]], 'name': 'block1_pool'}
block2_conv1
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block2_conv1', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 128, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block1_pool', 0, 0]]], 'name': 'block2_conv1'}
block2_conv2
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block2_conv2', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 128, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block2_conv1', 0, 0]]], 'name': 'block2_conv2'}
block2_pool
{'class_name': 'MaxPooling2D', 'config': {'name': 'block2_pool', 'trainable': True, 'dim_ordering': 'th', 'pool_size': (2, 2), 'strides': (2, 2), 'border_mode': 'valid'}, 'inbound_nodes': [[['block2_conv2', 0, 0]]], 'name': 'block2_pool'}
block3_conv1
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block3_conv1', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 256, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block2_pool', 0, 0]]], 'name': 'block3_conv1'}
block3_conv2
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block3_conv2', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 256, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block3_conv1', 0, 0]]], 'name': 'block3_conv2'}
block3_conv3
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block3_conv3', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 256, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block3_conv2', 0, 0]]], 'name': 'block3_conv3'}
block3_conv4
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block3_conv4', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 256, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block3_conv3', 0, 0]]], 'name': 'block3_conv4'}
block3_pool
{'class_name': 'MaxPooling2D', 'config': {'name': 'block3_pool', 'trainable': True, 'dim_ordering': 'th', 'pool_size': (2, 2), 'strides': (2, 2), 'border_mode': 'valid'}, 'inbound_nodes': [[['block3_conv4', 0, 0]]], 'name': 'block3_pool'}
block4_conv1
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block4_conv1', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block3_pool', 0, 0]]], 'name': 'block4_conv1'}
block4_conv2
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block4_conv2', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block4_conv1', 0, 0]]], 'name': 'block4_conv2'}
block4_conv3
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block4_conv3', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block4_conv2', 0, 0]]], 'name': 'block4_conv3'}
block4_conv4
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block4_conv4', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block4_conv3', 0, 0]]], 'name': 'block4_conv4'}
block4_pool
{'class_name': 'MaxPooling2D', 'config': {'name': 'block4_pool', 'trainable': True, 'dim_ordering': 'th', 'pool_size': (2, 2), 'strides': (2, 2), 'border_mode': 'valid'}, 'inbound_nodes': [[['block4_conv4', 0, 0]]], 'name': 'block4_pool'}
block5_conv1
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block5_conv1', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block4_pool', 0, 0]]], 'name': 'block5_conv1'}
block5_conv2
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block5_conv2', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block5_conv1', 0, 0]]], 'name': 'block5_conv2'}
block5_conv3
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block5_conv3', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block5_conv2', 0, 0]]], 'name': 'block5_conv3'}
block5_conv4
{'class_name': 'Convolution2D', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'block5_conv4', 'activity_regularizer': None, 'trainable': True, 'dim_ordering': 'th', 'nb_col': 3, 'subsample': (1, 1), 'init': 'glorot_uniform', 'bias': True, 'nb_filter': 512, 'border_mode': 'same', 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'nb_row': 3}, 'inbound_nodes': [[['block5_conv3', 0, 0]]], 'name': 'block5_conv4'}
block5_pool
{'class_name': 'MaxPooling2D', 'config': {'name': 'block5_pool', 'trainable': True, 'dim_ordering': 'th', 'pool_size': (2, 2), 'strides': (2, 2), 'border_mode': 'valid'}, 'inbound_nodes': [[['block5_conv4', 0, 0]]], 'name': 'block5_pool'}
flatten
{'class_name': 'Flatten', 'config': {'trainable': True, 'name': 'flatten'}, 'inbound_nodes': [[['block5_pool', 0, 0]]], 'name': 'flatten'}
fc1
{'class_name': 'Dense', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'fc1', 'activity_regularizer': None, 'trainable': True, 'init': 'glorot_uniform', 'bias': True, 'input_dim': None, 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'output_dim': 4096}, 'inbound_nodes': [[['flatten', 0, 0]]], 'name': 'fc1'}
fc2
{'class_name': 'Dense', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'fc2', 'activity_regularizer': None, 'trainable': True, 'init': 'glorot_uniform', 'bias': True, 'input_dim': None, 'b_regularizer': None, 'W_regularizer': None, 'activation': 'relu', 'output_dim': 4096}, 'inbound_nodes': [[['fc1', 0, 0]]], 'name': 'fc2'}
predictions
{'class_name': 'Dense', 'config': {'W_constraint': None, 'b_constraint': None, 'name': 'predictions', 'activity_regularizer': None, 'trainable': True, 'init': 'glorot_uniform', 'bias': True, 'input_dim': None, 'b_regularizer': None, 'W_regularizer': None, 'activation': 'softmax', 'output_dim': 1000}, 'inbound_nodes': [[['fc2', 0, 0]]], 'name': 'predictions'}

In [6]:
# Show the first entry of the layer configuration list.
base_model.get_config()['layers'][0]


Out[6]:
{'class_name': 'InputLayer',
 'config': {'batch_input_shape': (None, 3, 224, 224),
  'input_dtype': 'float32',
  'name': 'input_1',
  'sparse': False},
 'inbound_nodes': [],
 'name': 'input_1'}

In [7]:
# `model` is the module-level name that extract_feats() reads when predicting.
model = create_model() # creates VGG19 model with block4_pool_features out

In [8]:
# Extract features for the first listed image of the first breed with the current `model`.
testFeat, runtime, filename = test()


extracting features for Affenpinscher
file: Affenpinscher3-2.jpeg
took 8.01857185364 seconds

In [9]:
# Show channel 0 of the first (only) item in testFeat, with ticks and axes hidden.
f = plt.imshow(testFeat[0][0])
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')



In [10]:
# Display the source image that the features were extracted from.
im = cv2.imread(filename)
if im is None:
    # cv2.imread signals an unreadable file by returning None instead of raising
    raise IOError('could not read image: %s' % filename)
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
ax.axis('off')
# cv2.imread yields BGR channel order while matplotlib expects RGB; convert
# for display so red and blue are not swapped. `im` itself is left as loaded.
f = plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))



In [24]:
# Switch the module-level model to stop at block1_pool, re-extract the test
# image's features and show channel 1 with the 'Greys' colormap.
model = create_model(output_layer='block1_pool')
testFeat, runtime, filename = test()
f = plt.imshow(testFeat[0][1], cmap='Greys')
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')


extracting features for Affenpinscher
file: Affenpinscher3-2.jpeg
took 1.01802301407 seconds

In [25]:
# Channel 1 of the current testFeat with matplotlib's default colormap.
f = plt.imshow(testFeat[0][1])
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')



In [26]:
# Shape of the first item's feature array (batch axis removed).
testFeat[0].shape


Out[26]:
(64, 112, 112)

In [17]:
# 8 x 8 grid showing the first 64 channels of the current testFeat ('Greys').
# Iterate exactly 64 times rather than looping to 512 and breaking at 63.
c = plt.figure(figsize=(12, 12))
for i in range(64):
    ax = c.add_subplot(8, 8, i + 1)
    ax.imshow(testFeat[0][i], cmap='Greys')
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')



In [44]:
# 8 x 8 grid showing the first 64 channels of the current testFeat (default colormap).
# Iterate exactly 64 times rather than looping to 512 and breaking at 63.
c = plt.figure(figsize=(12, 12))
for i in range(64):
    ax = c.add_subplot(8, 8, i + 1)
    ax.imshow(testFeat[0][i])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')



In [28]:
# Channel 11 of the current testFeat with the 'Greys' colormap.
f = plt.imshow(testFeat[0][11], cmap='Greys')
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')



In [29]:
# Channel 11 of the current testFeat with matplotlib's default colormap.
f = plt.imshow(testFeat[0][11])
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')



In [41]:
model = create_model(output_layer='block2_conv2') # VGG19 truncated at block2_conv2
# Re-extract the test image's features with the new module-level model.
testFeat, runtime, filename = test()


extracting features for Affenpinscher
file: Affenpinscher3-2.jpeg
took 4.40923094749 seconds

In [42]:
# Shape of the extracted features, including the leading batch axis.
testFeat.shape


Out[42]:
(1, 128, 112, 112)

In [43]:
model = create_model(output_layer='block2_pool') # VGG19 truncated at block2_pool
testFeat, runtime, filename = test()
# Show channel 1 with the 'Greys' colormap, ticks and axes hidden.
f = plt.imshow(testFeat[0][1], cmap='Greys')
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')


extracting features for Affenpinscher
file: Affenpinscher3-2.jpeg
took 2.34523677826 seconds

In [44]:
# Shape of the extracted features, including the leading batch axis.
testFeat.shape


Out[44]:
(1, 128, 56, 56)

In [45]:
model = create_model(output_layer='block3_conv1') # VGG19 truncated at block3_conv1
# Re-extract the test image's features with the new module-level model.
testFeat, runtime, filename = test()


extracting features for Affenpinscher
file: Affenpinscher3-2.jpeg
took 2.23546504974 seconds

In [46]:
# Shape of the extracted features, including the leading batch axis.
testFeat.shape


Out[46]:
(1, 256, 56, 56)

In [31]:
# Channel 1 of the current testFeat with matplotlib's default colormap.
f = plt.imshow(testFeat[0][1])
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')



In [33]:
model = create_model(output_layer='block3_pool') # VGG19 truncated at block3_pool
testFeat, runtime, filename = test()
# Show channel 1 with the 'Greys' colormap, ticks and axes hidden.
f = plt.imshow(testFeat[0][1], cmap='Greys')
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')


extracting features for Affenpinscher
file: Affenpinscher3-2.jpeg
took 3.85950803757 seconds

In [34]:
# Channel 1 of the current testFeat with matplotlib's default colormap.
f = plt.imshow(testFeat[0][1])
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')



In [36]:
model = create_model(output_layer='block4_pool') # creates VGG19 model with block4_pool_features out
testFeat, runtime, filename = test()
# Show channel 1 with the 'Greys' colormap, ticks and axes hidden.
f = plt.imshow(testFeat[0][1], cmap='Greys')
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')


extracting features for Affenpinscher
file: Affenpinscher3-2.jpeg
took 5.95858693123 seconds

In [37]:
# Channel 0 of the current testFeat with matplotlib's default colormap.
f = plt.imshow(testFeat[0][0])
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')



In [38]:
# Shape of the extracted features, including the leading batch axis.
testFeat.shape


Out[38]:
(1, 512, 14, 14)

In [39]:
# 8 x 8 grid showing the first 64 channels of the current testFeat ('Greys').
# Iterate exactly 64 times rather than looping to 512 and breaking at 63.
c = plt.figure(figsize=(12, 12))
for i in range(64):
    ax = c.add_subplot(8, 8, i + 1)
    ax.imshow(testFeat[0][i], cmap='Greys')
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')



In [40]:
# 8 x 8 grid showing the first 64 channels of the current testFeat (default colormap).
# Iterate exactly 64 times rather than looping to 512 and breaking at 63.
c = plt.figure(figsize=(12, 12))
for i in range(64):
    ax = c.add_subplot(8, 8, i + 1)
    ax.imshow(testFeat[0][i])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')



In [51]:
# 16 x 32 grid (512 panels): one panel per channel index 0..511 of the current
# testFeat, so testFeat[0] must hold at least 512 channels.
c = plt.figure(figsize=(12, 12))
for i in range (512):
    ax = c.add_subplot(16, 32, i + 1)
    ax.imshow(testFeat[0][i])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.axis('off')


# The statements of this cell had been collapsed onto one line, which left
# everything after the first '#' inside a comment; line breaks are restored.
model = create_model(output_layer='block5_pool')  # VGG19 truncated at block5_pool
testFeat, runtime, filename = test()
f = plt.imshow(testFeat[0][0])
ax = plt.gca()
ax.set_xticks([])
ax.set_yticks([])
p = ax.axis('off')