In [1]:
# Let's see how to use DBT to:
# 1: train a VGG-like network on CIFAR-10
# 2: continue a train from the last iteration
# 3: do TRANSFER LEARNING from the trained model to another model that will be able to classify CIFAR-100
# 4: do FINE TUNING of the model trained on CIFAR-10 to solve the CIFAR-100 classification problem
# 5: compare the train/validation/test performance of the models

import pandas as pd
import pprint
import tensorflow as tf
from dytb.inputs.predefined import Cifar10, Cifar100
from dytb.train import train
from dytb.models.predefined.VGG import VGG

In [2]:
# Instantiate the VGG-like model (predefined in DyTB).
# The same instance is reused below for training, transfer learning and fine tuning.
vgg = VGG()

In [3]:
# Instantiate the CIFAR-10 input source (predefined in DyTB).
# It downloads/prepares the dataset on first use — TODO confirm against DyTB docs.
cifar10 = Cifar10.Cifar10()

In [4]:
# 1: Train VGG on Cifar10 for an Epoch

# Place the train process on GPU:0
device = '/gpu:0'
with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters={
            "epochs": 1,          # total number of training epochs
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,       # L2 weight-decay strength
                "augmentation": {
                    "name": "FlipLR",
                    "fn": tf.image.random_flip_left_right,
                    # On average the training set size doubles applying this
                    # transformation, thus factor=2
                    "factor": 2,
                }
            },
            "gd": {
                # Gradient-descent configuration: optimizer class plus its
                # constructor arguments (Adam with standard beta/epsilon values)
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                }
            }
        },
        force_restart=True)  # ignore any existing checkpoint: start from scratch


Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/W:0' shape=(3, 3, 256, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/fc/W:0' shape=(512, 512) dtype=float32_ref>
<tf.Variable 'VGG/fc/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/W:0' shape=(512, 10) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/b:0' shape=(10,) dtype=float32_ref>
Model VGG: trainable parameters: 14982474. Size: 59929.896 KB
[!] No checkpoint file found
2017-05-22 15:54:25.113484: step 0, loss = 2.5562 (18.3 examples/sec; 2.726 sec/batch)
2017-05-22 15:54:31.518555: step 200, loss = 2.0407 (1772.2 examples/sec; 0.028 sec/batch)
2017-05-22 15:54:38.177017: step 400, loss = 1.7758 (1668.4 examples/sec; 0.030 sec/batch)
2017-05-22 15:54:44.512619: step 600, loss = 1.6580 (1730.2 examples/sec; 0.029 sec/batch)
2017-05-22 15:54:51.121688: step 800, loss = 1.6563 (1893.3 examples/sec; 0.026 sec/batch)
2017-05-22 15:54:57.477840: step 1000, loss = 1.6784 (1622.5 examples/sec; 0.031 sec/batch)
2017-05-22 15:55:03.983570: step 1200, loss = 1.4601 (1897.1 examples/sec; 0.026 sec/batch)
2017-05-22 15:55:10.386923: step 1400, loss = 1.2355 (1740.0 examples/sec; 0.029 sec/batch)
2017-05-22 15:55:17.030403: step 1600, loss = 1.2162 (1677.3 examples/sec; 0.030 sec/batch)
2017-05-22 15:55:23.402104: step 1800, loss = 1.0021 (1757.7 examples/sec; 0.028 sec/batch)
2017-05-22 15:55:29.881070: step 2000, loss = 1.1827 (1824.0 examples/sec; 0.027 sec/batch)
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/model.ckpt-2000
2017-05-22 15:55:47.020577 (1): train accuracy = 0.587 validation accuracy = 0.572
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-2000

In [5]:
# `info` holds everything related to the trained model; here we only care
# about the per-split statistics, so we pull the accuracy out of each entry
# and render the result as a one-row Pandas DataFrame.

# Extract the accuracies measured on every set (train/validation/test)
accuracies = {}
for split_name, split_stats in info["stats"].items():
    accuracies[split_name] = split_stats["accuracy"]
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df


Out[5]:
test train validation
accuracy 0.5718 0.59188 0.5717

In [6]:
# Pull out the confusion matrix computed on each dataset split
confusion_matrices = {
    split: stats["confusion_matrix"]
    for split, stats in info["stats"].items()
}
# Display the training-set confusion matrix as a DataFrame
df = pd.DataFrame(confusion_matrices["train"])
df


Out[6]:
0 1 2 3 4 5 6 7 8 9
0 2945.0 178.0 75.0 60.0 6.0 62.0 31.0 37.0 1137.0 423.0
1 12.0 4019.0 1.0 16.0 0.0 8.0 17.0 6.0 99.0 838.0
2 1138.0 28.0 1146.0 525.0 265.0 757.0 597.0 207.0 127.0 202.0
3 244.0 23.0 138.0 1234.0 60.0 2354.0 367.0 118.0 65.0 340.0
4 302.0 33.0 422.0 399.0 1887.0 516.0 602.0 611.0 36.0 233.0
5 140.0 16.0 109.0 516.0 75.0 3598.0 109.0 161.0 32.0 228.0
6 64.0 62.0 115.0 588.0 106.0 171.0 3610.0 24.0 55.0 245.0
7 115.0 10.0 47.0 118.0 167.0 999.0 36.0 3006.0 23.0 530.0
8 494.0 253.0 14.0 43.0 1.0 29.0 14.0 10.0 3904.0 229.0
9 63.0 434.0 3.0 14.0 0.0 33.0 4.0 18.0 88.0 4331.0

In [7]:
# 2: train it again for another epoch
# Note the `force_restart` parameter removed.
# `epochs` is the TOTAL number of epochs for the trained model.
# Thus since we trained it before for a single epoch,
# we set "epochs": 2 in order to train it for another epoch.
# Without force_restart, training resumes from the last saved checkpoint
# (see the "Restoring parameters" lines in the output below).

with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        # Same hyperparameters as before, except for the total epoch count;
        # keeping them identical makes DyTB reuse the same log/checkpoint dir.
        hyperparameters={
            "epochs": 2,
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,
                "augmentation": {
                    "name": "FlipLR",
                    "fn": tf.image.random_flip_left_right,
                    "factor": 2,
                }
            },
            "gd": {
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                }
            }
        })


Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/W:0' shape=(3, 3, 256, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/fc/W:0' shape=(512, 512) dtype=float32_ref>
<tf.Variable 'VGG/fc/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/W:0' shape=(512, 10) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/b:0' shape=(10,) dtype=float32_ref>
Model VGG: trainable parameters: 14982474. Size: 59929.896 KB
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
2017-05-22 15:56:41.150636: step 2200, loss = 0.6995 (1489.9 examples/sec; 0.034 sec/batch)
2017-05-22 15:56:47.728752: step 2400, loss = 1.2488 (1825.8 examples/sec; 0.027 sec/batch)
2017-05-22 15:56:54.283833: step 2600, loss = 0.9503 (1493.5 examples/sec; 0.033 sec/batch)
2017-05-22 15:57:00.739826: step 2800, loss = 0.8572 (1549.1 examples/sec; 0.032 sec/batch)
2017-05-22 15:57:07.264069: step 3000, loss = 1.0171 (1491.0 examples/sec; 0.034 sec/batch)
2017-05-22 15:57:13.645133: step 3200, loss = 0.7402 (1206.1 examples/sec; 0.041 sec/batch)
2017-05-22 15:57:20.242200: step 3400, loss = 0.9686 (1741.3 examples/sec; 0.029 sec/batch)
2017-05-22 15:57:26.501463: step 3600, loss = 0.9150 (1587.2 examples/sec; 0.032 sec/batch)
2017-05-22 15:57:33.058041: step 3800, loss = 0.6963 (1597.9 examples/sec; 0.031 sec/batch)
2017-05-22 15:57:39.353625: step 4000, loss = 1.1301 (1725.1 examples/sec; 0.029 sec/batch)
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/model.ckpt-4000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/model.ckpt-4000
2017-05-22 15:57:57.462822 (2): train accuracy = 0.749 validation accuracy = 0.724
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-4000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-4000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-4000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-4000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-4000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best/model.ckpt-4000

In [8]:
# Summarize the per-split accuracy after the second epoch in a Pandas table
accuracies = {}
for split, stats in info["stats"].items():
    accuracies[split] = stats["accuracy"]
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df


Out[8]:
test train validation
accuracy 0.7241 0.74684 0.724

In [9]:
# Save last trained model info: the next cells overwrite `info` with the
# transfer-learning / fine-tuning results, but we still need the CIFAR-10
# model's paths (vggInfo["paths"]["best"]) as the surgery checkpoint source.
vggInfo = info

In [10]:
# 3: TRANSFER LEARNING
# Use the best model trained on Cifar10, to classify Cifar 100 images.
# Thus we train ONLY the softmax linear scope (that has 100 neurons, now),
# keeping constant any other previously trained layer.
# We load the weights from the previous trained model, or better
# DyTB saves the "best" model (w.r.t. a metric) in a separate folder
# So we extract the info["paths"]["best"] path, that's the path of the best
# model trained so far.
cifar100 = Cifar100.Cifar100()
with tf.device(device):
    transferInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,
                "augmentation": {
                    "name": "FlipLR",
                    "fn": tf.image.random_flip_left_right,
                    "factor": 2,
                }
            },
            "gd": {
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                    }
                }
        },
        force_restart=True,
        # "surgery": restore every variable from the CIFAR-10 best checkpoint
        # except the softmax layer (its shape changed 10 -> 100 classes), and
        # make ONLY the softmax layer trainable.
        surgery={
            "checkpoint_path": vggInfo["paths"]["best"],
            "exclude_scopes": "VGG/softmax_linear",
            "trainable_scopes": "VGG/softmax_linear"
        })


Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/W:0' shape=(3, 3, 256, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/fc/W:0' shape=(512, 512) dtype=float32_ref>
<tf.Variable 'VGG/fc/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/W:0' shape=(512, 100) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/b:0' shape=(100,) dtype=float32_ref>
Model VGG: trainable parameters: 15028644. Size: 60114.576 KB
[!] No checkpoint file found
2017-05-22 15:58:46.456678: step 0, loss = 4.8550 (18.5 examples/sec; 2.706 sec/batch)
2017-05-22 15:58:53.423647: step 200, loss = 4.6459 (1422.8 examples/sec; 0.035 sec/batch)
2017-05-22 15:58:59.787396: step 400, loss = 4.6378 (1633.2 examples/sec; 0.031 sec/batch)
2017-05-22 15:59:06.429601: step 600, loss = 4.6263 (1706.8 examples/sec; 0.029 sec/batch)
2017-05-22 15:59:12.840329: step 800, loss = 4.6233 (1677.9 examples/sec; 0.030 sec/batch)
2017-05-22 15:59:19.697066: step 1000, loss = 4.6179 (1469.4 examples/sec; 0.034 sec/batch)
2017-05-22 15:59:26.260296: step 1200, loss = 4.6106 (1363.8 examples/sec; 0.037 sec/batch)
2017-05-22 15:59:32.943655: step 1400, loss = 4.6101 (1664.8 examples/sec; 0.030 sec/batch)
2017-05-22 15:59:39.250269: step 1600, loss = 4.6050 (1547.8 examples/sec; 0.032 sec/batch)
2017-05-22 15:59:46.037072: step 1800, loss = 4.6037 (1602.8 examples/sec; 0.031 sec/batch)
2017-05-22 15:59:52.438255: step 2000, loss = 4.6048 (1608.5 examples/sec; 0.031 sec/batch)
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/model.ckpt-2000
2017-05-22 16:00:11.160246 (1): train accuracy = 0.010 validation accuracy = 0.010
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000

In [11]:
# 4: FINE TUNING:
# Use the model pointed by vggInfo to fine tune the whole network
# and tune it on Cifar100.
# Let's retrain the whole network end-to-end, starting from the learned weights.
# Just remove the "trainable_scopes" section from the surgery parameter:
# with no trainable_scopes restriction, every layer is trained.
with tf.device(device):
    fineTuningInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,
                "augmentation": {
                    "name": "FlipLR",
                    "fn": tf.image.random_flip_left_right,
                    "factor": 2,
                }
            },
            "gd": {
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                }
            }
        },
        force_restart=True,
        # Still exclude the softmax layer from the restore (10 vs 100 classes),
        # but this time all restored layers remain trainable.
        surgery={
            "checkpoint_path": vggInfo["paths"]["best"],
            "exclude_scopes": "VGG/softmax_linear"
        })


Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/W:0' shape=(3, 3, 256, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv8/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv9/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512/conv10/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv11/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv12/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/W:0' shape=(3, 3, 512, 512) dtype=float32_ref>
<tf.Variable 'VGG/512b2/conv13/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/fc/W:0' shape=(512, 512) dtype=float32_ref>
<tf.Variable 'VGG/fc/b:0' shape=(512,) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/W:0' shape=(512, 100) dtype=float32_ref>
<tf.Variable 'VGG/softmax_linear/b:0' shape=(100,) dtype=float32_ref>
Model VGG: trainable parameters: 15028644. Size: 60114.576 KB
[!] No checkpoint file found
2017-05-22 16:00:58.062810: step 0, loss = 4.7794 (23.6 examples/sec; 2.122 sec/batch)
2017-05-22 16:01:04.976976: step 200, loss = 4.6412 (1874.0 examples/sec; 0.027 sec/batch)
2017-05-22 16:01:11.427974: step 400, loss = 4.6381 (1594.2 examples/sec; 0.031 sec/batch)
2017-05-22 16:01:18.114518: step 600, loss = 4.6311 (1877.8 examples/sec; 0.027 sec/batch)
2017-05-22 16:01:24.520331: step 800, loss = 4.6238 (1550.1 examples/sec; 0.032 sec/batch)
2017-05-22 16:01:31.066939: step 1000, loss = 4.6181 (1501.1 examples/sec; 0.033 sec/batch)
2017-05-22 16:01:37.486925: step 1200, loss = 4.6126 (1623.5 examples/sec; 0.031 sec/batch)
2017-05-22 16:01:43.917332: step 1400, loss = 4.6089 (1853.3 examples/sec; 0.027 sec/batch)
2017-05-22 16:01:50.444527: step 1600, loss = 4.6070 (1363.7 examples/sec; 0.037 sec/batch)
2017-05-22 16:01:56.757677: step 1800, loss = 4.6046 (1828.3 examples/sec; 0.027 sec/batch)
2017-05-22 16:02:03.368083: step 2000, loss = 4.6055 (1511.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/model.ckpt-2000
2017-05-22 16:02:21.476925 (1): train accuracy = 0.010 validation accuracy = 0.010
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000
INFO:tensorflow:Restoring parameters from /data/pgaleone/dtb_work/examples/log/VGG/CIFAR-100_Adam_l2=1e-05_fliplr/best/model.ckpt-2000

In [12]:
# Compare the performance of Transfer learning and Fine Tuning:
# first, the transfer-learning accuracies per split
accuracies = {}
for split, stats in transferInfo["stats"].items():
    accuracies[split] = stats["accuracy"]
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df


Out[12]:
test train validation
accuracy 0.01 0.01032 0.01

In [13]:
# Same table for the fine-tuned model
accuracies = {}
for split, stats in fineTuningInfo["stats"].items():
    accuracies[split] = stats["accuracy"]
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df


Out[13]:
test train validation
accuracy 0.01 0.0101 0.01

In [14]:
# For completeness, let's see what an info object contains:
# the training args, the log/checkpoint paths, the per-split stats
# (accuracy + confusion matrix) and the step counters.
pprint.pprint(info, indent=4)


{   'args': {   'batch_size': 50,
                'checkpoint_path': '',
                'comment': '',
                'dataset': <dytb.inputs.predefined.Cifar10.Cifar10 object at 0x7f42e04a86a0>,
                'epochs': 2,
                'exclude_scopes': None,
                'force_restart': False,
                'gd': {   'args': {   'beta1': 0.9,
                                      'beta2': 0.99,
                                      'epsilon': 1e-08,
                                      'learning_rate': 0.001},
                          'optimizer': <class 'tensorflow.python.training.adam.AdamOptimizer'>},
                'lr_decay': {'enabled': False, 'epochs': 25, 'factor': 0.1},
                'model': <dytb.models.predefined.VGG.VGG object at 0x7f4289c98ef0>,
                'regularizations': {   'augmentation': {   'factor': 2,
                                                           'fn': <function random_flip_left_right at 0x7f4289d5c7b8>,
                                                           'name': 'FlipLR'},
                                       'l2': 1e-05},
                'seed': None,
                'trainable_scopes': None},
    'paths': {   'best': '/data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr/best',
                 'current': '/data/pgaleone/dtb_work/examples',
                 'log': '/data/pgaleone/dtb_work/examples/log/VGG/CIFAR-10_Adam_l2=1e-05_fliplr'},
    'stats': {   'test': {   'accuracy': 0.72409998625516891,
                             'confusion_matrix': array([[ 628.,    7.,  141.,   10.,   42.,    2.,   10.,   58.,   73.,
          29.],
       [   9.,  861.,    3.,    0.,    5.,    2.,   25.,    5.,   20.,
          70.],
       [  26.,    0.,  655.,   29.,  108.,   54.,  107.,   16.,    3.,
           2.],
       [  10.,    5.,  107.,  364.,   94.,  194.,  179.,   26.,   10.,
          12.],
       [   8.,    0.,   67.,   14.,  766.,   10.,   75.,   57.,    3.,
           0.],
       [   3.,    0.,   79.,  119.,   64.,  657.,   39.,   35.,    1.,
           2.],
       [   2.,    0.,   44.,   23.,   26.,    3.,  895.,    3.,    1.,
           3.],
       [   2.,    0.,   39.,   27.,   63.,   95.,   14.,  757.,    1.,
           2.],
       [  61.,   21.,   32.,    6.,    6.,    4.,   21.,    4.,  828.,
          17.],
       [  17.,   72.,    5.,    8.,    4.,    0.,   19.,   33.,   12.,
         830.]])},
                 'train': {   'accuracy': 0.74683998459577561,
                              'confusion_matrix': array([[  3.20800000e+03,   1.50000000e+01,   7.17000000e+02,
          3.60000000e+01,   2.13000000e+02,   1.40000000e+01,
          4.40000000e+01,   3.37000000e+02,   2.77000000e+02,
          1.06000000e+02],
       [  2.70000000e+01,   4.43800000e+03,   1.30000000e+01,
          9.00000000e+00,   1.90000000e+01,   2.00000000e+00,
          1.38000000e+02,   1.00000000e+01,   7.70000000e+01,
          2.66000000e+02],
       [  1.49000000e+02,   4.00000000e+00,   3.37000000e+03,
          1.65000000e+02,   5.16000000e+02,   1.84000000e+02,
          5.26000000e+02,   8.60000000e+01,   3.30000000e+01,
          5.00000000e+00],
       [  4.30000000e+01,   2.00000000e+00,   4.88000000e+02,
          1.95500000e+03,   4.33000000e+02,   1.08700000e+03,
          8.02000000e+02,   7.80000000e+01,   4.20000000e+01,
          3.00000000e+01],
       [  2.80000000e+01,   0.00000000e+00,   2.98000000e+02,
          7.20000000e+01,   3.93200000e+03,   6.40000000e+01,
          3.22000000e+02,   2.93000000e+02,   5.00000000e+00,
          4.00000000e+00],
       [  5.00000000e+00,   0.00000000e+00,   3.10000000e+02,
          6.19000000e+02,   3.21000000e+02,   3.36200000e+03,
          2.34000000e+02,   1.62000000e+02,   5.00000000e+00,
          1.20000000e+01],
       [  8.00000000e+00,   6.00000000e+00,   1.94000000e+02,
          1.13000000e+02,   1.28000000e+02,   2.10000000e+01,
          4.54100000e+03,   3.00000000e+00,   1.70000000e+01,
          4.00000000e+00],
       [  1.40000000e+01,   0.00000000e+00,   1.75000000e+02,
          1.22000000e+02,   3.33000000e+02,   3.60000000e+02,
          4.20000000e+01,   3.89200000e+03,   1.00000000e+01,
          1.60000000e+01],
       [  2.30000000e+02,   7.80000000e+01,   1.45000000e+02,
          3.40000000e+01,   8.00000000e+00,   5.00000000e+00,
          8.50000000e+01,   2.20000000e+01,   4.32600000e+03,
          4.60000000e+01],
       [  9.30000000e+01,   2.57000000e+02,   2.40000000e+01,
          4.70000000e+01,   2.10000000e+01,   1.40000000e+01,
          6.10000000e+01,   1.01000000e+02,   4.20000000e+01,
          4.35000000e+03]])},
                 'validation': {   'accuracy': 0.72399998486042028,
                                   'confusion_matrix': array([[ 627.,    7.,  141.,   10.,   42.,    2.,   10.,   59.,   73.,
          29.],
       [   9.,  861.,    3.,    0.,    5.,    2.,   25.,    5.,   20.,
          70.],
       [  26.,    0.,  655.,   29.,  108.,   54.,  107.,   16.,    3.,
           2.],
       [  10.,    5.,  107.,  363.,   94.,  194.,  179.,   26.,   10.,
          12.],
       [   8.,    0.,   67.,   14.,  766.,   10.,   75.,   57.,    3.,
           0.],
       [   3.,    0.,   79.,  119.,   64.,  658.,   39.,   35.,    1.,
           2.],
       [   2.,    0.,   44.,   23.,   26.,    3.,  895.,    3.,    1.,
           3.],
       [   2.,    0.,   39.,   27.,   63.,   95.,   14.,  757.,    1.,
           2.],
       [  61.,   21.,   32.,    6.,    6.,    4.,   21.,    4.,  828.,
          17.],
       [  17.,   72.,    5.,    8.,    4.,    0.,   19.,   33.,   12.,
         830.]])}},
    'steps': {'decay': 50000, 'epoch': 2000, 'log': 200, 'max': 4000}}