Monte Carlo Dropout -- Example Notebook

This notebook is a modified fork of an existing Bayesian MNIST classifier implementation.

In this notebook, a Bayesian LeNet model is trained on the MNIST dataset.

A Bayesian inference function then estimates the trained model's mean prediction accuracy and the associated prediction uncertainty.
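
In a minimal sketch (assuming a Keras model whose dropout layers stay active at inference, as built below), MC dropout averages the class probabilities from several stochastic forward passes and uses their spread as an uncertainty signal; the helper name mc_dropout_predict is illustrative, not part of the original notebook.

In [0]:
import numpy as np

def mc_dropout_predict(model, x, T=10):
    # Run T stochastic forward passes; each pass samples fresh dropout masks.
    preds = np.stack([model.predict(x) for _ in range(T)])  # (T, N, num_classes)
    # The mean over passes approximates the posterior predictive distribution;
    # the standard deviation across passes is a simple uncertainty measure.
    return preds.mean(axis=0), preds.std(axis=0)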


In [1]:
! wget https://media.githubusercontent.com/media/rahulremanan/python_tutorial/master/Machine_Vision/07_Bayesian_deep_learning/weights/bayesianLeNet.h5 -O ./bayesianLeNet.h5


--2019-05-20 14:12:42--  https://media.githubusercontent.com/media/rahulremanan/python_tutorial/master/Machine_Vision/07_Bayesian_deep_learning/weights/bayesianLeNet.h5
Resolving media.githubusercontent.com (media.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to media.githubusercontent.com (media.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1750208 (1.7M) [application/octet-stream]
Saving to: ‘./bayesianLeNet.h5’

./bayesianLeNet.h5  100%[===================>]   1.67M  --.-KB/s    in 0.09s   

2019-05-20 14:12:42 (19.2 MB/s) - ‘./bayesianLeNet.h5’ saved [1750208/1750208]

Build a Bayesian network

The network used in this example is LeNet. A standard (deterministic) version is defined first, followed by a Bayesian variant that adds Monte Carlo dropout layers.


In [2]:
from keras import Input, Model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout


Using TensorFlow backend.

In [0]:
def LeNet(input_shape, num_classes):
  """A standard (deterministic) LeNet-style CNN, for comparison."""

  inp = Input(shape=input_shape)

  # Two convolution + max-pooling blocks
  x = Conv2D(filters=20, kernel_size=5, strides=1)(inp)
  x = MaxPool2D(pool_size=2, strides=2)(x)

  x = Conv2D(filters=50, kernel_size=5, strides=1)(x)
  x = MaxPool2D(pool_size=2, strides=2)(x)
  x = Flatten()(x)

  # Fully connected classification head
  x = Dense(500, activation='relu')(x)
  x = Dense(num_classes, activation='softmax')(x)

  return Model(inp, x, name='LeNet')
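
As an illustrative check (not part of the original run), the deterministic LeNet can be instantiated for 28x28 grayscale MNIST inputs and inspected with summary():

In [0]:
# Hypothetical sanity check: build the plain LeNet and inspect its layers.
lenet = LeNet(input_shape=(28, 28, 1), num_classes=10)
lenet.summary()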

In [0]:
def bayesianLeNet(input_shape, num_classes, enable_dropout=True):
  """
    An example implementation of a Bayesian LeNet convolutional neural network.

    This network approximates Bayesian inference via Monte Carlo estimation
    over dropout masks (MC dropout).

    To enable the Bayesian approximation, set the enable_dropout flag to True.
    Passing training=enable_dropout to each Dropout call keeps the dropout
    layers stochastic at inference time, not just during training.
  """

  inp = Input(shape=input_shape)
  x = Conv2D(filters=20, kernel_size=5, strides=1)(inp)

  x = Dropout(0.5)(x, training=enable_dropout)
  x = MaxPool2D(pool_size=2, strides=2)(x)
  x = Conv2D(filters=50, kernel_size=5, strides=1)(x)

  x = Dropout(0.5)(x, training=enable_dropout)
  x = MaxPool2D(pool_size=2, strides=2)(x)
  x = Flatten()(x)

  x = Dropout(0.5)(x, training=enable_dropout)
  x = Dense(500, activation='relu')(x)

  x = Dropout(0.5)(x, training=enable_dropout)
  x = Dense(num_classes, activation='softmax')(x)

  return Model(inp, x, name='bayesianLeNet')
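
Because each Dropout layer is called with training=enable_dropout, dropout stays stochastic at prediction time. A quick illustrative check (not in the original notebook): two forward passes over the same input should generally disagree.

In [0]:
import numpy as np

# Hypothetical check: with dropout active at inference, repeated predictions
# on identical input differ between passes.
m = bayesianLeNet(input_shape=(28, 28, 1), num_classes=10)
x = np.random.rand(1, 28, 28, 1).astype(np.float32)
p1, p2 = m.predict(x), m.predict(x)
print(np.allclose(p1, p2))  # expected: False, since each pass samples new masks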

In [0]:
import os
from keras.callbacks import TensorBoard
from keras.datasets import mnist
from keras import utils
import numpy as np
from tqdm import tqdm

In [0]:
TENSORBOARD_DIR = './tensorboard'
MODEL_PATH = './bayesianLeNet.h5'

In [0]:
def make_dirs():
    if not os.path.isdir(TENSORBOARD_DIR):
        os.makedirs(TENSORBOARD_DIR)

In [0]:
make_dirs()

In [0]:
def prepare_data():
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Add an explicit channel axis and scale pixel values to [0, 1]
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
    X_train = X_train.astype(np.float32) / 255.
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
    X_test = X_test.astype(np.float32) / 255.

    # One-hot encode the digit labels
    y_train, y_test = utils.to_categorical(y_train, 10), utils.to_categorical(y_test, 10)

    return (X_train, y_train), (X_test, y_test)

In [10]:
(X_train, y_train), (X_test, y_test) = prepare_data()


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
11493376/11490434 [==============================] - 0s 0us/step
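
A quick sanity check (an illustrative cell, not from the original run): after prepare_data, the images carry an explicit channel axis and the labels are one-hot vectors.

In [0]:
print(X_train.shape, y_train.shape)  # expected: (60000, 28, 28, 1) (60000, 10)
print(X_test.shape, y_test.shape)    # expected: (10000, 28, 28, 1) (10000, 10)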

In [0]:
bayesian_network = True
download_weights = True
batch_size = 1000
epochs = 10

In [12]:
if bayesian_network:
  model = bayesianLeNet(input_shape=X_train.shape[1:],
                        num_classes=10)
else:
  model = LeNet(input_shape=X_train.shape[1:],
                num_classes=10)


WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Compile model


In [0]:
model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['acc'])

Load model weights


In [14]:
if os.path.exists(MODEL_PATH):
  model.load_weights(MODEL_PATH)
  print('Loaded model weights from: {}'.format(MODEL_PATH))


Loaded model weights from: ./bayesianLeNet.h5

Train a Bayesian network


In [15]:
model.fit(x=X_train,
          y=y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_test, 
                           y_test),
          callbacks=[TensorBoard(log_dir=os.path.join(TENSORBOARD_DIR, 
                                                      model.name), 
                                 write_images=True)])


WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 6s 96us/step - loss: 0.0436 - acc: 0.9865 - val_loss: 0.0495 - val_acc: 0.9861
Epoch 2/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0418 - acc: 0.9868 - val_loss: 0.0492 - val_acc: 0.9842
Epoch 3/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0420 - acc: 0.9867 - val_loss: 0.0437 - val_acc: 0.9860
Epoch 4/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0404 - acc: 0.9872 - val_loss: 0.0417 - val_acc: 0.9862
Epoch 5/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0390 - acc: 0.9874 - val_loss: 0.0449 - val_acc: 0.9847
Epoch 6/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0398 - acc: 0.9876 - val_loss: 0.0422 - val_acc: 0.9866
Epoch 7/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0382 - acc: 0.9874 - val_loss: 0.0468 - val_acc: 0.9857
Epoch 8/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0383 - acc: 0.9875 - val_loss: 0.0397 - val_acc: 0.9869
Epoch 9/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0384 - acc: 0.9877 - val_loss: 0.0457 - val_acc: 0.9858
Epoch 10/10
60000/60000 [==============================] - 3s 43us/step - loss: 0.0382 - acc: 0.9876 - val_loss: 0.0427 - val_acc: 0.9865
Out[15]:
<keras.callbacks.History at 0x7fdfd5489a20>

Save model weights


In [0]:
model.save_weights(MODEL_PATH)

Build a Bayesian inference function


In [0]:
def bayesianInference(model, X_test, y_test, eval_steps=10):
    batch_size = 1000
    
    bayesian_error = []

    for batch_id in tqdm(range(X_test.shape[0] // batch_size)):
        # take batch of data
        x = X_test[batch_id * batch_size: (batch_id + 1) * batch_size]
        # init empty predictions
        y_ = np.zeros((eval_steps, batch_size, y_test[0].shape[0]))

        for sample_id in range(eval_steps):
            # save predictions from a sample pass
            y_[sample_id] = model.predict(x, batch_size)

        # average over all passes
        mean_y = y_.mean(axis=0)
        # evaluate against labels
        y = y_test[batch_size * batch_id: (batch_id + 1) * batch_size]
        # compute error
        point_error = np.count_nonzero(np.not_equal(mean_y.argmax(axis=1), y.argmax(axis=1)))
        bayesian_error.append(point_error)

    # Aggregate: error rate over the whole test set; the spread of the
    # per-batch error counts serves as the uncertainty estimate
    mean_error = np.sum(bayesian_error) / X_test.shape[0]
    uncertainty = np.std(bayesian_error) / X_test.shape[0]
    mean_accuracy = 1 - mean_error

    return [mean_accuracy, uncertainty]
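
bayesianInference reports a single dataset-level uncertainty (the spread of per-batch error counts). A complementary, illustrative sketch (not part of the original notebook) is to compute a per-example uncertainty from the standard deviation of the MC predictions:

In [0]:
def per_sample_uncertainty(model, x, eval_steps=10):
    # Stack eval_steps stochastic passes: shape (eval_steps, N, num_classes).
    y_ = np.stack([model.predict(x) for _ in range(eval_steps)])
    # Standard deviation across passes, taken at each sample's predicted class.
    mean_y = y_.mean(axis=0)
    top = mean_y.argmax(axis=1)
    return y_.std(axis=0)[np.arange(x.shape[0]), top]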

Run Bayesian inference


In [18]:
if bayesian_network:
  out = bayesianInference(model, X_test, y_test)
  print('\n')
  print('\nValidation accuracy: {} ...'.format(out[0]))
  print('Validation uncertainty: {} ...'.format(out[1]))
else:
  (_, acc) = model.evaluate(x=X_test,
                            y=y_test,
                            batch_size=batch_size)
  print('\nValidation accuracy: {}'.format(acc))


100%|██████████| 10/10 [00:01<00:00,  6.07it/s]


Validation accuracy: 0.9944 ...
Validation uncertainty: 0.00032619012860600184 ...


Download model weights


In [0]:
if download_weights:
  from google.colab import files
  files.download(MODEL_PATH)