In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from edward.models import Categorical, Normal
import edward as ed
import pandas as pd
from tqdm import tqdm
In [2]:
# Use the TensorFlow method to download and/or load the data.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
In [3]:
ed.set_seed(314159)
N = 100 # number of images in a minibatch.
D = 784 # number of features.
K = 10 # number of classes.
In [4]:
# Define the feedforward neural network function.
def neural_network(x, W_0, W_1, b_0, b_1):
    h = tf.tanh(tf.matmul(x, W_0) + b_0)
    h = tf.matmul(h, W_1) + b_1
    return h
In [5]:
# Create a placeholder to hold the data (in minibatches) in a TensorFlow graph.
x = tf.placeholder(tf.float32, [N, D])
# Normal(0,1) priors for the variables. Note that the syntax assumes TensorFlow 1.1.
W_0 = Normal(loc=tf.zeros([D, 20]), scale=tf.ones([D, 20]))
W_1 = Normal(loc=tf.zeros([20, 10]), scale=tf.ones([20, 10]))
b_0 = Normal(loc=tf.zeros(20), scale=tf.ones(20))
b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10))
# Categorical likelihood for classification; the network outputs are interpreted as logits.
y = Categorical(logits=neural_network(x, W_0, W_1, b_0, b_1))
In [8]:
# Construct q(W) and q(b). In this case we assume Normal distributions,
# with a softplus transform to keep the scale parameters positive.
qW_0 = Normal(loc=tf.Variable(tf.random_normal([D, 20])),
              scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, 20]))))
qW_1 = Normal(loc=tf.Variable(tf.random_normal([20, 10])),
              scale=tf.nn.softplus(tf.Variable(tf.random_normal([20, 10]))))
qb_0 = Normal(loc=tf.Variable(tf.random_normal([20])),
              scale=tf.nn.softplus(tf.Variable(tf.random_normal([20]))))
qb_1 = Normal(loc=tf.Variable(tf.random_normal([10])),
              scale=tf.nn.softplus(tf.Variable(tf.random_normal([10]))))
In [23]:
# We use a placeholder for the labels in anticipation of the training data.
y_ph = tf.placeholder(tf.int32, [N])
# Define the VI inference technique, i.e. minimise the KL divergence between q and p.
inference = ed.KLqp({W_0: qW_0, W_1: qW_1,
                     b_0: qb_0, b_1: qb_1}, data={y: y_ph})
In [24]:
# Initialise the inference. The scale factor reweights the minibatch log-likelihood
# up to the size of the full training set, since each update only sees N images.
inference.initialize(n_iter=5000, n_print=100,
                     scale={y: float(mnist.train.num_examples) / N})
In [14]:
# We will use an interactive session.
sess = tf.InteractiveSession()
# Initialise all the variables in the session.
tf.global_variables_initializer().run()
In [15]:
# Let the training begin. We load the data in minibatches and update the VI inference using each new batch.
for _ in range(inference.n_iter):
    X_batch, Y_batch = mnist.train.next_batch(N)
    # TensorFlow gives the label data in a one-hot vector format. We convert that into a single label.
    Y_batch = np.argmax(Y_batch, axis=1)
    info_dict = inference.update(feed_dict={x: X_batch, y_ph: Y_batch})
    inference.print_progress(info_dict)
In [16]:
Y_batch.shape
Out[16]:
In [17]:
# Load the test images.
X_test = mnist.test.images
# TensorFlow gives the label data in a one-hot vector format. We convert that into a single label.
Y_test = np.argmax(mnist.test.labels,axis=1)
In [18]:
# Generate samples from the posterior and store them.
n_samples = 10
prob_lst = []
samples = []
w_0_samples = []
b_0_samples = []
w_1_samples = []
b_1_samples = []
for _ in tqdm(range(n_samples)):
    w0_samp = qW_0.sample()
    b0_samp = qb_0.sample()
    w1_samp = qW_1.sample()
    b1_samp = qb_1.sample()
    w_0_samples.append(w0_samp)
    b_0_samples.append(b0_samp)
    w_1_samples.append(w1_samp)
    b_1_samples.append(b1_samp)
    # Also compute the probability of each class for each (w, b) sample.
    prob = tf.nn.softmax(neural_network(X_test, w0_samp, w1_samp, b0_samp, b1_samp))
    prob_lst.append(prob.eval())
    sample = tf.concat([tf.reshape(w0_samp, [-1]), b0_samp], 0)
    samples.append(sample.eval())
In [28]:
prob
Out[28]:
In [29]:
# Here we compute the mean of the probabilities for each class over all the (w, b) samples.
# We then use the class with the maximum mean probability as the prediction.
# In other words, we have used the (w, b) samples to construct an ensemble of models and
# used their combined outputs to make the predictions.
Y_pred = np.argmax(np.mean(prob_lst,axis=0),axis=1)
print("accuracy in predicting the test data = ", (Y_pred == Y_test).mean()*100)
In [30]:
# Create a Pandas DataFrame of posterior samples.
samples_df = pd.DataFrame(data = samples, index=range(n_samples))
# Now create a small subset by taking the first 5 weights, labelled as W_0, ... , W_4.
samples_5 = pd.DataFrame(data = samples_df[list(range(5))].values,columns=["W_0", "W_1", "W_2", "W_3", "W_4"])
# We use a Seaborn PairGrid to make a triangle plot showing auto- and cross-correlations.
g = sns.PairGrid(samples_5, diag_sharey=False)
g.map_lower(sns.kdeplot, n_levels = 4,cmap="Blues_d")
g.map_upper(plt.scatter)
g.map_diag(sns.kdeplot,legend=False)
plt.subplots_adjust(top=0.95)
g.fig.suptitle('Joint posterior distribution of the first 5 weights')
Out[30]:
In [31]:
# Load a single image from the test data and its label.
test_image = X_test[1:2]
test_label = Y_test[1]
print('truth = ',test_label)
pixels = test_image.reshape((28, 28))
plt.imshow(pixels,cmap='Blues')
Out[31]:
In [ ]:
# Now check what the model predicts for each (w, b) sample from the posterior. This may take a few seconds...
sing_img_probs = []
n_samples = 100
for _ in tqdm(range(n_samples)):
    w0_samp = qW_0.sample()
    b0_samp = qb_0.sample()
    w1_samp = qW_1.sample()
    b1_samp = qb_1.sample()
    # Compute the probability of each class for each (w, b) sample.
    prob = tf.nn.softmax(neural_network(X_test[1:2], w0_samp, w1_samp, b0_samp, b1_samp))
    sing_img_probs.append(prob.eval())
In [ ]:
# Create a histogram of these predictions.
plt.hist(np.argmax(sing_img_probs,axis=2),bins=range(10))
plt.xticks(np.arange(0,10))
plt.xlim(0,10)
plt.xlabel("Accuracy of the prediction of the test digit")
plt.ylabel("Frequency")
In [ ]:
## How does the model handle unfamiliar data?
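In [ ]:
# A minimal sketch of one way to probe this (an illustrative addition, not part of the
# original run): feed a uniform random-noise image, which looks nothing like the training
# data, through the same posterior samples. For such an out-of-distribution input we
# would hope the prediction histogram spreads over several digits instead of
# concentrating on one.
noise_image = np.random.rand(1, D).astype(np.float32)
noise_img_probs = []
for _ in tqdm(range(100)):
    w0_samp = qW_0.sample()
    b0_samp = qb_0.sample()
    w1_samp = qW_1.sample()
    b1_samp = qb_1.sample()
    prob = tf.nn.softmax(neural_network(noise_image, w0_samp, w1_samp, b0_samp, b1_samp))
    noise_img_probs.append(prob.eval())
plt.hist(np.argmax(noise_img_probs, axis=2), bins=range(10))
plt.xticks(np.arange(0, 10))
plt.xlabel("Predicted digit for the random-noise image")
plt.ylabel("Frequency")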
In [ ]:
# Scratch check: class probabilities on the full test set using the last posterior sample drawn above.
prob = tf.nn.softmax(neural_network(X_test, w0_samp, w1_samp, b0_samp, b1_samp))
In [ ]:
a = prob.eval()
In [ ]:
np.argmax(a[1])
In [27]:
# Scratch check: the shape of a single draw from qW_0.
qW_0.eval().shape
Out[27]: