In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# TF 1.x tutorial loader; downloads MNIST and returns one-hot labels.
mnist = input_data.read_data_sets('/tmp/tensorflow/mnist/input_data', one_hot=True)
In [2]:
import seaborn as sns
sns.set_style('white')
# One color per digit class for the confidence plots below.
colors_list = sns.color_palette("Paired", 10)
In [3]:
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

# First convolutional layer: 28x28x1 -> 28x28x32, pooled to 14x14x32.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer: 14x14x32 -> 14x14x64, pooled to 7x7x64.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer on the flattened 7x7x64 feature map.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout (keep_prob is fed as 1.0 at inference time).
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer: logits and softmax probabilities.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_pred = tf.nn.softmax(y_conv)

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
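A quick sanity check on the layer sizes: the two 2x2 poolings shrink the 28x28 input to 14x14 and then 7x7, which is where the 7*7*64 flatten size above comes from. The static shapes can be confirmed with TF 1.x's get_shape() (a minimal check, not part of the original notebook):

print(h_pool1.get_shape())  # (?, 14, 14, 32)
print(h_pool2.get_shape())  # (?, 7, 7, 64)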
In [4]:
model_path = './MNIST.ckpt'
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Restoring the checkpoint overwrites the freshly initialized variables,
# so the initializer above is only a fallback.
tf.train.Saver().restore(sess, model_path)
In [5]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [6]:
# Indices of all test images whose label is the digit 2.
index_mask = np.where(mnist.test.labels[:, 2])[0]
In [7]:
# Randomly sample 10 of those images.
subset_mask = np.random.choice(index_mask, 10)
In [8]:
subset_mask
Out[8]:
[array of the 10 randomly sampled test-set indices]
In [9]:
origin_images = mnist.test.images[subset_mask]
origin_labels = mnist.test.labels[subset_mask]
In [10]:
origin_labels
Out[10]:
[10 one-hot label rows, each with a 1 in column 2]
In [11]:
prediction = tf.argmax(y_pred, 1)
prediction_val = prediction.eval(feed_dict={x: origin_images, keep_prob: 1.0}, session=sess)
print("predictions", prediction_val)
probabilities = y_pred
probabilities_val = probabilities.eval(feed_dict={x: origin_images, keep_prob: 1.0}, session=sess)
print("probabilities", probabilities_val)
In [12]:
for i in range(10):
    print('correct label:', np.argmax(origin_labels[i]))
    print('predict label:', prediction_val[i])
    print('Confidence:', np.max(probabilities_val[i]))
    plt.figure(figsize=(2, 2))
    plt.axis('off')
    plt.imshow(origin_images[i].reshape([28, 28]), interpolation=None, cmap=plt.cm.gray)
    plt.show()
In [13]:
target_number = 6
In [14]:
# Build one-hot target labels that all point at the target digit.
target_labels = np.zeros(origin_labels.shape)
In [15]:
target_labels[:, target_number] = 1
In [16]:
origin_labels
Out[16]:
[10 one-hot label rows, each with a 1 in column 2]
In [17]:
target_labels
Out[17]:
[10 one-hot label rows, each with a 1 in column 6]
In [18]:
# Gradient of the cross-entropy loss with respect to the input image pixels.
img_gradient = tf.gradients(cross_entropy, x)[0]
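With the target labels fed in as y_, stepping the image against this gradient implements a targeted attack: each update $x \leftarrow x - \eta\,\nabla_x L(x, y_{\text{target}})$ lowers the loss toward the target class, i.e. pushes the image toward whatever the network reads as a 6.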
In [19]:
eta = 0.5
iter_num = 10
In [20]:
adversarial_img = origin_images.copy()
prediction = tf.argmax(y_pred, 1)
probabilities = y_pred
for i in range(iter_num):
    # Step against the gradient of the loss w.r.t. the target labels.
    gradient = img_gradient.eval({x: adversarial_img, y_: target_labels, keep_prob: 1.0})
    adversarial_img = adversarial_img - eta * gradient
    prediction_val = prediction.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print("predictions", prediction_val)
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print('Confidence 2:', probabilities_val[:, 2])
    print('Confidence 6:', probabilities_val[:, 6])
    print('-----------------------------------')
In [21]:
eta = 0.02
iter_num = 10
In [22]:
adversarial_img = origin_images.copy()
prediction = tf.argmax(y_pred, 1)
probabilities = y_pred
for i in range(iter_num):
    # Sign-of-gradient step: move every pixel by a fixed amount eta.
    gradient = img_gradient.eval({x: adversarial_img, y_: target_labels, keep_prob: 1.0})
    adversarial_img = adversarial_img - eta * np.sign(gradient)
    prediction_val = prediction.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print("predictions", prediction_val)
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print('Confidence 2:', probabilities_val[:, 2])
    print('Confidence 6:', probabilities_val[:, 6])
    print('-----------------------------------')
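This is the iterative form of the fast gradient sign method: the update $x \leftarrow x - \eta\,\operatorname{sign}(\nabla_x L(x, y_{\text{target}}))$ changes every pixel by exactly $\eta$ per step, which is why a much smaller $\eta$ (0.02 instead of 0.5) suffices here.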
In [23]:
threshold = 0.99
In [24]:
eta = 0.001
prediction = tf.argmax(y_pred, 1)
probabilities = y_pred
adversarial_img = origin_images[1:2].copy()
adversarial_label = target_labels[1:2]
start_img = adversarial_img.copy()
confidence = 0
iter_num = 0
prob_history = list()
while confidence < threshold:
    # Keep taking sign-of-gradient steps until the confidence for the
    # target class crosses the threshold.
    gradient = img_gradient.eval({x: adversarial_img, y_: adversarial_label, keep_prob: 1.0})
    adversarial_img -= eta * np.sign(gradient)
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    confidence = probabilities_val[:, target_number]
    prob_history.append(probabilities_val[0])
    iter_num += 1
print(iter_num)
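One caveat: the loop only terminates once the target confidence crosses the threshold, so a stubborn example would spin forever. A simple safeguard (the max_iter cap here is a hypothetical addition, not part of the original loop) would be:

max_iter = 5000  # hypothetical safety cap
while confidence < threshold and iter_num < max_iter:
    gradient = img_gradient.eval({x: adversarial_img, y_: adversarial_label, keep_prob: 1.0})
    adversarial_img -= eta * np.sign(gradient)
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    confidence = probabilities_val[:, target_number]
    prob_history.append(probabilities_val[0])
    iter_num += 1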
In [25]:
sns.set_style('whitegrid')
prob_history = np.array(prob_history)
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
for i, record in enumerate(prob_history.T):
    plt.plot(record, color=colors_list[i])
ax.legend([str(x) for x in range(10)],
          loc='center left', bbox_to_anchor=(1.05, 0.5), fontsize=14)
ax.set_xlabel('Iteration')
ax.set_ylabel('Prediction Confidence')
Out[25]:
[figure: per-class prediction confidence vs. iteration]
In [26]:
sns.set_style('white')
fig = plt.figure(figsize=(9, 4))
ax1 = fig.add_subplot(1, 3, 1)
ax1.axis('off')
ax1.imshow(start_img.reshape([28, 28]), interpolation=None, cmap=plt.cm.gray)
ax1.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[0][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[0][6]))
ax2 = fig.add_subplot(1, 3, 2)
ax2.axis('off')
ax2.imshow((adversarial_img - start_img).reshape([28, 28]), interpolation=None, cmap=plt.cm.gray)
ax2.title.set_text('Delta')
ax3 = fig.add_subplot(1, 3, 3)
ax3.axis('off')
ax3.imshow(adversarial_img.reshape([28, 28]), interpolation=None, cmap=plt.cm.gray)
ax3.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[-1][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[-1][6]))
plt.show()
print("Difference Measure:", np.sum((adversarial_img - start_img) ** 2))
In [27]:
eta = 0.01
prediction = tf.argmax(y_pred, 1)
probabilities = y_pred
adversarial_img = origin_images[1:2].copy()
adversarial_label = target_labels[1:2]
start_img = adversarial_img.copy()
confidence = 0
iter_num = 0
prob_history = list()
while confidence < threshold:
    # Raw-gradient steps: the step size now scales with the gradient magnitude.
    gradient = img_gradient.eval({x: adversarial_img, y_: adversarial_label, keep_prob: 1.0})
    adversarial_img -= eta * gradient
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    confidence = probabilities_val[:, target_number]
    prob_history.append(probabilities_val[0])
    iter_num += 1
print(iter_num)
In [28]:
sns.set_style('white')
fig = plt.figure(figsize=(9, 4))
ax1 = fig.add_subplot(1, 3, 1)
ax1.axis('off')
ax1.imshow(start_img.reshape([28, 28]), interpolation=None, cmap=plt.cm.gray)
ax1.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[0][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[0][6]))
ax2 = fig.add_subplot(1, 3, 2)
ax2.axis('off')
ax2.imshow((adversarial_img - start_img).reshape([28, 28]), interpolation=None, cmap=plt.cm.gray)
ax2.title.set_text('Delta')
ax3 = fig.add_subplot(1, 3, 3)
ax3.axis('off')
ax3.imshow(adversarial_img.reshape([28, 28]), interpolation=None, cmap=plt.cm.gray)
ax3.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[-1][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[-1][6]))
plt.show()
print("Difference Measure:", np.sum((adversarial_img - start_img) ** 2))
In [29]:
sns.set_style('whitegrid')
prob_history = np.array(prob_history)
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
for i, record in enumerate(prob_history.T):
    plt.plot(record, color=colors_list[i])
ax.legend([str(x) for x in range(10)],
          loc='center left', bbox_to_anchor=(1.05, 0.5), fontsize=14)
ax.set_xlabel('Iteration')
ax.set_ylabel('Prediction Confidence')
Out[29]:
[figure: per-class prediction confidence vs. iteration]
We can observe that when the raw gradient values are used rather than only their sign, the step size shrinks as the image approaches a local optimum (the gradient magnitude goes to zero there), which helps the iteration converge.
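In other words, the two update rules scale differently: with $\Delta x = -\eta\,\nabla_x L$ the per-step change is proportional to $\lVert\nabla_x L\rVert$, which vanishes near an optimum, whereas $\Delta x = -\eta\,\operatorname{sign}(\nabla_x L)$ moves every pixel by a constant $\eta$ no matter how close the attack is, so the confidence can keep oscillating around the threshold.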