This is a sketch of crafting adversarial images for MNIST.


In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('/tmp/tensorflow/mnist/input_data', one_hot=True)


Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz

In [2]:
import seaborn as sns
sns.set_style('white')
colors_list = sns.color_palette("Paired", 10)

Recreate the network structure (the same architecture the saved model was trained with)


In [3]:
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)

def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

# First convolutional layer: 5x5 kernels, 1 input channel -> 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

x_image = tf.reshape(x, [-1, 28, 28, 1])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)  # 28x28 -> 14x14

# Second convolutional layer: 32 -> 64 feature maps.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)  # 14x14 -> 7x7

# Fully connected layer on the flattened 7x7x64 activations.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer: logits, softmax probabilities, and the training loss.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_pred = tf.nn.softmax(y_conv)

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))

Load the previously trained model from the checkpoint ./MNIST.ckpt saved by an earlier training run


In [4]:
model_path = './MNIST.ckpt'
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
tf.train.Saver().restore(sess, model_path)

In [5]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

Extract some "2" images from the test set


In [6]:
index_mask = np.where(mnist.test.labels[:, 2])[0]

In [7]:
# Note: np.random.choice samples with replacement by default, so an index can
# appear twice (as 1341 does below); pass replace=False to avoid duplicates.
subset_mask = np.random.choice(index_mask, 10)

In [8]:
subset_mask


Out[8]:
array([9010,   43, 1341, 3811, 6480, 7789, 8262, 1341, 8915, 6064])

In [9]:
origin_images = mnist.test.images[subset_mask]
origin_labels = mnist.test.labels[subset_mask]

In [10]:
origin_labels


Out[10]:
array([[ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

In [11]:
prediction = tf.argmax(y_pred, 1)
prediction_val = prediction.eval(feed_dict={x: origin_images, keep_prob: 1.0}, session=sess)
print("predictions", prediction_val)
probabilities = y_pred
probabilities_val = probabilities.eval(feed_dict={x: origin_images, keep_prob: 1.0}, session=sess)
print("probabilities", probabilities_val)


predictions [2 2 2 2 2 2 2 2 2 2]
probabilities [[  1.68790103e-11   1.04460350e-10   9.99990940e-01   4.87415809e-06
    6.91035228e-14   4.10699903e-13   1.38419029e-12   7.07499055e-08
    4.17573347e-06   2.63317731e-12]
 [  1.50550719e-07   7.52393389e-03   9.87179160e-01   3.15451246e-07
    5.28161833e-03   3.42207557e-07   1.41383189e-05   2.95582026e-07
    7.38369650e-08   3.04421571e-10]
 [  4.02434095e-14   5.18871945e-10   1.00000000e+00   7.36044212e-12
    3.96116959e-11   9.11448532e-16   2.68945306e-15   7.65700697e-12
    7.59310573e-15   7.34182578e-15]
 [  1.25397404e-03   7.70974736e-07   9.96126950e-01   2.49213097e-03
    6.36435834e-06   2.00931572e-05   1.77277016e-07   7.27009028e-05
    1.58615767e-05   1.11465779e-05]
 [  4.99005437e-05   1.13155475e-05   9.99935031e-01   2.62967137e-09
    1.03853085e-06   4.65414568e-10   3.99612041e-08   2.78365087e-06
    3.01886480e-08   3.22925263e-11]
 [  5.71090376e-13   3.97125555e-10   1.00000000e+00   1.17869670e-08
    1.28539714e-12   3.67561375e-14   2.48410064e-14   7.60623298e-09
    2.59703058e-11   3.01168183e-12]
 [  6.95616942e-09   2.31470767e-05   9.99972463e-01   2.54789541e-07
    1.43252720e-07   1.45724433e-09   1.19073404e-10   4.00210592e-06
    7.80533860e-09   9.53596993e-11]
 [  4.02434095e-14   5.18871945e-10   1.00000000e+00   7.36044212e-12
    3.96116959e-11   9.11448532e-16   2.68945306e-15   7.65700697e-12
    7.59310573e-15   7.34182578e-15]
 [  6.33278355e-14   8.21794410e-10   1.00000000e+00   3.18889290e-11
    5.03287386e-12   1.52704235e-14   2.85616291e-14   3.60496827e-10
    1.04232909e-12   1.76853720e-14]
 [  1.90051690e-11   2.66234167e-07   9.99999762e-01   2.22919159e-08
    6.02706843e-11   1.17165975e-13   6.59776234e-10   2.66559327e-11
    7.27193950e-09   2.79776321e-14]]

In [12]:
for i in range(0, 10):
    print('correct label:', np.argmax(origin_labels[i]))
    print('predict label:', prediction_val[i])
    print('Confidence:', np.max(probabilities_val[i]))
    plt.figure(figsize=(2, 2))
    plt.axis('off')
    # interpolation='none' shows the raw pixels without smoothing.
    plt.imshow(origin_images[i].reshape([28, 28]), interpolation='none', cmap=plt.cm.gray)
    plt.show()


correct label: 2
predict label: 2
Confidence: 0.999991
correct label: 2
predict label: 2
Confidence: 0.987179
correct label: 2
predict label: 2
Confidence: 1.0
correct label: 2
predict label: 2
Confidence: 0.996127
correct label: 2
predict label: 2
Confidence: 0.999935
correct label: 2
predict label: 2
Confidence: 1.0
correct label: 2
predict label: 2
Confidence: 0.999972
correct label: 2
predict label: 2
Confidence: 1.0
correct label: 2
predict label: 2
Confidence: 1.0
correct label: 2
predict label: 2
Confidence: 1.0

In [13]:
target_number = 6

In [14]:
target_labels = np.zeros(origin_labels.shape)

In [15]:
target_labels[:, target_number] = 1

In [16]:
origin_labels


Out[16]:
array([[ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

In [17]:
target_labels


Out[17]:
array([[ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.]])

In [18]:
img_gradient = tf.gradients(cross_entropy, x)[0]
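img_gradient is the gradient of the cross-entropy loss with respect to the input pixels rather than the weights. Feeding the target labels in as y_, both methods below repeatedly step the image against this gradient; writing $J(x, y_{\mathrm{target}})$ for the loss and $\eta$ (eta) for the step size, the updates are

$$x_{t+1} = x_t - \eta \,\nabla_x J(x_t,\, y_{\mathrm{target}}) \qquad \text{(Method 1: raw gradient)}$$
$$x_{t+1} = x_t - \eta \,\operatorname{sign}\!\left(\nabla_x J(x_t,\, y_{\mathrm{target}})\right) \qquad \text{(Method 2: gradient sign)}$$

Descending this loss pushes the network's prediction toward the target class.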

One adversarial example per image


In [19]:
eta = 0.5
iter_num = 10

Method 1: update using the value of the gradient

Here we update the image in proportion to the gradient itself. Ideally this produces an adversarial image with less overall wiggle: pixels where the gradient is near zero are barely touched, and the perturbation concentrates on the pixels that change the loss the most.


In [20]:
# Define the evaluation tensors once, outside the loop, so each iteration
# does not keep adding new ops to the graph.
prediction = tf.argmax(y_pred, 1)
probabilities = y_pred

adversarial_img = origin_images.copy()
for i in range(0, iter_num):
    gradient = img_gradient.eval({x: adversarial_img, y_: target_labels, keep_prob: 1.0})
    # Step against the raw gradient: large-gradient pixels move more.
    adversarial_img = adversarial_img - eta * gradient
    prediction_val = prediction.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print("predictions", prediction_val)
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print('Confidence 2:', probabilities_val[:, 2])
    print('Confidence 6:', probabilities_val[:, 6])
    print('-----------------------------------')


predictions [2 2 2 2 2 2 2 2 2 2]
Confidence 2: [ 0.99839801  0.50398463  0.99999976  0.94279677  0.99306434  0.99999869
  0.99774051  0.99999976  0.99999988  0.99998116]
Confidence 6: [  6.17733331e-09   3.38034965e-02   3.61205510e-11   5.49222386e-05
   1.65044228e-04   2.51908945e-11   4.98797135e-07   3.61205510e-11
   8.44649004e-11   1.06398193e-06]
-----------------------------------
predictions [2 6 2 2 6 2 2 2 2 2]
Confidence 2: [ 0.90054828  0.03599812  0.99992478  0.47941697  0.3857542   0.99992812
  0.88223279  0.99992478  0.99999475  0.99883395]
Confidence 6: [  5.24239840e-06   9.09998178e-01   3.14857857e-07   1.03679458e-02
   4.14035559e-01   2.03342374e-08   7.65050703e-04   3.14857573e-07
   9.70845377e-08   6.13783835e-04]
-----------------------------------
predictions [3 6 2 6 6 2 2 2 2 2]
Confidence 2: [ 0.20391738  0.02125967  0.99488431  0.12929185  0.01710233  0.99819332
  0.36685336  0.99488431  0.99973804  0.86787164]
Confidence 6: [  5.72559598e-04   9.47188795e-01   2.24302203e-04   3.12704206e-01
   9.43210959e-01   3.14465137e-06   7.00001568e-02   2.24301548e-04
   6.08862283e-05   1.23816974e-01]
-----------------------------------
predictions [8 6 2 6 6 2 6 2 2 6]
Confidence 2: [ 0.43293276  0.01552619  0.83097196  0.03268598  0.0135146   0.98310214
  0.17826064  0.83097178  0.97425836  0.11591232]
Confidence 6: [  1.79927237e-02   9.61492419e-01   3.42250541e-02   7.99241543e-01
   9.55691159e-01   1.36969538e-04   6.16287053e-01   3.42250690e-02
   1.96619965e-02   8.76042128e-01]
-----------------------------------
predictions [3 6 6 6 6 2 6 6 6 6]
Confidence 2: [ 0.17021255  0.01231071  0.19562197  0.01843761  0.01121253  0.88237929
  0.04999156  0.19562216  0.23194622  0.06901591]
Confidence 6: [ 0.28051642  0.9694531   0.53274441  0.88252693  0.96344072  0.00382947
  0.86769354  0.53274429  0.73012829  0.9247852 ]
-----------------------------------
predictions [6 6 6 6 6 2 6 6 6 6]
Confidence 2: [ 0.07458363  0.01019469  0.06034603  0.01337874  0.00959486  0.66686749
  0.03255163  0.06034593  0.07704844  0.05089864]
Confidence 6: [ 0.72089374  0.974684    0.84580153  0.91406661  0.96881437  0.0405265
  0.91041219  0.84580171  0.89538473  0.94383085]
-----------------------------------
predictions [6 6 6 6 6 2 6 6 6 6]
Confidence 2: [ 0.03893126  0.00872765  0.03884212  0.01059401  0.00841283  0.46066824
  0.02436219  0.0388421   0.05182601  0.04104275]
Confidence 6: [ 0.84897608  0.97832572  0.89983678  0.9321211   0.9727276   0.18205585
  0.93081117  0.8998369   0.92495954  0.95425797]
-----------------------------------
predictions [6 6 6 6 6 6 6 6 6 6]
Confidence 2: [ 0.02573399  0.00763769  0.02883839  0.0087844   0.00748532  0.29014409
  0.01946484  0.02883845  0.03953246  0.03457938]
Confidence 6: [ 0.89540702  0.98103446  0.92485535  0.9435631   0.97574246  0.44339713
  0.94352108  0.92485535  0.94018751  0.9611299 ]
-----------------------------------
predictions [6 6 6 6 6 6 6 6 6 6]
Confidence 2: [ 0.01902132  0.00679732  0.02307542  0.00752009  0.00675084  0.18342426
  0.01634321  0.0230754   0.03184611  0.02982386]
Confidence 6: [ 0.9198994   0.983105    0.93942189  0.95158327  0.97813272  0.62893689
  0.95190406  0.93942195  0.9500286   0.96620733]
-----------------------------------
predictions [6 6 6 6 6 6 6 6 6 6]
Confidence 2: [ 0.01520571  0.00613233  0.0192919   0.00655318  0.0061516   0.13245167
  0.01406015  0.01929193  0.02656174  0.02627148]
Confidence 6: [ 0.93462354  0.98475128  0.94931847  0.95763385  0.98007178  0.73152864
  0.95811945  0.94931847  0.95700026  0.97002554]
-----------------------------------

Method 2: update using the sign of the gradient

Here every pixel takes the same fixed step of size eta; only the direction comes from the gradient (the fast-gradient-sign update), regardless of the gradient's magnitude.


In [21]:
eta = 0.02
iter_num = 10

In [22]:
adversarial_img = origin_images.copy()
for i in range(0, iter_num):
    gradient = img_gradient.eval({x: adversarial_img, y_: target_labels, keep_prob: 1.0})
    # Fixed step of size eta for every pixel, using only the gradient's sign.
    adversarial_img = adversarial_img - eta * np.sign(gradient)
    prediction_val = prediction.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print("predictions", prediction_val)
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    print('Confidence 2:', probabilities_val[:, 2])
    print('Confidence 6:', probabilities_val[:, 6])
    print('-----------------------------------')


predictions [2 2 2 2 2 2 2 2 2 2]
Confidence 2: [ 0.99979955  0.86275303  1.          0.9779107   0.99902475  0.99999976
  0.99971646  1.          1.          0.99999583]
Confidence 6: [  1.66726910e-10   1.24624989e-03   4.56519967e-13   8.34497041e-06
   5.59669525e-06   1.79199841e-12   1.30735716e-08   4.56519967e-13
   3.46567068e-12   6.27776799e-08]
-----------------------------------
predictions [2 2 2 2 2 2 2 2 2 2]
Confidence 2: [ 0.99511552  0.40977556  0.99999964  0.85962117  0.98393112  0.99999559
  0.99609464  0.99999964  0.99999964  0.99994993]
Confidence 6: [  2.01981152e-08   8.79419371e-02   1.22339749e-10   4.89167869e-04
   1.19251851e-03   1.82640972e-10   1.73009698e-06   1.22339749e-10
   5.76917680e-10   6.33407490e-06]
-----------------------------------
predictions [2 6 2 2 2 2 2 2 2 2]
Confidence 2: [ 0.92691237  0.0824458   0.99998283  0.54052806  0.69164306  0.99994981
  0.94957453  0.99998283  0.99999595  0.99876642]
Confidence 6: [  1.97517147e-06   7.88923085e-01   2.59027715e-08   1.52549399e-02
   1.51991054e-01   1.05832694e-08   1.59343646e-04   2.59027715e-08
   7.01664717e-08   5.28034056e-04]
-----------------------------------
predictions [3 6 2 6 6 2 2 2 2 2]
Confidence 2: [ 0.38114282  0.00284192  0.99941409  0.21674696  0.04668415  0.99948311
  0.68562496  0.99941409  0.99993396  0.96271199]
Confidence 6: [  8.61597146e-05   9.92703676e-01   5.69670192e-06   2.89392889e-01
   8.71554732e-01   4.64192766e-07   6.55736076e-03   5.69670192e-06
   6.37889843e-06   3.00177168e-02]
-----------------------------------
predictions [2 6 2 6 6 2 2 2 2 6]
Confidence 2: [  5.83209932e-01   6.27083209e-05   9.90212023e-01   2.70510484e-02
   2.11280608e-03   9.95150447e-01   3.76711369e-01   9.90212023e-01
   9.98733342e-01   4.64150667e-01]
Confidence 6: [  2.44543725e-03   9.99762475e-01   3.85647581e-04   8.70872498e-01
   9.93551373e-01   1.34517468e-05   1.35343209e-01   3.85647581e-04
   4.81195719e-04   5.04597306e-01]
-----------------------------------
predictions [3 6 2 6 6 2 6 2 2 6]
Confidence 2: [  1.45977870e-01   2.26086172e-06   8.54788423e-01   2.14479375e-03
   8.69234063e-05   9.71471608e-01   1.03391998e-01   8.54788423e-01
   9.68404591e-01   4.15184237e-02]
Confidence 6: [  3.94732542e-02   9.99990463e-01   1.52496705e-02   9.87855494e-01
   9.99670744e-01   2.56853382e-04   7.45402575e-01   1.52496705e-02
   2.36869231e-02   9.47378218e-01]
-----------------------------------
predictions [6 6 2 6 6 2 6 2 2 6]
Confidence 2: [  2.31417045e-01   1.05129189e-07   3.71916145e-01   1.65524441e-04
   4.47992488e-06   8.64461243e-01   6.83465134e-03   3.71916145e-01
   5.43019056e-01   2.49437825e-03]
Confidence 6: [ 0.3545565   0.9999994   0.22301799  0.99881208  0.99998033  0.00355855
  0.98034912  0.22301799  0.42559034  0.99609852]
-----------------------------------
predictions [6 6 6 6 6 2 6 6 6 6]
Confidence 2: [  1.95937138e-02   6.35231245e-09   7.78834969e-02   2.18999739e-05
   2.25597717e-07   5.93729377e-01   3.56450648e-04   7.78834969e-02
   3.73114012e-02   1.58468843e-04]
Confidence 6: [ 0.85764623  1.          0.81097031  0.99987864  0.99999869  0.03135163
  0.99828064  0.81097031  0.94914585  0.9996804 ]
-----------------------------------
predictions [6 6 6 6 6 2 6 6 6 6]
Confidence 2: [  2.98802019e-03   4.13267927e-08   6.95284083e-03   2.13227167e-06
   1.25024888e-08   4.91525024e-01   7.30973698e-05   6.95284083e-03
   1.61215290e-03   1.72482469e-05]
Confidence 6: [ 0.98444527  1.          0.98080987  0.99998796  1.          0.19622776
  0.99981946  0.98080987  0.99635267  0.99996758]
-----------------------------------
predictions [6 6 6 6 6 6 6 6 6 6]
Confidence 2: [  2.74159829e-04   2.28510810e-09   5.19630907e-04   2.98820567e-07
   8.52226556e-09   2.46330112e-01   4.67527661e-06   5.19630907e-04
   1.09362918e-04   1.23258530e-06]
Confidence 6: [ 0.99770629  1.          0.99812537  0.99999869  1.          0.58065033
  0.99997211  0.99812537  0.99967241  0.99999702]
-----------------------------------

Take a look at an individual image


In [23]:
threshold = 0.99

In [24]:
eta = 0.001

probabilities = y_pred

# Attack a single image (index 1), iterating until the target-class
# confidence crosses the threshold.
adversarial_img = origin_images[1:2].copy()
adversarial_label = target_labels[1:2]
start_img = adversarial_img.copy()
confidence = 0
iter_num = 0
prob_history = list()
while confidence < threshold:
    gradient = img_gradient.eval({x: adversarial_img, y_: adversarial_label, keep_prob: 1.0})
    adversarial_img -= eta * np.sign(gradient)
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    confidence = probabilities_val[:, target_number]
    prob_history.append(probabilities_val[0])
    iter_num += 1
print(iter_num)


69

In [25]:
sns.set_style('whitegrid')
prob_history = np.array(prob_history)

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)

# One confidence curve per digit class.
for i, record in enumerate(prob_history.T):
    ax.plot(record, color=colors_list[i])

ax.legend([str(d) for d in range(10)],
          loc='center left', bbox_to_anchor=(1.05, 0.5), fontsize=14)
ax.set_xlabel('Iteration')
ax.set_ylabel('Prediction Confidence')


Out[25]:
<matplotlib.text.Text at 0x7f41047d1630>

In [26]:
sns.set_style('white')
fig = plt.figure(figsize=(9, 4))

ax1 = fig.add_subplot(1, 3, 1)
ax1.axis('off')
ax1.imshow(start_img.reshape([28, 28]), interpolation='none', cmap=plt.cm.gray)
ax1.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[0][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[0][6]))

ax2 = fig.add_subplot(1, 3, 2)
ax2.axis('off')
ax2.imshow((adversarial_img - start_img).reshape([28, 28]), interpolation='none', cmap=plt.cm.gray)
ax2.title.set_text('Delta')

ax3 = fig.add_subplot(1, 3, 3)
ax3.axis('off')
ax3.imshow(adversarial_img.reshape([28, 28]), interpolation='none', cmap=plt.cm.gray)
ax3.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[-1][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[-1][6]))

plt.show()

# Squared L2 norm of the perturbation.
print("Difference Measure:", np.sum((adversarial_img - start_img) ** 2))


Difference Measure: 2.06413
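To make the "less wiggle" comparison concrete, here is a minimal numpy sketch (assuming adversarial_img and start_img from the cells above are still in scope; it can be run after either attack variant) that reports the maximum per-pixel change and how many pixels were touched, alongside the squared L2 norm printed above:

delta = adversarial_img - start_img  # the adversarial perturbation
print("squared L2 norm:", np.sum(delta ** 2))          # same quantity as the Difference Measure
print("max per-pixel change:", np.max(np.abs(delta)))  # the L-infinity norm
print("fraction of pixels touched:", np.mean(np.abs(delta) > 1e-8))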

In [27]:
eta = 0.01

probabilities = y_pred

# Same image and stopping rule as above, but stepping against the raw
# gradient instead of its sign.
adversarial_img = origin_images[1:2].copy()
adversarial_label = target_labels[1:2]
start_img = adversarial_img.copy()
confidence = 0
iter_num = 0
prob_history = list()
while confidence < threshold:
    gradient = img_gradient.eval({x: adversarial_img, y_: adversarial_label, keep_prob: 1.0})
    adversarial_img -= eta * gradient
    probabilities_val = probabilities.eval(feed_dict={x: adversarial_img, keep_prob: 1.0}, session=sess)
    confidence = probabilities_val[:, target_number]
    prob_history.append(probabilities_val[0])
    iter_num += 1
print(iter_num)


109

In [28]:
sns.set_style('white')
fig = plt.figure(figsize=(9, 4))

ax1 = fig.add_subplot(1, 3, 1)
ax1.axis('off')
ax1.imshow(start_img.reshape([28, 28]), interpolation='none', cmap=plt.cm.gray)
ax1.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[0][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[0][6]))

ax2 = fig.add_subplot(1, 3, 2)
ax2.axis('off')
ax2.imshow((adversarial_img - start_img).reshape([28, 28]), interpolation='none', cmap=plt.cm.gray)
ax2.title.set_text('Delta')

ax3 = fig.add_subplot(1, 3, 3)
ax3.axis('off')
ax3.imshow(adversarial_img.reshape([28, 28]), interpolation='none', cmap=plt.cm.gray)
ax3.title.set_text('Confidence for 2: ' + '{:.4f}'.format(prob_history[-1][2])
                   + '\nConfidence for 6: ' + '{:.4f}'.format(prob_history[-1][6]))

plt.show()

# Squared L2 norm of the perturbation.
print("Difference Measure:", np.sum((adversarial_img - start_img) ** 2))


Difference Measure: 1.59228

In [29]:
sns.set_style('whitegrid')
prob_history = np.array(prob_history)

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)

# One confidence curve per digit class.
for i, record in enumerate(prob_history.T):
    ax.plot(record, color=colors_list[i])

ax.legend([str(d) for d in range(10)],
          loc='center left', bbox_to_anchor=(1.05, 0.5), fontsize=14)
ax.set_xlabel('Iteration')
ax.set_ylabel('Prediction Confidence')


Out[29]:
<matplotlib.text.Text at 0x7f4124680208>

We can observe that when the update takes the value of the gradient into account, the gradient shrinks as the image approaches a local optimum, so the effective step size decays naturally and helps the attack converge.
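A quick way to verify this is a minimal sketch (assuming the session, placeholders, eta, and adversarial_label defined above are still live) that re-runs the gradient-value attack while recording the L2 norm of each update step; the norms should shrink as the target confidence saturates:

adversarial_img = origin_images[1:2].copy()
step_norms = []
for _ in range(100):  # fixed iteration budget, just for illustration
    gradient = img_gradient.eval({x: adversarial_img, y_: adversarial_label, keep_prob: 1.0})
    step = eta * gradient
    adversarial_img -= step
    # Track how large each update is; this decays near a local optimum.
    step_norms.append(np.linalg.norm(step))

plt.plot(step_norms)
plt.xlabel('Iteration')
plt.ylabel('L2 norm of update step')
plt.show()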

