In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals  

import os
import numpy as np
import pandas as pd
from sklearn import linear_model, preprocessing, cluster
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.linalg as slin
import scipy.sparse.linalg as sparselin
import scipy.sparse as sparse
import IPython
import copy

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import base

from influence.inceptionModel import BinaryInceptionModel
from influence.binaryLogisticRegressionWithLBFGS import BinaryLogisticRegressionWithLBFGS
import influence.experiments as experiments
from influence.image_utils import plot_flat_bwimage, plot_flat_bwgrad, plot_flat_colorimage, plot_flat_colorgrad
from influence.dataset import DataSet
from influence.dataset_poisoning import generate_inception_features

from load_animals import load_animals, load_dogfish_with_koda

# Apply seaborn's default plot theme for every figure in this notebook.
# Per the seaborn documentation, color_codes=True also remaps matplotlib's
# single-letter color shorthands (e.g. 'b', 'g', 'r') to the seaborn palette.
sns.set(color_codes=True)


Using TensorFlow backend.

Attacking individual test images


In [5]:
# Dataset configuration: a two-class problem with a fixed number of
# train/test examples per class. These counts are also baked into the
# names of the cached feature files read below.
num_classes = 2
num_train_ex_per_class = 900
num_test_ex_per_class = 300

dataset_name = 'dogfish_%s_%s' % (num_train_ex_per_class, num_test_ex_per_class)

# Raw image data sets; used in this notebook for the labels and for comparing
# original training images against their poisoned versions.
image_data_sets = load_animals(
    num_train_ex_per_class=num_train_ex_per_class, 
    num_test_ex_per_class=num_test_ex_per_class,
    classes=['dog', 'fish'])

# Cached feature archives. np.load on an .npz file returns a lazily-read
# NpzFile that keeps the archive open, so use it as a context manager: each
# array is fully materialised when it is indexed, and the file handle is
# released as soon as the DataSet has been built.
with np.load('output/%s_inception_features_new_train.npz' % dataset_name) as train_f:
    train = DataSet(train_f['inception_features_val'], train_f['labels'])
with np.load('output/%s_inception_features_new_test.npz' % dataset_name) as test_f:
    test = DataSet(test_f['inception_features_val'], test_f['labels'])
validation = None  # no validation split is used

data_sets = base.Datasets(train=train, validation=validation, test=test)

# Labels are taken from the image data sets (not from the feature archives).
Y_train = image_data_sets.train.labels
Y_test = image_data_sets.test.labels


Loading animals from disk...

In [6]:
# --- Model shape -----------------------------------------------------------
num_classes = 2
input_dim = 2048

# --- Optimisation settings -------------------------------------------------
weight_decay = 0.001
max_lbfgs_iter = 1000
batch_size = 30
initial_learning_rate = 0.001 
decay_epochs = [1000, 10000]
keep_probs = None

# Start from an empty TensorFlow graph before building the model.
tf.reset_default_graph()

# Constructor arguments gathered in one place; dict(...) is used so the
# keyword names stay native strings under `unicode_literals`.
model_kwargs = dict(
    data_sets=data_sets,
    input_dim=input_dim,
    num_classes=num_classes,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    batch_size=batch_size,
    initial_learning_rate=initial_learning_rate,
    decay_epochs=decay_epochs,
    keep_probs=keep_probs,
    mini_batch=False,
    train_dir='output_ipynb',
    log_dir='log',
    model_name='%s_inception_onlytop' % dataset_name)
model = BinaryLogisticRegressionWithLBFGS(**model_kwargs)

# Fit the model, then snapshot the learned weights.
model.train()
weights = model.sess.run(model.weights)

# Predictions of the trained (un-attacked) model: first on the full training
# feed, then on the full test feed.
orig_Y_train_pred, orig_Y_pred = (
    model.sess.run(model.preds, feed_dict=feed_dict)
    for feed_dict in (model.all_train_feed_dict, model.all_test_feed_dict))


Total number of parameters: 2048
Using normal model
/u/nlp/packages/anaconda2/envs/pw/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py:93: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
LBFGS training took [41] iter.
After training with LBFGS: 
Train loss (w reg) on all data: 0.012129
Train loss (w/o reg) on all data: 0.00397613
Test loss (w/o reg) on all data: 0.048454
Train acc on all data:  1.0
Test acc on all data:   0.985
Norm of the mean of gradients: 3.74273e-07
Norm of the params: 4.03805

In [13]:
# Summarise the saved attack results, one entry per test image.
#
# num_train_attacks_needed[i]: number of poisoned training images stored in the
#     attack file found for test image i, or -1 if no matching file exists.
# mask_orig_correct[i]: True when the original model assigned probability
#     >= 0.5 to the true label of test image i.
#
# While scanning, every attack file is sanity-checked: each poisoned image must
# stay within `max_deviation` of its original and inside the range [-1, 1].
output_dir = 'output'

num_train_attacks_needed = np.full(len(Y_test), -1.0)
mask_orig_correct = np.zeros(len(Y_test), dtype=bool)

step_size = 0.02
weight_decay = 0.001
max_deviation = 0.5

model_name = '%s_inception_wd-%s' % (dataset_name, weight_decay)

# List the output directory once (it does not depend on the test index) and
# keep only the final attack files for this step size.
attack_suffix = 'stepsize-%s_proj_final.npz' % step_size
candidate_filenames = [
    filename for filename in os.listdir(output_dir)
    if filename.endswith(attack_suffix)]

for test_idx in range(len(Y_test)):
    mask_orig_correct[test_idx] = orig_Y_pred[test_idx, int(Y_test[test_idx])] >= 0.5

    attack_prefix = '%s_attack_normal_loss_testidx-%s_trainidx-' % (model_name, test_idx)
    filenames = [
        filename for filename in candidate_filenames
        if attack_prefix in filename]

    assert len(filenames) <= 1, (
        'Expected at most one attack file for test_idx %s, found %s'
        % (test_idx, filenames))

    if not filenames:
        # No attack recorded for this test image; keep the -1 sentinel.
        continue

    # Read both arrays before the archive is closed.
    with np.load(os.path.join(output_dir, filenames[0])) as attack_f:
        indices_to_poison = attack_f['indices_to_poison']
        poisoned_X_train_image = attack_f['poisoned_X_train_image']

    num_train_attacks_needed[test_idx] = len(indices_to_poison)

    # Row k of poisoned_X_train_image is the poisoned version of training
    # image indices_to_poison[k].
    for idx_to_poison, poisoned_image in zip(indices_to_poison, poisoned_X_train_image):
        # Largest per-element change, rescaled by 255 / 2 (presumably from the
        # [-1, 1] range to 0-255 pixel units -- TODO confirm).
        image_diff = np.max(
            np.abs(image_data_sets.train.x[idx_to_poison, :] - poisoned_image) * 255 / 2)
        # Small tolerance for floating-point error.
        assert image_diff < max_deviation + 1e-5, (
            'Training image %s deviates by %s (limit %s) in %s'
            % (idx_to_poison, image_diff, max_deviation, filenames[0]))

    assert np.all(poisoned_X_train_image >= -1), (
        'Poisoned image below -1 in %s' % filenames[0])
    assert np.all(poisoned_X_train_image <= 1), (
        'Poisoned image above 1 in %s' % filenames[0])

In [10]:
print('Number of test predictions flipped as the number of training images attacked increases:')
# Only test images the original model classified correctly are counted.
# The values are whole numbers stored in a float array, so cast to int for
# clean labels, and sort by the number of attacked images (not by frequency)
# so the table matches the heading above. An entry of -1 means no attack file
# was found for that test image.
(pd.Series(num_train_attacks_needed[mask_orig_correct])
    .astype(int)
    .value_counts()
    .sort_index())


Number of test predictions flipped as the number of training images attacked increases:
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-a40e6f438b99> in <module>()
      1 print('Number of test predictions flipped as the number of training images attacked increases:')
----> 2 pd.Series(num_train_attacks_needed[mask_orig_correct]).value_counts()

NameError: name 'num_train_attacks_needed' is not defined