In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals
import os
import numpy as np
import pandas as pd
from sklearn import linear_model, preprocessing, cluster
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.linalg as slin
import scipy.sparse.linalg as sparselin
import scipy.sparse as sparse
import IPython
import copy
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import base
from influence.inceptionModel import BinaryInceptionModel
from influence.binaryLogisticRegressionWithLBFGS import BinaryLogisticRegressionWithLBFGS
import influence.experiments as experiments
from influence.image_utils import plot_flat_bwimage, plot_flat_bwgrad, plot_flat_colorimage, plot_flat_colorgrad
from influence.dataset import DataSet
from influence.dataset_poisoning import generate_inception_features
from load_animals import load_animals, load_dogfish_with_koda
sns.set(color_codes=True)
In [5]:
num_classes = 2
num_train_ex_per_class = 900
num_test_ex_per_class = 300
dataset_name = 'dogfish_%s_%s' % (num_train_ex_per_class, num_test_ex_per_class)
image_data_sets = load_animals(
    num_train_ex_per_class=num_train_ex_per_class,
    num_test_ex_per_class=num_test_ex_per_class,
    classes=['dog', 'fish'])
train_f = np.load('output/%s_inception_features_new_train.npz' % dataset_name)
train = DataSet(train_f['inception_features_val'], train_f['labels'])
test_f = np.load('output/%s_inception_features_new_test.npz' % dataset_name)
test = DataSet(test_f['inception_features_val'], test_f['labels'])
validation = None
data_sets = base.Datasets(train=train, validation=validation, test=test)
Y_train = image_data_sets.train.labels
Y_test = image_data_sets.test.labels
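The cached .npz files are expected to hold the 2048-dimensional Inception bottleneck features, one row per image. A minimal sanity check (not part of the original notebook, and assuming the cached features and labels were generated from the same dataset in the same order) could be:
# Hypothetical sanity check: feature matrices should have one 2048-dim row per image
# and the cached labels should line up with the raw image dataset loaded above.
assert train.x.shape == (num_classes * num_train_ex_per_class, 2048)
assert test.x.shape == (num_classes * num_test_ex_per_class, 2048)
assert np.all(train.labels == Y_train)
assert np.all(test.labels == Y_test)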
In [6]:
input_dim = 2048
weight_decay = 0.001
batch_size = 30
initial_learning_rate = 0.001
keep_probs = None
decay_epochs = [1000, 10000]
max_lbfgs_iter = 1000
num_classes = 2
tf.reset_default_graph()
model = BinaryLogisticRegressionWithLBFGS(
    input_dim=input_dim,
    weight_decay=weight_decay,
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output_ipynb',
    log_dir='log',
    model_name='%s_inception_onlytop' % dataset_name)
model.train()
weights = model.sess.run(model.weights)
orig_Y_train_pred = model.sess.run(model.preds, feed_dict=model.all_train_feed_dict)
orig_Y_pred = model.sess.run(model.preds, feed_dict=model.all_test_feed_dict)
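As a quick check of the trained top layer (not in the original notebook), the baseline train and test accuracy can be read off the cached predictions; this assumes model.preds holds per-class probabilities, which the next cell also relies on when it thresholds orig_Y_pred at 0.5:
# Baseline accuracy of the unpoisoned model, computed from the stored predictions.
train_acc = np.mean(np.argmax(orig_Y_train_pred, axis=1) == Y_train)
test_acc = np.mean(np.argmax(orig_Y_pred, axis=1) == Y_test)
print('Train accuracy: %.4f, test accuracy: %.4f' % (train_acc, test_acc))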
In [13]:
num_train_attacks_needed = np.empty(len(Y_test))
num_train_attacks_needed[:] = -1
mask_orig_correct = np.zeros(len(Y_test), dtype=bool)
step_size = 0.02
weight_decay = 0.001
max_deviation = 0.5
model_name = '%s_inception_wd-%s' % (dataset_name, weight_decay)
for test_idx in range(len(Y_test)):
    # Record whether the original (unpoisoned) model classifies this test point correctly.
    if orig_Y_pred[test_idx, int(Y_test[test_idx])] >= 0.5:
        mask_orig_correct[test_idx] = True
    else:
        mask_orig_correct[test_idx] = False

    # Look for a saved attack file for this test point; at most one should exist.
    filenames = [filename for filename in os.listdir('./output') if (
        (('%s_attack_normal_loss_testidx-%s_trainidx-' % (model_name, test_idx)) in filename) and
        (filename.endswith('stepsize-%s_proj_final.npz' % step_size)))]
    assert len(filenames) <= 1

    if len(filenames) == 1:
        attack_f = np.load(os.path.join('output', filenames[0]))
        indices_to_poison = attack_f['indices_to_poison']
        num_train_attacks_needed[test_idx] = len(indices_to_poison)

        # Check that each poisoned image stays within max_deviation pixel values of its
        # original; images are scaled to [-1, 1], so * 255 / 2 converts back to pixel units.
        poisoned_X_train_image = attack_f['poisoned_X_train_image']
        for counter, idx_to_poison in enumerate(indices_to_poison):
            image_diff = np.max(np.abs(image_data_sets.train.x[idx_to_poison, :] - poisoned_X_train_image[counter, :]) * 255 / 2)
            assert image_diff < max_deviation + 1e-5

        assert np.all(poisoned_X_train_image >= -1)
        assert np.all(poisoned_X_train_image <= 1)
In [10]:
print('Number of test predictions flipped as the number of training images attacked increases:')
pd.Series(num_train_attacks_needed[mask_orig_correct]).value_counts()
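To read the counts above as a curve, they can be accumulated over the number of poisoned training images. This is a hedged follow-up sketch, relying on the fact that a value of -1 marks test points with no saved attack file:
# Cumulative number of originally-correct test predictions flipped when up to k
# training images are poisoned (-1 entries, i.e. no saved attack, are dropped).
counts = pd.Series(num_train_attacks_needed[mask_orig_correct]).value_counts().sort_index()
flipped_cumulative = counts[counts.index >= 0].cumsum()
print(flipped_cumulative)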