In [1]:
import os
import shutil
import numpy as np
import pandas
from matplotlib import pyplot as plt
from scipy.misc import imresize, imsave  # removed in newer SciPy releases; see the Pillow sketch below
from multiprocessing import Pool
from functools import partial
%matplotlib inline
In [2]:
def resize_all(id, namelist, path2):
    # read one image, resize it to 299x299, replicate grayscale images to 3 channels,
    # and save the result as a PNG under path2
    im = plt.imread(namelist[id])
    resizeim = imresize(im, (299, 299))
    if len(resizeim.shape) == 2:
        resizeim = np.array([resizeim] * 3).transpose([1, 2, 0])
    imsave(os.path.join(path2, os.path.basename(namelist[id]).split('.')[0] + '.png'), resizeim)
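scipy.misc.imresize and imsave were removed from later SciPy releases, so on a newer environment the same resizing step could be written with Pillow instead. This is only a sketch under that assumption (resize_all_pil is a hypothetical name, not part of the original pipeline); Image.BILINEAR is passed explicitly to match imresize's default interpolation.

from PIL import Image

def resize_all_pil(idx, namelist, path2):
    # open the image, force 3 channels, resize to 299x299, save as PNG
    im = Image.open(namelist[idx]).convert('RGB')
    im = im.resize((299, 299), Image.BILINEAR)
    name = os.path.basename(namelist[idx]).split('.')[0] + '.png'
    im.save(os.path.join(path2, name))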
In [3]:
imagenet_path = '/work/imagenet/train/'
path2 = './Originset/'
n_per_class = 4               # training images drawn per class
n_per_class_test = [10, 40]   # test images per class: 10 for dataset-2 train classes, 40 otherwise
n_train = int(n_per_class * 0.75)
subdirs = os.listdir(imagenet_path)
subdirs = np.sort(subdirs)
label_mapping={}
example = pandas.read_csv('./sample_dev_dataset.csv')
for id, name in enumerate(subdirs):
    # class labels are 1-indexed: synset directory name -> label id
    label_mapping[name] = id + 1
In [4]:
class1 = np.load('utils/dataset2_trainclass.npy')
class2 = np.load('utils/dataset2_valclass.npy')
class1 = [label_mapping[name] for name in class1]
class2 = [label_mapping[name] for name in class2]
In [5]:
n_repeat = n_per_class
info_list = np.zeros([n_repeat * 1000, 12]).astype('str')
trainset_d1 = np.array([])
valset_d1 = np.array([])
trainset_d2 = np.array([])
valset_d2 = np.array([])
namelist = np.array([])
i_cum = 0
for i_dir, dir in enumerate(subdirs):
    fullpath = os.path.join(imagenet_path, dir)
    filelist = os.listdir(fullpath)
    # pick n_repeat random images from this class
    randid = np.random.permutation(len(filelist))[:n_repeat]
    chosen_im = np.array(filelist)[randid]
    rename_im = np.array([n.split('.')[0] + '.png' for n in chosen_im])  # names as saved by resize_all
    # dataset 1: split every class 75% train / 25% val
    trainset_d1 = np.concatenate([trainset_d1, rename_im[:n_train]])
    valset_d1 = np.concatenate([valset_d1, rename_im[n_train:]])
    fullimpath = [os.path.join(fullpath, f) for f in chosen_im]
    namelist = np.concatenate([namelist, fullimpath])
    labels = label_mapping[dir]
    # dataset 2: split by class membership (train classes vs. val classes)
    if labels in class1:
        trainset_d2 = np.concatenate([trainset_d2, rename_im])
    elif labels in class2:
        valset_d2 = np.concatenate([valset_d2, rename_im])
    for i in range(n_repeat):
        # draw a random target class different from the true label (labels are 1-indexed)
        target_class = labels
        while target_class == labels:
            target_class = np.random.randint(1000) + 1
        info_list[i_cum] = np.array([chosen_im[i].split('.')[0], 0, 0, 0, 1, 1,
                                     labels, target_class, 0, 0, 0, 0])
        i_cum += 1
newpd = pandas.DataFrame(info_list)
newpd.columns = example.columns
newpd.to_csv('dev_dataset.csv')
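A quick read-back of the generated CSV can serve as a sanity check. The column names are taken from sample_dev_dataset.csv; ImageId and TrueLabel are used later in this notebook, while TargetClass is assumed here to be the column holding the random target label.

check = pandas.read_csv('dev_dataset.csv')
print(check.shape)  # expect (4000, 13): 4 images x 1000 classes, plus the written index column
print(check[['ImageId', 'TrueLabel', 'TargetClass']].head())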
In [6]:
# resize all selected training images in parallel
pool = Pool()
resize_partial = partial(resize_all, namelist=namelist, path2=path2)
_ = pool.map(resize_partial, range(len(namelist)))
In [7]:
np.save('./utils/dataset1_train_split.npy',trainset_d1)
np.save('./utils/dataset1_val_split.npy',valset_d1)
np.save('./utils/dataset2_train_split.npy',trainset_d2)
np.save('./utils/dataset2_val_split.npy',valset_d2)
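The saved split files can be loaded straight back with np.load; a minimal sketch of such a check:

train_d1 = np.load('./utils/dataset1_train_split.npy')
val_d1 = np.load('./utils/dataset1_val_split.npy')
print(len(train_d1), len(val_d1))  # expect 3000 and 1000 (a 3/1 split of 4 images per class over 1000 classes)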
In [8]:
imagenet_path = '/work/imagenet/val/'
path2 = './Originset_test/'
In [9]:
with open('/work/imagenet/meta/val.txt') as f:
    tmp = f.readlines()
label_val = {}
for line in tmp:
    # each line holds "<filename> <0-based label>"; store the label 1-indexed
    label_val[line.split(' ')[0]] = int(line.split(' ')[1].split('\n')[0]) + 1
In [10]:
example = pandas.read_csv('/work/adv/toolkit/dataset/dev_dataset.csv')
keys = np.array(list(label_val.keys()))      # list() so the dict views become proper arrays under Python 3
values = np.array(list(label_val.values()))
In [11]:
i_cum = 0
namelist = []
info_list = []
for i_class in range(1, 1001):
    # fewer test images for the dataset-2 train classes, more for the rest
    if i_class in class1:
        n_repeat = n_per_class_test[0]
    else:
        n_repeat = n_per_class_test[1]
    filelist = keys[values == i_class]
    randid = np.random.permutation(len(filelist))[:n_repeat]
    chosen_im = np.array(filelist)[randid]
    fullimpath = [os.path.join(imagenet_path, f) for f in chosen_im]
    labels = i_class
    for i in range(n_repeat):
        # draw a random target class different from the true label (labels are 1-indexed)
        target_class = labels
        while target_class == labels:
            target_class = np.random.randint(1000) + 1
        info_list.append([chosen_im[i].split('.')[0], 0, 0, 0, 1, 1,
                          labels, target_class, 0, 0, 0, 0])
        namelist.append(fullimpath[i])
        i_cum += 1
newpd = pandas.DataFrame(info_list)
newpd.columns = example.columns
newpd.to_csv('dev_dataset_test.csv')
In [12]:
label1 = pandas.read_csv('dev_dataset.csv')
label1 = np.array([label1['ImageId'],label1['TrueLabel']])
label2 = pandas.read_csv('dev_dataset_test.csv')
label2 = np.array([label2['ImageId'],label2['TrueLabel']])
tmp = np.concatenate([label1,label2],1).T
labels = {}
for key, value in tmp:
    labels[key] = value
np.save('utils/labels.npy',labels)
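Because labels is a plain dict, np.save pickles it; loading it back on newer NumPy needs allow_pickle=True and .item() to recover the dict. A minimal sketch:

labels_loaded = np.load('utils/labels.npy', allow_pickle=True).item()
print(len(labels_loaded))  # one entry per ImageId across both CSVs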
In [13]:
names = label2[0]
values = label2[1]
allnames = []
for i in range(1, 1001):
    # dataset 1 test split: the first 10 test images of every class
    class_names = names[values == i][:10]
    allnames.append(class_names)
allnames = np.concatenate(allnames)
np.save('utils/dataset1_test_split.npy', allnames)
allnames = []
class2 = np.load('utils/dataset2_valclass.npy')
class2 = [label_mapping[name] for name in class2]
for i in class2:
    # dataset 2 test split: all test images of the dataset-2 val classes
    class_names = names[values == i]
    allnames.append(class_names)
allnames = np.concatenate(allnames)
np.save('utils/dataset2_test_split.npy', allnames)
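A quick size check on the two test splits; the second count assumes the dataset-2 val classes are disjoint from the dataset-2 train classes, so each contributes 40 test images.

d1_test = np.load('utils/dataset1_test_split.npy')
d2_test = np.load('utils/dataset2_test_split.npy')
print(len(d1_test))  # expect 10 * 1000
print(len(d2_test))  # expect 40 * number of dataset-2 val classes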
In [14]:
# resize the selected test images in parallel, reusing the pool from above
resize_partial = partial(resize_all, namelist=namelist, path2=path2)
_ = pool.map(resize_partial, range(len(namelist)))