In [4]:
# creating PIPELINE
# __author__ = 'shubham0704'
# dependencies
import numpy as np
from PIL import Image
from scipy import ndimage
Load datasets
Images will be loaded into the train and test datasets and converted to grayscale.
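As a quick illustration (not part of the pipeline), a single image could be loaded as grayscale with PIL like this; the path below is hypothetical:
from PIL import Image
import numpy as np

# hypothetical path, for illustration only
img = Image.open('./TARGETS/TRAIN/BMP2/sample.jpeg').convert('L')  # 'L' = 8-bit grayscale
pixels = np.asarray(img, dtype=np.float32)
print(pixels.shape)  # (height, width) - a single channel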
In [5]:
import glob
import os
def label_getter(file_name):
    # collect, for each class sub-folder, the list of .jpeg file paths
    labels = []
    for file in os.listdir(file_name):
        print(file_name + file)
        inner_files_regex = file_name + file + '/*.jpeg'
        labels.append(glob.glob(inner_files_regex))
    return labels
In [6]:
train_folder_labels = label_getter('./TARGETS/TRAIN/')
test_folder_labels = label_getter('./TARGETS/TEST/')
print(len(train_folder_labels))
print(len(test_folder_labels))
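For reference, label_getter expects one sub-folder per target class. The class names come from the tank_names list used later; the exact layout is an assumption based on how the paths are globbed:
./TARGETS/
    TRAIN/
        BMP2/*.jpeg
        T72/*.jpeg
        BTR70/*.jpeg
    TEST/
        BMP2/*.jpeg
        T72/*.jpeg
        BTR70/*.jpeg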
A sample file looks like this -
In [7]:
img = Image.open(train_folder_labels[0][0])
print(img.size)
image_size = img.size[0]
img
Out[7]:
(the sample image is rendered here)
Create pickle of all files
Pickling these files will let us load the datasets on demand.
In [8]:
import pickle
pixel_depth = 255
# Note - subtracting pixel_depth/2 and dividing by pixel_depth rescales pixel
# values into [-0.5, 0.5], centering them roughly around 0 (e.g. 0 -> -0.5, 255 -> 0.5)
print(image_size)
# create datasets
def load_tank_type(image_labels):
    # image_labels is the list of .jpeg paths for one tank class
    images = image_labels
    print(len(images))
    dataset = np.ndarray(shape=(len(images), image_size, image_size),
                         dtype=np.float32)
    for index, image in enumerate(images):
        image_path = image
        # normalize pixel values into [-0.5, 0.5]
        image_data = (ndimage.imread(image_path).astype(float) -
                      pixel_depth / 2) / pixel_depth
        dataset[index, :, :] = image_data
    print('Full dataset tensor:', dataset.shape)
    print('Mean:', np.mean(dataset))
    print('Standard deviation:', np.std(dataset))
    return dataset
def pickle_files(folder_labels, folder_type, force=False):
    # pickle each tank class separately and return the pickle file names
    tank_names = ['BMP2', 'T72', 'BTR70']
    dataset_names = []
    for index, name in enumerate(tank_names):
        set_filename = folder_type + '_' + name + '.pickle'
        dataset_names.append(set_filename)
        if os.path.exists(set_filename) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % set_filename)
        else:
            print('Pickling %s.' % set_filename)
            dataset = load_tank_type(folder_labels[index])
            try:
                with open(set_filename, 'wb') as f:
                    pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
            except Exception as e:
                print('Unable to save data to', set_filename, ':', e)
    return dataset_names
train_dataset = pickle_files(train_folder_labels,'TRAIN')
test_dataset = pickle_files(test_folder_labels,'TEST')
Spot check to see if the file still looks good
In [9]:
import matplotlib.pyplot as plt
pickle_file = train_dataset[0]
with open(pickle_file, 'rb') as file:
    tank_set = pickle.load(file)
sample_image = tank_set[5, :, :]
plt.figure()
plt.imshow(sample_image)
plt.show()
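matplotlib renders the single-channel array with its default colormap; to display it as actual grayscale, an optional cmap argument could be passed (a minor tweak, not in the original):
plt.imshow(sample_image, cmap='gray')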
In [16]:
# we need to merge the per-class datasets so training sees all classes
def make_arrays(nb_rows, img_size):
    if nb_rows:
        dataset = np.ndarray((nb_rows, img_size, img_size), dtype=np.float32)
        labels = np.ndarray(nb_rows, dtype=np.int32)
    else:
        dataset, labels = None, None
    return dataset, labels
def merge_datasets(pickle_files, train_size, valid_size=0):
    num_classes = len(pickle_files)
    valid_dataset, valid_labels = make_arrays(valid_size, image_size)
    train_dataset, train_labels = make_arrays(train_size, image_size)
    vsize_per_class = valid_size // num_classes
    tsize_per_class = train_size // num_classes

    start_v, start_t = 0, 0
    end_v, end_t = vsize_per_class, tsize_per_class
    end_l = vsize_per_class + tsize_per_class
    for label, pickle_file in enumerate(pickle_files):
        try:
            with open(pickle_file, 'rb') as f:
                tank_set = pickle.load(f)
                # let's shuffle the tank_type to have random validation and training sets
                np.random.shuffle(tank_set)
                if valid_dataset is not None:
                    valid_tank = tank_set[:vsize_per_class, :, :]
                    valid_dataset[start_v:end_v, :, :] = valid_tank
                    valid_labels[start_v:end_v] = label
                    start_v += vsize_per_class
                    end_v += vsize_per_class

                train_tank = tank_set[vsize_per_class:end_l, :, :]
                train_dataset[start_t:end_t, :, :] = train_tank
                train_labels[start_t:end_t] = label
                start_t += tsize_per_class
                end_t += tsize_per_class
        except Exception as e:
            print('Unable to process data from', pickle_file, ':', e)
            raise

    return valid_dataset, valid_labels, train_dataset, train_labels
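To make the split sizes in the next cell concrete (the constants suggest 195 test images and 232 train images per class): floor(0.7 * 195) = 136 test and floor(0.3 * 195) = 58 validation images per class, i.e. 408 test and 174 validation examples in total. Since 136 + 58 = 194, one image per class goes unused, while training takes all 232 * 3 = 696 examples.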
In [27]:
# split the test pickles into a validation set and a test set first,
# then merge the train pickles into one training set
# note: test_dataset and train_dataset currently hold pickle file names
# and are overwritten with the merged arrays below
import math
valid_dataset, valid_labels, test_dataset, test_labels = merge_datasets(
    test_dataset, math.floor(0.7 * 195) * 3,
    math.floor(0.3 * 195) * 3)
_, _, train_dataset, train_labels = merge_datasets(train_dataset, 232 * 3)
In [29]:
print(len(valid_dataset))
print(len(test_dataset))
print(len(train_dataset))
print(valid_labels[:5])
As we can see, the labels are not shuffled, so let's shuffle the datasets and labels together.
In [30]:
def randomize(dataset, labels):
    # apply the same permutation to images and labels so pairs stay aligned
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels
train_dataset, train_labels = randomize(train_dataset, train_labels)
test_dataset, test_labels = randomize(test_dataset, test_labels)
valid_dataset, valid_labels = randomize(valid_dataset, valid_labels)
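Because randomize applies the same permutation to the images and the labels, the pairs stay aligned. A quick way to verify (a sketch; tank_names mirrors the order used in pickle_files):
tank_names = ['BMP2', 'T72', 'BTR70']
plt.figure()
plt.imshow(train_dataset[0, :, :])
plt.title(tank_names[train_labels[0]])
plt.show()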
Finally, let's save the data for further reuse -
In [33]:
pickle_file = os.path.join(os.getcwd(), 'final_dataset.pickle')
try:
    f = open(pickle_file, 'wb')
    save = {
        'train_dataset': train_dataset,
        'train_labels': train_labels,
        'valid_dataset': valid_dataset,
        'valid_labels': valid_labels,
        'test_dataset': test_dataset,
        'test_labels': test_labels,
    }
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    f.close()
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise
In [34]:
statinfo = os.stat(pickle_file)
print('Pickle size:', statinfo.st_size, 'bytes')  # pickle.dump does not compress
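If the raw byte count is hard to read, it can be converted to MiB (a small convenience, not in the original):
print('Pickle size: %.1f MiB' % (statinfo.st_size / (1024 ** 2)))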
The next step is to use this dataset in a Fuzzy Neural Network.