In [1]:
import sys
# Make the repository root importable so the `library.*` packages resolve below.
sys.path.insert(0, '../')
In [2]:
import pandas as pd
import numpy as np
np.set_printoptions(precision=3, linewidth=200, suppress=True)
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split as train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sklearn.metrics as skm
from sklearn import svm
from sklearn.externals import joblib
import os, time
import matplotlib.pyplot as plt
from scipy.misc import toimage
%matplotlib inline
In [3]:
from library.hog.hog import HOG
from library.datasets.cifar10 import CIFAR10
from library.utils import file_utils
from library.plot_tools import plot
In [4]:
# Accumulates wall-clock seconds across all timed steps of the notebook.
total_time = 0
In [5]:
# Experiment bookkeeping used in output paths and the final results table.
exp_no = 1                        # experiment id (zero-padded in file names)
file_no = 2                       # notebook/file id used in the output directory
data_source = 'Website'           # provenance label for the results table
train_validate_split = 0.2        # validation fraction for the manual split below
train_validate_split_data = None  # dataset-level split; None = no validate set is built
num_images_required = 1.0         # fraction of CIFAR-10 images to load
In [6]:
# Dataset loading options.
one_hot = True                    # also build one-hot encoded labels
make_image = True                 # also build image arrays from the raw data
mode='grey'                       # image mode passed to CIFAR10 as image_mode
scale_method = 'StandardScaler'   # feature scaler selected in a later cell
In [7]:
# SVM hyper-parameters.
svm_kernel = 'rbf'
svm_gamma = 0.001
svm_c = 1.0
svm_max_iter = 10000
svm_cs =1000                      # cache_size passed to svm.SVC
In [8]:
# Derived names for the model/CSV/HTML outputs.
param_name = 'exp_' + str(exp_no).zfill(3) + '_kernel_' + svm_kernel
output_directory = '../logs/cifar10/' + str(file_no).zfill(3) + '_svm_hog/' + 'exp_no_' + str(exp_no).zfill(3) + '/'
In [9]:
# HOG descriptor geometry.
block_size = (8,8)
cell_size = (2,2)
nbins = 9
In [10]:
start = time.time()
# Load CIFAR-10 with the options chosen in the configuration cells.
cifar10 = CIFAR10(one_hot_encode=one_hot, num_images=num_images_required,
                  make_image=make_image, image_mode=mode,
                  train_validate_split=train_validate_split_data, endian='little')
cifar10.load_data(train=True, test=True, data_directory='./datasets/cifar10/')
end = time.time()
elapsed = end - start
print('[ Step 0] Dataset loaded in %5.6f ms' % (elapsed * 1000))
print('Dataset size: ' + str(cifar10.train.data.shape))
num_train_images = cifar10.train.data.shape[0]
total_time += elapsed
In [11]:
# Sanity-check the shapes of every loaded split.
print('Train data shape:', cifar10.train.data.shape)
if one_hot is True:
    print('Train labels shape:', cifar10.train.one_hot_labels.shape)
if make_image is True:
    print('Train images shape:', cifar10.train.images.shape)
print('Train class labels shape:', cifar10.train.class_labels.shape)
if train_validate_split_data is not None:
    print('Validate data shape:', cifar10.validate.data.shape)
    if one_hot is True:
        print('Validate labels shape:', cifar10.validate.one_hot_labels.shape)
    if make_image is True:
        # BUG FIX: was `cifar10.vaidate.images.shape` (typo), which raised
        # AttributeError whenever a validate split was actually used.
        print('Validate images shape:', cifar10.validate.images.shape)
    print('Validate class labels shape:', cifar10.validate.class_labels.shape)
print('Test data shape:', cifar10.test.data.shape)
if one_hot is True:
    print('Test labels shape:', cifar10.test.one_hot_labels.shape)
if make_image is True:
    print('Test images shape:', cifar10.test.images.shape)
print('Test class labels shape:', cifar10.test.class_labels.shape)
In [12]:
# Spot-check the first five records of each split to verify decoding.
print('Training images')
print(cifar10.train.data[:5])
if make_image is True and mode=='rgb':
    print('Training images rgb')
    print(cifar10.train.images[:5])
if make_image is True and mode=='float':
    print('Training images float')
    print(cifar10.train.images[:5])
if one_hot is True:
    print('Training labels')
    print(cifar10.train.one_hot_labels[:5])
print('Training classes')
print(cifar10.train.class_labels[:5])
print('Testing images')
print(cifar10.test.data[:5])
if make_image is True and mode=='rgb':
    print('Testing images rgb')
    print(cifar10.test.images[:5])
if make_image is True and mode=='float':
    print('Testing images float')
    print(cifar10.test.images[:5])
if one_hot is True:
    print('Testing labels')
    print(cifar10.test.one_hot_labels[:5])
print('Testing classes')
print(cifar10.test.class_labels[:5])
In [13]:
start = time.time()
# Build the HOG extractor once; it is reused for the test images below.
hog = HOG(block_size=block_size, cell_size=cell_size, nbins=nbins)
data_hog = []
feature_size = 0
print('Generating HOG features for %d data images' %cifar10.train.images.shape[0])
for train_img in cifar10.train.images:
    grads = hog.make_hog_gradients(train_img.astype('uint8'))
    data_hog.append(grads.flatten())
    feature_size = grads.size
data_hog = np.array(data_hog)
print('HOG Features for data: ' + str(data_hog.shape))
end = time.time()
print('Generated HOG for train images in %.6f ms' %((end-start)*1000))
In [14]:
start = time.time()
test_hog = []
feature_size = 0
print('Generating HOG features for %d test images' %cifar10.test.images.shape[0])
# Reuses the `hog` extractor constructed for the training images.
for test_img in cifar10.test.images:
    grads = hog.make_hog_gradients(test_img.astype('uint8'))
    test_hog.append(grads.flatten())
    feature_size = grads.size
test_hog = np.array(test_hog)
print('HOG Features for test: ' + str(test_hog.shape))
end = time.time()
print('Generated HOG for test images in %.6f ms' %((end-start)*1000))
In [15]:
# Plot a grid of sample images from both the train and test splits.
cifar10.plot_sample(plot_data=True, plot_test=True, fig_size=(7, 7))
In [16]:
# Show the first 50 training images with their class names.
cifar10.plot_images(cifar10.train.data[:50, :], cifar10.train.class_names[:50],
                    nrows=5, ncols=10, fig_size=(20,50), fontsize=35, convert=True)
Out[16]:
In [17]:
start = time.time()
# Select the feature scaler; unknown settings fall back to StandardScaler.
if scale_method == 'StandardScaler':
    ss = StandardScaler()
elif scale_method == 'MinMaxScaler':
    ss = MinMaxScaler()
else:
    ss = StandardScaler()
# Fit the scaler on the training features only.
data_images = ss.fit_transform(data_hog)
# BUG FIX: was ss.fit_transform(test_hog), which re-fit the scaler on the test
# set; test features must be scaled with the statistics learned on train data.
test_images = ss.transform(test_hog)
end = time.time()
print('[ Step 2] Dataset transformations done in %.6f ms' %((end-start)*1000))
print('Training the classifier on %d images' % num_train_images)
print('Dataset size: ' + str(cifar10.train.data.shape))
total_time += (end-start)
In [18]:
start = time.time()
# Hold out a validation set from the scaled training features.
# BUG FIX: random_state is now pinned to 1, matching the "Random State: 1"
# entry printed in the results-table cell; without it the split (and the
# reported accuracies) changed on every re-run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(data_images, cifar10.train.class_labels,
                                                    test_size=train_validate_split,
                                                    random_state=1)
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
print('Train set shape : ' + str(X_train.shape))
print('Train labels shape : ' + str(y_train.shape))
print('Validation set shape : ' + str(X_test.shape))
print('Validation labels shape : ' + str(y_test.shape))
end = time.time()
print('[ Step 3] Train-test split done in %.6f ms' %((end-start)*1000))
total_time += (end-start)
In [19]:
start = time.time()
# Build (but do not yet fit) the SVM classifier from the configuration cells.
# cache_size is the libsvm kernel-cache size in MB; max_iter bounds the solver
# so a hard problem cannot run forever.
clf = svm.SVC(kernel=svm_kernel, C=svm_c, verbose=True, max_iter=svm_max_iter, cache_size=svm_cs, gamma=svm_gamma)
print(clf)
end = time.time()
print('[ Step 4] Made the SVM classifier in %.6f ms' %((end-start)*1000))
total_time += (end-start)
In [20]:
start = time.time()
print(X_train.shape)
print(y_train.shape)
# Fit the SVM on the HOG features; this is the expensive step of the notebook.
clf.fit(X_train, y_train)
end = time.time()
print('[ Step 5] Made the model in %.6f ms' %((end-start)*1000))
total_time += (end-start)
In [21]:
start = time.time()
# Persist the fitted model so predictions can be reproduced without retraining.
file_utils.mkdir_p(output_directory)
model_output_path = output_directory + '02_' + param_name + '.pkl'
joblib.dump(clf, model_output_path)
end = time.time()
print('[ Step 6] Write obtained model to %s in %.6f ms' %(model_output_path, ((end-start)*1000)))
total_time += (end-start)
In [22]:
# BUG FIX: `start` was never reset in this cell, so the reported step time
# silently included the whole model-fitting/saving work of the cells above.
start = time.time()
y_predict = clf.predict(X_test)
labels = sorted(list(set(cifar10.train.class_labels)))
print('Labels: ' + str(labels))
plot.plot_confusion_matrix(y_test, y_predict, classes=cifar10.classes, fig_size=(8,6),
                           normalize=True, title='Confusion matrix for validation set with SVC')
end = time.time()
print('[ Step 7] Make prediction on validation dataset in %.6f ms' %((end-start)*1000))
total_time += (end-start)
In [23]:
start = time.time()
# Per-class precision/recall/F1 plus overall accuracy on the validation split.
print('Classification report:')
print(skm.classification_report(y_test, y_predict, target_names=cifar10.classes))
validate_accuracy = skm.accuracy_score(y_test, y_predict, normalize=True)
print('Validation accuracy score: ' + str(validate_accuracy))
end = time.time()
print('[ Step 8] Generating classification on validation dataset in %.6f ms' %((end-start)*1000))
total_time += (end-start)
In [24]:
start = time.time()
# Predict class indices for the held-out test images, then map each index to
# its human-readable class name.
prediction_numbers = clf.predict(test_images)
num_test_images = test_hog.shape[0]
prediction_classes = [cifar10.classes[int(prediction_numbers[idx])]
                      for idx in range(num_test_images)]
end = time.time()
print('[ Step 9] Make prediction on test dataset in %.6f ms' %((end-start)*1000))
total_time += (end-start)
In [25]:
# Show the first 50 test images with true and predicted class names.
cifar10.plot_images(cifar10.test.data[:50], cifar10.test.class_names[:50], cls_pred=prediction_classes[:50],
                    nrows=5, ncols=10, fig_size=(20,50), fontsize=30, convert=True)
Out[25]:
In [26]:
start = time.time()
# Confusion matrix, classification report and accuracy on the full test set.
plot.plot_confusion_matrix(cifar10.test.class_labels, prediction_numbers, classes=cifar10.classes,
                           normalize=True, title='Confusion matrix for test set with SVC')
print(skm.classification_report(cifar10.test.class_labels, prediction_numbers, target_names=cifar10.classes))
test_accuracy = skm.accuracy_score(cifar10.test.class_labels, prediction_numbers, normalize=True)
print('Accuracy score on test data: ' + str(test_accuracy))
end = time.time()
total_time += (end-start)
In [27]:
start = time.time()
print('Prediction done on %d images' %test_images.shape[0])
# NOTE(review): clf.score re-runs prediction over the whole test set, so this
# cell duplicates the work of the previous cells just to print the accuracy.
print('Accuracy of the classifier: %.4f' %clf.score(test_images, cifar10.test.class_labels))
end = time.time()
In [28]:
start = time.time()
# Write a submission-style CSV: 1-based image id plus predicted class name.
indices = np.arange(1, test_images.shape[0]+1)
predictions = np.column_stack((indices, prediction_classes))
file_utils.mkdir_p(output_directory)
output_csv_file = output_directory + '02_' + param_name + '.csv'
column_names = ['id', 'label']
predict_test_df = pd.DataFrame(data=predictions, columns=column_names)
predict_test_df.to_csv(output_csv_file, index=False)
end = time.time()
print('[ Step 11] Writing the test data to file: %s in %.6f ms' %(output_csv_file, (end-start)*1000))
total_time += (end-start)
In [29]:
# Report the cumulative wall-clock time accumulated into total_time.
print('Code took %.6f s to run on training with %d examples' % (total_time,num_train_images))
In [30]:
# Emit one pre-formatted markdown table row summarising this experiment.
# NOTE(review): "Random State: 1" is hard-coded here — verify the
# train/validate split cell actually uses random_state=1 as claimed.
print('| %d | ' %exp_no, end='')
print('%d | '%num_train_images, end='')
print('Kernel = %s, C = %.2f, Max. Iterations: %d, Cache size = %d, Random State: 1 | '
      %(svm_kernel, svm_c, svm_max_iter, svm_cs), end='')
print('[Link](%s) | ' %output_csv_file, end='')
print('%.4f | ' %validate_accuracy, end='')
print('%.4f | ' %test_accuracy, end='')
print('%s | ' %data_source, end='')
print('[Link](%s) |' %model_output_path)
In [31]:
def output_HTML(read_file, output_file):
    """Render a Jupyter notebook to a standalone HTML file.

    Parameters
    ----------
    read_file : str
        Path of the .ipynb file to convert.
    output_file : str
        Destination path for the rendered HTML.
    """
    from nbconvert import HTMLExporter
    import codecs
    import nbformat
    exporter = HTMLExporter()
    output_notebook = nbformat.read(read_file, as_version=4)
    output, resources = exporter.from_notebook_node(output_notebook)
    # BUG FIX: use a context manager so the file handle is always closed
    # (the original left it open and also printed a stray blank line).
    with codecs.open(output_file, 'w', encoding='utf-8') as html_file:
        html_file.write(output)
In [32]:
%%javascript
// Save the notebook from the browser so the HTML export reflects its
// current state on disk.
var notebook = IPython.notebook
notebook.save_notebook()
In [35]:
%%javascript
// Read the notebook's display name from the page and push it into the
// Python kernel as the variable `theNotebook` for the export cell below.
var kernel = IPython.notebook.kernel;
var thename = window.document.getElementById("notebook_name").innerHTML;
var command = "theNotebook = " + "'"+thename+"'";
kernel.execute(command);
In [36]:
# `theNotebook` is injected into the kernel by the %%javascript cell above;
# this cell raises NameError if that cell has not finished executing yet.
current_file = './' + theNotebook + '.ipynb'
output_file = output_directory + str(file_no).zfill(3) + '_exp_no_' + str(exp_no) + '_' + theNotebook + '.html'
print('Current file: ' + str(current_file))
print('Output file: ' + str(output_file))
file_utils.mkdir_p(output_directory)
output_HTML(current_file, output_file)
In [37]:
# Final cumulative timing report (repeats the earlier summary after export).
print('Code took %.6f s to run on training with %d examples' % (total_time,num_train_images))
In [ ]: