In [1]:
import myutils
raw_data_training, raw_data_testing = myutils.load_CIFAR_dataset(shuffle=False)
# raw_data_training = raw_data_training[:5000]
class_names = myutils.load_CIFAR_classnames()
n_training = len( raw_data_training )
n_testing = len( raw_data_testing )
print('Loaded CIFAR10 database with {} training and {} testing samples'.format(n_training, n_testing))
In [2]:
# Converting to greyscale
def rgb2gray(image):
    import cv2
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

Xdata_training = [ rgb2gray(raw_data_training[i][0]) for i in range(n_training) ]
Xdata_testing = [ rgb2gray(raw_data_testing[i][0]) for i in range(n_testing) ]
Let's look at what the images look like.
In [3]:
import random
import matplotlib.pyplot as plt
%matplotlib inline
# let's choose a random sample of 10 training images
examples_id = random.sample(range(n_training), 10)
fig, axarr = plt.subplots(2, len(examples_id), figsize=(15,3))
for i in range(len(examples_id)):
    id = examples_id[i]
    axarr[0,i].imshow(raw_data_training[id][0][:,:])
    axarr[0,i].axis('off')
    axarr[1,i].imshow(Xdata_training[id], cmap='gray')
    axarr[1,i].axis('off')
print('Few examples after preprocessing')
plt.show()
We use the HOG descriptor from the scikit-image library.
In [4]:
# Configuration of the HOG descriptor
# see http://scikit-image.org/docs/dev/api/skimage.feature.html#skimage.feature.hog
normalize = True          # True (applies transform_sqrt) yields a slightly better score
block_norm = 'L2-Hys'     # or 'L1'
orientations = 9
pixels_per_cell = [8, 8]  # see section 'Additional remarks' for some explanation
cells_per_block = [2, 2]

def extractFeature(img, vis=False):
    from skimage.feature import hog
    # note: newer scikit-image versions spell the keyword 'visualize'
    return hog(img, orientations, pixels_per_cell, cells_per_block, block_norm,
               visualise=vis, transform_sqrt=normalize)
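As a rough sanity check (assuming the CIFAR10 images are 32×32 and the default block stride of one cell), the expected feature-vector length can be computed from the configuration above:

# sketch: expected HOG feature count for the settings above (assumes 32x32 input)
img_size = 32                                               # CIFAR10 images are 32x32 pixels
cells_per_side = img_size // pixels_per_cell[0]             # 4 cells along each side
blocks_per_side = cells_per_side - cells_per_block[0] + 1   # 3 block positions per side
n_expected = blocks_per_side**2 * cells_per_block[0] * cells_per_block[1] * orientations
print('Expected number of features = {}'.format(n_expected))   # 3*3 * 2*2 * 9 = 324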
In [5]:
# extract features from one sample to get the feature-vector length
nfeatures = extractFeature(Xdata_training[0], vis=False).size
print('Number of features = {}'.format(nfeatures))

# original image, greyscale version and HOG visualisation for the same examples as above
fig, axarr = plt.subplots(3, len(examples_id), figsize=(16,5))
for i in range(len(examples_id)):
    id = examples_id[i]
    axarr[0,i].imshow(raw_data_training[id][0][:,:])
    axarr[0,i].axis('off')
    axarr[1,i].imshow(Xdata_training[id], cmap='gray')
    axarr[1,i].axis('off')
    _, hog_vis = extractFeature(Xdata_training[id], vis=True)
    axarr[2,i].imshow(hog_vis, cmap='gray')
    axarr[2,i].axis('off')
plt.show()
In [6]:
# feature extraction
import numpy as np
X_training = np.array( [ extractFeature(Xdata_training[i], vis=False) for i in range(n_training) ] )
y_training = np.array( [ raw_data_training[i][1] for i in range(n_training) ] )
X_testing = np.array( [ extractFeature(Xdata_testing[i], vis=False) for i in range(n_testing) ] )
y_testing = np.array( [ raw_data_testing[i][1] for i in range(n_testing) ] )
In [7]:
print( 'X_training shape is {}'.format( X_training.shape ) )
print( 'y_training shape is {}'.format( y_training.shape ) )
print( 'X_testing shape is {}'.format( X_testing.shape ) )
print( 'y_testing shape is {}'.format( y_testing.shape ) )
In [8]:
import pandas as pd
print( 'X_training data description')
pd.DataFrame( X_training ).describe()
Out[8]:
In [9]:
print( 'y_training data description')
pd.DataFrame( y_training ).describe()
Out[9]:
In [12]:
# project the HOG features onto two principal components for a quick look at class separation
from sklearn import decomposition
pca = decomposition.PCA(n_components=2)
pca.fit(X_training)
X = pca.transform(X_training)
print(pca.explained_variance_ratio_)
plt.figure( figsize=(15,15) )
plt.scatter( X[:, 0], X[:, 1], c=y_training, cmap='tab10' )
# plt.colorbar()
plt.show()
In [14]:
# TODO: remove outliers
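One possible way to tackle this TODO would be to drop suspected outliers from the training set before fitting the classifier; a minimal sketch using IsolationForest (a hypothetical choice, not part of the original notebook):

# sketch: flag and drop suspected outliers in the HOG feature space
from sklearn.ensemble import IsolationForest
iso = IsolationForest(contamination=0.01, random_state=0)
inlier_mask = iso.fit_predict(X_training) == 1     # -1 marks suspected outliers
X_training_clean = X_training[inlier_mask]
y_training_clean = y_training[inlier_mask]
print('Kept {} of {} training samples'.format(inlier_mask.sum(), n_training))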
We simply use LinearSVC from the scikit-learn library.
In [10]:
from sklearn.svm import LinearSVC
# parameter C chosen experimentally (see explanation below)
C = 1.0
clf = LinearSVC(C=C)
In [11]:
# this may take some time
clf.fit(X_training, y_training)
Out[11]:
We obtain a score of 0.4914 on the CIFAR10 testing dataset.
In [12]:
clf.score( X_testing, y_testing )
Out[12]:
A short look at the predictions.
In [13]:
y_predict = clf.predict( X_testing )
In [14]:
import numpy as np
np.unique( y_predict )
Out[14]:
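To get more than just the set of predicted labels, a per-class breakdown could be added here. A minimal sketch, assuming class_names (from myutils.load_CIFAR_classnames()) is an ordered sequence of the ten label names:

# sketch: confusion matrix of the LinearSVC predictions on the test set
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_testing, y_predict)
# class_names is assumed here to be usable as row/column labels
pd.DataFrame(cm, index=class_names, columns=class_names)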
Our results for different parameters
TODO:
In [15]:
for C in [ 0.001, 0.01, 0.1, 1.0, 1.2, 1.5, 2.0, 10.0 ]:
    clf = LinearSVC(C=C)
    clf.fit(X_training, y_training)
    print( 'normalize={norm}, C={C}, score={score}'.format(norm=normalize, C=C, score=clf.score( X_testing, y_testing )) )
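Instead of the manual loop over C above, the choice could also be made by cross-validation on the training set alone. A minimal sketch (not part of the original experiments):

# sketch: cross-validated choice of C on the training data only
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.001, 0.01, 0.1, 1.0, 10.0]}
search = GridSearchCV(LinearSVC(), param_grid, cv=3, n_jobs=-1)
search.fit(X_training, y_training)
print('best C = {}, CV accuracy = {:.4f}'.format(search.best_params_['C'], search.best_score_))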
In [16]:
from sklearn.svm import SVC
In [17]:
svc_lin_clf = SVC(kernel='linear', C=1)
svc_lin_clf.fit(X_training, y_training)
svc_lin_clf.score(X_testing, y_testing)
Out[17]:
In [18]:
from sklearn.svm import SVC
svc_clf = SVC(C=1)
svc_clf.fit(X_training, y_training)
svc_clf.score(X_testing, y_testing)
Out[18]:
LinearSVC
| C      | normalize=False | normalize=True |
|--------|-----------------|----------------|
| 0.001  | 0.281           | 0.4758         |
| 0.01   | 0.3522          | 0.4857         |
| 0.1    | 0.4432          | 0.4903         |
| 1.0    | 0.4765          | **0.4913**     |
| 1.2    | 0.4786          | 0.4904         |
| 1.5    | 0.4783          | 0.4906         |
| 2.0    | 0.4802          | 0.491          |
| 10.0   | 0.4841          | 0.4902         |
| 20.0   | 0.4834          | 0.4913         |
| 50.0   | 0.4833          | 0.4222         |
| 100.0  | 0.4833          | 0.4166         |
| 1000.0 | 0.4667          | 0.3139         |
SVC(kernel='linear')
normalize=True, C=1.0, score=0.5118
SVC()
normalize=True, C=1.0, score=0.4755
In [ ]: