In [375]:
%matplotlib inline
import scipy.io
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.cm as cm
src = "../tmp/"
entities = 10


def load_batches(prefix, batch_ids, src_dir, n_entities, with_images=False):
    """Load and stack the per-batch .mat files of one data split.

    For every batch id ``i`` this reads
    ``<src_dir><prefix>batch_aff_<en>_<i>`` (key ``'aff'``) for each entity
    ``en`` in ``1..n_entities``, ``<src_dir><prefix>batch_labels_<i>``
    (key ``'labels'``) and, when ``with_images`` is true,
    ``<src_dir><prefix>batch_imgs_<i>`` (key ``'imgs'``).

    Parameters
    ----------
    prefix : str
        Split name used in the file names (``"test"`` or ``"train"``).
    batch_ids : iterable of int
        Batch numbers to load, in order.
    src_dir : str
        Directory prefix (including the trailing slash) of the files.
    n_entities : int
        Number of per-entity ``aff`` files per batch.
    with_images : bool, optional
        Also load and stack the ``imgs`` arrays.

    Returns
    -------
    (affines, labels, images)
        ``affines``: per-entity matrices joined column-wise within a batch,
        batches joined row-wise. ``labels``: integer labels, batches joined
        row-wise. ``images``: stacked images, or ``None`` when
        ``with_images`` is false.
    """
    aff_batches, label_batches, img_batches = [], [], []
    for i in batch_ids:
        # One 'aff' matrix per entity, placed side by side for this batch.
        per_entity = [
            scipy.io.loadmat(src_dir + prefix + "batch_aff_" + str(en) + "_" + str(i))['aff']
            for en in range(1, n_entities + 1)
        ]
        aff_batches.append(np.hstack(per_entity))
        label_batches.append(
            np.array(scipy.io.loadmat(src_dir + prefix + "batch_labels_" + str(i))['labels'], dtype='int')
        )
        if with_images:
            img_batches.append(scipy.io.loadmat(src_dir + prefix + "batch_imgs_" + str(i))['imgs'])

    # Stack once at the end instead of re-copying the growing array on every
    # iteration (the incremental vstack pattern is quadratic in total size).
    stacked_images = np.vstack(img_batches) if with_images else None
    return np.vstack(aff_batches), np.vstack(label_batches), stacked_images


# load test data
# The range end is exclusive, so batches 1..99 are read.
# NOTE(review): confirm whether a batch 100 exists and should be included.
affines, targets, images = load_batches("test", range(1, 100), src, entities, with_images=True)

# load train data
# Batches 1..599 are read (exclusive range end) -- NOTE(review): confirm the intended count.
train_affines, train_targets, _ = load_batches("train", range(1, 600), src, entities)

# print(np.unique(targets))
# print(np.shape(affines))
# plt.imshow(images[0][0], cmap = cm.Greys_r)
# plt.show()
print('DONE')
In [393]:
# Build a class-balanced training subset: at most N randomly chosen samples
# for each class label.
test_affines = affines
test_targets = targets

N = 200  # number of items per class
CLASS_IDS = range(1, 11)  # class labels 1..10
SEED = 0  # fixed so the drawn subset is the same on every re-run; change to resample

# Local generator: gives a reproducible shuffle without touching the global
# numpy random state.
rng = np.random.RandomState(SEED)
indices = rng.permutation(len(train_targets))

# Labels in shuffled order; for each class keep the first N shuffled indices
# carrying that label. Classes are emitted in ascending order and, within a
# class, in shuffle order.
shuffled_labels = np.ravel(train_targets)[indices]
selected = np.concatenate([indices[shuffled_labels == c][:N] for c in CLASS_IDS])

# A single fancy-indexing step replaces the row-by-row vstack growth and
# always yields 2-D outputs, even if only one sample is selected.
final_train_data = train_affines[selected]
final_train_targets = train_targets[selected]

print('DONE')
print(np.shape(final_train_data))
print(np.shape(final_train_targets))
In [382]:
# Candidate classifier: random forest. Running this cell (re)binds `clf`,
# which the fit/predict cell uses.
from sklearn.ensemble import RandomForestClassifier

forest_options = {"n_estimators": 1000, "n_jobs": -1}
clf = RandomForestClassifier(**forest_options)
In [391]:
# Candidate classifier: linear SVM with default settings. Running this cell
# (re)binds `clf`, which the fit/predict cell uses.
from sklearn.svm import LinearSVC, NuSVC

clf = LinearSVC()
In [394]:
# Train the currently selected classifier on the balanced subset and report
# its accuracy (in percent) on the test features.
targets = np.ravel(targets)  # flatten the label array to 1-D for comparison
clf.fit(final_train_data, np.ravel(final_train_targets))
preds = clf.predict(affines)

# Positions where the prediction equals the true label.
correct = np.nonzero(targets == preds)
n_hits = len(correct[0])
print('accuracy:', (1.0 * n_hits) / len(targets) * 100.0)
In [412]:
# Distance-based classification: each test sample gets the label of its
# nearest training sample (1-nearest-neighbour, Euclidean distance).
# The earlier version of this cell stored the training *index* rather than
# the distance as the running minimum, so it did not find the nearest
# neighbour -- probably why it was marked as failing.
import scipy.spatial
import numpy as np

train_feats = np.atleast_2d(final_train_data)
train_labels = np.ravel(final_train_targets)
test_labels = np.ravel(targets)

correct = 0
for i in range(len(test_labels)):
    # Distances from test sample i to every training sample at once.
    dists = np.linalg.norm(train_feats - affines[i], axis=1)
    # On exact ties argmin returns the first closest training sample.
    nearest = np.argmin(dists)
    if train_labels[nearest] == test_labels[i]:
        correct = correct + 1
print('accuracy:', (1.0*correct)/len(targets) * 100.0 )
In [186]:
# Cluster the test features with k-means (unsupervised; this is not k-nearest
# neighbours) and show up to rows*cols images assigned to one cluster.
from sklearn.cluster import KMeans
import matplotlib.gridspec as gridspec

# `n_jobs` is intentionally not passed: it was deprecated in scikit-learn 0.23
# and removed in 1.0, where passing it raises a TypeError.
estimator = KMeans(n_clusters=10, n_init=10, init='random')
estimator.fit(affines)
labels = estimator.labels_

# visualize results
# Cluster ids are arbitrary and, with random initialisation and no fixed
# random_state, may differ between runs.
display_cid = 8
rows = 5
cols = 5
gs = gridspec.GridSpec(rows, cols, wspace=0.0)
ax = [plt.subplot(gs[i]) for i in range(rows*cols)]
for panel in ax:
    # Hide ticks/frames on every panel so unused grid cells stay blank.
    panel.axis('off')

# First rows*cols samples (in dataset order) assigned to the chosen cluster.
members = np.flatnonzero(labels == display_cid)[:rows*cols]
for slot, i in enumerate(members):
    ax[slot].imshow(images[i][0], cmap=cm.Greys_r)
cnt = len(members)  # number of images actually drawn