Also try hinge loss - it would be analogous to using an SVM.
Add morphological features in the FC layer.
Size / bounding-box features.
Feed features into label spreading.
Find the problematic classes (low probabilities - massive uncertainty), or high log loss on the training data, and augment those particular classes.
t0 Log loss: 1.33067
Classify everything, then aggregate the results across images that are just augmentations of one another, and take the average of the prediction probabilities (see the sketch after these notes).
I SCREWED UP... rotation by around 45 degrees leaves a black background.
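A minimal sketch of the augmentation-averaging idea above, assuming a hypothetical helper augment_views(im) that returns the rotated/flipped variants of one image; net is the caffe.Classifier built below:

def predict_with_tta(net, im, augment_views):
    """Average class probabilities over an image and its augmented views."""
    views = [im] + augment_views(im)  # original + augmented copies (hypothetical helper)
    probs = net.predict(views)        # one softmax row per view
    return probs.mean(axis=0)         # averaged class probabilities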
In [11]:
import numpy as np
import matplotlib.pyplot as plt
import time
import tools.my_io as my_io
%matplotlib inline
import caffe
# Set the right path to your model definition file, pretrained model weights,
# and the image you would like to classify.
MODEL_FILE = '/media/raid_arr/data/ndsb/config/deploy_cnn_v3_maxout_supersparse.prototxt'
PRETRAINED = '/media/raid_arr/data/ndsb/models/zoomed_out_vanilla_smallmaxout/simple_fold0_iter_3000.caffemodel'
MEAN_VALUE = 23
IMAGE_FILE = '/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/acantharia_protist/100224.jpg'
VALIDATION_DB = '/media/raid_arr/tmp/test0_norm_lmdb'
In [7]:
# Loading From Database
print 'Loading data...'
tic = time.time()
data = my_io.load_lmdb(VALIDATION_DB)
print "Done in %.2f s." % (time.time() - tic)
val_files_all, images, labels = zip(*data)
test_labels = np.array(labels)  # as an array so elementwise comparisons like test_labels == 113 work
In [8]:
# Grab 1 image from file
input_image = caffe.io.load_image(IMAGE_FILE, color=False)
In [9]:
image_dims = images[0].shape[:2]
# image_dims = (57, 57)
print image_dims
net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       mean=np.array([MEAN_VALUE]),
                       raw_scale=1.0,  # 255 if loading via caffe.io, 1.0 if loading from the my_io lmdb
                       image_dims=image_dims)
# gpu=True)
# caffe.set_phase_test()
caffe.set_mode_gpu()
In [123]:
np.where(test_labels == 113)  # indices of the samples whose true label is 113
Out[123]:
In [137]:
# %%time
n = 5262
# im = caffe.io.load_image(test_files[n], color=False)
im_path = val_files_all[n]
# im_loaded = caffe.io.load_image(im_path, color=False)
# im = images[n].astype(np.float32)/255.0
# im = im_loaded
im = images[n]
print im.shape
from tools.le import le  # label encoder (also imported in a later cell)
print le.inverse_transform(test_labels[n])
tic = time.time()
prediction = net.predict([im])
print "Prediction in %.2f s." % (time.time() - tic)
print 'prediction shape:', prediction[0].shape
plt.plot(prediction[0])
print 'predicted class:', prediction[0].argmax()
print 'true class:', test_labels[n]
print 'logloss:', -np.log(prediction[0][test_labels[n]])
plt.figure()
plt.imshow(np.squeeze(im), interpolation='none', cmap='gray')
Out[137]:
In [140]:
prediction[0][114]
Out[140]:
In [142]:
start = time.time()
prediction = net.predict(images)
print "Done in %.2f s." % (time.time() - start)
In [19]:
ll_ii = np.log(prediction[range(len(test_labels)), test_labels])  # per-sample log-likelihood of the true class
ll = -np.mean(ll_ii)
print 'Log loss:', ll
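One caveat: an exact zero probability on a true class sends np.log to -inf. Kaggle's log-loss metric clips predicted probabilities away from 0 and 1, so a clipped version along those lines is safer:

eps = 1e-15
pred_clipped = np.clip(prediction, eps, 1 - eps)
pred_clipped /= pred_clipped.sum(axis=1, keepdims=True)  # renormalize rows after clipping
print 'Clipped log loss:', -np.mean(np.log(pred_clipped[range(len(test_labels)), test_labels]))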
In [1]:
from tools.le import le
test_labels = np.array(test_labels)
ll_bylabel = np.zeros(len(set(test_labels)))
labels = sorted(set(test_labels))
for l in labels:
    # ll_bylabel[l] = -np.mean(ll_ii[test_labels==l])  # per-class mean log loss
    ll_bylabel[l] = -np.sum(ll_ii[test_labels == l]) / len(labels)  # class contribution, scaled by the number of classes
q = np.c_[labels, ll_bylabel]
qq = q[(-q[:, 1]).argsort()]  # classes sorted by descending loss
qqq = np.c_[qq[:, ::-1], le.inverse_transform(qq[:, 0].astype(int))]
print qqq
# print ll_bylabel
# print np.argmax(ll_bylabel), np.max(ll_bylabel), le.inverse_transform(np.argmax(ll_bylabel))
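Following the note at the top about augmenting the problematic classes, a minimal sketch of targeted augmentation, assuming a hypothetical augment_image helper (small rotations/flips); in practice this would be applied to the training split, not the validation images used here:

k = 10
worst_classes = qq[:k, 0].astype(int)  # the k classes with the highest loss
extra_images, extra_labels = [], []
for l in worst_classes:
    for i in np.where(test_labels == l)[0]:
        extra_images.append(augment_image(images[i]))  # hypothetical augmentation helper
        extra_labels.append(l)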
In [108]:
z = le.inverse_transform(sorted(set(labels)))
zz = [qq for qq in z if 'copepod' in qq]
zz
Out[108]:
In [81]:
guess = np.argmax(prediction, axis=1)
# guess_arr = np.c_[test_files, guess]
# np.savetxt('./guess_labels/guess1.txt', guess_arr, fmt='%s', delimiter='\t')
Out[81]:
In [79]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from tools.le import le
conf_arr = confusion_matrix(test_labels, guess)
acc = accuracy_score(test_labels, guess)
print acc
d = np.diagonal(conf_arr.copy())
print np.sum(conf_arr)
np.fill_diagonal(conf_arr, 0) # gets rid of correct predictions
pt = np.sum(conf_arr, axis=1) # misclassification count per true class (diagonal already zeroed)
names = np.array(le.inverse_transform(range(121)))
ind = pt.argsort()
np.c_[pt[ind], d[ind], names[ind], np.arange(121)[ind]]
Out[79]:
In [2]:
import pandas as pd
conf_arr_raw = confusion_matrix(test_labels, guess)
conf_arr_norm = conf_arr_raw.astype(float) / conf_arr_raw.sum(axis=1)[:, np.newaxis]  # row-normalize
pd.set_option('display.max_columns', 122)
conf_df = pd.DataFrame(conf_arr_raw)
conf_df
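The row-normalized matrix conf_arr_norm is easier to read as a heatmap; a quick matplotlib sketch:

plt.figure(figsize=(12, 12))
plt.imshow(conf_arr_norm, interpolation='none', cmap='hot')
plt.colorbar()
plt.xlabel('predicted class')
plt.ylabel('true class')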
In [78]:
conf_arr_raw = confusion_matrix(test_labels, guess)
conf_arr.sum()  # total misclassified samples (diagonal was zeroed above)
Out[78]:
In [265]:
# Probabilistic confusion matrix, following http://arxiv.org/pdf/1410.0736v3.pdf
import pickle
pickle.dump((test_labels, prediction), open('./tools/naive_prediction.p', 'wb'))
prob_conf = np.zeros((121, 121))
for l in range(121):
    inds = np.squeeze(np.array(np.where(test_labels == l)))
    class_conf = prediction[inds, :].mean(axis=0)  # mean predicted distribution for true class l
    prob_conf[l, :] = class_conf
F = prob_conf
D = 1 - F               # turn the confusion (a similarity) into a distance
np.fill_diagonal(D, 0)
D_p = 0.5 * (D + D.T)   # symmetrize
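The paper clusters the classes from this confusion-derived distance; as an alternative to the affinity propagation below, a spectral-clustering sketch on the same matrix (the distance-to-similarity kernel and the cluster count here are assumptions):

from sklearn.cluster import SpectralClustering
sim = np.exp(-D_p / D_p.std())  # distance -> similarity; the bandwidth is a guess
sc = SpectralClustering(n_clusters=10, affinity='precomputed')
coarse = sc.fit_predict(sim)    # coarse-cluster id for each of the 121 fine classes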
In [263]:
from sklearn.cluster import AffinityPropagation as AP
clst = AP(damping=0.98,  # damping determines the number of clusters
          max_iter=500,
          convergence_iter=15,
          affinity='euclidean',  # treats rows of D_p as feature vectors; 'precomputed' with -D_p is the other option
          verbose=False)
clst.fit(D_p)
print 'Number of clusters:', len(clst.cluster_centers_)
membership = np.c_[range(121), clst.labels_]
fine_to_coarse = dict(membership)
coarse_to_fine = {l: [] for l in clst.labels_}
for k, v in fine_to_coarse.items():
    coarse_to_fine[v].append(k)
Out[263]:
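A quick way to eyeball the resulting coarse groups with human-readable names, using the le encoder already loaded:

for coarse, fines in sorted(coarse_to_fine.items()):
    print coarse, list(le.inverse_transform(fines))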
In [112]:
from collections import OrderedDict
specialists_d = OrderedDict([
    ('chaetognath', [
        'chaetognath_non_sagitta',
        'chaetognath_other',
        'chaetognath_sagitta']),
    ('copepod', [
        'copepod_calanoid',
        'copepod_calanoid_eggs',
        'copepod_calanoid_eucalanus',
        'copepod_calanoid_flatheads',
        'copepod_calanoid_frillyAntennae',
        'copepod_calanoid_large',
        'copepod_calanoid_large_side_antennatucked',
        'copepod_calanoid_octomoms',
        'copepod_calanoid_small_longantennae',
        'copepod_cyclopoid_copilia',
        'copepod_cyclopoid_oithona',
        'copepod_cyclopoid_oithona_eggs',
        'copepod_other']),
    ('tunicate_doliolid', [
        'tunicate_doliolid',
        'tunicate_doliolid_nurse']),
])
sp_member_d = {}
for p, c in specialists_d.items():  # map each fine class name to its specialist group
    for m in c:
        sp_member_d[m] = p
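A hedged sketch of how this mapping could drive inference: route any image whose generalist prediction falls in a specialist group to that group's model and average the two outputs. specialist_nets is hypothetical (one fine-tuned net per group, assumed to share the 121-way output):

def route_prediction(net, specialist_nets, im):
    """Generalist prediction, refined by a specialist when one applies."""
    probs = net.predict([im])[0]
    name = le.inverse_transform(probs.argmax())
    group = sp_member_d.get(name)
    if group is None:
        return probs  # no specialist covers the predicted class
    sp_probs = specialist_nets[group].predict([im])[0]  # hypothetical specialist net
    return 0.5 * (probs + sp_probs)  # simple average of generalist and specialist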
In [113]:
sp_member_d
Out[113]: