Also try hinge loss - it would be analogous to using an SVM.
Add morphological features in the FC layer.
Size / bounding-box features.
Feed features into label spreading.
Find the problematic classes (low probabilities - massive uncertainty), or high log loss on the training data, and augment those particular classes.
t0 Log loss: 1.33067
Classify everything, then aggregate the results across images that are just augmentations of one another, and take the average of the prediction probabilities (see the sketch after these notes).
I SCREWED UP... rotation by around 45 degrees leaves a black background.
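A minimal sketch of the augmentation-averaging idea above, assuming a hypothetical helper augment_views(im) that returns the rotated/flipped variants of one image; net is the caffe.Classifier built below:

def predict_with_tta(net, im, augment_views):
    """Average class probabilities over an image and its augmented views."""
    views = [im] + augment_views(im)  # original + augmented copies (hypothetical helper)
    probs = net.predict(views)        # one softmax row per view
    return probs.mean(axis=0)         # averaged class probabilities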
In [11]:
import numpy as np
import matplotlib.pyplot as plt
import time
import tools.my_io as my_io
%matplotlib inline
import caffe
# Set the right path to your model definition file, pretrained model weights,
# and the image you would like to classify.
MODEL_FILE = '/media/raid_arr/data/ndsb/config/deploy_cnn_v3_maxout_supersparse.prototxt'
PRETRAINED = '/media/raid_arr/data/ndsb/models/zoomed_out_vanilla_smallmaxout/simple_fold0_iter_3000.caffemodel'
MEAN_VALUE = 23
IMAGE_FILE = '/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/acantharia_protist/100224.jpg'
VALIDATION_DB = '/media/raid_arr/tmp/test0_norm_lmdb'
In [7]:
# Loading From Database
print 'Loading data...'
tic = time.time()
data = my_io.load_lmdb(VALIDATION_DB)
print "Done in %.2f s." % (time.time() - tic)
val_files_all, images, labels = zip(*data)
test_labels = np.array(labels)  # as an array so elementwise comparisons like test_labels == 113 work
In [8]:
# Grab 1 image from file
input_image = caffe.io.load_image(IMAGE_FILE, color=False)
In [9]:
image_dims = images[0].shape[:2]
# image_dims = (57, 57)
print image_dims
net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       mean=np.array([MEAN_VALUE]),
                       raw_scale=1.0,  # 255 if loading via caffe.io, 1.0 if loading from the my_io lmdb
                       image_dims=image_dims)
# gpu=True)
# caffe.set_phase_test()
caffe.set_mode_gpu()
In [123]:
np.where(test_labels == 113)  # indices of the samples whose true label is 113
Out[123]:
In [137]:
# %%time
n = 5262
# im = caffe.io.load_image(test_files[n], color=False)
im_path = val_files_all[n]
# im_loaded = caffe.io.load_image(im_path, color=False)
# im = images[n].astype(np.float32)/255.0
# im = im_loaded
im = images[n]
print im.shape
from tools.le import le  # label encoder (also imported in a later cell)
print le.inverse_transform(test_labels[n])
tic = time.time()
prediction = net.predict([im])
print "Prediction in %.2f s." % (time.time() - tic)
print 'prediction shape:', prediction[0].shape
plt.plot(prediction[0])
print 'predicted class:', prediction[0].argmax()
print 'true class:', test_labels[n]
print 'logloss:', -np.log(prediction[0][test_labels[n]])
plt.figure()
plt.imshow(np.squeeze(im), interpolation='none', cmap='gray')
Out[137]:
In [140]:
prediction[0][114]
Out[140]:
In [142]:
start = time.time()
prediction = net.predict(images)
print "Done in %.2f s." % (time.time() - start)
In [19]:
ll_ii = np.log(prediction[range(len(test_labels)), test_labels])  # per-sample log-likelihood of the true class
ll = -np.mean(ll_ii)
print 'Log loss:', ll
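One caveat: an exact zero probability on a true class sends np.log to -inf. Kaggle's log-loss metric clips predicted probabilities away from 0 and 1, so a clipped version along those lines is safer:

eps = 1e-15
pred_clipped = np.clip(prediction, eps, 1 - eps)
pred_clipped /= pred_clipped.sum(axis=1, keepdims=True)  # renormalize rows after clipping
print 'Clipped log loss:', -np.mean(np.log(pred_clipped[range(len(test_labels)), test_labels]))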
In [1]:
from tools.le import le
test_labels = np.array(test_labels)
ll_bylabel = np.zeros(len(set(test_labels)))
labels = sorted(set(test_labels))
for l in labels:
    # ll_bylabel[l] = -np.mean(ll_ii[test_labels==l])  # per-class mean log loss
    ll_bylabel[l] = -np.sum(ll_ii[test_labels == l]) / len(labels)  # class contribution, scaled by the number of classes
q = np.c_[labels, ll_bylabel]
qq = q[(-q[:, 1]).argsort()]  # classes sorted by descending loss
qqq = np.c_[qq[:, ::-1], le.inverse_transform(qq[:, 0].astype(int))]
print qqq
# print ll_bylabel
# print np.argmax(ll_bylabel), np.max(ll_bylabel), le.inverse_transform(np.argmax(ll_bylabel))
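Following the note at the top about augmenting the problematic classes, a minimal sketch of targeted augmentation, assuming a hypothetical augment_image helper (small rotations/flips); in practice this would be applied to the training split, not the validation images used here:

k = 10
worst_classes = qq[:k, 0].astype(int)  # the k classes with the highest loss
extra_images, extra_labels = [], []
for l in worst_classes:
    for i in np.where(test_labels == l)[0]:
        extra_images.append(augment_image(images[i]))  # hypothetical augmentation helper
        extra_labels.append(l)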
In [108]:
z = le.inverse_transform(sorted(set(labels)))
zz = [qq for qq in z if 'copepod' in qq]
zz
Out[108]:
In [81]:
guess = np.argmax(prediction, axis=1)
# guess_arr = np.c_[test_files, guess]
# np.savetxt('./guess_labels/guess1.txt', guess_arr, fmt='%s', delimiter='\t')
Out[81]:
In [79]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from tools.le import le
conf_arr = confusion_matrix(test_labels, guess)
acc = accuracy_score(test_labels, guess)
print acc
d = np.diagonal(conf_arr.copy())
print np.sum(conf_arr)
np.fill_diagonal(conf_arr, 0) # gets rid of correct predictions
pt = np.sum(conf_arr, axis=1) # misclassification count per true class (diagonal already zeroed)
names = np.array(le.inverse_transform(range(121)))
ind = pt.argsort()
np.c_[pt[ind], d[ind], names[ind], np.arange(121)[ind]]
Out[79]:
In [2]:
import pandas as pd
conf_arr_raw = confusion_matrix(test_labels, guess)
conf_arr_norm = conf_arr_raw.astype(float) / conf_arr_raw.sum(axis=1)[:, np.newaxis]  # row-normalize
pd.set_option('display.max_columns', 122)
conf_df = pd.DataFrame(conf_arr_raw)
conf_df
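The row-normalized matrix conf_arr_norm is easier to read as a heatmap; a quick matplotlib sketch:

plt.figure(figsize=(12, 12))
plt.imshow(conf_arr_norm, interpolation='none', cmap='hot')
plt.colorbar()
plt.xlabel('predicted class')
plt.ylabel('true class')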
In [78]:
conf_arr_raw = confusion_matrix(test_labels, guess)
conf_arr.sum()  # total misclassified samples (diagonal was zeroed above)
Out[78]:
In [265]:
# Probabilistic confusion matrix, following http://arxiv.org/pdf/1410.0736v3.pdf
import pickle
pickle.dump((test_labels, prediction), open('./tools/naive_prediction.p', 'wb'))
prob_conf = np.zeros((121, 121))
for l in range(121):
    inds = np.squeeze(np.array(np.where(test_labels == l)))
    class_conf = prediction[inds, :].mean(axis=0)  # mean predicted distribution for true class l
    prob_conf[l, :] = class_conf
F = prob_conf
D = 1 - F               # turn the confusion (a similarity) into a distance
np.fill_diagonal(D, 0)
D_p = 0.5 * (D + D.T)   # symmetrize
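The paper clusters the classes from this confusion-derived distance; as an alternative to the affinity propagation below, a spectral-clustering sketch on the same matrix (the distance-to-similarity kernel and the cluster count here are assumptions):

from sklearn.cluster import SpectralClustering
sim = np.exp(-D_p / D_p.std())  # distance -> similarity; the bandwidth is a guess
sc = SpectralClustering(n_clusters=10, affinity='precomputed')
coarse = sc.fit_predict(sim)    # coarse-cluster id for each of the 121 fine classes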
In [263]:
from sklearn.cluster import AffinityPropagation as AP
clst = AP(damping=0.98,  # damping determines the number of clusters
          max_iter=500,
          convergence_iter=15,
          affinity='euclidean',  # treats rows of D_p as feature vectors; 'precomputed' with -D_p is the other option
          verbose=False)
clst.fit(D_p)
print 'Number of clusters:', len(clst.cluster_centers_)
membership = np.c_[range(121), clst.labels_]
fine_to_coarse = dict(membership)
coarse_to_fine = {l: [] for l in clst.labels_}
for k, v in fine_to_coarse.items():
    coarse_to_fine[v].append(k)
Out[263]:
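A quick way to eyeball the resulting coarse groups with human-readable names, using the le encoder already loaded:

for coarse, fines in sorted(coarse_to_fine.items()):
    print coarse, list(le.inverse_transform(fines))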
In [112]:
from collections import OrderedDict
specialists_d = OrderedDict([
    ('chaetognath', [
        'chaetognath_non_sagitta',
        'chaetognath_other',
        'chaetognath_sagitta']),
    ('copepod', [
        'copepod_calanoid',
        'copepod_calanoid_eggs',
        'copepod_calanoid_eucalanus',
        'copepod_calanoid_flatheads',
        'copepod_calanoid_frillyAntennae',
        'copepod_calanoid_large',
        'copepod_calanoid_large_side_antennatucked',
        'copepod_calanoid_octomoms',
        'copepod_calanoid_small_longantennae',
        'copepod_cyclopoid_copilia',
        'copepod_cyclopoid_oithona',
        'copepod_cyclopoid_oithona_eggs',
        'copepod_other']),
    ('tunicate_doliolid', [
        'tunicate_doliolid',
        'tunicate_doliolid_nurse']),
])
sp_member_d = {}
for p, c in specialists_d.items():  # map each fine class name to its specialist group
    for m in c:
        sp_member_d[m] = p
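A hedged sketch of how this mapping could drive inference: route any image whose generalist prediction falls in a specialist group to that group's model and average the two outputs. specialist_nets is hypothetical (one fine-tuned net per group, assumed to share the 121-way output):

def route_prediction(net, specialist_nets, im):
    """Generalist prediction, refined by a specialist when one applies."""
    probs = net.predict([im])[0]
    name = le.inverse_transform(probs.argmax())
    group = sp_member_d.get(name)
    if group is None:
        return probs  # no specialist covers the predicted class
    sp_probs = specialist_nets[group].predict([im])[0]  # hypothetical specialist net
    return 0.5 * (probs + sp_probs)  # simple average of generalist and specialist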
In [113]:
sp_member_d
Out[113]: