In [1]:
import sys
sys.path.append('../code/functions')
sys.path.append('../../pipeline_1/code/functions')

import cv2
import glob
import numpy as np
import tiffIO as io
import scipy.io as sio
import matplotlib.pyplot as plt
import plosLib as pLib
import connectLib as cLib

from scipy import ndimage
from cluster import Cluster
from scipy.ndimage.filters import convolve
from skimage.filters import threshold_otsu
from skimage.exposure import equalize_adapthist
from skimage.morphology import remove_small_objects

In [2]:
procData = []
for mat in glob.glob('../../data/matlabData/collman14v2/*_p1.mat'):
    #strip the directory prefix and the '_p1.mat' suffix, leaving the channel name
    name = mat[34:-7]
    rawData = sio.loadmat(mat)
    npData = np.rollaxis(rawData[name], 2, 0)
    procData.append([name, npData])
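
Quick sanity check on what got loaded (uses only what's already in procData):

In [ ]:
#list the channel names and volume shapes we just loaded
for name, vol in procData:
    print(name, vol.shape)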

Since the annotation channel of this data is somewhat suspect, I decided to load some alternate annotation files to compare.
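
Something like the following, though the directory here is a placeholder; substitute wherever the alternate annotations actually live:

In [ ]:
#hypothetical path for the alternate annotation files
altAnnotations = []
for mat in glob.glob('../../data/matlabData/collman14v2_alt/*.mat'):
    raw = sio.loadmat(mat)
    #grab the first non-metadata key from the .mat file
    key = [k for k in raw.keys() if not k.startswith('__')][0]
    altAnnotations.append(np.rollaxis(raw[key], 2, 0))

for vol in altAnnotations:
    print(vol.shape)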

Interestingly, the labels cover exactly 3 times the z span of the p1 data. Could this mean they are just p1, p2, and p3 stacked?

OK, so the annotations are legit. The next thing I want to be sure about is that we are computing precision, recall, and F1 correctly for this data.


In [3]:
def otsuVox(argVox):
    probVox = np.nan_to_num(argVox)
    bianVox = np.zeros_like(probVox)
    for zIndex, curSlice in enumerate(probVox):
        #if the array contains all the same values
        if np.max(curSlice) == np.min(curSlice):
            #otsu thresh will fail here, leave bianVox as all 0's
            continue
        thresh = threshold_otsu(curSlice)
        bianVox[zIndex] = curSlice > thresh
    return bianVox

In [4]:
def precision_recall_f1(labels, predictions):

    if len(predictions) == 0:
        print('ERROR: prediction list is empty')
        return 0., 0., 0.

    labelFound = np.zeros(len(labels))
    truePositives = 0
    falsePositives = 0

    for prediction in predictions:
        #casting to set is ok here since members are unique
        predictedMembers = set([tuple(elem) for elem in prediction.getMembers()])
        detectionCutoff = 1
        found = False

        for idx, label in enumerate(labels):
            labelMembers = set([tuple(elem) for elem in label.getMembers()])
            #count a detection if the prediction and label share at least detectionCutoff voxels
            if len(predictedMembers & labelMembers) >= detectionCutoff:
                found = True
                labelFound[idx] = 1

        #count each prediction once, no matter how many labels it touches
        if found:
            truePositives += 1
        else:
            falsePositives += 1

    precision = truePositives/float(truePositives + falsePositives)
    recall = np.count_nonzero(labelFound)/float(len(labels))
    try:
        f1 = 2 * (precision * recall) / (precision + recall)
    except ZeroDivisionError:
        f1 = 0.

    return precision, recall, f1
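
Quick sanity check of the metric on toy inputs. FakeCluster is a throwaway stand-in that duck-types the real Cluster's getMembers:

In [ ]:
#throwaway stand-in for Cluster, exposing only getMembers
class FakeCluster:
    def __init__(self, members):
        self._members = members
    def getMembers(self):
        return self._members

toyLabels = [FakeCluster([(0, 0, 0), (0, 0, 1)])]
toyPredictions = [FakeCluster([(0, 0, 1), (0, 0, 2)])]
#one shared voxel, so the single prediction should count as a detection
print(precision_recall_f1(toyLabels, toyPredictions))  #expect (1.0, 1.0, 1.0)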

I think I see where the issue was arising. I was checking whether the overlap between the label and the prediction was greater than the overlap ratio times the volume of the prediction. Since our predictions in this data are massive compared to the labels, that test would almost never pass.

I changed the test so that the label and the prediction merely need to be nondisjoint, i.e. share at least one voxel.
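
In code, the change boils down to this (the sets and the overlapRatio value here are just illustrative):

In [ ]:
#hypothetical sets standing in for a huge prediction and a small label
predictedMembers = set((0, 0, z) for z in range(1000))
labelMembers = set([(0, 0, 0), (0, 0, 1), (0, 0, 2)])
overlap = len(predictedMembers & labelMembers)

overlapRatio = 0.7
print(overlap >= overlapRatio * len(predictedMembers))  #old ratio test: False
print(overlap >= 1)                                     #new nondisjoint test: True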

Revised Pipeline


In [5]:
#glob order is not guaranteed; index 5 was the GABA channel on this run
gaba = procData[5][1]

In [6]:
otsuOut = otsuVox(gaba)
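
Eyeballing a slice of the output (the slice index is arbitrary):

In [ ]:
plt.figure()
plt.imshow(otsuOut[5], cmap='gray')
plt.title('otsuVox Output, Slice 5')
plt.show()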

This looks more like it. So far, so good.


In [7]:
clusterList = cLib.clusterThresh(otsuOut, 500, 1000000)

Now let's take a look at the max and average intensity in each of the clusters. I have a feeling that there will be some level of bimodality.


In [8]:
aveList = []
maxList = []
for cluster in clusterList:
    #raw-channel intensity of every voxel in this cluster
    curClusterDist = [gaba[member[0], member[1], member[2]] for member in cluster.members]
    aveList.append(np.mean(curClusterDist))
    maxList.append(np.max(curClusterDist))

In [9]:
plt.figure()
plt.hist(aveList, bins=40)
plt.title('Averages Of Clusters')
plt.show()



In [10]:
plt.figure()
plt.hist(maxList, bins=30)
plt.title('Max Of Clusters')
plt.show()



In [11]:
plt.figure()
plt.scatter(range(len(aveList)), aveList, c='b')
plt.scatter(range(len(maxList)), maxList, c='r')
plt.show()


That's what I like to see. The cluster maxima exhibit clear bimodality. Time to get the Otsu threshold for the optimal class breakdown here, and disregard any cluster whose max falls below it.


In [12]:
#thresh = threshold_otsu(np.array(maxList))
thresh = 23
finalClusters = []
for i in range(len(maxList)): #this is bad and i should feel bad
    if aveList[i] > thresh:
        finalClusters.append(clusterList[i])
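
How aggressive was that cut?

In [ ]:
print(len(finalClusters), 'of', len(clusterList), 'clusters survive the max-intensity cut')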

In [ ]:
outVol = np.zeros_like(gaba)
for cluster in finalClusters:
    for member in cluster.members:
        outVol[member[0], member[1], member[2]] = 1
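
A side-by-side look at the raw channel and the final mask (arbitrary slice again):

In [ ]:
plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(gaba[5], cmap='gray')
plt.title('Raw gaba, Slice 5')
plt.subplot(1, 2, 2)
plt.imshow(outVol[5], cmap='gray')
plt.title('Final Mask, Slice 5')
plt.show()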

In [ ]:
labelClusters = cLib.clusterThresh(procData[0][1], 0, 10000000)

In [ ]:
print(precision_recall_f1(labelClusters, finalClusters))
