In [54]:
import sys
sys.path.append('../code/functions')
sys.path.append('../../pipeline_1/code/functions')

import cv2
import glob
import random
import numpy as np
import tiffIO as io
import scipy.io as sio
import matplotlib.pyplot as plt
import connectLib as cLib
import matplotlib.lines as mlines

from qaLib import visDiff
from scipy import ndimage
from cluster import Cluster
from scipy.ndimage.filters import convolve
from skimage.filters import threshold_otsu
from skimage.exposure import equalize_adapthist
from skimage.morphology import remove_small_objects
from skimage.measure import label

In [2]:
rawData = sio.loadmat('collman15v2/PSD95_488_p1.mat')

keys = rawData.keys()
parsedKey = None
for key in keys:
    if not '__' in key:
        parsedKey = key
        break
if not parsedKey is None:
    out = np.rollaxis(rawData[parsedKey], 2, 0)
else:
    out =  None

In [3]:
print out.shape


(9, 6306, 4518)

In [4]:
procData = []
for mat in glob.glob('collman15v2/*_p1.mat'):
    name = mat[12:-7]
    rawData = sio.loadmat(mat)
    npData = np.rollaxis(rawData[name], 2, 0)
    procData.append([name, npData])

In [5]:
for dataSet in procData:
    plt.figure()
    plt.imshow(dataSet[1][0], cmap='gray')
    plt.title(str(dataSet[0]) + ' at z = 0')
    plt.show()


Since the annotation channel of this data is somewhat suspect, I decided to load some alternate annoation files to compare

Ok so the annotatons are legit. Next thing I want to be sure about is that we are checking precision, recall, and f1 correctly for this data


In [8]:
def precision_recall_f1(labels, predictions, overlapRatio):
   

    if len(predictions) == 0:
        print 'ERROR: prediction list is empty'
        return 0., 0., 0.
    
    labelFound = np.zeros(len(labels))
    truePositives = 0
    falsePositives = 0

    for prediction in predictions:
        #casting to set is ok here since members are uinque
        predictedMembers = set([tuple(elem) for elem in prediction.getMembers()])
        #detectionCutoff = overlapRatio * len(predictedMembers)
        detectionCutoff = 1
        found = False
        
        for idx, label in enumerate(labels):
            labelMembers = set([tuple(elem) for elem in label.getMembers()])
            #if the predictedOverlap is over the detectionCutoff ratio
            if len(predictedMembers & labelMembers) >= detectionCutoff:
                truePositives +=1
                found=True
                labelFound[idx] = 1
        
        if not found:
            falsePositives +=1
            
    precision = truePositives/float(truePositives + falsePositives)
    recall = np.count_nonzero(labelFound)/float(len(labels))
    f1 = 0
    try:
        f1 = 2 * (precision*recall)/(precision + recall)
        
    except ZeroDivisionError:
        f1 = 0
        
    return precision, recall, f1

In [9]:
testData = procData[7][1] #renaming for convinience

In [10]:
otsuOut = cLib.otsuVox(testData)

In [91]:
visVolDiff(otsuOut[0], procData[6][1][0])



In [24]:
for i in range(5):
    plt.figure()
    plt.imshow(otsuOut[i], cmap='gray')
    plt.title('Binarized Data at z = 0')
    plt.show()



In [25]:
labelOut = label(otsuOut)

In [26]:
for i in range(5):
    plt.figure()
    plt.imshow(labelOut[i])
    plt.title('Clustered Data at z = 0')
    plt.show()


This looks more like it. So far so good


In [27]:
clusterList = cLib.clusterThresh(otsuOut, 500, 1000000)

Now let's take a look at the max and average intensity in each of the clusters. I have a feeling that there will be some level of bimodality


In [28]:
aveList = []
maxList = []
for cluster in clusterList:
    curClusterDist = []
    for member in cluster.members:
        curClusterDist.append(testData[member[0]][member[1]][member[2]])
    aveList.append(np.mean(curClusterDist))
    maxList.append(np.max(curClusterDist))

In [29]:
plt.figure()
plt.hist(aveList, bins=40)
plt.title('Averages Of Clusters')
plt.show()



In [30]:
plt.figure()
plt.hist(maxList, bins=30)
plt.title('Max Of Clusters')
plt.show()



In [31]:
plt.figure()
plt.scatter(range(len(aveList)), aveList, c='b')
plt.scatter(range(len(maxList)), maxList, c='r')
plt.show()


That's what I like to see. The Max clusters exibits clear bimodality. Time to get the otsu threshold for the optimum class breakdown here, and disregard any clusters whose max is under the thresh


In [32]:
#thresh = threshold_otsu(np.array(maxList))
thresh = 47
finalClusters = []
for i in range(len(maxList)): #this is bad and i should feel bad
    if aveList[i] > thresh:
        finalClusters.append(clusterList[i])

In [93]:
#thresh = threshold_otsu(np.array(maxList))
randClusterDist = []
for i in range(100000):
    point = [int(random.random()*testData.shape[0]), int(random.random()*testData.shape[1]), int(random.random()*testData.shape[2])]
    randClusterDist.append(testData[point[0]][point[1]][point[2]])

mu = np.average(randClusterDist)
sigma = np.std(randClusterDist)
finalClusters = []
for i in range(len(maxList)): #this is bad and i should feel bad
    if (aveList[i] - mu)/float(sigma) > 6:
        finalClusters.append(clusterList[i])

In [94]:
labelClusters = cLib.clusterThresh(procData[6][1], 0, 10000000)

In [95]:
precision_recall_f1(labelClusters, finalClusters, 1)


Out[95]:
(0.88, 0.6218487394957983, 0.7287376902417189)

In [96]:
outVol = np.zeros_like(testData)
print len(finalClusters)
for cluster in finalClusters:
    for member in cluster.members:
        outVol[member[0]][member[1]][member[2]] = 1
        
print np.count_nonzero(outVol)/float(outVol.shape[0] * outVol.shape[1] * outVol.shape[2])


94
0.00603391604437

In [97]:
for i in range(0, outVol.shape[0]):
    plt.figure()
    plt.imshow(outVol[i], cmap='gray')
    plt.title('Synapsys Labels at z = '+str(i))
    plt.show()



In [98]:
visVolDiff(outVol[0], procData[6][1][0])



In [85]:
a = np.zeros((10, 10))
a[2:4, 2:4] = 1
a[6:8, 6:8] = 1
b = np.zeros((10, 10))
b[1:9, 1:9] = 1
visVolDiff(a, b)



In [45]:
plt.figure()
plt.imshow(a, cmap='gray')
plt.title("Image A")
plt.show()



In [46]:
plt.figure()
plt.imshow(b, cmap='gray')
plt.title("Image B")
plt.show()



In [73]:
fig = plt.figure()
plt.imshow(visDiff(a, b))
plt.title("Disparity Between A and B")

blue_line = mlines.Line2D([], [], color='blue',markersize=15, label='Blue line')
green_line = mlines.Line2D([], [], color='green', markersize=15, label='Green line')
red_line = mlines.Line2D([], [], color='red', markersize=15, label='Green line')


handles = [red_line, blue_line, green_line]
labels = ["Image A Only", "Image B Only", "Image A and Image B"]
fig.legend(handles=handles, labels=labels, loc=3)
plt.show()



In [75]:
def visVolDiff(a, b):
    fig = plt.figure()
    plt.imshow(visDiff(a, b))
    plt.title("Disparity Between Predictions And Labels")

    blue_line = mlines.Line2D([], [], color='blue',markersize=15, label='Blue line')
    green_line = mlines.Line2D([], [], color='green', markersize=15, label='Green line')
    red_line = mlines.Line2D([], [], color='red', markersize=15, label='Green line')


    handles = [red_line, blue_line, green_line]
    labels = ["Predictions Only", "Labels Only", "Both Predictions and Labels"]
    fig.legend(handles=handles, labels=labels, loc=3)
    plt.show()

In [78]:
a = np.zeros((10, 10))
a[1:9,1:9] = 1
b = np.zeros((10, 10))
b[2:4, 2:4] = 1
b[6:8, 6:8] = 1
visVolDiff(a, b)



In [ ]: