In [1]:
import sys
sys.path.append('../code/functions')
from connectLib import otsuVox
from connectLib import clusterThresh
from plosLib import pipeline as PLOS
from random import randrange as rand
from cluster import Cluster
import numpy as np
import pickle
import matplotlib.pyplot as plt

1. Pseudocode

2. Algorithm Code


In [2]:
def neighborhoodDensity(data, interPlane = 1, intraPlane = 1, percentile = 50):
    output = np.zeros_like(data)
    for z in range(data.shape[0]):
        for y in range(data.shape[1]):
            for x in range(data.shape[2]):
                zLow = z-intraPlane
                zHigh = z+intraPlane
                yLow = y-interPlane
                yHigh = y+interPlane
                xLow = x-interPlane
                xHigh = x+interPlane
                if zLow>=0 and zHigh<data.shape[0] and yLow>=0 and yHigh<data.shape[1] and xLow>=0 and xHigh<data.shape[2]:
                    subVol = data[zLow:zHigh, yLow:yHigh, xLow:xHigh]
                    if not all(subVol.shape) == 0:
                        thresh = np.percentile(subVol, percentile)
                        binSubVol = subVol >= thresh
                        output[z][y][x] = (np.count_nonzero(binSubVol)/float(interPlane*interPlane*intraPlane)) * data[z][y][x] * np.average(subVol)
    return output

3. Predicted Performance Conditions

I believe that this algorithm will perform well under conditions where the data we wish to extract is surrounded by other data that we wish to extract, and when noise is surrounded by other pixels that can be identified as background

In other words, the alorithm will work well when the distribution of pixel intensities around true data points is skewed left, and the distribution of pixel intensities around noise is skiewed right

4. Simulation Data

Easy Sim

This sim data will show a clearly bimodal image with a grid of outliers distributed over it. These outliers will be of an intensity such that they may resonably be classified as noise or synapse, depending on the area they occur in.

The goal of both PLOS and the Nieghborhood Filter in this sim will be to remove the obvious noise outliers wile not punishing the outliers that may be considered synapse


In [3]:
#generating easy sim data

easySim = np.zeros((100, 100, 100))
for z in range(100):
    for y in range(50, 100):
        for x in range(100):
            easySim[z][y][x] = 100

for y in range(0, 100, 10):
    for z in range(100):
        for x in range(0, 100, 10):
            for i in range(3):
                for j in range(3):
                    try:
                        easySim[z][y-3+i][x-3+j] = 75
                    except:
                        continue
        
plt.figure()
plt.imshow(easySim[50])
plt.show()



In [4]:
easySimN = neighborhoodDensity(easySim, 6, 6, 50)

In [5]:
easySimP = PLOS(easySim, 1, 1)

In [6]:
plt.figure()
plt.title('Neighborhood Filter Out')
plt.imshow(easySimN[50])
plt.show()

plt.figure()
plt.title('PLOS Out')
plt.imshow(easySimP[50])
plt.show()



In [7]:
easySimNB = otsuVox(easySimN)
easySimPB = otsuVox(easySimP)

In [8]:
plt.figure()
plt.title('Binary Neighborhood Filter Out')
plt.imshow(easySimNB[50], cmap='gray')
plt.show()

plt.figure()
plt.title('Binary PLOS Out')
plt.imshow(easySimPB[50], cmap='gray')
plt.show()


Easy Sim Analysis

As you can see above, the PLOS pipeline heavily punishes the outliers both the synapse region and in the non synapse region. This is not an effective strategy for our data since synapses in the foreground region may have a value identical to that of noise in the non foreground region.

The neighborhood filter mitigates this issue by only removing the outliers in the noise region, while leaving the foreground region entirely intact.

Realistic Sim

The realistic simulation will be run on data that models the distribution of synapses and noise in our true data


In [9]:
def generatePointSet():
    center = (rand(0, 9), rand(0, 999), rand(0, 999))
    toPopulate = []
    for z in range(-3, 2):
        for y in range(-3, 2):
            for x in range(-3, 2):
                curPoint = (center[0]+z, center[1]+y, center[2]+x)
                #only populate valid points
                valid = True
                for dim in range(3):
                    if curPoint[dim] < 0 or curPoint[dim] >= 1000:
                        valid = False
                if valid:
                    toPopulate.append(curPoint)
    return set(toPopulate)
    
def generateTestVolume():
    #create a test volume
    volume = np.zeros((10, 1000, 1000))
    myPointSet = set()
    for _ in range(rand(1000, 2000)):
        potentialPointSet = generatePointSet()
        #be sure there is no overlap
        while len(myPointSet.intersection(potentialPointSet)) > 0:
                potentialPointSet = generatePointSet()
        for elem in potentialPointSet:
            myPointSet.add(elem)
    #populate the true volume
    for elem in myPointSet:
        volume[elem[0], elem[1], elem[2]] = rand(40000, 60000)
    #introduce noise
    noiseVolume = np.copy(volume)
    for z in range(noiseVolume.shape[0]):
        for y in range(noiseVolume.shape[1]):
            for x in range(noiseVolume.shape[2]):
                if not (z, y, x) in myPointSet:
                    toPop = rand(0, 10)
                    if toPop == 5:
                        noiseVolume[z][y][x] = rand(0, 60000)
    return volume, noiseVolume

Scoring Code

The following code was lifted from the precisionrecall notebook and calculates f1 score, among other stats


In [10]:
def f1score(trueClusterList, testClusterList):
    
    tp = 0
    fp = 0
    fn = 0
    
    testClusterTuples = []
    for elem in testClusterList:
        myTupleList = []
        members = elem.members
        for member in members:
            myTupleList.append(tuple(member))
        testClusterTuples.append(myTupleList)

    trueClusterTuples = []
    for elem in trueClusterList:
        myTupleList = []
        members = elem.members
        for member in members:
            myTupleList.append(tuple(member))
        trueClusterTuples.append(myTupleList)
    
    truePositives = []
    for testList in testClusterTuples:
        found = False
        for trueList in trueClusterTuples:
            if len(set(testList).intersection(set(trueList))) > 0:
                found = True
        if found:
            truePositives.append(testList)
            tp+=1
    
    fp = len(testClusterList) - tp
    fn = len(trueClusterList) - tp
    precision = float(tp)/float(fp+tp)
    recall = float(tp)/float(tp+fn)
    f1 = (2*precision*recall)/(precision+recall)
    
    return precision, recall, f1, truePositives, trueClusterTuples, testClusterTuples

In [15]:
testVol, noiseVol = generateTestVolume()

In [16]:
plt.figure()
plt.hist(noiseVol.flatten())
plt.title("Intensity Histogram of Noise Volume")
plt.show()


<matplotlib.figure.Figure at 0x7f20b765bf50>

In [17]:
realSimN = neighborhoodDensity(noiseVol, 2, 2, 50)

In [18]:
plt.figure()
plt.hist(realSimN.flatten())
plt.title("Post Neighborhood Intensity Histogram of Noise Volume")
plt.show()



In [19]:
realSimNB = otsuVox(realSimN)

In [20]:
realSimP = PLOS(noiseVol, 1, 1)

In [21]:
plt.figure()
plt.hist(realSimP.flatten())
plt.title("Post PLOS Intensity Histogram of Noise Volume")
plt.show()



In [22]:
realSimPB = otsuVox(realSimP)

In [23]:
plt.figure()
plt.title('Noise Volume')
plt.imshow(noiseVol[5], cmap='gray')
plt.show()

plt.figure()
plt.imshow(testVol[5], cmap='gray')
plt.title('True Labels')
plt.show()

plt.figure()
plt.imshow(realSimNB[5], cmap='gray')
plt.title('Neighborhood Filter Predictions')
plt.show()

plt.figure()
plt.imshow(realSimPB[5], cmap='gray')
plt.title('PLOS Predictions')
plt.show()



In [24]:
clustersN = clusterThresh(realSimNB[4:6])
clustersP = clusterThresh(realSimPB[4:6])
clustersT = clusterThresh(testVol[4:6])

In [25]:
precisionN, recallN, f1N, _, _, _ = f1score(clustersT, clustersN)
precisionP, recallP, f1P, _, _, _ = f1score(clustersT, clustersP)
print 'Neighborhood:'
print '\tPrecision: ', precisionN
print '\tRecall: ', recallN
print '\tf1: ', f1N

print 'PLOS:'
print '\tPrecision: ', precisionP
print '\tRecall: ', recallP
print '\tf1: ', f1P


Neighborhood:
	Precision:  0.49528495285
	Recall:  0.981316003249
	f1:  0.658310626703
PLOS:
	Precision:  1.0
	Recall:  0.668562144598
	f1:  0.801363193768

In [30]:
statListP = []
statListN = []

def executeRealisticSim():
    testVol, noiseVol = generateTestVolume()
    
    realSimN = neighborhoodDensity(noiseVol, 2, 2, 50)
    realSimNB = otsuVox(realSimN)
    realSimP = PLOS(noiseVol, 1, 1)
    realSimPB = otsuVox(realSimP)
    
    clustersN = clusterThresh(realSimNB[4:6])
    clustersP = clusterThresh(realSimPB[4:6])
    clustersT = clusterThresh(testVol[4:6])
    
    precisionN, recallN, f1N, _, _, _ = f1score(clustersT, clustersN)
    precisionP, recallP, f1P, _, _, _ = f1score(clustersT, clustersP)
    
    print 'Neighborhood:'
    print '\tPrecision: ', precisionN
    print '\tRecall: ', recallN
    print '\tf1: ', f1N

    print 'PLOS:'
    print '\tPrecision: ', precisionP
    print '\tRecall: ', recallP
    print '\tf1: ', f1P
    
    statListN.append([precisionN, recallN, f1N])
    statListP.append([precisionP, recallP, f1P])

In [31]:
for i in range(10):
    executeRealisticSim()


Neighborhood:
	Precision:  0.482809430255
	Recall:  0.983983983984
	f1:  0.647775947282
PLOS:
	Precision:  1.0
	Recall:  0.676676676677
	f1:  0.807164179104
Neighborhood:
	Precision:  0.489216201999
	Recall:  0.989361702128
	f1:  0.65469904963
PLOS:
	Precision:  1.0
	Recall:  0.667021276596
	f1:  0.800255264837
Neighborhood:
	Precision:  0.463902787706
	Recall:  0.987823439878
	f1:  0.631322957198
PLOS:
	Precision:  1.0
	Recall:  0.662100456621
	f1:  0.796703296703
Neighborhood:
	Precision:  0.498932991891
	Recall:  0.994893617021
	f1:  0.664582148948
PLOS:
	Precision:  1.0
	Recall:  0.691914893617
	f1:  0.817907444668
Neighborhood:
	Precision:  0.474321705426
	Recall:  0.98490945674
	f1:  0.640287769784
PLOS:
	Precision:  1.0
	Recall:  0.697183098592
	f1:  0.821576763485
Neighborhood:
	Precision:  0.469353948095
	Recall:  0.994152046784
	f1:  0.637659414854
PLOS:
	Precision:  1.0
	Recall:  0.67134502924
	f1:  0.803358992302
Neighborhood:
	Precision:  0.465894465894
	Recall:  0.987721691678
	f1:  0.633143856581
PLOS:
	Precision:  1.0
	Recall:  0.660300136426
	f1:  0.795398520953
Neighborhood:
	Precision:  0.502014098691
	Recall:  0.982266009852
	f1:  0.664445184938
PLOS:
	Precision:  1.0
	Recall:  0.691625615764
	f1:  0.817705299942
Neighborhood:
	Precision:  0.484496124031
	Recall:  0.989819004525
	f1:  0.650557620818
PLOS:
	Precision:  1.0
	Recall:  0.674208144796
	f1:  0.805405405405
Neighborhood:
	Precision:  0.47972972973
	Recall:  0.989164086687
	f1:  0.646107178969
PLOS:
	Precision:  1.0
	Recall:  0.637770897833
	f1:  0.778827977316

In [32]:
precListP = [elem[0] for elem in statListP]
precListN = [elem[0] for elem in statListN]

recListP = [elem[1] for elem in statListP]
recListN = [elem[1] for elem in statListN]

fListP = [elem[2] for elem in statListP]
fListN = [elem[2] for elem in statListN]

In [33]:
print 'Neighborhood Filter:'
print '\tPrecision: ', np.average(precListN), '\t Variance', np.var(precListN)
print '\tRecall: ', np.average(recListN), '\t Variance ', np.var(recListN)
print '\tF1: ', np.average(fListN), '\t Variance ', np.var(fListN)


print 'PLOS Filter:'
print '\tPrecision: ', np.average(precListP), '\t Variance', np.var(precListP)
print '\tRecall: ', np.average(recListP), '\t Variance ', np.var(recListP)
print '\tF1: ', np.average(fListP), '\t Variance ', np.var(fListP)


Neighborhood Filter:
	Precision:  0.481067148372 	 Variance 0.000154847790808
	Recall:  0.988409503928 	 Variance  1.48877922851e-05
	F1:  0.6470581129 	 Variance  0.000125682686956
PLOS Filter:
	Precision:  1.0 	 Variance 0.0
	Recall:  0.673014622616 	 Variance  0.000286414472957
	F1:  0.804430314472 	 Variance  0.000147562234575

In [ ]: