In [1]:
import sys
sys.path.append('../code/functions')
import cv2
import pickle
from connectLib import otsuVox
from connectLib import clusterThresh
from random import randrange as rand
from cluster import Cluster
import mouseVis as mv
import numpy as np
import matplotlib.pyplot as plt
In [2]:
def generatePointSet():
    center = (rand(0, 9), rand(0, 999), rand(0, 999))
    toPopulate = []
    #populate a small block of offsets around the center
    for z in range(-3, 2):
        for y in range(-3, 2):
            for x in range(-3, 2):
                curPoint = (center[0]+z, center[1]+y, center[2]+x)
                #only populate valid points (inside the 10 x 1000 x 1000 volume)
                valid = True
                bounds = (10, 1000, 1000)
                for dim in range(3):
                    if curPoint[dim] < 0 or curPoint[dim] >= bounds[dim]:
                        valid = False
                if valid:
                    toPopulate.append(curPoint)
    return set(toPopulate)
def generateTestVolume():
    #create a test volume
    volume = np.zeros((10, 1000, 1000))
    myPointSet = set()
    for _ in range(rand(1000, 2000)):
        potentialPointSet = generatePointSet()
        #be sure there is no overlap
        while len(myPointSet.intersection(potentialPointSet)) > 0:
            potentialPointSet = generatePointSet()
        for elem in potentialPointSet:
            myPointSet.add(elem)
    #populate the true volume
    for elem in myPointSet:
        volume[elem[0], elem[1], elem[2]] = rand(40000, 60000)
    #introduce noise
    noiseVolume = np.copy(volume)
    for z in range(noiseVolume.shape[0]):
        for y in range(noiseVolume.shape[1]):
            for x in range(noiseVolume.shape[2]):
                if not (z, y, x) in myPointSet:
                    toPop = rand(0, 10)
                    if toPop == 5:
                        noiseVolume[z][y][x] = rand(0, 60000)
    return volume, noiseVolume
In [6]:
myTestVolume, _ = generateTestVolume()
In [9]:
#pass an arbitrarily large upper bound to get all clusters
testList = clusterThresh(myTestVolume, 0, 100000)
In [10]:
#generate an annotation volume from the test list
annotations = mv.generateAnnotations(testList, *(myTestVolume.shape))
In [15]:
print np.count_nonzero(np.logical_xor(annotations, myTestVolume))
The XOR between the two volumes having no nonzero elements shows that clustering the volume preserves the integrity of the cluster lists: the annotation volume rebuilt from the cluster list is nonzero in exactly the same voxels as the original test volume. This test is run 5 times in a row below to validate that the 0 difference metric holds across trials.
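As a quick sanity check of the metric itself, the toy arrays below (made up for illustration, independent of the volumes above) show that the count of nonzero elements in the XOR is zero exactly when two arrays are nonzero in the same voxels.
In [ ]:
#toy sketch on made-up arrays: the XOR count is zero exactly when two arrays
#share the same nonzero support
a = np.zeros((4, 4))
a[1, 2] = 50000.
a[3, 0] = 42000.
b = (a > 0).astype(float)    #same support as a, different values
c = np.copy(a)
c[0, 0] = 1.                 #one extra nonzero voxel
print np.count_nonzero(np.logical_xor(a, b))    #prints 0: identical support
print np.count_nonzero(np.logical_xor(a, c))    #prints 1: one voxel differs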
In [17]:
for i in range(5):
    myTestVolume, _ = generateTestVolume()
    testList = clusterThresh(myTestVolume, 0, 100000)
    annotations = mv.generateAnnotations(testList, *(myTestVolume.shape))
    print np.count_nonzero(np.logical_xor(annotations, myTestVolume))
In [3]:
def precision_recall_f1(labels, predictions, overlapRatio):
    if len(predictions) == 0:
        print 'ERROR: prediction list is empty'
        return 0., 0., 0.
    labelFound = np.zeros(len(labels))
    truePositives = 0
    falsePositives = 0
    for prediction in predictions:
        #casting to set is ok here since members are unique
        predictedMembers = set([tuple(elem) for elem in prediction.getMembers()])
        detectionCutoff = overlapRatio * len(predictedMembers)
        found = False
        for idx, label in enumerate(labels):
            labelMembers = set([tuple(elem) for elem in label.getMembers()])
            #if the predicted overlap is over the detectionCutoff ratio
            if len(predictedMembers & labelMembers) >= detectionCutoff:
                truePositives += 1
                found = True
                labelFound[idx] = 1
        if not found:
            falsePositives += 1
    precision = truePositives/float(truePositives + falsePositives)
    recall = np.count_nonzero(labelFound)/float(len(labels))
    f1 = 0
    try:
        f1 = 2 * (precision*recall)/(precision + recall)
    except ZeroDivisionError:
        f1 = 0
    return precision, recall, f1
In [48]:
myTestVolume, _ = generateTestVolume()
testList = clusterThresh(myTestVolume, 0, 100000)
#run the code with the test volume's labels serving as their own predictions
precision, recall, f1 = precision_recall_f1(testList, testList, 1)
In [49]:
print precision, recall, f1
Precision, recall, and f1 values of 1 show that the algorithm performs as expected in the case where the labels are their own predictions.
Next, the recall will be tested by randomly selecting a percentage of the true clusters to pass in as the predictions. This should modulate only the recall, and not the precision, since every cluster passed in is still a "correct" prediction. If the algorithm works as expected, I should see a constant precision metric and a linearly increasing recall metric with a slope of 10%. That is, for every additional 10% of the labels included in the predictions, the recall should increase by 10%.
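As a toy sketch of that expectation (plain frozensets standing in for Cluster objects, with made-up numbers), passing only a fraction of the labels as predictions leaves the precision at 1 while the recall tracks that fraction.
In [ ]:
#toy sketch: 10 made-up single-voxel "clusters", 40% of them passed as predictions
labels = [frozenset([(0, i, i)]) for i in range(10)]
predictions = labels[:4]
truePos = sum(1 for p in predictions if p in labels)    #4 correct predictions
falsePos = len(predictions) - truePos                   #0 noise predictions
print truePos/float(truePos + falsePos)                 #precision: 1.0
print truePos/float(len(labels))                        #recall: 0.4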
In [52]:
statList = []
for i in range(1, 11):
    percentile = i/10.
    predictions = np.random.choice(testList, int(percentile * len(testList)), replace=False)
    precision, recall, f1 = precision_recall_f1(testList, predictions, 1)
    statList.append([percentile, precision, recall])
    print 'Percentile: ', percentile
    print '\t precision: ', precision
    print '\t recall: ', recall
In [54]:
fig = plt.figure()
elemwiseStats = zip(*(statList))
plt.title('Recall vs Percentile Passed')
plt.scatter(elemwiseStats[0], elemwiseStats[2])
plt.show()
In [55]:
fig = plt.figure()
plt.title('Precision vs Percentile Passed')
plt.scatter(elemwiseStats[0], elemwiseStats[1], c='r')
plt.show()
The linearly increasing recall plot demonstrates that the recall metric indeed corresponds to the ratio of true labels passed in as predictions.
Additionally, the flat precision plot shows that modulating only the recall has no effect on the precision.
Next, the predictions will be diluted so that they contain a given proportion of noise clusters. This should change the precision, but not the recall. If the algorithm works as expected, it will produce a constant recall and a downward sloping precision curve with a slope of 10%. That is, for every additional 10% of noise added to the predictions, the precision should drop by 10%.
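The arithmetic behind the dilution is sketched below with a made-up cluster count (the real count is len(testList) and varies by trial): for noise to make up a fraction p of the predictions while all N true clusters are kept, numNoise must satisfy numNoise / (N + numNoise) = p, i.e. numNoise = p * N / (1 - p), giving an expected precision of 1 - p and an unchanged recall.
In [ ]:
#sketch of the dilution arithmetic with a hypothetical cluster count
N = 1500                                   #stand-in for len(testList)
for i in range(0, 10):
    p = i/10.
    numNoise = int(p * N/float(1 - p))     #noise clusters needed for noise fraction p
    print p, N/float(N + numNoise)         #expected precision: 1 - p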
In [67]:
statList = []
#get the list of background points that noise clusters can be drawn from
noCluster = zip(*(np.where(myTestVolume == 0)))
for i in range(0, 10):
    #get the number of noise clusters that must be added for the data to be
    #diluted to the target percent
    percentile = i/10.
    numNoise = int(percentile * len(testList)/float(1-percentile))
    #generate the prediction + noise list
    noiseList = []
    for j in range(numNoise):
        badPoint = noCluster[rand(0, len(noCluster))]
        noiseList.append(Cluster([list(badPoint)]))
    predictions = testList + noiseList
    precision, recall, f1 = precision_recall_f1(testList, predictions, 1)
    statList.append([percentile, precision, recall])
    print 'Percentile: ', percentile
    print '\t precision: ', precision
    print '\t recall: ', recall
In [79]:
fig = plt.figure()
elemwiseStats = zip(*(statList))
plt.title('Recall vs Fraction of Predictions that is Noise')
plt.scatter(elemwiseStats[0], elemwiseStats[2])
plt.show()
In [78]:
fig = plt.figure()
plt.title('Precision vs Fraction of Predictions that is Noise')
plt.scatter(elemwiseStats[0], elemwiseStats[1], c='r')
plt.show()
As these plots demonstrate, adding noise to the list of true clusters delivers the expected result: the precision drops while the recall remains constant.
In our data, there is no guarantee that the predicted clusters and the true clusters will overlap exactly; in fact, they likely will not. However, we would not want to count a prediction as a false positive if it differs from the true cluster by only one pixel. For this reason, I have included an overlapRatio parameter to control how much overlap must exist between a prediction and a true cluster for the prediction to be considered correct.
In the following simulation, the cluster labels will be evenly divided into ten groups and eroded so that they retain between 10% and 100% of their members. I will then run the precision recall code against them with an increasing required overlap ratio. If the code works, I expect both the precision and the recall to drop by about 10% for every 10% increase in the required overlap.
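A minimal sketch of how the overlap gate behaves on an eroded label (plain voxel sets standing in for Cluster members, sizes chosen for illustration): a full-sized prediction still counts as a hit as long as the eroded label retains at least overlapRatio of the prediction's voxels.
In [ ]:
#toy sketch of the overlap gate: a 10 voxel prediction vs a label eroded to 6 voxels
prediction = set((0, 0, x) for x in range(10))
erodedLabel = set((0, 0, x) for x in range(6))
for ratio in (0.5, 0.6, 0.7):
    cutoff = ratio * len(prediction)
    print ratio, len(prediction & erodedLabel) >= cutoff    #True, True, False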
In [85]:
statList = []
#generate the list of eroded clusters
erodedList = []
for idx, cluster in enumerate(testList):
    #erode each cluster so it retains between 10% and 100% of its members
    percentile = (idx % 10)/10. + .1
    members = cluster.getMembers()
    erodedList.append(Cluster(members[:int(len(members)*percentile)]))
In [86]:
for i in range(1, 11):
    percentile = i/10.
    precision, recall, f1 = precision_recall_f1(erodedList, testList, percentile)
    statList.append([percentile, precision, recall])
    print 'Percentile: ', percentile
    print '\t precision: ', precision
    print '\t recall: ', recall
In [87]:
fig = plt.figure()
elemwiseStats = zip(*(statList))
plt.title('Recall vs Percent Required Overlap')
plt.scatter(elemwiseStats[0], elemwiseStats[2])
plt.show()
In [88]:
fig = plt.figure()
elemwiseStats = zip(*(statList))
plt.title('Precision vs Percent Required Overlap')
plt.scatter(elemwiseStats[0], elemwiseStats[1])
plt.show()
The slight nonlinearities in these charts are, in my opinion, caused by rounding idiosyncrasies in both the eroded list generation code and the precision recall function. Their overall shape, however, demonstrates that the overlap ratio parameter is functioning as intended.
In [ ]: