In [1]:
import matplotlib.pyplot as plt
import numpy
import csv
# Change the path to csv file appropriately
# NOTE(review): both values are absolute paths on the original author's machine;
# they must be edited before this notebook can run anywhere else.
# CSV that is loaded into `positive_data` further down.
beaver_positive_csv = '/Users/ishanhanda/Documents/NYU_Fall16/Comp_Vision/Project/ProjectWorkspace/DataSets/OUTPUTS/Beaver.csv'
# CSV that is loaded into `negative_data` further down.
beaver_negative_csv = '/Users/ishanhanda/Documents/NYU_Fall16/Comp_Vision/Project/ProjectWorkspace/DataSets/OUTPUTS/Beaver_neg.csv'
def get_data_from_file(file_name):
    """Read a CSV file of numbers into a float numpy array.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A numpy array of floats with one entry per non-blank CSV row
        (an empty array when the file has no rows).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a cell cannot be converted to float.
    """
    print('Loading from file ' + file_name)
    # `with` closes the handle even if parsing fails; newline='' is what the
    # csv module asks for so that it can handle line endings itself.
    with open(file_name, "rt", newline='') as csv_file:
        # Blank lines come back from csv.reader as empty lists; skipping them
        # keeps the rows rectangular so the float conversion does not fail.
        rows = [row for row in csv.reader(csv_file) if row]
    return numpy.array(rows).astype('float')
# Load the positive samples (one sample per CSV row) and report how many there are.
positive_data = get_data_from_file(beaver_positive_csv)
positive_length = positive_data.shape[0]
print('Beaver positive test samples count: {}'.format(positive_length))

# Same for the negative samples.
negative_data = get_data_from_file(beaver_negative_csv)
negative_length = negative_data.shape[0]
print('Beaver negative test samples count: {}'.format(negative_length))
In [2]:
# Preset threshold levels at which TPR and FPR are evaluated:
# 0.0 up to (but not including) 1.0 in steps of 0.05.
thresholds = numpy.arange(
    0.0,
    1.0,
    0.05,
)
thresholds_message = 'Thresholds: {}'.format(thresholds)
print(thresholds_message)
Now calculating the TPR and FPR at every threshold for each pair of positive and negative test samples, and plotting the resulting ROC points per sample
In [3]:
# Positive and negative samples are paired by index, so only as many pairs as
# the shorter data set provides are processed.
sample_size = min(positive_length, negative_length)

# all_TPRs[i][j] / all_FPRs[i][j] hold the TPR / FPR of sample j at
# thresholds[i]; they are used later to evaluate confidence intervals for
# each threshold level.
all_TPRs = [[None] * sample_size for _ in thresholds]
all_FPRs = [[None] * sample_size for _ in thresholds]

for j in range(sample_size):
    current_positive_sample = positive_data[j]
    current_negative_sample = negative_data[j]

    # Rates of sample j alone, one entry per threshold (used for its ROC plot).
    TPRs = []
    FPRs = []
    for i, level in enumerate(thresholds):
        # TPR: share of the positive sample's values at or above the threshold.
        positive_hits = numpy.count_nonzero(current_positive_sample >= level)
        tpr = positive_hits / len(current_positive_sample)
        TPRs.append(tpr)
        all_TPRs[i][j] = tpr

        # FPR: share of the negative sample's values at or above the threshold.
        negative_hits = numpy.count_nonzero(current_negative_sample >= level)
        fpr = negative_hits / len(current_negative_sample)
        FPRs.append(fpr)
        all_FPRs[i][j] = fpr

    print('\n\nPLOTTING ROC FOR CASE: {}'.format(j))
    plt.scatter(FPRs, TPRs, color='red')
    plt.show()
In [4]:
import scipy as sp
import scipy.stats
def mean_confidence_interval(data, confidence=0.8):
    """Compute a Student-t confidence interval for the mean of ``data``.

    The interval is mean +/- (standard error * t quantile) and is clamped to
    the range [0.0, 1.0].

    Args:
        data: Sequence of numeric observations.
        confidence: Two-sided confidence level; defaults to 0.8 (80%).

    Returns:
        A tuple ``(mean, lower, upper)``. With fewer than two observations the
        standard error is undefined, so the bounds are the full clamp range
        ``0.0`` and ``1.0``.
    """
    a = 1.0 * numpy.array(data)
    n = len(a)
    m = numpy.mean(a)
    if n < 2:
        # The standard error needs at least two observations. Returning the
        # clamp limits is what the NaN arithmetic would fall through to
        # anyway, without the runtime warnings.
        return m, 0.0, 1.0
    se = scipy.stats.sem(a)
    # Use the public ppf(): the private _ppf() skips argument validation and
    # is not part of SciPy's supported API.
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, max(0.0, m - h), min(1.0, m + h)
# Work out and print the confidence interval at every threshold level, keeping
# the rounded thresholds, the interval bounds and the interval widths
# (upper - lower) in parallel lists. mean_confidence_interval is called with
# its default confidence level, hence the "80%" in the printed lines.
thresh_s = []
ci_lower_TPR, ci_upper_TPR, ci_TPR_diff = [], [], []
ci_lower_FPR, ci_upper_FPR, ci_FPR_diff = [], [], []

print("\n\nConfidence Intervals for TPRs:")
for level, tpr_values in zip(thresholds, all_TPRs):
    mean_tpr, lower_tpr, upper_tpr = mean_confidence_interval(tpr_values)
    thresh = round(level, 2)
    thresh_s.append(thresh)
    ci_lower_TPR.append(lower_tpr)
    ci_upper_TPR.append(upper_tpr)
    ci_TPR_diff.append(upper_tpr - lower_tpr)
    print("80% Confidence Interval of TPR with threshold {} is: {} to {}".format(thresh, lower_tpr, upper_tpr))

print("\n\nConfidence Intervals for FPRs:")
for level, fpr_values in zip(thresholds, all_FPRs):
    mean_fpr, lower_fpr, upper_fpr = mean_confidence_interval(fpr_values)
    thresh = round(level, 2)
    ci_lower_FPR.append(lower_fpr)
    ci_upper_FPR.append(upper_fpr)
    ci_FPR_diff.append(upper_fpr - lower_fpr)
    print("80% Confidence Interval of FPR with threshold {} is: {} to {}".format(thresh, lower_fpr, upper_fpr))
In [5]:
# Plotting Confidence Intervals for TPR.
import pylab  # kept in scope for any other cell that still refers to pylab

N = len(thresh_s)
ind = numpy.arange(N)  # the x locations for the groups
width = 0.25  # the width of the bars: can also be len(x) sequence

fig = plt.figure(figsize=(8, 6))
# Floating bars: each bar starts at the lower CI bound and its height is the
# CI width, so it spans exactly [lower, upper].
# - color: Matplotlib's single-letter colour codes are lowercase; newer
#   releases reject the uppercase 'B'.
# - align='edge': since Matplotlib 2.0 bars are centred on x by default, which
#   left the ticks at ind + width/2 off-centre. Anchoring the left edge at
#   `ind` puts those ticks under the middle of each bar.
p2 = plt.bar(ind, ci_TPR_diff, width, color='b',
             bottom=ci_lower_TPR, align='edge')
plt.ylabel('Confidence Intervals')
plt.xlabel('Thresholds')
plt.title('Confidence Intervals for TPR (Beaver)')
plt.xticks(ind + width / 2., thresh_s)
plt.yticks(numpy.arange(0, 1.1, 0.05))
plt.grid()
# Save through the figure object before show(), while the figure still exists.
fig.savefig('CI_TPR_Beaver.png')
plt.show()
In [6]:
# Plotting Confidence Intervals for FPR.
# Reuses `ind`, `width` and `thresh_s` defined by the earlier cells.
fig = plt.figure(figsize=(8, 6))
# Floating bars: each bar starts at the lower CI bound and its height is the
# CI width, so it spans exactly [lower, upper].
# - color: Matplotlib's single-letter colour codes are lowercase; newer
#   releases reject the uppercase 'R'.
# - align='edge': since Matplotlib 2.0 bars are centred on x by default, which
#   left the ticks at ind + width/2 off-centre. Anchoring the left edge at
#   `ind` puts those ticks under the middle of each bar.
p2 = plt.bar(ind, ci_FPR_diff, width, color='r',
             bottom=ci_lower_FPR, align='edge')
plt.ylabel('Confidence Intervals')
plt.xlabel('Thresholds')
plt.title('Confidence Intervals for FPR (Beaver)')
plt.xticks(ind + width / 2., thresh_s)
# The y ticks start slightly below zero, leaving a margin under the lowest bars.
plt.yticks(numpy.arange(-0.05, 1.1, 0.05))
plt.grid()
# Save through the figure object so this cell does not depend on `pylab`
# having been imported by a different cell.
fig.savefig('CI_FPR_Beaver.png')
plt.show()
In [ ]: