In [1]:
%pylab inline
from scipy.stats import shapiro, bartlett, levene, ttest_1samp, skew, norm, ttest_ind
from scipy.interpolate import SmoothBivariateSpline, UnivariateSpline
from glob import glob
import csv
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
# from threshold_functions import *
In [2]:
def single_angle_gap(xTest,yTest,xData,yData,xScale,yScale):
xi = xScale * (xData - xData.min()) / xData.ptp()
yi = yScale * (yData - yData.min()) / yData.ptp()
xVal = xScale * (xTest - xData.min()) / xData.ptp()
yVal = yScale * (yTest - yData.min()) / yData.ptp()
dx = xi - xVal
dy = yi - yVal
if any((dx == 0) & (dy == 0)):
gap = 0
return gap
theta = np.arctan(dy/dx)
theta[dx<0] = theta[dx<0] + np.pi
theta[(dx>0) & (dy<0)] = theta[(dx>0) & (dy<0)] + 2*np.pi
theta[(dx==0) & (dy>0)] = np.pi/2
theta[(dx==0) & (dy<0)] = 3*np.pi/2
test = np.sort(theta)
test = np.append(test,test[0] + 2*np.pi)
gap = np.max(np.diff(test))*180/np.pi
return gap
def angle_gap(xTest,yTest,xData,yData,xScale,yScale):
dim = np.core.fromnumeric.shape(xTest)
if np.size(dim) == 0:
gap = single_angle_gap(xTest,yTest,xData,yData,xScale,yScale)
return gap
gap = np.zeros(dim)
if np.size(dim) == 1:
for i in range(dim[0]):
gap[i] = single_angle_gap(xTest[i],yTest[i],xData,yData,xScale,yScale)
return gap
for i in range(dim[0]):
for j in range (dim[1]):
gap[i,j] = single_angle_gap(xTest[i,j],yTest[i,j],xData,yData,xScale,yScale)
return gap
In [3]:
def single_fit_give(xTest,yTest,xData,yData,zData,s=None):
initialFitReturn = SmoothBivariateSpline(xData,yData,zData,kx=3,ky=3,s=s).ev(xTest,yTest)
adjXData = np.append(xData,xTest)
adjYData = np.append(yData,yTest)
posAdjZData = np.append(zData,initialFitReturn + 1)
negAdjZData = np.append(zData,initialFitReturn - 1)
posFitReturn = SmoothBivariateSpline(adjXData,adjYData,posAdjZData,kx=3,ky=3,s=s).ev(xTest,yTest)
negFitReturn = SmoothBivariateSpline(adjXData,adjYData,negAdjZData,kx=3,ky=3,s=s).ev(xTest,yTest)
posGive = posFitReturn - initialFitReturn
negGive = initialFitReturn - negFitReturn
give = np.mean([posGive, negGive])
return give
def fit_give(xTest,yTest,xData,yData,zData,s=None):
dim = np.core.fromnumeric.shape(xTest)
if np.size(dim) == 0:
give = single_fit_give(xTest,yTest,xData,yData,zData,s=s)
return give
give = np.zeros(dim)
if np.size(dim) == 1:
for i in range(dim[0]):
give[i] = single_fit_give(xTest[i],yTest[i],xData,yData,zData,s=s)
return give
for i in range(dim[0]):
for j in range (dim[1]):
give[i,j] = single_fit_give(xTest[i,j],yTest[i,j],xData,yData,zData,s=s)
return give
In [4]:
# Makes the panda tables display no more than 12 columns
# The html output bugs out without this
pd.options.display.max_columns = 12
# Makes all figures have a higher resolution
# Figures were too small without this
pylab.rcParams['savefig.dpi'] = 110
In [5]:
# Define thresholds
giveThreshold = 0.3
gapThreshold = 130
In [6]:
# Use pandas to load the csv file
cutoutData = pd.read_csv("cutoutdata.csv")
# Define width, length, and factor as the csv columns
# which had that corresponding header
width = cutoutData['width'].values
length = cutoutData['length'].values
factor = cutoutData['factor'].values
# Swap width and length values if order was mistaken
swap = width > length
store = width[swap]
width[swap] = length[swap]
length[swap] = store
In [7]:
# Define the cutout aspect ratio
ratio = width/length
# Equivalent field size
eqfs = sqrt(width*length)
In [8]:
artificial = isnan(cutoutData['Reference'].values)
scatter(width[~artificial],ratio[~artificial])
circleRef = ratio == 1
scatter(width[circleRef],ratio[circleRef],c='red')
scatter(width[artificial & ~circleRef],ratio[artificial & ~circleRef],c='green')
title("Distrbution of selected cutouts")
xlabel("Width (cm)")
ylabel("Ratio")
legend(['Clinical','Circles','Ellipses'],loc=4)
Out[8]:
In [9]:
scatter(width,factor)
widthFit = UnivariateSpline(width,factor)
width_i = linspace(min(width),max(width))
widthFit_i = widthFit(width_i)
plot(width_i,widthFit_i,'r-')
def widthMethod(widthTest,widthData,factorData):
widthFit = UnivariateSpline(widthData,factorData)
return widthFit(widthTest)
In [10]:
scatter(eqfs,factor)
eqfsFit = UnivariateSpline(eqfs,factor)
eqfs_i = linspace(min(eqfs),max(eqfs))
eqfsFit_i = eqfsFit(eqfs_i)
plot(eqfs_i,eqfsFit_i,'r-')
def eqfsMethod(eqfsTest,eqfsData,factorData):
eqfsFit = UnivariateSpline(eqfsData,factorData)
return eqfsFit(eqfsTest)
In [11]:
def widthLengthMethod(widthTest,lengthTest,widthData,lengthData,factorData):
extendedWidth = append(widthData,lengthData)
extendedLength = append(lengthData,widthData)
extendedFactor = append(factorData,factorData)
widthLengthFit = SmoothBivariateSpline(extendedWidth,extendedLength,extendedFactor,kx=3,ky=3,s=len(widthData))
return widthLengthFit.ev(widthTest,lengthTest)
In [12]:
extendedWidth = np.append(width,length)
extendedLength = np.append(length,width)
extendedFactor = np.append(factor,factor)
widthLengthFit = SmoothBivariateSpline(extendedWidth,extendedLength,extendedFactor,kx=3,ky=3,s=len(width))
xVec1 = linspace(extendedWidth.min(),extendedWidth.max(),100)
yVec1 = xVec1
xMesh1, yMesh1 = meshgrid(xVec1, yVec1)
gapMesh1 = angle_gap(xMesh1,yMesh1,extendedWidth,extendedLength,1,1)
giveMesh1 = fit_give(xMesh1,yMesh1,extendedWidth,extendedLength,extendedFactor,s=len(width))
zMesh1 = widthLengthFit.ev(xMesh1, yMesh1)
ref1 = (gapMesh1>gapThreshold) | (giveMesh1>giveThreshold)
zMesh1[ref1] = nan
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(xMesh1,yMesh1,zMesh1,alpha=0.3,rstride=4,cstride=4)
ax.scatter(extendedWidth,extendedLength,extendedFactor,'b')
ax.view_init(elev=10, azim=-80)
In [13]:
widthMethodDisagreement = zeros(size(width))
for i in range(len(width)):
widthTest = width[i]
factorTest = factor[i]
adjWidthData = delete(width,i)
adjFactorData = delete(factor,i)
predictedFactor = widthMethod(widthTest,adjWidthData,adjFactorData)
widthMethodDisagreement[i] = factorTest - predictedFactor
std(widthMethodDisagreement)
Out[13]:
In [14]:
eqfsMethodDisagreement = zeros(size(eqfs))
for i in range(len(eqfs)):
eqfsTest = eqfs[i]
factorTest = factor[i]
adjEqfsData = delete(eqfs,i)
adjFactorData = delete(factor,i)
predictedFactor = eqfsMethod(eqfsTest,adjEqfsData,adjFactorData)
eqfsMethodDisagreement[i] = factorTest - predictedFactor
std(eqfsMethodDisagreement)
Out[14]:
In [15]:
widthLengthMethodDisagreement = zeros(size(width))
for i in range(len(width)):
widthTest = width[i]
lengthTest = length[i]
factorTest = factor[i]
adjWidthData = delete(width,i)
adjLengthData = delete(length,i)
adjFactorData = delete(factor,i)
predictedFactor = widthLengthMethod(widthTest,lengthTest,adjWidthData,adjLengthData,adjFactorData)
widthLengthMethodDisagreement[i] = factorTest - predictedFactor
std(widthLengthMethodDisagreement)
Out[15]:
In [16]:
def univariateGap(xTest,xData):
xTest = array(xTest)
return (xTest > min(xData)) & (xTest < max(xData))
def single_univariateGive(xTest,xData,yData):
initialPredicted = UnivariateSpline(xData,yData)(xTest)
adjXData = append(xData,xTest)
negAdjYData = append(yData,initialPredicted-1)
posAdjYData = append(yData,initialPredicted+1)
negFitPredicted = UnivariateSpline(adjXData,negAdjYData)(xTest)
posFitPredicted = UnivariateSpline(adjXData,posAdjYData)(xTest)
posGive = posFitPredicted - initialPredicted
negGive = initialPredicted - negFitPredicted
return mean([posGive,negGive])
def univariateGive(xTest,xData,yData):
dim = shape(xTest)
if size(dim) == 0:
return single_univariateGive(xTest,xData,yData)
give = zeros(dim)
if size(dim) == 1:
for i in range(dim[0]):
give[i] = single_univariateGive(xTest[i],xData,yData)
return give
for i in range(dim[0]):
for j in range (dim[1]):
give[i,j] = single_univariateGive(xTest[i,j],xData,yData)
return give
In [17]:
# xTest = linspace(min(width)-ptp(width)*0.2, max(width)+ptp(width)*0.2,200)
# give = univariateGive(xTest,width,factor)
# plot(xTest,give)
# scatter(width,giveThreshold*ones(size(width)))
In [18]:
# xTest = linspace(min(eqfs)-ptp(eqfs)*0.2, max(eqfs)+ptp(eqfs)*0.2,200)
# give = univariateGive(xTest,eqfs,factor)
# plot(xTest,give)
# scatter(eqfs,giveThreshold*ones(size(eqfs)))
In [19]:
# xVec2 = linspace(width.min(),width.max(),100)
# yVec2 = linspace(ratio.min(),ratio.max(),100)
# xMesh2, yMesh2 = meshgrid(xVec2, yVec2)
# gapMesh2 = angle_gap(xMesh2,log2(yMesh2),widthMirrored,ratioLogMirrored,1,2)
# giveMesh2 = fit_give(xMesh2,log2(yMesh2),widthMirrored,ratioLogMirrored,factorMirrored,s=len(width))
# zMesh2 = widthRatioFit.ev(xMesh2, log2(yMesh2))
# ref2 = (gapMesh2>gapThreshold) | (giveMesh2>giveThreshold)
# zMesh2[ref2] = nan
In [20]:
# bot = np.amin(factor) - 0.04 * np.ptp(factor)
# top = np.amax(factor) + 0.04 * np.ptp(factor)
# fig = plt.figure()
# ax = fig.add_subplot(111, projection='3d')
# ax.contour(xMesh2,yMesh2,gapMesh2, offset=bot, levels=[gapThreshold], colors='r',alpha=0.3)
# ax.contour(xMesh2,yMesh2,giveMesh2, offset=bot, levels=[giveThreshold], colors='g',alpha=0.3)
# # Create an undrawn line with the same colour as the contours for the use in the legend
# proxyGap, = ax.plot([width.min(),width.min()],[ratio.min(),ratio.min()],[bot,bot],'r-',alpha=0.3)
# proxyGive, = ax.plot([width.min(),width.min()],[ratio.min(),ratio.min()],[bot,bot],'g-',alpha=0.3)
# # Plot the calculated surface fit
# ax.plot_surface(xMesh2,yMesh2,zMesh2,alpha=0.3,rstride=4,cstride=4)
# # Plot the measured data
# sc = ax.scatter(width,ratio,factor)
# # Define axis labels
# ax.set_xlabel('Width')
# ax.set_ylabel('Ratio')
# ax.set_zlabel('Factor')
# # Calculate the minimum ratio axis draw
# right = ratio.min()-0.01*ratio.ptp()
# # Enforce the ratio axis limits
# ax.set_ylim(right,1)
# # Define title
# ax.set_title('Fit displayed against width and aspect ratio')
# # Enforce the z axis limits
# ax.set_zlim(bot,top)
# # Adjust the viewing angle of the figure to that requested in `parameters.csv`
# ax.view_init(elev=10, azim=-80)
# # Draw the legend using the undrawn lines as proxys for the threshold contours
# legend([proxyGap,proxyGive],['Gap threshold', 'Give threshold'],loc=7,prop={'size':8})
In [21]:
# # Create a figure object
# fig = plt.figure()
# # Create axis object
# ax = fig.add_subplot(111)
# # Draw the filled in colour contour of the give mesh calculated prior
# cf = ax.contourf(xMesh2,yMesh2,giveMesh2,60)
# # Draw a colourbar using the colour fill contour
# colorbar(cf, label='Fit give')
# # Draw a black contour at the position of the parameter threshold
# ax.contour(xMesh2,yMesh2,giveMesh2,levels=[giveThreshold],colors='k')
# # Plot the positions of the measured data
# ax.scatter(width,ratio,color='black')
# # Define axis labels
# ax.set_xlabel('Width')
# ax.set_ylabel('Ratio')
# # Enforce ratios upper limit of 1
# #ax.set_ylim(top = 1)
# # Define title
# ax.set_title('Contour plot of the fit give')
In [22]:
widthMethodDisagreement = zeros(size(width))
for i in range(len(width)):
widthTest = width[i]
factorTest = factor[i]
adjWidthData = delete(width,i)
adjFactorData = delete(factor,i)
if ( (univariateGap(widthTest,adjWidthData)) &
(univariateGive(widthTest,adjWidthData,adjFactorData) < giveThreshold)):
predictedFactor = widthMethod(widthTest,adjWidthData,adjFactorData)
widthMethodDisagreement[i] = factorTest - predictedFactor
else:
widthMethodDisagreement[i] = nan
widthMethodDisagreement = widthMethodDisagreement[~isnan(widthMethodDisagreement)]
std(widthMethodDisagreement)
Out[22]:
In [23]:
mean(widthMethodDisagreement)
Out[23]:
In [24]:
t, prob = ttest_1samp(widthMethodDisagreement,0)
prob
Out[24]:
In [25]:
size(widthMethodDisagreement)
Out[25]:
In [26]:
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(111)
n, bins, patches = hist(widthMethodDisagreement*100)
setp(patches, 'facecolor', 'r', 'alpha', 0.5)
ax.set_xlabel('[Measured cutout] - [Predicted cutout] (%)')
ax.set_ylabel('Frequency')
ax.set_title('Width as parameter method')
ax.set_xlim(-3.2,3.2)
ax.set_xticks(arange(-3,3.5,0.5))
grid(True)
In [27]:
eqfsMethodDisagreement = zeros(size(eqfs))
for i in range(len(eqfs)):
eqfsTest = eqfs[i]
factorTest = factor[i]
adjEqfsData = delete(eqfs,i)
adjFactorData = delete(factor,i)
if ( (univariateGap(eqfsTest,adjEqfsData)) &
(univariateGive(eqfsTest,adjEqfsData,adjFactorData) < giveThreshold)):
predictedFactor = eqfsMethod(eqfsTest,adjEqfsData,adjFactorData)
eqfsMethodDisagreement[i] = factorTest - predictedFactor
else:
eqfsMethodDisagreement[i] = nan
eqfsMethodDisagreement = eqfsMethodDisagreement[~isnan(eqfsMethodDisagreement)]
std(eqfsMethodDisagreement)
Out[27]:
In [28]:
mean(eqfsMethodDisagreement)
Out[28]:
In [29]:
t, prob = ttest_1samp(eqfsMethodDisagreement,0)
prob
Out[29]:
In [30]:
size(eqfsMethodDisagreement)
Out[30]:
In [31]:
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(111)
n, bins, patches = hist(eqfsMethodDisagreement*100)
setp(patches, 'facecolor', 'g', 'alpha', 0.5)
ax.set_xlabel('[Measured cutout] - [Predicted cutout] (%)')
ax.set_ylabel('Frequency')
ax.set_title('Original equivalent field size')
ax.set_xlim(-3.2,3.2)
ax.set_xticks(arange(-3,3.5,0.5))
grid(True)
In [32]:
widthLengthMethodDisagreement = zeros(size(width))
for i in range(len(width)):
widthTest = width[i]
lengthTest = length[i]
factorTest = factor[i]
adjWidthData = delete(width,i)
adjLengthData = delete(length,i)
adjFactorData = delete(factor,i)
adjExtendedWidth = append(adjWidthData,adjLengthData)
adjExtendedLength = append(adjLengthData,adjWidthData)
adjExtendedFactor = append(adjFactorData,adjFactorData)
gap = angle_gap(widthTest,lengthTest,adjExtendedWidth,adjExtendedLength,1,1)
give = fit_give(widthTest,lengthTest,adjExtendedWidth,adjExtendedLength,adjExtendedFactor,s=len(width))
if (gap < gapThreshold) & (give < giveThreshold):
predictedFactor = widthLengthMethod(widthTest,lengthTest,adjExtendedWidth,adjExtendedLength,adjExtendedFactor)
widthLengthMethodDisagreement[i] = factorTest - predictedFactor
else:
widthLengthMethodDisagreement[i] = nan
widthLengthMethodDisagreement = widthLengthMethodDisagreement[~isnan(widthLengthMethodDisagreement)]
std(widthLengthMethodDisagreement)
Out[32]:
In [33]:
mean(widthLengthMethodDisagreement)
Out[33]:
In [34]:
t, prob = ttest_1samp(widthLengthMethodDisagreement,0)
prob
Out[34]:
In [35]:
size(widthLengthMethodDisagreement)
Out[35]:
In [36]:
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(111)
n, bins, patches = hist(widthLengthMethodDisagreement*100)
setp(patches, 'facecolor', 'b', 'alpha', 0.5)
ax.set_xlabel('[Measured cutout] - [Predicted cutout] (%)')
ax.set_ylabel('Frequency')
ax.set_title('Equivalent Ellipse Method')
ax.set_xlim(-3.2,3.2)
ax.set_xticks(arange(-3,3.5,0.5))
grid(True)
In [37]:
sectorIndegrationData = pd.read_csv("sector_integration.csv")
sectorDifferences = sectorIndegrationData['difference'].values
std(sectorDifferences)
Out[37]:
In [38]:
mean(sectorDifferences)
Out[38]:
In [39]:
t, prob = ttest_1samp(sectorDifferences,0)
prob
Out[39]:
In [40]:
size(sectorDifferences)
Out[40]:
In [41]:
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(111)
n, bins, patches = hist(sectorDifferences*100)
setp(patches, 'facecolor', 'm', 'alpha', 0.5)
ax.set_xlabel('[Measured cutout] - [Predicted cutout] (%)')
ax.set_ylabel('Frequency')
ax.set_title('Sector Integration Method')
ax.set_xlim(-3.2,3.2)
ax.set_xticks(arange(-3,3.5,0.5))
grid(True)
In [42]:
ii = argsort(sectorIndegrationData['measured_factor'].values[-6:])
ii
Out[42]:
In [43]:
with open('custom_cutout_factors','r') as f:
loaded_custom_factor = eval(f.read())
customDimensionsDict = {'5x8':[5,8],
'4x8':[4,8],
'4x6.5':[4,6.5],
'3x9':[3,9],
'3x6.5':[3,6.5],
'3x5':[3,5]}
customDataLabelList = list(customDimensionsDict.keys())
custom_factors = [loaded_custom_factor[x] for x in customDataLabelList]
custom_widths = [customDimensionsDict[x][0] for x in customDataLabelList]
custom_lengths = [customDimensionsDict[x][1] for x in customDataLabelList]
jj = argsort(custom_factors)
jj
Out[43]:
In [44]:
ii[jj]
Out[44]:
In [45]:
romualdDifferences = concatenate([-0.625 * ones(9), -0.375 * ones(8),
-0.125*ones(13), 0.125*ones(10),
0.375*ones(16), 0.625*ones(13), 0.875*ones(2)])
std(romualdDifferences)
Out[45]:
In [46]:
mean(romualdDifferences)
Out[46]:
In [47]:
t, prob = ttest_1samp(romualdDifferences,0)
prob
Out[47]:
In [48]:
fig = plt.figure(figsize=(15, 6))
ax = fig.add_subplot(111)
n, bins, patches = hist(romualdDifferences,7)
setp(patches, 'facecolor', 'k', 'alpha', 0.5)
ax.set_xlabel('[Measured cutout] - [Predicted cutout] (%)')
ax.set_ylabel('Frequency')
ax.set_title('Romuald Sector Integration Data')
ax.set_xlim(-3.2,3.2)
ax.set_xticks(arange(-3,3.5,0.5))
grid(True)
If there are significant deviations from normality the Levene test whould be used as opposed to the bartlett test.
In [49]:
W, pvalue = shapiro(widthMethodDisagreement)
pvalue
Out[49]:
In [50]:
W, pvalue = shapiro(eqfsMethodDisagreement)
pvalue
Out[50]:
In [51]:
W, pvalue = shapiro(sectorDifferences)
pvalue
Out[51]:
In [52]:
W, pvalue = shapiro(widthLengthMethodDisagreement)
pvalue
Out[52]:
In [53]:
T, pvalue = bartlett(widthMethodDisagreement,eqfsMethodDisagreement,widthLengthMethodDisagreement)
pvalue
Out[53]:
In [54]:
T, pvalue = bartlett(widthMethodDisagreement,eqfsMethodDisagreement)
pvalue
Out[54]:
In [55]:
T, pvalue = bartlett(widthMethodDisagreement,widthLengthMethodDisagreement)
pvalue
Out[55]:
In [56]:
T, pvalue = bartlett(eqfsMethodDisagreement,widthLengthMethodDisagreement)
pvalue
Out[56]:
In [57]:
T, pvalue = bartlett(sectorDifferences,widthLengthMethodDisagreement)
pvalue
Out[57]:
In [58]:
W, pvalue = levene(widthMethodDisagreement,eqfsMethodDisagreement,widthLengthMethodDisagreement)
pvalue
Out[58]:
In [59]:
W, pvalue = levene(widthMethodDisagreement,eqfsMethodDisagreement)
pvalue
Out[59]:
In [60]:
W, pvalue = levene(widthMethodDisagreement,widthLengthMethodDisagreement)
pvalue
Out[60]:
In [61]:
W, pvalue = levene(eqfsMethodDisagreement,widthLengthMethodDisagreement)
pvalue
Out[61]:
In [62]:
W, pvalue = levene(sectorDifferences,widthLengthMethodDisagreement)
pvalue
Out[62]:
In [63]:
W, pvalue = levene(romualdDifferences/100,widthLengthMethodDisagreement)
pvalue
Out[63]:
In [64]:
W, pvalue = ttest_ind(sectorDifferences, widthLengthMethodDisagreement, equal_var=False)
pvalue
Out[64]:
In [65]:
W, pvalue = ttest_ind(romualdDifferences/100, widthLengthMethodDisagreement, equal_var=False)
pvalue
Out[65]:
In [65]: