This work explores the space of DHV curves from the prostate library (currently 60 patients).
We proceed with Pricipal Componant Analysis (PCA) on the high dimensonal space of Volume-Dose data points (e.g. V95 for PTV) in the DVH library.
We restrict the study to the PTV, Rectum and Bladder curves only. This is done for conviniece and simplicity, and also to lower the dimensonality of the data.
In [1]:
import autoPlanLib as myTools #this has some custom functions to clean up the code here
from iPyNoteDisplay import *
extended_styles()
Out[1]:
In [3]:
#load patient DVH data (settings are in autoPlanLib.py)
patients = myTools.loadPatients()
In [98]:
myTools = reload(myTools) #when needed
In [4]:
#Some useful parameters
Rx_dose = 8100
num_pat = size(patients,0) #number of patients in library (60)
num_orgs = 3 #number of organs #len(patients[pat])
In [110]:
#plotting DVH of library
num_pat = 60
fig, ax = plt.subplots(1,3,figsize=(18,5))
for pat in range(num_pat):
for org in range(len(organ_fmt)):
data = patients[pat][org]
ax[org].plot(data.dose,multiply(data.volume,100.0))#,organ_fmt[o])
if pat == num_pat - 1:
ax[org].set_title("All " + data.name)
ax[org].set_xlabel('Dose (cGy)')
ax[org].set_ylabel('Volume (%)')
ax[org].set_ylim(0,100)
Here we have some freedom in how we re-sample the data for use in PCA. PCA requires a data matrix, each row being a observation (patient), and each column representing some measurement. The PCA library we use also assumes that the number of columns in the data matrix is less than or equal to the number or rows.
A few different sampling schemes were tested, but in the end they all gave very similar results.
Below is a scheme to weight the sampling grid quadratically towards lower volume (%) values (higher dose regions).
In [71]:
# say we want to weight samples by x^2
# integrate, then invert we get 3/x^3, then uniformly sample
n_pts = 20
grid_points = zeros((num_orgs,n_pts))
g = mgrid[0.0:1.0:(1./float(n_pts))] #a uniform grid with desired number of sampling points
sampling = map(lambda x: pow(x,1.0/3.0)*100, g) #map uniform grid through weighting function
sampling = abs(array(sampling)-98) #here the sampling points are effectively reversed and shifted
print "Volume sampling points (%):"
print sampling
#Vols for PTV, RECT and BLAD:
for i in range(3):
grid_points[i] = sampling / 100.0
Below are 2 hand picked sampling scheme which treats the PTV and OARS differently
In [7]:
n_pts = 12 #points sampled from each organ
grid_points = zeros((num_orgs,n_pts))
#Vols for PTV:
grid_points[0] = array( (99,98,97,96,95,90,50, 5, 4, 3, 2, 1) ) / 100.0
#Vols for RECT:
grid_points[1] = array( (90,70,60,50,40,30,20,10, 9, 7, 5, 1) ) / 100.0
#Vols for BLAD:
grid_points[2] = array( (90,70,60,50,40,30,20,10, 9, 7, 5, 1) ) / 100.0
print "Volume sampling points (%):"
print 100*grid_points
In [8]:
#another hand picked sampling scheme with fiewer points (for simplicity)
n_pts = 6 #points sampled from each organ
grid_points = zeros((num_orgs,n_pts))
#Vols for PTV:
grid_points[0] = array( (99,95,90,70,50,2) ) / 100.0
#Vols for RECT:
grid_points[1] = array( (90,60,30,20,10,5) ) / 100.0
#Vols for BLAD:
grid_points[2] = array( (90,60,30,20,10,5) ) / 100.0
print "Volume sampling points (%):"
print 100*grid_points
In [151]:
#a uniform sampling scheme with 20 sampling points per organ
n_pts = 20
low_vol = 1.
high_vol = 99.
a = frange(low_v, high_v, (high_v-low_v)/float(n_pts-1))
a = a[::-1] #reversed
grid_points = repeat([a/100.],3,axis=0)
In [152]:
#Plot the grid
grid_points100 = 100*grid_points #this gives %dose units
plot(grid_points100[0],grid_points100[0],'o')
title('Sampling points for Volume (%)')
show()
In [153]:
gridDim = len(grid_points[0])
V_grid = zeros((num_pat,num_orgs*gridDim))
for pat in range(num_pat):
for org in range(num_orgs):
data = patients[pat][org]
for i in range(len(grid_points[org])):
V_grid[pat,org*gridDim+i]=myTools.DoseAtVol(data.dose,data.volume,grid_points[org][i])/Rx_dose
#this defines the indices for each organ in the rows of V_grid
r = (range(0,n_pts),range(n_pts,2*n_pts),range(2*n_pts,3*n_pts))
In [154]:
fig, axes = plt.subplots(1,3,figsize=(18,4))
for pat in range(num_pat):
axes[0].plot( V_grid[pat,r[0]], grid_points100[0],'-m', alpha=.3)
axes[1].plot( V_grid[pat,r[1]], grid_points100[1],'-r', alpha=.3)
axes[2].plot( V_grid[pat,r[2]], grid_points100[2],'-b', alpha=.3)
#axes[i,2].plot([],[],'--k',label='Neg(%i)'% m_idx)
#axes.plot(V_grid[p,:],'.',alpha = 0.5)
axes[0].set_title(patients[0][0].name)
map(lambda x: x.set_xlabel('Dose/RxDose'),axes)
axes[0].set_ylabel('Volume (%)')
axes[1].set_title(patients[0][1].name)
axes[2].set_title(patients[0][2].name)
show()
In [155]:
from matplotlib.mlab import PCA
V_pca = PCA(V_grid)
figure()
title('PCA Fractions')
plot(V_pca.fracs,'or')
ylabel("Variance")
xlabel("Principal Component")
show()
The plot below gives a hint as to how the data matrix is sorted. Each row represents a patient DVH and the first 1/3 of the columns sample the PTV, the middle 1/3 sample the Rectum, and the last 1/3 sample the Bladder.
The PCA modes represent (unit) directions in the high dimensional data space (after the mean for each column has been subtracted and the standard deviation is divided out).
In [136]:
n_comps = 6
p = V_pca
fig, axes = plt.subplots(figsize=(18,7))
for i in range(n_comps):
axes.plot(p.Wt[i,:],label='PC %i'%i)
axes.plot(zeros(num_orgs*n_pts),':k')
axes.set_xlabel('Volume (%) for PTV (Purple), Rectum (Red), Bladder (Blue)')
axes.set_ylabel('weights/correlations (arb)')
axes.set_title('First %i PCA modes' % n_comps)
axes.legend(loc=0)
#format Ticks
axes.set_xticks(range(3*n_pts))
#make integer axis of Vol axis
l = axes.set_xticklabels(array(concatenate((grid_points100[0],grid_points100[1],grid_points100[2])),int)) #output hidden
junk = map(lambda x: x.set_color('purple'), l[:n_pts] )
junk = map(lambda x: x.set_color('red'), l[n_pts:2*n_pts] )
junk = map(lambda x: x.set_color('blue'), l[2*n_pts:] )
In [143]:
n_comps = 6
p = V_pca
fig2, axes2 = plt.subplots(n_comps,3,figsize=(18,n_comps*6))
for i in range(n_comps):
axes2[i,0].plot( zeros((len(grid_points[0]),1)),grid_points100[0],':k')
axes2[i,0].plot(p.Wt[i,r[0]],grid_points100[0],'-m',label="PTV")
axes2[i,0].plot(p.Wt[i,r[1]],grid_points100[1],'-r',label="Rectum")
axes2[i,0].plot(p.Wt[i,r[2]],grid_points100[2],'-b',label="Bladder")
axes2[i,0].set_title('Principal Componant Vector # %i' % i)
axes2[i,0].set_ylabel('Volume (%)')
axes2[i,0].set_xlabel('Weight/Correlation (arb)')
axes2[i,0].legend(loc=0)
#plot PC dimension (componants for each patient)
axes2[i,1].plot( p.Y[:,i],p.Y[:,i],'sr',mew=0,ms=20,alpha = 0.15)
axes2[i,1].set_title('PC %i vs. PC %i (squares are patients)' % (i,i))
axes2[i,1].set_xlabel('Weight/Correlation (arb)')
#Find Extremum Patients for PC mode (Most Negative, Closest to Zero, and Most Positive)
neg_idx = find(min(p.Y[:,i]) == p.Y[:,i])
tempArray = map(lambda x: abs(x), p.Y[:,i]) #map absolute value
zero_idx = find(min(tempArray) == tempArray)
pos_idx = find(max(p.Y[:,i]) == p.Y[:,i])
c_fmt = ('m','r','b') #line colors for PTV, Rect, and Blad
axes2[i,2].plot([],[],'--k',label='Neg(%i)'% neg_idx)
axes2[i,2].plot([],[],':k',label='Zero(%i)' % zero_idx)
axes2[i,2].plot([],[],'-k',label='Pos(%i)' % pos_idx)
for org in range(3):
axes2[i,2].plot( patients[neg_idx][org].dose, 100.*array(patients[neg_idx][org].volume),'--'+c_fmt[org])
axes2[i,2].plot( patients[zero_idx][org].dose, 100.*array(patients[zero_idx][org].volume),':'+c_fmt[org])
axes2[i,2].plot( patients[pos_idx][org].dose, 100.*array(patients[pos_idx][org].volume),'-'+c_fmt[org])
axes2[i,2].set_xlim((0,9100))
axes2[i,2].set_ylim((0,110))
axes2[i,2].set_title('Extremum DVH along PC %i ' % i)
axes2[i,2].set_xlabel('Dose (cGy)')
axes2[i,2].set_ylabel('Volume (%)')
axes2[i,2].legend(loc='upper center',ncol=3,frameon=False)
In [138]:
from matplotlib import animation
from IPython.core.display import HTML
In [157]:
# animation function. This is called sequentially
def animate(i):
pid = pids_sorted[i]
ax.set_title('Pat. %i PC_%i = %s' % (pid,pc_vect,str(p.Y[pid,pc_vect])[:5]))
for org in range(3):
line[org][0].set_data(patients[pid][org].dose, 100.*array(patients[pid][org].volume))
#set up the figure, the axis, and the plot element we want to animate
fig, ax = plt.subplots(figsize=(5,4))
line = []
line.append(ax.plot([], [],'-m',label='PTV'))
line.append(ax.plot([], [],'-r',label='Rectum'))
line.append(ax.plot([], [],'-b',label='Bladder'))
ax.set_xlim((0.0,9000.0))
ax.set_ylim((0.0,101))
ax.set_xlabel('Dose (cGy)')
ax.set_ylabel('Volume (%)')
ax.legend(loc=3)
html_out = ''
for pc_vect in range(3):
#create a sorted vector containing patients for a given mode
pids_sorted = argsort(p.Y[:,pc_vect])
# call the animator. blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, frames=60, interval=5, blit=True)
html_out += myTools.anim_to_html(anim) #+ " <br />"
fig.clf() #this prevents the regular plot from showing
HTML(html_out)
Out[157]:
In [82]:
#3D scatter in PCV (0,1,3) space
from mpl_toolkits.mplot3d.axes3d import Axes3D
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(1, 1, 1, projection='3d')
pY = V_pca.Y
p = ax.scatter(pY[:,0], pY[:,1], pY[:,2],s=60,c='r',marker='o',edgecolors='none',alpha = 0.5)
#maybe try animation later
In [144]:
p = V_pca
proj = p.project(V_grid[0,:])
temp = zeros(proj.shape)
errors = zeros(proj.shape)
n_pats = 60 #number of patients to consider
n_pc = 6 #number of pca modes to include
#placeholders to find max error patient
max_error = 0.0
max_error_row = []
max_error_pca = []
for pat in range(n_pats):
row = V_grid[pat,:]
proj = p.project(row)
temp = zeros(proj.shape)
for i in range(n_pc):
temp = temp + multiply(p.Wt[i,:],proj[i])
#compute error between pca reduced representation
temp_err = abs(temp*p.sigma + p.mu - row)
#compute max error and compare to global
if max(temp_err) > max_error:
#save patient index
max_error_idx = pat
max_error_row = row.copy()
max_error_pca = temp.copy()
errors = errors + temp_err
In [145]:
#plot PCA errors
fig, ax = plt.subplots(1,2,figsize=(16,7))
ax[0].plot(100*errors[r[0]]/float(n_pats),grid_points100[0],':om',label="PTV")
ax[0].plot(100*errors[r[1]]/float(n_pats),grid_points100[0],':or',label="Rectum")
ax[0].plot(100*errors[r[2]]/float(n_pats),grid_points100[0],':ob',label="Bladder")
ax[0].set_title('Average Error with first %i PCA modes' % n_pc)
ax[0].legend(loc='best')
ax[0].set_ylabel('Volume (%)')
ax[0].set_xlabel('Absolute Error (% of RxDose)')
#example Origional and Reduced
dvh_pca = max_error_pca*p.sigma + p.mu
ax[1].plot(Rx_dose * dvh_pca[r[0]],grid_points100[0],'-m',label="PTV")
ax[1].plot(Rx_dose * dvh_pca[r[1]],grid_points100[1],'-r',label="Rectum")
ax[1].plot(Rx_dose * dvh_pca[r[2]],grid_points100[2],'-b',label="Bladder")
#plot(max_error_pca*p.sigma + p.mu)
ax[1].plot(Rx_dose * max_error_row[r[0]],grid_points100[0],'--m')
ax[1].plot(Rx_dose * max_error_row[r[1]],grid_points100[1],'--r')
ax[1].plot(Rx_dose * max_error_row[r[2]],grid_points100[2],'--b')
ax[1].plot([],[],'--k',label='Orig.')
ax[1].plot([],[],'-k',label='Proj.')
ax[1].set_ylabel('Volume (%)')
ax[1].set_xlabel('Dose (cGy)')
ax[1].legend(loc=0,ncol=2)
ax[1].set_title('Worst Max Error (Patient %i)' % max_error_idx)
show()
This is done to understand what features of the DVH are represented by each Pricipal Componant. We start with the mean DVH of the whole data set (for purely illustrative purposes). Each frame represents the average DVH moved along the direction of the PC. This is done by adding some scalar multiplied by the PC vector to the mean DVH vector.
In [ ]:
#good ranges for x^2 weighted sampling
pc_range = [] #an array for the range of vector motions
pc_range.append(frange(-5,9,.5)) #for PC_0
pc_range.append(frange(-5,16,.5)) #for PC_1
pc_range.append(frange(-6,6,.5)) #for PC_2
pc_range.append(frange(-4,6,.5)) #for PC_3
pc_range.append(frange(-2,6,.5)) #for PC_4
pc_range.append(frange(-3,6,.5)) #for PC_5
pc_range.append(frange(-6,4,.5)) #for PC_6
In [158]:
#good ranges for uniform samled data
pc_range = [] #an array for the range of vector motions
pc_range.append(frange(-9,6,.5)) #for PC_0
pc_range.append(frange(-8.5,6.5,.5)) #for PC_1
pc_range.append(frange(-3,6,.5)) #for PC_2
pc_range.append(frange(-4,3,.5)) #for PC_3
pc_range.append(frange(-3,2.5,.5)) #for PC_4
pc_range.append(frange(-3,3,.5)) #for PC_5
pc_range.append(frange(-4,4,.5)) #for PC_6
#this just gets the size of the vector right
row = V_grid[pat,:]
proj = p.project(row)
DVH_vect = zeros(proj.shape) #this is actually the "mean" DVH
#the code below will recover the PCA space reconstruction of DVH
#for i in range(n_pc):
# DVH_vect = DVH_vect + multiply(PC_0,proj[i])
#DVH_vect = DVH_vect*p.sigma + p.mu
# animation function. This is called sequentially
def animate(i):
temp_vec = Rx_dose * ( (DVH_vect + pc_range[pc_idx][i] * p.Wt[pc_idx,:])*p.sigma + p.mu )
ax.set_title('PC_%i * %.1f' % (pc_idx,pc_range[pc_idx][i]))
line1.set_data(temp_vec[r[0]], grid_points100[0])
line2.set_data(temp_vec[r[1]], grid_points100[1])
line3.set_data(temp_vec[r[2]], grid_points100[2])
pass
#set up the figure, the axis, and the plot element we want to animate
fig, ax = plt.subplots()#figsize=(10,8))
line1, = ax.plot([], [],'-m',label='PTV')
line2, = ax.plot([], [],'-r',label='Rectum')
line3, = ax.plot([], [],'-b',label='Bladder')
ax.set_xlim((0.0,Rx_dose*1.2))
ax.set_ylim((0.0,100))
ax.set_xlabel('Dose (cGy)')
ax.set_ylabel('Volume (%)')
ax.legend(loc=3)
html_out = ''
for pc_idx in range(len(pc_range)):
# call the animator. blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, frames=len(pc_range[pc_idx]), interval=5, blit=True)
html_out += myTools.anim_to_html(anim)# + " <br />"
fig.clf() #this prevents the regular plot from showing
#HTML(html_out)
In [149]:
HTML(html_out)
Out[149]: