In [1]:
#import library
import numpy as np
%matplotlib inline
import scipy.stats as stats
import xlrd as xl
In [2]:
#Load book, define sheet as X1, print the number of rows, set n to number of rows
#of sheet 1 column 1, numbers is column,row format. List is X1a, creates array z.
X = xl.open_workbook('avpfloat.xlsx')
X1 = X.sheet_by_name('NoFormulas')
In [3]:
#id test, should be 34730 x 36
m =X1.ncols
n =X1.nrows
print 'cols',m,'rows',n
In [5]:
#test and append if AD shows significant regulation (D29E + pCPP45::avrPto I96A,2xA). Append ratios of DE ('PTI'),
#AE (AvrPto suppressed 'PTI').
#pad-> pvalue AD
#rad-> ratio AD
#rde-> ratio DE
#d-> RPKM D_mean
#a-> RPKM A_mean
#DE, AE, AD, BD, CD-> list of induction values for specified samples when logic test is true
DE = []
AE = []
AD = []
BD = []
CD = []
for i in range (1,n):
pad= X1.cell_value(rowx=i, colx=5)
rad= X1.cell_value(rowx=i, colx=4)
rde= X1.cell_value(rowx=i, colx=20)
a= X1.cell_value(rowx=i, colx=3)
d= X1.cell_value(rowx=i, colx=2)
e= X1.cell_value(rowx=i, colx=18)
if (pad<=0.05 and (rad>=2 or rad<=0.5) and (d>=3 or a>=3 or e>=3) and (rde<=.5 or rde>=2)):
AD.append(X1.cell_value(rowx=i, colx=4))
BD.append(X1.cell_value(rowx=i, colx=8))
CD.append(X1.cell_value(rowx=i, colx=12))
AE.append(X1.cell_value(rowx=i, colx=16))
DE.append(X1.cell_value(rowx=i, colx=20))
In [21]:
#create adjusted induction values of mock
#Difference = log2 (WT/PTI) - log2 [(core/PTI)*(CTD/PTI)]
#New prediction = ABC, stands for combination of A,B,C RPKM adjustments = 2^[log2 (WT/mock) - Difference)]
#Note - in new prediction we need to subtract the difference instead of add since the values are inverted biologically.
#that is, the genes that are upregulated in the PTI sense are downregualted in the AvrPto sense, and vice versa.
ABC = []
for i in range(len(AD)):
ABC.append(2**((np.log2(AE[i]))+(np.log2(BD[i]*CD[i])-np.log2(AD[i]))))
In [22]:
import matplotlib.pyplot as plt
#Scatter plot generation
D29E = plt.scatter(DE, DE, s=1, alpha=1, color='k', linewidth=None)
AvrPto = plt.scatter(DE, AE, s=1, alpha=1, color='r', linewidth=None)
ABC = plt.scatter(DE, ABC, s=1, alpha=1, color='b', linewidth=None)
#Logarithmic scales with grid
plt.xscale('log', basex=2)
plt.yscale('log', basey=2)
plt.grid(True)
#Font size
font = {'family' : 'Arial','weight' : 'normal','size' : 14}
plt.rc('font', **font)
axis_font = {'fontname':'Serif', 'size':'14', 'weight':'bold'}
title_font = {'fontname': 'Serif', 'size':'16', 'weight':'bold'}
#set x-axis ticks
xtck = [0.015625, 0.03125, .0625, .125, .25, .5, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
xlbl = ['1/64', '1/32', '1/16', '1/8', '1/4', '1/2', '1', '2', '4', '8', '16', '32', '64', '128', '256', '512', '1024', '2048', '4096']
plt.xticks(xtck,xlbl,y=-0.02,rotation=270)
#set y-axis ticks
ytck= [0.015625, 0.03125, .0625, .125, .25, .5, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
ylbl= ['1/64', '1/32', '1/16', '1/8', '1/4', '1/2', '1', '2', '4', '8', '16', '32', '64', '128', '256', '512', '1024', '2048', '4096']
plt.yticks(ytck,ylbl, x=-0.02)
#Axis labels
plt.ylabel('Sample induction', **axis_font)
plt.xlabel('PTI induction', **axis_font)
#Reference lines
plt.axhline(y=1, color='k')
plt.axvline(x=1, color='k')
#Legend
plt.legend((D29E, AvrPto, ABC),('PTI', 'AvrPto-suppressed PTI', 'Domain-potential PTI-suppression'),scatterpoints=1,markerscale=4,loc='upper left',ncol=1,fontsize=14)
#Plot size and settings
fig = plt.gcf()
fig.set_size_inches(8,8)
fig.set_dpi(300)
plt.tight_layout()
fig.savefig('Domain-potential Suppression of PTI vs Mock.png',bbox_inches='tight',dpi=300)
plt.show()
In [17]:
#For testing the combination of domain effects vs
greater = []
less= []
greaterae= []
lessae= []
for i in range(len(AD)):
if (DE[i]<=0.5):
if (ABC[i]>=1):
greater.append(ABC[i])
if (ABC[i]<1):
less.append(ABC[i])
if (DE[i]<=0.5 and AE[i]>=1):
greaterae.append(AE[i])
if (DE[i]<=0.5 and AE[i]<1):
lessae.append(AE[i])
print "Number where ABC is greater than or equal to 1:", len(greater)
print "Number where ABC is less than 1:", len(less)
print "same test for AE greater or equal to 1:", len(greaterae), ", less:", len(lessae)
print len(greater) + len(less)
In [18]:
#Included for quick comparison: run instead of above script if a compatible set of lists for all genes in figure
#with WT AvrPto vs Mock induction values is desired. Results are similar.
#test and append if DE shows significant regulation (D29E + pCPP45::avrPto I96A,2xA). Append ratios of DE ('PTI'), AE (AvrPto
#suppressed 'PTI').
#pad-> pvalue AD
#rad-> ratio AD
#rae-> ratio AE
#rbd-> ratio BD (CTD)
#rcd->ratio CD (core)
#d-> RPKM D_mean
#a-> RPKM A_mean
#DE, AE, AD, BD, CD-> list of induction values for specified samples when logic test is true
DE = []
AE = []
AD = []
BD = []
CD = []
for i in range (1,n):
pde= X1.cell_value(rowx=i, colx=21)
rde= X1.cell_value(rowx=i, colx=20)
d= X1.cell_value(rowx=i, colx=19)
e= X1.cell_value(rowx=i, colx=18)
if (pde<=0.05 and (rde>=2 or rde<=0.5) and (d>=3 or e>=3)):
AD.append(X1.cell_value(rowx=i, colx=4))
BD.append(X1.cell_value(rowx=i, colx=8))
CD.append(X1.cell_value(rowx=i, colx=12))
AE.append(X1.cell_value(rowx=i, colx=16))
DE.append(X1.cell_value(rowx=i, colx=20))
In [19]:
#Counts the number of genes up regulated in the DE set compared to the inactivated AvrPto mutant I96A,2xA.
DEup = []
DEdown= []
for i in range(len(DE)):
if DE[i]>1:
DEup.append(DE[i])
if DE[i]<1:
DEdown.append(DE[i])
print 'up', len(DEup)
print 'down', len(DEdown)
In [ ]: