notebook.community

Edit and run



In [1]:

    
#import library
import numpy as np
%matplotlib inline
import scipy.stats as stats
import xlrd as xl



In [2]:

    
#Load book, define sheet as X1, print the number of rows, set n to number of rows
#of sheet 1 column 1, numbers is column,row format.  List is X1a, creates array z.
X = xl.open_workbook('avpfloat.xlsx')
X1 = X.sheet_by_name('NoFormulas')



In [3]:

    
#id test, should be 34730 x 36
m =X1.ncols
n =X1.nrows
print 'cols',m,'rows',n









    



cols 36 rows 34730



In [5]:

    
#test and append if AD shows significant regulation (D29E + pCPP45::avrPto I96A,2xA). Append ratios of DE ('PTI'),
#AE (AvrPto suppressed 'PTI').
#pad-> pvalue AD
#rad-> ratio AD
#rde-> ratio DE
#d-> RPKM D_mean
#a-> RPKM A_mean
#DE, AE, AD, BD, CD-> list of induction values for specified samples when logic test is true

DE = []
AE = []
AD = []
BD = []
CD = []
for i in range (1,n):
    pad= X1.cell_value(rowx=i, colx=5)
    rad= X1.cell_value(rowx=i, colx=4)
    rde= X1.cell_value(rowx=i, colx=20)
    a= X1.cell_value(rowx=i, colx=3)
    d= X1.cell_value(rowx=i, colx=2)
    e= X1.cell_value(rowx=i, colx=18) 
    if (pad<=0.05 and (rad>=2 or rad<=0.5) and (d>=3 or a>=3 or e>=3) and (rde<=.5 or rde>=2)):
        AD.append(X1.cell_value(rowx=i, colx=4))
        BD.append(X1.cell_value(rowx=i, colx=8))
        CD.append(X1.cell_value(rowx=i, colx=12))
        AE.append(X1.cell_value(rowx=i, colx=16))
        DE.append(X1.cell_value(rowx=i, colx=20))



In [21]:

    
#create adjusted induction values of mock

#Difference = log2 (WT/PTI) - log2 [(core/PTI)*(CTD/PTI)]
#New prediction = ABC, stands for combination of A,B,C RPKM adjustments = 2^[log2 (WT/mock) - Difference)]
#Note - in new prediction we need to subtract the difference instead of add since the values are inverted biologically.
#that is, the genes that are upregulated in the PTI sense are downregualted in the AvrPto sense, and vice versa.

ABC = []

for i in range(len(AD)):
    ABC.append(2**((np.log2(AE[i]))+(np.log2(BD[i]*CD[i])-np.log2(AD[i]))))



In [22]:

    
import matplotlib.pyplot as plt
#Scatter plot generation
D29E = plt.scatter(DE, DE, s=1, alpha=1, color='k', linewidth=None)
AvrPto = plt.scatter(DE, AE, s=1, alpha=1, color='r', linewidth=None)
ABC = plt.scatter(DE, ABC, s=1, alpha=1, color='b', linewidth=None)

#Logarithmic scales with grid
plt.xscale('log', basex=2)
plt.yscale('log', basey=2)
plt.grid(True)

#Font size
font = {'family' : 'Arial','weight' : 'normal','size'   :  14}

plt.rc('font', **font)

axis_font = {'fontname':'Serif', 'size':'14', 'weight':'bold'}
title_font = {'fontname': 'Serif', 'size':'16', 'weight':'bold'}

#set x-axis ticks
xtck = [0.015625, 0.03125, .0625, .125, .25, .5, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
xlbl = ['1/64', '1/32', '1/16', '1/8', '1/4', '1/2', '1', '2', '4', '8', '16', '32', '64', '128', '256', '512', '1024', '2048', '4096']
plt.xticks(xtck,xlbl,y=-0.02,rotation=270)

#set y-axis ticks
ytck= [0.015625, 0.03125, .0625, .125, .25, .5, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
ylbl= ['1/64', '1/32', '1/16', '1/8', '1/4', '1/2', '1', '2', '4', '8', '16', '32', '64', '128', '256', '512', '1024', '2048', '4096']
plt.yticks(ytck,ylbl, x=-0.02)

#Axis labels

plt.ylabel('Sample induction', **axis_font)
plt.xlabel('PTI induction', **axis_font)

#Reference lines
plt.axhline(y=1, color='k')
plt.axvline(x=1, color='k')

#Legend
plt.legend((D29E, AvrPto, ABC),('PTI', 'AvrPto-suppressed PTI', 'Domain-potential PTI-suppression'),scatterpoints=1,markerscale=4,loc='upper left',ncol=1,fontsize=14)

#Plot size and settings
fig = plt.gcf()
fig.set_size_inches(8,8)
fig.set_dpi(300)
plt.tight_layout()
fig.savefig('Domain-potential Suppression of PTI vs Mock.png',bbox_inches='tight',dpi=300)
plt.show()



In [17]:

    
#For testing the combination of domain effects vs 

greater = []
less= []
greaterae= []
lessae= []

for i in range(len(AD)):
    if (DE[i]<=0.5):
        if (ABC[i]>=1):
            greater.append(ABC[i])
        if (ABC[i]<1):
            less.append(ABC[i])
    if (DE[i]<=0.5 and AE[i]>=1):
        greaterae.append(AE[i])
    if (DE[i]<=0.5 and AE[i]<1):
        lessae.append(AE[i])
print "Number where ABC is greater than or equal to 1:", len(greater)
print "Number where ABC is less than 1:", len(less)
print "same test for AE greater or equal to 1:", len(greaterae), ", less:", len(lessae)
print len(greater) + len(less)









    



Number where ABC is greater than or equal to 1: 380
Number where ABC is less than 1: 1227
same test for AE greater or equal to 1: 30 , less: 1577
1607



In [18]:

    
#Included for quick comparison: run instead of above script if a compatible set of lists for all genes in figure
#with WT AvrPto vs Mock induction values is desired. Results are similar.

#test and append if DE shows significant regulation (D29E + pCPP45::avrPto I96A,2xA). Append ratios of DE ('PTI'), AE (AvrPto
#suppressed 'PTI').
#pad-> pvalue AD
#rad-> ratio AD
#rae-> ratio AE
#rbd-> ratio BD (CTD)
#rcd->ratio CD (core)
#d-> RPKM D_mean
#a-> RPKM A_mean
#DE, AE, AD, BD, CD-> list of induction values for specified samples when logic test is true

DE = []
AE = []
AD = []
BD = []
CD = []
for i in range (1,n):
    pde= X1.cell_value(rowx=i, colx=21)
    rde= X1.cell_value(rowx=i, colx=20)
    d= X1.cell_value(rowx=i, colx=19)
    e= X1.cell_value(rowx=i, colx=18)
    if (pde<=0.05 and (rde>=2 or rde<=0.5) and (d>=3 or e>=3)):
        AD.append(X1.cell_value(rowx=i, colx=4))
        BD.append(X1.cell_value(rowx=i, colx=8))
        CD.append(X1.cell_value(rowx=i, colx=12))
        AE.append(X1.cell_value(rowx=i, colx=16))
        DE.append(X1.cell_value(rowx=i, colx=20))



In [19]:

    
#Counts the number of genes up regulated in the DE set compared to the inactivated AvrPto mutant I96A,2xA.

DEup = []
DEdown= []

for i in range(len(DE)):
    if DE[i]>1:
        DEup.append(DE[i])
    if DE[i]<1:
        DEdown.append(DE[i])
print 'up', len(DEup)
print 'down', len(DEdown)









    



up 4370
down 2818



In [ ]: