Check for correlation between MYC-amplified tumor samples and TP53 mutations. MYC amplification is determined in two different ways:
In [1]:
import NotebookImport
from TCGA_analysis_BRCA_import import *
In [2]:
# 1) Focal MYC amplification vs. MYC expression.
# Split the MYC RNASeq values of all samples into two groups, depending on
# whether checkFocalGeneAmp("MYC") reports an amplification, then compare the
# two groups with a two-sample t-test and a Mann-Whitney U test.
MYC_amp_rnaseq_values = []
MYC_no_amp_rnaseq_values = []
for sample in samples.values():
    myc_is_amplified = sample.checkFocalGeneAmp("MYC")
    myc_expression = sample.getRNASeqFromGene("MYC")
    # Samples whose expression lookup yields "n/a" go into neither group.
    if myc_expression != "n/a":
        if myc_is_amplified:
            MYC_amp_rnaseq_values.append(myc_expression)
        else:
            MYC_no_amp_rnaseq_values.append(myc_expression)

# Series wrappers are only used for the group means reported below.
MYC_amp_rnaseq = pandas.Series(MYC_amp_rnaseq_values)
MYC_no_amp_rnaseq = pandas.Series(MYC_no_amp_rnaseq_values)

t, p = scipy.stats.ttest_ind(MYC_amp_rnaseq_values, MYC_no_amp_rnaseq_values)
t1, p1 = scipy.stats.mannwhitneyu(MYC_amp_rnaseq_values, MYC_no_amp_rnaseq_values)

# Assemble the report first, then emit it line by line.
myc_focal_report = [
    "",
    " 1) test whether MYC is overexpressed in samples with focal MYC amplification",
    " MYC_amp mean: " + str(MYC_amp_rnaseq.mean()) + " Samples: " + str(len(MYC_amp_rnaseq_values)),
    " MYC_no_amp mean: " + str(MYC_no_amp_rnaseq.mean()) + " Samples: " + str(len(MYC_no_amp_rnaseq_values)),
    " RNASeq Expression in MYC vs. non-MYC: T-test: " + str(t) + " p-value:" + str(p),
    " RNASeq Expression in MYC vs. non-MYC: Mann-W-U-test: " + str(t1) + " p-value:" + str(p1),
]
for report_line in myc_focal_report:
    print(report_line)
In [4]:
# 2) test whether CCND1 is overexpressed in samples with focal CCND1 amplification
# Every sample with a CCND1 RNASeq value is assigned to the "amplified" or the
# "not amplified" group according to checkFocalGeneAmp("CCND1"); the groups are
# then compared with a two-sample t-test and a Mann-Whitney U test.
CCND1_amp_rnaseq_values = []
CCND1_no_amp_rnaseq_values = []
for sample in samples.values():
    ccnd1_is_amplified = sample.checkFocalGeneAmp("CCND1")
    ccnd1_expression = sample.getRNASeqFromGene("CCND1")
    # Samples whose expression lookup yields "n/a" go into neither group.
    if ccnd1_expression != "n/a":
        if ccnd1_is_amplified:
            CCND1_amp_rnaseq_values.append(ccnd1_expression)
        else:
            CCND1_no_amp_rnaseq_values.append(ccnd1_expression)

# Series wrappers are only used for the group means reported below.
CCND1_amp_rnaseq = pandas.Series(CCND1_amp_rnaseq_values)
CCND1_no_amp_rnaseq = pandas.Series(CCND1_no_amp_rnaseq_values)

t, p = scipy.stats.ttest_ind(CCND1_amp_rnaseq_values, CCND1_no_amp_rnaseq_values)
t1, p1 = scipy.stats.mannwhitneyu(CCND1_amp_rnaseq_values, CCND1_no_amp_rnaseq_values)

# Assemble the report first, then emit it line by line.
ccnd1_focal_report = [
    "",
    " 2) test whether CCND1 is overexpressed in samples with focal CCND1 amplification",
    " CCND1_amp mean: " + str(CCND1_amp_rnaseq.mean()) + " Samples: " + str(len(CCND1_amp_rnaseq_values)),
    " CCND1_no_amp mean: " + str(CCND1_no_amp_rnaseq.mean()) + " Samples: " + str(len(CCND1_no_amp_rnaseq_values)),
    " RNASeq Expression in CCND1 vs. non-CCND1: T-test: " + str(t) + " p-value:" + str(p),
    " RNASeq Expression in CCND1 vs. non-CCND1: Mann-W-U-test: " + str(t1) + " p-value:" + str(p1),
]
for report_line in ccnd1_focal_report:
    print(report_line)
In [5]:
# 3) test whether MYC is overexpressed in samples with MYC copynumber > 4
# get Log2-ratio values from MYC (hg19: chr8:128,748,315-128,753,680)
#
# For every sample the log2 ratio over the MYC locus is converted to a copy
# number, the sample's MYC RNASeq value is put into the ">4" or "<4" group,
# and the two groups are compared with a t-test and a Mann-Whitney U test.
MYC_cnover4_rnaseq = []
MYC_cnunder4_rnaseq = []
myc_position = gene_positions['MYC']  # same locus for every sample
for sample in samples.values():
    log2 = sample.getLog2FromGene(myc_position.chrom, myc_position.start, myc_position.end)
    if log2 is None:
        # No log2 ratio for this sample: it contributes to neither group.
        continue
    # copy number = 2 * 2^log2, i.e. a log2 ratio of 0 maps to 2 copies.
    copynumber = 2.0 * 2.0 ** log2
    if copynumber > 4:
        myc_cn_group = MYC_cnover4_rnaseq
    elif copynumber < 4:
        myc_cn_group = MYC_cnunder4_rnaseq
    else:
        # A copy number of exactly 4 (or NaN) is in neither group.
        continue
    myc_expression = sample.getRNASeqFromGene("MYC")
    # Samples whose expression lookup yields "n/a" are skipped.
    if myc_expression != "n/a":
        myc_cn_group.append(myc_expression)

t, p = scipy.stats.ttest_ind(MYC_cnover4_rnaseq, MYC_cnunder4_rnaseq)
t1, p1 = scipy.stats.mannwhitneyu(MYC_cnover4_rnaseq, MYC_cnunder4_rnaseq)

print("")
print(" 3) test whether MYC is overexpressed in samples with MYC copynumber > 4")
print(" MYC_CN>4 mean: " + str(numpy.mean(MYC_cnover4_rnaseq)) + " Samples: " + str(len(MYC_cnover4_rnaseq)))
print(" MYC_CN<4 mean: " + str(numpy.mean(MYC_cnunder4_rnaseq)) + " Samples: " + str(len(MYC_cnunder4_rnaseq)))
print(" RNASeq Expression in MYC (CN>4) vs. MYC (CN<4): T-test: " + str(t) + " p-value:" + str(p))
print(" RNASeq Expression in MYC (CN>4) vs. MYC (CN<4): Mann-W-U: " + str(t1) + " p-value:" + str(p1))
In [6]:
# 4) test whether CCND1 is overexpressed in samples with CCND1 copynumber > 4
#
# For every sample the log2 ratio over the CCND1 locus is converted to a copy
# number, the sample's CCND1 RNASeq value is put into the ">4" or "<4" group,
# and the two groups are compared with a t-test and a Mann-Whitney U test.
CCND1_cnover4_rnaseq = []
CCND1_cnunder4_rnaseq = []
ccnd1_position = gene_positions['CCND1']  # same locus for every sample
for sample in samples.values():
    log2 = sample.getLog2FromGene(ccnd1_position.chrom, ccnd1_position.start, ccnd1_position.end)
    if log2 is None:
        # No log2 ratio for this sample: it contributes to neither group.
        continue
    # copy number = 2 * 2^log2, i.e. a log2 ratio of 0 maps to 2 copies.
    copynumber = 2.0 * 2.0 ** log2
    if copynumber > 4:
        ccnd1_cn_group = CCND1_cnover4_rnaseq
    elif copynumber < 4:
        ccnd1_cn_group = CCND1_cnunder4_rnaseq
    else:
        # A copy number of exactly 4 (or NaN) is in neither group.
        continue
    ccnd1_expression = sample.getRNASeqFromGene("CCND1")
    # Samples whose expression lookup yields "n/a" are skipped.
    if ccnd1_expression != "n/a":
        ccnd1_cn_group.append(ccnd1_expression)

t, p = scipy.stats.ttest_ind(CCND1_cnover4_rnaseq, CCND1_cnunder4_rnaseq)
t1, p1 = scipy.stats.mannwhitneyu(CCND1_cnover4_rnaseq, CCND1_cnunder4_rnaseq)

print("")
print(" 4) test whether CCND1 is overexpressed in samples with CCND1 copynumber > 4")
# " Samples: " carries a leading space so the mean and the label stay separated.
print(" CCND1_CN>4 mean: " + str(numpy.mean(CCND1_cnover4_rnaseq)) + " Samples: " + str(len(CCND1_cnover4_rnaseq)))
print(" CCND1_CN<4 mean: " + str(numpy.mean(CCND1_cnunder4_rnaseq)) + " Samples: " + str(len(CCND1_cnunder4_rnaseq)))
print(" RNASeq Expression in CCND1 (CN>4) vs. CCND1 (CN<4): T-test: " + str(t) + " p-value:" + str(p))
print(" RNASeq Expression in CCND1 (CN>4) vs. CCND1 (CN<4): Mann-W-U: " + str(t1) + " p-value:" + str(p1))
In [2]:
# 5) Pooled test over a list of genes: for every sample with RNASeq data and
# every gene in beroukhim_genes, the gene's RNASeq value is put into the
# "focally amplified" or "not focally amplified" group according to
# checkFocalGeneAmp(gene). The two pooled groups are then compared with a
# two-sample t-test and a Mann-Whitney U test.
# NOTE(review): raw expression values of different genes are pooled without
# per-gene normalisation -- confirm this is intended.
focal_amp_rnaseq_values = []
focal_no_amp_rnaseq_values = []
beroukhim_genes = [
    "MYC", "CCND1", "ERBB2", "CDK4", "NKX2-1", "MDM2", "EGFR", "MCL1", "FGFR1", "KRAS",
    "CCNE1", "CRKL", "HMGA2", "TERT", "PRKCI", "IGF1R", "MYCL", "MYCN", "CDK6", "BCL2L1",
    "MYB", "MET", "JUN", "BIRC2", "YAP1", "PDGFRA", "KIT", "PIK3CA", "MDM4", "AR",
]
for sample in samples.values():
    # Samples with a falsy rnaseq_data attribute are skipped entirely.
    if not sample.rnaseq_data:
        continue
    for gene in beroukhim_genes:
        gene_is_amplified = sample.checkFocalGeneAmp(gene)
        gene_expression = sample.getRNASeqFromGene(gene)
        # Genes whose expression lookup yields "n/a" go into neither group.
        if gene_expression != "n/a":
            if gene_is_amplified:
                focal_amp_rnaseq_values.append(gene_expression)
            else:
                focal_no_amp_rnaseq_values.append(gene_expression)

# Series wrappers are only used for the group means reported below.
focal_amp_rnaseq = pandas.Series(focal_amp_rnaseq_values)
focal_no_amp_rnaseq = pandas.Series(focal_no_amp_rnaseq_values)

t, p = scipy.stats.ttest_ind(focal_amp_rnaseq_values, focal_no_amp_rnaseq_values)
t1, p1 = scipy.stats.mannwhitneyu(focal_amp_rnaseq_values, focal_no_amp_rnaseq_values)

print("")
print(" 5) test whether focally amplified genes are overexpressed in samples with focal amplifications")
# The "Samples" counts below are numbers of (sample, gene) pairs, because one
# value is appended per sample and gene.
print(" focal_amp mean: " + str(focal_amp_rnaseq.mean()) + " Samples: " + str(len(focal_amp_rnaseq_values)))
print(" focal_no_amp mean: " + str(focal_no_amp_rnaseq.mean()) + " Samples: " + str(len(focal_no_amp_rnaseq_values)))
print(" RNASeq Expression in focally amplified vs. non-amplified genes: T-test: " + str(t) + " p-value:" + str(p))
print(" RNASeq Expression in focally amplified vs. non-amplified genes: Mann-W-U-test: " + str(t1) + " p-value:" + str(p1))
In [ ]: