In [5]:
import NotebookImport
from TCGA_analysis_PanCancer_import import *
from IPython.display import HTML
Remove samples where CCND1 is also focally amplified
In [2]:
# 1) Fisher's exact test: do tumors with a focal MYC amplification carry TP53
#    mutations more often than tumors without a focal MYC amplification?
#    Samples whose CCND1 is focally amplified as well are kept out of the
#    "MYC amplified" row (they are not counted anywhere).
MYC_amp_tp53_count = MYC_amp_no_tp53_count = 0
MYC_no_amp_tp53_count = MYC_no_amp_no_tp53_count = 0

for sample in samples.values():
    # Samples without somatic mutation data cannot be classified for TP53.
    if not sample.somatic_mutation_data:
        continue
    myc_amplified = sample.checkFocalGeneAmp("MYC")
    if myc_amplified and sample.checkFocalGeneAmp("CCND1"):
        # MYC/CCND1 co-amplified: belongs to neither row of the table.
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if myc_amplified:
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: MYC amplified / not amplified; columns: TP53 mutated / not mutated.
contingency = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
               [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 1) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
for label, value in [
        (" MYC amplified samples having TP53 mutations: ", MYC_amp_tp53_count),
        (" MYC not amplified samples having TP53 mutations: ", MYC_no_amp_tp53_count),
        (" MYC amplified samples having no TP53 mutation: ", MYC_amp_no_tp53_count),
        (" MYC not amplified samples having no TP53 mutations: ", MYC_no_amp_no_tp53_count)]:
    print(label + str(value))
print(" Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [3]:
# 1) Same Fisher's exact test as the pan-cancer MYC focal-amplification test,
#    restricted to samples that have clinical data and whose primary site is
#    not "breast". MYC/CCND1 co-amplified samples are not counted anywhere.
MYC_amp_tp53_count = MYC_amp_no_tp53_count = 0
MYC_no_amp_tp53_count = MYC_no_amp_no_tp53_count = 0

for sample in samples.values():
    # Need mutation data for the TP53 call and clinical data for the site filter.
    if not sample.somatic_mutation_data or not sample.clinical:
        continue
    if sample.primarysiteofdesease == "breast":
        continue
    myc_amplified = sample.checkFocalGeneAmp("MYC")
    if myc_amplified and sample.checkFocalGeneAmp("CCND1"):
        # Co-amplified with CCND1: excluded from both rows.
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if myc_amplified:
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: MYC amplified / not amplified; columns: TP53 mutated / not mutated.
contingency = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
               [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 1) Test whether MYC focally amplified non-breast tumors contain TP53 mutations more frequently than non focally amplified Tumors")
for label, value in [
        (" MYC amplified samples having TP53 mutations: ", MYC_amp_tp53_count),
        (" MYC not amplified samples having TP53 mutations: ", MYC_no_amp_tp53_count),
        (" MYC amplified samples having no TP53 mutation: ", MYC_amp_no_tp53_count),
        (" MYC not amplified samples having no TP53 mutations: ", MYC_no_amp_no_tp53_count)]:
    print(label + str(value))
print(" Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [3]:
# 2) Fisher's exact test: do tumors in which BOTH MYC and CCND1 are focally
#    amplified carry TP53 mutations more often than tumors in which NEITHER
#    gene is focally amplified? Samples with exactly one of the two genes
#    amplified are not counted.
MYC_amp_tp53_count = MYC_amp_no_tp53_count = 0
MYC_no_amp_tp53_count = MYC_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    myc_amplified = sample.checkFocalGeneAmp("MYC")
    ccnd1_amplified = sample.checkFocalGeneAmp("CCND1")
    if myc_amplified and ccnd1_amplified:
        both_amplified = True
    elif not myc_amplified and not ccnd1_amplified:
        both_amplified = False
    else:
        # Only one of the two genes is amplified: outside this comparison.
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if both_amplified:
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: both amplified / neither amplified; columns: TP53 mutated / not mutated.
contingency = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
               [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
for label, value in [
        (" MYC and CCND1 amplified samples having TP53 mutations: ", MYC_amp_tp53_count),
        (" MYC and CCND1 not amplified samples having TP53 mutations: ", MYC_no_amp_tp53_count),
        (" MYC and CCND1 amplified samples having no TP53 mutation: ", MYC_amp_no_tp53_count),
        (" MYC and CCND1 not amplified samples having no TP53 mutations: ", MYC_no_amp_no_tp53_count)]:
    print(label + str(value))
print(" Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [4]:
# 3) Fisher's exact test: do tumors with a focal CCND1 amplification carry
#    TP53 mutations more often than tumors without a focal CCND1
#    amplification? Samples whose MYC is focally amplified as well are kept
#    out of the "CCND1 amplified" row (they are not counted anywhere).
CCND1_amp_tp53_count = CCND1_amp_no_tp53_count = 0
CCND1_no_amp_tp53_count = CCND1_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    ccnd1_amplified = sample.checkFocalGeneAmp("CCND1")
    if ccnd1_amplified and sample.checkFocalGeneAmp("MYC"):
        # CCND1/MYC co-amplified: belongs to neither row of the table.
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if ccnd1_amplified:
        if tp53_mutated:
            CCND1_amp_tp53_count += 1
        else:
            CCND1_amp_no_tp53_count += 1
    elif tp53_mutated:
        CCND1_no_amp_tp53_count += 1
    else:
        CCND1_no_amp_no_tp53_count += 1

# Rows: CCND1 amplified / not amplified; columns: TP53 mutated / not mutated.
contingency = [[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count],
               [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
for label, value in [
        (" CCND1 amplified samples having TP53 mutations: ", CCND1_amp_tp53_count),
        (" CCND1 not amplified samples having TP53 mutations: ", CCND1_no_amp_tp53_count),
        (" CCND1 amplified samples having no TP53 mutation: ", CCND1_amp_no_tp53_count),
        (" CCND1 not amplified samples having no TP53 mutations: ", CCND1_no_amp_no_tp53_count)]:
    print(label + str(value))
print(" Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [5]:
# 4) Fisher's exact test: do tumors with MYC copy number > 4 carry TP53
#    mutations more often than tumors with MYC copy number < 4?
#    Samples without a log2 value for MYC, and samples whose copy number is
#    exactly 4, fall into neither group and are not counted.
MYC_amp_tp53_count = MYC_amp_no_tp53_count = 0
MYC_no_amp_tp53_count = MYC_no_amp_no_tp53_count = 0

# The gene coordinates do not change per sample, so look them up once.
myc_locus = gene_positions['MYC']

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2 = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    # `is None` instead of `!= None`, and no "n/a" string sentinel that would
    # later have to be compared against numbers.
    if log2 is None:
        continue
    # presumably log2 is a ratio against a diploid baseline -- TODO confirm
    copynumber = 2.0 * 2.0 ** log2
    if copynumber > 4:
        amplified = True
    elif copynumber < 4:
        amplified = False
    else:
        # Exactly 4: matches neither "CN>4" nor "CN<4".
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if amplified:
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: MYC CN>4 / CN<4; columns: TP53 mutated / not mutated.
contingency = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
               [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors")
print(" MYC CN>4 samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print(" MYC CN<4 samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print(" MYC CN>4 samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print(" MYC CN<4 samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print(" Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [4]:
# 4) Fisher's exact test on non-breast tumors: do tumors with MYC copy
#    number > 4 carry TP53 mutations more often than tumors with MYC copy
#    number < 4? Only samples with clinical data whose primary site is not
#    "breast" are considered. Samples without a log2 value for MYC, and
#    samples whose copy number is exactly 4, are not counted.
MYC_amp_tp53_count = MYC_amp_no_tp53_count = 0
MYC_no_amp_tp53_count = MYC_no_amp_no_tp53_count = 0

# The gene coordinates do not change per sample, so look them up once.
myc_locus = gene_positions['MYC']

for sample in samples.values():
    # Need mutation data for the TP53 call and clinical data for the site filter.
    if not sample.somatic_mutation_data or not sample.clinical:
        continue
    if sample.primarysiteofdesease == "breast":
        continue
    log2 = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    # `is None` instead of `!= None`, and no "n/a" string sentinel that would
    # later have to be compared against numbers.
    if log2 is None:
        continue
    # presumably log2 is a ratio against a diploid baseline -- TODO confirm
    copynumber = 2.0 * 2.0 ** log2
    if copynumber > 4:
        amplified = True
    elif copynumber < 4:
        amplified = False
    else:
        # Exactly 4: matches neither "CN>4" nor "CN<4".
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if amplified:
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: MYC CN>4 / CN<4; columns: TP53 mutated / not mutated.
contingency = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
               [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 4) test whether non-breast tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors")
print(" MYC CN>4 samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print(" MYC CN<4 samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print(" MYC CN>4 samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print(" MYC CN<4 samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print(" Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [6]:
# 5) Fisher's exact test: do tumors with CCND1 copy number > 4 carry TP53
#    mutations more often than tumors with CCND1 copy number < 4?
#    Samples without a log2 value for CCND1, and samples whose copy number is
#    exactly 4, fall into neither group and are not counted.
CCND1_amp_tp53_count = CCND1_amp_no_tp53_count = 0
CCND1_no_amp_tp53_count = CCND1_no_amp_no_tp53_count = 0

# The gene coordinates do not change per sample, so look them up once.
ccnd1_locus = gene_positions['CCND1']

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    # `is None` instead of `!= None`, and no "n/a" string sentinel that would
    # later have to be compared against numbers.
    if log2 is None:
        continue
    # presumably log2 is a ratio against a diploid baseline -- TODO confirm
    copynumber = 2.0 * 2.0 ** log2
    if copynumber > 4:
        amplified = True
    elif copynumber < 4:
        amplified = False
    else:
        # Exactly 4: matches neither "CN>4" nor "CN<4".
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if amplified:
        if tp53_mutated:
            CCND1_amp_tp53_count += 1
        else:
            CCND1_amp_no_tp53_count += 1
    elif tp53_mutated:
        CCND1_no_amp_tp53_count += 1
    else:
        CCND1_no_amp_no_tp53_count += 1

# Rows: CCND1 CN>4 / CN<4; columns: TP53 mutated / not mutated.
contingency = [[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count],
               [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors")
print(" CCND1 CN>4 samples having TP53 mutations: " + str(CCND1_amp_tp53_count))
print(" CCND1 CN<4 samples having TP53 mutations: " + str(CCND1_no_amp_tp53_count))
print(" CCND1 CN>4 samples having no TP53 mutation: " + str(CCND1_amp_no_tp53_count))
print(" CCND1 CN<4 samples having no TP53 mutations: " + str(CCND1_no_amp_no_tp53_count))
print(" Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [7]:
# 6) Fisher's exact test: do tumors with MYC copy number > 4 but CCND1 copy
#    number < 4 carry TP53 mutations more often than tumors with MYC copy
#    number < 4?
#    - "Amplified" row: MYC CN>4 AND a known CCND1 copy number that is < 4.
#    - "Not amplified" row: MYC CN<4, whatever the CCND1 status is.
#    Samples without a MYC log2 value, or with MYC copy number exactly 4, are
#    not counted.
MYC_amp_tp53_count = MYC_amp_no_tp53_count = 0
MYC_no_amp_tp53_count = MYC_no_amp_no_tp53_count = 0

# The gene coordinates do not change per sample, so look them up once.
myc_locus = gene_positions['MYC']
ccnd1_locus = gene_positions['CCND1']

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    # None (rather than an "n/a" string) marks a missing copy number, so a
    # number is never compared against a string.
    # presumably log2 is a ratio against a diploid baseline -- TODO confirm
    copynumber_MYC = None if log2_MYC is None else 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = None if log2_CCND1 is None else 2.0 * 2.0 ** log2_CCND1
    if copynumber_MYC is None:
        continue
    if copynumber_MYC > 4:
        # The amplified row additionally requires a known CCND1 CN below 4.
        if copynumber_CCND1 is None or not copynumber_CCND1 < 4:
            continue
        amplified = True
    elif copynumber_MYC < 4:
        amplified = False
    else:
        # MYC copy number exactly 4: matches neither group.
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if amplified:
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: (MYC CN>4, CCND1 CN<4) / MYC CN<4; columns: TP53 mutated / not mutated.
contingency = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
               [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 6) test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors")
print(" MYC CN>4, CCND1 CN<4 samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print(" MYC CN<4 samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print(" MYC CN>4, CCND1 CN<4 samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print(" MYC CN<4 samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print(" Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [8]:
# 7) Fisher's exact test: do tumors with MYC copy number > 4 AND CCND1 copy
#    number > 4 carry TP53 mutations more often than tumors with MYC copy
#    number < 4?
#    - "Amplified" row: MYC CN>4 AND a known CCND1 copy number that is > 4.
#    - "Not amplified" row: MYC CN<4, whatever the CCND1 status is.
#    Samples without a MYC log2 value, or with MYC copy number exactly 4, are
#    not counted.
MYC_amp_tp53_count = MYC_amp_no_tp53_count = 0
MYC_no_amp_tp53_count = MYC_no_amp_no_tp53_count = 0

# The gene coordinates do not change per sample, so look them up once.
myc_locus = gene_positions['MYC']
ccnd1_locus = gene_positions['CCND1']

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    # None (rather than an "n/a" string) marks a missing copy number, so a
    # number is never compared against a string.
    # presumably log2 is a ratio against a diploid baseline -- TODO confirm
    copynumber_MYC = None if log2_MYC is None else 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = None if log2_CCND1 is None else 2.0 * 2.0 ** log2_CCND1
    if copynumber_MYC is None:
        continue
    if copynumber_MYC > 4:
        # The amplified row additionally requires a known CCND1 CN above 4.
        if copynumber_CCND1 is None or not copynumber_CCND1 > 4:
            continue
        amplified = True
    elif copynumber_MYC < 4:
        amplified = False
    else:
        # MYC copy number exactly 4: matches neither group.
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if amplified:
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: (MYC CN>4, CCND1 CN>4) / MYC CN<4; columns: TP53 mutated / not mutated.
contingency = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
               [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency, alternative='greater')

# Single-argument print(...) produces the same output under Python 2 and 3.
# The two "amplified" labels previously read "CCND1 CN<4" although this cell
# selects CCND1 CN>4; they now describe the group that is actually counted.
print("")
print(" 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors")
print(" MYC CN>4, CCND1 CN>4 samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print(" MYC CN<4 samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print(" MYC CN>4, CCND1 CN>4 samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print(" MYC CN<4 samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print(" Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater))
In [9]:
# IPython line magic: render matplotlib figures inline in the notebook output.
# The pie-chart cells that follow produce their figures after this line.
%matplotlib inline
In [10]:
# 8) TP53 mutation spectrum over all samples that have somatic mutation data.
#    Every TP53 mutation with a recognised mut_type contributes one label;
#    mutation types that are not listed below are skipped.

# mut_type -> spectrum label for every type that needs no position check.
TP53_LABEL_BY_MUT_TYPE = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Missense mutations are split by position into the two
            # "L2"/"L3" windows and everything else.
            # NOTE(review): the windows look like genomic coordinates of the
            # TP53 L2/L3 loops -- confirm the reference build.
            if 7578264 <= mutation.pos <= 7578443:
                TP53_types.append("L2_Missense")
            elif 7577528 <= mutation.pos <= 7577575:
                TP53_types.append("L3_Missense")
            else:
                TP53_types.append("Other_Missense")
        elif mutation.mut_type in TP53_LABEL_BY_MUT_TYPE:
            TP53_types.append(TP53_LABEL_BY_MUT_TYPE[mutation.mut_type])

TP53_type_series = pandas.Categorical(sorted(TP53_types))
# Per-category counts and frequencies, shared by the pie chart and the table.
TP53_type_summary = TP53_type_series.describe()

# NOTE(review): up to 10 labels can occur but only 8 colours are listed, so
# colours may repeat between wedges -- confirm this is acceptable.
fig_TP53 = TP53_type_summary.counts.plot(
    kind='pie', figsize=(6, 6),
    colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'], title=None)
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('PanCancer_mut_spectrum.png', dpi=200)

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 8) check TP53 mutation spectrum")
print(" Total mutations: " + str(len(TP53_types)))
print(" ")

# Build the HTML summary table, one row per category.
table_rows = []
for row_index, category in enumerate(TP53_type_series.categories):
    table_rows.append(
        "<tr><td>" + category
        + "</td><td>" + str(TP53_type_summary['counts'][row_index])
        + "</td><td>" + str(TP53_type_summary['freqs'][row_index])
        + "</td></tr>")
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>" + "".join(table_rows) + "</table>"
h = HTML(s)
h
Out[10]:
In [11]:
# 9) TP53 mutation spectrum in tumors with a focal MYC amplification and no
#    focal CCND1 amplification. Mutation types not listed below are skipped.

# mut_type -> spectrum label for every type that needs no position check.
TP53_LABEL_BY_MUT_TYPE = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types_MYC = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        # Sample filter: MYC focally amplified, CCND1 not focally amplified.
        if not sample.checkFocalGeneAmp("MYC") or sample.checkFocalGeneAmp("CCND1"):
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Missense mutations are split by position into the two
            # "L2"/"L3" windows and everything else.
            if 7578264 <= mutation.pos <= 7578443:
                TP53_types_MYC.append("L2_Missense")
            elif 7577528 <= mutation.pos <= 7577575:
                TP53_types_MYC.append("L3_Missense")
            else:
                TP53_types_MYC.append("Other_Missense")
        elif mutation.mut_type in TP53_LABEL_BY_MUT_TYPE:
            TP53_types_MYC.append(TP53_LABEL_BY_MUT_TYPE[mutation.mut_type])

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
# Per-category counts and frequencies, shared by the pie chart and the table.
TP53_type_MYC_summary = TP53_type_MYC_series.describe()

fig_TP53 = TP53_type_MYC_summary.counts.plot(
    kind='pie', figsize=(6, 6),
    colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('PanCancer_MYC_focal_mut_spectrum.png', dpi=200)

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 9) check TP53 mutation spectrum in MYC amplified tumors")
print(" Total mutations: " + str(len(TP53_types_MYC)))
print(" ")

# Build the HTML summary table, one row per category.
table_rows = []
for row_index, category in enumerate(TP53_type_MYC_series.categories):
    table_rows.append(
        "<tr><td>" + category
        + "</td><td>" + str(TP53_type_MYC_summary['counts'][row_index])
        + "</td><td>" + str(TP53_type_MYC_summary['freqs'][row_index])
        + "</td></tr>")
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>" + "".join(table_rows) + "</table>"
h = HTML(s)
h
Out[11]:
In [33]:
# 10) TP53 mutation spectrum in samples with MYC copy number > 4 and CCND1
#     copy number < 4. Samples lacking a log2 value for either gene are
#     skipped; mutation types not listed below are skipped.

# mut_type -> spectrum label for every type that needs no position check.
TP53_LABEL_BY_MUT_TYPE = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

# The gene coordinates do not change per sample, so look them up once.
myc_locus = gene_positions['MYC']
ccnd1_locus = gene_positions['CCND1']

TP53_types_MYC = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    # None (rather than an "n/a" string) marks a missing copy number, so a
    # number is never compared against a string.
    if log2_MYC is None or log2_CCND1 is None:
        continue
    # presumably log2 is a ratio against a diploid baseline -- TODO confirm
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
    # The copy-number filter is a property of the sample, so it is tested
    # once here instead of once per branch for every mutation.
    if not (copynumber_MYC > 4 and copynumber_CCND1 < 4):
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Missense mutations are split by position into the two
            # "L2"/"L3" windows and everything else.
            if 7578264 <= mutation.pos <= 7578443:
                TP53_types_MYC.append("L2_Missense")
            elif 7577528 <= mutation.pos <= 7577575:
                TP53_types_MYC.append("L3_Missense")
            else:
                TP53_types_MYC.append("Other_Missense")
        elif mutation.mut_type in TP53_LABEL_BY_MUT_TYPE:
            TP53_types_MYC.append(TP53_LABEL_BY_MUT_TYPE[mutation.mut_type])

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
# Per-category counts and frequencies, computed once and shared by the pie
# chart and the table (previously recomputed twice per table row).
TP53_type_MYC_summary = TP53_type_MYC_series.describe()

fig_TP53 = TP53_type_MYC_summary.counts.plot(
    kind='pie', figsize=(6, 6),
    colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('PanCancer_MYC_cn4_mut_spectrum.png', dpi=200)

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4")
print(" Total mutations: " + str(len(TP53_types_MYC)))
print(" ")

# Build the HTML summary table, one row per category.
table_rows = []
for row_index, category in enumerate(TP53_type_MYC_series.categories):
    table_rows.append(
        "<tr><td>" + category
        + "</td><td>" + str(TP53_type_MYC_summary['counts'][row_index])
        + "</td><td>" + str(TP53_type_MYC_summary['freqs'][row_index])
        + "</td></tr>")
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>" + "".join(table_rows) + "</table>"
h = HTML(s)
h
Out[33]:
In [13]:
# 11) TP53 mutation spectrum in tumors with a focal CCND1 amplification and
#     no focal MYC amplification. Mutation types not listed below are skipped.

# mut_type -> spectrum label for every type that needs no position check.
TP53_LABEL_BY_MUT_TYPE = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types_CCND1 = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        # Sample filter: CCND1 focally amplified, MYC not focally amplified.
        if not sample.checkFocalGeneAmp("CCND1") or sample.checkFocalGeneAmp("MYC"):
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Missense mutations are split by position into the two
            # "L2"/"L3" windows and everything else.
            if 7578264 <= mutation.pos <= 7578443:
                TP53_types_CCND1.append("L2_Missense")
            elif 7577528 <= mutation.pos <= 7577575:
                TP53_types_CCND1.append("L3_Missense")
            else:
                TP53_types_CCND1.append("Other_Missense")
        elif mutation.mut_type in TP53_LABEL_BY_MUT_TYPE:
            TP53_types_CCND1.append(TP53_LABEL_BY_MUT_TYPE[mutation.mut_type])

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
# Per-category counts and frequencies, shared by the pie chart and the table.
TP53_types_CCND1_summary = TP53_types_CCND1_series.describe()

fig_TP53 = TP53_types_CCND1_summary.counts.plot(
    kind='pie', figsize=(6, 6),
    colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('PanCancer_CCND1_focal_mut_spectrum.png', dpi=200)

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 11) check TP53 mutation spectrum in CCND1 focally amplified tumors")
print(" Total mutations: " + str(len(TP53_types_CCND1)))
print(" ")

# Build the HTML summary table, one row per category.
table_rows = []
for row_index, category in enumerate(TP53_types_CCND1_series.categories):
    table_rows.append(
        "<tr><td>" + category
        + "</td><td>" + str(TP53_types_CCND1_summary['counts'][row_index])
        + "</td><td>" + str(TP53_types_CCND1_summary['freqs'][row_index])
        + "</td></tr>")
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>" + "".join(table_rows) + "</table>"
h = HTML(s)
h
Out[13]:
In [14]:
# 12) TP53 mutation spectrum in samples with CCND1 copy number > 4 and MYC
#     copy number < 4. Samples lacking a log2 value for either gene are
#     skipped; mutation types not listed below are skipped.

# mut_type -> spectrum label for every type that needs no position check.
TP53_LABEL_BY_MUT_TYPE = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

# The gene coordinates do not change per sample, so look them up once.
myc_locus = gene_positions['MYC']
ccnd1_locus = gene_positions['CCND1']

TP53_types_CCND1 = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    # None (rather than an "n/a" string) marks a missing copy number, so a
    # number is never compared against a string.
    if log2_MYC is None or log2_CCND1 is None:
        continue
    # presumably log2 is a ratio against a diploid baseline -- TODO confirm
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
    # The copy-number filter is a property of the sample, so it is tested
    # once here instead of once per branch for every mutation.
    if not (copynumber_MYC < 4 and copynumber_CCND1 > 4):
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Missense mutations are split by position into the two
            # "L2"/"L3" windows and everything else.
            if 7578264 <= mutation.pos <= 7578443:
                TP53_types_CCND1.append("L2_Missense")
            elif 7577528 <= mutation.pos <= 7577575:
                TP53_types_CCND1.append("L3_Missense")
            else:
                TP53_types_CCND1.append("Other_Missense")
        elif mutation.mut_type in TP53_LABEL_BY_MUT_TYPE:
            TP53_types_CCND1.append(TP53_LABEL_BY_MUT_TYPE[mutation.mut_type])

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
# Per-category counts and frequencies, computed once and shared by the pie
# chart and the table (previously recomputed twice per table row).
TP53_types_CCND1_summary = TP53_types_CCND1_series.describe()

fig_TP53 = TP53_types_CCND1_summary.counts.plot(
    kind='pie', figsize=(6, 6),
    colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('PanCancer_CCND1_cn4_mut_spectrum.png', dpi=200)

# Single-argument print(...) produces the same output under Python 2 and 3.
print("")
print(" 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4")
print(" Total mutations: " + str(len(TP53_types_CCND1)))
print(" ")

# Build the HTML summary table, one row per category.
table_rows = []
for row_index, category in enumerate(TP53_types_CCND1_series.categories):
    table_rows.append(
        "<tr><td>" + category
        + "</td><td>" + str(TP53_types_CCND1_summary['counts'][row_index])
        + "</td><td>" + str(TP53_types_CCND1_summary['freqs'][row_index])
        + "</td></tr>")
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>" + "".join(table_rows) + "</table>"
h = HTML(s)
h
Out[14]:
In [15]:
# 13) Per-mutation test: are TP53 frameshift indels over-represented among the
#     TP53 mutations of tumors with a focal MYC amplification (and no focal
#     CCND1 amplification), compared with the TP53 mutations of tumors that
#     carry no focal MYC amplification?
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
    # Samples without somatic mutation calls cannot contribute any mutation.
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        is_frameshift = mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins")
        if not sample.checkFocalGeneAmp("MYC"):
            # Reference group: no focal MYC amplification (CCND1 status ignored).
            if is_frameshift:
                MYC_no_amp_frameshift_count += 1
            else:
                MYC_no_amp_no_frameshift_count += 1
        elif not sample.checkFocalGeneAmp("CCND1"):
            # Test group: focal MYC amplification without focal CCND1 amplification.
            if is_frameshift:
                MYC_amp_frameshift_count += 1
            else:
                MYC_amp_no_frameshift_count += 1
        # Tumors with both MYC and CCND1 focally amplified are left out entirely.

# Rows: MYC amplified / not amplified; columns: frameshift / other mutation.
fisher_table = [
    [MYC_amp_frameshift_count, MYC_amp_no_frameshift_count],
    [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples",
    " MYC amplified Frameshift Indels: " + str(MYC_amp_frameshift_count),
    " MYC amplified other mutations: " + str(MYC_amp_no_frameshift_count),
    " MYC not amplified Frameshift Indels: " + str(MYC_no_amp_frameshift_count),
    " MYC not amplified other mutations: " + str(MYC_no_amp_no_frameshift_count),
    " ",
    " Fisher exact test (two-sided): ",
    " -- odd's ratio: " + str(oddsratio),
    " -- p-value: " + str(pvalue),
    " Fisher exact test (one-sided greater): ",
    " -- odd's ratio: " + str(oddsratio_greater),
    " -- p-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [16]:
# 13) Per-mutation test: are TP53 frameshift indels over-represented among the
#     TP53 mutations of tumors with MYC copy number > 4 and CCND1 copy number < 4,
#     compared with the TP53 mutations of tumors with MYC copy number < 4?
#
# Fix: the reference-group frameshift count used to require, in addition to
# MYC CN < 4, that MYC was not focally amplified, while the reference-group
# "other mutation" count did not.  Both cells of the reference row are now
# drawn from the same population (MYC CN < 4), so the 2x2 table is consistent.
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
    # Samples without somatic mutation calls cannot contribute any mutation.
    if not sample.somatic_mutation_data:
        continue

    # Copy number is derived from the segment log2 ratio as 2 * 2**log2;
    # "n/a" marks genes for which getLog2FromGene returned None.
    myc_locus = gene_positions['MYC']
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC if log2_MYC is not None else "n/a"

    ccnd1_locus = gene_positions['CCND1']
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1 if log2_CCND1 is not None else "n/a"

    # Group membership is a per-sample property; a copy number of exactly 4
    # (or an unknown MYC copy number) places the sample in neither group.
    myc_known = copynumber_MYC != "n/a"
    in_test_group = (myc_known and copynumber_MYC > 4
                     and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4)
    in_reference_group = myc_known and copynumber_MYC < 4

    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        is_frameshift = mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins")
        if in_test_group:
            if is_frameshift:
                MYC_amp_frameshift_count += 1
            else:
                MYC_amp_no_frameshift_count += 1
        elif in_reference_group:
            if is_frameshift:
                MYC_no_amp_frameshift_count += 1
            else:
                MYC_no_amp_no_frameshift_count += 1

# Rows: MYC CN>4 (CCND1 CN<4) / MYC CN<4; columns: frameshift / other mutation.
fisher_table = [
    [MYC_amp_frameshift_count, MYC_amp_no_frameshift_count],
    [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples",
    " MYC amplified Frameshift Indels: " + str(MYC_amp_frameshift_count),
    " MYC amplified other mutations: " + str(MYC_amp_no_frameshift_count),
    " MYC not amplified Frameshift Indels: " + str(MYC_no_amp_frameshift_count),
    " MYC not amplified other mutations: " + str(MYC_no_amp_no_frameshift_count),
    " ",
    " Fisher exact test (two-sided): ",
    " -- odd's ratio: " + str(oddsratio),
    " -- p-value: " + str(pvalue),
    " Fisher exact test (one-sided greater): ",
    " -- odd's ratio: " + str(oddsratio_greater),
    " -- p-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [17]:
# 13) Per-mutation test: are deleterious TP53 mutations (frameshift indels and
#     nonsense mutations) over-represented among the TP53 mutations of tumors
#     with a focal MYC amplification (and no focal CCND1 amplification),
#     compared with tumors that carry no focal MYC amplification?
#     The *_frameshift_* counters hold the deleterious-mutation counts here.
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
    # Samples without somatic mutation calls cannot contribute any mutation.
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        is_deleterious = mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
        if not sample.checkFocalGeneAmp("MYC"):
            # Reference group: no focal MYC amplification (CCND1 status ignored).
            if is_deleterious:
                MYC_no_amp_frameshift_count += 1
            else:
                MYC_no_amp_no_frameshift_count += 1
        elif not sample.checkFocalGeneAmp("CCND1"):
            # Test group: focal MYC amplification without focal CCND1 amplification.
            if is_deleterious:
                MYC_amp_frameshift_count += 1
            else:
                MYC_amp_no_frameshift_count += 1
        # Tumors with both MYC and CCND1 focally amplified are left out entirely.

# Rows: MYC amplified / not amplified; columns: deleterious / other mutation.
fisher_table = [
    [MYC_amp_frameshift_count, MYC_amp_no_frameshift_count],
    [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) focally amplified samples",
    " MYC amplified deleterious mutations: " + str(MYC_amp_frameshift_count),
    " MYC amplified other mutations: " + str(MYC_amp_no_frameshift_count),
    " MYC not amplified deleterious mutations: " + str(MYC_no_amp_frameshift_count),
    " MYC not amplified other mutations: " + str(MYC_no_amp_no_frameshift_count),
    " ",
    " Fisher exact test (two-sided): ",
    " -- odd's ratio: " + str(oddsratio),
    " -- p-value: " + str(pvalue),
    " Fisher exact test (one-sided greater): ",
    " -- odd's ratio: " + str(oddsratio_greater),
    " -- p-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [18]:
# 13) Per-mutation test: are deleterious TP53 mutations (frameshift indels and
#     nonsense mutations) over-represented among the TP53 mutations of tumors
#     with MYC copy number > 4, CCND1 copy number < 4 and no focal CCND1
#     amplification, compared with tumors with MYC copy number < 4?
#     Samples lacking a log2 value for MYC or CCND1 are skipped altogether.
#     The *_frameshift_* counters hold the deleterious-mutation counts here.
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    myc_locus = gene_positions['MYC']
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    if log2_MYC is None:
        continue
    ccnd1_locus = gene_positions['CCND1']
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    if log2_CCND1 is None:
        continue

    # Copy number is derived from the segment log2 ratio as 2 * 2**log2.
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1

    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        is_deleterious = mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
        if copynumber_MYC > 4:
            # Test group additionally requires CCND1 to be neither gained nor focally amplified.
            if copynumber_CCND1 < 4 and not sample.checkFocalGeneAmp("CCND1"):
                if is_deleterious:
                    MYC_amp_frameshift_count += 1
                else:
                    MYC_amp_no_frameshift_count += 1
        elif copynumber_MYC < 4:
            if is_deleterious:
                MYC_no_amp_frameshift_count += 1
            else:
                MYC_no_amp_no_frameshift_count += 1
        # A MYC copy number of exactly 4 falls into neither group.

# Rows: MYC CN>4 / MYC CN<4; columns: deleterious / other mutation.
fisher_table = [
    [MYC_amp_frameshift_count, MYC_amp_no_frameshift_count],
    [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) CN>4 samples",
    " MYC amplified deleterious mutations: " + str(MYC_amp_frameshift_count),
    " MYC amplified other mutations: " + str(MYC_amp_no_frameshift_count),
    " MYC not amplified deleterious mutations: " + str(MYC_no_amp_frameshift_count),
    " MYC not amplified other mutations: " + str(MYC_no_amp_no_frameshift_count),
    " ",
    " Fisher exact test (two-sided): ",
    " -- odd's ratio: " + str(oddsratio),
    " -- p-value: " + str(pvalue),
    " Fisher exact test (one-sided greater): ",
    " -- odd's ratio: " + str(oddsratio_greater),
    " -- p-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [19]:
# 13) Per-sample test: do tumors with a focal MYC amplification (and no focal
#     CCND1 amplification) carry a deleterious TP53 mutation (frameshift indel
#     or nonsense) more often than tumors without a focal MYC amplification?
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    # True as soon as one deleterious TP53 mutation is seen in this sample.
    del_mut_found = any(
        mutation.gene == "TP53"
        and mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
        for mutation in sample.somatic_mutations
    )

    if not sample.checkFocalGeneAmp("MYC"):
        # Reference group: no focal MYC amplification (CCND1 status ignored).
        if del_mut_found:
            MYC_no_amp_tp53_count += 1
        else:
            MYC_no_amp_no_tp53_count += 1
    elif not sample.checkFocalGeneAmp("CCND1"):
        # Test group: focal MYC amplification without focal CCND1 amplification.
        if del_mut_found:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    # Tumors with both MYC and CCND1 focally amplified are left out entirely.

# Rows: MYC amplified / not amplified; columns: deleterious TP53 / none.
fisher_table = [
    [MYC_amp_tp53_count, MYC_amp_no_tp53_count],
    [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors",
    " MYC amplified samples having deleterious TP53 mutations: " + str(MYC_amp_tp53_count),
    " MYC not amplified samples having deleterious TP53 mutations: " + str(MYC_no_amp_tp53_count),
    " MYC amplified samples having no deleterious TP53 mutation: " + str(MYC_amp_no_tp53_count),
    " MYC not amplified samples having no deleterious TP53 mutations: " + str(MYC_no_amp_no_tp53_count),
    " Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [20]:
# 14) Per-sample test: do tumors with MYC copy number > 4 and CCND1 copy
#     number < 4 carry a deleterious TP53 mutation (frameshift indel or
#     nonsense) more often than tumors with MYC copy number < 4?
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    # True as soon as one deleterious TP53 mutation is seen in this sample.
    del_mut_found = any(
        mutation.gene == "TP53"
        and mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
        for mutation in sample.somatic_mutations
    )

    # Copy number is derived from the segment log2 ratio as 2 * 2**log2;
    # "n/a" marks genes for which getLog2FromGene returned None.
    myc_locus = gene_positions['MYC']
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC if log2_MYC is not None else "n/a"

    ccnd1_locus = gene_positions['CCND1']
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1 if log2_CCND1 is not None else "n/a"

    myc_known = copynumber_MYC != "n/a"
    if myc_known and copynumber_MYC > 4:
        # Test group also needs a known CCND1 copy number below 4.
        if copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4:
            if del_mut_found:
                MYC_amp_tp53_count += 1
            else:
                MYC_amp_no_tp53_count += 1
    elif myc_known and copynumber_MYC < 4:
        if del_mut_found:
            MYC_no_amp_tp53_count += 1
        else:
            MYC_no_amp_no_tp53_count += 1
    # Unknown MYC copy number, or exactly 4, falls into neither group.

# Rows: MYC CN>4 (CCND1 CN<4) / MYC CN<4; columns: deleterious TP53 / none.
fisher_table = [
    [MYC_amp_tp53_count, MYC_amp_no_tp53_count],
    [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC",
    " MYC CN>4 and CCND1 CN<4 samples having deleterious TP53 mutations: " + str(MYC_amp_tp53_count),
    " MYC CN<4 samples having deleterious TP53 mutations: " + str(MYC_no_amp_tp53_count),
    " MYC CN>4 and CCND1 CN<4 having no deleterious TP53 mutation: " + str(MYC_amp_no_tp53_count),
    " MYC CN<4 samples having no deleterious TP53 mutations: " + str(MYC_no_amp_no_tp53_count),
    " Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [21]:
# 15) Per-sample test: do tumors with a focal FGFR1 amplification (and no focal
#     ERBB2 amplification) carry TP53 mutations more often than tumors without
#     a focal FGFR1 amplification?
FGFR1_amp_tp53_count = 0
FGFR1_no_amp_tp53_count = 0
FGFR1_amp_no_tp53_count = 0
FGFR1_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if not sample.checkFocalGeneAmp("FGFR1"):
        # Reference group: no focal FGFR1 amplification (ERBB2 status ignored).
        if tp53_mutated:
            FGFR1_no_amp_tp53_count += 1
        else:
            FGFR1_no_amp_no_tp53_count += 1
    elif not sample.checkFocalGeneAmp("ERBB2"):
        # Test group: focal FGFR1 amplification without focal ERBB2 amplification.
        if tp53_mutated:
            FGFR1_amp_tp53_count += 1
        else:
            FGFR1_amp_no_tp53_count += 1
    # Tumors with both FGFR1 and ERBB2 focally amplified are left out entirely.

# Rows: FGFR1 amplified / not amplified; columns: TP53 mutated / wild type.
fisher_table = [
    [FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count],
    [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 15) test whether FGFR1 (but not ERBB2) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors",
    " FGFR1 amplified samples having TP53 mutations: " + str(FGFR1_amp_tp53_count),
    " FGFR1 not amplified samples having TP53 mutations: " + str(FGFR1_no_amp_tp53_count),
    " FGFR1 amplified samples having no TP53 mutation: " + str(FGFR1_amp_no_tp53_count),
    " FGFR1 not amplified samples having no TP53 mutations: " + str(FGFR1_no_amp_no_tp53_count),
    " Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [22]:
# 16) Per-sample test: do tumors with FGFR1 copy number > 4 and ERBB2 copy
#     number < 4 carry TP53 mutations more often than tumors with FGFR1 copy
#     number < 4?
FGFR1_amp_tp53_count = 0
FGFR1_no_amp_tp53_count = 0
FGFR1_amp_no_tp53_count = 0
FGFR1_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    # Copy number is derived from the segment log2 ratio as 2 * 2**log2;
    # "n/a" marks genes for which getLog2FromGene returned None.
    fgfr1_locus = gene_positions['FGFR1']
    log2_FGFR1 = sample.getLog2FromGene(fgfr1_locus.chrom, fgfr1_locus.start, fgfr1_locus.end)
    copynumber_FGFR1 = 2.0 * 2.0 ** log2_FGFR1 if log2_FGFR1 is not None else "n/a"

    erbb2_locus = gene_positions['ERBB2']
    log2_ERBB2 = sample.getLog2FromGene(erbb2_locus.chrom, erbb2_locus.start, erbb2_locus.end)
    copynumber_ERBB2 = 2.0 * 2.0 ** log2_ERBB2 if log2_ERBB2 is not None else "n/a"

    tp53_mutated = "TP53" in sample.genes_affected
    fgfr1_known = copynumber_FGFR1 != "n/a"
    if fgfr1_known and copynumber_FGFR1 > 4:
        # Test group also needs a known ERBB2 copy number below 4.
        if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4:
            if tp53_mutated:
                FGFR1_amp_tp53_count += 1
            else:
                FGFR1_amp_no_tp53_count += 1
    elif fgfr1_known and copynumber_FGFR1 < 4:
        if tp53_mutated:
            FGFR1_no_amp_tp53_count += 1
        else:
            FGFR1_no_amp_no_tp53_count += 1
    # Unknown FGFR1 copy number, or exactly 4, falls into neither group.

# Rows: FGFR1 CN>4 (ERBB2 CN<4) / FGFR1 CN<4; columns: TP53 mutated / wild type.
fisher_table = [
    [FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count],
    [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4 Tumors",
    " FGFR1 CN>4, ERBB2 CN<4 samples having TP53 mutations: " + str(FGFR1_amp_tp53_count),
    " FGFR1 CN<4 samples having TP53 mutations: " + str(FGFR1_no_amp_tp53_count),
    " FGFR1 CN>4, ERBB2 CN<4 samples having no TP53 mutation: " + str(FGFR1_amp_no_tp53_count),
    " FGFR1 CN<4 samples having no TP53 mutations: " + str(FGFR1_no_amp_no_tp53_count),
    " Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [23]:
# 17) Per-sample test: do tumors with a focal ERBB2 amplification (and no focal
#     FGFR1 amplification) carry TP53 mutations more often than tumors without
#     a focal ERBB2 amplification?
ERBB2_amp_tp53_count = 0
ERBB2_no_amp_tp53_count = 0
ERBB2_amp_no_tp53_count = 0
ERBB2_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if not sample.checkFocalGeneAmp("ERBB2"):
        # Reference group: no focal ERBB2 amplification (FGFR1 status ignored).
        if tp53_mutated:
            ERBB2_no_amp_tp53_count += 1
        else:
            ERBB2_no_amp_no_tp53_count += 1
    elif not sample.checkFocalGeneAmp("FGFR1"):
        # Test group: focal ERBB2 amplification without focal FGFR1 amplification.
        if tp53_mutated:
            ERBB2_amp_tp53_count += 1
        else:
            ERBB2_amp_no_tp53_count += 1
    # Tumors with both ERBB2 and FGFR1 focally amplified are left out entirely.

# Rows: ERBB2 amplified / not amplified; columns: TP53 mutated / wild type.
fisher_table = [
    [ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count],
    [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 17) test whether ERBB2 (but not FGFR1) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors",
    " ERBB2 amplified samples having TP53 mutations: " + str(ERBB2_amp_tp53_count),
    " ERBB2 not amplified samples having TP53 mutations: " + str(ERBB2_no_amp_tp53_count),
    " ERBB2 amplified samples having no TP53 mutation: " + str(ERBB2_amp_no_tp53_count),
    " ERBB2 not amplified samples having no TP53 mutations: " + str(ERBB2_no_amp_no_tp53_count),
    " Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [24]:
# 18) Per-sample test: do tumors with ERBB2 copy number > 4 and FGFR1 copy
#     number < 4 carry TP53 mutations more often than tumors with ERBB2 copy
#     number < 4?
ERBB2_amp_tp53_count = 0
ERBB2_no_amp_tp53_count = 0
ERBB2_amp_no_tp53_count = 0
ERBB2_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    # Copy number is derived from the segment log2 ratio as 2 * 2**log2;
    # "n/a" marks genes for which getLog2FromGene returned None.
    fgfr1_locus = gene_positions['FGFR1']
    log2_FGFR1 = sample.getLog2FromGene(fgfr1_locus.chrom, fgfr1_locus.start, fgfr1_locus.end)
    copynumber_FGFR1 = 2.0 * 2.0 ** log2_FGFR1 if log2_FGFR1 is not None else "n/a"

    erbb2_locus = gene_positions['ERBB2']
    log2_ERBB2 = sample.getLog2FromGene(erbb2_locus.chrom, erbb2_locus.start, erbb2_locus.end)
    copynumber_ERBB2 = 2.0 * 2.0 ** log2_ERBB2 if log2_ERBB2 is not None else "n/a"

    tp53_mutated = "TP53" in sample.genes_affected
    erbb2_known = copynumber_ERBB2 != "n/a"
    if erbb2_known and copynumber_ERBB2 > 4:
        # Test group also needs a known FGFR1 copy number below 4.
        if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4:
            if tp53_mutated:
                ERBB2_amp_tp53_count += 1
            else:
                ERBB2_amp_no_tp53_count += 1
    elif erbb2_known and copynumber_ERBB2 < 4:
        if tp53_mutated:
            ERBB2_no_amp_tp53_count += 1
        else:
            ERBB2_no_amp_no_tp53_count += 1
    # Unknown ERBB2 copy number, or exactly 4, falls into neither group.

# Rows: ERBB2 CN>4 (FGFR1 CN<4) / ERBB2 CN<4; columns: TP53 mutated / wild type.
fisher_table = [
    [ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count],
    [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4",
    " ERBB2 CN>4, FGFR1 CN<4 samples having TP53 mutations: " + str(ERBB2_amp_tp53_count),
    " ERBB2 CN<4 samples having TP53 mutations: " + str(ERBB2_no_amp_tp53_count),
    " ERBB2 CN>4, FGFR1 CN<4 samples having no TP53 mutation: " + str(ERBB2_amp_no_tp53_count),
    " ERBB2 CN<4 samples having no TP53 mutations: " + str(ERBB2_no_amp_no_tp53_count),
    " Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [25]:
# 19) Per-sample test: do tumors with a focal CCNE1 amplification carry TP53
#     mutations more often than tumors without one?  Also reports how many
#     CCNE1-amplified tumors exist overall and how many of them had to be
#     dropped because they have no somatic mutation data.
CCNE1_amp_tp53_count = 0
CCNE1_no_amp_tp53_count = 0
CCNE1_amp_no_tp53_count = 0
CCNE1_no_amp_no_tp53_count = 0
CCNE1_amp_count = 0
CCNE1_amp_missing_somatic = 0

for sample in samples.values():
    ccne1_amplified = sample.checkFocalGeneAmp("CCNE1")
    if ccne1_amplified:
        CCNE1_amp_count += 1

    if not sample.somatic_mutation_data:
        # Excluded from the test, but tallied when CCNE1 is amplified.
        if ccne1_amplified:
            CCNE1_amp_missing_somatic += 1
        continue

    tp53_mutated = "TP53" in sample.genes_affected
    if ccne1_amplified:
        if tp53_mutated:
            CCNE1_amp_tp53_count += 1
        else:
            CCNE1_amp_no_tp53_count += 1
    else:
        if tp53_mutated:
            CCNE1_no_amp_tp53_count += 1
        else:
            CCNE1_no_amp_no_tp53_count += 1

# Rows: CCNE1 amplified / not amplified; columns: TP53 mutated / wild type.
fisher_table = [
    [CCNE1_amp_tp53_count, CCNE1_amp_no_tp53_count],
    [CCNE1_no_amp_tp53_count, CCNE1_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors",
    " CCNE1 amplified samples: " + str(CCNE1_amp_count),
    " CCNE1 amplified samples missed due to missing somatic mutation data: " + str(CCNE1_amp_missing_somatic),
    "",
    " CCNE1 amplified samples having TP53 mutations: " + str(CCNE1_amp_tp53_count),
    " CCNE1 not amplified samples having TP53 mutations: " + str(CCNE1_no_amp_tp53_count),
    " CCNE1 amplified samples having no TP53 mutation: " + str(CCNE1_amp_no_tp53_count),
    " CCNE1 not amplified samples having no TP53 mutations: " + str(CCNE1_no_amp_no_tp53_count),
    " Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [26]:
# 20) Per-sample test: do tumors in which ERBB2 and FGFR1 are both focally
#     amplified carry TP53 mutations more often than tumors in which neither
#     gene is focally amplified?  Tumors with exactly one of the two genes
#     amplified belong to neither group.
ERBB2_amp_tp53_count = 0
ERBB2_no_amp_tp53_count = 0
ERBB2_amp_no_tp53_count = 0
ERBB2_no_amp_no_tp53_count = 0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    tp53_mutated = "TP53" in sample.genes_affected
    if sample.checkFocalGeneAmp("ERBB2"):
        # ERBB2 amplified: only counts when FGFR1 is amplified as well.
        if sample.checkFocalGeneAmp("FGFR1"):
            if tp53_mutated:
                ERBB2_amp_tp53_count += 1
            else:
                ERBB2_amp_no_tp53_count += 1
    elif not sample.checkFocalGeneAmp("FGFR1"):
        # Neither gene amplified: reference group.
        if tp53_mutated:
            ERBB2_no_amp_tp53_count += 1
        else:
            ERBB2_no_amp_no_tp53_count += 1

# Rows: both amplified / neither amplified; columns: TP53 mutated / wild type.
fisher_table = [
    [ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count],
    [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count],
]
oddsratio, pvalue = scipy.stats.fisher_exact(fisher_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(fisher_table, alternative='greater')

report_lines = [
    "",
    " 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors",
    " ERBB2 and FGFR1 amplified samples having TP53 mutations: " + str(ERBB2_amp_tp53_count),
    " Not amplified samples having TP53 mutations: " + str(ERBB2_no_amp_tp53_count),
    " ERBB2 and FGFR1 amplified samples having no TP53 mutation: " + str(ERBB2_amp_no_tp53_count),
    " Not amplified samples having no TP53 mutations: " + str(ERBB2_no_amp_no_tp53_count),
    " Fisher-exact test: Odds-ratio: " + str(oddsratio) + " P-value: " + str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + " P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
In [27]:
# 21) Compare MYC RNA-seq expression between TP53-mutated and TP53 wild-type
#     tumors (t-test and Mann-Whitney U), and save a box plot of both groups.
MYC_expr_TP53 = []
MYC_expr_no_TP53 = []

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    # Pick the destination group first, then add the value if one is available.
    if "TP53" in sample.genes_affected:
        expr_group = MYC_expr_TP53
    else:
        expr_group = MYC_expr_no_TP53
    myc_expr = sample.getRNASeqFromGene("MYC")
    if myc_expr != "n/a":  # "n/a" is the sentinel for a missing expression value
        expr_group.append(myc_expr)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box order matches the labels: wild type first, mutated second.
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['TP53 WT', 'TP53 mut'])
plt.savefig('PanCancer_MYC_expression_TP53_mut.png', dpi=200)

report_lines = [
    "",
    " 21) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53",
    " TP53 mutated MYC expr mean: " + str(MYC_expr_TP53_series.mean()) + " Samples: " + str(len(MYC_expr_TP53)),
    " TP53 wildtype MYC expr mean: " + str(MYC_expr_no_TP53_series.mean()) + " Samples: " + str(len(MYC_expr_no_TP53)),
    " MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: " + str(t) + " p-value:" + str(p),
    " MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: " + str(t1) + " p-value:" + str(p1),
]
print("\n".join(report_lines))
In [28]:
# 22) Compare MYC RNA-seq expression between tumors with a deleterious TP53
#     mutation (frameshift indel or nonsense) and TP53 wild-type tumors
#     (t-test and Mann-Whitney U), and save a box plot of both groups.
#     Tumors whose TP53 mutations are all non-deleterious are in neither group.
MYC_expr_TP53 = []
MYC_expr_no_TP53 = []

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    # True as soon as one deleterious TP53 mutation is seen in this sample.
    del_mut_found = any(
        mutation.gene == "TP53"
        and mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
        for mutation in sample.somatic_mutations
    )

    if del_mut_found:
        expr_group = MYC_expr_TP53
    elif "TP53" not in sample.genes_affected:
        expr_group = MYC_expr_no_TP53
    else:
        continue
    myc_expr = sample.getRNASeqFromGene("MYC")
    if myc_expr != "n/a":  # "n/a" is the sentinel for a missing expression value
        expr_group.append(myc_expr)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box order matches the labels: wild type first, deleterious mutation second.
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['TP53 WT', 'TP53 del mut'])
plt.savefig('PanCancer_MYC_expression_TP53_delmut.png', dpi=200)

report_lines = [
    "",
    " 22) Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53",
    " TP53 mutated MYC expr mean: " + str(MYC_expr_TP53_series.mean()) + " Samples: " + str(len(MYC_expr_TP53)),
    " TP53 wildtype MYC expr mean: " + str(MYC_expr_no_TP53_series.mean()) + " Samples: " + str(len(MYC_expr_no_TP53)),
    " MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: " + str(t) + " p-value:" + str(p),
    " MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: " + str(t1) + " p-value:" + str(p1),
]
print("\n".join(report_lines))
In [29]:
# 23) Within samples that carry a focal MYC amplification, compare MYC
#     expression between TP53-mutated and TP53-wildtype samples.
MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
for sample in samples.values():
    # Keep only samples with somatic mutation data and a focal MYC amplification.
    if not sample.somatic_mutation_data or not sample.checkFocalGeneAmp("MYC"):
        continue

    if "TP53" in sample.genes_affected:
        target_group = MYC_expr_TP53
    else:
        target_group = MYC_expr_no_TP53

    myc_expr = sample.getRNASeqFromGene("MYC")
    # Values reported as "n/a" are left out of both groups.
    if myc_expr != "n/a":
        target_group.append(myc_expr)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)

# Two-sample t-test and Mann-Whitney U test on the raw expression lists.
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box plot: wildtype group first, mutated group second (matches the labels).
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['Focal MYC, TP53 WT', 'Focal MYC, TP53 mut'])
plt.savefig('PanCancer_MYC_expression_focal_MYC_TP53_mut.png', dpi=200)

print("")
print(" 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples")
print(" TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+" Samples: "+str(len(MYC_expr_TP53)))
print(" TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+" Samples: "+str(len(MYC_expr_no_TP53)))
print(" MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+" p-value:"+str(p))
print(" MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+" p-value:"+str(p1))
In [8]:
# 23) Non-breast variant: within samples that carry a focal MYC amplification,
#     compare MYC expression between TP53-mutated and TP53-wildtype samples,
#     skipping samples without clinical data and samples whose primary site
#     is "breast".
#
# The figure is written to its own "noBreast" file. This cell previously saved
# to the same path as the pan-cancer focal-MYC cell and overwrote that figure.
MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
no_expr_data = 0  # eligible samples whose MYC expression is reported as "n/a"
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    # Clinical data is checked before the primary site is read.
    if not sample.clinical:
        continue
    if sample.primarysiteofdesease == "breast":
        continue
    if not sample.checkFocalGeneAmp("MYC"):
        continue

    myc_expr = sample.getRNASeqFromGene("MYC")
    if myc_expr == "n/a":
        no_expr_data += 1
        continue

    # Expression is known to be present here, so no second "n/a" check is needed.
    if "TP53" in sample.genes_affected:
        MYC_expr_TP53.append(myc_expr)
    else:
        MYC_expr_no_TP53.append(myc_expr)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)

# Two-sample t-test and Mann-Whitney U test on the raw expression lists.
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box plot: wildtype group first, mutated group second (matches the labels).
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['Focal MYC, TP53 WT', 'Focal MYC, TP53 mut'])
plt.savefig('PanCancer_noBreast_MYC_expression_focal_MYC_TP53_mut.png', dpi=200)

print("")
print(" 23) Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples")
print(" Expression data missing: "+str(no_expr_data))
print(" TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+" Samples: "+str(len(MYC_expr_TP53)))
print(" TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+" Samples: "+str(len(MYC_expr_no_TP53)))
print(" MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+" p-value:"+str(p))
print(" MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+" p-value:"+str(p1))
In [10]:
# 23) Within samples whose estimated MYC copy number is high, compare MYC
#     expression between TP53-mutated and TP53-wildtype samples.
#     Copy number is derived from the log2-ratio as 2 * 2**log2; samples with
#     no log2 value or a copy number below 4 are skipped (so exactly 4 is kept).
myc_locus = gene_positions['MYC']

MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    if log2_MYC is None:
        continue
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    if copynumber_MYC < 4:
        continue

    if "TP53" in sample.genes_affected:
        target_group = MYC_expr_TP53
    else:
        target_group = MYC_expr_no_TP53

    myc_expr = sample.getRNASeqFromGene("MYC")
    # Values reported as "n/a" are left out of both groups.
    if myc_expr != "n/a":
        target_group.append(myc_expr)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)

# Two-sample t-test and Mann-Whitney U test on the raw expression lists.
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box plot: wildtype group first, mutated group second (matches the labels).
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['CN>4 MYC, TP53 WT', 'CN>4 MYC, TP53 mut'])
plt.savefig('PanCancer_MYC_expression_cn4_MYC_TP53_mut.png', dpi=200)

report_lines = [
    "",
    " 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples",
    " TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+" Samples: "+str(len(MYC_expr_TP53)),
    " TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+" Samples: "+str(len(MYC_expr_no_TP53)),
    " MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+" p-value:"+str(p),
    " MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+" p-value:"+str(p1),
]
for report_line in report_lines:
    print(report_line)
In [12]:
# 23) Non-breast variant: within samples whose estimated MYC copy number is
#     high, compare MYC expression between TP53-mutated and TP53-wildtype
#     samples. Samples without clinical data or with primary site "breast"
#     are skipped. Copy number is derived from the log2-ratio as 2 * 2**log2;
#     samples with no log2 value or a copy number below 4 are skipped.
myc_locus = gene_positions['MYC']

MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
no_expr_data = 0  # eligible samples whose MYC expression is reported as "n/a"
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    # Clinical data is checked before the primary site is read.
    if not sample.clinical:
        continue
    if sample.primarysiteofdesease == "breast":
        continue

    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    if log2_MYC is None:
        continue
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    if copynumber_MYC < 4:
        continue

    myc_expr = sample.getRNASeqFromGene("MYC")
    if myc_expr == "n/a":
        no_expr_data += 1
        continue

    if "TP53" in sample.genes_affected:
        MYC_expr_TP53.append(myc_expr)
    else:
        MYC_expr_no_TP53.append(myc_expr)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)

# Two-sample t-test and Mann-Whitney U test on the raw expression lists.
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box plot: wildtype group first, mutated group second (matches the labels).
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['CN>4 MYC, TP53 WT', 'CN>4 MYC, TP53 mut'])
plt.savefig('PanCancer_noBreast_MYC_expression_cn4_MYC_TP53_mut.png', dpi=200)

print("")
print(" 23) Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples")
print(" Expression data missing: "+str(no_expr_data))
print(" TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+" Samples: "+str(len(MYC_expr_TP53)))
print(" TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+" Samples: "+str(len(MYC_expr_no_TP53)))
print(" MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+" p-value:"+str(p))
print(" MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+" p-value:"+str(p1))
MYC is considered overexpressed when the Z-score of its normalized expression level, computed relative to the mean and standard deviation of the control samples, is > 3.
In [31]:
# Test whether TP53 mutations are associated with MYC overexpression.
# A sample's MYC expression is converted to a Z-score against the mean and
# standard deviation of MYC expression in the control samples; Z > 3 counts as
# overexpressed and Z <= 3 as not overexpressed. The original cell used "< 3"
# for the second class, which left a Z-score of exactly 3 in neither class.
control_MYC_expression = []
for control in controls.values():
    control_expr = control.getRNASeqFromGene("MYC")
    # Controls whose MYC expression is reported as "n/a" are left out.
    if control_expr != "n/a":
        control_MYC_expression.append(control_expr)

MYC_expr_series = pandas.Series(control_MYC_expression)
control_MYC_expression_mean = MYC_expr_series.mean()
control_MYC_expression_stdev = MYC_expr_series.std()

# 2x2 contingency counts: MYC overexpression (yes/no) x TP53 mutation (yes/no).
MYC_expr_tp53_count = 0
MYC_no_expr_tp53_count = 0
MYC_expr_no_tp53_count = 0
MYC_no_expr_no_tp53_count = 0
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    sample_expr = sample.getRNASeqFromGene("MYC")
    if sample_expr == "n/a":
        continue

    z_score_MYC = (sample_expr - control_MYC_expression_mean) / control_MYC_expression_stdev
    tp53_mutated = "TP53" in sample.genes_affected

    if z_score_MYC > 3:
        if tp53_mutated:
            MYC_expr_tp53_count += 1
        else:
            MYC_expr_no_tp53_count += 1
    elif z_score_MYC <= 3:
        # Explicit "<= 3" rather than a bare else: a NaN Z-score satisfies
        # neither comparison and stays uncounted, as in the original cell.
        if tp53_mutated:
            MYC_no_expr_tp53_count += 1
        else:
            MYC_no_expr_no_tp53_count += 1

contingency_table = [
    [MYC_expr_tp53_count, MYC_expr_no_tp53_count],
    [MYC_no_expr_tp53_count, MYC_no_expr_no_tp53_count],
]
# Two-sided test first, then the one-sided "greater" alternative.
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 23) Test whether TP53 mutations occur in overexpressed MYC-samples")
print(" Control samples:"+str(len(controls.values())))
print(" Control samples mean RSEM: "+str(control_MYC_expression_mean))
print(" Control samples st.dev. RSEM: "+str(control_MYC_expression_stdev))
print(" MYC overexpressed samples having TP53 mutations: "+str(MYC_expr_tp53_count))
print(" MYC not overexpressed samples having TP53 mutations: "+str(MYC_no_expr_tp53_count))
print(" MYC overexpressed samples having no TP53 mutation: "+str(MYC_expr_no_tp53_count))
print(" MYC not overexpressed samples having no TP53 mutations: "+str(MYC_no_expr_no_tp53_count))
print(" Fisher-exact test: Odds-ratio: "+str(oddsratio)+" P-value: "+str(pvalue))
print(" Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+" P-value: "+str(pvalue_greater))
For focal amplification calling, a difference of 0.2 in log2-ratios relative to the neighbouring 20 Mbp on each side is required. Here, we restrict focal amplifications to segments whose log2-ratio difference is greater than 0.4 on both the upstream and the downstream side.
In [32]:
# 1) Stringent variant: test whether tumors with a stringent focal MYC
#    amplification carry TP53 mutations more often than the remaining tumors.
#    "Stringent" means a focal amplification containing MYC whose
#    log2_upstream_adj and log2_downstream_adj are both > 0.4.
#    Stringently amplified samples that also have a focal CCND1 amplification
#    are not counted in any cell of the table.
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    stringent_MYC = any(
        "MYC" in focal_amp.genes
        and focal_amp.log2_upstream_adj > 0.4
        and focal_amp.log2_downstream_adj > 0.4
        for focal_amp in sample.focal_amplifications
    )
    tp53_mutated = "TP53" in sample.genes_affected

    if stringent_MYC:
        # CCND1 is only checked for stringently MYC-amplified samples.
        if sample.checkFocalGeneAmp("CCND1"):
            continue
        if tp53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

contingency_table = [
    [MYC_amp_tp53_count, MYC_amp_no_tp53_count],
    [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count],
]
# Two-sided test first, then the one-sided "greater" alternative.
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

report_lines = [
    "",
    " 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors",
    " MYC amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count),
    " MYC not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count),
    " MYC amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count),
    " MYC not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count),
    " Fisher-exact test: Odds-ratio: "+str(oddsratio)+" P-value: "+str(pvalue),
    " Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+" P-value: "+str(pvalue_greater),
]
for report_line in report_lines:
    print(report_line)
In [32]: