Test whether MYC amplified samples correlate with TP53 mutations


In [1]:
import NotebookImport
from TCGA_analysis_BRCA_import import *
from IPython.display import HTML


importing IPython notebook from TCGA_analysis_BRCA_import.ipynb
Loading Focal Amplification data
Loading RNASeq data
Loading RNASeq data of controls
Loading CNV Data
Loading Somatic Mutation Data
TCGA-A8-A07C-01A not found
TCGA-BH-A0HN-01A not found
TCGA-BH-A0B8-01A not found
TCGA-BH-A0HF-01A not found
TCGA-A7-A4SC-01A not found
TCGA-AR-A0TU-01A not found
TCGA-AR-A1AT-01A not found
TCGA-E2-A1LS-01A not found
TCGA-A2-A0CZ-01A not found
TCGA-BH-A0HL-01A not found
TCGA-AN-A0G0-01A not found
TCGA-BH-A0B1-01A not found
TCGA-B6-A0I8-01A not found
TCGA-B6-A0I6-01A not found
Loading Clinical Data
Samples: 1086
  --Focal Data: 1086
  --CNV:        1086
  --RNASeq:     1081
  --Somatic:    968
  --Clinical:   1058

Test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors

Remove samples where CCND1 is also focally amplified


In [4]:
# 1) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("MYC") and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("MYC") and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 1) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   MYC amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 5) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC amplified samples having TP53 mutations: 29
   MYC not amplified samples having TP53 mutations: 261
   MYC amplified samples having no TP53 mutation: 22
   MYC not amplified samples having no TP53 mutations: 646
   Fisher-exact test: Odds-ratio: 3.26262626263   P-value: 5.86339805517e-05
   Fisher-exact test (one-sided greater): Odds-ratio: 3.26262626263   P-value: 4.49500578725e-05

Test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [6]:
# 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC_amp_tp53_count=0
   MYC_no_amp_tp53_count=0
   MYC_amp_no_tp53_count=0
   MYC_no_amp_no_tp53_count=0

   for sample in samples.values():
      if not sample.somatic_mutation_data:
         continue
      if sample.checkFocalGeneAmp("MYC") and sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
         MYC_amp_tp53_count += 1
      if sample.checkFocalGeneAmp("MYC") and sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
         MYC_amp_no_tp53_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
         MYC_no_amp_tp53_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
         MYC_no_amp_no_tp53_count += 1
   oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
   oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

   print ""
   print " 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
   print "   MYC and CCND1 amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count)
   print "   MYC and CCND1 not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
   print "   MYC and CCND1 amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
   print "   MYC and CCND1 not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
   print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
   print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC and CCND1 amplified samples having TP53 mutations: 7
   MYC and CCND1 not amplified samples having TP53 mutations: 215
   MYC and CCND1 amplified samples having no TP53 mutation: 3
   MYC and CCND1 not amplified samples having no TP53 mutations: 511
   Fisher-exact test: Odds-ratio: 5.54573643411   P-value: 0.0104611860509
   Fisher-exact test (one-sided greater): Odds-ratio: 5.54573643411   P-value: 0.0104611860509

Test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [8]:
# 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   CCND1_amp_tp53_count=0
   CCND1_no_amp_tp53_count=0
   CCND1_amp_no_tp53_count=0
   CCND1_no_amp_no_tp53_count=0

   for sample in samples.values():
      if not sample.somatic_mutation_data:
         continue
      if sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and "TP53" in sample.genes_affected:
         CCND1_amp_tp53_count += 1
      if sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and "TP53" not in sample.genes_affected:
         CCND1_amp_no_tp53_count += 1
      if not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
         CCND1_no_amp_tp53_count += 1
      if not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
         CCND1_no_amp_no_tp53_count += 1
   oddsratio, pvalue = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]])
   oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]], alternative='greater')

   print ""
   print " 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
   print "   CCND1 amplified samples having TP53 mutations: "+str(CCND1_amp_tp53_count)
   print "   CCND1 not amplified samples having TP53 mutations: "+str(CCND1_no_amp_tp53_count)
   print "   CCND1 amplified samples having no TP53 mutation: "+str(CCND1_amp_no_tp53_count)
   print "   CCND1 not amplified samples having no TP53 mutations: "+str(CCND1_no_amp_no_tp53_count)
   print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
   print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   CCND1 amplified samples having TP53 mutations: 46
   CCND1 not amplified samples having TP53 mutations: 244
   CCND1 amplified samples having no TP53 mutation: 135
   CCND1 not amplified samples having no TP53 mutations: 533
   Fisher-exact test: Odds-ratio: 0.744323011536   P-value: 0.126839398345
   Fisher-exact test (one-sided greater): Odds-ratio: 0.744323011536   P-value: 0.953910395883

Test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors


In [9]:
# 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber = "n/a"
  log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2 != None):
     copynumber = 2.0 * 2.0 ** log2
  else:
     copynumber="n/a"
  if copynumber != "n/a" and copynumber > 4 and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if copynumber != "n/a" and copynumber < 4 and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if copynumber != "n/a" and copynumber > 4 and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if copynumber != "n/a" and copynumber < 4 and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors"
print "   MYC CN>4 samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4 samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
   MYC CN>4 samples having TP53 mutations: 63
   MYC CN<4 samples having TP53 mutations: 233
   MYC CN>4 samples having no TP53 mutation: 49
   MYC CN<4 samples having no TP53 mutations: 622
   Fisher-exact test (two-sided): Odds-ratio: 3.43225015328   P-value: 2.96649712127e-09
   Fisher-exact test (one-sided greater): Odds-ratio: 3.43225015328   P-value: 1.67811353348e-09

Test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors


In [11]:
# 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors
CCND1_amp_tp53_count=0
CCND1_no_amp_tp53_count=0
CCND1_amp_no_tp53_count=0
CCND1_no_amp_no_tp53_count=0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    copynumber = "n/a"
    log2 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
    if (log2 != None):
        copynumber = 2.0 * 2.0 ** log2
    else:
        copynumber="n/a"
    if copynumber != "n/a" and copynumber > 4 and "TP53" in sample.genes_affected:
        CCND1_amp_tp53_count += 1
    if copynumber != "n/a" and copynumber < 4 and "TP53" in sample.genes_affected:
        CCND1_no_amp_tp53_count += 1
    if copynumber != "n/a" and copynumber > 4 and "TP53" not in sample.genes_affected:
        CCND1_amp_no_tp53_count += 1
    if copynumber != "n/a" and copynumber < 4 and "TP53" not in sample.genes_affected:
        CCND1_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors"
print "   CCND1 CN>4 samples having TP53 mutations: "+str(CCND1_amp_tp53_count)
print "   CCND1 CN<4 samples having TP53 mutations: "+str(CCND1_no_amp_tp53_count)
print "   CCND1 CN>4 samples having no TP53 mutation: "+str(CCND1_amp_no_tp53_count)
print "   CCND1 CN<4 samples having no TP53 mutations: "+str(CCND1_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors
   CCND1 CN>4 samples having TP53 mutations: 35
   CCND1 CN<4 samples having TP53 mutations: 261
   CCND1 CN>4 samples having no TP53 mutation: 91
   CCND1 CN<4 samples having no TP53 mutations: 577
   Fisher-exact test (two-sided): Odds-ratio: 0.850279988211   P-value: 0.470182247218
   Fisher-exact test (one-sided greater): Odds-ratio: 0.850279988211   P-value: 0.806524470023

Test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors


In [15]:
# 6) test whether tumors with copynumber > 4 in MYC but coynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_MYC = "n/a"
  log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2_MYC != None):
     copynumber_MYC = 2.0 * 2.0 ** log2_MYC
  else:
     copynumber_MYC="n/a"
  copynumber_CCND1 = "n/a"
  log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
  if (log2_CCND1 != None):
     copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
  else:
     copynumber_CCND1="n/a"
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 6) test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors"
print "   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 6) test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: 51
   MYC CN<4 samples having TP53 mutations: 233
   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: 39
   MYC CN<4 samples having no TP53 mutations: 622
   Fisher-exact test (two-sided): Odds-ratio: 3.49092109607   P-value: 4.36266476341e-08
   Fisher-exact test (one-sided greater): Odds-ratio: 3.49092109607   P-value: 2.87098113748e-08

Test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors


In [16]:
# 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_MYC = "n/a"
  log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2_MYC != None):
     copynumber_MYC = 2.0 * 2.0 ** log2_MYC
  else:
     copynumber_MYC="n/a"
  copynumber_CCND1 = "n/a"
  log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
  if (log2_CCND1 != None):
     copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
  else:
     copynumber_CCND1="n/a"
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors"
print "   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: 11
   MYC CN<4 samples having TP53 mutations: 233
   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: 10
   MYC CN<4 samples having no TP53 mutations: 622
   Fisher-exact test (two-sided): Odds-ratio: 2.9364806867   P-value: 0.0230331066117
   Fisher-exact test (one-sided greater): Odds-ratio: 2.9364806867   P-value: 0.0140511529335

Check mutation spectrum in all samples


In [17]:
%matplotlib inline

In [17]:
# 8) check TP53 mutation spectrum
TP53_types = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      elif mutation.mut_type == "Silent":
         TP53_types.append("Silent")
      elif mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins":
         TP53_types.append("Frameshift_Indel")
      elif mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins":
         TP53_types.append("Inframe_Indel")
      elif mutation.mut_type == "Splice_Site":
         TP53_types.append("Splice-Site")
      elif mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types.append("L2_Missense")
      elif mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types.append("L3_Missense")
      elif mutation.mut_type == "Missense_Mutation":
         TP53_types.append("Other_Missense")
      elif mutation.mut_type == "Nonsense_Mutation":
         TP53_types.append("Nonsense")
      elif mutation.mut_type == "Nonstop_Mutation":
         TP53_types.append("Nonstop")
      elif mutation.mut_type == "RNA":
         TP53_types.append("RNA")

TP53_type_series = pandas.Categorical(sorted(TP53_types))
fig_TP53=TP53_type_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g', 'r', 'c', 'm', 'y','k','w'], title=None)
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('BRCA_mut_spectrum.png', dpi=200)

print ""
print " 8) check TP53 mutation spectrum"
print "   Total mutations: "+str(len(TP53_types))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_type_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_type_series.describe()['counts'][count])+"</td><td>"+str(TP53_type_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 8) check TP53 mutation spectrum
   Total mutations: 330
   
Out[17]:
CategoryCountFrequency
Frameshift_Indel550.166666666667
Inframe_Indel50.0151515151515
L2_Missense610.184848484848
L3_Missense220.0666666666667
Nonsense460.139393939394
Other_Missense1140.345454545455
Silent50.0151515151515
Splice-Site220.0666666666667

Check mutation spectrum in MYC CN<4 samples


In [5]:
# 8) check TP53 mutation spectrum
TP53_types = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      continue
   if copynumber_MYC > 4:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      elif mutation.mut_type == "Silent":
         TP53_types.append("Silent")
      elif mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins":
         TP53_types.append("Frameshift_Indel")
      elif mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins":
         TP53_types.append("Inframe_Indel")
      elif mutation.mut_type == "Splice_Site":
         TP53_types.append("Splice-Site")
      elif mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types.append("L2_Missense")
      elif mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types.append("L3_Missense")
      elif mutation.mut_type == "Missense_Mutation":
         TP53_types.append("Other_Missense")
      elif mutation.mut_type == "Nonsense_Mutation":
         TP53_types.append("Nonsense")
      elif mutation.mut_type == "Nonstop_Mutation":
         TP53_types.append("Nonstop")
      elif mutation.mut_type == "RNA":
         TP53_types.append("RNA")

TP53_type_series = pandas.Categorical(sorted(TP53_types))
fig_TP53=TP53_type_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g', 'r', 'c', 'm', 'y','k','w'], title=None)
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('BRCA_MYC_cn_smaller4_mut_spectrum.png', dpi=200)

print ""
print " 8) check TP53 mutation spectrum"
print "   Total mutations: "+str(len(TP53_types))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_type_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_type_series.describe()['counts'][count])+"</td><td>"+str(TP53_type_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 8) check TP53 mutation spectrum
   Total mutations: 263
   
Out[5]:
CategoryCountFrequency
Frameshift_Indel420.15969581749
Inframe_Indel50.0190114068441
L2_Missense510.19391634981
L3_Missense160.0608365019011
Nonsense330.125475285171
Other_Missense950.361216730038
Silent40.0152091254753
Splice-Site170.06463878327

Check mutation spectrum in MYC (and not CCND1) focally amplified samples


In [18]:
# 9) check TP53 mutation spectrum in MYC amplified tumors
TP53_types_MYC = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Silent":
         TP53_types_MYC.append("Silent")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_MYC.append("Frameshift_Indel")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_MYC.append("Inframe_Indel")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Splice_Site":
         TP53_types_MYC.append("Splice-Site")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_MYC.append("L2_Missense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_MYC.append("L3_Missense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Missense_Mutation":
         TP53_types_MYC.append("Other_Missense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_MYC.append("Nonsense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_MYC.append("Nonstop")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "RNA":
         TP53_types_MYC.append("RNA")

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
fig_TP53=TP53_type_MYC_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g', 'r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('BRCA_MYC_focal_mut_spectrum.png', dpi=200)

print ""
print " 9) check TP53 mutation spectrum in MYC amplified tumors"
print "   Total mutations: "+str(len(TP53_types_MYC))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_type_MYC_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_type_MYC_series.describe()['counts'][count])+"</td><td>"+str(TP53_type_MYC_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 9) check TP53 mutation spectrum in MYC amplified tumors
   Total mutations: 31
   
Out[18]:
CategoryCountFrequency
Frameshift_Indel70.225806451613
Inframe_Indel10.0322580645161
L2_Missense30.0967741935484
L3_Missense30.0967741935484
Nonsense50.161290322581
Other_Missense70.225806451613
Silent10.0322580645161
Splice-Site40.129032258065

Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4


In [19]:
# 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4
TP53_types_MYC = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Silent":
         TP53_types_MYC.append("Silent")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_MYC.append("Frameshift_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_MYC.append("Inframe_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Splice_Site":
         TP53_types_MYC.append("Splice-Site")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_MYC.append("L2_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_MYC.append("L3_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Missense_Mutation":
         TP53_types_MYC.append("Other_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_MYC.append("Nonsense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_MYC.append("Nonstop")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "RNA":
         TP53_types_MYC.append("RNA")

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
fig_TP53=TP53_type_MYC_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('BRCA_MYC_cn4_mut_spectrum.png', dpi=200)

print ""
print " 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4"
print "   Total mutations: "+str(len(TP53_types_MYC))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_type_MYC_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_type_MYC_series.describe()['counts'][count])+"</td><td>"+str(TP53_type_MYC_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4
   Total mutations: 54
   
Out[19]:
CategoryCountFrequency
Frameshift_Indel100.185185185185
L2_Missense70.12962962963
L3_Missense60.111111111111
Nonsense120.222222222222
Other_Missense130.240740740741
Silent10.0185185185185
Splice-Site50.0925925925926

Check mutation spectrum in CCND1 (and not MYC) focally amplified samples


In [20]:
# 11) check TP53 mutation spectrum in CCND1 amplified tumors
TP53_types_CCND1 = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Silent":
         TP53_types_CCND1.append("Silent")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_CCND1.append("Frameshift_Indel")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_CCND1.append("Inframe_Indel")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Splice_Site":
         TP53_types_CCND1.append("Splice-Site")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_CCND1.append("L2_Missense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_CCND1.append("L3_Missense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Missense_Mutation":
         TP53_types_CCND1.append("Other_Missense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_CCND1.append("Nonsense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_CCND1.append("Nonstop")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "RNA":
         TP53_types_CCND1.append("RNA")

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
fig_TP53=TP53_types_CCND1_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g', 'r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('BRCA_CCND1_focal_mut_spectrum.png', dpi=200)

print ""
print " 11) check TP53 mutation spectrum in CCND1 focally amplified tumors"
print "   Total mutations: "+str(len(TP53_types_CCND1))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_types_CCND1_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_types_CCND1_series.describe()['counts'][count])+"</td><td>"+str(TP53_types_CCND1_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 11) check TP53 mutation spectrum in CCND1 focally amplified tumors
   Total mutations: 54
   
Out[20]:
CategoryCountFrequency
Frameshift_Indel70.12962962963
Inframe_Indel10.0185185185185
L2_Missense100.185185185185
L3_Missense70.12962962963
Nonsense60.111111111111
Other_Missense200.37037037037
Silent10.0185185185185
Splice-Site20.037037037037

Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4


In [21]:
# 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4
TP53_types_CCND1 = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Silent":
         TP53_types_CCND1.append("Silent")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_CCND1.append("Frameshift_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_CCND1.append("Inframe_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Splice_Site":
         TP53_types_CCND1.append("Splice-Site")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_CCND1.append("L2_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_CCND1.append("L3_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Missense_Mutation":
         TP53_types_CCND1.append("Other_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_CCND1.append("Nonsense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_CCND1.append("Nonstop")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "RNA":
         TP53_types_CCND1.append("RNA")

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
fig_TP53=TP53_types_CCND1_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g','r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('BRCA_CCND1_cn4_mut_spectrum.png', dpi=200)


print ""
print " 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4"
print "   Total mutations: "+str(len(TP53_types_CCND1))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_types_CCND1_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_types_CCND1_series.describe()['counts'][count])+"</td><td>"+str(TP53_types_CCND1_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4
   Total mutations: 29
   
Out[21]:
CategoryCountFrequency
Frameshift_Indel20.0689655172414
Inframe_Indel10.0344827586207
L2_Missense60.206896551724
L3_Missense50.172413793103
Nonsense40.137931034483
Other_Missense100.344827586207
Silent10.0344827586207

Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples


In [22]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_amp_frameshift_count += 1
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_amp_no_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_no_amp_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples"
print "   MYC amplified Frameshift Indels: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified Frameshift Indels: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples
   MYC amplified Frameshift Indels: 7
   MYC amplified other mutations: 24
   MYC not amplified Frameshift Indels: 45
   MYC not amplified other mutations: 247
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 1.60092592593
      -- p-value: 0.306118867545
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 1.60092592593
      -- p-value: 0.213283512942

Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples


In [3]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_amp_frameshift_count += 1
      if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_amp_no_frameshift_count += 1
      if copynumber_MYC != "n/a" and copynumber_MYC < 4 and not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_no_amp_frameshift_count += 1
      if copynumber_MYC != "n/a" and copynumber_MYC < 4 and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples"
print "   MYC amplified Frameshift Indels: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified Frameshift Indels: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples
   MYC amplified Frameshift Indels: 10
   MYC amplified other mutations: 44
   MYC not amplified Frameshift Indels: 35
   MYC not amplified other mutations: 221
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 1.43506493506
      -- p-value: 0.394906197944
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 1.43506493506
      -- p-value: 0.234985508355

Test whether Deleterious mutations (Frameshift indels, Nonsense) occur more frequently in MYC (and not CCND1) focally amplified samples


In [3]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation" ):
         MYC_amp_frameshift_count += 1
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_amp_no_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation"):
         MYC_no_amp_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) focally amplified samples"
print "   MYC amplified deleterious mutations: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified deleterious mutations: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) focally amplified samples
   MYC amplified deleterious mutations: 12
   MYC amplified other mutations: 19
   MYC not amplified deleterious mutations: 86
   MYC not amplified other mutations: 206
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 1.51285189718
      -- p-value: 0.307139199574
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 1.51285189718
      -- p-value: 0.193185683693

Test whether Deleterious mutations (Frameshift indels, Nonsense) occur more frequently in MYC (and not CCND1) samples with copynumber > 4


In [5]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      continue
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC > 4 and copynumber_CCND1 < 4 and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation" ):
         MYC_amp_frameshift_count += 1
      if copynumber_MYC > 4 and copynumber_CCND1 < 4 and not sample.checkFocalGeneAmp("CCND1") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_amp_no_frameshift_count += 1
      if copynumber_MYC < 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation"):
         MYC_no_amp_frameshift_count += 1
      if copynumber_MYC < 4 and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) CN>4 samples"
print "   MYC amplified deleterious mutations: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified deleterious mutations: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) CN>4 samples
   MYC amplified deleterious mutations: 20
   MYC amplified other mutations: 30
   MYC not amplified deleterious mutations: 75
   MYC not amplified other mutations: 188
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 1.67111111111
      -- p-value: 0.130447054337
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 1.67111111111
      -- p-value: 0.0752620413679

Test whether MYC focally amplified tumors contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than non focally amplified Tumors


In [41]:
# 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   del_mut_found = False
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation":
         del_mut_found = True
         break
   if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and del_mut_found:
      MYC_amp_tp53_count += 1
   if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and not del_mut_found:
      MYC_amp_no_tp53_count += 1
   if not sample.checkFocalGeneAmp("MYC") and del_mut_found:
      MYC_no_amp_tp53_count += 1
   if not sample.checkFocalGeneAmp("MYC") and not del_mut_found:
      MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors"
print "   MYC amplified samples having deleterious TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC not amplified samples having deleterious TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC amplified samples having no deleterious TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC not amplified samples having no deleterious TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors
   MYC amplified samples having deleterious TP53 mutations: 12
   MYC not amplified samples having deleterious TP53 mutations: 82
   MYC amplified samples having no deleterious TP53 mutation: 39
   MYC not amplified samples having no deleterious TP53 mutations: 825
   Fisher-exact test: Odds-ratio: 3.095684803   P-value: 0.00246132110443
   Fisher-exact test (one-sided greater): Odds-ratio: 3.095684803   P-value: 0.00246132110443

Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC


In [44]:
# 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   del_mut_found = False
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation":
         del_mut_found = True
         break
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and del_mut_found:
      MYC_amp_tp53_count += 1
   if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and not del_mut_found:
      MYC_amp_no_tp53_count += 1
   if copynumber_MYC != "n/a" and copynumber_MYC < 4 and del_mut_found:
      MYC_no_amp_tp53_count += 1
   if copynumber_MYC != "n/a" and copynumber_MYC < 4 and not del_mut_found:
      MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC"
print "   MYC CN>4 and CCND1 CN<4 samples having deleterious TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having deleterious TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4 and CCND1 CN<4 having no deleterious TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no deleterious TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC
   MYC CN>4 and CCND1 CN<4 samples having deleterious TP53 mutations: 21
   MYC CN<4 samples having deleterious TP53 mutations: 72
   MYC CN>4 and CCND1 CN<4 having no deleterious TP53 mutation: 69
   MYC CN<4 samples having no deleterious TP53 mutations: 783
   Fisher-exact test: Odds-ratio: 3.3097826087   P-value: 5.41169000779e-05
   Fisher-exact test (one-sided greater): Odds-ratio: 3.3097826087   P-value: 5.41169000778e-05

Test whether FGFR1 focally amplified (but not ERBB2-amplified) tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [4]:
# 15) test whether FGFR1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
FGFR1_amp_tp53_count=0
FGFR1_no_amp_tp53_count=0
FGFR1_amp_no_tp53_count=0
FGFR1_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("FGFR1") and not sample.checkFocalGeneAmp("ERBB2") and "TP53" in sample.genes_affected:
     FGFR1_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("FGFR1") and not sample.checkFocalGeneAmp("ERBB2") and "TP53" not in sample.genes_affected:
     FGFR1_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("FGFR1") and "TP53" in sample.genes_affected:
     FGFR1_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("FGFR1") and "TP53" not in sample.genes_affected:
     FGFR1_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 15) test whether FGFR1 (but not ERBB2) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   FGFR1 amplified samples having TP53 mutations: "+str(FGFR1_amp_tp53_count)
print "   FGFR1 not amplified samples having TP53 mutations: "+str(FGFR1_no_amp_tp53_count)
print "   FGFR1 amplified samples having no TP53 mutation: "+str(FGFR1_amp_no_tp53_count)
print "   FGFR1 not amplified samples having no TP53 mutations: "+str(FGFR1_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 15) test whether FGFR1 (but not ERBB2) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   FGFR1 amplified samples having TP53 mutations: 33
   FGFR1 not amplified samples having TP53 mutations: 254
   FGFR1 amplified samples having no TP53 mutation: 86
   FGFR1 not amplified samples having no TP53 mutations: 581
   Fisher-exact test: Odds-ratio: 0.877723860099   P-value: 0.594236792843
   Fisher-exact test (one-sided greater): Odds-ratio: 0.877723860099   P-value: 0.757844007181

Test whether FGFR1 CN>4 (but ERBB2 CN < 4) tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [6]:
# 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4
FGFR1_amp_tp53_count=0
FGFR1_no_amp_tp53_count=0
FGFR1_amp_no_tp53_count=0
FGFR1_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_FGFR1 = "n/a"
  log2_FGFR1 = sample.getLog2FromGene(gene_positions['FGFR1'].chrom,gene_positions['FGFR1'].start, gene_positions['FGFR1'].end)
  if (log2_FGFR1 != None):
     copynumber_FGFR1 = 2.0 * 2.0 ** log2_FGFR1
  else:
     copynumber_FGFR1="n/a"
  copynumber_ERBB2 = "n/a"
  log2_ERBB2 = sample.getLog2FromGene(gene_positions['ERBB2'].chrom,gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
  if (log2_ERBB2 != None):
     copynumber_ERBB2 = 2.0 * 2.0 ** log2_ERBB2
  else:
     copynumber_ERBB2="n/a"
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 > 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" in sample.genes_affected:
     FGFR1_amp_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and "TP53" in sample.genes_affected:
     FGFR1_no_amp_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 > 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" not in sample.genes_affected:
     FGFR1_amp_no_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and "TP53" not in sample.genes_affected:
     FGFR1_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4 Tumors"
print "   FGFR1 CN>4, ERBB2 CN<4 samples having TP53 mutations: "+str(FGFR1_amp_tp53_count)
print "   FGFR1 CN<4 samples having TP53 mutations: "+str(FGFR1_no_amp_tp53_count)
print "   FGFR1 CN>4, ERBB2 CN<4  samples having no TP53 mutation: "+str(FGFR1_amp_no_tp53_count)
print "   FGFR1 CN<4 samples having no TP53 mutations: "+str(FGFR1_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4 Tumors
   FGFR1 CN>4, ERBB2 CN<4 samples having TP53 mutations: 15
   FGFR1 CN<4 samples having TP53 mutations: 261
   FGFR1 CN>4, ERBB2 CN<4  samples having no TP53 mutation: 45
   FGFR1 CN<4 samples having no TP53 mutations: 601
   Fisher-exact test (two-sided): Odds-ratio: 0.767560664112   P-value: 0.46660681537
   Fisher-exact test (one-sided greater): Odds-ratio: 0.767560664112   P-value: 0.843671068461

Test whether ERBB2 focally amplified (but not FGFR1-amplified) tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [2]:
# 17) test whether ERBB2 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
ERBB2_amp_tp53_count=0
ERBB2_no_amp_tp53_count=0
ERBB2_amp_no_tp53_count=0
ERBB2_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("ERBB2") and not sample.checkFocalGeneAmp("FGFR1") and "TP53" in sample.genes_affected:
     ERBB2_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("ERBB2") and not sample.checkFocalGeneAmp("FGFR1") and "TP53" not in sample.genes_affected:
     ERBB2_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("ERBB2") and "TP53" in sample.genes_affected:
     ERBB2_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("ERBB2") and "TP53" not in sample.genes_affected:
     ERBB2_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 17) test whether ERBB2 (but not FGFR1) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   ERBB2 amplified samples having TP53 mutations: "+str(ERBB2_amp_tp53_count)
print "   ERBB2 not amplified samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count)
print "   ERBB2 amplified samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count)
print "   ERBB2 not amplified samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 17) test whether ERBB2 (but not FGFR1) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   ERBB2 amplified samples having TP53 mutations: 55
   ERBB2 not amplified samples having TP53 mutations: 232
   ERBB2 amplified samples having no TP53 mutation: 48
   ERBB2 not amplified samples having no TP53 mutations: 619
   Fisher-exact test: Odds-ratio: 3.05720186782   P-value: 1.78498213751e-07
   Fisher-exact test (one-sided greater): Odds-ratio: 3.05720186782   P-value: 1.36460550441e-07

Test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4


In [10]:
# 18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
ERBB2_amp_tp53_count=0
ERBB2_no_amp_tp53_count=0
ERBB2_amp_no_tp53_count=0
ERBB2_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_FGFR1 = "n/a"
  log2_FGFR1 = sample.getLog2FromGene(gene_positions['FGFR1'].chrom,gene_positions['FGFR1'].start, gene_positions['FGFR1'].end)
  if (log2_FGFR1 != None):
     copynumber_FGFR1 = 2.0 * 2.0 ** log2_FGFR1
  else:
     copynumber_FGFR1="n/a"
  copynumber_ERBB2 = "n/a"
  log2_ERBB2 = sample.getLog2FromGene(gene_positions['ERBB2'].chrom,gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
  if (log2_ERBB2 != None):
     copynumber_ERBB2 = 2.0 * 2.0 ** log2_ERBB2
  else:
     copynumber_ERBB2="n/a"
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4 and "TP53" in sample.genes_affected:
     ERBB2_amp_tp53_count += 1
  if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" in sample.genes_affected:
     ERBB2_no_amp_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4 and "TP53" not in sample.genes_affected:
     ERBB2_amp_no_tp53_count += 1
  if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" not in sample.genes_affected:
     ERBB2_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]], alternative='greater')

print ""
print "  18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4"
print "   ERBB2 CN>4, FGFR1 CN<4 samples having TP53 mutations: "+str(ERBB2_amp_tp53_count)
print "   ERBB2 CN<4 samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count)
print "   ERBB2 CN>4, FGFR1 CN<4  samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count)
print "   ERBB2 CN<4 samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


  18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4
   ERBB2 CN>4, FGFR1 CN<4 samples having TP53 mutations: 43
   ERBB2 CN<4 samples having TP53 mutations: 239
   ERBB2 CN>4, FGFR1 CN<4  samples having no TP53 mutation: 41
   ERBB2 CN<4 samples having no TP53 mutations: 624
   Fisher-exact test (two-sided): Odds-ratio: 2.73823859578   P-value: 1.65478380577e-05
   Fisher-exact test (one-sided greater): Odds-ratio: 2.73823859578   P-value: 1.31151655611e-05

Test whether tumors with copynumber > 4 in CCNE1 contain TP53 mutations more (or less) frequently than tumors with CCNE1 CN<4


In [3]:
# 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
CCNE1_amp_tp53_count=0
CCNE1_no_amp_tp53_count=0
CCNE1_amp_no_tp53_count=0
CCNE1_no_amp_no_tp53_count=0

CCNE1_amp_count = 0
CCNE1_amp_missing_somatic = 0

for sample in samples.values():
  if sample.checkFocalGeneAmp("CCNE1"):
        CCNE1_amp_count += 1
  if not sample.somatic_mutation_data:
     if sample.checkFocalGeneAmp("CCNE1"):
        CCNE1_amp_missing_somatic += 1
     continue
  if sample.checkFocalGeneAmp("CCNE1") and "TP53" in sample.genes_affected:
     CCNE1_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("CCNE1") and "TP53" not in sample.genes_affected:
     CCNE1_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("CCNE1") and "TP53" in sample.genes_affected:
     CCNE1_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("CCNE1") and "TP53" not in sample.genes_affected:
     CCNE1_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[CCNE1_amp_tp53_count, CCNE1_amp_no_tp53_count], [CCNE1_no_amp_tp53_count, CCNE1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[CCNE1_amp_tp53_count, CCNE1_amp_no_tp53_count], [CCNE1_no_amp_tp53_count, CCNE1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   CCNE1 amplified samples: "+str(CCNE1_amp_count)
print "   CCNE1 amplified samples missed due to missing somatic mutation data: "+str(CCNE1_amp_missing_somatic)
print ""
print "   CCNE1 amplified samples having TP53 mutations: "+str(CCNE1_amp_tp53_count)
print "   CCNE1 not amplified samples having TP53 mutations: "+str(CCNE1_no_amp_tp53_count)
print "   CCNE1 amplified samples having no TP53 mutation: "+str(CCNE1_amp_no_tp53_count)
print "   CCNE1 not amplified samples having no TP53 mutations: "+str(CCNE1_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   CCNE1 amplified samples: 47
   CCNE1 amplified samples missed due to missing somatic mutation data: 4

   CCNE1 amplified samples having TP53 mutations: 31
   CCNE1 not amplified samples having TP53 mutations: 266
   CCNE1 amplified samples having no TP53 mutation: 12
   CCNE1 not amplified samples having no TP53 mutations: 659
   Fisher-exact test: Odds-ratio: 6.40006265664   P-value: 1.26662888567e-08
   Fisher-exact test (one-sided greater): Odds-ratio: 6.40006265664   P-value: 1.26662888567e-08

Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [3]:
# 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
ERBB2_amp_tp53_count=0
ERBB2_no_amp_tp53_count=0
ERBB2_amp_no_tp53_count=0
ERBB2_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("ERBB2") and sample.checkFocalGeneAmp("FGFR1") and "TP53" in sample.genes_affected:
     ERBB2_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("ERBB2") and sample.checkFocalGeneAmp("FGFR1") and "TP53" not in sample.genes_affected:
     ERBB2_amp_no_tp53_count += 1
  if not (sample.checkFocalGeneAmp("ERBB2") or sample.checkFocalGeneAmp("FGFR1")) and "TP53" in sample.genes_affected:
     ERBB2_no_amp_tp53_count += 1
  if not (sample.checkFocalGeneAmp("ERBB2") or sample.checkFocalGeneAmp("FGFR1"))and "TP53" not in sample.genes_affected:
     ERBB2_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   ERBB2 and FGFR1 amplified samples having TP53 mutations: "+str(ERBB2_amp_tp53_count)
print "   Not amplified samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count)
print "   ERBB2 and FGFR1 amplified samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count)
print "   Not amplified samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   ERBB2 and FGFR1 amplified samples having TP53 mutations: 10
   Not amplified samples having TP53 mutations: 199
   ERBB2 and FGFR1 amplified samples having no TP53 mutation: 4
   Not amplified samples having no TP53 mutations: 533
   Fisher-exact test: Odds-ratio: 6.6959798995   P-value: 0.000826351449265
   Fisher-exact test (one-sided greater): Odds-ratio: 6.6959798995   P-value: 0.000826351449265

Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53


In [43]:
# 21)Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['TP53 WT', 'TP53 mut'])

plt.savefig('BRCA_MYC_expression_TP53_mut.png', dpi=200)

print ""
print " 21) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 21) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53
   TP53 mutated MYC expr mean: 2701.82564527   Samples: 296
   TP53 wildtype MYC expr mean: 1909.67191854   Samples: 669
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 5.8378287163   p-value:7.22331966178e-09
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 78705.0   p-value:1.828259814e-07

Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53


In [42]:
# 22)Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
     continue
   del_mut_found = False
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation":
         del_mut_found = True
         break
   if del_mut_found:             
      if sample.getRNASeqFromGene("MYC") != "n/a":
         MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
   elif "TP53" not in sample.genes_affected:
      if sample.getRNASeqFromGene("MYC") != "n/a":
         MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)


data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['TP53 WT', 'TP53 del mut'])

plt.savefig('BRCA_MYC_expression_TP53_delmut.png', dpi=200)

print ""
print " 22) Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 22) Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53
   TP53 mutated MYC expr mean: 3139.82021959   Samples: 97
   TP53 wildtype MYC expr mean: 1909.67191854   Samples: 669
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 6.41833085408   p-value:2.41710236335e-10
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 21345.0   p-value:2.50791773101e-08

Test whether MYC focally amplified TP53-mutated samples have higher MYC expression levels than MYC focally amplified tumors without TP53


In [41]:
# 23)Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if not sample.checkFocalGeneAmp("MYC"):
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['Focal MYC, TP53 WT', 'Focal MYC, TP53 mut'])

plt.savefig('BRCA_MYC_expression_focal_MYC_TP53_mut.png', dpi=200)



print ""
print " 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples
   TP53 mutated MYC expr mean: 4517.00753889   Samples: 36
   TP53 wildtype MYC expr mean: 2151.058084   Samples: 25
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 2.93135494264   p-value:0.00479549199539
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 233.0   p-value:0.000749414801724

Test whether MYC copynumber > 4 TP53-mutated samples have higher MYC expression levels than MYC copynumber > 4 tumors without TP53


In [6]:
# 23)Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_MYC = "n/a"
  log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2_MYC != None):
     copynumber_MYC = 2.0 * 2.0 ** log2_MYC
  else:
     copynumber_MYC="n/a"
  if copynumber_MYC == "n/a" or copynumber_MYC < 4:
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['CN>4 MYC, TP53 WT', 'CN>4 MYC, TP53 mut'])

plt.savefig('BRCA_MYC_expression_cn4_MYC_TP53_mut.png', dpi=200)


print ""
print " 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples
   TP53 mutated MYC expr mean: 4222.93165873   Samples: 63
   TP53 wildtype MYC expr mean: 2522.03757917   Samples: 48
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 2.90770494335   p-value:0.00441229029918
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 835.0   p-value:2.82722600825e-05

Test whether TP53 mutations occur in overexpressed MYC-samples

Assume MYC is overexpressed when Z-score of normalized expression levels is > 3


In [5]:
control_MYC_expression = list()

MYC_expr_tp53_count=0
MYC_no_expr_tp53_count=0
MYC_expr_no_tp53_count=0
MYC_no_expr_no_tp53_count=0
 
for control in controls.values():
    if control.getRNASeqFromGene("MYC") != "n/a":
        control_MYC_expression.append(control.getRNASeqFromGene("MYC"))

MYC_expr_series = pandas.Series(control_MYC_expression)
control_MYC_expression_mean=MYC_expr_series.mean()
control_MYC_expression_stdev=MYC_expr_series.std()


for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    if sample.getRNASeqFromGene("MYC") != "n/a":
        z_score_MYC = (sample.getRNASeqFromGene("MYC") - control_MYC_expression_mean) / control_MYC_expression_stdev
        if z_score_MYC > 3 and "TP53" in sample.genes_affected:
            MYC_expr_tp53_count += 1
        if z_score_MYC > 3 and "TP53" not in sample.genes_affected:
            MYC_expr_no_tp53_count += 1
        if z_score_MYC < 3 and "TP53" in sample.genes_affected:
            MYC_no_expr_tp53_count += 1
        if z_score_MYC < 3 and "TP53" not in sample.genes_affected:
            MYC_no_expr_no_tp53_count += 1
            
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_expr_tp53_count, MYC_expr_no_tp53_count], [MYC_no_expr_tp53_count, MYC_no_expr_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_expr_tp53_count, MYC_expr_no_tp53_count], [MYC_no_expr_tp53_count, MYC_no_expr_no_tp53_count]], alternative='greater')

print ""
print " 23) Test whether TP53 mutations occur in overexpressed MYC-samples"
print "   Control samples:"+str(len(controls.values()))
print "   Control samples mean RSEM: "+str(control_MYC_expression_mean)
print "   Control samples st.dev. RSEM: "+str(control_MYC_expression_stdev)
print "   MYC overexpressed samples having TP53 mutations: "+str(MYC_expr_tp53_count)
print "   MYC not overexpressed samples having TP53 mutations: "+str(MYC_no_expr_tp53_count)
print "   MYC overexpressed samples having no TP53 mutation: "+str(MYC_expr_no_tp53_count)
print "   MYC not overexpressed samples having no TP53 mutations: "+str(MYC_no_expr_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 23) Test whether TP53 mutations occur in overexpressed MYC-samples
   Control samples:112
   Control samples mean RSEM: 4413.00622411
   Control samples st.dev. RSEM: 2323.21886387
   MYC overexpressed samples having TP53 mutations: 4
   MYC not overexpressed samples having TP53 mutations: 292
   MYC overexpressed samples having no TP53 mutation: 1
   MYC not overexpressed samples having no TP53 mutations: 668
   Fisher-exact test: Odds-ratio: 9.15068493151   P-value: 0.0330326096119
   Fisher-exact test (one-sided greater): Odds-ratio: 9.15068493151   P-value: 0.0330326096119

Test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors

For focal amplification calling, a difference of 0.2 in log2-ratios between neighbouring 20Mbp on each side is a requirement. Here, we restrict focal amplifications to segments with a log2-ratio difference of at least 0.4


In [2]:
# 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  stringent_MYC = False
  for focal_amp in sample.focal_amplifications:
    if "MYC" in focal_amp.genes and focal_amp.log2_upstream_adj > 0.4 and focal_amp.log2_downstream_adj > 0.4:
        stringent_MYC = True
        break
  if stringent_MYC and not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if stringent_MYC and not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if not stringent_MYC and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if not stringent_MYC and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   MYC amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC amplified samples having TP53 mutations: 17
   MYC not amplified samples having TP53 mutations: 276
   MYC amplified samples having no TP53 mutation: 13
   MYC not amplified samples having no TP53 mutations: 657
   Fisher-exact test: Odds-ratio: 3.11287625418   P-value: 0.00377373524372
   Fisher-exact test (one-sided greater): Odds-ratio: 3.11287625418   P-value: 0.0021620625108

In [ ]: