Test whether MYC amplified samples correlate with TP53 mutations


In [5]:
import NotebookImport
from TCGA_analysis_PanCancer_import import *
from IPython.display import HTML

Test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors

Remove samples where CCND1 is also focally amplified


In [2]:
# 1) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("MYC") and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("MYC") and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 1) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   MYC amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 1) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC amplified samples having TP53 mutations: 81
   MYC not amplified samples having TP53 mutations: 1209
   MYC amplified samples having no TP53 mutation: 38
   MYC not amplified samples having no TP53 mutations: 1951
   Fisher-exact test: Odds-ratio: 3.43979365287   P-value: 1.63105139639e-10
   Fisher-exact test (one-sided greater): Odds-ratio: 3.43979365287   P-value: 1.03294439614e-10

Test whether MYC focally amplified non-breast tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [3]:
# 1) Test whether MYC focally amplified non-breast tumors contain TP53 mutations more frequently than non focally amplified Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if not sample.clinical:
     continue
  if sample.primarysiteofdesease == "breast":
     continue            
  if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("MYC") and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("MYC") and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 1) Test whether MYC focally amplified non-breast tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   MYC amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 1) Test whether MYC focally amplified non-breast tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC amplified samples having TP53 mutations: 52
   MYC not amplified samples having TP53 mutations: 947
   MYC amplified samples having no TP53 mutation: 16
   MYC not amplified samples having no TP53 mutations: 1302
   Fisher-exact test: Odds-ratio: 4.46832101373   P-value: 1.66724769305e-08
   Fisher-exact test (one-sided greater): Odds-ratio: 4.46832101373   P-value: 1.43914292831e-08

Test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [3]:
# 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC_amp_tp53_count=0
   MYC_no_amp_tp53_count=0
   MYC_amp_no_tp53_count=0
   MYC_no_amp_no_tp53_count=0

   for sample in samples.values():
      if not sample.somatic_mutation_data:
         continue
      if sample.checkFocalGeneAmp("MYC") and sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
         MYC_amp_tp53_count += 1
      if sample.checkFocalGeneAmp("MYC") and sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
         MYC_amp_no_tp53_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
         MYC_no_amp_tp53_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
         MYC_no_amp_no_tp53_count += 1
   oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
   oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

   print ""
   print " 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
   print "   MYC and CCND1 amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count)
   print "   MYC and CCND1 not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
   print "   MYC and CCND1 amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
   print "   MYC and CCND1 not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
   print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
   print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC and CCND1 amplified samples having TP53 mutations: 15
   MYC and CCND1 not amplified samples having TP53 mutations: 1064
   MYC and CCND1 amplified samples having no TP53 mutation: 3
   MYC and CCND1 not amplified samples having no TP53 mutations: 1799
   Fisher-exact test: Odds-ratio: 8.45394736842   P-value: 8.53448576145e-05
   Fisher-exact test (one-sided greater): Odds-ratio: 8.45394736842   P-value: 8.53448576148e-05

Test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [4]:
# 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   CCND1_amp_tp53_count=0
   CCND1_no_amp_tp53_count=0
   CCND1_amp_no_tp53_count=0
   CCND1_no_amp_no_tp53_count=0

   for sample in samples.values():
      if not sample.somatic_mutation_data:
         continue
      if sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and "TP53" in sample.genes_affected:
         CCND1_amp_tp53_count += 1
      if sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and "TP53" not in sample.genes_affected:
         CCND1_amp_no_tp53_count += 1
      if not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
         CCND1_no_amp_tp53_count += 1
      if not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
         CCND1_no_amp_no_tp53_count += 1
   oddsratio, pvalue = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]])
   oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]], alternative='greater')

   print ""
   print " 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
   print "   CCND1 amplified samples having TP53 mutations: "+str(CCND1_amp_tp53_count)
   print "   CCND1 not amplified samples having TP53 mutations: "+str(CCND1_no_amp_tp53_count)
   print "   CCND1 amplified samples having no TP53 mutation: "+str(CCND1_amp_no_tp53_count)
   print "   CCND1 not amplified samples having no TP53 mutations: "+str(CCND1_no_amp_no_tp53_count)
   print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
   print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   CCND1 amplified samples having TP53 mutations: 145
   CCND1 not amplified samples having TP53 mutations: 1145
   CCND1 amplified samples having no TP53 mutation: 152
   CCND1 not amplified samples having no TP53 mutations: 1837
   Fisher-exact test: Odds-ratio: 1.53048149851   P-value: 0.000594109536398
   Fisher-exact test (one-sided greater): Odds-ratio: 1.53048149851   P-value: 0.00031902221439

Test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors


In [5]:
# 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber = "n/a"
  log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2 != None):
     copynumber = 2.0 * 2.0 ** log2
  else:
     copynumber="n/a"
  if copynumber != "n/a" and copynumber > 4 and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if copynumber != "n/a" and copynumber < 4 and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if copynumber != "n/a" and copynumber > 4 and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if copynumber != "n/a" and copynumber < 4 and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors"
print "   MYC CN>4 samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4 samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
   MYC CN>4 samples having TP53 mutations: 150
   MYC CN<4 samples having TP53 mutations: 1152
   MYC CN>4 samples having no TP53 mutation: 61
   MYC CN<4 samples having no TP53 mutations: 1931
   Fisher-exact test (two-sided): Odds-ratio: 4.12184084699   P-value: 1.08960838322e-21
   Fisher-exact test (one-sided greater): Odds-ratio: 4.12184084699   P-value: 8.25964240337e-22

test whether non-breast tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors


In [4]:
# 4) test whether non-breast tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if not sample.clinical:
     continue
  if sample.primarysiteofdesease == "breast":
     continue   
  copynumber = "n/a"
  log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2 != None):
     copynumber = 2.0 * 2.0 ** log2
  else:
     copynumber="n/a"
  if copynumber != "n/a" and copynumber > 4 and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if copynumber != "n/a" and copynumber < 4 and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if copynumber != "n/a" and copynumber > 4 and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if copynumber != "n/a" and copynumber < 4 and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 4) test whether non-breast tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors"
print "   MYC CN>4 samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4 samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 4) test whether non-breast tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
   MYC CN>4 samples having TP53 mutations: 87
   MYC CN<4 samples having TP53 mutations: 918
   MYC CN>4 samples having no TP53 mutation: 13
   MYC CN<4 samples having no TP53 mutations: 1305
   Fisher-exact test (two-sided): Odds-ratio: 9.51357466063   P-value: 2.36719407203e-20
   Fisher-exact test (one-sided greater): Odds-ratio: 9.51357466063   P-value: 1.94752063172e-20

Test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors


In [6]:
# 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors
CCND1_amp_tp53_count=0
CCND1_no_amp_tp53_count=0
CCND1_amp_no_tp53_count=0
CCND1_no_amp_no_tp53_count=0

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    copynumber = "n/a"
    log2 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
    if (log2 != None):
        copynumber = 2.0 * 2.0 ** log2
    else:
        copynumber="n/a"
    if copynumber != "n/a" and copynumber > 4 and "TP53" in sample.genes_affected:
        CCND1_amp_tp53_count += 1
    if copynumber != "n/a" and copynumber < 4 and "TP53" in sample.genes_affected:
        CCND1_no_amp_tp53_count += 1
    if copynumber != "n/a" and copynumber > 4 and "TP53" not in sample.genes_affected:
        CCND1_amp_no_tp53_count += 1
    if copynumber != "n/a" and copynumber < 4 and "TP53" not in sample.genes_affected:
        CCND1_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count], [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors"
print "   CCND1 CN>4 samples having TP53 mutations: "+str(CCND1_amp_tp53_count)
print "   CCND1 CN<4 samples having TP53 mutations: "+str(CCND1_no_amp_tp53_count)
print "   CCND1 CN>4 samples having no TP53 mutation: "+str(CCND1_amp_no_tp53_count)
print "   CCND1 CN<4 samples having no TP53 mutations: "+str(CCND1_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors
   CCND1 CN>4 samples having TP53 mutations: 123
   CCND1 CN<4 samples having TP53 mutations: 1180
   CCND1 CN>4 samples having no TP53 mutation: 104
   CCND1 CN<4 samples having no TP53 mutations: 1884
   Fisher-exact test (two-sided): Odds-ratio: 1.88829856584   P-value: 4.34084997995e-06
   Fisher-exact test (one-sided greater): Odds-ratio: 1.88829856584   P-value: 2.87655495243e-06

Test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors


In [7]:
# 6) test whether tumors with copynumber > 4 in MYC but coynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_MYC = "n/a"
  log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2_MYC != None):
     copynumber_MYC = 2.0 * 2.0 ** log2_MYC
  else:
     copynumber_MYC="n/a"
  copynumber_CCND1 = "n/a"
  log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
  if (log2_CCND1 != None):
     copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
  else:
     copynumber_CCND1="n/a"
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 6) test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors"
print "   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 6) test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: 132
   MYC CN<4 samples having TP53 mutations: 1152
   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: 52
   MYC CN<4 samples having no TP53 mutations: 1931
   Fisher-exact test (two-sided): Odds-ratio: 4.25500801282   P-value: 7.72649139447e-20
   Fisher-exact test (one-sided greater): Odds-ratio: 4.25500801282   P-value: 4.50285718666e-20

Test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors


In [8]:
# 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_MYC = "n/a"
  log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2_MYC != None):
     copynumber_MYC = 2.0 * 2.0 ** log2_MYC
  else:
     copynumber_MYC="n/a"
  copynumber_CCND1 = "n/a"
  log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
  if (log2_CCND1 != None):
     copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
  else:
     copynumber_CCND1="n/a"
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if copynumber_MYC != "n/a" and copynumber_MYC < 4 and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors"
print "   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: 17
   MYC CN<4 samples having TP53 mutations: 1152
   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: 9
   MYC CN<4 samples having no TP53 mutations: 1931
   Fisher-exact test (two-sided): Odds-ratio: 3.16618441358   P-value: 0.0042412984064
   Fisher-exact test (one-sided greater): Odds-ratio: 3.16618441358   P-value: 0.00362973038831

Check mutation spectrum in all samples


In [9]:
%matplotlib inline

In [10]:
# 8) check TP53 mutation spectrum
TP53_types = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      elif mutation.mut_type == "Silent":
         TP53_types.append("Silent")
      elif mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins":
         TP53_types.append("Frameshift_Indel")
      elif mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins":
         TP53_types.append("Inframe_Indel")
      elif mutation.mut_type == "Splice_Site":
         TP53_types.append("Splice-Site")
      elif mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types.append("L2_Missense")
      elif mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types.append("L3_Missense")
      elif mutation.mut_type == "Missense_Mutation":
         TP53_types.append("Other_Missense")
      elif mutation.mut_type == "Nonsense_Mutation":
         TP53_types.append("Nonsense")
      elif mutation.mut_type == "Nonstop_Mutation":
         TP53_types.append("Nonstop")
      elif mutation.mut_type == "RNA":
         TP53_types.append("RNA")

TP53_type_series = pandas.Categorical(sorted(TP53_types))
fig_TP53=TP53_type_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g', 'r', 'c', 'm', 'y','k','w'], title=None)
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('PanCancer_mut_spectrum.png', dpi=200)

print ""
print " 8) check TP53 mutation spectrum"
print "   Total mutations: "+str(len(TP53_types))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_type_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_type_series.describe()['counts'][count])+"</td><td>"+str(TP53_type_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 8) check TP53 mutation spectrum
   Total mutations: 1486
   
Out[10]:
CategoryCountFrequency
Frameshift_Indel2010.135262449529
Inframe_Indel270.0181695827725
L2_Missense1500.100942126514
L3_Missense1430.0962314939435
Nonsense1960.131897711978
Other_Missense6630.446164199192
Silent310.0208613728129
Splice-Site750.0504710632571

Check mutation spectrum in MYC (and not CCND1) focally amplified samples


In [11]:
# 9) check TP53 mutation spectrum in MYC amplified tumors
TP53_types_MYC = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Silent":
         TP53_types_MYC.append("Silent")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_MYC.append("Frameshift_Indel")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_MYC.append("Inframe_Indel")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Splice_Site":
         TP53_types_MYC.append("Splice-Site")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_MYC.append("L2_Missense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_MYC.append("L3_Missense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Missense_Mutation":
         TP53_types_MYC.append("Other_Missense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_MYC.append("Nonsense")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_MYC.append("Nonstop")
      elif sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and mutation.mut_type == "RNA":
         TP53_types_MYC.append("RNA")

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
fig_TP53=TP53_type_MYC_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g', 'r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('PanCancer_MYC_focal_mut_spectrum.png', dpi=200)

print ""
print " 9) check TP53 mutation spectrum in MYC amplified tumors"
print "   Total mutations: "+str(len(TP53_types_MYC))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_type_MYC_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_type_MYC_series.describe()['counts'][count])+"</td><td>"+str(TP53_type_MYC_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 9) check TP53 mutation spectrum in MYC amplified tumors
   Total mutations: 82
   
Out[11]:
CategoryCountFrequency
Frameshift_Indel100.121951219512
Inframe_Indel20.0243902439024
L2_Missense110.134146341463
L3_Missense70.0853658536585
Nonsense140.170731707317
Other_Missense320.390243902439
Silent20.0243902439024
Splice-Site40.0487804878049

Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4


In [33]:
# 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4
TP53_types_MYC = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Silent":
         TP53_types_MYC.append("Silent")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_MYC.append("Frameshift_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_MYC.append("Inframe_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Splice_Site":
         TP53_types_MYC.append("Splice-Site")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_MYC.append("L2_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_MYC.append("L3_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Missense_Mutation":
         TP53_types_MYC.append("Other_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_MYC.append("Nonsense")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_MYC.append("Nonstop")
      elif copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and mutation.mut_type == "RNA":
         TP53_types_MYC.append("RNA")

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
fig_TP53=TP53_type_MYC_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g','r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('PanCancer_MYC_cn4_mut_spectrum.png', dpi=200)

print ""
print " 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4"
print "   Total mutations: "+str(len(TP53_types_MYC))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_type_MYC_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_type_MYC_series.describe()['counts'][count])+"</td><td>"+str(TP53_type_MYC_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4
   Total mutations: 131
   
Out[33]:
CategoryCountFrequency
Frameshift_Indel160.12213740458
Inframe_Indel10.00763358778626
L2_Missense100.0763358778626
L3_Missense100.0763358778626
Nonsense220.167938931298
Other_Missense650.496183206107
Silent20.0152671755725
Splice-Site50.0381679389313

Check mutation spectrum in CCND1 (and not MYC) focally amplified samples


In [13]:
# 11) check TP53 mutation spectrum in CCND1 amplified tumors
TP53_types_CCND1 = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Silent":
         TP53_types_CCND1.append("Silent")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_CCND1.append("Frameshift_Indel")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_CCND1.append("Inframe_Indel")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Splice_Site":
         TP53_types_CCND1.append("Splice-Site")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_CCND1.append("L2_Missense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_CCND1.append("L3_Missense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Missense_Mutation":
         TP53_types_CCND1.append("Other_Missense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_CCND1.append("Nonsense")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_CCND1.append("Nonstop")
      elif sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC") and mutation.mut_type == "RNA":
         TP53_types_CCND1.append("RNA")

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
fig_TP53=TP53_types_CCND1_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g', 'r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('PanCancer_CCND1_focal_mut_spectrum.png', dpi=200)

print ""
print " 11) check TP53 mutation spectrum in CCND1 focally amplified tumors"
print "   Total mutations: "+str(len(TP53_types_CCND1))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_types_CCND1_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_types_CCND1_series.describe()['counts'][count])+"</td><td>"+str(TP53_types_CCND1_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 11) check TP53 mutation spectrum in CCND1 focally amplified tumors
   Total mutations: 178
   
Out[13]:
CategoryCountFrequency
Frameshift_Indel250.140449438202
Inframe_Indel40.0224719101124
L2_Missense240.134831460674
L3_Missense180.101123595506
Nonsense200.112359550562
Other_Missense690.387640449438
Silent50.0280898876404
Splice-Site130.0730337078652

Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4


In [14]:
# 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4
TP53_types_CCND1 = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Silent":
         TP53_types_CCND1.append("Silent")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         TP53_types_CCND1.append("Frameshift_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and (mutation.mut_type == "In_Frame_Del" or mutation.mut_type == "In_Frame_Ins"):
         TP53_types_CCND1.append("Inframe_Indel")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Splice_Site":
         TP53_types_CCND1.append("Splice-Site")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7578264 and mutation.pos <= 7578443:
         TP53_types_CCND1.append("L2_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Missense_Mutation" and mutation.pos >=  7577528 and mutation.pos <= 7577575:
         TP53_types_CCND1.append("L3_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Missense_Mutation":
         TP53_types_CCND1.append("Other_Missense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Nonsense_Mutation":
         TP53_types_CCND1.append("Nonsense")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "Nonstop_Mutation":
         TP53_types_CCND1.append("Nonstop")
      elif copynumber_MYC != "n/a" and copynumber_MYC < 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4 and mutation.mut_type == "RNA":
         TP53_types_CCND1.append("RNA")

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
fig_TP53=TP53_types_CCND1_series.describe().counts.plot(kind='pie', figsize=(6, 6),colors=['b', 'g','r', 'c', 'm', 'y','k','w'])
fig_TP53.set_ylabel('')

output1=fig_TP53.get_figure()
output1.savefig('PanCancer_CCND1_cn4_mut_spectrum.png', dpi=200)


print ""
print " 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4"
print "   Total mutations: "+str(len(TP53_types_CCND1))
print "   "
count = 0
s = "<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"
for i in TP53_types_CCND1_series.categories:
   s += "<tr><td>"+i+"</td><td>"+str(TP53_types_CCND1_series.describe()['counts'][count])+"</td><td>"+str(TP53_types_CCND1_series.describe()['freqs'][count])+"</td></tr>"
   count += 1
s += "</table>"
h = HTML(s);h


 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4
   Total mutations: 131
   
Out[14]:
CategoryCountFrequency
Frameshift_Indel170.129770992366
Inframe_Indel20.0152671755725
L2_Missense210.160305343511
L3_Missense130.0992366412214
Nonsense170.129770992366
Other_Missense470.358778625954
Silent50.0381679389313
Splice-Site90.0687022900763

Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples


In [15]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_amp_frameshift_count += 1
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_amp_no_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_no_amp_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples"
print "   MYC amplified Frameshift Indels: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified Frameshift Indels: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples
   MYC amplified Frameshift Indels: 10
   MYC amplified other mutations: 78
   MYC not amplified Frameshift Indels: 187
   MYC not amplified other mutations: 1232
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 0.844645550528
      -- p-value: 0.745117201673
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 0.844645550528
      -- p-value: 0.736221318939

Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples


In [16]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_amp_frameshift_count += 1
      if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_amp_no_frameshift_count += 1
      if copynumber_MYC != "n/a" and copynumber_MYC < 4 and not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins"):
         MYC_no_amp_frameshift_count += 1
      if copynumber_MYC != "n/a" and copynumber_MYC < 4 and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples"
print "   MYC amplified Frameshift Indels: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified Frameshift Indels: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples
   MYC amplified Frameshift Indels: 16
   MYC amplified other mutations: 122
   MYC not amplified Frameshift Indels: 173
   MYC not amplified other mutations: 1182
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 0.896048517009
      -- p-value: 0.788639002871
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 0.896048517009
      -- p-value: 0.694605165158

Test whether Deleterious mutations (Frameshift indels, Nonsense) occur more frequently in MYC (and not CCND1) focally amplified samples


In [17]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation" ):
         MYC_amp_frameshift_count += 1
      if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_amp_no_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation"):
         MYC_no_amp_frameshift_count += 1
      if not sample.checkFocalGeneAmp("MYC") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) focally amplified samples"
print "   MYC amplified deleterious mutations: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified deleterious mutations: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) focally amplified samples
   MYC amplified deleterious mutations: 24
   MYC amplified other mutations: 64
   MYC not amplified deleterious mutations: 368
   MYC not amplified other mutations: 1051
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 1.07099184783
      -- p-value: 0.802452220641
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 1.07099184783
      -- p-value: 0.432569742738

Test whether Deleterious mutations (Frameshift indels, Nonsense) occur more frequently in MYC (and not CCND1) samples with copynumber > 4


In [18]:
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4
MYC_amp_frameshift_count = 0
MYC_amp_no_frameshift_count = 0
MYC_no_amp_frameshift_count = 0
MYC_no_amp_no_frameshift_count = 0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      continue
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      continue
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if copynumber_MYC > 4 and copynumber_CCND1 < 4 and not sample.checkFocalGeneAmp("CCND1") and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation" ):
         MYC_amp_frameshift_count += 1
      if copynumber_MYC > 4 and copynumber_CCND1 < 4 and not sample.checkFocalGeneAmp("CCND1") and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_amp_no_frameshift_count += 1
      if copynumber_MYC < 4 and (mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation"):
         MYC_no_amp_frameshift_count += 1
      if copynumber_MYC < 4 and not mutation.mut_type == "Frame_Shift_Del" and not mutation.mut_type == "Frame_Shift_Ins" and not mutation.mut_type == "Nonsense_Mutation":
         MYC_no_amp_no_frameshift_count += 1

oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_frameshift_count, MYC_amp_no_frameshift_count], [MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count]], alternative='greater')

print ""
print " 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) CN>4 samples"
print "   MYC amplified deleterious mutations: "+str(MYC_amp_frameshift_count)
print "   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count)
print "   MYC not amplified deleterious mutations: "+str(MYC_no_amp_frameshift_count)
print "   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count)
print "   "
print "   Fisher exact test (two-sided): "
print "      -- odd's ratio: "+str(oddsratio)
print "      -- p-value: "+str(pvalue)
print "   Fisher exact test (one-sided greater): "
print "      -- odd's ratio: "+str(oddsratio_greater)
print "      -- p-value: "+str(pvalue_greater)


 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) CN>4 samples
   MYC amplified deleterious mutations: 36
   MYC amplified other mutations: 97
   MYC not amplified deleterious mutations: 354
   MYC not amplified other mutations: 1008
   
   Fisher exact test (two-sided): 
      -- odd's ratio: 1.05678839769
      -- p-value: 0.836100793187
   Fisher exact test (one-sided greater): 
      -- odd's ratio: 1.05678839769
      -- p-value: 0.428760870613

Test whether MYC focally amplified tumors contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than non focally amplified Tumors


In [19]:
# 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   del_mut_found = False
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation":
         del_mut_found = True
         break
   if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and del_mut_found:
      MYC_amp_tp53_count += 1
   if sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1") and not del_mut_found:
      MYC_amp_no_tp53_count += 1
   if not sample.checkFocalGeneAmp("MYC") and del_mut_found:
      MYC_no_amp_tp53_count += 1
   if not sample.checkFocalGeneAmp("MYC") and not del_mut_found:
      MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors"
print "   MYC amplified samples having deleterious TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC not amplified samples having deleterious TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC amplified samples having no deleterious TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC not amplified samples having no deleterious TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors
   MYC amplified samples having deleterious TP53 mutations: 24
   MYC not amplified samples having deleterious TP53 mutations: 333
   MYC amplified samples having no deleterious TP53 mutation: 95
   MYC not amplified samples having no deleterious TP53 mutations: 2827
   Fisher-exact test: Odds-ratio: 2.14471313419   P-value: 0.00232817585569
   Fisher-exact test (one-sided greater): Odds-ratio: 2.14471313419   P-value: 0.0017466524582

Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC


In [20]:
# 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
   if not sample.somatic_mutation_data:
      continue
   del_mut_found = False
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation":
         del_mut_found = True
         break
   copynumber_MYC = "n/a"
   log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2_MYC != None):
      copynumber_MYC = 2.0 * 2.0 ** log2_MYC
   else:
      copynumber_MYC="n/a"
   copynumber_CCND1 = "n/a"
   log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2_CCND1 != None):
      copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
   else:
      copynumber_CCND1="n/a"
   if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and del_mut_found:
      MYC_amp_tp53_count += 1
   if copynumber_MYC != "n/a" and copynumber_MYC > 4 and copynumber_CCND1 != "n/a" and copynumber_CCND1 < 4 and not del_mut_found:
      MYC_amp_no_tp53_count += 1
   if copynumber_MYC != "n/a" and copynumber_MYC < 4 and del_mut_found:
      MYC_no_amp_tp53_count += 1
   if copynumber_MYC != "n/a" and copynumber_MYC < 4 and not del_mut_found:
      MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC"
print "   MYC CN>4 and CCND1 CN<4 samples having deleterious TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC CN<4 samples having deleterious TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC CN>4 and CCND1 CN<4 having no deleterious TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC CN<4 samples having no deleterious TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC
   MYC CN>4 and CCND1 CN<4 samples having deleterious TP53 mutations: 37
   MYC CN<4 samples having deleterious TP53 mutations: 320
   MYC CN>4 and CCND1 CN<4 having no deleterious TP53 mutation: 147
   MYC CN<4 samples having no deleterious TP53 mutations: 2763
   Fisher-exact test: Odds-ratio: 2.17327806122   P-value: 0.000140653361828
   Fisher-exact test (one-sided greater): Odds-ratio: 2.17327806122   P-value: 0.000117835097918

Test whether FGFR1 focally amplified (but not ERBB2-amplified) tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [21]:
# 15) test whether FGFR1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
FGFR1_amp_tp53_count=0
FGFR1_no_amp_tp53_count=0
FGFR1_amp_no_tp53_count=0
FGFR1_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("FGFR1") and not sample.checkFocalGeneAmp("ERBB2") and "TP53" in sample.genes_affected:
     FGFR1_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("FGFR1") and not sample.checkFocalGeneAmp("ERBB2") and "TP53" not in sample.genes_affected:
     FGFR1_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("FGFR1") and "TP53" in sample.genes_affected:
     FGFR1_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("FGFR1") and "TP53" not in sample.genes_affected:
     FGFR1_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 15) test whether FGFR1 (but not ERBB2) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   FGFR1 amplified samples having TP53 mutations: "+str(FGFR1_amp_tp53_count)
print "   FGFR1 not amplified samples having TP53 mutations: "+str(FGFR1_no_amp_tp53_count)
print "   FGFR1 amplified samples having no TP53 mutation: "+str(FGFR1_amp_no_tp53_count)
print "   FGFR1 not amplified samples having no TP53 mutations: "+str(FGFR1_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 15) test whether FGFR1 (but not ERBB2) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   FGFR1 amplified samples having TP53 mutations: 118
   FGFR1 not amplified samples having TP53 mutations: 1171
   FGFR1 amplified samples having no TP53 mutation: 113
   FGFR1 not amplified samples having no TP53 mutations: 1875
   Fisher-exact test: Odds-ratio: 1.67204492038   P-value: 0.000201890837143
   Fisher-exact test (one-sided greater): Odds-ratio: 1.67204492038   P-value: 0.000115492642829

Test whether FGFR1 CN>4 (but ERBB2 CN < 4) tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [22]:
# 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4
FGFR1_amp_tp53_count=0
FGFR1_no_amp_tp53_count=0
FGFR1_amp_no_tp53_count=0
FGFR1_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_FGFR1 = "n/a"
  log2_FGFR1 = sample.getLog2FromGene(gene_positions['FGFR1'].chrom,gene_positions['FGFR1'].start, gene_positions['FGFR1'].end)
  if (log2_FGFR1 != None):
     copynumber_FGFR1 = 2.0 * 2.0 ** log2_FGFR1
  else:
     copynumber_FGFR1="n/a"
  copynumber_ERBB2 = "n/a"
  log2_ERBB2 = sample.getLog2FromGene(gene_positions['ERBB2'].chrom,gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
  if (log2_ERBB2 != None):
     copynumber_ERBB2 = 2.0 * 2.0 ** log2_ERBB2
  else:
     copynumber_ERBB2="n/a"
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 > 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" in sample.genes_affected:
     FGFR1_amp_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and "TP53" in sample.genes_affected:
     FGFR1_no_amp_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 > 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" not in sample.genes_affected:
     FGFR1_amp_no_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and "TP53" not in sample.genes_affected:
     FGFR1_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count], [FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4 Tumors"
print "   FGFR1 CN>4, ERBB2 CN<4 samples having TP53 mutations: "+str(FGFR1_amp_tp53_count)
print "   FGFR1 CN<4 samples having TP53 mutations: "+str(FGFR1_no_amp_tp53_count)
print "   FGFR1 CN>4, ERBB2 CN<4  samples having no TP53 mutation: "+str(FGFR1_amp_no_tp53_count)
print "   FGFR1 CN<4 samples having no TP53 mutations: "+str(FGFR1_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4 Tumors
   FGFR1 CN>4, ERBB2 CN<4 samples having TP53 mutations: 49
   FGFR1 CN<4 samples having TP53 mutations: 1216
   FGFR1 CN>4, ERBB2 CN<4  samples having no TP53 mutation: 52
   FGFR1 CN<4 samples having no TP53 mutations: 1905
   Fisher-exact test (two-sided): Odds-ratio: 1.47623038968   P-value: 0.062014902311
   Fisher-exact test (one-sided greater): Odds-ratio: 1.47623038968   P-value: 0.0344707391955

Test whether ERBB2 focally amplified (but not FGFR1-amplified) tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [23]:
# 17) test whether ERBB2 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
ERBB2_amp_tp53_count=0
ERBB2_no_amp_tp53_count=0
ERBB2_amp_no_tp53_count=0
ERBB2_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("ERBB2") and not sample.checkFocalGeneAmp("FGFR1") and "TP53" in sample.genes_affected:
     ERBB2_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("ERBB2") and not sample.checkFocalGeneAmp("FGFR1") and "TP53" not in sample.genes_affected:
     ERBB2_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("ERBB2") and "TP53" in sample.genes_affected:
     ERBB2_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("ERBB2") and "TP53" not in sample.genes_affected:
     ERBB2_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 17) test whether ERBB2 (but not FGFR1) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   ERBB2 amplified samples having TP53 mutations: "+str(ERBB2_amp_tp53_count)
print "   ERBB2 not amplified samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count)
print "   ERBB2 amplified samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count)
print "   ERBB2 not amplified samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 17) test whether ERBB2 (but not FGFR1) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   ERBB2 amplified samples having TP53 mutations: 89
   ERBB2 not amplified samples having TP53 mutations: 1200
   ERBB2 amplified samples having no TP53 mutation: 57
   ERBB2 not amplified samples having no TP53 mutations: 1931
   Fisher-exact test: Odds-ratio: 2.51255847953   P-value: 8.17510298023e-08
   Fisher-exact test (one-sided greater): Odds-ratio: 2.51255847953   P-value: 5.50442068161e-08

Test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4


In [24]:
# 18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors
ERBB2_amp_tp53_count=0
ERBB2_no_amp_tp53_count=0
ERBB2_amp_no_tp53_count=0
ERBB2_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_FGFR1 = "n/a"
  log2_FGFR1 = sample.getLog2FromGene(gene_positions['FGFR1'].chrom,gene_positions['FGFR1'].start, gene_positions['FGFR1'].end)
  if (log2_FGFR1 != None):
     copynumber_FGFR1 = 2.0 * 2.0 ** log2_FGFR1
  else:
     copynumber_FGFR1="n/a"
  copynumber_ERBB2 = "n/a"
  log2_ERBB2 = sample.getLog2FromGene(gene_positions['ERBB2'].chrom,gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
  if (log2_ERBB2 != None):
     copynumber_ERBB2 = 2.0 * 2.0 ** log2_ERBB2
  else:
     copynumber_ERBB2="n/a"
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4 and "TP53" in sample.genes_affected:
     ERBB2_amp_tp53_count += 1
  if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" in sample.genes_affected:
     ERBB2_no_amp_tp53_count += 1
  if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 < 4 and copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4 and "TP53" not in sample.genes_affected:
     ERBB2_amp_no_tp53_count += 1
  if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 < 4 and "TP53" not in sample.genes_affected:
     ERBB2_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]], alternative='greater')

print ""
print "  18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4"
print "   ERBB2 CN>4, FGFR1 CN<4 samples having TP53 mutations: "+str(ERBB2_amp_tp53_count)
print "   ERBB2 CN<4 samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count)
print "   ERBB2 CN>4, FGFR1 CN<4  samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count)
print "   ERBB2 CN<4 samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count)
print "   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


  18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4
   ERBB2 CN>4, FGFR1 CN<4 samples having TP53 mutations: 63
   ERBB2 CN<4 samples having TP53 mutations: 1212
   ERBB2 CN>4, FGFR1 CN<4  samples having no TP53 mutation: 46
   ERBB2 CN<4 samples having no TP53 mutations: 1937
   Fisher-exact test (two-sided): Odds-ratio: 2.18881833836   P-value: 8.28796074265e-05
   Fisher-exact test (one-sided greater): Odds-ratio: 2.18881833836   P-value: 4.70149412827e-05

Test whether tumors with copynumber > 4 in CCNE1 contain TP53 mutations more (or less) frequently than tumors with CCNE1 CN<4


In [25]:
# 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
CCNE1_amp_tp53_count=0
CCNE1_no_amp_tp53_count=0
CCNE1_amp_no_tp53_count=0
CCNE1_no_amp_no_tp53_count=0

CCNE1_amp_count = 0
CCNE1_amp_missing_somatic = 0

for sample in samples.values():
  if sample.checkFocalGeneAmp("CCNE1"):
        CCNE1_amp_count += 1
  if not sample.somatic_mutation_data:
     if sample.checkFocalGeneAmp("CCNE1"):
        CCNE1_amp_missing_somatic += 1
     continue
  if sample.checkFocalGeneAmp("CCNE1") and "TP53" in sample.genes_affected:
     CCNE1_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("CCNE1") and "TP53" not in sample.genes_affected:
     CCNE1_amp_no_tp53_count += 1
  if not sample.checkFocalGeneAmp("CCNE1") and "TP53" in sample.genes_affected:
     CCNE1_no_amp_tp53_count += 1
  if not sample.checkFocalGeneAmp("CCNE1") and "TP53" not in sample.genes_affected:
     CCNE1_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[CCNE1_amp_tp53_count, CCNE1_amp_no_tp53_count], [CCNE1_no_amp_tp53_count, CCNE1_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[CCNE1_amp_tp53_count, CCNE1_amp_no_tp53_count], [CCNE1_no_amp_tp53_count, CCNE1_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   CCNE1 amplified samples: "+str(CCNE1_amp_count)
print "   CCNE1 amplified samples missed due to missing somatic mutation data: "+str(CCNE1_amp_missing_somatic)
print ""
print "   CCNE1 amplified samples having TP53 mutations: "+str(CCNE1_amp_tp53_count)
print "   CCNE1 not amplified samples having TP53 mutations: "+str(CCNE1_no_amp_tp53_count)
print "   CCNE1 amplified samples having no TP53 mutation: "+str(CCNE1_amp_no_tp53_count)
print "   CCNE1 not amplified samples having no TP53 mutations: "+str(CCNE1_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   CCNE1 amplified samples: 355
   CCNE1 amplified samples missed due to missing somatic mutation data: 182

   CCNE1 amplified samples having TP53 mutations: 140
   CCNE1 not amplified samples having TP53 mutations: 1165
   CCNE1 amplified samples having no TP53 mutation: 33
   CCNE1 not amplified samples having no TP53 mutations: 1959
   Fisher-exact test: Odds-ratio: 7.13382754584   P-value: 3.38984899204e-30
   Fisher-exact test (one-sided greater): Odds-ratio: 7.13382754584   P-value: 3.24135459865e-30

Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors


In [26]:
# 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
ERBB2_amp_tp53_count=0
ERBB2_no_amp_tp53_count=0
ERBB2_amp_no_tp53_count=0
ERBB2_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if sample.checkFocalGeneAmp("ERBB2") and sample.checkFocalGeneAmp("FGFR1") and "TP53" in sample.genes_affected:
     ERBB2_amp_tp53_count += 1
  if sample.checkFocalGeneAmp("ERBB2") and sample.checkFocalGeneAmp("FGFR1") and "TP53" not in sample.genes_affected:
     ERBB2_amp_no_tp53_count += 1
  if not (sample.checkFocalGeneAmp("ERBB2") or sample.checkFocalGeneAmp("FGFR1")) and "TP53" in sample.genes_affected:
     ERBB2_no_amp_tp53_count += 1
  if not (sample.checkFocalGeneAmp("ERBB2") or sample.checkFocalGeneAmp("FGFR1"))and "TP53" not in sample.genes_affected:
     ERBB2_no_amp_no_tp53_count += 1
                        
oddsratio, pvalue = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count], [ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   ERBB2 and FGFR1 amplified samples having TP53 mutations: "+str(ERBB2_amp_tp53_count)
print "   Not amplified samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count)
print "   ERBB2 and FGFR1 amplified samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count)
print "   Not amplified samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   ERBB2 and FGFR1 amplified samples having TP53 mutations: 16
   Not amplified samples having TP53 mutations: 1082
   ERBB2 and FGFR1 amplified samples having no TP53 mutation: 4
   Not amplified samples having no TP53 mutations: 1818
   Fisher-exact test: Odds-ratio: 6.72088724584   P-value: 0.000206109104436
   Fisher-exact test (one-sided greater): Odds-ratio: 6.72088724584   P-value: 0.000129163941675

Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53


In [27]:
# 21)Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['TP53 WT', 'TP53 mut'])

plt.savefig('PanCancer_MYC_expression_TP53_mut.png', dpi=200)

print ""
print " 21) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 21) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53
   TP53 mutated MYC expr mean: 3482.92977708   Samples: 951
   TP53 wildtype MYC expr mean: 2259.32258652   Samples: 1558
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 13.143429645   p-value:3.30378426196e-38
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 517640.0   p-value:3.91399315809e-37

Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53


In [28]:
# 22)Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
   if not sample.somatic_mutation_data:
     continue
   del_mut_found = False
   for mutation in sample.somatic_mutations:
      if mutation.gene != "TP53":
         continue
      if mutation.mut_type == "Frame_Shift_Del" or mutation.mut_type == "Frame_Shift_Ins" or mutation.mut_type == "Nonsense_Mutation":
         del_mut_found = True
         break
   if del_mut_found:             
      if sample.getRNASeqFromGene("MYC") != "n/a":
         MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
   elif "TP53" not in sample.genes_affected:
      if sample.getRNASeqFromGene("MYC") != "n/a":
         MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)


data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['TP53 WT', 'TP53 del mut'])

plt.savefig('PanCancer_MYC_expression_TP53_delmut.png', dpi=200)

print ""
print " 22) Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 22) Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53
   TP53 mutated MYC expr mean: 3776.48882366   Samples: 279
   TP53 wildtype MYC expr mean: 2259.32258652   Samples: 1558
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 11.4204393349   p-value:3.12844849062e-29
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 133576.0   p-value:5.02516267825e-25

Test whether MYC focally amplified TP53-mutated samples have higher MYC expression levels than MYC focally amplified tumors without TP53


In [29]:
# 23)Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if not sample.checkFocalGeneAmp("MYC"):
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['Focal MYC, TP53 WT', 'Focal MYC, TP53 mut'])

plt.savefig('PanCancer_MYC_expression_focal_MYC_TP53_mut.png', dpi=200)



print ""
print " 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples
   TP53 mutated MYC expr mean: 5054.24670417   Samples: 72
   TP53 wildtype MYC expr mean: 2820.86824545   Samples: 33
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 3.14291388306   p-value:0.00218507602859
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 632.0   p-value:6.29330528191e-05

Test whether non-breast MYC focally amplified TP53-mutated samples have higher MYC expression levels than MYC focally amplified tumors without TP53


In [8]:
# 23)Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

no_expr_data = 0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if not sample.clinical:
     continue
  if sample.primarysiteofdesease == "breast":
     continue
  if not sample.checkFocalGeneAmp("MYC"):
     continue
  if sample.getRNASeqFromGene("MYC") == "n/a":
     no_expr_data += 1
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['Focal MYC, TP53 WT', 'Focal MYC, TP53 mut'])

plt.savefig('PanCancer_MYC_expression_focal_MYC_TP53_mut.png', dpi=200)



print ""
print " 23) Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples"
print "   Expression data missing: "+str(no_expr_data)
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 23) Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples
   Expression data missing: 32
   TP53 mutated MYC expr mean: 5591.48586944   Samples: 36
   TP53 wildtype MYC expr mean: 4914.025   Samples: 8
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 0.482115745279   p-value:0.632226749626
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 122.0   p-value:0.25648369728

Test whether MYC copynumber > 4 TP53-mutated samples have higher MYC expression levels than MYC copynumber > 4 tumors without TP53


In [10]:
# 23)Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  copynumber_MYC = "n/a"
  log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2_MYC != None):
     copynumber_MYC = 2.0 * 2.0 ** log2_MYC
  else:
     copynumber_MYC="n/a"
  if copynumber_MYC == "n/a" or copynumber_MYC < 4:
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['CN>4 MYC, TP53 WT', 'CN>4 MYC, TP53 mut'])

plt.savefig('PanCancer_MYC_expression_cn4_MYC_TP53_mut.png', dpi=200)


print ""
print " 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples"
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples
   TP53 mutated MYC expr mean: 4732.10297364   Samples: 110
   TP53 wildtype MYC expr mean: 2878.4370125   Samples: 56
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 3.30679760375   p-value:0.00115952907149
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 1747.0   p-value:2.66925648332e-06

Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples


In [12]:
# 23)Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples
MYC_expr_TP53 = list()
MYC_expr_no_TP53 = list()

no_expr_data = 0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  if not sample.clinical:
     continue
  if sample.primarysiteofdesease == "breast":
     continue
  copynumber_MYC = "n/a"
  log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
  if (log2_MYC != None):
     copynumber_MYC = 2.0 * 2.0 ** log2_MYC
  else:
     copynumber_MYC="n/a"
  if copynumber_MYC == "n/a" or copynumber_MYC < 4:
     continue
  if sample.getRNASeqFromGene("MYC") == "n/a":
     no_expr_data += 1
     continue
  if "TP53" in sample.genes_affected:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_TP53.append(sample.getRNASeqFromGene("MYC"))
  else:
     if sample.getRNASeqFromGene("MYC") != "n/a":
       MYC_expr_no_TP53.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t,p=scipy.stats.ttest_ind(MYC_expr_TP53,MYC_expr_no_TP53)
t1,p1=scipy.stats.mannwhitneyu(MYC_expr_TP53,MYC_expr_no_TP53)

data=list()
data.append(MYC_expr_no_TP53_series)
data.append(MYC_expr_TP53_series)
fig1=plt.boxplot(data, widths = 0.6, labels=['CN>4 MYC, TP53 WT', 'CN>4 MYC, TP53 mut'])

plt.savefig('PanCancer_noBreast_MYC_expression_cn4_MYC_TP53_mut.png', dpi=200)


print ""
print " 23) Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples"
print "   Expression data missing: "+str(no_expr_data)
print "   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53))
print "   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53))
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p)
print "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1)


 23) Test whether non-breast TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples
   Expression data missing: 44
   TP53 mutated MYC expr mean: 5414.60920426   Samples: 47
   TP53 wildtype MYC expr mean: 4497.05905556   Samples: 9
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: 0.641383560054   p-value:0.523986617604
   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: 185.0   p-value:0.280944535351

Test whether TP53 mutations occur in overexpressed MYC-samples

Assume MYC is overexpressed when Z-score of normalized expression levels is > 3


In [31]:
control_MYC_expression = list()

MYC_expr_tp53_count=0
MYC_no_expr_tp53_count=0
MYC_expr_no_tp53_count=0
MYC_no_expr_no_tp53_count=0
 
for control in controls.values():
    if control.getRNASeqFromGene("MYC") != "n/a":
        control_MYC_expression.append(control.getRNASeqFromGene("MYC"))

MYC_expr_series = pandas.Series(control_MYC_expression)
control_MYC_expression_mean=MYC_expr_series.mean()
control_MYC_expression_stdev=MYC_expr_series.std()


for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    if sample.getRNASeqFromGene("MYC") != "n/a":
        z_score_MYC = (sample.getRNASeqFromGene("MYC") - control_MYC_expression_mean) / control_MYC_expression_stdev
        if z_score_MYC > 3 and "TP53" in sample.genes_affected:
            MYC_expr_tp53_count += 1
        if z_score_MYC > 3 and "TP53" not in sample.genes_affected:
            MYC_expr_no_tp53_count += 1
        if z_score_MYC < 3 and "TP53" in sample.genes_affected:
            MYC_no_expr_tp53_count += 1
        if z_score_MYC < 3 and "TP53" not in sample.genes_affected:
            MYC_no_expr_no_tp53_count += 1
            
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_expr_tp53_count, MYC_expr_no_tp53_count], [MYC_no_expr_tp53_count, MYC_no_expr_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_expr_tp53_count, MYC_expr_no_tp53_count], [MYC_no_expr_tp53_count, MYC_no_expr_no_tp53_count]], alternative='greater')

print ""
print " 23) Test whether TP53 mutations occur in overexpressed MYC-samples"
print "   Control samples:"+str(len(controls.values()))
print "   Control samples mean RSEM: "+str(control_MYC_expression_mean)
print "   Control samples st.dev. RSEM: "+str(control_MYC_expression_stdev)
print "   MYC overexpressed samples having TP53 mutations: "+str(MYC_expr_tp53_count)
print "   MYC not overexpressed samples having TP53 mutations: "+str(MYC_no_expr_tp53_count)
print "   MYC overexpressed samples having no TP53 mutation: "+str(MYC_expr_no_tp53_count)
print "   MYC not overexpressed samples having no TP53 mutations: "+str(MYC_no_expr_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 23) Test whether TP53 mutations occur in overexpressed MYC-samples
   Control samples:0
   Control samples mean RSEM: nan
   Control samples st.dev. RSEM: nan
   MYC overexpressed samples having TP53 mutations: 0
   MYC not overexpressed samples having TP53 mutations: 0
   MYC overexpressed samples having no TP53 mutation: 0
   MYC not overexpressed samples having no TP53 mutations: 0
   Fisher-exact test: Odds-ratio: nan   P-value: 1.0
   Fisher-exact test (one-sided greater): Odds-ratio: nan   P-value: 1.0

Test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors

For focal amplification calling, a difference of 0.2 in log2-ratios between neighbouring 20Mbp on each side is a requirement. Here, we restrict focal amplifications to segments with a log2-ratio difference of at least 0.4


In [32]:
# 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
MYC_amp_tp53_count=0
MYC_no_amp_tp53_count=0
MYC_amp_no_tp53_count=0
MYC_no_amp_no_tp53_count=0

for sample in samples.values():
  if not sample.somatic_mutation_data:
     continue
  stringent_MYC = False
  for focal_amp in sample.focal_amplifications:
    if "MYC" in focal_amp.genes and focal_amp.log2_upstream_adj > 0.4 and focal_amp.log2_downstream_adj > 0.4:
        stringent_MYC = True
        break
  if stringent_MYC and not sample.checkFocalGeneAmp("CCND1") and "TP53" in sample.genes_affected:
     MYC_amp_tp53_count += 1
  if stringent_MYC and not sample.checkFocalGeneAmp("CCND1") and "TP53" not in sample.genes_affected:
     MYC_amp_no_tp53_count += 1
  if not stringent_MYC and "TP53" in sample.genes_affected:
     MYC_no_amp_tp53_count += 1
  if not stringent_MYC and "TP53" not in sample.genes_affected:
     MYC_no_amp_no_tp53_count += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_amp_tp53_count, MYC_amp_no_tp53_count], [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]], alternative='greater')

print ""
print " 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors"
print "   MYC amplified samples having TP53 mutations: "+str(MYC_amp_tp53_count)
print "   MYC not amplified samples having TP53 mutations: "+str(MYC_no_amp_tp53_count)
print "   MYC amplified samples having no TP53 mutation: "+str(MYC_amp_no_tp53_count)
print "   MYC not amplified samples having no TP53 mutations: "+str(MYC_no_amp_no_tp53_count)
print "   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue)
print "   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater)


 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
   MYC amplified samples having TP53 mutations: 48
   MYC not amplified samples having TP53 mutations: 1250
   MYC amplified samples having no TP53 mutation: 27
   MYC not amplified samples having no TP53 mutations: 1964
   Fisher-exact test: Odds-ratio: 2.79324444444   P-value: 2.00194544603e-05
   Fisher-exact test (one-sided greater): Odds-ratio: 2.79324444444   P-value: 1.22225093494e-05

In [32]: