In [1]:
    
import NotebookImport
from TCGA_analysis_BRCA_import import *
from IPython.display import HTML
    
    
Remove samples where CCND1 is also focally amplified
In [4]:
    
# 1) Test whether tumors with a focal MYC amplification carry TP53 mutations
#    more frequently than tumors without a focal MYC amplification.
#    Tumors in which CCND1 is focally amplified together with MYC are put in
#    neither group, so they never enter the contingency table.
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0
for sample in samples.values():
    # Samples whose somatic_mutation_data is empty/false are not counted.
    if not sample.somatic_mutation_data:
        continue
    # Work out the group once per sample instead of repeating the same
    # checks in every branch.
    myc_amplified = bool(sample.checkFocalGeneAmp("MYC"))
    if myc_amplified and sample.checkFocalGeneAmp("CCND1"):
        continue  # MYC/CCND1 co-amplified: belongs to neither group
    tp53_mutated = "TP53" in sample.genes_affected
    if myc_amplified and tp53_mutated:
        MYC_amp_tp53_count += 1
    elif myc_amplified:
        MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: MYC amplified / not amplified. Columns: TP53 mutated / not mutated.
contingency_table = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
                     [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
# Default call is the two-sided test; the second call is one-sided.
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 1) test whether MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
print("   MYC amplified samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print("   MYC not amplified samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print("   MYC amplified samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print("   MYC not amplified samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
In [6]:
    
# 2) Test whether tumors with focal amplifications of both MYC and CCND1
#    carry TP53 mutations more frequently than tumors with neither
#    amplification. Tumors with exactly one of the two amplifications are put
#    in neither group.
#    (All statements start at column 0: code indented under a column-0
#    comment is rejected by Python as an unexpected indent.)
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0
for sample in samples.values():
    # Samples whose somatic_mutation_data is empty/false are not counted.
    if not sample.somatic_mutation_data:
        continue
    # Each amplification check is run once per sample.
    myc_amplified = bool(sample.checkFocalGeneAmp("MYC"))
    ccnd1_amplified = bool(sample.checkFocalGeneAmp("CCND1"))
    if myc_amplified != ccnd1_amplified:
        continue  # only one of the two genes amplified: in neither group
    both_amplified = myc_amplified  # the two flags agree at this point
    tp53_mutated = "TP53" in sample.genes_affected
    if both_amplified and tp53_mutated:
        MYC_amp_tp53_count += 1
    elif both_amplified:
        MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: both amplified / neither amplified. Columns: TP53 mutated / not mutated.
contingency_table = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
                     [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
# Default call is the two-sided test; the second call is one-sided.
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 2) test whether MYC and CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
print("   MYC and CCND1 amplified samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print("   MYC and CCND1 not amplified samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print("   MYC and CCND1 amplified samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print("   MYC and CCND1 not amplified samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
In [8]:
    
# 3) Test whether tumors with a focal CCND1 amplification carry TP53 mutations
#    more frequently than tumors without a focal CCND1 amplification.
#    Tumors in which MYC is focally amplified together with CCND1 are put in
#    neither group.
#    (All statements start at column 0: code indented under a column-0
#    comment is rejected by Python as an unexpected indent.)
CCND1_amp_tp53_count = 0
CCND1_no_amp_tp53_count = 0
CCND1_amp_no_tp53_count = 0
CCND1_no_amp_no_tp53_count = 0
for sample in samples.values():
    # Samples whose somatic_mutation_data is empty/false are not counted.
    if not sample.somatic_mutation_data:
        continue
    # Work out the group once per sample instead of repeating the same
    # checks in every branch.
    ccnd1_amplified = bool(sample.checkFocalGeneAmp("CCND1"))
    if ccnd1_amplified and sample.checkFocalGeneAmp("MYC"):
        continue  # CCND1/MYC co-amplified: belongs to neither group
    tp53_mutated = "TP53" in sample.genes_affected
    if ccnd1_amplified and tp53_mutated:
        CCND1_amp_tp53_count += 1
    elif ccnd1_amplified:
        CCND1_amp_no_tp53_count += 1
    elif tp53_mutated:
        CCND1_no_amp_tp53_count += 1
    else:
        CCND1_no_amp_no_tp53_count += 1

# Rows: CCND1 amplified / not amplified. Columns: TP53 mutated / not mutated.
contingency_table = [[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count],
                     [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]]
# Default call is the two-sided test; the second call is one-sided.
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 3) test whether CCND1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
print("   CCND1 amplified samples having TP53 mutations: " + str(CCND1_amp_tp53_count))
print("   CCND1 not amplified samples having TP53 mutations: " + str(CCND1_no_amp_tp53_count))
print("   CCND1 amplified samples having no TP53 mutation: " + str(CCND1_amp_no_tp53_count))
print("   CCND1 not amplified samples having no TP53 mutations: " + str(CCND1_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
In [9]:
    
# 4) Test whether tumors with MYC copy number > 4 carry TP53 mutations more
#    frequently than tumors with MYC copy number < 4.
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0
myc_locus = gene_positions['MYC']  # looked up once, not three times per sample
for sample in samples.values():
    # Samples whose somatic_mutation_data is empty/false are not counted.
    if not sample.somatic_mutation_data:
        continue
    log2 = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    if log2 is None:
        continue  # no log2 value for MYC: the sample cannot be grouped
    # Copy number is derived as 2 * 2**log2 (presumably log2 is a ratio
    # against a two-copy baseline -- confirm against getLog2FromGene).
    copynumber = 2.0 * 2.0 ** log2
    if copynumber > 4:
        myc_amplified = True
    elif copynumber < 4:
        myc_amplified = False
    else:
        continue  # neither above nor below 4 (e.g. exactly 4): in neither group
    tp53_mutated = "TP53" in sample.genes_affected
    if myc_amplified and tp53_mutated:
        MYC_amp_tp53_count += 1
    elif myc_amplified:
        MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: MYC CN>4 / CN<4. Columns: TP53 mutated / not mutated.
contingency_table = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
                     [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 4) test whether tumors with copynumber > 4 in MYC contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors")
print("   MYC CN>4 samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print("   MYC CN<4 samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print("   MYC CN>4 samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print("   MYC CN<4 samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print("   Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
In [11]:
    
# 5) Test whether tumors with CCND1 copy number > 4 carry TP53 mutations more
#    frequently than tumors with CCND1 copy number < 4.
CCND1_amp_tp53_count = 0
CCND1_no_amp_tp53_count = 0
CCND1_amp_no_tp53_count = 0
CCND1_no_amp_no_tp53_count = 0
ccnd1_locus = gene_positions['CCND1']  # looked up once, not three times per sample
for sample in samples.values():
    # Samples whose somatic_mutation_data is empty/false are not counted.
    if not sample.somatic_mutation_data:
        continue
    log2 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    if log2 is None:
        continue  # no log2 value for CCND1: the sample cannot be grouped
    # Copy number is derived as 2 * 2**log2 (presumably log2 is a ratio
    # against a two-copy baseline -- confirm against getLog2FromGene).
    copynumber = 2.0 * 2.0 ** log2
    if copynumber > 4:
        ccnd1_amplified = True
    elif copynumber < 4:
        ccnd1_amplified = False
    else:
        continue  # neither above nor below 4 (e.g. exactly 4): in neither group
    tp53_mutated = "TP53" in sample.genes_affected
    if ccnd1_amplified and tp53_mutated:
        CCND1_amp_tp53_count += 1
    elif ccnd1_amplified:
        CCND1_amp_no_tp53_count += 1
    elif tp53_mutated:
        CCND1_no_amp_tp53_count += 1
    else:
        CCND1_no_amp_no_tp53_count += 1

# Rows: CCND1 CN>4 / CN<4. Columns: TP53 mutated / not mutated.
contingency_table = [[CCND1_amp_tp53_count, CCND1_amp_no_tp53_count],
                     [CCND1_no_amp_tp53_count, CCND1_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 5) test whether tumors with copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with CCND1 CN<4 Tumors")
print("   CCND1 CN>4 samples having TP53 mutations: " + str(CCND1_amp_tp53_count))
print("   CCND1 CN<4 samples having TP53 mutations: " + str(CCND1_no_amp_tp53_count))
print("   CCND1 CN>4 samples having no TP53 mutation: " + str(CCND1_amp_no_tp53_count))
print("   CCND1 CN<4 samples having no TP53 mutations: " + str(CCND1_no_amp_no_tp53_count))
print("   Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
In [15]:
    
# 6) Test whether tumors with copy number > 4 in MYC but copy number < 4 in
#    CCND1 carry TP53 mutations more frequently than tumors with MYC copy
#    number < 4. The comparison group is selected on MYC alone; its CCND1
#    copy number is not inspected.
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0
myc_locus = gene_positions['MYC']      # looked up once instead of per sample
ccnd1_locus = gene_positions['CCND1']
for sample in samples.values():
    # Samples whose somatic_mutation_data is empty/false are not counted.
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    if log2_MYC is None:
        continue  # no log2 value for MYC: the sample cannot be grouped
    # Copy numbers are derived as 2 * 2**log2; None marks a missing value.
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = None if log2_CCND1 is None else 2.0 * 2.0 ** log2_CCND1
    if copynumber_MYC > 4:
        # The test group additionally requires a known CCND1 copy number < 4.
        if copynumber_CCND1 is None or not copynumber_CCND1 < 4:
            continue
        myc_amplified = True
    elif copynumber_MYC < 4:
        myc_amplified = False
    else:
        continue  # neither above nor below 4 (e.g. exactly 4): in neither group
    tp53_mutated = "TP53" in sample.genes_affected
    if myc_amplified and tp53_mutated:
        MYC_amp_tp53_count += 1
    elif myc_amplified:
        MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: (MYC CN>4, CCND1 CN<4) / MYC CN<4. Columns: TP53 mutated / not mutated.
contingency_table = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
                     [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 6) test whether tumors with copynumber > 4 in MYC but copynumber < 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors")
print("   MYC CN>4, CCND1 CN<4 samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print("   MYC CN<4 samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print("   MYC CN>4, CCND1 CN<4  samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print("   MYC CN<4 samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print("   Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
In [16]:
    
# 7) Test whether tumors with copy number > 4 in both MYC and CCND1 carry
#    TP53 mutations more frequently than tumors with MYC copy number < 4.
#    The comparison group is selected on MYC alone; its CCND1 copy number is
#    not inspected.
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0
myc_locus = gene_positions['MYC']      # looked up once instead of per sample
ccnd1_locus = gene_positions['CCND1']
for sample in samples.values():
    # Samples whose somatic_mutation_data is empty/false are not counted.
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    if log2_MYC is None:
        continue  # no log2 value for MYC: the sample cannot be grouped
    # Copy numbers are derived as 2 * 2**log2; None marks a missing value.
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = None if log2_CCND1 is None else 2.0 * 2.0 ** log2_CCND1
    if copynumber_MYC > 4:
        # The test group additionally requires a known CCND1 copy number > 4.
        if copynumber_CCND1 is None or not copynumber_CCND1 > 4:
            continue
        myc_amplified = True
    elif copynumber_MYC < 4:
        myc_amplified = False
    else:
        continue  # neither above nor below 4 (e.g. exactly 4): in neither group
    tp53_mutated = "TP53" in sample.genes_affected
    if myc_amplified and tp53_mutated:
        MYC_amp_tp53_count += 1
    elif myc_amplified:
        MYC_amp_no_tp53_count += 1
    elif tp53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# Rows: (MYC CN>4, CCND1 CN>4) / MYC CN<4. Columns: TP53 mutated / not mutated.
contingency_table = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
                     [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# Single-argument print(...) gives the same output under Python 2 and 3.
# The test-group labels read "CCND1 CN>4" to match the selection above (they
# previously repeated the "CCND1 CN<4" wording of test 6).
print("")
print(" 7) test whether tumors with copynumber > 4 in MYC and copynumber > 4 in CCND1 contain TP53 mutations more frequently than tumors with MYC CN<4 Tumors")
print("   MYC CN>4, CCND1 CN>4 samples having TP53 mutations: " + str(MYC_amp_tp53_count))
print("   MYC CN<4 samples having TP53 mutations: " + str(MYC_no_amp_tp53_count))
print("   MYC CN>4, CCND1 CN>4  samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count))
print("   MYC CN<4 samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count))
print("   Fisher-exact test (two-sided): Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
In [17]:
    
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
    
In [17]:
    
# 8) Check the TP53 mutation spectrum over all samples that have somatic
#    mutation data: classify every TP53 mutation, draw a pie chart of the
#    categories and render a count/frequency table.

# Mutation types that translate directly into one spectrum category.
# Missense mutations are not listed here; they are split by position below.
tp53_category_by_type = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Position windows behind the "L2"/"L3" labels (presumably the
            # TP53 L2/L3 loop regions in genomic coordinates -- confirm the
            # reference build these numbers refer to).
            if 7578264 <= mutation.pos <= 7578443:
                tp53_category = "L2_Missense"
            elif 7577528 <= mutation.pos <= 7577575:
                tp53_category = "L3_Missense"
            else:
                tp53_category = "Other_Missense"
        else:
            # Mutation types outside the table are not counted at all.
            tp53_category = tp53_category_by_type.get(mutation.mut_type)
        if tp53_category is not None:
            TP53_types.append(tp53_category)

TP53_type_series = pandas.Categorical(sorted(TP53_types))
# One describe() call feeds both the pie chart and the table below.
tp53_summary = TP53_type_series.describe()
fig_TP53 = tp53_summary.counts.plot(kind='pie', figsize=(6, 6), colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'], title=None)
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('BRCA_mut_spectrum.png', dpi=200)

print("")
print(" 8) check TP53 mutation spectrum")
print("   Total mutations: " + str(len(TP53_types)))
print("   ")

# HTML table with one row per category, in category order.
table_parts = ["<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"]
for row_number, category_name in enumerate(TP53_type_series.categories):
    table_parts.append(
        "<tr><td>" + category_name
        + "</td><td>" + str(tp53_summary['counts'].iloc[row_number])
        + "</td><td>" + str(tp53_summary['freqs'].iloc[row_number])
        + "</td></tr>"
    )
table_parts.append("</table>")
s = "".join(table_parts)
h = HTML(s)
h
    
    
    Out[17]:
    
In [5]:
    
# 8b) Check the TP53 mutation spectrum restricted to tumors whose MYC copy
#     number is known and not above 4. (Comment and printed header used to
#     be identical to the unrestricted spectrum cell, which made the two
#     outputs indistinguishable.)

# Mutation types that translate directly into one spectrum category.
# Missense mutations are not listed here; they are split by position below.
tp53_category_by_type = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types = list()
myc_locus = gene_positions['MYC']  # looked up once instead of per sample
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    if log2_MYC is None:
        continue  # no log2 value for MYC: sample is left out
    # Copy number is derived as 2 * 2**log2.
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    if copynumber_MYC > 4:
        continue  # only tumors with MYC copy number <= 4 are kept
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Position windows behind the "L2"/"L3" labels (presumably the
            # TP53 L2/L3 loop regions in genomic coordinates -- confirm the
            # reference build these numbers refer to).
            if 7578264 <= mutation.pos <= 7578443:
                tp53_category = "L2_Missense"
            elif 7577528 <= mutation.pos <= 7577575:
                tp53_category = "L3_Missense"
            else:
                tp53_category = "Other_Missense"
        else:
            # Mutation types outside the table are not counted at all.
            tp53_category = tp53_category_by_type.get(mutation.mut_type)
        if tp53_category is not None:
            TP53_types.append(tp53_category)

TP53_type_series = pandas.Categorical(sorted(TP53_types))
# One describe() call feeds both the pie chart and the table below.
tp53_summary = TP53_type_series.describe()
fig_TP53 = tp53_summary.counts.plot(kind='pie', figsize=(6, 6), colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'], title=None)
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('BRCA_MYC_cn_smaller4_mut_spectrum.png', dpi=200)

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 8b) check TP53 mutation spectrum in tumors with MYC copynumber <= 4")
print("   Total mutations: " + str(len(TP53_types)))
print("   ")

# HTML table with one row per category, in category order.
table_parts = ["<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"]
for row_number, category_name in enumerate(TP53_type_series.categories):
    table_parts.append(
        "<tr><td>" + category_name
        + "</td><td>" + str(tp53_summary['counts'].iloc[row_number])
        + "</td><td>" + str(tp53_summary['freqs'].iloc[row_number])
        + "</td></tr>"
    )
table_parts.append("</table>")
s = "".join(table_parts)
h = HTML(s)
h
    
    
    Out[5]:
    
In [18]:
    
# 9) Check the TP53 mutation spectrum in tumors with a focal MYC
#    amplification and no focal CCND1 amplification.

# Mutation types that translate directly into one spectrum category.
# Missense mutations are not listed here; they are split by position below.
tp53_category_by_type = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types_MYC = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    # Classify this sample's TP53 mutations first ...
    sample_categories = []
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Position windows behind the "L2"/"L3" labels (presumably the
            # TP53 L2/L3 loop regions in genomic coordinates -- confirm the
            # reference build these numbers refer to).
            if 7578264 <= mutation.pos <= 7578443:
                tp53_category = "L2_Missense"
            elif 7577528 <= mutation.pos <= 7577575:
                tp53_category = "L3_Missense"
            else:
                tp53_category = "Other_Missense"
        else:
            # Mutation types outside the table are not counted at all.
            tp53_category = tp53_category_by_type.get(mutation.mut_type)
        if tp53_category is not None:
            sample_categories.append(tp53_category)
    # ... then check the amplification status at most once per sample, and
    # only when the sample has something to contribute, instead of repeating
    # both checks in every branch for every mutation.
    if sample_categories and sample.checkFocalGeneAmp("MYC") and not sample.checkFocalGeneAmp("CCND1"):
        TP53_types_MYC.extend(sample_categories)

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
# One describe() call feeds both the pie chart and the table below.
tp53_summary = TP53_type_MYC_series.describe()
fig_TP53 = tp53_summary.counts.plot(kind='pie', figsize=(6, 6), colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('BRCA_MYC_focal_mut_spectrum.png', dpi=200)

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 9) check TP53 mutation spectrum in MYC amplified tumors")
print("   Total mutations: " + str(len(TP53_types_MYC)))
print("   ")

# HTML table with one row per category, in category order.
table_parts = ["<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"]
for row_number, category_name in enumerate(TP53_type_MYC_series.categories):
    table_parts.append(
        "<tr><td>" + category_name
        + "</td><td>" + str(tp53_summary['counts'].iloc[row_number])
        + "</td><td>" + str(tp53_summary['freqs'].iloc[row_number])
        + "</td></tr>"
    )
table_parts.append("</table>")
s = "".join(table_parts)
h = HTML(s)
h
    
    
    Out[18]:
    
In [19]:
    
# 10) Check the TP53 mutation spectrum in tumors with MYC copy number > 4 and
#     CCND1 copy number < 4.

# Mutation types that translate directly into one spectrum category.
# Missense mutations are not listed here; they are split by position below.
tp53_category_by_type = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types_MYC = list()
myc_locus = gene_positions['MYC']      # looked up once instead of per sample
ccnd1_locus = gene_positions['CCND1']
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    # Both copy numbers must be known for the sample to qualify.
    if log2_MYC is None or log2_CCND1 is None:
        continue
    # Copy numbers are derived as 2 * 2**log2.
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
    # Group membership is decided once per sample rather than being repeated
    # in every branch of the per-mutation classification.
    if not (copynumber_MYC > 4 and copynumber_CCND1 < 4):
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Position windows behind the "L2"/"L3" labels (presumably the
            # TP53 L2/L3 loop regions in genomic coordinates -- confirm the
            # reference build these numbers refer to).
            if 7578264 <= mutation.pos <= 7578443:
                tp53_category = "L2_Missense"
            elif 7577528 <= mutation.pos <= 7577575:
                tp53_category = "L3_Missense"
            else:
                tp53_category = "Other_Missense"
        else:
            # Mutation types outside the table are not counted at all.
            tp53_category = tp53_category_by_type.get(mutation.mut_type)
        if tp53_category is not None:
            TP53_types_MYC.append(tp53_category)

TP53_type_MYC_series = pandas.Categorical(sorted(TP53_types_MYC))
# One describe() call feeds both the pie chart and the table below.
tp53_summary = TP53_type_MYC_series.describe()
# NOTE(review): this colour list has no 'g', unlike the other spectrum charts.
# Colours are assigned by slice position, so this may be a deliberate way of
# keeping colours aligned when one category is absent here -- confirm.
fig_TP53 = tp53_summary.counts.plot(kind='pie', figsize=(6, 6), colors=['b', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('BRCA_MYC_cn4_mut_spectrum.png', dpi=200)

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 10) Check mutation spectrum in MYC (and not CCND1) samples having copynumber > 4")
print("   Total mutations: " + str(len(TP53_types_MYC)))
print("   ")

# HTML table with one row per category, in category order.
table_parts = ["<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"]
for row_number, category_name in enumerate(TP53_type_MYC_series.categories):
    table_parts.append(
        "<tr><td>" + category_name
        + "</td><td>" + str(tp53_summary['counts'].iloc[row_number])
        + "</td><td>" + str(tp53_summary['freqs'].iloc[row_number])
        + "</td></tr>"
    )
table_parts.append("</table>")
s = "".join(table_parts)
h = HTML(s)
h
    
    
    Out[19]:
    
In [20]:
    
# 11) Check the TP53 mutation spectrum in tumors with a focal CCND1
#     amplification and no focal MYC amplification.

# Mutation types that translate directly into one spectrum category.
# Missense mutations are not listed here; they are split by position below.
tp53_category_by_type = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types_CCND1 = list()
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    # Classify this sample's TP53 mutations first ...
    sample_categories = []
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Position windows behind the "L2"/"L3" labels (presumably the
            # TP53 L2/L3 loop regions in genomic coordinates -- confirm the
            # reference build these numbers refer to).
            if 7578264 <= mutation.pos <= 7578443:
                tp53_category = "L2_Missense"
            elif 7577528 <= mutation.pos <= 7577575:
                tp53_category = "L3_Missense"
            else:
                tp53_category = "Other_Missense"
        else:
            # Mutation types outside the table are not counted at all.
            tp53_category = tp53_category_by_type.get(mutation.mut_type)
        if tp53_category is not None:
            sample_categories.append(tp53_category)
    # ... then check the amplification status at most once per sample, and
    # only when the sample has something to contribute, instead of repeating
    # both checks in every branch for every mutation.
    if sample_categories and sample.checkFocalGeneAmp("CCND1") and not sample.checkFocalGeneAmp("MYC"):
        TP53_types_CCND1.extend(sample_categories)

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
# One describe() call feeds both the pie chart and the table below.
tp53_summary = TP53_types_CCND1_series.describe()
fig_TP53 = tp53_summary.counts.plot(kind='pie', figsize=(6, 6), colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('BRCA_CCND1_focal_mut_spectrum.png', dpi=200)

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 11) check TP53 mutation spectrum in CCND1 focally amplified tumors")
print("   Total mutations: " + str(len(TP53_types_CCND1)))
print("   ")

# HTML table with one row per category, in category order.
table_parts = ["<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"]
for row_number, category_name in enumerate(TP53_types_CCND1_series.categories):
    table_parts.append(
        "<tr><td>" + category_name
        + "</td><td>" + str(tp53_summary['counts'].iloc[row_number])
        + "</td><td>" + str(tp53_summary['freqs'].iloc[row_number])
        + "</td></tr>"
    )
table_parts.append("</table>")
s = "".join(table_parts)
h = HTML(s)
h
    
    
    Out[20]:
    
In [21]:
    
# 12) Check the TP53 mutation spectrum in tumors with CCND1 copy number > 4
#     and MYC copy number < 4.

# Mutation types that translate directly into one spectrum category.
# Missense mutations are not listed here; they are split by position below.
tp53_category_by_type = {
    "Silent": "Silent",
    "Frame_Shift_Del": "Frameshift_Indel",
    "Frame_Shift_Ins": "Frameshift_Indel",
    "In_Frame_Del": "Inframe_Indel",
    "In_Frame_Ins": "Inframe_Indel",
    "Splice_Site": "Splice-Site",
    "Nonsense_Mutation": "Nonsense",
    "Nonstop_Mutation": "Nonstop",
    "RNA": "RNA",
}

TP53_types_CCND1 = list()
myc_locus = gene_positions['MYC']      # looked up once instead of per sample
ccnd1_locus = gene_positions['CCND1']
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(myc_locus.chrom, myc_locus.start, myc_locus.end)
    log2_CCND1 = sample.getLog2FromGene(ccnd1_locus.chrom, ccnd1_locus.start, ccnd1_locus.end)
    # Both copy numbers must be known for the sample to qualify.
    if log2_MYC is None or log2_CCND1 is None:
        continue
    # Copy numbers are derived as 2 * 2**log2.
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
    # Group membership is decided once per sample rather than being repeated
    # in every branch of the per-mutation classification.
    if not (copynumber_MYC < 4 and copynumber_CCND1 > 4):
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if mutation.mut_type == "Missense_Mutation":
            # Position windows behind the "L2"/"L3" labels (presumably the
            # TP53 L2/L3 loop regions in genomic coordinates -- confirm the
            # reference build these numbers refer to).
            if 7578264 <= mutation.pos <= 7578443:
                tp53_category = "L2_Missense"
            elif 7577528 <= mutation.pos <= 7577575:
                tp53_category = "L3_Missense"
            else:
                tp53_category = "Other_Missense"
        else:
            # Mutation types outside the table are not counted at all.
            tp53_category = tp53_category_by_type.get(mutation.mut_type)
        if tp53_category is not None:
            TP53_types_CCND1.append(tp53_category)

TP53_types_CCND1_series = pandas.Categorical(sorted(TP53_types_CCND1))
# One describe() call feeds both the pie chart and the table below.
tp53_summary = TP53_types_CCND1_series.describe()
fig_TP53 = tp53_summary.counts.plot(kind='pie', figsize=(6, 6), colors=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'])
fig_TP53.set_ylabel('')
output1 = fig_TP53.get_figure()
output1.savefig('BRCA_CCND1_cn4_mut_spectrum.png', dpi=200)

# Single-argument print(...) gives the same output under Python 2 and 3.
print("")
print(" 12) Check mutation spectrum in CCND1 (and not MYC) samples having copynumber > 4")
print("   Total mutations: " + str(len(TP53_types_CCND1)))
print("   ")

# HTML table with one row per category, in category order.
table_parts = ["<table><tr><th>Category</th><th>Count</th><th>Frequency</th></tr>"]
for row_number, category_name in enumerate(TP53_types_CCND1_series.categories):
    table_parts.append(
        "<tr><td>" + category_name
        + "</td><td>" + str(tp53_summary['counts'].iloc[row_number])
        + "</td><td>" + str(tp53_summary['freqs'].iloc[row_number])
        + "</td></tr>"
    )
table_parts.append("</table>")
s = "".join(table_parts)
h = HTML(s)
h
    
    
    Out[21]:
    
In [22]:
    
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples
#
# Every TP53 mutation of every sample with somatic mutation data is one
# observation in a 2x2 table:
#   row 0    -> MYC focally amplified and CCND1 not focally amplified
#   row 1    -> MYC not focally amplified (CCND1 status is not looked at)
#   column 0 -> frameshift indel (Frame_Shift_Del / Frame_Shift_Ins)
#   column 1 -> any other mutation type
# TP53 mutations of samples with both MYC and CCND1 focally amplified enter
# neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if not sample.checkFocalGeneAmp("MYC"):
            row = 1
        elif sample.checkFocalGeneAmp("CCND1"):
            # co-amplified sample: excluded from the comparison
            continue
        else:
            row = 0
        column = 0 if mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins") else 1
        contingency_table[row][column] += 1

# Expose the four cells under the names used by the report below.
(MYC_amp_frameshift_count, MYC_amp_no_frameshift_count), \
    (MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) focally amplified samples")
print("   MYC amplified Frameshift Indels: "+str(MYC_amp_frameshift_count))
print("   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count))
print("   MYC not amplified Frameshift Indels: "+str(MYC_no_amp_frameshift_count))
print("   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count))
print("   ")
print("   Fisher exact test (two-sided): ")
print("      -- odd's ratio: "+str(oddsratio))
print("      -- p-value: "+str(pvalue))
print("   Fisher exact test (one-sided greater): ")
print("      -- odd's ratio: "+str(oddsratio_greater))
print("      -- p-value: "+str(pvalue_greater))
    
    
In [3]:
    
# 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples
#
# Observations are the TP53 mutations of samples with somatic mutation data.
# A gene's copy number is derived from its log2 ratio as 2 * 2**log2.
#   row 0 ("MYC amplified")     -> MYC copy number > 4 and CCND1 copy number < 4
#   row 1 ("MYC not amplified") -> MYC copy number < 4
#   column 0 -> frameshift indel (Frame_Shift_Del / Frame_Shift_Ins)
#   column 1 -> any other mutation type
# Samples with no MYC log2 value, with MYC copy number exactly 4, or with
# MYC > 4 but CCND1 missing or >= 4 contribute to neither row.
#
# Both columns of a row use exactly the same sample criterion; row 1 is defined
# by MYC copy number < 4 alone (no focal-amplification check), so the frameshift
# and non-frameshift cells are drawn from the same set of samples.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom, gene_positions['MYC'].start, gene_positions['MYC'].end)
    log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom, gene_positions['CCND1'].start, gene_positions['CCND1'].end)
    copynumber_MYC = None if log2_MYC is None else 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = None if log2_CCND1 is None else 2.0 * 2.0 ** log2_CCND1

    # The row depends only on the sample, so decide it once per sample.
    if copynumber_MYC is None:
        continue
    if copynumber_MYC > 4:
        if copynumber_CCND1 is None or not (copynumber_CCND1 < 4):
            continue
        row = 0
    elif copynumber_MYC < 4:
        row = 1
    else:
        continue

    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        column = 0 if mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins") else 1
        contingency_table[row][column] += 1

# Expose the four cells under the names used by the report below.
(MYC_amp_frameshift_count, MYC_amp_no_frameshift_count), \
    (MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 13) Test whether Frameshift indels occur more frequently in MYC (and not CCND1) CN>4 samples")
print("   MYC amplified Frameshift Indels: "+str(MYC_amp_frameshift_count))
print("   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count))
print("   MYC not amplified Frameshift Indels: "+str(MYC_no_amp_frameshift_count))
print("   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count))
print("   ")
print("   Fisher exact test (two-sided): ")
print("      -- odd's ratio: "+str(oddsratio))
print("      -- p-value: "+str(pvalue))
print("   Fisher exact test (one-sided greater): ")
print("      -- odd's ratio: "+str(oddsratio_greater))
print("      -- p-value: "+str(pvalue_greater))
    
    
In [3]:
    
# 13) Test whether deleterious TP53 mutations (frameshift indels and nonsense
#     mutations) occur more frequently in MYC (and not CCND1) focally amplified samples
#
# Every TP53 mutation of every sample with somatic mutation data is one
# observation in a 2x2 table:
#   row 0    -> MYC focally amplified and CCND1 not focally amplified
#   row 1    -> MYC not focally amplified (CCND1 status is not looked at)
#   column 0 -> Frame_Shift_Del, Frame_Shift_Ins or Nonsense_Mutation
#   column 1 -> any other mutation type
# TP53 mutations of samples with both MYC and CCND1 focally amplified enter
# neither row.  The "*_frameshift_*" variable names are kept although the
# counted category also includes nonsense mutations.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if not sample.checkFocalGeneAmp("MYC"):
            row = 1
        elif sample.checkFocalGeneAmp("CCND1"):
            # co-amplified sample: excluded from the comparison
            continue
        else:
            row = 0
        column = 0 if mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation") else 1
        contingency_table[row][column] += 1

# Expose the four cells under the names used by the report below.
(MYC_amp_frameshift_count, MYC_amp_no_frameshift_count), \
    (MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) focally amplified samples")
print("   MYC amplified deleterious mutations: "+str(MYC_amp_frameshift_count))
print("   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count))
print("   MYC not amplified deleterious mutations: "+str(MYC_no_amp_frameshift_count))
print("   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count))
print("   ")
print("   Fisher exact test (two-sided): ")
print("      -- odd's ratio: "+str(oddsratio))
print("      -- p-value: "+str(pvalue))
print("   Fisher exact test (one-sided greater): ")
print("      -- odd's ratio: "+str(oddsratio_greater))
print("      -- p-value: "+str(pvalue_greater))
    
    
In [5]:
    
# 13) Test whether deleterious TP53 mutations (frameshift indels and nonsense
#     mutations) occur more frequently in MYC (and not CCND1) CN>4 samples
#
# Observations are the TP53 mutations of samples that have somatic mutation
# data AND a log2 value for both MYC and CCND1 (samples missing either are
# skipped entirely).  Copy number is derived from the log2 ratio as 2 * 2**log2.
#   row 0    -> MYC copy number > 4, CCND1 copy number < 4 and CCND1 not focally amplified
#   row 1    -> MYC copy number < 4
#   column 0 -> Frame_Shift_Del, Frame_Shift_Ins or Nonsense_Mutation
#   column 1 -> any other mutation type
# The "*_frameshift_*" variable names are kept although the counted category
# also includes nonsense mutations.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom, gene_positions['MYC'].start, gene_positions['MYC'].end)
    if log2_MYC is None:
        continue
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC

    log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom, gene_positions['CCND1'].start, gene_positions['CCND1'].end)
    if log2_CCND1 is None:
        continue
    copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1

    for mutation in sample.somatic_mutations:
        if mutation.gene != "TP53":
            continue
        if copynumber_MYC > 4:
            if copynumber_CCND1 < 4 and not sample.checkFocalGeneAmp("CCND1"):
                row = 0
            else:
                continue
        elif copynumber_MYC < 4:
            row = 1
        else:
            # MYC copy number of exactly 4 belongs to neither group
            continue
        column = 0 if mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation") else 1
        contingency_table[row][column] += 1

# Expose the four cells under the names used by the report below.
(MYC_amp_frameshift_count, MYC_amp_no_frameshift_count), \
    (MYC_no_amp_frameshift_count, MYC_no_amp_no_frameshift_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 13) Test whether Deleterious mutations occur more frequently in MYC (and not CCND1) CN>4 samples")
print("   MYC amplified deleterious mutations: "+str(MYC_amp_frameshift_count))
print("   MYC amplified other mutations: "+str(MYC_amp_no_frameshift_count))
print("   MYC not amplified deleterious mutations: "+str(MYC_no_amp_frameshift_count))
print("   MYC not amplified other mutations: "+str(MYC_no_amp_no_frameshift_count))
print("   ")
print("   Fisher exact test (two-sided): ")
print("      -- odd's ratio: "+str(oddsratio))
print("      -- p-value: "+str(pvalue))
print("   Fisher exact test (one-sided greater): ")
print("      -- odd's ratio: "+str(oddsratio_greater))
print("      -- p-value: "+str(pvalue_greater))
    
    
In [41]:
    
# 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors
#
# One observation per sample with somatic mutation data:
#   row 0    -> MYC focally amplified and CCND1 not focally amplified
#   row 1    -> MYC not focally amplified (CCND1 status is not looked at)
#   column 0 -> sample carries at least one TP53 Frame_Shift_Del,
#               Frame_Shift_Ins or Nonsense_Mutation
#   column 1 -> sample carries no such TP53 mutation
# Samples with both MYC and CCND1 focally amplified enter neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    del_mut_found = any(
        mutation.gene == "TP53"
        and mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
        for mutation in sample.somatic_mutations
    )

    if not sample.checkFocalGeneAmp("MYC"):
        row = 1
    elif sample.checkFocalGeneAmp("CCND1"):
        # co-amplified sample: excluded from the comparison
        continue
    else:
        row = 0
    contingency_table[row][0 if del_mut_found else 1] += 1

# Expose the four cells under the names used by the report below.
(MYC_amp_tp53_count, MYC_amp_no_tp53_count), \
    (MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 13) test whether MYC focally amplified tumors contain deleterious TP53 mutations more frequently than non focally amplified Tumors")
print("   MYC amplified samples having deleterious TP53 mutations: "+str(MYC_amp_tp53_count))
print("   MYC not amplified samples having deleterious TP53 mutations: "+str(MYC_no_amp_tp53_count))
print("   MYC amplified samples having no deleterious TP53 mutation: "+str(MYC_amp_no_tp53_count))
print("   MYC not amplified samples having no deleterious TP53 mutations: "+str(MYC_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [44]:
    
# 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC
#
# One observation per sample with somatic mutation data.  Copy number is
# derived from the gene's log2 ratio as 2 * 2**log2.
#   row 0    -> MYC copy number > 4 and CCND1 copy number < 4
#   row 1    -> MYC copy number < 4
#   column 0 -> sample carries at least one TP53 Frame_Shift_Del,
#               Frame_Shift_Ins or Nonsense_Mutation
#   column 1 -> sample carries no such TP53 mutation
# Samples with no MYC log2 value, with MYC copy number exactly 4, or with
# MYC > 4 but CCND1 missing or >= 4 enter neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    del_mut_found = any(
        mutation.gene == "TP53"
        and mutation.mut_type in ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
        for mutation in sample.somatic_mutations
    )

    log2_MYC = sample.getLog2FromGene(gene_positions['MYC'].chrom, gene_positions['MYC'].start, gene_positions['MYC'].end)
    log2_CCND1 = sample.getLog2FromGene(gene_positions['CCND1'].chrom, gene_positions['CCND1'].start, gene_positions['CCND1'].end)
    copynumber_MYC = None if log2_MYC is None else 2.0 * 2.0 ** log2_MYC
    copynumber_CCND1 = None if log2_CCND1 is None else 2.0 * 2.0 ** log2_CCND1

    if copynumber_MYC is None:
        continue
    if copynumber_MYC > 4:
        if copynumber_CCND1 is None or not (copynumber_CCND1 < 4):
            continue
        row = 0
    elif copynumber_MYC < 4:
        row = 1
    else:
        continue
    contingency_table[row][0 if del_mut_found else 1] += 1

# Expose the four cells under the names used by the report below.
(MYC_amp_tp53_count, MYC_amp_no_tp53_count), \
    (MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 14) Test whether samples with copynumber > 4 in MYC (but not CCND1) contain deleterious TP53 mutations (Frameshift_indels and Nonsense) more frequently than tumors having copynumber < 4 in MYC")
print("   MYC CN>4 and CCND1 CN<4 samples having deleterious TP53 mutations: "+str(MYC_amp_tp53_count))
print("   MYC CN<4 samples having deleterious TP53 mutations: "+str(MYC_no_amp_tp53_count))
print("   MYC CN>4 and CCND1 CN<4 having no deleterious TP53 mutation: "+str(MYC_amp_no_tp53_count))
print("   MYC CN<4 samples having no deleterious TP53 mutations: "+str(MYC_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [4]:
    
# 15) test whether FGFR1 (but not ERBB2) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
#
# One observation per sample with somatic mutation data:
#   row 0    -> FGFR1 focally amplified and ERBB2 not focally amplified
#   row 1    -> FGFR1 not focally amplified (ERBB2 status is not looked at)
#   column 0 -> "TP53" is among the sample's affected genes
#   column 1 -> it is not
# Samples with both FGFR1 and ERBB2 focally amplified enter neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    if not sample.checkFocalGeneAmp("FGFR1"):
        row = 1
    elif sample.checkFocalGeneAmp("ERBB2"):
        # co-amplified sample: excluded from the comparison
        continue
    else:
        row = 0
    contingency_table[row][0 if "TP53" in sample.genes_affected else 1] += 1

# Expose the four cells under the names used by the report below.
(FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count), \
    (FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 15) test whether FGFR1 (but not ERBB2) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
print("   FGFR1 amplified samples having TP53 mutations: "+str(FGFR1_amp_tp53_count))
print("   FGFR1 not amplified samples having TP53 mutations: "+str(FGFR1_no_amp_tp53_count))
print("   FGFR1 amplified samples having no TP53 mutation: "+str(FGFR1_amp_no_tp53_count))
print("   FGFR1 not amplified samples having no TP53 mutations: "+str(FGFR1_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [6]:
    
# 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4
#
# One observation per sample with somatic mutation data.  Copy number is
# derived from the gene's log2 ratio as 2 * 2**log2.
#   row 0    -> FGFR1 copy number > 4 and ERBB2 copy number < 4
#   row 1    -> FGFR1 copy number < 4
#   column 0 -> "TP53" is among the sample's affected genes
#   column 1 -> it is not
# Samples with no FGFR1 log2 value, with FGFR1 copy number exactly 4, or with
# FGFR1 > 4 but ERBB2 missing or >= 4 enter neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    log2_FGFR1 = sample.getLog2FromGene(gene_positions['FGFR1'].chrom, gene_positions['FGFR1'].start, gene_positions['FGFR1'].end)
    log2_ERBB2 = sample.getLog2FromGene(gene_positions['ERBB2'].chrom, gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
    copynumber_FGFR1 = None if log2_FGFR1 is None else 2.0 * 2.0 ** log2_FGFR1
    copynumber_ERBB2 = None if log2_ERBB2 is None else 2.0 * 2.0 ** log2_ERBB2

    if copynumber_FGFR1 is None:
        continue
    if copynumber_FGFR1 > 4:
        if copynumber_ERBB2 is None or not (copynumber_ERBB2 < 4):
            continue
        row = 0
    elif copynumber_FGFR1 < 4:
        row = 1
    else:
        continue
    contingency_table[row][0 if "TP53" in sample.genes_affected else 1] += 1

# Expose the four cells under the names used by the report below.
(FGFR1_amp_tp53_count, FGFR1_amp_no_tp53_count), \
    (FGFR1_no_amp_tp53_count, FGFR1_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 16) test whether tumors with copynumber > 4 in FGFR1 but copynumber < 4 in ERBB2 contain TP53 mutations more frequently than tumors with FGFR1 CN<4 Tumors")
print("   FGFR1 CN>4, ERBB2 CN<4 samples having TP53 mutations: "+str(FGFR1_amp_tp53_count))
print("   FGFR1 CN<4 samples having TP53 mutations: "+str(FGFR1_no_amp_tp53_count))
print("   FGFR1 CN>4, ERBB2 CN<4  samples having no TP53 mutation: "+str(FGFR1_amp_no_tp53_count))
print("   FGFR1 CN<4 samples having no TP53 mutations: "+str(FGFR1_no_amp_no_tp53_count))
print("   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [2]:
    
# 17) test whether ERBB2 (but not FGFR1) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
#
# One observation per sample with somatic mutation data:
#   row 0    -> ERBB2 focally amplified and FGFR1 not focally amplified
#   row 1    -> ERBB2 not focally amplified (FGFR1 status is not looked at)
#   column 0 -> "TP53" is among the sample's affected genes
#   column 1 -> it is not
# Samples with both ERBB2 and FGFR1 focally amplified enter neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    if not sample.checkFocalGeneAmp("ERBB2"):
        row = 1
    elif sample.checkFocalGeneAmp("FGFR1"):
        # co-amplified sample: excluded from the comparison
        continue
    else:
        row = 0
    contingency_table[row][0 if "TP53" in sample.genes_affected else 1] += 1

# Expose the four cells under the names used by the report below.
(ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count), \
    (ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 17) test whether ERBB2 (but not FGFR1) focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
print("   ERBB2 amplified samples having TP53 mutations: "+str(ERBB2_amp_tp53_count))
print("   ERBB2 not amplified samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count))
print("   ERBB2 amplified samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count))
print("   ERBB2 not amplified samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [10]:
    
# 18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4
#
# One observation per sample with somatic mutation data.  Copy number is
# derived from the gene's log2 ratio as 2 * 2**log2.
#   row 0    -> ERBB2 copy number > 4 and FGFR1 copy number < 4
#   row 1    -> ERBB2 copy number < 4 (the reference group is defined on ERBB2)
#   column 0 -> "TP53" is among the sample's affected genes
#   column 1 -> it is not
# Samples with no ERBB2 log2 value, with ERBB2 copy number exactly 4, or with
# ERBB2 > 4 but FGFR1 missing or >= 4 enter neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue

    log2_FGFR1 = sample.getLog2FromGene(gene_positions['FGFR1'].chrom, gene_positions['FGFR1'].start, gene_positions['FGFR1'].end)
    log2_ERBB2 = sample.getLog2FromGene(gene_positions['ERBB2'].chrom, gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
    copynumber_FGFR1 = None if log2_FGFR1 is None else 2.0 * 2.0 ** log2_FGFR1
    copynumber_ERBB2 = None if log2_ERBB2 is None else 2.0 * 2.0 ** log2_ERBB2

    if copynumber_ERBB2 is None:
        continue
    if copynumber_ERBB2 > 4:
        if copynumber_FGFR1 is None or not (copynumber_FGFR1 < 4):
            continue
        row = 0
    elif copynumber_ERBB2 < 4:
        row = 1
    else:
        continue
    contingency_table[row][0 if "TP53" in sample.genes_affected else 1] += 1

# Expose the four cells under the names used by the report below.
(ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count), \
    (ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print("  18) test whether tumors with copynumber > 4 in ERBB2 but copynumber < 4 in FGFR1 contain TP53 mutations more frequently than tumors with ERBB2 CN<4")
print("   ERBB2 CN>4, FGFR1 CN<4 samples having TP53 mutations: "+str(ERBB2_amp_tp53_count))
print("   ERBB2 CN<4 samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count))
print("   ERBB2 CN>4, FGFR1 CN<4  samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count))
print("   ERBB2 CN<4 samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count))
print("   Fisher-exact test (two-sided): Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [3]:
    
# 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
#
# Two bookkeeping counters are collected over ALL samples:
#   CCNE1_amp_count           -> samples with CCNE1 focally amplified
#   CCNE1_amp_missing_somatic -> those of them lacking somatic mutation data
# The 2x2 table itself only uses samples with somatic mutation data:
#   row 0 / row 1       -> CCNE1 focally amplified / not focally amplified
#   column 0 / column 1 -> "TP53" among the sample's affected genes / not
CCNE1_amp_count = 0
CCNE1_amp_missing_somatic = 0
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    ccne1_amplified = sample.checkFocalGeneAmp("CCNE1")
    if ccne1_amplified:
        CCNE1_amp_count += 1
    if not sample.somatic_mutation_data:
        if ccne1_amplified:
            CCNE1_amp_missing_somatic += 1
        continue
    row = 0 if ccne1_amplified else 1
    column = 0 if "TP53" in sample.genes_affected else 1
    contingency_table[row][column] += 1

# Expose the four cells under the names used by the report below.
(CCNE1_amp_tp53_count, CCNE1_amp_no_tp53_count), \
    (CCNE1_no_amp_tp53_count, CCNE1_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 19) test whether CCNE1 focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
print("   CCNE1 amplified samples: "+str(CCNE1_amp_count))
print("   CCNE1 amplified samples missed due to missing somatic mutation data: "+str(CCNE1_amp_missing_somatic))
print("")
print("   CCNE1 amplified samples having TP53 mutations: "+str(CCNE1_amp_tp53_count))
print("   CCNE1 not amplified samples having TP53 mutations: "+str(CCNE1_no_amp_tp53_count))
print("   CCNE1 amplified samples having no TP53 mutation: "+str(CCNE1_amp_no_tp53_count))
print("   CCNE1 not amplified samples having no TP53 mutations: "+str(CCNE1_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [3]:
    
# 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
#
# One observation per sample with somatic mutation data:
#   row 0    -> ERBB2 and FGFR1 are both focally amplified
#   row 1    -> neither ERBB2 nor FGFR1 is focally amplified
#   column 0 -> "TP53" is among the sample's affected genes
#   column 1 -> it is not
# Samples with exactly one of the two genes focally amplified enter neither row.
contingency_table = [[0, 0], [0, 0]]
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    erbb2_amplified = sample.checkFocalGeneAmp("ERBB2")
    fgfr1_amplified = sample.checkFocalGeneAmp("FGFR1")
    if erbb2_amplified and fgfr1_amplified:
        row = 0
    elif not (erbb2_amplified or fgfr1_amplified):
        row = 1
    else:
        continue
    contingency_table[row][0 if "TP53" in sample.genes_affected else 1] += 1

# Expose the four cells under the names used by the report below.
(ERBB2_amp_tp53_count, ERBB2_amp_no_tp53_count), \
    (ERBB2_no_amp_tp53_count, ERBB2_no_amp_no_tp53_count) = contingency_table

oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

print("")
print(" 20) Test whether ERBB2 focally amplified and FGFR1-amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors")
print("   ERBB2 and FGFR1 amplified samples having TP53 mutations: "+str(ERBB2_amp_tp53_count))
print("   Not amplified samples having TP53 mutations: "+str(ERBB2_no_amp_tp53_count))
print("   ERBB2 and FGFR1 amplified samples having no TP53 mutation: "+str(ERBB2_amp_no_tp53_count))
print("   Not amplified samples having no TP53 mutations: "+str(ERBB2_no_amp_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: "+str(oddsratio)+"   P-value: "+str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: "+str(oddsratio_greater)+"   P-value: "+str(pvalue_greater))
    
    
In [43]:
    
# 21) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53
#
# Samples with somatic mutation data are split by whether "TP53" is among
# their affected genes; each sample's MYC RNA-Seq value goes into the matching
# list unless it is reported as "n/a".  The two groups are compared with a
# t-test and a Mann-Whitney U test, and drawn as a box plot saved to
# BRCA_MYC_expression_TP53_mut.png.
# NOTE(review): scipy.stats.mannwhitneyu's default p-value has been one-sided
# in some scipy releases and two-sided in others -- confirm which applies to
# the installed version before comparing it with the t-test p-value.
MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    if "TP53" in sample.genes_affected:
        expression_group = MYC_expr_TP53
    else:
        expression_group = MYC_expr_no_TP53
    if sample.getRNASeqFromGene("MYC") != "n/a":
        expression_group.append(sample.getRNASeqFromGene("MYC"))

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Wild-type group first so that it matches the first box label.
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['TP53 WT', 'TP53 mut'])
plt.savefig('BRCA_MYC_expression_TP53_mut.png', dpi=200)

print("")
print(" 21) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53")
print("   TP53 mutated MYC expr mean: "+str(MYC_expr_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_TP53)))
print("   TP53 wildtype MYC expr mean: "+str(MYC_expr_no_TP53_series.mean())+ "   Samples: "+str(len(MYC_expr_no_TP53)))
print("   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: "+str(t)+"   p-value:"+str(p))
print("   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: "+str(t1)+"   p-value:"+str(p1))
    
    
    
In [42]:
    
# 22) Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53
# "Deleterious" here means a TP53 mutation of one of the types listed below.
# Samples with only other TP53 mutation types fall into neither group.
deleterious_mut_types = ("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation")
MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    # any() stops at the first matching mutation, like the explicit break did.
    del_mut_found = any(
        mutation.gene == "TP53" and mutation.mut_type in deleterious_mut_types
        for mutation in sample.somatic_mutations
    )
    if del_mut_found:
        expr_group = MYC_expr_TP53
    elif "TP53" not in sample.genes_affected:
        expr_group = MYC_expr_no_TP53
    else:
        # TP53 is affected, but not by a deleterious mutation type: skip.
        continue
    MYC_expr_value = sample.getRNASeqFromGene("MYC")
    if MYC_expr_value != "n/a":
        expr_group.append(MYC_expr_value)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)

# Two-sample comparisons on the raw value lists, using scipy's default settings.
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box order matches the labels: wildtype first, deleteriously mutated second.
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['TP53 WT', 'TP53 del mut'])
plt.savefig('BRCA_MYC_expression_TP53_delmut.png', dpi=200)

# One joined string produces the same output as printing line by line.
report_lines = [
    "",
    " 22) Test whether deleteriously TP53-mutated samples have higher MYC expression levels than tumors without TP53",
    "   TP53 mutated MYC expr mean: " + str(MYC_expr_TP53_series.mean()) + "   Samples: " + str(len(MYC_expr_TP53)),
    "   TP53 wildtype MYC expr mean: " + str(MYC_expr_no_TP53_series.mean()) + "   Samples: " + str(len(MYC_expr_no_TP53)),
    "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: " + str(t) + "   p-value:" + str(p),
    "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: " + str(t1) + "   p-value:" + str(p1),
]
print("\n".join(report_lines))
    
    
    
In [41]:
    
# 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples
# Restricted to samples that have somatic mutation data AND a focal MYC
# amplification; samples without a MYC RNA-Seq value ("n/a") are dropped.
MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
for sample in samples.values():
    # Short-circuit keeps the original evaluation order of the two guards.
    if not sample.somatic_mutation_data or not sample.checkFocalGeneAmp("MYC"):
        continue
    MYC_expr_value = sample.getRNASeqFromGene("MYC")
    if MYC_expr_value == "n/a":
        continue
    if "TP53" in sample.genes_affected:
        MYC_expr_TP53.append(MYC_expr_value)
    else:
        MYC_expr_no_TP53.append(MYC_expr_value)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)

# Two-sample comparisons on the raw value lists, using scipy's default settings.
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box order matches the labels: wildtype first, mutated second.
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['Focal MYC, TP53 WT', 'Focal MYC, TP53 mut'])
plt.savefig('BRCA_MYC_expression_focal_MYC_TP53_mut.png', dpi=200)

# One joined string produces the same output as printing line by line.
report_lines = [
    "",
    " 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC focally amplified samples",
    "   TP53 mutated MYC expr mean: " + str(MYC_expr_TP53_series.mean()) + "   Samples: " + str(len(MYC_expr_TP53)),
    "   TP53 wildtype MYC expr mean: " + str(MYC_expr_no_TP53_series.mean()) + "   Samples: " + str(len(MYC_expr_no_TP53)),
    "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: " + str(t) + "   p-value:" + str(p),
    "   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: " + str(t1) + "   p-value:" + str(p1),
]
print("\n".join(report_lines))
    
    
    
In [6]:
    
# 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples
# Restricted to samples with somatic mutation data whose MYC copy number,
# derived from the segment log2-ratio, reaches the threshold below.

# The MYC locus is identical for every sample, so it is looked up once.
MYC_position = gene_positions['MYC']
# NOTE(review): the threshold is inclusive (only copy numbers < 4 are skipped),
# so samples at exactly 4 are kept even though the labels say "> 4" -- confirm
# which of the two is intended.
MYC_MIN_COPYNUMBER = 4

MYC_expr_TP53 = []
MYC_expr_no_TP53 = []
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    log2_MYC = sample.getLog2FromGene(MYC_position.chrom, MYC_position.start, MYC_position.end)
    if log2_MYC is None:
        # No log2-ratio available for the MYC locus in this sample.
        continue
    # Copy number from log2-ratio: 2 * 2^log2 (presumably a diploid baseline).
    copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    if copynumber_MYC < MYC_MIN_COPYNUMBER:
        continue
    MYC_expr_value = sample.getRNASeqFromGene("MYC")
    if MYC_expr_value != "n/a":
        if "TP53" in sample.genes_affected:
            MYC_expr_TP53.append(MYC_expr_value)
        else:
            MYC_expr_no_TP53.append(MYC_expr_value)

MYC_expr_TP53_series = pandas.Series(MYC_expr_TP53)
MYC_expr_no_TP53_series = pandas.Series(MYC_expr_no_TP53)

# Two-sample comparisons on the raw value lists, using scipy's default settings.
t, p = scipy.stats.ttest_ind(MYC_expr_TP53, MYC_expr_no_TP53)
t1, p1 = scipy.stats.mannwhitneyu(MYC_expr_TP53, MYC_expr_no_TP53)

# Box order matches the labels: wildtype first, mutated second.
data = [MYC_expr_no_TP53_series, MYC_expr_TP53_series]
fig1 = plt.boxplot(data, widths=0.6, labels=['CN>4 MYC, TP53 WT', 'CN>4 MYC, TP53 mut'])
plt.savefig('BRCA_MYC_expression_cn4_MYC_TP53_mut.png', dpi=200)

print("")
print(" 23) Test whether TP53-mutated samples have higher MYC expression levels than tumors without TP53 in MYC copynumber > 4 samples")
print("   TP53 mutated MYC expr mean: " + str(MYC_expr_TP53_series.mean()) + "   Samples: " + str(len(MYC_expr_TP53)))
print("   TP53 wildtype MYC expr mean: " + str(MYC_expr_no_TP53_series.mean()) + "   Samples: " + str(len(MYC_expr_no_TP53)))
print("   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: T-test: " + str(t) + "   p-value:" + str(p))
print("   MYC expression in TP53 mutated samples vs. non-TP53 mutated samples: Mann-W-U-test: " + str(t1) + "   p-value:" + str(p1))
    
    
    
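MYC is considered overexpressed in a tumor sample when the Z-score of its normalized expression level, computed against the mean and standard deviation of the control samples, is > 3.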
In [5]:
    
# Test whether TP53 mutations are enriched in MYC-overexpressing tumors.
# A tumor counts as MYC-overexpressing when the z-score of its MYC expression,
# relative to the control samples' mean and standard deviation, exceeds the
# cutoff. Every other tumor with a valid z-score counts as not overexpressing.
MYC_ZSCORE_CUTOFF = 3

control_MYC_expression = list()
MYC_expr_tp53_count = 0
MYC_no_expr_tp53_count = 0
MYC_expr_no_tp53_count = 0
MYC_no_expr_no_tp53_count = 0

# Reference distribution: MYC expression of all controls that have a value.
for control in controls.values():
    control_MYC_value = control.getRNASeqFromGene("MYC")
    if control_MYC_value != "n/a":
        control_MYC_expression.append(control_MYC_value)
MYC_expr_series = pandas.Series(control_MYC_expression)
control_MYC_expression_mean = MYC_expr_series.mean()
control_MYC_expression_stdev = MYC_expr_series.std()

for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    sample_MYC_value = sample.getRNASeqFromGene("MYC")
    if sample_MYC_value != "n/a":
        z_score_MYC = (sample_MYC_value - control_MYC_expression_mean) / control_MYC_expression_stdev
        if z_score_MYC > MYC_ZSCORE_CUTOFF:
            MYC_overexpressed = True
        elif z_score_MYC <= MYC_ZSCORE_CUTOFF:
            # Includes z == cutoff, which the former "< 3" test silently
            # dropped from every cell of the contingency table.
            MYC_overexpressed = False
        else:
            # Neither comparison holds (z-score is NaN): no classification possible.
            continue
        TP53_mutated = "TP53" in sample.genes_affected
        if MYC_overexpressed and TP53_mutated:
            MYC_expr_tp53_count += 1
        elif MYC_overexpressed:
            MYC_expr_no_tp53_count += 1
        elif TP53_mutated:
            MYC_no_expr_tp53_count += 1
        else:
            MYC_no_expr_no_tp53_count += 1

# 2x2 table: rows = overexpressed / not overexpressed, columns = TP53 mut / no TP53 mut.
contingency_table = [[MYC_expr_tp53_count, MYC_expr_no_tp53_count],
                     [MYC_no_expr_tp53_count, MYC_no_expr_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')
print("")
print(" 23) Test whether TP53 mutations occur in overexpressed MYC-samples")
print("   Control samples:" + str(len(controls.values())))
print("   Control samples mean RSEM: " + str(control_MYC_expression_mean))
print("   Control samples st.dev. RSEM: " + str(control_MYC_expression_stdev))
print("   MYC overexpressed samples having TP53 mutations: " + str(MYC_expr_tp53_count))
print("   MYC not overexpressed samples having TP53 mutations: " + str(MYC_no_expr_tp53_count))
print("   MYC overexpressed samples having no TP53 mutation: " + str(MYC_expr_no_tp53_count))
print("   MYC not overexpressed samples having no TP53 mutations: " + str(MYC_no_expr_no_tp53_count))
print("   Fisher-exact test: Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue))
print("   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater))
    
    
Focal amplification calling requires a log2-ratio difference of 0.2 relative to the neighbouring 20 Mbp on each side. Here, we restrict focal amplifications to segments whose log2-ratio difference is greater than 0.4 on both sides.
In [2]:
    
# 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors
# A sample is "stringent MYC" when one of its focal amplifications contains
# MYC and has both adjacent log2 values above 0.4. Stringent samples that also
# carry a focal CCND1 amplification are counted in no group.
MYC_amp_tp53_count = 0
MYC_no_amp_tp53_count = 0
MYC_amp_no_tp53_count = 0
MYC_no_amp_no_tp53_count = 0
for sample in samples.values():
    if not sample.somatic_mutation_data:
        continue
    # any() stops at the first qualifying amplification, like the explicit break did.
    stringent_MYC = any(
        "MYC" in focal_amp.genes
        and focal_amp.log2_upstream_adj > 0.4
        and focal_amp.log2_downstream_adj > 0.4
        for focal_amp in sample.focal_amplifications
    )
    TP53_mutated = "TP53" in sample.genes_affected
    if stringent_MYC:
        if sample.checkFocalGeneAmp("CCND1"):
            # CCND1 co-amplified: excluded from the amplified group.
            continue
        if TP53_mutated:
            MYC_amp_tp53_count += 1
        else:
            MYC_amp_no_tp53_count += 1
    elif TP53_mutated:
        MYC_no_amp_tp53_count += 1
    else:
        MYC_no_amp_no_tp53_count += 1

# 2x2 table: rows = amplified / not amplified, columns = TP53 mut / no TP53 mut.
contingency_table = [[MYC_amp_tp53_count, MYC_amp_no_tp53_count],
                     [MYC_no_amp_tp53_count, MYC_no_amp_no_tp53_count]]
oddsratio, pvalue = scipy.stats.fisher_exact(contingency_table)
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact(contingency_table, alternative='greater')

# One joined string produces the same output as printing line by line.
report_lines = [
    "",
    " 1) test whether stringent MYC focally amplified tumors contain TP53 mutations more frequently than non focally amplified Tumors",
    "   MYC amplified samples having TP53 mutations: " + str(MYC_amp_tp53_count),
    "   MYC not amplified samples having TP53 mutations: " + str(MYC_no_amp_tp53_count),
    "   MYC amplified samples having no TP53 mutation: " + str(MYC_amp_no_tp53_count),
    "   MYC not amplified samples having no TP53 mutations: " + str(MYC_no_amp_no_tp53_count),
    "   Fisher-exact test: Odds-ratio: " + str(oddsratio) + "   P-value: " + str(pvalue),
    "   Fisher-exact test (one-sided greater): Odds-ratio: " + str(oddsratio_greater) + "   P-value: " + str(pvalue_greater),
]
print("\n".join(report_lines))
    
    
In [ ]: