In Leiserson et al. MYC is left out, though it is presented as a rather "hot" node with frequent copynumber amplifications.
Leiserson:Cancer cells are thought to harbor multiple driver mutations that perturb multiple biological functions15. Consistent with this model, we found that four pairs of subnetworks, including TP53 and NOTCH signaling, TP53 and RTK signaling, PI3K signaling and the cohesin complex, and PI3K signaling and the ASCOM complex, exhibited significant co-occurrence (P < 0.05, multiple-hypotheses corrected) across the pan-cancer cohort (Fig. 2b) or in individual cancer types (Fig. 2c). Multiple pairs of genes within these subnetworks showed co-occurring mutations (Supplementary Table 24). In contrast, mutually exclusive mutations are typically expected within a pathway and not across pathways32,33.
Ref 32 Yeang, C.-H., McCormick, F. & Levine, A. Combinatorial patterns of somatic gene mutations in cancer. FASEB J. 22, 2605–2622 (2008).
Ref 33 Vandin, F., Upfal, E. & Raphael, B.J. De novo discovery of mutated driver pathways in cancer. Genome Res. 22, 375–385 (2012).
Where does MYC belong to?
In [1]:
import NotebookImport
from TCGA_analysis_BRCA_import import *
from IPython.display import HTML
For all pathways described by Leiserson: Check which pathway is more often co-mutated by MYC
In [2]:
TP53_pathway=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK", "SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668"]
STAG1_pathway=["PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A"]
MLL_pathway=["KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1"]
BAP1_pathway=["ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B"]
KRAS_pathway=["BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2"]
HLA_pathway=["B2M", "CD1D","HLA-A", "HLA-B", "P4HTM"]
HER2_pathway=["EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR"]
CDON_pathway=["BOC", "CDON"]
KEAP1_pathway=["KEAP1", "NFE2L2","CHD6", "PTMA", "WAC"]
SPOP_pathway=["MYD88", "SPOP"]
RUNX1_pathway=["CBFB", "RUNX1","ELF4"]
NOTCH_pathway=["FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH"]
ARID_pathway=["ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2"]
SMARC_pathway=["SMARCB1", "SMARCA4"]
SMC_pathway=["NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2"]
CLASP_pathway=["CLASP1","CLASP2","CLIP2"]
pathways={"TP53_pathway": TP53_pathway, "STAG1_pathway":STAG1_pathway, "MLL_pathway":MLL_pathway,"BAP1_pathway":BAP1_pathway,"KRAS_pathway":KRAS_pathway, "HLA_pathway":HLA_pathway, "HER2_pathway":HER2_pathway, "CDON_pathway":CDON_pathway, "KEAP1_pathway":KEAP1_pathway,"SPOP_pathway":SPOP_pathway, "RUNX1_pathway":RUNX1_pathway, "NOTCH_pathway":NOTCH_pathway, "ARID_pathway":ARID_pathway, "SMARC_pathway":SMARC_pathway, "SMC_pathway":SMC_pathway,"CLASP_pathway":CLASP_pathway }
MYC_pos_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_pos_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_affected[pathway] += 1
elif not pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_unaffected[pathway] += 1
elif pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_affected[pathway] += 1
elif not pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 16)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 16)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[2]:
Remove TP53 from TP53 pathway
In [3]:
TP53_pathway=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK", "SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668"]
STAG1_pathway=["PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A"]
MLL_pathway=["KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1"]
BAP1_pathway=["ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B"]
KRAS_pathway=["BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2"]
HLA_pathway=["B2M", "CD1D","HLA-A", "HLA-B", "P4HTM"]
HER2_pathway=["EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR"]
CDON_pathway=["BOC", "CDON"]
KEAP1_pathway=["KEAP1", "NFE2L2","CHD6", "PTMA", "WAC"]
SPOP_pathway=["MYD88", "SPOP"]
RUNX1_pathway=["CBFB", "RUNX1","ELF4"]
NOTCH_pathway=["FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH"]
ARID_pathway=["ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2"]
SMARC_pathway=["SMARCB1", "SMARCA4"]
SMC_pathway=["NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2"]
CLASP_pathway=["CLASP1","CLASP2","CLIP2"]
pathways={"TP53_pathway": TP53_pathway, "STAG1_pathway":STAG1_pathway, "MLL_pathway":MLL_pathway,"BAP1_pathway":BAP1_pathway,"KRAS_pathway":KRAS_pathway, "HLA_pathway":HLA_pathway, "HER2_pathway":HER2_pathway, "CDON_pathway":CDON_pathway, "KEAP1_pathway":KEAP1_pathway,"SPOP_pathway":SPOP_pathway, "RUNX1_pathway":RUNX1_pathway, "NOTCH_pathway":NOTCH_pathway, "ARID_pathway":ARID_pathway, "SMARC_pathway":SMARC_pathway, "SMC_pathway":SMC_pathway,"CLASP_pathway":CLASP_pathway }
MYC_pos_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_pos_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_affected[pathway] += 1
elif not pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_unaffected[pathway] += 1
elif pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_affected[pathway] += 1
elif not pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 16)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 16)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[3]:
For all pathways described by Leiserson: Check which pathway is more often co-mutated by MYC
In [4]:
TP53_pathway=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK", "SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668"]
STAG1_pathway=["PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A"]
MLL_pathway=["KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1"]
BAP1_pathway=["ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B"]
KRAS_pathway=["BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2"]
HLA_pathway=["B2M", "CD1D","HLA-A", "HLA-B", "P4HTM"]
HER2_pathway=["EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR"]
CDON_pathway=["BOC", "CDON"]
KEAP1_pathway=["KEAP1", "NFE2L2","CHD6", "PTMA", "WAC"]
SPOP_pathway=["MYD88", "SPOP"]
RUNX1_pathway=["CBFB", "RUNX1","ELF4"]
NOTCH_pathway=["FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH"]
ARID_pathway=["ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2"]
SMARC_pathway=["SMARCB1", "SMARCA4"]
SMC_pathway=["NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2"]
CLASP_pathway=["CLASP1","CLASP2","CLIP2"]
pathways={"TP53_pathway": TP53_pathway, "STAG1_pathway":STAG1_pathway, "MLL_pathway":MLL_pathway,"BAP1_pathway":BAP1_pathway,"KRAS_pathway":KRAS_pathway, "HLA_pathway":HLA_pathway, "HER2_pathway":HER2_pathway, "CDON_pathway":CDON_pathway, "KEAP1_pathway":KEAP1_pathway,"SPOP_pathway":SPOP_pathway, "RUNX1_pathway":RUNX1_pathway, "NOTCH_pathway":NOTCH_pathway, "ARID_pathway":ARID_pathway, "SMARC_pathway":SMARC_pathway, "SMC_pathway":SMC_pathway,"CLASP_pathway":CLASP_pathway }
MYC_pos_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_pos_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if copynumber != "n/a" and pathway_affected and copynumber > 4:
MYC_pos_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and copynumber > 4:
MYC_pos_pathway_unaffected[pathway] += 1
elif copynumber != "n/a" and pathway_affected and not copynumber > 4:
MYC_neg_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and not copynumber > 4:
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 16)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 16)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[4]:
Remove TP53 from TP53 pathway
In [5]:
TP53_pathway=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK", "SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668"]
STAG1_pathway=["PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A"]
MLL_pathway=["KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1"]
BAP1_pathway=["ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B"]
KRAS_pathway=["BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2"]
HLA_pathway=["B2M", "CD1D","HLA-A", "HLA-B", "P4HTM"]
HER2_pathway=["EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR"]
CDON_pathway=["BOC", "CDON"]
KEAP1_pathway=["KEAP1", "NFE2L2","CHD6", "PTMA", "WAC"]
SPOP_pathway=["MYD88", "SPOP"]
RUNX1_pathway=["CBFB", "RUNX1","ELF4"]
NOTCH_pathway=["FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH"]
ARID_pathway=["ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2"]
SMARC_pathway=["SMARCB1", "SMARCA4"]
SMC_pathway=["NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2"]
CLASP_pathway=["CLASP1","CLASP2","CLIP2"]
pathways={"TP53_pathway": TP53_pathway, "STAG1_pathway":STAG1_pathway, "MLL_pathway":MLL_pathway,"BAP1_pathway":BAP1_pathway,"KRAS_pathway":KRAS_pathway, "HLA_pathway":HLA_pathway, "HER2_pathway":HER2_pathway, "CDON_pathway":CDON_pathway, "KEAP1_pathway":KEAP1_pathway,"SPOP_pathway":SPOP_pathway, "RUNX1_pathway":RUNX1_pathway, "NOTCH_pathway":NOTCH_pathway, "ARID_pathway":ARID_pathway, "SMARC_pathway":SMARC_pathway, "SMC_pathway":SMC_pathway,"CLASP_pathway":CLASP_pathway }
MYC_pos_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_pos_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if copynumber != "n/a" and pathway_affected and copynumber > 4:
MYC_pos_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and copynumber > 4:
MYC_pos_pathway_unaffected[pathway] += 1
elif copynumber != "n/a" and pathway_affected and not copynumber > 4:
MYC_neg_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and not copynumber > 4:
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 16)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 16)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[5]:
Check all genes which appear in any pathway in Leiserson paper Only output genes where any p-value is lower than 0.05
In [6]:
genes=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK",
"SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668",
"PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A",
"KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1",
"ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B",
"BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2",
"B2M", "CD1D","HLA-A", "HLA-B", "P4HTM",
"EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR",
"BOC", "CDON",
"KEAP1", "NFE2L2","CHD6", "PTMA", "WAC",
"MYD88", "SPOP",
"CBFB", "RUNX1","ELF4",
"FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH",
"ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2",
"SMARCB1", "SMARCA4",
"NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2",
"CLASP1","CLASP2","CLIP2"]
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Gene</th><th>MYC + Gene affected</th><th>MYC + Gene unaffected</th><th>MYC - Gene affected</th><th>MYC - Gene unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
gene_count = 0
for gene in genes:
gene_count += 1
MYC_pos_gene_affected = 0
MYC_pos_gene_unaffected = 0
MYC_neg_gene_affected = 0
MYC_neg_gene_unaffected = 0
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
if sample.checkFocalGeneAmp(gene) or gene in sample.genes_affected:
if sample.checkFocalGeneAmp("MYC"):
MYC_pos_gene_affected += 1
else:
MYC_neg_gene_affected += 1
else:
if sample.checkFocalGeneAmp("MYC"):
MYC_pos_gene_unaffected += 1
else:
MYC_neg_gene_unaffected += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='less')
s += "<tr><th>"+gene+"</th><td>"+str(MYC_pos_gene_affected)+"</td>"
s += "<td>"+str(MYC_pos_gene_unaffected)+"</td>"
s += "<td>"+str(MYC_neg_gene_affected)+"</td>"
s += "<td>"+str(MYC_neg_gene_unaffected)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 134)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 134)+"</td></tr>"
s += "</table>"
print "Genes: "+str(gene_count)
h = HTML(s);h
Out[6]:
In [7]:
genes=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK",
"SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668",
"PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A",
"KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1",
"ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B",
"BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2",
"B2M", "CD1D","HLA-A", "HLA-B", "P4HTM",
"EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR",
"BOC", "CDON",
"KEAP1", "NFE2L2","CHD6", "PTMA", "WAC",
"MYD88", "SPOP",
"CBFB", "RUNX1","ELF4",
"FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH",
"ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2",
"SMARCB1", "SMARCA4",
"NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2",
"CLASP1","CLASP2","CLIP2"]
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Gene</th><th>MYC + Gene affected</th><th>MYC + Gene unaffected</th><th>MYC - Gene affected</th><th>MYC - Gene unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
gene_count = 0
for gene in genes:
gene_count += 1
MYC_pos_gene_affected = 0
MYC_pos_gene_unaffected = 0
MYC_neg_gene_affected = 0
MYC_neg_gene_unaffected = 0
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
if sample.checkFocalGeneAmp(gene) or gene in sample.genes_affected:
if copynumber != "n/a" and copynumber > 4:
MYC_pos_gene_affected += 1
elif copynumber != "n/a" and not copynumber > 4:
MYC_neg_gene_affected += 1
else:
if copynumber != "n/a" and copynumber > 4:
MYC_pos_gene_unaffected += 1
elif copynumber != "n/a" and not copynumber > 4:
MYC_neg_gene_unaffected += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='less')
#if (pvalue > 0.05 and pvalue_greater > 0.05 and pvalue_less > 0.05):
#continue
s += "<tr><th>"+gene+"</th><td>"+str(MYC_pos_gene_affected)+"</td>"
s += "<td>"+str(MYC_pos_gene_unaffected)+"</td>"
s += "<td>"+str(MYC_neg_gene_affected)+"</td>"
s += "<td>"+str(MYC_neg_gene_unaffected)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.0005:
s += "<td>"+"{:.10f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.10f}".format(pvalue_greater * 134)+"</td>"
elif pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 134)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 134)+"</td></tr>"
s += "</table>"
print "Genes: "+str(gene_count)
h = HTML(s);h
Out[7]:
This time use pathway definitions of Vogelstein et al.
Note that MYC has been deleted from pathway: Cell Cycle / Apoptosis
In [8]:
chromatin_modification_pathway=["ARID1A","ARID1B","ARID2","ASXL1","ATRX","CREBBP","DAXX","DNMT1","DNMT3A","EP300","EZH2","H3H3A","HIST1H3B","IDH1","IDH2","KDM5C","KDM6A","MEN1","MLL2","MLL3","NCOR1","PAX5","PBRM1","PRDM1","SETD2","SETBP1","SMARCA4","SMARCB1","SPOP","TET2","WT1","NCOA3"]
cell_cycle_apoptosis_pathway=["ABL1","BCL2","CARD1","CASP8","CDC73","CDKN2A","CYLD","DAXX","FUBP1","MED12","MYD88","NFE2L2","NPM2","PPP2R1A","RB1","TNFAIP3","TRAF7","TP53","CCND1","CDKN2C","MDM2","MDM4","MYCL1","MYCN","SKP2"]
tgf_beta_pathway=["ACVR1B","EP300","FOXL2","GATA1","GATA2","GNAS","MED12","SMAD2","SMAD4"]
PI3K_pathway=["AKT1","ALK","B2M","CBL","CEBPA","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","KIT","MET","PDGFRA","PIK2CA","PIK3R1","PTEN","RET","TSC1","TSHR","VHL","NCOA3"]
RAS_pathway=["ALK","B2M","BRAF","CBL","CEBPA","CIC","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","HRAS","KIT","KRAS","MAP2K1","MAP3K1","MET","NF1","NRAS","PDGFRA","PTPN11","RET","VHL"]
APC_pathway=["AXIN1","CDH1","CTNNB1","EP300","FAM123B","GNAS","HNF1A","NF2","RNF43","SOX9"]
transcription_regulation_pathway=["AR","BCOR","CREBBP","GATA3","KLF4","PHF6","RUNX1","SF3B1","SRSF2","U2AF1","IKZF1","LMO1"]
dna_damage_pathway=["ATM","BAP1","BRCA1","BRCA2","MLH1","MSH2","MSH6","STAG2","TP53"]
mapkinase_pathway=["B2M","CEBPA","GNA11","GNAQ","MAP3K1","TNFAIP3","TSHR","MAP2K4","NKX2-1"]
STAT_pathway=["CLRF","FGFR2","FGFR3","FLT3","JAK1","JAK2","JAK3","STAT","MPL","SOCS1","VHL"]
NOTCH_pathway=["EP300","FBXW7","GATA1","GATA2","NOTCH1","NOTCH2"]
wnt_pathway=["KLF4"]
hedgehog_pathway=["PTCH1","SMO","SPOP"]
mTOR_pathway=["STK11"]
replicationpathway=["SETBP1"]
pathways={"Chromatin Modification": chromatin_modification_pathway, "Cell Cycle\\Apoptosis":cell_cycle_apoptosis_pathway,
"TGF beta":tgf_beta_pathway,"PI3K":PI3K_pathway,"RAS":RAS_pathway, "APC":APC_pathway,
"Transcription Regulation":transcription_regulation_pathway,
"DNA Damage Repair":dna_damage_pathway, "MAP Kinase":mapkinase_pathway,
"STAT":STAT_pathway, "NOTCH":NOTCH_pathway,
"WNT":wnt_pathway, "Hedgehog":hedgehog_pathway,
"mTOR":mTOR_pathway, "Replication":replicationpathway}
MYC_pos_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_pos_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_affected[pathway] += 1
elif not pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_unaffected[pathway] += 1
elif pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_affected[pathway] += 1
elif not pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.0005:
s += "<td>"+"{:.10f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.10f}".format(pvalue_greater * 15)+"</td>"
elif pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 15)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 15)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[8]:
Remove TP53 from Pathways: Cell Cycle/Apoptosis and DNA Damaga Control
In [9]:
chromatin_modification_pathway=["ARID1A","ARID1B","ARID2","ASXL1","ATRX","CREBBP","DAXX","DNMT1","DNMT3A","EP300","EZH2","H3H3A","HIST1H3B","IDH1","IDH2","KDM5C","KDM6A","MEN1","MLL2","MLL3","NCOR1","PAX5","PBRM1","PRDM1","SETD2","SETBP1","SMARCA4","SMARCB1","SPOP","TET2","WT1","NCOA3"]
cell_cycle_apoptosis_pathway=["ABL1","BCL2","CARD1","CASP8","CDC73","CDKN2A","CYLD","DAXX","FUBP1","MED12","MYD88","NFE2L2","NPM2","PPP2R1A","RB1","TNFAIP3","TRAF7","CCND1","CDKN2C","MDM2","MDM4","MYCL1","MYCN","SKP2"]
tgf_beta_pathway=["ACVR1B","EP300","FOXL2","GATA1","GATA2","GNAS","MED12","SMAD2","SMAD4"]
PI3K_pathway=["AKT1","ALK","B2M","CBL","CEBPA","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","KIT","MET","PDGFRA","PIK2CA","PIK3R1","PTEN","RET","TSC1","TSHR","VHL","NCOA3"]
RAS_pathway=["ALK","B2M","BRAF","CBL","CEBPA","CIC","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","HRAS","KIT","KRAS","MAP2K1","MAP3K1","MET","NF1","NRAS","PDGFRA","PTPN11","RET","VHL"]
APC_pathway=["AXIN1","CDH1","CTNNB1","EP300","FAM123B","GNAS","HNF1A","NF2","RNF43","SOX9"]
transcription_regulation_pathway=["AR","BCOR","CREBBP","GATA3","KLF4","PHF6","RUNX1","SF3B1","SRSF2","U2AF1","IKZF1","LMO1"]
dna_damage_pathway=["ATM","BAP1","BRCA1","BRCA2","MLH1","MSH2","MSH6","STAG2"]
mapkinase_pathway=["B2M","CEBPA","GNA11","GNAQ","MAP3K1","TNFAIP3","TSHR","MAP2K4","NKX2-1"]
STAT_pathway=["CLRF","FGFR2","FGFR3","FLT3","JAK1","JAK2","JAK3","STAT","MPL","SOCS1","VHL"]
NOTCH_pathway=["EP300","FBXW7","GATA1","GATA2","NOTCH1","NOTCH2"]
wnt_pathway=["KLF4"]
hedgehog_pathway=["PTCH1","SMO","SPOP"]
mTOR_pathway=["STK11"]
replicationpathway=["SETBP1"]
pathways={"Chromatin Modification": chromatin_modification_pathway, "Cell Cycle\\Apoptosis":cell_cycle_apoptosis_pathway,
"TGF beta":tgf_beta_pathway,"PI3K":PI3K_pathway,"RAS":RAS_pathway, "APC":APC_pathway,
"Transcription Regulation":transcription_regulation_pathway,
"DNA Damage Repair":dna_damage_pathway, "MAP Kinase":mapkinase_pathway,
"STAT":STAT_pathway, "NOTCH":NOTCH_pathway,
"WNT":wnt_pathway, "Hedgehog":hedgehog_pathway,
"mTOR":mTOR_pathway, "Replication":replicationpathway}
MYC_pos_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_pos_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_affected[pathway] += 1
elif not pathway_affected and sample.checkFocalGeneAmp("MYC"):
MYC_pos_pathway_unaffected[pathway] += 1
elif pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_affected[pathway] += 1
elif not pathway_affected and not sample.checkFocalGeneAmp("MYC"):
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 15)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 15)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[9]:
In [10]:
chromatin_modification_pathway=["ARID1A","ARID1B","ARID2","ASXL1","ATRX","CREBBP","DAXX","DNMT1","DNMT3A","EP300","EZH2","H3H3A","HIST1H3B","IDH1","IDH2","KDM5C","KDM6A","MEN1","MLL2","MLL3","NCOR1","PAX5","PBRM1","PRDM1","SETD2","SETBP1","SMARCA4","SMARCB1","SPOP","TET2","WT1","NCOA3"]
cell_cycle_apoptosis_pathway=["ABL1","BCL2","CARD1","CASP8","CDC73","CDKN2A","CYLD","DAXX","FUBP1","MED12","MYD88","NFE2L2","NPM2","PPP2R1A","RB1","TNFAIP3","TRAF7","TP53","CCND1","CDKN2C","MDM2","MDM4","MYCL1","MYCN","SKP2"]
tgf_beta_pathway=["ACVR1B","EP300","FOXL2","GATA1","GATA2","GNAS","MED12","SMAD2","SMAD4"]
PI3K_pathway=["AKT1","ALK","B2M","CBL","CEBPA","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","KIT","MET","PDGFRA","PIK2CA","PIK3R1","PTEN","RET","TSC1","TSHR","VHL","NCOA3"]
RAS_pathway=["ALK","B2M","BRAF","CBL","CEBPA","CIC","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","HRAS","KIT","KRAS","MAP2K1","MAP3K1","MET","NF1","NRAS","PDGFRA","PTPN11","RET","VHL"]
APC_pathway=["AXIN1","CDH1","CTNNB1","EP300","FAM123B","GNAS","HNF1A","NF2","RNF43","SOX9"]
transcription_regulation_pathway=["AR","BCOR","CREBBP","GATA3","KLF4","PHF6","RUNX1","SF3B1","SRSF2","U2AF1","IKZF1","LMO1"]
dna_damage_pathway=["ATM","BAP1","BRCA1","BRCA2","MLH1","MSH2","MSH6","STAG2","TP53"]
mapkinase_pathway=["B2M","CEBPA","GNA11","GNAQ","MAP3K1","TNFAIP3","TSHR","MAP2K4","NKX2-1"]
STAT_pathway=["CLRF","FGFR2","FGFR3","FLT3","JAK1","JAK2","JAK3","STAT","MPL","SOCS1","VHL"]
NOTCH_pathway=["EP300","FBXW7","GATA1","GATA2","NOTCH1","NOTCH2"]
wnt_pathway=["KLF4"]
hedgehog_pathway=["PTCH1","SMO","SPOP"]
mTOR_pathway=["STK11"]
replicationpathway=["SETBP1"]
pathways={"Chromatin Modification": chromatin_modification_pathway, "Cell Cycle\\Apoptosis":cell_cycle_apoptosis_pathway,
"TGF beta":tgf_beta_pathway,"PI3K":PI3K_pathway,"RAS":RAS_pathway, "APC":APC_pathway,
"Transcription Regulation":transcription_regulation_pathway,
"DNA Damage Repair":dna_damage_pathway, "MAP Kinase":mapkinase_pathway,
"STAT":STAT_pathway, "NOTCH":NOTCH_pathway,
"WNT":wnt_pathway, "Hedgehog":hedgehog_pathway,
"mTOR":mTOR_pathway, "Replication":replicationpathway}
MYC_pos_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_pos_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
if copynumber == "n/a":
continue
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if pathway_affected and copynumber > 4:
MYC_pos_pathway_affected[pathway] += 1
elif not pathway_affected and copynumber > 4:
MYC_pos_pathway_unaffected[pathway] += 1
elif pathway_affected and not copynumber > 4:
MYC_neg_pathway_affected[pathway] += 1
elif not pathway_affected and not copynumber > 4:
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.0005:
s += "<td>"+"{:.10f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.10f}".format(pvalue_greater * 15)+"</td>"
elif pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 15)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 15)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[10]:
Remove TP53 from pathways Cell Cycle / Apoptosis and DNA Damage repair
In [11]:
chromatin_modification_pathway=["ARID1A","ARID1B","ARID2","ASXL1","ATRX","CREBBP","DAXX","DNMT1","DNMT3A","EP300","EZH2","H3H3A","HIST1H3B","IDH1","IDH2","KDM5C","KDM6A","MEN1","MLL2","MLL3","NCOR1","PAX5","PBRM1","PRDM1","SETD2","SETBP1","SMARCA4","SMARCB1","SPOP","TET2","WT1","NCOA3"]
cell_cycle_apoptosis_pathway=["ABL1","BCL2","CARD1","CASP8","CDC73","CDKN2A","CYLD","DAXX","FUBP1","MED12","MYD88","NFE2L2","NPM2","PPP2R1A","RB1","TNFAIP3","TRAF7","CCND1","CDKN2C","MDM2","MDM4","MYCL1","MYCN","SKP2"]
tgf_beta_pathway=["ACVR1B","EP300","FOXL2","GATA1","GATA2","GNAS","MED12","SMAD2","SMAD4"]
PI3K_pathway=["AKT1","ALK","B2M","CBL","CEBPA","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","KIT","MET","PDGFRA","PIK2CA","PIK3R1","PTEN","RET","TSC1","TSHR","VHL","NCOA3"]
RAS_pathway=["ALK","B2M","BRAF","CBL","CEBPA","CIC","CSF1R","EGFR","ERBB2","FGFR2","FGFR3","FLT3","GNA11","GNAQ","GNAS","HRAS","KIT","KRAS","MAP2K1","MAP3K1","MET","NF1","NRAS","PDGFRA","PTPN11","RET","VHL"]
APC_pathway=["AXIN1","CDH1","CTNNB1","EP300","FAM123B","GNAS","HNF1A","NF2","RNF43","SOX9"]
transcription_regulation_pathway=["AR","BCOR","CREBBP","GATA3","KLF4","PHF6","RUNX1","SF3B1","SRSF2","U2AF1","IKZF1","LMO1"]
dna_damage_pathway=["ATM","BAP1","BRCA1","BRCA2","MLH1","MSH2","MSH6","STAG2"]
mapkinase_pathway=["B2M","CEBPA","GNA11","GNAQ","MAP3K1","TNFAIP3","TSHR","MAP2K4","NKX2-1"]
STAT_pathway=["CLRF","FGFR2","FGFR3","FLT3","JAK1","JAK2","JAK3","STAT","MPL","SOCS1","VHL"]
NOTCH_pathway=["EP300","FBXW7","GATA1","GATA2","NOTCH1","NOTCH2"]
wnt_pathway=["KLF4"]
hedgehog_pathway=["PTCH1","SMO","SPOP"]
mTOR_pathway=["STK11"]
replicationpathway=["SETBP1"]
pathways={"Chromatin Modification": chromatin_modification_pathway, "Cell Cycle\\Apoptosis":cell_cycle_apoptosis_pathway,
"TGF beta":tgf_beta_pathway,"PI3K":PI3K_pathway,"RAS":RAS_pathway, "APC":APC_pathway,
"Transcription Regulation":transcription_regulation_pathway,
"DNA Damage Repair":dna_damage_pathway, "MAP Kinase":mapkinase_pathway,
"STAT":STAT_pathway, "NOTCH":NOTCH_pathway,
"WNT":wnt_pathway, "Hedgehog":hedgehog_pathway,
"mTOR":mTOR_pathway, "Replication":replicationpathway}
MYC_pos_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_pos_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_affected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
MYC_neg_pathway_unaffected={"Chromatin Modification": 0, "Cell Cycle\\Apoptosis":0, "TGF beta":0,"PI3K":0,"RAS":0, "APC":0,
"Transcription Regulation":0, "DNA Damage Repair":0, "MAP Kinase":0,"STAT":0, "NOTCH":0, "WNT":0, "Hedgehog":0,
"mTOR":0, "Replication":0}
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
if copynumber == "n/a":
continue
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if pathway_affected and copynumber > 4:
MYC_pos_pathway_affected[pathway] += 1
elif not pathway_affected and copynumber > 4:
MYC_pos_pathway_unaffected[pathway] += 1
elif pathway_affected and not copynumber > 4:
MYC_neg_pathway_affected[pathway] += 1
elif not pathway_affected and not copynumber > 4:
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 15)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 15)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 15)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 15)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[11]:
This time use genes of pathway definitions of Vogelstein et al.
Note that MYC has been deleted from the gene list
In [12]:
genes=["ABL1","ACVR1B","AKT1","ALK","APC","AR","ARID1A","ARID1B","ARID2","ASXL1","ATM","ATRX","AXIN1","B2M","BAP1","BCL2","BCOR","BRAF",
"BRCA1","BRCA2","CARD11","CASP8","CBL","CDC73","CDH1","CDKN2A","CEBPA","CIC","CREBBP","CRLF2","CSF1R","CTNNB1","CYLD","DAXX",
"DNMT1","DNMT3A","EGFR","EP300","ERBB2","EZH2","FAM123B","FBXW7","FGFR2","FGFR3","FLT3","FOXL2","FUBP1","GATA1","GATA2","GATA3",
"GNA11","GNAQ","GNAS","H3F3A","HIST1H3B","HNF1A","HRAS","IDH1","IDH2","JAK1","JAK2","JAK3","KDM5C","KDM6A","KIT","KLF4","KRAS",
"MAP2K1","MAP3K1","MED12","MEN1","MET","MLH1","MLL2","MLL3","MPL","MSH2","MSH6","MYD88","NCOR1","NF1","NF2","NFE2L2","NOTCH1",
"NOTCH2","NPM1","NRAS","PAX5","PBRM1","PDGFRA","PHF6","PIK3CA","PIK3R1","PPP2R1A","PRDM1","PTCH1","PTEN","PTPN11","RB1","RET",
"RNF43","RUNX1","SETD2","SETBP1","SF3B1","SMAD2","SMAD4","SMARCA4","SMARCB1","SMO","SOCS1","SOX9","SPOP","SRSF2","STAG2","STK11",
"TET2","TNFAIP3","TRAF7","TP53","TSC1","TSHR","U2AF1","VHL","WT1","CCND1","CDKN2C","IKZF1","LMO1","MAP2K4","MDM2","MDM4",
"MYCL1","MYCN","NCOA3","NKX2-1","SKP2"]
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Gene</th><th>MYC + Gene affected</th><th>MYC + Gene unaffected</th><th>MYC - Gene affected</th><th>MYC - Gene unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
gene_count = 0
for gene in genes:
gene_count += 1
MYC_pos_gene_affected = 0
MYC_pos_gene_unaffected = 0
MYC_neg_gene_affected = 0
MYC_neg_gene_unaffected = 0
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
if sample.checkFocalGeneAmp(gene) or gene in sample.genes_affected:
if sample.checkFocalGeneAmp("MYC"):
MYC_pos_gene_affected += 1
else:
MYC_neg_gene_affected += 1
else:
if sample.checkFocalGeneAmp("MYC"):
MYC_pos_gene_unaffected += 1
else:
MYC_neg_gene_unaffected += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='less')
if (pvalue > 0.05 and pvalue_greater > 0.05 and pvalue_less > 0.05):
continue
s += "<tr><th>"+gene+"</th><td>"+str(MYC_pos_gene_affected)+"</td>"
s += "<td>"+str(MYC_pos_gene_unaffected)+"</td>"
s += "<td>"+str(MYC_neg_gene_affected)+"</td>"
s += "<td>"+str(MYC_neg_gene_unaffected)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 137)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 137)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 137)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 137)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 137)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 137)+"</td></tr>"
s += "</table>"
print "Genes: "+str(gene_count)
h = HTML(s);h
Out[12]:
In [13]:
genes=["ABL1","ACVR1B","AKT1","ALK","APC","AR","ARID1A","ARID1B","ARID2","ASXL1","ATM","ATRX","AXIN1","B2M","BAP1","BCL2","BCOR","BRAF",
"BRCA1","BRCA2","CARD11","CASP8","CBL","CDC73","CDH1","CDKN2A","CEBPA","CIC","CREBBP","CRLF2","CSF1R","CTNNB1","CYLD","DAXX",
"DNMT1","DNMT3A","EGFR","EP300","ERBB2","EZH2","FAM123B","FBXW7","FGFR2","FGFR3","FLT3","FOXL2","FUBP1","GATA1","GATA2","GATA3",
"GNA11","GNAQ","GNAS","H3F3A","HIST1H3B","HNF1A","HRAS","IDH1","IDH2","JAK1","JAK2","JAK3","KDM5C","KDM6A","KIT","KLF4","KRAS",
"MAP2K1","MAP3K1","MED12","MEN1","MET","MLH1","MLL2","MLL3","MPL","MSH2","MSH6","MYD88","NCOR1","NF1","NF2","NFE2L2","NOTCH1",
"NOTCH2","NPM1","NRAS","PAX5","PBRM1","PDGFRA","PHF6","PIK3CA","PIK3R1","PPP2R1A","PRDM1","PTCH1","PTEN","PTPN11","RB1","RET",
"RNF43","RUNX1","SETD2","SETBP1","SF3B1","SMAD2","SMAD4","SMARCA4","SMARCB1","SMO","SOCS1","SOX9","SPOP","SRSF2","STAG2","STK11",
"TET2","TNFAIP3","TRAF7","TP53","TSC1","TSHR","U2AF1","VHL","WT1","CCND1","CDKN2C","IKZF1","LMO1","MAP2K4","MDM2","MDM4",
"MYCL1","MYCN","NCOA3","NKX2-1","SKP2"]
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Gene</th><th>MYC + Gene affected</th><th>MYC + Gene unaffected</th><th>MYC - Gene affected</th><th>MYC - Gene unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
gene_count = 0
for gene in genes:
gene_count += 1
MYC_pos_gene_affected = 0
MYC_pos_gene_unaffected = 0
MYC_neg_gene_affected = 0
MYC_neg_gene_unaffected = 0
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
if copynumber == "n/a":
continue
if sample.checkFocalGeneAmp(gene) or gene in sample.genes_affected:
if copynumber > 4:
MYC_pos_gene_affected += 1
else:
MYC_neg_gene_affected += 1
else:
if copynumber > 4:
MYC_pos_gene_unaffected += 1
else:
MYC_neg_gene_unaffected += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='less')
if (pvalue > 0.05 and pvalue_greater > 0.05 and pvalue_less > 0.05):
continue
s += "<tr><th>"+gene+"</th><td>"+str(MYC_pos_gene_affected)+"</td>"
s += "<td>"+str(MYC_pos_gene_unaffected)+"</td>"
s += "<td>"+str(MYC_neg_gene_affected)+"</td>"
s += "<td>"+str(MYC_neg_gene_unaffected)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 137)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 137)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 137)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 137)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 137)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 137)+"</td></tr>"
s += "</table>"
print "Genes: "+str(gene_count)
h = HTML(s);h
Out[13]:
In [14]:
TP53_pathway=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK", "SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668"]
STAG1_pathway=["PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A"]
MLL_pathway=["KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1"]
BAP1_pathway=["ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B"]
KRAS_pathway=["BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2"]
HLA_pathway=["B2M", "CD1D","HLA-A", "HLA-B", "P4HTM"]
HER2_pathway=["EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR"]
CDON_pathway=["BOC", "CDON"]
KEAP1_pathway=["KEAP1", "NFE2L2","CHD6", "PTMA", "WAC"]
SPOP_pathway=["MYD88", "SPOP"]
RUNX1_pathway=["CBFB", "RUNX1","ELF4"]
NOTCH_pathway=["FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH"]
ARID_pathway=["ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2"]
SMARC_pathway=["SMARCB1", "SMARCA4"]
SMC_pathway=["NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2"]
CLASP_pathway=["CLASP1","CLASP2","CLIP2"]
pathways={"TP53_pathway": TP53_pathway, "STAG1_pathway":STAG1_pathway, "MLL_pathway":MLL_pathway,"BAP1_pathway":BAP1_pathway,"KRAS_pathway":KRAS_pathway, "HLA_pathway":HLA_pathway, "HER2_pathway":HER2_pathway, "CDON_pathway":CDON_pathway, "KEAP1_pathway":KEAP1_pathway,"SPOP_pathway":SPOP_pathway, "RUNX1_pathway":RUNX1_pathway, "NOTCH_pathway":NOTCH_pathway, "ARID_pathway":ARID_pathway, "SMARC_pathway":SMARC_pathway, "SMC_pathway":SMC_pathway,"CLASP_pathway":CLASP_pathway }
MYC_pos_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_pos_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if copynumber != "n/a" and pathway_affected and copynumber > 4 and copynumber < 6:
MYC_pos_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and copynumber > 4 and copynumber < 6:
MYC_pos_pathway_unaffected[pathway] += 1
elif copynumber != "n/a" and pathway_affected and not copynumber > 4:
MYC_neg_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and not copynumber > 4:
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 16)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 16)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[14]:
In [15]:
TP53_pathway=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK", "SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668"]
STAG1_pathway=["PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A"]
MLL_pathway=["KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1"]
BAP1_pathway=["ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B"]
KRAS_pathway=["BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2"]
HLA_pathway=["B2M", "CD1D","HLA-A", "HLA-B", "P4HTM"]
HER2_pathway=["EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR"]
CDON_pathway=["BOC", "CDON"]
KEAP1_pathway=["KEAP1", "NFE2L2","CHD6", "PTMA", "WAC"]
SPOP_pathway=["MYD88", "SPOP"]
RUNX1_pathway=["CBFB", "RUNX1","ELF4"]
NOTCH_pathway=["FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH"]
ARID_pathway=["ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2"]
SMARC_pathway=["SMARCB1", "SMARCA4"]
SMC_pathway=["NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2"]
CLASP_pathway=["CLASP1","CLASP2","CLIP2"]
pathways={"TP53_pathway": TP53_pathway, "STAG1_pathway":STAG1_pathway, "MLL_pathway":MLL_pathway,"BAP1_pathway":BAP1_pathway,"KRAS_pathway":KRAS_pathway, "HLA_pathway":HLA_pathway, "HER2_pathway":HER2_pathway, "CDON_pathway":CDON_pathway, "KEAP1_pathway":KEAP1_pathway,"SPOP_pathway":SPOP_pathway, "RUNX1_pathway":RUNX1_pathway, "NOTCH_pathway":NOTCH_pathway, "ARID_pathway":ARID_pathway, "SMARC_pathway":SMARC_pathway, "SMC_pathway":SMC_pathway,"CLASP_pathway":CLASP_pathway }
MYC_pos_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_pos_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_affected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
MYC_neg_pathway_unaffected={"TP53_pathway": 0, "STAG1_pathway":0, "MLL_pathway":0,"BAP1_pathway":0,"KRAS_pathway":0, "HLA_pathway":0, "HER2_pathway":0, "CDON_pathway":0, "KEAP1_pathway":0,"SPOP_pathway":0, "RUNX1_pathway":0, "NOTCH_pathway":0, "ARID_pathway":0, "SMARC_pathway":0, "SMC_pathway":0,"CLASP_pathway":0 }
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
for pathway in pathways.keys():
pathway_affected = False
for gene in pathways[pathway]:
if gene in sample.genes_affected or sample.checkFocalGeneAmp(gene):
pathway_affected = True
break
if copynumber != "n/a" and pathway_affected and copynumber > 6:
MYC_pos_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and copynumber > 6:
MYC_pos_pathway_unaffected[pathway] += 1
elif copynumber != "n/a" and pathway_affected and not copynumber > 6:
MYC_neg_pathway_affected[pathway] += 1
elif copynumber != "n/a" and not pathway_affected and not copynumber > 6:
MYC_neg_pathway_unaffected[pathway] += 1
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Pathway</th><th>MYC + pathway affected</th><th>MYC + pathway unaffected</th><th>MYC - pathway affected</th><th>MYC - pathway unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
for pathway in pathways.keys():
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_pathway_affected[pathway], MYC_pos_pathway_unaffected[pathway]], [MYC_neg_pathway_affected[pathway], MYC_neg_pathway_unaffected[pathway]]], alternative='less')
s += "<tr><th>"+pathway+"</th><td>"+str(MYC_pos_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_pos_pathway_unaffected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_affected[pathway])+"</td>"
s += "<td>"+str(MYC_neg_pathway_unaffected[pathway])+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 16)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 16)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 16)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 16)+"</td></tr>"
s += "</table>"
h = HTML(s);h
Out[15]:
In [16]:
genes=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK",
"SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668",
"PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A",
"KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1",
"ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B",
"BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2",
"B2M", "CD1D","HLA-A", "HLA-B", "P4HTM",
"EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR",
"BOC", "CDON",
"KEAP1", "NFE2L2","CHD6", "PTMA", "WAC",
"MYD88", "SPOP",
"CBFB", "RUNX1","ELF4",
"FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH",
"ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2",
"SMARCB1", "SMARCA4",
"NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2",
"CLASP1","CLASP2","CLIP2"]
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Gene</th><th>MYC + Gene affected</th><th>MYC + Gene unaffected</th><th>MYC - Gene affected</th><th>MYC - Gene unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
gene_count = 0
for gene in genes:
gene_count += 1
MYC_pos_gene_affected = 0
MYC_pos_gene_unaffected = 0
MYC_neg_gene_affected = 0
MYC_neg_gene_unaffected = 0
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
if sample.checkFocalGeneAmp(gene) or gene in sample.genes_affected:
if copynumber != "n/a" and copynumber > 4 and copynumber < 6:
MYC_pos_gene_affected += 1
elif copynumber != "n/a" and not copynumber > 4:
MYC_neg_gene_affected += 1
else:
if copynumber != "n/a" and copynumber > 4 and copynumber < 6:
MYC_pos_gene_unaffected += 1
elif copynumber != "n/a" and not copynumber > 4:
MYC_neg_gene_unaffected += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='less')
if (pvalue > 0.05 and pvalue_greater > 0.05 and pvalue_less > 0.05):
continue
s += "<tr><th>"+gene+"</th><td>"+str(MYC_pos_gene_affected)+"</td>"
s += "<td>"+str(MYC_pos_gene_unaffected)+"</td>"
s += "<td>"+str(MYC_neg_gene_affected)+"</td>"
s += "<td>"+str(MYC_neg_gene_unaffected)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 134)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 134)+"</td></tr>"
s += "</table>"
print "Genes: "+str(gene_count)
h = HTML(s);h
Out[16]:
In [17]:
genes=["CCND1","CDKN2A", "CUL9", "NPM1", "PTEN", "TP53","AGBL2", "ALS2CR8", "AMOTL1", "ANKRD12", "BRPF1", "CACHD1", "CARNS1", "CDKN2AIP", "CELSR3", "CHD8", "EPHA3", "HECW2", "IFT140", "IWS1", "MAGI2", "MAST3", "MDM4", "MLL5", "PLEKHA8", "PRDM2", "PRKRIR", "SCAPER", "SETD2", "SMG1", "SMG5", "SMG7", "SNRK",
"SPTBN2", "STK11IP", "STRADA", "SWAP70", "TEP1", "TRIP12", "TTLL5", "UACA", "ZFP91", "ZMIZ1", "ZNF227", "ZNF668",
"PDS5B", "STAG1", "STAG2", "WAPAL","PDS5A", "RAD21", "SMC1A",
"KDM6A", "MLL2", "MLL3","E2F3", "N4BP2", "PROSER1",
"ASXL1", "ASXL2", "BAP1","ANKRD17", "FOXK1", "FOXK2", "KDM1B",
"BRAF", "KRAS", "PIK3CA","ARHGAP4", "EIF2AK1", "FAM118B", "NRAS", "PLCE1", "RASGRF2", "RASGRP2", "RGL1", "VARS2",
"B2M", "CD1D","HLA-A", "HLA-B", "P4HTM",
"EGFR", "ERBB2","AREG", "ELF3", "ERBB4", "LRIG1", "OSMR",
"BOC", "CDON",
"KEAP1", "NFE2L2","CHD6", "PTMA", "WAC",
"MYD88", "SPOP",
"CBFB", "RUNX1","ELF4",
"FBXW7", "NOTCH1","CCNE1", "DCLRE1C", "DLL1", "DTX4", "FAM101A", "FOXM1", "JAG1", "KLF5", "LFNG", "LPP", "MAML1", "MMS22L", "NOTCH3", "NOTCH4", "SHPRH",
"ARID1A", "PBRM1","ADNP", "ARID1B", "ARID2",
"SMARCB1", "SMARCA4",
"NCAPD3","SMC4","NCAPG2","NCAPH2","SMC2","NCAPD2",
"CLASP1","CLASP2","CLIP2"]
s = "<table>"
s += "<th></th><th colspan=\"4\">Sample</th><th colspan=\"3\">Fisher's exact test (two-sided)</th><th colspan=\"3\">Fisher's exact test (one-sided greater)</th><th colspan=\"3\">Fisher's exact test (one-sided less)</th></tr>"
s += "<tr><th>Gene</th><th>MYC + Gene affected</th><th>MYC + Gene unaffected</th><th>MYC - Gene affected</th><th>MYC - Gene unaffected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th><th>Odd's ratio</th><th>p-value</th><th>p-value corrected</th></tr>"
gene_count = 0
for gene in genes:
gene_count += 1
MYC_pos_gene_affected = 0
MYC_pos_gene_unaffected = 0
MYC_neg_gene_affected = 0
MYC_neg_gene_unaffected = 0
for sample in samples.values():
if not sample.somatic_mutation_data:
continue
copynumber = "n/a"
log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
if (log2 != None):
copynumber = 2.0 * 2.0 ** log2
else:
copynumber="n/a"
if sample.checkFocalGeneAmp(gene) or gene in sample.genes_affected:
if copynumber != "n/a" and copynumber > 6:
MYC_pos_gene_affected += 1
elif copynumber != "n/a" and not copynumber > 6:
MYC_neg_gene_affected += 1
else:
if copynumber != "n/a" and copynumber > 6:
MYC_pos_gene_unaffected += 1
elif copynumber != "n/a" and not copynumber > 6:
MYC_neg_gene_unaffected += 1
oddsratio, pvalue = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]])
oddsratio_greater, pvalue_greater = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='greater')
oddsratio_less, pvalue_less = scipy.stats.fisher_exact([[MYC_pos_gene_affected, MYC_pos_gene_unaffected], [MYC_neg_gene_affected, MYC_neg_gene_unaffected]], alternative='less')
if (pvalue > 0.05 and pvalue_greater > 0.05 and pvalue_less > 0.05):
continue
s += "<tr><th>"+gene+"</th><td>"+str(MYC_pos_gene_affected)+"</td>"
s += "<td>"+str(MYC_pos_gene_unaffected)+"</td>"
s += "<td>"+str(MYC_neg_gene_affected)+"</td>"
s += "<td>"+str(MYC_neg_gene_unaffected)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio)+"</td>"
if pvalue < 0.05:
s += "<td>"+"{:.5f}".format(pvalue)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_greater)+"</td>"
if pvalue_greater < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_greater * 134)+"</td>"
else:
s += "<td>"+"{:.2f}".format(pvalue_greater)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_greater * 134)+"</td>"
s += "<td>"+"{:.2f}".format(oddsratio_less)+"</td>"
if pvalue_less < 0.05:
s += "<td>"+"{:.5f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.5f}".format(pvalue_less * 134)+"</td></tr>"
else:
s += "<td>"+"{:.2f}".format(pvalue_less)+"</td>"
s += "<td>"+"{:.2f}".format(pvalue_less * 134)+"</td></tr>"
s += "</table>"
print "Genes: "+str(gene_count)
h = HTML(s);h
Out[17]: