Get statistics for focal amplifications of Breast cancer samples


In [1]:
import NotebookImport
from TCGA_analysis_BRCA_import import *
from IPython.display import HTML


importing IPython notebook from TCGA_analysis_BRCA_import.ipynb
Loading Focal Amplification data
Loading RNASeq data
Loading RNASeq data of controls
Loading CNV Data
Loading Somatic Mutation Data
TCGA-A8-A07C-01A not found
TCGA-BH-A0HN-01A not found
TCGA-BH-A0B8-01A not found
TCGA-BH-A0HF-01A not found
TCGA-A7-A4SC-01A not found
TCGA-AR-A0TU-01A not found
TCGA-AR-A1AT-01A not found
TCGA-E2-A1LS-01A not found
TCGA-A2-A0CZ-01A not found
TCGA-BH-A0HL-01A not found
TCGA-AN-A0G0-01A not found
TCGA-BH-A0B1-01A not found
TCGA-B6-A0I8-01A not found
TCGA-B6-A0I6-01A not found
Loading Clinical Data
Samples: 1086
  --Focal Data: 1086
  --CNV:        1086
  --RNASeq:     1081
  --Somatic:    968
  --Clinical:   1058

Check lengths of segments having copynumber > 4 at significant targets found in Beroukhim et al, Nature 2010 "The landscape of somatic copy-number alteration across human cancers"


In [2]:
# 1) check lengths of segments having copynumber > 4 in known focal amplification genes (Beroukhim et al, Nature 2010 "The landscape of somatic copy-number alteration across human cancers", Supp Table2 Known Targets
beroukhim_genes=["MYC","CCND1","ERBB2","CDK4","NKX2-1","MDM2","EGFR","MCL1","FGFR1","KRAS","CCNE1","CRKL","HMGA2","TERT","PRKCI","IGF1R","MYCL","MYCN","CDK6","BCL2L1","MYB","MET","JUN","BIRC2","YAP1","PDGFRA","KIT","PIK3CA","MDM4","AR"]

focal_sizes=list()
nonfocal_sizes = list()

for sample in samples.values():
   for gene in beroukhim_genes:
      if gene in gene_positions.keys():
         log2 = sample.getLog2FromGene(gene_positions[gene].chrom,gene_positions[gene].start, gene_positions[gene].end)
         if (log2 != None):
            copynumber = 2.0 * 2.0 ** log2
         else:
            copynumber = "n/a"
         if copynumber != "n/a" and copynumber > 4:
            focal_sizes.append(sample.getSizeFromPosition(gene_positions[gene].chrom,gene_positions[gene].start, gene_positions[gene].end))
         if copynumber != "n/a" and copynumber < 4:
            nonfocal_sizes.append(sample.getSizeFromPosition(gene_positions[gene].chrom,gene_positions[gene].start, gene_positions[gene].end))

fig, axs = plt.subplots(1,2,figsize=(12,3))
focal_sizes_series = pandas.Series(focal_sizes)
fig_focal=focal_sizes_series.plot(ax=axs[0],kind='hist', alpha=0.5, range=(0,50000000), y='Frequency', x='Size in 10Mbp')
axs[0].vlines(x=20000000, ymin=0, ymax=500, color='r')
axs[0].set_xlabel("Size of target genes at copynumber > 4")
axs[0].set_ylabel("Frequency")
#output=fig_focal.get_figure()
#plt.show()
#output.savefig('BRCA_sizes_copynumber_over4_known.png')
mean_focal = focal_sizes_series.mean()
std_focal = focal_sizes_series.std()

nonfocal_sizes_series = pandas.Series(nonfocal_sizes)
fig_nonfocal=nonfocal_sizes_series.plot(ax=axs[1],kind='hist', alpha=0.5, range=(0,50000000), y='Frequency', x='Size in 10Mbp')
axs[1].vlines(x=20000000, ymin=0, ymax=1800, color='r')
axs[1].set_xlabel("Size of target genes at copynumber < 4")
axs[1].set_ylabel("Frequency")

output1=fig_nonfocal.get_figure()
#plt.show()
output1.savefig('BRCA_sizes_copynumber_lower4_known.png', dpi=200)
mean_nonfocal = nonfocal_sizes_series.mean()
std_nonfocal = nonfocal_sizes_series.std()

print ""
print " 1) check focal size distribution for samples having copynumber > 4 in genes with known focal amplifications"
print "   Segments copynumber > 4 and known focal amplifications:"
print "   Mean: "+str(mean_focal)
print "   Standard Deviation: "+str(std_focal)
print "   ---------------------------------"
print "   Segments copynumber < 4:"
print "   Mean: "+str(mean_nonfocal)
print "   Standard Deviation: "+str(std_nonfocal)
print "   ---------------------------------"


 1) check focal size distribution for samples having copynumber > 4 in genes with known focal amplifications
   Segments copynumber > 4 and known focal amplifications:
   Mean: 7082793.66038
   Standard Deviation: 15843140.2327
   ---------------------------------
   Segments copynumber < 4:
   Mean: 79218189.861
   Standard Deviation: 56601108.5392
   ---------------------------------

Correlation of lengths and gene expression of MYC


In [3]:
# 2) correlation of lengths and gene expression of MYC

MYC_expression = list()
MYC_size = list()

for sample in samples.values():
 size = sample.getSizeFromPosition(gene_positions['MYC'].chrom, gene_positions['MYC'].start, gene_positions['MYC'].end)
 if sample.getRNASeqFromGene("MYC") != "n/a" and size != None:
    MYC_expression.append(sample.getRNASeqFromGene("MYC"))
    MYC_size.append(size)

MYC_expr_tuple={'MYC expression': MYC_expression,'MYC size': MYC_size}
MYC_expr_size=pandas.DataFrame(MYC_expr_tuple)
fig_correlation= MYC_expr_size.plot(kind='scatter', x='MYC expression', y='MYC size')
plt.hlines(y=20000000, xmin=-1000, xmax=15000, color='r')
#output1=fig_correlation.get_figure()
#plt.show()
#output1.savefig('BRCA_expression_vs_size_MYC.png')
corr_coeff, p_value = scipy.stats.pearsonr(MYC_expression,MYC_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(MYC_expression,MYC_size)


print ""
print " 2) correlation of lengths and gene expression of MYC"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 2) correlation of lengths and gene expression of MYC
   Pearson Correlation coefficient: -0.147887543897
   P-value: 1.05380023931e-06
   -----------------------
   Spearman Correlation coefficient: -0.142162525012
   P-value: 2.72713504464e-06

Correlation of lengths and gene expression of CCND1


In [4]:
# 3) correlation of lengths and gene expression of CCND1

CCND1_expression = list()
CCND1_size = list()

for sample in samples.values():
 size = sample.getSizeFromPosition(gene_positions['CCND1'].chrom, gene_positions['CCND1'].start, gene_positions['CCND1'].end)
 if sample.getRNASeqFromGene("CCND1") != "n/a" and size != None:
    CCND1_expression.append(sample.getRNASeqFromGene("CCND1"))
    CCND1_size.append(size)

CCND1_expr_tuple={'CCND1 expression': CCND1_expression,'CCND1 size': CCND1_size}
CCND1_expr_size=pandas.DataFrame(CCND1_expr_tuple)
fig_correlation= CCND1_expr_size.plot(kind='scatter', x='CCND1 expression', y='CCND1 size')
plt.hlines(y=20000000, xmin=-50000, xmax=250000, color='r')
#output1=fig_correlation.get_figure()
#plt.show()
#output1.savefig('BRCA_expression_vs_size_MYC.png')
corr_coeff, p_value = scipy.stats.pearsonr(CCND1_expression,CCND1_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(CCND1_expression,CCND1_size)


print ""
print " 3) correlation of lengths and gene expression of CCND1"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 3) correlation of lengths and gene expression of CCND1
   Pearson Correlation coefficient: -0.314370121706
   P-value: 4.1387646263e-26
   -----------------------
   Spearman Correlation coefficient: -0.327557220894
   P-value: 2.50186177605e-28

Correlation of lengths and gene expression of Beroukhim targets


In [5]:
# 4) correlation of lengths and gene expression of Beroukhim targets
beroukhim_genes=["MYC","CCND1","ERBB2","CDK4","NKX2-1","MDM2","EGFR","MCL1","FGFR1","KRAS","CCNE1","CRKL","HMGA2","TERT","PRKCI","IGF1R","MYCL","MYCN","CDK6","BCL2L1","MYB","MET","JUN","BIRC2","YAP1","PDGFRA","KIT","PIK3CA","MDM4","AR"]
gene_expression = list()
gene_size = list()

for sample in samples.values():
   for gene in beroukhim_genes:
      size = sample.getSizeFromPosition(gene_positions[gene].chrom, gene_positions[gene].start, gene_positions[gene].end)
      if sample.getRNASeqFromGene(gene) != "n/a" and size != None:
         gene_expression.append(sample.getRNASeqFromGene(gene))
         gene_size.append(size)

gene_expr_tuple={'Gene expression': gene_expression,'Gene size': gene_size}
gene_expr_size=pandas.DataFrame(gene_expr_tuple)
fig_correlation= gene_expr_size.plot(kind='scatter', x='Gene expression', y='Gene size')
plt.hlines(y=20000000, xmin=-50000, xmax=400000, color='r')
#output1=fig_correlation.get_figure()
#plt.show()
#output1.savefig('BRCA_expression_vs_size_MYC.png')
corr_coeff, p_value = scipy.stats.pearsonr(gene_expression,gene_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(gene_expression,gene_size)


print ""
print " 4) correlation of lengths and gene expression of Beroukhim targets"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 4) correlation of lengths and gene expression of Beroukhim targets
   Pearson Correlation coefficient: -0.118798381477
   P-value: 6.99974317387e-98
   -----------------------
   Spearman Correlation coefficient: -0.12854956005
   P-value: 2.05448846524e-114

Correlation of lengths and copynumber of MYC


In [6]:
# 5) correlation of lengths and copynumber of MYC

MYC_copynumber = list()
MYC_size = list()

for sample in samples.values():
 size = sample.getSizeFromPosition(gene_positions['MYC'].chrom, gene_positions['MYC'].start, gene_positions['MYC'].end)
 log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
 if (log2 != None):
    copynumber = 2.0 * 2.0 ** log2
 else:
    copynumber = "n/a"
 if copynumber != "n/a" and size != None:
    MYC_copynumber.append(copynumber)
    MYC_size.append(size)

MYC_copynumber_tuple={'MYC copynumber': MYC_copynumber,'MYC size': MYC_size}
MYC_copynumber_size=pandas.DataFrame(MYC_copynumber_tuple)
fig_correlation= MYC_copynumber_size.plot(kind='scatter', x='MYC copynumber', y='MYC size')
ax = plt.gca()
ax.set_xlim(0,20)
ax.set_ylim(-10000000,150000000)
plt.hlines(y=20000000, xmin=0, xmax=40, color='r')
plt.vlines(x=4, ymin=0, ymax=150000000, color='g')
output1=fig_correlation.get_figure()
output1.savefig('BRCA_copynumber_vs_size_MYC.png', dpi=200)

corr_coeff, p_value = scipy.stats.pearsonr(MYC_copynumber,MYC_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(MYC_copynumber,MYC_size)

print ""
print " 5) correlation of lengths and copynumber of MYC"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 5) correlation of lengths and copynumber of MYC
   Pearson Correlation coefficient: -0.437190821066
   P-value: 7.19147208437e-52
   -----------------------
   Spearman Correlation coefficient: -0.512551876307
   P-value: 1.00004073498e-73

Correlation of lengths and copynumber of CCND1


In [7]:
# 6) correlation of lengths and copynumber of CCND1

CCND1_copynumber = list()
CCND1_size = list()

for sample in samples.values():
 size = sample.getSizeFromPosition(gene_positions['CCND1'].chrom, gene_positions['CCND1'].start, gene_positions['CCND1'].end)
 log2 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
 if (log2 != None):
    copynumber = 2.0 * 2.0 ** log2
 else:
    copynumber = "n/a"
 if copynumber != "n/a" and size != None:
    CCND1_copynumber.append(copynumber)
    CCND1_size.append(size)

CCND1_copynumber_tuple={'CCND1 copynumber': CCND1_copynumber,'CCND1 size': CCND1_size}
CCND1_copynumber_size=pandas.DataFrame(CCND1_copynumber_tuple)
fig_correlation= CCND1_copynumber_size.plot(kind='scatter', x='CCND1 copynumber', y='CCND1 size')
ax = plt.gca()
ax.set_xlim(0,20)
ax.set_ylim(-10000000,150000000)
plt.hlines(y=20000000, xmin=0, xmax=40, color='r')
plt.vlines(x=4, ymin=0, ymax=150000000, color='g')
output1=fig_correlation.get_figure()
output1.savefig('BRCA_copynumber_vs_size_CCND1.png', dpi=200)

corr_coeff, p_value = scipy.stats.pearsonr(CCND1_copynumber,CCND1_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(CCND1_copynumber,CCND1_size)

print ""
print " 6) correlation of lengths and copynumber of CCND1"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 6) correlation of lengths and copynumber of CCND1
   Pearson Correlation coefficient: -0.47745745891
   P-value: 1.21621401656e-62
   -----------------------
   Spearman Correlation coefficient: -0.616089294877
   P-value: 5.67928882188e-114

Correlation of lengths and copynumber of EGFR


In [8]:
# 6) correlation of lengths and copynumber of EGFR

EGFR_copynumber = list()
EGFR_size = list()

for sample in samples.values():
 size = sample.getSizeFromPosition(gene_positions['EGFR'].chrom, gene_positions['EGFR'].start, gene_positions['EGFR'].end)
 log2 = sample.getLog2FromGene(gene_positions['EGFR'].chrom,gene_positions['EGFR'].start, gene_positions['EGFR'].end)
 if (log2 != None):
    copynumber = 2.0 * 2.0 ** log2
 else:
    copynumber = "n/a"
 if copynumber != "n/a" and size != None:
    EGFR_copynumber.append(copynumber)
    EGFR_size.append(size)

EGFR_copynumber_tuple={'EGFR copynumber': EGFR_copynumber,'EGFR size': EGFR_size}
EGFR_copynumber_size=pandas.DataFrame(EGFR_copynumber_tuple)
fig_correlation= EGFR_copynumber_size.plot(kind='scatter', x='EGFR copynumber', y='EGFR size')
ax = plt.gca()
ax.set_xlim(0,20)
ax.set_ylim(-10000000,150000000)
plt.hlines(y=20000000, xmin=0, xmax=40, color='r')
plt.vlines(x=4, ymin=0, ymax=150000000, color='g')
output1=fig_correlation.get_figure()
output1.savefig('BRCA_copynumber_vs_size_EGFR.png', dpi=200)

corr_coeff, p_value = scipy.stats.pearsonr(EGFR_copynumber,EGFR_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(EGFR_copynumber,EGFR_size)

print ""
print " 6) correlation of lengths and copynumber of EGFR"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 6) correlation of lengths and copynumber of EGFR
   Pearson Correlation coefficient: -0.200303028576
   P-value: 3.5720504696e-11
   -----------------------
   Spearman Correlation coefficient: -0.226730557852
   P-value: 5.63142694924e-14

Correlation of lengths and copynumber of ERBB2


In [9]:
# 6) correlation of lengths and copynumber of ERBB2

ERBB2_copynumber = list()
ERBB2_size = list()

for sample in samples.values():
 size = sample.getSizeFromPosition(gene_positions['ERBB2'].chrom, gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
 log2 = sample.getLog2FromGene(gene_positions['ERBB2'].chrom,gene_positions['ERBB2'].start, gene_positions['ERBB2'].end)
 if (log2 != None):
    copynumber = 2.0 * 2.0 ** log2
 else:
    copynumber = "n/a"
 if copynumber != "n/a" and size != None:
    ERBB2_copynumber.append(copynumber)
    ERBB2_size.append(size)

ERBB2_copynumber_tuple={'ERBB2 copynumber': ERBB2_copynumber,'ERBB2 size': ERBB2_size}
ERBB2_copynumber_size=pandas.DataFrame(ERBB2_copynumber_tuple)
fig_correlation= ERBB2_copynumber_size.plot(kind='scatter', x='ERBB2 copynumber', y='ERBB2 size')
ax = plt.gca()
ax.set_xlim(0,20)
ax.set_ylim(-10000000,150000000)
plt.hlines(y=20000000, xmin=0, xmax=40, color='r')
plt.vlines(x=4, ymin=0, ymax=150000000, color='g')
output1=fig_correlation.get_figure()
output1.savefig('BRCA_copynumber_vs_size_ERBB2.png', dpi=200)

corr_coeff, p_value = scipy.stats.pearsonr(ERBB2_copynumber,ERBB2_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(ERBB2_copynumber,ERBB2_size)

print ""
print " 6) correlation of lengths and copynumber of ERBB2"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 6) correlation of lengths and copynumber of ERBB2
   Pearson Correlation coefficient: -0.429389516885
   P-value: 2.5110331937e-49
   -----------------------
   Spearman Correlation coefficient: -0.231268482455
   P-value: 1.7551744624e-14

Correlation of lengths and copynumber of Beroukhim targets


In [10]:
# 7) correlation of lengths and copynumber of Beroukhim targets
beroukhim_genes=["MYC","CCND1","ERBB2","CDK4","NKX2-1","MDM2","EGFR","MCL1","FGFR1","KRAS","CCNE1","CRKL","HMGA2","TERT","PRKCI","IGF1R","MYCL","MYCN","CDK6","BCL2L1","MYB","MET","JUN","BIRC2","YAP1","PDGFRA","KIT","PIK3CA","MDM4","AR"]
gene_copynumber = list()
gene_size = list()

for sample in samples.values():
   for gene in beroukhim_genes:
      size = sample.getSizeFromPosition(gene_positions[gene].chrom, gene_positions[gene].start, gene_positions[gene].end)
      log2 = sample.getLog2FromGene(gene_positions[gene].chrom,gene_positions[gene].start, gene_positions[gene].end)
      if (log2 != None):
         copynumber = 2.0 * 2.0 ** log2
      else:
         copynumber = "n/a"
      if copynumber != "n/a" and size != None:
         gene_copynumber.append(copynumber)
         gene_size.append(size)

gene_copynumber_tuple={'Gene copynumber': gene_copynumber,'Gene size': gene_size}
gene_copynumber_size=pandas.DataFrame(gene_copynumber_tuple)
fig_correlation= gene_copynumber_size.plot(kind='scatter', x='Gene copynumber', y='Gene size')
ax = plt.gca()
ax.set_xlim(0,20)
ax.set_ylim(-10000000,150000000)
plt.hlines(y=20000000, xmin=0, xmax=40, color='r')
plt.vlines(x=4, ymin=0, ymax=150000000, color='g')#output1=fig_correlation.get_figure()
#plt.show()
output1.savefig('BRCA_copynumber_vs_size_beroukhim_targets.png', dpi=200)
corr_coeff, p_value = scipy.stats.pearsonr(gene_copynumber,gene_size)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(gene_copynumber,gene_size)


print ""
print " 7) correlation of lengths and copynumber of Beroukhim targets"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 7) correlation of lengths and copynumber of Beroukhim targets
   Pearson Correlation coefficient: -0.183684750247
   P-value: 1.09212998829e-242
   -----------------------
   Spearman Correlation coefficient: -0.141869952524
   P-value: 1.30694107949e-144

Correlation of copynumber and gene expression of MYC


In [11]:
# 8) correlation of lengths and gene expression of MYC

MYC_expression = list()
MYC_copynumber = list()

for sample in samples.values():
   log2 = sample.getLog2FromGene(gene_positions['MYC'].chrom,gene_positions['MYC'].start, gene_positions['MYC'].end)
   if (log2 != None):
      copynumber = 2.0 * 2.0 ** log2
   else:
      copynumber = "n/a"
   if sample.getRNASeqFromGene("MYC") != "n/a" and copynumber != "n/a":
      MYC_expression.append(sample.getRNASeqFromGene("MYC"))
      MYC_copynumber.append(copynumber)

MYC_expr_tuple={'MYC expression': MYC_expression,'MYC copynumber': MYC_copynumber}
MYC_expr_copynumber=pandas.DataFrame(MYC_expr_tuple)
fig_correlation= MYC_expr_copynumber.plot(kind='scatter', x='MYC expression', y='MYC copynumber')
plt.hlines(y=2, xmin=-1000, xmax=30000, color='r')
#output1=fig_correlation.get_figure()
#plt.show()
#output1.savefig('BRCA_expression_vs_size_MYC.png')
corr_coeff, p_value = scipy.stats.pearsonr(MYC_expression,MYC_copynumber)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(MYC_expression,MYC_copynumber)


print ""
print " 8) correlation of copynumber and gene expression of MYC"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 8) correlation of copynumber and gene expression of MYC
   Pearson Correlation coefficient: 0.245256251777
   P-value: 2.93926202022e-16
   -----------------------
   Spearman Correlation coefficient: 0.239497014605
   P-value: 1.48788465051e-15

Correlation of copynumber and gene expression of CCND1


In [12]:
# 8) correlation of lengths and gene expression of CCND1

CCND1_expression = list()
CCND1_copynumber = list()

for sample in samples.values():
   log2 = sample.getLog2FromGene(gene_positions['CCND1'].chrom,gene_positions['CCND1'].start, gene_positions['CCND1'].end)
   if (log2 != None):
      copynumber = 2.0 * 2.0 ** log2
   else:
      copynumber = "n/a"
   if sample.getRNASeqFromGene("CCND1") != "n/a" and copynumber != "n/a":
      CCND1_expression.append(sample.getRNASeqFromGene("CCND1"))
      CCND1_copynumber.append(copynumber)

CCND1_expr_tuple={'CCND1 expression': CCND1_expression,'CCND1 copynumber': CCND1_copynumber}
CCND1_expr_copynumber=pandas.DataFrame(CCND1_expr_tuple)
fig_correlation= CCND1_expr_copynumber.plot(kind='scatter', x='CCND1 expression', y='CCND1 copynumber')
plt.hlines(y=2, xmin=-1000, xmax=200000, color='r')
#output1=fig_correlation.get_figure()
#plt.show()
#output1.savefig('BRCA_expression_vs_size_MYC.png')
corr_coeff, p_value = scipy.stats.pearsonr(CCND1_expression,CCND1_copynumber)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(CCND1_expression,CCND1_copynumber)


print ""
print " 8) correlation of copynumber and gene expression of CCND1"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 8) correlation of copynumber and gene expression of CCND1
   Pearson Correlation coefficient: 0.666920858689
   P-value: 2.17810249461e-139
   -----------------------
   Spearman Correlation coefficient: 0.484010965769
   P-value: 2.81730478766e-64

Correlation of copynumber and gene expression of Beroukhim targets


In [13]:
# 8) correlation of lengths and gene expression of Beroukhim targets

gene_expression = list()
gene_copynumber = list()
beroukhim_genes=["MYC","CCND1","ERBB2","CDK4","NKX2-1","MDM2","EGFR","MCL1","FGFR1","KRAS","CCNE1","CRKL","HMGA2","TERT","PRKCI","IGF1R","MYCL","MYCN","CDK6","BCL2L1","MYB","MET","JUN","BIRC2","YAP1","PDGFRA","KIT","PIK3CA","MDM4","AR"]

for sample in samples.values():
   for gene in beroukhim_genes:
      log2 = sample.getLog2FromGene(gene_positions[gene].chrom,gene_positions[gene].start, gene_positions[gene].end)
      if (log2 != None):
         copynumber = 2.0 * 2.0 ** log2
      else:
         copynumber = "n/a"
      if sample.getRNASeqFromGene(gene) != "n/a" and copynumber != "n/a":
         gene_expression.append(sample.getRNASeqFromGene(gene))
         gene_copynumber.append(copynumber)

gene_expr_tuple={'expression': gene_expression,'copynumber': gene_copynumber}
gene_expr_copynumber=pandas.DataFrame(gene_expr_tuple)
fig_correlation= gene_expr_copynumber.plot(kind='scatter', x='expression', y='copynumber')
plt.hlines(y=2, xmin=-1000, xmax=300000, color='r')

corr_coeff, p_value = scipy.stats.pearsonr(gene_expression,gene_copynumber)
corr_coeff_spear, p_value_spear = scipy.stats.spearmanr(gene_expression,gene_copynumber)


print ""
print " 8) correlation of copynumber and gene expression of Beroukhim targets"
print "   Pearson Correlation coefficient: "+str(corr_coeff)
print "   P-value: "+str(p_value)
print "   -----------------------"
print "   Spearman Correlation coefficient: "+str(corr_coeff_spear)
print "   P-value: "+str(p_value_spear)


 8) correlation of copynumber and gene expression of Beroukhim targets
   Pearson Correlation coefficient: 0.658190119956
   P-value: 0.0
   -----------------------
   Spearman Correlation coefficient: 0.137538997523
   P-value: 7.61429629911e-131

Check occurrence of focal amplifications of Beroukhim targets


In [14]:
# 7) Check occurrence of focal amplifications of Beroukhim targets
beroukhim_genes={"MYC" : 0,"CCND1" : 0,"ERBB2" : 0,"CDK4" : 0,"NKX2-1" : 0,"MDM2" : 0,"EGFR" : 0,"MCL1" : 0,"FGFR1" : 0,"KRAS" : 0,"CCNE1" : 0,"CRKL" : 0,"HMGA2" : 0,"TERT" : 0,"PRKCI" : 0,"IGF1R" : 0,"MYCL" : 0,"MYCN" : 0,"CDK6" : 0,"BCL2L1" : 0,"MYB" : 0,"MET" : 0,"JUN" : 0,"BIRC2" : 0,"YAP1" : 0,"PDGFRA" : 0,"KIT" : 0,"PIK3CA" : 0,"MDM4" : 0,"AR" : 0}

for sample in samples.values():
   for gene in beroukhim_genes.keys():
      if sample.checkFocalGeneAmp(gene):
         beroukhim_genes[gene] += 1   

focal_amp_series=pandas.Series(beroukhim_genes)
focal_amp_series.sort(ascending=False)
fig_correlation= focal_amp_series.plot(kind='bar')

print ""
print " 7) Check occurrence of focal amplifications of Beroukhim targets"
print ""


 7) Check occurrence of focal amplifications of Beroukhim targets

Get 20 most frequent genes in focal events


In [15]:
# 8) Get 50 most frequent genes in focal events
gene_amp_frequency = dict()
for sample in samples.values():
   for gene in sample.listFocalAmpGenes():
       if gene in gene_amp_frequency.keys():
          gene_amp_frequency[gene] += 1
       else:
          gene_amp_frequency[gene] = 1

gene_del_frequency = dict()
for sample in samples.values():
   for gene in sample.listFocalDelGenes():
      if gene in gene_del_frequency.keys():
         gene_del_frequency[gene] += 1
      else:
         gene_del_frequency[gene] = 1

print ""
print " 8) Get 20 most frequent genes in focal amplifications/deletions"
print "   Amplifications:"
count = 0
for gene in sorted(gene_amp_frequency, key=gene_amp_frequency.get, reverse=True):
 count += 1
 if (gene in gene_positions.keys()):
    print "Gene: "+gene+"  Freq: "+str(gene_amp_frequency[gene])+"  Pos: "+gene_positions[gene].returnPos()
 else:
    print "Gene: "+gene+"  Freq: "+str(gene_amp_frequency[gene])
 if (count > 19):
   break
count = 0
print ""
print "   Deletions:"
for gene in sorted(gene_del_frequency, key=gene_del_frequency.get, reverse=True):
 count += 1
 if (gene in gene_positions.keys()):
    print "Gene: "+gene+"  Freq: "+str(gene_del_frequency[gene])+"  Pos: "+gene_positions[gene].returnPos()
 else:
    print "Gene: "+gene+"  Freq: "+str(gene_del_frequency[gene])
 if (count > 19):
   break


 8) Get 20 most frequent genes in focal amplifications/deletions
   Amplifications:
Gene: PPFIA1  Freq: 229  Pos: chr11:70116805-70224598
Gene: ANO1  Freq: 218  Pos: chr11:69931515-70035652
Gene: CCND1  Freq: 216  Pos: chr11:69455872-69469242
Gene: ORAOV1  Freq: 212  Pos: chr11:69480331-69490165
Gene: FADD  Freq: 208  Pos: chr11:70049268-70053508
Gene: CTTN  Freq: 205  Pos: chr11:70244611-70282690
Gene: TPCN2  Freq: 170  Pos: chr11:68816349-68858072
Gene: ASH2L  Freq: 169  Pos: chr8:37963310-37997598
Gene: ERLIN2  Freq: 167  Pos: chr8:37594096-37604071
Gene: RAB11FIP1  Freq: 164  Pos: chr8:37716464-37757015
Gene: PROSC  Freq: 164  Pos: chr8:37620100-37637286
Gene: ZNF703  Freq: 163  Pos: chr8:37553300-37556396
Gene: FGFR1  Freq: 163  Pos: chr8:38268655-38326352
Gene: DDHD2  Freq: 163  Pos: chr8:38089470-38120287
Gene: WHSC1L1  Freq: 162  Pos: chr8:38173934-38239790
Gene: BRF2  Freq: 161  Pos: chr8:37701397-37707431
Gene: EIF4EBP1  Freq: 159  Pos: chr8:37888019-37917883
Gene: PPAPDC1B  Freq: 153  Pos: chr8:38120649-38126738
Gene: BAG4  Freq: 153  Pos: chr8:38034105-38070819
Gene: LSM1  Freq: 153  Pos: chr8:38020838-38034248

   Deletions:
Gene: CDKN2A  Freq: 53  Pos: chr9:21967750-21975132
Gene: STK11  Freq: 40  Pos: chr19:1205797-1228434
Gene: MAP2K4  Freq: 34  Pos: chr17:11924134-12047148
Gene: PTEN  Freq: 33  Pos: chr10:89623194-89728532
Gene: NF1  Freq: 31  Pos: chr17:29421944-29704695
Gene: RB1  Freq: 23  Pos: chr13:48877882-49056026
Gene: SMAD4  Freq: 17  Pos: chr18:48556582-48611411
Gene: CYLD  Freq: 15  Pos: chr16:50775960-50835846
Gene: TP53  Freq: 9  Pos: chr17:7571719-7590868
Gene: NF2  Freq: 7  Pos: chr22:29999544-30079904
Gene: CDH1  Freq: 5  Pos: chr16:68771194-68869444
Gene: SOCS1  Freq: 4  Pos: chr16:11348273-11350039
Gene: MSH2  Freq: 4  Pos: chr2:47630205-47710367
Gene: MLH1  Freq: 2  Pos: chr3:37035267-37092337
Gene: MSH6  Freq: 2  Pos: chr2:48010220-48034092

Check for cooccurence of focal amplifications in MYC,CDKN2A, FGFR1, CCND1, ERBB2, CCNE1

Numbers in diagonal of the table represent all gene amplifications found


In [16]:
# 9) Check for cooccurence of focal amplifications in MYC, CDKN2A, FGFR1, CCND1, ERBB2, CCNE1
MYC_count = 0
MYC_and_CDKN2A_count = 0
MYC_and_FGFR1_count = 0
MYC_and_CCND1_count = 0
MYC_and_ERBB2_count = 0
MYC_and_CCNE1_count = 0
FGFR1_count = 0
FGFR1_and_CDKN2A_count = 0
FGFR1_and_CCND1_count = 0
FGFR1_and_ERBB2_count = 0
FGFR1_and_CCNE1_count = 0
CCND1_count = 0
CCND1_and_CDKN2A_count = 0
CCND1_and_ERBB2_count = 0
CCND1_and_CCNE1_count = 0
ERBB2_count = 0
ERBB2_and_CDKN2A_count = 0
ERBB2_and_CCNE1_count = 0
CDKN2A_count = 0
CDKN2A_and_CCNE1_count = 0
CCNE1_count = 0

for sample in samples.values():
    if sample.checkFocalGeneAmp("MYC"):
        MYC_count += 1
        if sample.checkFocalGeneDel("CDKN2A"):
            MYC_and_CDKN2A_count += 1
        if sample.checkFocalGeneAmp("FGFR1"):
            MYC_and_FGFR1_count += 1
        if sample.checkFocalGeneAmp("CCND1"):
            MYC_and_CCND1_count += 1
        if sample.checkFocalGeneAmp("ERBB2"):
            MYC_and_ERBB2_count += 1
        if sample.checkFocalGeneAmp("CCNE1"):
            MYC_and_CCNE1_count += 1
    if sample.checkFocalGeneAmp("FGFR1"):
        FGFR1_count += 1
        if sample.checkFocalGeneDel("CDKN2A"):
            FGFR1_and_CDKN2A_count += 1
        if sample.checkFocalGeneAmp("CCND1"):
            FGFR1_and_CCND1_count += 1
        if sample.checkFocalGeneAmp("ERBB2"):
            FGFR1_and_ERBB2_count += 1
        if sample.checkFocalGeneAmp("CCNE1"):
            FGFR1_and_CCNE1_count += 1
    if sample.checkFocalGeneAmp("CCND1"):
        CCND1_count += 1
        if sample.checkFocalGeneDel("CDKN2A"):
            CCND1_and_CDKN2A_count += 1
        if sample.checkFocalGeneAmp("ERBB2"):
            CCND1_and_ERBB2_count += 1
        if sample.checkFocalGeneAmp("CCNE1"):
            CCND1_and_CCNE1_count += 1
    if sample.checkFocalGeneAmp("ERBB2"):
        ERBB2_count += 1
        if sample.checkFocalGeneDel("CDKN2A"):
            ERBB2_and_CDKN2A_count += 1
        if sample.checkFocalGeneAmp("CCNE1"):
            ERBB2_and_CCNE1_count += 1
    if sample.checkFocalGeneDel("CDKN2A"):
        CDKN2A_count += 1
        if sample.checkFocalGeneAmp("CCNE1"):
            CDKN2A_and_CCNE1_count += 1
    if sample.checkFocalGeneAmp("CCNE1"):
        CCNE1_count += 1

print ""
print " 9) Check for cooccurence of focal amplifications in MYC, CDKN2A, FGFR1, CCND1, ERBB2, CCNE1"
print ""
print "  MYC amplified: "+str(MYC_count)
print "  CDKNA2 deleted: "+str(CDKN2A_count)
print "  FGFR1 amplified: "+str(FGFR1_count)
print "  CCND1 amplified: "+str(CCND1_count)
print "  ERBB2 amplified: "+str(ERBB2_count)
print ""
s = "<table><tr><th></th><th>MYC</th><th>CDKN2A</th><th>FGFR1</th><th>CCND1</th><th>ERBB2</th><th>CCNE1</th></tr>"
s += "<tr><th>MYC</th><td><b>"+str(MYC_count)+"</b></td><td>"+str(MYC_and_CDKN2A_count)+"</td><td>"+str(MYC_and_FGFR1_count)+"</td><td>"+str(MYC_and_CCND1_count)+"</td><td>"+str(MYC_and_ERBB2_count)+"</td><td>"+str(MYC_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>CDKN2A</th><td>"+str(MYC_and_CDKN2A_count)+"</td><td><b>"+str(CDKN2A_count)+"</b></td><td>"+str(FGFR1_and_CDKN2A_count)+"</td><td>"+str(CCND1_and_CDKN2A_count)+"</td><td>"+str(ERBB2_and_CDKN2A_count)+"</td><td>"+str(CDKN2A_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>FGFR1</th><td>"+str(MYC_and_FGFR1_count)+"</td><td>"+str(FGFR1_and_CDKN2A_count)+"</td><td><b>"+str(FGFR1_count)+"</b></td><td>"+str(FGFR1_and_CCND1_count)+"</td><td>"+str(FGFR1_and_ERBB2_count)+"</td><td>"+str(FGFR1_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>CCND1</th><td>"+str(MYC_and_CCND1_count)+"</td><td>"+str(CCND1_and_CDKN2A_count)+"</td><td>"+str(FGFR1_and_CCND1_count)+"</td><td><b>"+str(CCND1_count)+"</b></td><td>"+str(CCND1_and_ERBB2_count)+"</td><td>"+str(CCND1_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>ERBB2</th><td>"+str(MYC_and_ERBB2_count)+"</td><td>"+str(ERBB2_and_CDKN2A_count)+"</td><td>"+str(FGFR1_and_ERBB2_count)+"</td><td>"+str(CCND1_and_ERBB2_count)+"</td><td><b>"+str(ERBB2_count)+"</b></td><td>"+str(ERBB2_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>CCNE1</th><td>"+str(MYC_and_CCNE1_count)+"</td><td>"+str(CDKN2A_and_CCNE1_count)+"</td><td>"+str(FGFR1_and_CCNE1_count)+"</td><td>"+str(CCND1_and_CCNE1_count)+"</td><td>"+str(ERBB2_and_CCNE1_count)+"</td><td><b>"+str(CCNE1_count)+"</b></td></tr>"
s += "</table>"
h = HTML(s);h


 9) Check for cooccurence of focal amplifications in MYC, CDKN2A, FGFR1, CCND1, ERBB2, CCNE1

  MYC amplified: 63
  CDKNA2 deleted: 53
  FGFR1 amplified: 148
  CCND1 amplified: 214
  ERBB2 amplified: 129

Out[16]:
MYCCDKN2AFGFR1CCND1ERBB2CCNE1
MYC6331110138
CDKN2A35381561
FGFR111814849158
CCND1101549214335
ERBB2136153312910
CCNE181851047

Check for cooccurence of amplifications (copynumber > 4) in MYC,CDKN2A, FGFR1, CCND1, ERBB2, CCNE1


In [17]:
# 10) Check for cooccurence of amplifications (copynumber > 4) MYC, CDKN2A, FGFR1, CCND1, ERBB2, CCNE1
MYC_count = 0
MYC_and_CDKN2A_count = 0
MYC_and_FGFR1_count = 0
MYC_and_CCND1_count = 0
MYC_and_ERBB2_count = 0
MYC_and_CCNE1_count = 0
FGFR1_count = 0
FGFR1_and_CDKN2A_count = 0
FGFR1_and_CCND1_count = 0
FGFR1_and_ERBB2_count = 0
FGFR1_and_CCNE1_count = 0
CCND1_count = 0
CCND1_and_CDKN2A_count = 0
CCND1_and_ERBB2_count = 0
CCND1_and_CCNE1_count = 0
ERBB2_count = 0
ERBB2_and_CDKN2A_count = 0
ERBB2_and_CCNE1_count = 0
CDKN2A_count = 0
CDKN2A_and_CCNE1_count = 0
CCNE1_count = 0

for sample in samples.values():
    log2_MYC = sample.getLog2FromGene(gene_positions["MYC"].chrom,gene_positions["MYC"].start, gene_positions["MYC"].end)
    if (log2_MYC != None):
       copynumber_MYC = 2.0 * 2.0 ** log2_MYC
    else:
       copynumber_MYC = "n/a"    
    log2_FGFR1 = sample.getLog2FromGene(gene_positions["FGFR1"].chrom,gene_positions["FGFR1"].start, gene_positions["FGFR1"].end)
    if (log2_FGFR1 != None):
       copynumber_FGFR1 = 2.0 * 2.0 ** log2_FGFR1
    else:
       copynumber_FGFR1 = "n/a"
    log2_CDKN2A = sample.getLog2FromGene(gene_positions["CDKN2A"].chrom,gene_positions["CDKN2A"].start, gene_positions["CDKN2A"].end)
    log2_CCND1 = sample.getLog2FromGene(gene_positions["CCND1"].chrom,gene_positions["CCND1"].start, gene_positions["CCND1"].end)
    if (log2_CCND1 != None):
       copynumber_CCND1 = 2.0 * 2.0 ** log2_CCND1
    else:
       copynumber_CCND1 = "n/a"
    log2_ERBB2 = sample.getLog2FromGene(gene_positions["ERBB2"].chrom,gene_positions["ERBB2"].start, gene_positions["ERBB2"].end)
    if (log2_ERBB2 != None):
       copynumber_ERBB2 = 2.0 * 2.0 ** log2_ERBB2
    else:
       copynumber_ERBB2 = "n/a"
    log2_CCNE1 = sample.getLog2FromGene(gene_positions["CCNE1"].chrom,gene_positions["CCNE1"].start, gene_positions["CCNE1"].end)
    if (log2_CCNE1 != None):
       copynumber_CCNE1 = 2.0 * 2.0 ** log2_CCNE1
    else:
       copynumber_CCNE1 = "n/a"
        
    if copynumber_MYC != "n/a" and copynumber_MYC > 4:
        MYC_count += 1
        if log2_CDKN2A != None and log2_CDKN2A < -0.3:
            MYC_and_CDKN2A_count += 1
        if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 > 4:
            MYC_and_FGFR1_count += 1
        if copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4:
            MYC_and_CCND1_count += 1
        if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4:
            MYC_and_ERBB2_count += 1
        if copynumber_CCNE1 != "n/a" and copynumber_CCNE1 > 4:
            MYC_and_CCNE1_count += 1
    if copynumber_FGFR1 != "n/a" and copynumber_FGFR1 > 4:
        FGFR1_count += 1
        if log2_CDKN2A != None and log2_CDKN2A < -0.3:
            FGFR1_and_CDKN2A_count += 1
        if copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4:
            FGFR1_and_CCND1_count += 1
        if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4:
            FGFR1_and_ERBB2_count += 1
        if copynumber_CCNE1 != "n/a" and copynumber_CCNE1 > 4:
            FGFR1_and_CCNE1_count += 1
    if copynumber_CCND1 != "n/a" and copynumber_CCND1 > 4:
        CCND1_count += 1
        if log2_CDKN2A != None and log2_CDKN2A < -0.3:
            CCND1_and_CDKN2A_count += 1
        if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4:
            CCND1_and_ERBB2_count += 1
        if copynumber_CCNE1 != "n/a" and copynumber_CCNE1 > 4:
            CCND1_and_CCNE1_count += 1
    if copynumber_ERBB2 != "n/a" and copynumber_ERBB2 > 4:
        ERBB2_count += 1
        if log2_CDKN2A != None and log2_CDKN2A < -0.3:
            ERBB2_and_CDKN2A_count += 1
        if copynumber_CCNE1 != "n/a" and copynumber_CCNE1 > 4:
            ERBB2_and_CCNE1_count += 1
    if log2_CDKN2A != None and log2_CDKN2A < -0.3:
        CDKN2A_count += 1
        if copynumber_CCNE1 != "n/a" and copynumber_CCNE1 > 4:
            CDKN2A_and_CCNE1_count += 1
    if copynumber_CCNE1 != "n/a" and copynumber_CCNE1 > 4:
        CCNE1_count += 1

print ""
print " 9) Check for cooccurence of amplifications (copynumber > 4) in MYC, CDKN2A, FGFR1, CCND1, ERBB2, CCNE1"
print ""
print "  MYC amplified: "+str(MYC_count)
print "  CDKNA2 deleted: "+str(CDKN2A_count)
print "  FGFR1 amplified: "+str(FGFR1_count)
print "  CCND1 amplified: "+str(CCND1_count)
print "  ERBB2 amplified: "+str(ERBB2_count)
print ""
s = "<table><tr><th></th><th>MYC</th><th>CDKN2A</th><th>FGFR1</th><th>CCND1</th><th>ERBB2</th><th>CCNE1</th></tr>"
s += "<tr><th>MYC</th><td><b>"+str(MYC_count)+"</b></td><td>"+str(MYC_and_CDKN2A_count)+"</td><td>"+str(MYC_and_FGFR1_count)+"</td><td>"+str(MYC_and_CCND1_count)+"</td><td>"+str(MYC_and_ERBB2_count)+"</td><td>"+str(MYC_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>CDKN2A</th><td>"+str(MYC_and_CDKN2A_count)+"</td><td><b>"+str(CDKN2A_count)+"</b></td><td>"+str(FGFR1_and_CDKN2A_count)+"</td><td>"+str(CCND1_and_CDKN2A_count)+"</td><td>"+str(ERBB2_and_CDKN2A_count)+"</td><td>"+str(CDKN2A_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>FGFR1</th><td>"+str(MYC_and_FGFR1_count)+"</td><td>"+str(FGFR1_and_CDKN2A_count)+"</td><td><b>"+str(FGFR1_count)+"</b></td><td>"+str(FGFR1_and_CCND1_count)+"</td><td>"+str(FGFR1_and_ERBB2_count)+"</td><td>"+str(FGFR1_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>CCND1</th><td>"+str(MYC_and_CCND1_count)+"</td><td>"+str(CCND1_and_CDKN2A_count)+"</td><td>"+str(FGFR1_and_CCND1_count)+"</td><td><b>"+str(CCND1_count)+"</b></td><td>"+str(CCND1_and_ERBB2_count)+"</td><td>"+str(CCND1_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>ERBB2</th><td>"+str(MYC_and_ERBB2_count)+"</td><td>"+str(ERBB2_and_CDKN2A_count)+"</td><td>"+str(FGFR1_and_ERBB2_count)+"</td><td>"+str(CCND1_and_ERBB2_count)+"</td><td><b>"+str(ERBB2_count)+"</b></td><td>"+str(ERBB2_and_CCNE1_count)+"</td></tr>"
s += "<tr><th>CCNE1</th><td>"+str(MYC_and_CCNE1_count)+"</td><td>"+str(CDKN2A_and_CCNE1_count)+"</td><td>"+str(FGFR1_and_CCNE1_count)+"</td><td>"+str(CCND1_and_CCNE1_count)+"</td><td>"+str(ERBB2_and_CCNE1_count)+"</td><td><b>"+str(CCNE1_count)+"</b></td></tr>"
s += "</table>"
h = HTML(s);h


 9) Check for cooccurence of amplifications (copynumber > 4) in MYC, CDKN2A, FGFR1, CCND1, ERBB2, CCNE1

  MYC amplified: 116
  CDKNA2 deleted: 222
  FGFR1 amplified: 76
  CCND1 amplified: 141
  ERBB2 amplified: 103

Out[17]:
MYCCDKN2AFGFR1CCND1ERBB2CCNE1
MYC116321621133
CDKN2A322222446283
FGFR11624762543
CCND1214625141193
ERBB213284191033
CCNE13333318

In [17]: