In [3]:
%matplotlib inline

In [4]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
pd.set_option('display.max_columns', 50) # print all rows


import os
os.chdir('/Users/evanbiederstedt/Downloads/RRBS_data_files')

import statsmodels.api as sm

In [5]:
"""
Normal_B_regions.csv
pcell_regions.csv
trito_regions.csv
mcell_regions.csv
CD19cell_regions.csv
cw154_regions.csv
"""


Out[5]:
'\nNormal_B_regions.csv\npcell_regions.csv\ntrito_regions.csv\nmcell_regions.csv\nCD19cell_regions.csv\ncw154_regions.csv\n'

In [6]:
normalB = pd.read_csv("Correct_Normal_B_regions.csv")
mcell = pd.read_csv("Correct_mcell_regions.csv")
pcell = pd.read_csv("Correct_pcell_regions.csv")
cd19cell = pd.read_csv("Correct_CD19cell_regions.csv")
trito = pd.read_csv("Correct_trito_regions.csv")
cw154 = pd.read_csv("Correct_cw154_regions.csv")

In [7]:
print(normalB.shape)
print(mcell.shape)
print(pcell.shape)
print(cd19cell.shape)
print(trito.shape)
print(cw154.shape)


(113, 40)
(75, 40)
(70, 40)
(84, 40)
(42, 40)
(62, 40)

In [8]:
normal_all = pd.concat([normalB, mcell, pcell, cd19cell])

cll_all = pd.concat([trito, cw154])

In [9]:
print(normal_all.shape)
print(cll_all.shape)


(342, 40)
(104, 40)

In [ ]:
#
# 
#

In [10]:
normalB.columns


Out[10]:
Index(['index', 'filename', 'methylation_tssDistance',
       'methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance',
       'methylation_ctcfUpDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf', 'PDR_tssDistance',
       'PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance',
       'PDR_ctcfUpDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf'],
      dtype='object')

In [58]:
normalB_meth = normalB[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]

pcell_meth = pcell[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]


mcell_meth = mcell[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]


cd19cell_meth = cd19cell[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]


trito_meth = trito[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]

cw154_meth = cw154[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]


normal_all_meth = normal_all[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]


cll_all_meth = cll_all[['methylation_genesDistance', 'methylation_exonsDistance',
       'methylation_intronsDistance', 'methylation_promoterDistance',
       'methylation_cgiDistance', 'methylation_ctcfDistance', 'methylation_ctcfDownDistance',
       'methylation_geneDistalRegulatoryModulesDistance',
       'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
       'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
       'methylation_geneDistalRegulatoryModulesK562Distance',
       'methylation_hypoInHues64Distance', 'methylation_intergenic',
       'methylation_shore', 'methylation_shelf']]

In [ ]:


In [59]:
normalB_PDR = normalB[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]

pcell_PDR = pcell[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]

mcell_PDR = mcell[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]

cd19cell_PDR = cd19cell[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]

trito_PDR = trito[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]

cw154_PDR = cw154[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]


normal_all_PDR = normal_all[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]



cll_all_PDR = cll_all[['PDR_genesDistance', 'PDR_exonsDistance', 'PDR_intronsDistance',
       'PDR_promoterDistance', 'PDR_cgiDistance', 'PDR_ctcfDistance', 'PDR_ctcfDownDistance',
       'PDR_geneDistalRegulatoryModulesDistance', 'PDR_vistaEnhancersDistance',
       'PDR_3PrimeUTRDistance', 'PDR_5PrimeUTRDistance',
       'PDR_firstExonDistance', 'PDR_geneDistalRegulatoryModulesK562Distance',
       'PDR_hypoInHues64Distance', 'PDR_intergenic', 'PDR_shore', 'PDR_shelf']]

In [45]:
sns.boxplot(normalB_meth, orient="h", showfliers=False)
plt.title("Methylation Normal B cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[45]:
<matplotlib.text.Text at 0x119090128>

In [46]:
sns.boxplot(pcell_PDR, orient="h", showfliers=False)
plt.title("Methylation CD27 p cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[46]:
<matplotlib.text.Text at 0x118abfe48>

In [47]:
sns.boxplot(mcell_meth, orient="h", showfliers=False)
plt.title("Methylation CD27 m cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[47]:
<matplotlib.text.Text at 0x1195a67f0>

In [48]:
sns.boxplot(cd19cell_meth, orient="h", showfliers=False)
plt.title("Methylation CD19 cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[48]:
<matplotlib.text.Text at 0x119854668>

In [49]:
sns.boxplot(trito_meth, orient="h", showfliers=False)
plt.title("Methylation trito_pool CLL cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[49]:
<matplotlib.text.Text at 0x119afe2e8>

In [50]:
sns.boxplot(cw154_meth, orient="h", showfliers=False)
plt.title("Methylation cw154 CLL cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[50]:
<matplotlib.text.Text at 0x119dcb240>

In [ ]:


In [ ]:


In [ ]:


In [51]:
sns.boxplot(normalB_PDR, orient="h", showfliers=False)
plt.title("PDR Normal B cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[51]:
<matplotlib.text.Text at 0x11a05ff60>

In [52]:
sns.boxplot(pcell_PDR, orient="h", showfliers=False)
plt.title("PDR CD27 p cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[52]:
<matplotlib.text.Text at 0x11a317e48>

In [53]:
sns.boxplot(mcell_PDR, orient="h", showfliers=False)
plt.title("PDR CD27 m cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[53]:
<matplotlib.text.Text at 0x11a68be48>

In [54]:
sns.boxplot(cd19cell_PDR, orient="h", showfliers=False)
plt.title("PDR CD19 cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[54]:
<matplotlib.text.Text at 0x11a93c048>

In [55]:
sns.boxplot(trito_PDR, orient="h", showfliers=False)
plt.title("PDR trito_pool CLL cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[55]:
<matplotlib.text.Text at 0x11aae80b8>

In [56]:
sns.boxplot(cw154_PDR, orient="h", showfliers=False)
plt.title("PDR cw154 CLL cells")


/opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/seaborn/categorical.py:2171: UserWarning: The boxplot API has been changed. Attempting to adjust your arguments for the new API (which might not work). Please update your code. See the version 0.6 release notes for more info.
  warnings.warn(msg, UserWarning)
Out[56]:
<matplotlib.text.Text at 0x11ad90d30>

In [ ]:


In [61]:
pdr_normal_strip = pd.melt(normal_all_PDR, var_name='genome_region', value_name='PDR')
pdr_cll_strip = pd.melt(cll_all_PDR, var_name='genome_region', value_name='PDR')

In [62]:
meth_normal_strip = pd.melt(normal_all_meth, var_name='genome_region', value_name='methyl')
meth_cll_strip = pd.melt(cll_all_meth, var_name='genome_region', value_name='methyl')

In [63]:
pdr_normal_strip["category"] = str("Normal")
pdr_cll_strip["category"] = str("CLL")

meth_normal_strip["category"] = str("Normal")
meth_cll_strip["category"] = str("CLL")

In [64]:
total_pdr_pairs = pd.concat([pdr_normal_strip, pdr_cll_strip])

total_meth_pairs = pd.concat([meth_normal_strip, meth_cll_strip])

In [68]:
sns.boxplot(y=total_pdr_pairs.genome_region, x=total_pdr_pairs.PDR, hue=total_pdr_pairs.category, showfliers=False)
plt.title("PDR by genomic regions: 342 Normal vs 104 CLL")
plt.ylabel("genomic region")


Out[68]:
<matplotlib.text.Text at 0x11b938240>

In [70]:
sns.boxplot(y=total_meth_pairs.genome_region, x=total_meth_pairs.methyl, hue=total_meth_pairs.category, showfliers=False)
plt.title("Methylation by regions: 342 Normal vs 104 CLL")
plt.ylabel("genomic region")
plt.xlabel("methylation, percentage")


Out[70]:
<matplotlib.text.Text at 0x11c0519e8>

In [80]:
cll_all_PDR.columns

print(str("cll_all_PDR.PDR_genesDistance.mean()"))
print(cll_all_PDR.PDR_genesDistance.mean())

print(str("cll_all_PDR.PDR_exonsDistance.mean()"))
print(cll_all_PDR.PDR_exonsDistance.mean())

print(str("cll_all_PDR.PDR_intronsDistance.mean()"))
print(cll_all_PDR.PDR_intronsDistance.mean())

print(str("cll_all_PDR.PDR_promoterDistance.mean()"))
print(cll_all_PDR.PDR_promoterDistance.mean())

print(str("cll_all_PDR.PDR_cgiDistance.mean()"))
print(cll_all_PDR.PDR_cgiDistance.mean())

print(str("cll_all_PDR.PDR_ctcfDistance.mean()"))
print(cll_all_PDR.PDR_ctcfDistance.mean())

print(str("cll_all_PDR.PDR_ctcfDownDistance.mean()"))
print(cll_all_PDR.PDR_ctcfDownDistance.mean())

print(str("cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()"))
print(cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean())

print(str("cll_all_PDR.PDR_vistaEnhancersDistance.mean()"))
print(cll_all_PDR.PDR_vistaEnhancersDistance.mean())

print(str("cll_all_PDR.PDR_3PrimeUTRDistance.mean()"))
print(cll_all_PDR.PDR_3PrimeUTRDistance.mean())

print(str("cll_all_PDR.PDR_5PrimeUTRDistance.mean()"))
print(cll_all_PDR.PDR_5PrimeUTRDistance.mean())

print(str("cll_all_PDR.PDR_firstExonDistance.mean()"))
print(cll_all_PDR.PDR_firstExonDistance.mean())

print(str("cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()"))
print(cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean())

print(str("cll_all_PDR.PDR_hypoInHues64Distance.mean()"))
print(cll_all_PDR.PDR_hypoInHues64Distance.mean())

print(str("cll_all_PDR.PDR_intergenic.mean()"))
print(cll_all_PDR.PDR_intergenic.mean())

print(str("cll_all_PDR.PDR_shore.mean()"))
print(cll_all_PDR.PDR_shore.mean())

print(str("cll_all_PDR.PDR_shelf.mean()"))
print(cll_all_PDR.PDR_shelf.mean())


cll_all_PDR.PDR_genesDistance.mean()
0.37269617839324076
cll_all_PDR.PDR_exonsDistance.mean()
0.41829297529114845
cll_all_PDR.PDR_intronsDistance.mean()
0.3650699308814477
cll_all_PDR.PDR_promoterDistance.mean()
0.40727005498639657
cll_all_PDR.PDR_cgiDistance.mean()
0.4490893441890996
cll_all_PDR.PDR_ctcfDistance.mean()
0.44148463221106
cll_all_PDR.PDR_ctcfDownDistance.mean()
0.44148463221106
cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()
0.4405816304593216
cll_all_PDR.PDR_vistaEnhancersDistance.mean()
0.45874503113499265
cll_all_PDR.PDR_3PrimeUTRDistance.mean()
0.3998354903492582
cll_all_PDR.PDR_5PrimeUTRDistance.mean()
0.37802327639764954
cll_all_PDR.PDR_firstExonDistance.mean()
0.4307069320191062
cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()
0.3894503244391172
cll_all_PDR.PDR_hypoInHues64Distance.mean()
0.30505650168520754
cll_all_PDR.PDR_intergenic.mean()
0.39517010171087885
cll_all_PDR.PDR_shore.mean()
0.393623998945781
cll_all_PDR.PDR_shelf.mean()
0.36001169284195517

In [82]:
print(str("normal_all_PDR.PDR_genesDistance.mean()"))
print(normal_all_PDR.PDR_genesDistance.mean())

print(str("normal_all_PDR.PDR_exonsDistance.mean()"))
print(normal_all_PDR.PDR_exonsDistance.mean())

print(str("normal_all_PDR.PDR_intronsDistance.mean()"))
print(normal_all_PDR.PDR_intronsDistance.mean())

print(str("normal_all_PDR.PDR_promoterDistance.mean()"))
print(normal_all_PDR.PDR_promoterDistance.mean())

print(str("normal_all_PDR.PDR_cgiDistance.mean()"))
print(normal_all_PDR.PDR_cgiDistance.mean())

print(str("normal_all_PDR.PDR_ctcfDistance.mean()"))
print(normal_all_PDR.PDR_ctcfDistance.mean())

print(str("normal_all_PDR.PDR_ctcfDownDistance.mean()"))
print(normal_all_PDR.PDR_ctcfDownDistance.mean())

print(str("normal_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()"))
print(normal_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean())

print(str("normal_all_PDR.PDR_vistaEnhancersDistance.mean()"))
print(normal_all_PDR.PDR_vistaEnhancersDistance.mean())

print(str("normal_all_PDR.PDR_3PrimeUTRDistance.mean()"))
print(normal_all_PDR.PDR_3PrimeUTRDistance.mean())

print(str("normal_all_PDR.PDR_5PrimeUTRDistance.mean()"))
print(normal_all_PDR.PDR_5PrimeUTRDistance.mean())

print(str("normal_all_PDR.PDR_firstExonDistance.mean()"))
print(normal_all_PDR.PDR_firstExonDistance.mean())

print(str("normal_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()"))
print(normal_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean())

print(str("normal_all_PDR.PDR_hypoInHues64Distance.mean()"))
print(normal_all_PDR.PDR_hypoInHues64Distance.mean())

print(str("normal_all_PDR.PDR_intergenic.mean()"))
print(normal_all_PDR.PDR_intergenic.mean())

print(str("normal_all_PDR.PDR_shore.mean()"))
print(normal_all_PDR.PDR_shore.mean())

print(str("normal_all_PDR.PDR_shelf.mean()"))
print(normal_all_PDR.PDR_shelf.mean())


normal_all_PDR.PDR_genesDistance.mean()
0.24930473585961
normal_all_PDR.PDR_exonsDistance.mean()
0.25325746332922106
normal_all_PDR.PDR_intronsDistance.mean()
0.2476756236075499
normal_all_PDR.PDR_promoterDistance.mean()
0.2103625254076451
normal_all_PDR.PDR_cgiDistance.mean()
0.23937048382920514
normal_all_PDR.PDR_ctcfDistance.mean()
0.24221148975647863
normal_all_PDR.PDR_ctcfDownDistance.mean()
0.24221148975647863
normal_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()
0.2864282797079518
normal_all_PDR.PDR_vistaEnhancersDistance.mean()
0.3537765947170139
normal_all_PDR.PDR_3PrimeUTRDistance.mean()
0.30394027469996177
normal_all_PDR.PDR_5PrimeUTRDistance.mean()
0.2060569744089335
normal_all_PDR.PDR_firstExonDistance.mean()
0.21613506516898875
normal_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()
0.230102722902288
normal_all_PDR.PDR_hypoInHues64Distance.mean()
0.2464695521440493
normal_all_PDR.PDR_intergenic.mean()
0.31758050169763036
normal_all_PDR.PDR_shore.mean()
0.3242239370358949
normal_all_PDR.PDR_shelf.mean()
0.29427454495709804

In [ ]:


In [103]:
print(str("cll_all_PDR.PDR_genesDistance.mean()"))
print((cll_all_PDR.PDR_genesDistance.mean() - normal_all_PDR.PDR_genesDistance.mean())
/(normal_all_PDR.PDR_genesDistance.mean()))

print(str("cll_all_PDR.PDR_exonsDistance.mean()"))

print((cll_all_PDR.PDR_exonsDistance.mean() - normal_all_PDR.PDR_exonsDistance.mean())
/(normal_all_PDR.PDR_exonsDistance.mean()))

print(str("cll_all_PDR.PDR_intronsDistance.mean()"))
print((cll_all_PDR.PDR_intronsDistance.mean() - normal_all_PDR.PDR_intronsDistance.mean())/(normal_all_PDR.PDR_intronsDistance.mean()
))




print(str("cll_all_PDR.PDR_promoterDistance.mean()"))
print((cll_all_PDR.PDR_promoterDistance.mean() - normal_all_PDR.PDR_promoterDistance.mean())
/normal_all_PDR.PDR_promoterDistance.mean())



print(str("cll_all_PDR.PDR_cgiDistance.mean()"))
print((cll_all_PDR.PDR_cgiDistance.mean() - normal_all_PDR.PDR_cgiDistance.mean())/normal_all_PDR.PDR_cgiDistance.mean())



print(str("cll_all_PDR.PDR_ctcfDistance.mean()"))
print((cll_all_PDR.PDR_ctcfDistance.mean() - normal_all_PDR.PDR_ctcfDistance.mean())/normal_all_PDR.PDR_ctcfDistance.mean())



print(str("cll_all_PDR.PDR_ctcfDownDistance.mean()"))
print((cll_all_PDR.PDR_ctcfDownDistance.mean() - normal_all_PDR.PDR_ctcfDownDistance.mean())/normal_all_PDR.PDR_ctcfDownDistance.mean())



print(str("cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()"))
print((cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()-normal_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean())/
      (normal_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()))
      


print(str("cll_all_PDR.PDR_vistaEnhancersDistance.mean()"))
print((cll_all_PDR.PDR_vistaEnhancersDistance.mean()-normal_all_PDR.PDR_vistaEnhancersDistance.mean())/normal_all_PDR.PDR_vistaEnhancersDistance.mean())



print(str("cll_all_PDR.PDR_3PrimeUTRDistance.mean()"))
print((cll_all_PDR.PDR_3PrimeUTRDistance.mean()-normal_all_PDR.PDR_vistaEnhancersDistance.mean())/normal_all_PDR.PDR_vistaEnhancersDistance.mean())
      


print(str("cll_all_PDR.PDR_5PrimeUTRDistance.mean()"))
print((cll_all_PDR.PDR_5PrimeUTRDistance.mean()-normal_all_PDR.PDR_5PrimeUTRDistance.mean())/normal_all_PDR.PDR_5PrimeUTRDistance.mean())



print(str("cll_all_PDR.PDR_firstExonDistance.mean()"))
print((cll_all_PDR.PDR_firstExonDistance.mean()-normal_all_PDR.PDR_firstExonDistance.mean())/normal_all_PDR.PDR_firstExonDistance.mean())



print(str("cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()"))
print((cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()-normal_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean())
      /normal_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean())



print(str("cll_all_PDR.PDR_hypoInHues64Distance.mean()"))
print((cll_all_PDR.PDR_hypoInHues64Distance.mean()-normal_all_PDR.PDR_hypoInHues64Distance.mean())/normal_all_PDR.PDR_hypoInHues64Distance.mean()
)



print(str("cll_all_PDR.PDR_intergenic.mean()"))
print((cll_all_PDR.PDR_intergenic.mean()-normal_all_PDR.PDR_intergenic.mean())/normal_all_PDR.PDR_intergenic.mean())



print(str("cll_all_PDR.PDR_shore.mean()"))
print((cll_all_PDR.PDR_shore.mean()-normal_all_PDR.PDR_shore.mean())/normal_all_PDR.PDR_shore.mean())

       
       
print(str("cll_all_PDR.PDR_shelf.mean()"))
print((cll_all_PDR.PDR_shelf.mean()-normal_all_PDR.PDR_shelf.mean())/normal_all_PDR.PDR_shelf.mean())


cll_all_PDR.PDR_genesDistance.mean()
0.4949422324777485
cll_all_PDR.PDR_exonsDistance.mean()
0.6516511292201885
cll_all_PDR.PDR_intronsDistance.mean()
0.47398409889506476
cll_all_PDR.PDR_promoterDistance.mean()
0.9360390078849821
cll_all_PDR.PDR_cgiDistance.mean()
0.8761266510600045
cll_all_PDR.PDR_ctcfDistance.mean()
0.8227237388900591
cll_all_PDR.PDR_ctcfDownDistance.mean()
0.8227237388900591
cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()
0.538191797641447
cll_all_PDR.PDR_vistaEnhancersDistance.mean()
0.296708255960073
cll_all_PDR.PDR_3PrimeUTRDistance.mean()
0.13019203734799595
cll_all_PDR.PDR_5PrimeUTRDistance.mean()
0.8345570562801612
cll_all_PDR.PDR_firstExonDistance.mean()
0.9927674932447954
cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()
0.6925063707503165
cll_all_PDR.PDR_hypoInHues64Distance.mean()
0.23770461313175528
cll_all_PDR.PDR_intergenic.mean()
0.2443147472797995
cll_all_PDR.PDR_shore.mean()
0.2140497785091135
cll_all_PDR.PDR_shelf.mean()
0.2233871363030767

In [104]:
print("Methylation values")


Methylation values

In [106]:
print(str("cll_all_PDR.PDR_genesDistance.mean()"))
print((cll_all_meth.methylation_genesDistance.mean() - normal_all_meth.methylation_genesDistance.mean())
/(normal_all_meth.methylation_genesDistance.mean()))

print(str("cll_all_PDR.PDR_exonsDistance.mean()"))

print((cll_all_meth.methylation_exonsDistance.mean() - normal_all_meth.methylation_exonsDistance.mean())
/(normal_all_meth.methylation_exonsDistance.mean()))

print(str("cll_all_PDR.PDR_intronsDistance.mean()"))
print((cll_all_meth.methylation_intronsDistance.mean() - normal_all_meth.methylation_intronsDistance.mean())
      /(normal_all_meth.methylation_intronsDistance.mean()
))




print(str("cll_all_PDR.PDR_promoterDistance.mean()"))
print((cll_all_meth.methylation_promoterDistance.mean() - normal_all_meth.methylation_promoterDistance.mean())
/normal_all_meth.methylation_promoterDistance.mean())



print(str("cll_all_PDR.PDR_cgiDistance.mean()"))
print((cll_all_meth.methylation_cgiDistance.mean() - normal_all_meth.methylation_cgiDistance.mean())
      /normal_all_meth.methylation_cgiDistance.mean())



print(str("cll_all_PDR.PDR_ctcfDistance.mean()"))
print((cll_all_meth.methylation_ctcfDistance.mean() - normal_all_meth.methylation_ctcfDistance.mean())/
      normal_all_meth.methylation_ctcfDistance.mean())



print(str("cll_all_PDR.PDR_ctcfDownDistance.mean()"))
print((cll_all_meth.methylation_ctcfDownDistance.mean() - normal_all_meth.methylation_ctcfDownDistance.mean())
      /normal_all_meth.methylation_ctcfDownDistance.mean())



print(str("cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()"))
print((cll_all_meth.methylation_geneDistalRegulatoryModulesDistance.mean()-normal_all_meth.methylation_geneDistalRegulatoryModulesDistance.mean())/
      (normal_all_meth.methylation_geneDistalRegulatoryModulesDistance.mean()))
      


print(str("cll_all_PDR.PDR_vistaEnhancersDistance.mean()"))
print((cll_all_meth.methylation_vistaEnhancersDistance.mean()-normal_all_meth.methylation_vistaEnhancersDistance.mean())
      /normal_all_meth.methylation_vistaEnhancersDistance.mean())



print(str("cll_all_PDR.PDR_3PrimeUTRDistance.mean()"))
print((cll_all_meth.methylation_3PrimeUTRDistance.mean()-normal_all_meth.methylation_vistaEnhancersDistance.mean())
      /normal_all_meth.methylation_vistaEnhancersDistance.mean())
      


print(str("cll_all_PDR.PDR_5PrimeUTRDistance.mean()"))
print((cll_all_meth.methylation_5PrimeUTRDistance.mean()-normal_all_meth.methylation_5PrimeUTRDistance.mean())
      /normal_all_meth.methylation_5PrimeUTRDistance.mean())



print(str("cll_all_PDR.PDR_firstExonDistance.mean()"))
print((cll_all_meth.methylation_firstExonDistance.mean()-normal_all_meth.methylation_firstExonDistance.mean())
      /normal_all_meth.methylation_firstExonDistance.mean())



print(str("cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()"))
print((cll_all_meth.methylation_geneDistalRegulatoryModulesK562Distance.mean()-normal_all_meth.methylation_geneDistalRegulatoryModulesK562Distance.mean())
      /normal_all_meth.methylation_geneDistalRegulatoryModulesK562Distance.mean())



print(str("cll_all_PDR.PDR_hypoInHues64Distance.mean()"))
print((cll_all_meth.methylation_hypoInHues64Distance.mean()-normal_all_meth.methylation_hypoInHues64Distance.mean())
      /normal_all_meth.methylation_hypoInHues64Distance.mean())



print(str("cll_all_PDR.PDR_intergenic.mean()"))
print((cll_all_meth.methylation_intergenic.mean()-normal_all_meth.methylation_intergenic.mean())/normal_all_meth.methylation_intergenic.mean())



print(str("cll_all_PDR.PDR_shore.mean()"))
print((cll_all_meth.methylation_shore.mean()-normal_all_meth.methylation_shore.mean())/normal_all_meth.methylation_shore.mean())

       
       
print(str("cll_all_PDR.PDR_shelf.mean()"))
print((cll_all_meth.methylation_shelf.mean()-normal_all_meth.methylation_shelf.mean())/normal_all_meth.methylation_shelf.mean())


cll_all_PDR.PDR_genesDistance.mean()
0.10419033751219631
cll_all_PDR.PDR_exonsDistance.mean()
0.19328549925513083
cll_all_PDR.PDR_intronsDistance.mean()
0.08362722373625661
cll_all_PDR.PDR_promoterDistance.mean()
0.5193102810740803
cll_all_PDR.PDR_cgiDistance.mean()
0.521889052082765
cll_all_PDR.PDR_ctcfDistance.mean()
0.4107902977666422
cll_all_PDR.PDR_ctcfDownDistance.mean()
0.4107902977666422
cll_all_PDR.PDR_geneDistalRegulatoryModulesDistance.mean()
0.17690238011319143
cll_all_PDR.PDR_vistaEnhancersDistance.mean()
0.10248177080324447
cll_all_PDR.PDR_3PrimeUTRDistance.mean()
0.4052770321200992
cll_all_PDR.PDR_5PrimeUTRDistance.mean()
0.2517061312038912
cll_all_PDR.PDR_firstExonDistance.mean()
0.6214028030402958
cll_all_PDR.PDR_geneDistalRegulatoryModulesK562Distance.mean()
0.2343302299125117
cll_all_PDR.PDR_hypoInHues64Distance.mean()
-0.04642664738991629
cll_all_PDR.PDR_intergenic.mean()
-0.02915619886107203
cll_all_PDR.PDR_shore.mean()
0.005221867388901363
cll_all_PDR.PDR_shelf.mean()
-0.03405818827650831

In [ ]:


In [ ]:


In [ ]:


In [110]:
rank = total_pdr_pairs.groupby("genome_region")["PDR"].mean().sort_values()[::-1].index

sns.boxplot(y=total_pdr_pairs.genome_region, x=total_pdr_pairs.PDR, hue=total_pdr_pairs.category, showfliers=False, order=rank)
plt.title("PDR by genomic regions: 342 Normal vs 104 CLL")
plt.ylabel("genomic region")


Out[110]:
<matplotlib.text.Text at 0x11c5d0f28>

In [ ]:


In [112]:
rank = total_meth_pairs.groupby("genome_region")["methyl"].mean().sort_values()[::-1].index

sns.boxplot(y=total_meth_pairs.genome_region, x=total_meth_pairs.methyl, hue=total_meth_pairs.category, showfliers=False, order=rank)
plt.title("Methylation by regions: 342 Normal vs 104 CLL")
plt.ylabel("genomic region")
plt.xlabel("methylation, percentage")


Out[112]:
<matplotlib.text.Text at 0x11c8d8240>

In [114]:
rank = total_pdr_pairs.groupby("genome_region")["PDR"].mean().sort_values()[::-1].index

sns.violinplot(y=total_pdr_pairs.genome_region, x=total_pdr_pairs.PDR, hue=total_pdr_pairs.category, showfliers=False, order=rank)
plt.title("PDR by genomic regions: 342 Normal vs 104 CLL")
plt.ylabel("genomic region")


Out[114]:
<matplotlib.text.Text at 0x11cffe240>

In [108]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: