In [1]:
%matplotlib inline
In [2]:
#
# Remember to
#
#
In [3]:
# Third-party ipython_memwatcher: create a watcher object and start it.
# NOTE(review): presumably this reports memory usage for each cell run from
# here on - confirm against the package documentation.
from ipython_memwatcher import MemWatcher
mw = MemWatcher()
mw.start_watching_memory()
In [4]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
pd.set_option('display.max_columns', 50) # show up to 50 columns when displaying a DataFrame
import os
# NOTE(review): hard-coded, machine-specific directory. Every relative CSV path
# read or written below resolves against it, so the notebook only runs where
# this folder exists.
os.chdir('/Users/evanbiederstedt/Downloads/RRBS_data_files')
import statsmodels.api as sm
In [5]:
"""
CD19cell_regions.csv
cw154_regions.csv
Normal_B_regions.csv
trito_regions.csv
pcell_regions.csv
mcell_regions.csv
"""
Out[5]:
In [6]:
# Load the six per-cell-type region tables (paths are relative to the working
# directory); files are read in the order listed.
region_csv_paths = (
    "trito_regions.csv",
    "Normal_B_regions.csv",
    "pcell_regions.csv",
    "mcell_regions.csv",
    "cw154_regions.csv",
    "CD19cell_regions.csv",
)
trito, normal, pcell, mcell, cw154, cd19cell = [
    pd.read_csv(csv_path) for csv_path in region_csv_paths
]
In [7]:
# Print (rows, columns) of each loaded table, one per line, in load order.
# Original note attached to `normal`: remove 2cell files
for region_table in (trito, normal, pcell, mcell, cw154, cd19cell):
    print(region_table.shape)
In [8]:
# Keep only the first 33 characters of each trito filename.
trito["filename"] = trito["filename"].str.slice(0, 33)
In [9]:
trito.head()
Out[9]:
In [10]:
# Keep only the first 40 characters of each Normal_B filename.
normal["filename"] = normal["filename"].str.slice(0, 40)
In [11]:
normal.tail()
Out[11]:
In [12]:
# The first 31 characters of the filename identify the protocol batch; the
# next cell compares this column against the batch prefixes.
pcell["protocol"] = pcell["filename"].str.slice(0, 31)
In [13]:
# Truncate each pcell filename to a length that depends on its protocol prefix
# (stored in pcell["protocol"]). Rows whose prefix is not listed are untouched.
#
# .loc[mask, column] replaces the original chained `df[column][mask] = ...`,
# which may assign into a temporary copy (SettingWithCopyWarning) and does
# nothing at all when pandas copy-on-write is enabled.
pcell_name_lengths = [
    ('RRBS_NormalBCD19pCD27pcell1_22_', 46),
    ('RRBS_NormalBCD19pCD27pcell23_44', 47),
    ('RRBS_NormalBCD19pCD27pcell45_66', 47),
    ('RRBS_NormalBCD19pCD27pcell67_88', 47),
]
for protocol_prefix, name_length in pcell_name_lengths:
    in_protocol = pcell["protocol"] == protocol_prefix
    pcell.loc[in_protocol, "filename"] = pcell.loc[in_protocol, "filename"].str[:name_length]
In [14]:
pcell.tail()
Out[14]:
In [15]:
# The first 31 characters of the filename identify the protocol batch; the
# next cell compares this column against the batch prefixes.
mcell["protocol"] = mcell["filename"].str.slice(0, 31)
In [16]:
# Truncate each mcell filename to a length that depends on its protocol prefix
# (stored in mcell["protocol"]). Rows whose prefix is not listed are untouched.
#
# .loc[mask, column] replaces the original chained `df[column][mask] = ...`,
# which may assign into a temporary copy (SettingWithCopyWarning) and does
# nothing at all when pandas copy-on-write is enabled.
mcell_name_lengths = [
    ('RRBS_NormalBCD19pCD27mcell1_22_', 46),
    ('RRBS_NormalBCD19pCD27mcell23_44', 47),
    ('RRBS_NormalBCD19pCD27mcell45_66', 47),
    ('RRBS_NormalBCD19pCD27mcell67_88', 47),
]
for protocol_prefix, name_length in mcell_name_lengths:
    in_protocol = mcell["protocol"] == protocol_prefix
    mcell.loc[in_protocol, "filename"] = mcell.loc[in_protocol, "filename"].str[:name_length]
In [17]:
mcell.tail()
Out[17]:
In [18]:
len("RRBS_NormalBCD19pcell1_22_")
Out[18]:
In [19]:
# The first 26 characters of the filename identify the protocol batch; a later
# cell compares this column against the batch prefixes.
cd19cell["protocol"] = cd19cell["filename"].str.slice(0, 26)
In [20]:
len('RRBS_NormalBCD19pcell1_22_TAAGGCGA.ACAACC')
Out[20]:
In [21]:
# Truncate each cd19cell filename to a length that depends on its protocol
# prefix (stored in cd19cell["protocol"]). Rows whose prefix is not listed are
# untouched.
#
# .loc[mask, column] replaces the original chained `df[column][mask] = ...`,
# which may assign into a temporary copy (SettingWithCopyWarning) and does
# nothing at all when pandas copy-on-write is enabled.
cd19cell_name_lengths = [
    ('RRBS_NormalBCD19pcell1_22_', 41),
    ('RRBS_NormalBCD19pcell23_44', 42),
    ('RRBS_NormalBCD19pcell45_66', 42),
    ('RRBS_NormalBCD19pcell67_88', 42),
]
for protocol_prefix, name_length in cd19cell_name_lengths:
    in_protocol = cd19cell["protocol"] == protocol_prefix
    cd19cell.loc[in_protocol, "filename"] = cd19cell.loc[in_protocol, "filename"].str[:name_length]
In [22]:
cd19cell.tail()
Out[22]:
In [23]:
len("RRBS_cw154_Tris_protease_GR")
Out[23]:
In [24]:
# The first 27 characters of the filename identify the protocol; a later cell
# compares this column against the protocol prefixes.
cw154["protocol"] = cw154["filename"].str.slice(0, 27)
In [25]:
cw154.head() # RRBS_cw154_CutSmart_protein # RRBS_cw154_Tris_protease_CT # RRBS_cw154_Tris_protease_GR
Out[25]:
In [26]:
# Truncate each cw154 filename to a length that depends on its protocol prefix
# (stored in cw154["protocol"]). Rows whose prefix is not listed are untouched.
#
# .loc[mask, column] replaces the original chained `df[column][mask] = ...`,
# which may assign into a temporary copy (SettingWithCopyWarning) and does
# nothing at all when pandas copy-on-write is enabled.
cw154_name_lengths = [
    ("RRBS_cw154_CutSmart_protein", 48),
    ("RRBS_cw154_Tris_protease_CT", 40),
    ("RRBS_cw154_Tris_protease_GR", 43),
]
for protocol_prefix, name_length in cw154_name_lengths:
    in_protocol = cw154["protocol"] == protocol_prefix
    cw154.loc[in_protocol, "filename"] = cw154.loc[in_protocol, "filename"].str[:name_length]
In [27]:
cw154
Out[27]:
In [28]:
files = [trito, normal, pcell, mcell, cw154, cd19cell]
In [29]:
# Stack the six per-cell-type tables row-wise into one frame. Each input keeps
# its own row labels here, so index values repeat across the inputs.
total_region_files = pd.concat([trito, normal, pcell, mcell, cw154, cd19cell])
In [30]:
total_region_files.shape
Out[30]:
In [31]:
# Keep the sample key plus the methylation_* and PDR_* genomic-region columns,
# in this order; every other column of the concatenated table is discarded.
region_columns = [
    "filename",
    # methylation per genomic region
    "methylation_tssDistance",
    "methylation_genesDistance",
    "methylation_exonsDistance",
    "methylation_intronsDistance",
    "methylation_promoterDistance",
    "methylation_cgiDistance",
    "methylation_ctcfDistance",
    "methylation_ctcfUpDistance",
    "methylation_ctcfDownDistance",
    "methylation_geneDistalRegulatoryModulesDistance",
    "methylation_vistaEnhancersDistance",
    "methylation_3PrimeUTRDistance",
    "methylation_5PrimeUTRDistance",
    "methylation_firstExonDistance",
    "methylation_geneDistalRegulatoryModulesK562Distance",
    "methylation_hypoInHues64Distance",
    "methylation_intergenic",
    "methylation_shore",
    "methylation_shelf",
    # PDR per genomic region
    "PDR_tssDistance",
    "PDR_genesDistance",
    "PDR_exonsDistance",
    "PDR_intronsDistance",
    "PDR_promoterDistance",
    "PDR_cgiDistance",
    "PDR_ctcfDistance",
    "PDR_ctcfUpDistance",
    "PDR_ctcfDownDistance",
    "PDR_geneDistalRegulatoryModulesDistance",
    "PDR_vistaEnhancersDistance",
    "PDR_3PrimeUTRDistance",
    "PDR_5PrimeUTRDistance",
    "PDR_firstExonDistance",
    "PDR_geneDistalRegulatoryModulesK562Distance",
    "PDR_hypoInHues64Distance",
    "PDR_intergenic",
    "PDR_shore",
    "PDR_shelf",
]
total_region_files = total_region_files[region_columns]
In [35]:
total_region_files = total_region_files.reset_index(drop=True)
In [37]:
total_region_files[:40]
Out[37]:
In [38]:
stats = pd.read_csv("RRBS_anno_statistics_full_446files_filter50K.csv")
In [39]:
stats.shape
Out[39]:
In [40]:
stats_files = stats.filename
In [43]:
# Join the per-file statistics with the region table on `filename`. pandas'
# default join is inner, so files missing from either table are dropped.
merged = pd.merge(stats, total_region_files, on='filename')
In [46]:
# Remove these ten statistics columns from the merged table.
dropped_stats_columns = [
    'thisMeth',
    'mixedReadCount',
    'total_reads',
    'total_cpg_no_filter',
    'total_cpg_gtrthan1',
    'total_cpg_gtrthan38',
    'avgReadCpgs_nofilter',
    'avgReadCpgs_lessthan1CpG',
    'avgReadCpgs_gtreql3.8CpG',
    'bsRate',
]
merged = merged.drop(dropped_stats_columns, axis=1)
In [47]:
merged
Out[47]:
In [48]:
merged.to_csv("total_genomic_region.csv", index=False)
In [49]:
merged.shape
Out[49]:
In [50]:
merged.columns
Out[50]:
In [51]:
#
# First do pairs by CLL vs Normal B; We could discuss protocols at a later point
#
# NOTE: `normal` is rebound here. From this cell on it is the "normal" subset
# of `merged`, no longer the raw Normal_B_regions.csv table loaded earlier.
is_normal_row = merged["type"] == "normal"
is_CLL_row = merged["type"] == "CLL"
normal = merged[is_normal_row]
CLL = merged[is_CLL_row]
In [53]:
# Number of rows in each cohort: normal first, then CLL.
for cohort in (normal, CLL):
    print(len(cohort))
In [54]:
# Plain aliases (no copy): each *_pairs name refers to the same DataFrame
# object as its cohort.
CLL_pairs, normal_pairs = CLL, normal
In [58]:
CLL_pairs.columns
Out[58]:
In [80]:
# Pairwise CLL comparison on `methylation`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in `methylation`.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_difference': stacked})[['filename', 'methylation_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs1 = pd.merge(out, methylation_differences, how='inner')
print(pairs1.shape)
In [81]:
# Pairwise CLL comparison on `PDR_total`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in `PDR_total`.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_total"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
PDR_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_difference': stacked})[['filename', 'PDR_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs2 = pd.merge(out, PDR_differences, how='inner')
print(pairs2.shape)
In [82]:
# Pairwise CLL comparison on `methylation_unweighted`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_unweighted"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_unweighted_difference': stacked})[['filename', 'methylation_unweighted_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs3 = pd.merge(out, methylation_differences, how='inner')
print(pairs3.shape)
In [83]:
# Pairwise CLL comparison on `PDR_unweighted`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_unweighted"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
PDR_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_unweighted_difference': stacked})[['filename', 'PDR_unweighted_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs4 = pd.merge(out, PDR_differences, how='inner')
print(pairs4.shape)
In [84]:
"""
'methylation_tssDistance',
'methylation_genesDistance', 'methylation_exonsDistance',
'methylation_intronsDistance', 'methylation_promoterDistance',
'methylation_cgiDistance', 'methylation_ctcfDistance',
'methylation_ctcfUpDistance', 'methylation_ctcfDownDistance',
'methylation_geneDistalRegulatoryModulesDistance',
'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
'methylation_geneDistalRegulatoryModulesK562Distance',
'methylation_hypoInHues64Distance', 'methylation_intergenic',
'methylation_shore', 'methylation_shelf'
"""
Out[84]:
In [85]:
# Pairwise CLL comparison on `methylation_tssDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_tssDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_tssDistance_difference': stacked})[['filename', 'methylation_tssDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs5 = pd.merge(out, methylation_differences, how='inner')
print(pairs5.shape)
In [86]:
# Pairwise CLL comparison on `methylation_genesDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_genesDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_genesDistance_difference': stacked})[['filename', 'methylation_genesDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs6 = pd.merge(out, methylation_differences, how='inner')
print(pairs6.shape)
In [87]:
# Pairwise CLL comparison on `methylation_exonsDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_exonsDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_exonsDistance_difference': stacked})[['filename', 'methylation_exonsDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs7 = pd.merge(out, methylation_differences, how='inner')
print(pairs7.shape)
In [88]:
# Pairwise CLL comparison on `methylation_intronsDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_intronsDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_intronsDistance_difference': stacked})[['filename', 'methylation_intronsDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs8 = pd.merge(out, methylation_differences, how='inner')
print(pairs8.shape)
In [89]:
# Pairwise CLL comparison on `methylation_promoterDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_promoterDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_promoterDistance_difference': stacked})[['filename', 'methylation_promoterDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs9 = pd.merge(out, methylation_differences, how='inner')
print(pairs9.shape)
In [90]:
# Pairwise CLL comparison on `methylation_cgiDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_cgiDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_cgiDistance_difference': stacked})[['filename', 'methylation_cgiDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs10 = pd.merge(out, methylation_differences, how='inner')
print(pairs10.shape)
In [91]:
# Pairwise CLL comparison on `methylation_ctcfDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_ctcfDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_ctcfDistance_difference': stacked})[['filename', 'methylation_ctcfDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs11 = pd.merge(out, methylation_differences, how='inner')
print(pairs11.shape)
In [92]:
# Pairwise CLL comparison on `methylation_ctcfUpDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_ctcfUpDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_ctcfUpDistance_difference': stacked})[['filename', 'methylation_ctcfUpDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs12 = pd.merge(out, methylation_differences, how='inner')
print(pairs12.shape)
In [93]:
# Pairwise CLL comparison on `methylation_ctcfDownDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_ctcfDownDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_ctcfDownDistance_difference': stacked})[['filename', 'methylation_ctcfDownDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs13 = pd.merge(out, methylation_differences, how='inner')
print(pairs13.shape)
In [94]:
# Pairwise CLL comparison on `methylation_geneDistalRegulatoryModulesDistance`: for every
# unordered sample pair, the mean of each numeric covariate plus the absolute
# difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_geneDistalRegulatoryModulesDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_geneDistalRegulatoryModulesDistance_difference': stacked})[['filename', 'methylation_geneDistalRegulatoryModulesDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs14 = pd.merge(out, methylation_differences, how='inner')
print(pairs14.shape)
In [95]:
# Pairwise CLL comparison on `methylation_vistaEnhancersDistance`: for every unordered
# sample pair, the mean of each numeric covariate plus the absolute difference
# in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_vistaEnhancersDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_vistaEnhancersDistance_difference': stacked})[['filename', 'methylation_vistaEnhancersDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs15 = pd.merge(out, methylation_differences, how='inner')
print(pairs15.shape)
In [96]:
# Pairwise CLL comparison on `methylation_3PrimeUTRDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_3PrimeUTRDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_3PrimeUTRDistance_difference': stacked})[['filename', 'methylation_3PrimeUTRDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs16 = pd.merge(out, methylation_differences, how='inner')
print(pairs16.shape)
In [97]:
# Pairwise CLL comparison on `methylation_5PrimeUTRDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_5PrimeUTRDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_5PrimeUTRDistance_difference': stacked})[['filename', 'methylation_5PrimeUTRDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs17 = pd.merge(out, methylation_differences, how='inner')
print(pairs17.shape)
In [98]:
# Pairwise CLL comparison on `methylation_firstExonDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_firstExonDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_firstExonDistance_difference': stacked})[['filename', 'methylation_firstExonDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs18 = pd.merge(out, methylation_differences, how='inner')
print(pairs18.shape)
In [99]:
# Pairwise CLL comparison on `methylation_geneDistalRegulatoryModulesK562Distance`: for
# every unordered sample pair, the mean of each numeric covariate plus the
# absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_geneDistalRegulatoryModulesK562Distance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_geneDistalRegulatoryModulesK562Distance_difference': stacked})[['filename', 'methylation_geneDistalRegulatoryModulesK562Distance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs19 = pd.merge(out, methylation_differences, how='inner')
print(pairs19.shape)
In [100]:
# Pairwise CLL comparison on `methylation_hypoInHues64Distance`: for every unordered
# sample pair, the mean of each numeric covariate plus the absolute difference
# in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_hypoInHues64Distance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_hypoInHues64Distance_difference': stacked})[['filename', 'methylation_hypoInHues64Distance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs20 = pd.merge(out, methylation_differences, how='inner')
print(pairs20.shape)
In [101]:
# Pairwise CLL comparison on `methylation_intergenic`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_intergenic"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_intergenic_difference': stacked})[['filename', 'methylation_intergenic_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs21 = pd.merge(out, methylation_differences, how='inner')
print(pairs21.shape)
In [102]:
# Pairwise CLL comparison on `methylation_shore`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_shore"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_shore_difference': stacked})[['filename', 'methylation_shore_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs22 = pd.merge(out, methylation_differences, how='inner')
print(pairs22.shape)
In [103]:
# Pairwise CLL comparison on `methylation_shelf`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["methylation_shelf"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'methylation_shelf_difference': stacked})[['filename', 'methylation_shelf_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs23 = pd.merge(out, methylation_differences, how='inner')
print(pairs23.shape)
In [ ]:
In [105]:
"""
###
PDR by genomic regions
###
"""
Out[105]:
In [106]:
# Pairwise CLL comparison on `PDR_tssDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_tssDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_tssDistance_difference': stacked})[['filename', 'PDR_tssDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs24 = pd.merge(out, methylation_differences, how='inner')
print(pairs24.shape)
In [107]:
# Pairwise CLL comparison on `PDR_genesDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_genesDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_genesDistance_difference': stacked})[['filename', 'PDR_genesDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs25 = pd.merge(out, methylation_differences, how='inner')
print(pairs25.shape)
In [108]:
# Pairwise CLL comparison on `PDR_exonsDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_exonsDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_exonsDistance_difference': stacked})[['filename', 'PDR_exonsDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs26 = pd.merge(out, methylation_differences, how='inner')
print(pairs26.shape)
In [109]:
# Pairwise CLL comparison on `PDR_intronsDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_intronsDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_intronsDistance_difference': stacked})[['filename', 'PDR_intronsDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs27 = pd.merge(out, methylation_differences, how='inner')
print(pairs27.shape)
In [110]:
# Pairwise CLL comparison on `PDR_promoterDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_promoterDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_promoterDistance_difference': stacked})[['filename', 'PDR_promoterDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs28 = pd.merge(out, methylation_differences, how='inner')
print(pairs28.shape)
In [111]:
# Pairwise CLL comparison on `PDR_cgiDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_cgiDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_cgiDistance_difference': stacked})[['filename', 'PDR_cgiDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs29 = pd.merge(out, methylation_differences, how='inner')
print(pairs29.shape)
In [112]:
# Pairwise CLL comparison on `PDR_ctcfDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_ctcfDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_ctcfDistance_difference': stacked})[['filename', 'PDR_ctcfDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs30 = pd.merge(out, methylation_differences, how='inner')
print(pairs30.shape)
In [113]:
# Pairwise CLL comparison on `PDR_ctcfUpDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_ctcfUpDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_ctcfUpDistance_difference': stacked})[['filename', 'PDR_ctcfUpDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs31 = pd.merge(out, methylation_differences, how='inner')
print(pairs31.shape)
In [114]:
# Pairwise CLL comparison on `PDR_ctcfDownDistance`: for every unordered sample pair,
# the mean of each numeric covariate plus the absolute difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_ctcfDownDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_ctcfDownDistance_difference': stacked})[['filename', 'PDR_ctcfDownDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs32 = pd.merge(out, methylation_differences, how='inner')
print(pairs32.shape)
In [115]:
# Pairwise CLL comparison on `PDR_geneDistalRegulatoryModulesDistance`: for every
# unordered sample pair, the mean of each numeric covariate plus the absolute
# difference in that column.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames
# Pair covariates == mean over the two samples. numeric_only=True skips string
# columns such as "type", which pandas >= 2.0 refuses to average.
out = pd.DataFrame([CLL_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# ufunc.outer raises NotImplementedError on a Series (pandas >= 1.0): use the ndarray.
col_values = CLL_pairs["PDR_geneDistalRegulatoryModulesDistance"].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(col_values, col_values)), CLL_pairs.filename, CLL_pairs.filename)
stacked = df_ex.stack()
# NOTE: variable name kept from the original cell although it holds PDR differences.
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_geneDistalRegulatoryModulesDistance_difference': stacked})[['filename', 'PDR_geneDistalRegulatoryModulesDistance_difference']].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuple, used as the merge key
out = out.reset_index(drop=True)
# Inner merge on the shared `filename` column keeps one row per combinations() pair.
pairs33 = pd.merge(out, methylation_differences, how='inner')
print(pairs33.shape)
In [116]:
# Pairwise table for PDR_vistaEnhancersDistance: one row per unordered pair of
# filenames in CLL_pairs, carrying the pair's mean covariates plus the absolute
# difference in PDR_vistaEnhancersDistance between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_vistaEnhancersDistance.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_vistaEnhancersDistance_difference': stacked})[['filename', 'PDR_vistaEnhancersDistance_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs34 = pd.merge(out, methylation_differences, how='inner')
print(pairs34.shape)
In [117]:
# Pairwise table for PDR_3PrimeUTRDistance: one row per unordered pair of filenames
# in CLL_pairs, carrying the pair's mean covariates plus the absolute difference
# in PDR_3PrimeUTRDistance between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_3PrimeUTRDistance.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_3PrimeUTRDistance_difference': stacked})[['filename', 'PDR_3PrimeUTRDistance_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs35 = pd.merge(out, methylation_differences, how='inner')
print(pairs35.shape)
In [118]:
# Pairwise table for PDR_5PrimeUTRDistance: one row per unordered pair of filenames
# in CLL_pairs, carrying the pair's mean covariates plus the absolute difference
# in PDR_5PrimeUTRDistance between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_5PrimeUTRDistance.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_5PrimeUTRDistance_difference': stacked})[['filename', 'PDR_5PrimeUTRDistance_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs36 = pd.merge(out, methylation_differences, how='inner')
print(pairs36.shape)
In [119]:
# Pairwise table for PDR_firstExonDistance: one row per unordered pair of filenames
# in CLL_pairs, carrying the pair's mean covariates plus the absolute difference
# in PDR_firstExonDistance between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_firstExonDistance.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_firstExonDistance_difference': stacked})[['filename', 'PDR_firstExonDistance_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs37 = pd.merge(out, methylation_differences, how='inner')
print(pairs37.shape)
In [120]:
# Pairwise table for PDR_geneDistalRegulatoryModulesK562Distance: one row per
# unordered pair of filenames in CLL_pairs, carrying the pair's mean covariates
# plus the absolute difference in that column between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_geneDistalRegulatoryModulesK562Distance.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_geneDistalRegulatoryModulesK562Distance_difference': stacked})[['filename', 'PDR_geneDistalRegulatoryModulesK562Distance_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs38 = pd.merge(out, methylation_differences, how='inner')
print(pairs38.shape)
In [121]:
# Pairwise table for PDR_hypoInHues64Distance: one row per unordered pair of
# filenames in CLL_pairs, carrying the pair's mean covariates plus the absolute
# difference in PDR_hypoInHues64Distance between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_hypoInHues64Distance.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_hypoInHues64Distance_difference': stacked})[['filename', 'PDR_hypoInHues64Distance_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs39 = pd.merge(out, methylation_differences, how='inner')
print(pairs39.shape)
In [122]:
# Pairwise table for PDR_intergenic: one row per unordered pair of filenames in
# CLL_pairs, carrying the pair's mean covariates plus the absolute difference
# in PDR_intergenic between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_intergenic.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_intergenic_difference': stacked})[['filename', 'PDR_intergenic_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs40 = pd.merge(out, methylation_differences, how='inner')
print(pairs40.shape)
In [123]:
# Pairwise table for PDR_shore: one row per unordered pair of filenames in
# CLL_pairs, carrying the pair's mean covariates plus the absolute difference
# in PDR_shore between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_shore.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_shore_difference': stacked})[['filename', 'PDR_shore_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs41 = pd.merge(out, methylation_differences, how='inner')
print(pairs41.shape)
In [124]:
# Pairwise table for PDR_shelf: one row per unordered pair of filenames in
# CLL_pairs, carrying the pair's mean covariates plus the absolute difference
# in PDR_shelf between the two entries.
from itertools import combinations

CLL_pairsA = CLL_pairs.set_index("filename")
cc = list(combinations(CLL_pairs.filename, 2))  # every unordered pair of filenames

# Covariates of a pair are the column-wise mean over its two rows.
out = pd.DataFrame([CLL_pairsA.loc[c, :].mean() for c in cc], index=cc)

# ufunc.outer is not implemented for pandas Series (NotImplementedError on
# pandas >= 1.0), so take the plain ndarray before building the matrix.
feature_values = CLL_pairs.PDR_shelf.values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(feature_values, feature_values)), CLL_pairs.filename, CLL_pairs.filename)

# Long format: one row per (row filename, column filename) cell of the matrix.
stacked = df_ex.stack()
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), 'PDR_shelf_difference': stacked})[['filename', 'PDR_shelf_difference']].reset_index(drop=True)

# Move the pair key out of the index into a "filename" column shared with
# methylation_differences; pd.merge joins on the columns both frames share.
out['filename'] = out.index
out = out.reset_index(drop=True)

# Inner merge keeps only rows whose key appears in both frames.
pairs42 = pd.merge(out, methylation_differences, how='inner')
print(pairs42.shape)
In [ ]:
In [125]:
# Preview the first rows only; rendering the whole pairwise table floods the output.
pairs42.head()
Out[125]:
In [136]:
# Reference note (a bare string, so it is also echoed as this cell's output):
# per-region methylation column names. These appear to be the base names behind
# the "methylation_*_difference" columns used for pairs5-pairs23 -- confirm.
"""
'methylation_tssDistance',
'methylation_genesDistance', 'methylation_exonsDistance',
'methylation_intronsDistance', 'methylation_promoterDistance',
'methylation_cgiDistance', 'methylation_ctcfDistance',
'methylation_ctcfUpDistance', 'methylation_ctcfDownDistance',
'methylation_geneDistalRegulatoryModulesDistance',
'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
'methylation_geneDistalRegulatoryModulesK562Distance',
'methylation_hypoInHues64Distance', 'methylation_intergenic',
'methylation_shore', 'methylation_shelf'
"""
Out[136]:
In [138]:
# Reduce every pairwise table to its pair key ("filename") and its single
# "<feature>_difference" column, dropping the pair-mean covariate columns so the
# tables can later be combined side by side without duplicated columns.

# Whole-sample measures.
pairs1 = pairs1[["filename", "methylation_difference"]]
pairs2 = pairs2[["filename", "PDR_difference"]]
pairs3 = pairs3[["filename", "methylation_unweighted_difference"]]
pairs4 = pairs4[["filename", "PDR_unweighted_difference"]]

# Methylation columns.
pairs5 = pairs5[["filename", "methylation_tssDistance_difference"]]
pairs6 = pairs6[["filename", "methylation_genesDistance_difference"]]
pairs7 = pairs7[["filename", "methylation_exonsDistance_difference"]]
pairs8 = pairs8[["filename", "methylation_intronsDistance_difference"]]
pairs9 = pairs9[["filename", "methylation_promoterDistance_difference"]]
pairs10 = pairs10[["filename", "methylation_cgiDistance_difference"]]
pairs11 = pairs11[["filename", "methylation_ctcfDistance_difference"]]
pairs12 = pairs12[["filename", "methylation_ctcfUpDistance_difference"]]
pairs13 = pairs13[["filename", "methylation_ctcfDownDistance_difference"]]
# Fixed: previously selected the un-suffixed column (the pair-mean covariate)
# instead of the difference. NOTE(review): pairs14 is built in an earlier cell;
# its difference column is assumed to follow the same "<feature>_difference"
# naming as every other table here -- confirm.
pairs14 = pairs14[["filename", "methylation_geneDistalRegulatoryModulesDistance_difference"]]
pairs15 = pairs15[["filename", "methylation_vistaEnhancersDistance_difference"]]
pairs16 = pairs16[["filename", "methylation_3PrimeUTRDistance_difference"]]
pairs17 = pairs17[["filename", "methylation_5PrimeUTRDistance_difference"]]
pairs18 = pairs18[["filename", "methylation_firstExonDistance_difference"]]
pairs19 = pairs19[["filename", "methylation_geneDistalRegulatoryModulesK562Distance_difference"]]
pairs20 = pairs20[["filename", "methylation_hypoInHues64Distance_difference"]]
pairs21 = pairs21[["filename", "methylation_intergenic_difference"]]
pairs22 = pairs22[["filename", "methylation_shore_difference"]]
pairs23 = pairs23[["filename", "methylation_shelf_difference"]]

# PDR columns.
pairs24 = pairs24[["filename", "PDR_tssDistance_difference"]]
pairs25 = pairs25[["filename", "PDR_genesDistance_difference"]]
pairs26 = pairs26[["filename", "PDR_exonsDistance_difference"]]
pairs27 = pairs27[["filename", "PDR_intronsDistance_difference"]]
pairs28 = pairs28[["filename", "PDR_promoterDistance_difference"]]
pairs29 = pairs29[["filename", "PDR_cgiDistance_difference"]]
pairs30 = pairs30[["filename", "PDR_ctcfDistance_difference"]]
pairs31 = pairs31[["filename", "PDR_ctcfUpDistance_difference"]]
pairs32 = pairs32[["filename", "PDR_ctcfDownDistance_difference"]]
# Fixed: previously selected the un-suffixed column (the pair-mean covariate);
# the difference column of pairs33 carries the "_difference" suffix.
pairs33 = pairs33[["filename", "PDR_geneDistalRegulatoryModulesDistance_difference"]]
pairs34 = pairs34[["filename", "PDR_vistaEnhancersDistance_difference"]]
pairs35 = pairs35[["filename", "PDR_3PrimeUTRDistance_difference"]]
pairs36 = pairs36[["filename", "PDR_5PrimeUTRDistance_difference"]]
pairs37 = pairs37[["filename", "PDR_firstExonDistance_difference"]]
pairs38 = pairs38[["filename", "PDR_geneDistalRegulatoryModulesK562Distance_difference"]]
pairs39 = pairs39[["filename", "PDR_hypoInHues64Distance_difference"]]
pairs40 = pairs40[["filename", "PDR_intergenic_difference"]]
pairs41 = pairs41[["filename", "PDR_shore_difference"]]
pairs42 = pairs42[["filename", "PDR_shelf_difference"]]
In [139]:
# All 42 pairwise tables, in numeric order, ready to be combined.
pairs_total = [
    # whole-sample measures
    pairs1, pairs2, pairs3, pairs4,
    # methylation columns
    pairs5, pairs6, pairs7, pairs8, pairs9, pairs10, pairs11, pairs12, pairs13, pairs14,
    pairs15, pairs16, pairs17, pairs18, pairs19, pairs20, pairs21, pairs22, pairs23,
    # PDR columns
    pairs24, pairs25, pairs26, pairs27, pairs28, pairs29, pairs30, pairs31, pairs32, pairs33,
    pairs34, pairs35, pairs36, pairs37, pairs38, pairs39, pairs40, pairs41, pairs42,
]
In [141]:
# Index each table by its pair key so the tables line up row-for-row, join them
# column-wise, then turn the key back into an ordinary column.
indexed_tables = [table.set_index("filename") for table in pairs_total]
combined = pd.concat(indexed_tables, axis=1)
total_CLL_pairs = combined.reset_index()
In [143]:
# Sanity check: (rows, columns) of the combined table.
total_CLL_pairs.shape
Out[143]:
In [145]:
# Write the combined table to CSV in the current working directory, without the row index.
total_CLL_pairs.to_csv("total_CLL_pairs.csv", index=False)
In [ ]: