In [2]:
import glob
from itertools import combinations

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
pd.set_option('display.max_columns', 50) # show up to 50 columns when a DataFrame is displayed
import os
# NOTE(review): machine-specific absolute path. Every relative CSV path read below is
# resolved against this directory, so the notebook only runs where it exists.
os.chdir('/Users/evanbiederstedt/Downloads/RRBS_data_files')
import statsmodels.api as sm
In [3]:
"""
CD19cell_regions.csv
cw154_regions.csv
Normal_B_regions.csv
trito_regions.csv
pcell_regions.csv
mcell_regions.csv
"""
Out[3]:
In [4]:
# Load the per-region summary table of each sample group.
# Paths are relative to the current working directory.
trito, normal, pcell, mcell, cw154, cd19cell = [
    pd.read_csv(csv_name)
    for csv_name in (
        "trito_regions.csv",
        "Normal_B_regions.csv",
        "pcell_regions.csv",
        "mcell_regions.csv",
        "cw154_regions.csv",
        "CD19cell_regions.csv",
    )
]
In [5]:
# Print the (rows, columns) shape of each raw table, in load order.
# Original note attached to `normal`: remove 2cell files
for frame in (trito, normal, pcell, mcell, cw154, cd19cell):
    print(frame.shape)
In [6]:
# Keep only the first 33 characters of each trito filename.
trito["filename"] = trito["filename"].str.slice(stop=33)
In [7]:
# Preview the first rows of trito.
trito.head()
Out[7]:
In [8]:
# Keep only the first 40 characters of each Normal_B filename.
normal["filename"] = normal["filename"].str.slice(stop=40)
In [9]:
# Preview the last rows of normal.
normal.tail()
Out[9]:
In [10]:
# The first 31 characters of the filename are stored as the protocol prefix.
pcell["protocol"] = pcell["filename"].str.slice(stop=31)
In [11]:
# Trim each pcell filename to a fixed length chosen by its protocol prefix.
# The '1_22_' prefix gets 46 characters, the other three get 47.
#
# .loc is used instead of chained indexing (pcell["filename"][mask] = ...): the
# chained form may assign into a temporary copy (SettingWithCopyWarning) and
# leaves `pcell` unchanged under pandas Copy-on-Write.
pcell_filename_lengths = {
    'RRBS_NormalBCD19pCD27pcell1_22_': 46,
    'RRBS_NormalBCD19pCD27pcell23_44': 47,
    'RRBS_NormalBCD19pCD27pcell45_66': 47,
    'RRBS_NormalBCD19pCD27pcell67_88': 47,
}
for prefix, length in pcell_filename_lengths.items():
    rows = pcell["protocol"] == prefix
    pcell.loc[rows, "filename"] = pcell.loc[rows, "filename"].str[:length]
In [12]:
# Preview the last rows of pcell.
pcell.tail()
Out[12]:
In [13]:
# The first 31 characters of the filename are stored as the protocol prefix.
mcell["protocol"] = mcell["filename"].str.slice(stop=31)
In [14]:
# Trim each mcell filename to a fixed length chosen by its protocol prefix.
# The '1_22_' prefix gets 46 characters, the other three get 47.
#
# .loc is used instead of chained indexing (mcell["filename"][mask] = ...): the
# chained form may assign into a temporary copy (SettingWithCopyWarning) and
# leaves `mcell` unchanged under pandas Copy-on-Write.
mcell_filename_lengths = {
    'RRBS_NormalBCD19pCD27mcell1_22_': 46,
    'RRBS_NormalBCD19pCD27mcell23_44': 47,
    'RRBS_NormalBCD19pCD27mcell45_66': 47,
    'RRBS_NormalBCD19pCD27mcell67_88': 47,
}
for prefix, length in mcell_filename_lengths.items():
    rows = mcell["protocol"] == prefix
    mcell.loc[rows, "filename"] = mcell.loc[rows, "filename"].str[:length]
In [15]:
# Preview the last rows of mcell.
mcell.tail()
Out[15]:
In [16]:
# Scratch check: length of the CD19 protocol prefix (26 characters).
len("RRBS_NormalBCD19pcell1_22_")
Out[16]:
In [17]:
# The first 26 characters of the filename are stored as the protocol prefix.
cd19cell["protocol"] = cd19cell["filename"].str.slice(stop=26)
In [18]:
# Scratch check: length of an example '1_22_' sample name (41 characters).
len('RRBS_NormalBCD19pcell1_22_TAAGGCGA.ACAACC')
Out[18]:
In [19]:
# Trim each cd19cell filename to a fixed length chosen by its protocol prefix.
# The '1_22_' prefix gets 41 characters, the other three get 42.
#
# .loc is used instead of chained indexing (cd19cell["filename"][mask] = ...): the
# chained form may assign into a temporary copy (SettingWithCopyWarning) and
# leaves `cd19cell` unchanged under pandas Copy-on-Write.
cd19cell_filename_lengths = {
    'RRBS_NormalBCD19pcell1_22_': 41,
    'RRBS_NormalBCD19pcell23_44': 42,
    'RRBS_NormalBCD19pcell45_66': 42,
    'RRBS_NormalBCD19pcell67_88': 42,
}
for prefix, length in cd19cell_filename_lengths.items():
    rows = cd19cell["protocol"] == prefix
    cd19cell.loc[rows, "filename"] = cd19cell.loc[rows, "filename"].str[:length]
In [20]:
# Preview the last rows of cd19cell.
cd19cell.tail()
Out[20]:
In [21]:
# Scratch check: length of a cw154 protocol prefix (27 characters).
len("RRBS_cw154_Tris_protease_GR")
Out[21]:
In [22]:
# The first 27 characters of the filename are stored as the protocol prefix.
cw154["protocol"] = cw154["filename"].str.slice(stop=27)
In [23]:
# Preview the first rows of cw154. Protocol prefixes noted for this table:
# RRBS_cw154_CutSmart_protein, RRBS_cw154_Tris_protease_CT, RRBS_cw154_Tris_protease_GR
cw154.head()
Out[23]:
In [24]:
# Trim each cw154 filename to a fixed length chosen by its protocol prefix.
#
# .loc is used instead of chained indexing (cw154["filename"][mask] = ...): the
# chained form may assign into a temporary copy (SettingWithCopyWarning) and
# leaves `cw154` unchanged under pandas Copy-on-Write.
cw154_filename_lengths = {
    "RRBS_cw154_CutSmart_protein": 48,
    "RRBS_cw154_Tris_protease_CT": 40,
    "RRBS_cw154_Tris_protease_GR": 43,
}
for prefix, length in cw154_filename_lengths.items():
    rows = cw154["protocol"] == prefix
    cw154.loc[rows, "filename"] = cw154.loc[rows, "filename"].str[:length]
In [25]:
# The six region tables, in load order.
# NOTE(review): `files` is not referenced again in the visible part of this notebook.
files = [trito, normal, pcell, mcell, cw154, cd19cell]
In [26]:
# Stack the six region tables row-wise into one frame; each table's row labels are kept as-is.
region_frames = [trito, normal, pcell, mcell, cw154, cd19cell]
total_region_files = pd.concat(region_frames)
In [27]:
# (rows, columns) of the concatenated table.
total_region_files.shape
Out[27]:
In [28]:
# Keep the sample key plus the per-region columns, in a fixed order:
# filename, every methylation_<region> column, then every PDR_<region> column.
region_suffixes = [
    "tssDistance", "genesDistance", "exonsDistance", "intronsDistance",
    "promoterDistance", "cgiDistance", "ctcfDistance", "ctcfUpDistance",
    "ctcfDownDistance", "geneDistalRegulatoryModulesDistance",
    "vistaEnhancersDistance", "3PrimeUTRDistance", "5PrimeUTRDistance",
    "firstExonDistance", "geneDistalRegulatoryModulesK562Distance",
    "hypoInHues64Distance", "intergenic", "shore", "shelf",
]
region_columns = ["filename"] + [
    measure + "_" + suffix
    for measure in ("methylation", "PDR")
    for suffix in region_suffixes
]
total_region_files = total_region_files[region_columns]
In [29]:
# The concat above kept each source table's own row labels; replace them with a
# fresh default index and discard the old labels.
total_region_files = total_region_files.reset_index(drop=True)
In [30]:
# Show the first 40 rows.
total_region_files.head(40)
Out[30]:
In [31]:
# Per-file annotation statistics; path is relative to the working directory.
stats_csv = "RRBS_anno_statistics_full_446files_filter50K.csv"
stats = pd.read_csv(stats_csv)
In [32]:
# (rows, columns) of the statistics table.
stats.shape
Out[32]:
In [33]:
# The filename column of the statistics table.
stats_files = stats["filename"]
In [34]:
# Inner join of the statistics with the region table on the filename key.
merged = pd.merge(stats, total_region_files, on='filename')
In [35]:
# Columns removed from the merged table before the pairwise analysis.
unused_stat_columns = [
    'thisMeth',
    'mixedReadCount',
    'total_reads',
    'total_cpg_no_filter',
    'total_cpg_gtrthan1',
    'total_cpg_gtrthan38',
    'avgReadCpgs_nofilter',
    'avgReadCpgs_lessthan1CpG',
    'avgReadCpgs_gtreql3.8CpG',
    'bsRate',
]
merged = merged.drop(unused_stat_columns, axis=1)
In [36]:
# Display the merged table.
# NOTE(review): this renders the whole frame; a head()/sample() preview would keep the output small.
merged
Out[36]:
In [37]:
# Sorted distinct protocol labels in the merged table.
print(np.unique(merged["protocol"]))  # there are 23 'protocol' fields
In [ ]:
In [ ]:
In [ ]:
In [38]:
# merged.to_csv("total_genomic_region.csv", index=False)
In [39]:
# (rows, columns) of the merged table after dropping the unused statistics columns.
merged.shape
Out[39]:
In [40]:
# Column labels of the merged table.
merged.columns
Out[40]:
In [41]:
#
# Split the merged table by sample type (CLL vs. normal B).
# Protocol-level comparisons could be discussed at a later point.
# Note: `normal` is rebound here; it previously held the raw Normal_B table.
#
is_normal = merged["type"] == "normal"
is_cll = merged["type"] == "CLL"
normal = merged[is_normal]
CLL = merged[is_cll]
In [42]:
# Row counts of the normal and CLL subsets, in that order.
for subset in (normal, CLL):
    print(len(subset))
In [43]:
#CLL_pairs = CLL
# Plain alias, not a copy: normal_pairs and normal refer to the same DataFrame.
normal_pairs = normal
In [44]:
# Column labels of the normal subset.
normal_pairs.columns
Out[44]:
In [45]:
# Sorted distinct protocol labels among the normal samples.
print(np.unique(normal_pairs["protocol"]))
In [46]:
# Restrict the normal samples to a single protocol label.
protocol_mask = normal_pairs["protocol"] == "NormalBCD19pCD27mcell1_22_"
protocol = normal_pairs[protocol_mask]
In [47]:
# (rows, columns) of the single-protocol subset.
protocol.shape
Out[47]:
In [48]:
# (rows, columns) of the full normal subset, for comparison with the protocol subset.
normal_pairs.shape
Out[48]:
In [49]:
# Replace the row labels inherited from the filtered frame with a fresh default index.
protocol = protocol.reset_index(drop=True)
In [50]:
"""
DANGER!!!!
"""
# WARNING: misleading name from here on. `normal_pairs` is rebound to `protocol`, the
# single-protocol subset filtered above, so it no longer holds all normal samples.
# The name is reused only so the pairwise cells below run without modification.
normal_pairs = protocol
In [ ]:
In [ ]:
In [51]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs1 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs1.shape)
In [52]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_total"
diff_col = "PDR_difference"  # output column is not named after the metric in this cell
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
PDR_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs2 = pd.merge(out, PDR_differences, how='inner', on='filename')
print(pairs2.shape)
In [53]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_unweighted"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs3 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs3.shape)
In [54]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_unweighted"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
PDR_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs4 = pd.merge(out, PDR_differences, how='inner', on='filename')
print(pairs4.shape)
In [55]:
"""
'methylation_tssDistance',
'methylation_genesDistance', 'methylation_exonsDistance',
'methylation_intronsDistance', 'methylation_promoterDistance',
'methylation_cgiDistance', 'methylation_ctcfDistance',
'methylation_ctcfUpDistance', 'methylation_ctcfDownDistance',
'methylation_geneDistalRegulatoryModulesDistance',
'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
'methylation_geneDistalRegulatoryModulesK562Distance',
'methylation_hypoInHues64Distance', 'methylation_intergenic',
'methylation_shore', 'methylation_shelf'
"""
Out[55]:
In [56]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_tssDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs5 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs5.shape)
In [57]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_genesDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs6 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs6.shape)
In [58]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_exonsDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs7 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs7.shape)
In [59]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_intronsDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs8 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs8.shape)
In [60]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_promoterDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs9 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs9.shape)
In [61]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_cgiDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs10 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs10.shape)
In [62]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_ctcfDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs11 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs11.shape)
In [63]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_ctcfUpDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs12 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs12.shape)
In [64]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_ctcfDownDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs13 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs13.shape)
In [65]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_geneDistalRegulatoryModulesDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs14 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs14.shape)
In [66]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_vistaEnhancersDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs15 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs15.shape)
In [67]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_3PrimeUTRDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs16 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs16.shape)
In [68]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_5PrimeUTRDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs17 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs17.shape)
In [69]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_firstExonDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs18 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs18.shape)
In [70]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_geneDistalRegulatoryModulesK562Distance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs19 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs19.shape)
In [71]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_hypoInHues64Distance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs20 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs20.shape)
In [72]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_intergenic"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs21 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs21.shape)
In [73]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_shore"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs22 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs22.shape)
In [74]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "methylation_shelf"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs23 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs23.shape)
In [ ]:
In [75]:
"""
###
PDR by genomic regions
###
"""
Out[75]:
In [76]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_tssDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs24 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs24.shape)
In [77]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_genesDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs25 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs25.shape)
In [78]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_exonsDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs26 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs26.shape)
In [79]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_intronsDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs27 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs27.shape)
In [80]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_promoterDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs28 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs28.shape)
In [81]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_cgiDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs29 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs29.shape)
In [82]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_ctcfDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs30 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs30.shape)
In [83]:
# Pairwise table for `metric`: one row per unordered pair of samples; covariates are
# the pair's mean and the response is the absolute difference in `metric`.
metric = "PDR_ctcfUpDistance"
diff_col = metric + "_difference"
normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered filename pairs
# numeric_only=True: skip string columns (e.g. protocol, type), which pandas >= 2.0 refuses to average
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean(numeric_only=True) for c in cc], index=cc)
# .values: ufunc.outer is not supported on a Series in current pandas, so work on the ndarray
metric_values = normal_pairs[metric].values
df_ex = pd.DataFrame(np.abs(np.subtract.outer(metric_values, metric_values)), normal_pairs.filename, normal_pairs.filename)
stacked = df_ex.stack()  # one entry per ordered (file_a, file_b) combination
# (variable name kept from the methylation cells; here it holds PDR differences)
methylation_differences = pd.DataFrame({'filename': stacked.index.to_series(), diff_col: stacked})[['filename', diff_col]].reset_index(drop=True)
out['filename'] = out.index  # (file_a, file_b) tuples: the merge key
out = out.reset_index(drop=True)
pairs31 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs31.shape)
In [84]:
# Pair table for PDR_ctcfDownDistance: for every unordered pair of samples in
# normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_ctcfDownDistance between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_ctcfDownDistance.values,
                             normal_pairs.PDR_ctcfDownDistance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_ctcfDownDistance_difference': stacked}
)[['filename', 'PDR_ctcfDownDistance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs32 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs32.shape)
In [85]:
# Pair table for PDR_geneDistalRegulatoryModulesDistance: for every unordered
# pair of samples in normal_pairs, the pair's mean covariates plus the absolute
# difference in PDR_geneDistalRegulatoryModulesDistance between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_geneDistalRegulatoryModulesDistance.values,
                             normal_pairs.PDR_geneDistalRegulatoryModulesDistance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(),
     'PDR_geneDistalRegulatoryModulesDistance_difference': stacked}
)[['filename', 'PDR_geneDistalRegulatoryModulesDistance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs33 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs33.shape)
In [86]:
# Pair table for PDR_vistaEnhancersDistance: for every unordered pair of samples
# in normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_vistaEnhancersDistance between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_vistaEnhancersDistance.values,
                             normal_pairs.PDR_vistaEnhancersDistance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_vistaEnhancersDistance_difference': stacked}
)[['filename', 'PDR_vistaEnhancersDistance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs34 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs34.shape)
In [87]:
# Pair table for PDR_3PrimeUTRDistance: for every unordered pair of samples in
# normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_3PrimeUTRDistance between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_3PrimeUTRDistance.values,
                             normal_pairs.PDR_3PrimeUTRDistance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_3PrimeUTRDistance_difference': stacked}
)[['filename', 'PDR_3PrimeUTRDistance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs35 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs35.shape)
In [88]:
# Pair table for PDR_5PrimeUTRDistance: for every unordered pair of samples in
# normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_5PrimeUTRDistance between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_5PrimeUTRDistance.values,
                             normal_pairs.PDR_5PrimeUTRDistance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_5PrimeUTRDistance_difference': stacked}
)[['filename', 'PDR_5PrimeUTRDistance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs36 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs36.shape)
In [89]:
# Pair table for PDR_firstExonDistance: for every unordered pair of samples in
# normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_firstExonDistance between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_firstExonDistance.values,
                             normal_pairs.PDR_firstExonDistance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_firstExonDistance_difference': stacked}
)[['filename', 'PDR_firstExonDistance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs37 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs37.shape)
In [90]:
# Pair table for PDR_geneDistalRegulatoryModulesK562Distance: for every
# unordered pair of samples in normal_pairs, the pair's mean covariates plus the
# absolute difference in PDR_geneDistalRegulatoryModulesK562Distance between the
# two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_geneDistalRegulatoryModulesK562Distance.values,
                             normal_pairs.PDR_geneDistalRegulatoryModulesK562Distance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(),
     'PDR_geneDistalRegulatoryModulesK562Distance_difference': stacked}
)[['filename', 'PDR_geneDistalRegulatoryModulesK562Distance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs38 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs38.shape)
In [91]:
# Pair table for PDR_hypoInHues64Distance: for every unordered pair of samples
# in normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_hypoInHues64Distance between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_hypoInHues64Distance.values,
                             normal_pairs.PDR_hypoInHues64Distance.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_hypoInHues64Distance_difference': stacked}
)[['filename', 'PDR_hypoInHues64Distance_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs39 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs39.shape)
In [92]:
# Pair table for PDR_intergenic: for every unordered pair of samples in
# normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_intergenic between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_intergenic.values,
                             normal_pairs.PDR_intergenic.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_intergenic_difference': stacked}
)[['filename', 'PDR_intergenic_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs40 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs40.shape)
In [93]:
# Pair table for PDR_shore: for every unordered pair of samples in
# normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_shore between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_shore.values,
                             normal_pairs.PDR_shore.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_shore_difference': stacked}
)[['filename', 'PDR_shore_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs41 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs41.shape)
In [94]:
# Pair table for PDR_shelf: for every unordered pair of samples in
# normal_pairs, the pair's mean covariates plus the absolute difference in
# PDR_shelf between the two samples.
from itertools import combinations

normal_pairsA = normal_pairs.set_index("filename")
cc = list(combinations(normal_pairs.filename, 2))  # all unordered pairs of filenames

# Covariates between pairs == column-wise mean of the two samples' rows.
# list(c) makes it explicit that the tuple holds two row labels.
out = pd.DataFrame([normal_pairsA.loc[list(c), :].mean() for c in cc], index=cc)

# ufunc.outer needs plain ndarrays: calling it on Series raises
# NotImplementedError on pandas >= 1.0 (it was deprecated in 0.25).
df_ex = pd.DataFrame(
    np.abs(np.subtract.outer(normal_pairs.PDR_shelf.values,
                             normal_pairs.PDR_shelf.values)),
    index=normal_pairs.filename,
    columns=normal_pairs.filename,
)
stacked = df_ex.stack()  # long format, keyed by (filename, filename)
methylation_differences = pd.DataFrame(
    {'filename': stacked.index.to_series(), 'PDR_shelf_difference': stacked}
)[['filename', 'PDR_shelf_difference']].reset_index(drop=True)

out['filename'] = out.index  # the pair tuple becomes the merge key
out = out.reset_index(drop=True)
# The inner merge keeps only the pairs listed in cc; the self-pairs and mirrored
# pairs of the full difference matrix have no match in `out` and are dropped.
pairs42 = pd.merge(out, methylation_differences, how='inner', on='filename')
print(pairs42.shape)
In [ ]:
In [95]:
# Preview the first rows of pairs42 (pair covariates plus PDR_shelf_difference).
pairs42.head()
Out[95]:
In [96]:
"""
'methylation_tssDistance',
'methylation_genesDistance', 'methylation_exonsDistance',
'methylation_intronsDistance', 'methylation_promoterDistance',
'methylation_cgiDistance', 'methylation_ctcfDistance',
'methylation_ctcfUpDistance', 'methylation_ctcfDownDistance',
'methylation_geneDistalRegulatoryModulesDistance',
'methylation_vistaEnhancersDistance', 'methylation_3PrimeUTRDistance',
'methylation_5PrimeUTRDistance', 'methylation_firstExonDistance',
'methylation_geneDistalRegulatoryModulesK562Distance',
'methylation_hypoInHues64Distance', 'methylation_intergenic',
'methylation_shore', 'methylation_shelf'
"""
Out[96]:
In [97]:
# Reduce every pair table to its merge key ("filename") and its single
# "*_difference" column, so the tables can later be joined side by side without
# repeating the pair-mean covariates that each one also carries.

# Genome-wide metrics.
pairs1 = pairs1[["filename", "methylation_difference"]]
pairs2 = pairs2[["filename", "PDR_difference"]]
pairs3 = pairs3[["filename", "methylation_unweighted_difference"]]
pairs4 = pairs4[["filename", "PDR_unweighted_difference"]]

# Methylation differences per annotation column.
pairs5 = pairs5[["filename", "methylation_tssDistance_difference"]]
pairs6 = pairs6[["filename", "methylation_genesDistance_difference"]]
pairs7 = pairs7[["filename", "methylation_exonsDistance_difference"]]
pairs8 = pairs8[["filename", "methylation_intronsDistance_difference"]]
pairs9 = pairs9[["filename", "methylation_promoterDistance_difference"]]
pairs10 = pairs10[["filename", "methylation_cgiDistance_difference"]]
pairs11 = pairs11[["filename", "methylation_ctcfDistance_difference"]]
pairs12 = pairs12[["filename", "methylation_ctcfUpDistance_difference"]]
pairs13 = pairs13[["filename", "methylation_ctcfDownDistance_difference"]]
# Fixed: the "_difference" suffix was missing, which selected the pair-mean
# covariate column instead of the pairwise difference.
# NOTE(review): assumes pairs14 was built like the other tables, i.e. with a
# "methylation_geneDistalRegulatoryModulesDistance_difference" column -- confirm
# against the cell that creates pairs14.
pairs14 = pairs14[["filename", "methylation_geneDistalRegulatoryModulesDistance_difference"]]
pairs15 = pairs15[["filename", "methylation_vistaEnhancersDistance_difference"]]
pairs16 = pairs16[["filename", "methylation_3PrimeUTRDistance_difference"]]
pairs17 = pairs17[["filename", "methylation_5PrimeUTRDistance_difference"]]
pairs18 = pairs18[["filename", "methylation_firstExonDistance_difference"]]
pairs19 = pairs19[["filename", "methylation_geneDistalRegulatoryModulesK562Distance_difference"]]
pairs20 = pairs20[["filename", "methylation_hypoInHues64Distance_difference"]]
pairs21 = pairs21[["filename", "methylation_intergenic_difference"]]
pairs22 = pairs22[["filename", "methylation_shore_difference"]]
pairs23 = pairs23[["filename", "methylation_shelf_difference"]]

# PDR differences per annotation column.
pairs24 = pairs24[["filename", "PDR_tssDistance_difference"]]
pairs25 = pairs25[["filename", "PDR_genesDistance_difference"]]
pairs26 = pairs26[["filename", "PDR_exonsDistance_difference"]]
pairs27 = pairs27[["filename", "PDR_intronsDistance_difference"]]
pairs28 = pairs28[["filename", "PDR_promoterDistance_difference"]]
pairs29 = pairs29[["filename", "PDR_cgiDistance_difference"]]
pairs30 = pairs30[["filename", "PDR_ctcfDistance_difference"]]
pairs31 = pairs31[["filename", "PDR_ctcfUpDistance_difference"]]
pairs32 = pairs32[["filename", "PDR_ctcfDownDistance_difference"]]
# Fixed: the "_difference" suffix was missing, which selected the pair-mean
# covariate "PDR_geneDistalRegulatoryModulesDistance" instead of the pairwise
# difference column that the pairs33 cell creates.
pairs33 = pairs33[["filename", "PDR_geneDistalRegulatoryModulesDistance_difference"]]
pairs34 = pairs34[["filename", "PDR_vistaEnhancersDistance_difference"]]
pairs35 = pairs35[["filename", "PDR_3PrimeUTRDistance_difference"]]
pairs36 = pairs36[["filename", "PDR_5PrimeUTRDistance_difference"]]
pairs37 = pairs37[["filename", "PDR_firstExonDistance_difference"]]
pairs38 = pairs38[["filename", "PDR_geneDistalRegulatoryModulesK562Distance_difference"]]
pairs39 = pairs39[["filename", "PDR_hypoInHues64Distance_difference"]]
pairs40 = pairs40[["filename", "PDR_intergenic_difference"]]
pairs41 = pairs41[["filename", "PDR_shore_difference"]]
pairs42 = pairs42[["filename", "PDR_shelf_difference"]]
In [98]:
# Every per-metric pair table, in numeric order, gathered for the join on "filename".
pairs_total = [
    # genome-wide metrics
    pairs1, pairs2, pairs3, pairs4,
    # methylation differences per annotation column
    pairs5, pairs6, pairs7, pairs8, pairs9, pairs10, pairs11, pairs12, pairs13,
    pairs14, pairs15, pairs16, pairs17, pairs18, pairs19, pairs20, pairs21,
    pairs22, pairs23,
    # PDR differences per annotation column
    pairs24, pairs25, pairs26, pairs27, pairs28, pairs29, pairs30, pairs31,
    pairs32, pairs33, pairs34, pairs35, pairs36, pairs37, pairs38, pairs39,
    pairs40, pairs41, pairs42,
]
In [99]:
# Join all pair tables column-wise: each table is indexed by its "filename" key,
# concat aligns them on that index, and reset_index turns the key back into a column.
total_normal_pairs = pd.concat(
    [frame.set_index("filename") for frame in pairs_total],
    axis=1,
).reset_index()
In [100]:
# (rows, columns) of the combined pair table.
total_normal_pairs.shape
Out[100]:
In [101]:
# Write the combined pair table to CSV in the current working directory, without the row index.
# NOTE(review): the file name mentions "NormalBCD19pCD27mcell1_22" although the
# table is built from normal_pairs -- confirm this is the intended output name.
total_normal_pairs.to_csv("total_normal_pairs_NormalBCD19pCD27mcell1_22.csv", index=False)
In [ ]:
In [ ]: