In [2]:
from pytadbit.mapping.analyze import eig_correlate_matrices, correlate_matrices
from pytadbit import load_hic_data_from_reads
from cPickle import load
from matplotlib import pyplot as plt
In [3]:
reso = 200000
base_path = 'results/fragment/{0}/03_filtering/valid_reads12_{0}.tsv'
bias_path = 'results/fragment/{1}/04_normalizing/biases_{0}_{1}.pick'
bads_path = 'results/fragment/{1}/04_normalizing/bad_columns_{0}_{1}.pick'
Write a little function to load HiCData obeject
In [10]:
def my_load_hic_data(renz, rep, reso):
hic_data = load_hic_data_from_reads(base_path.format(renz), resolution=reso)
hic_data.bias = load(open(bias_path.format(reso, renz)))
hic_data.bads = load(open(bads_path.format(reso, renz)))
return hic_data
In [11]:
renz1 = 'HindIII'
renz2 = 'MboI'
In [12]:
hic_data1 = my_load_hic_data(renz1, reso)
hic_data2 = my_load_hic_data(renz2, reso)
In [13]:
## this part is to "tune" the plot ##
plt.figure(figsize=(9, 6))
axe = plt.subplot()
axe.grid()
axe.set_xticks(range(0, 55, 5))
axe.set_xticklabels(['%d Mb' % int(i * 0.2) if i else '' for i in range(0, 55, 5)], rotation=-45)
#####################################
_ = correlate_matrices(hic_data1, hic_data2, max_dist=50, show=False, axe=axe, normalized=True)
In [16]:
hic_data1 = my_load_hic_data(renz1, rep1, 1000000)
hic_data2 = my_load_hic_data(renz2, rep2, 1000000)
In [17]:
corrs = eig_correlate_matrices(hic_data1, hic_data2, show=True, aspect='auto', normalized=True)
for cor in corrs:
print ' '.join(['%5.3f' % (c) for c in cor]) + '\n'
In [18]:
renz1 = 'NcoI'
renz2 = 'NcoI'
rep1 = 'T0'
rep2 = 'T60'
In [19]:
hic_data1 = my_load_hic_data(renz1, rep1, reso)
hic_data2 = my_load_hic_data(renz2, rep2, reso)
In [20]:
## this part is to "tune" the plot ##
plt.figure(figsize=(9, 6))
axe = plt.subplot()
axe.grid()
axe.set_xticks(range(0, 55, 5))
axe.set_xticklabels(['%d Mb' % int(i * 0.2) if i else '' for i in range(0, 55, 5)], rotation=-45)
#####################################
_ = correlate_matrices(hic_data1, hic_data2, max_dist=50, show=False, axe=axe)
In [21]:
hic_data1 = my_load_hic_data(renz1, rep1, 1000000)
hic_data2 = my_load_hic_data(renz2, rep2, 1000000)
In [22]:
corrs = eig_correlate_matrices(hic_data1, hic_data2, show=True, aspect='auto', normalized=True)
for cor in corrs:
print ' '.join(['%5.3f' % (c) for c in cor]) + '\n'
In [23]:
renz1 = 'HindIII'
renz2 = 'NcoI'
rep1 = 'T0'
rep2 = 'T0'
In [24]:
hic_data1 = my_load_hic_data(renz1, rep1, reso)
hic_data2 = my_load_hic_data(renz2, rep2, reso)
In [25]:
## this part is to "tune" the plot ##
plt.figure(figsize=(9, 6))
axe = plt.subplot()
axe.grid()
axe.set_xticks(range(0, 55, 5))
axe.set_xticklabels(['%d Mb' % int(i * 0.2) if i else '' for i in range(0, 55, 5)], rotation=-45)
#####################################
_ = correlate_matrices(hic_data1, hic_data2, max_dist=50, show=False, axe=axe)
In [26]:
hic_data1 = my_load_hic_data(renz1, rep1, 1000000)
hic_data2 = my_load_hic_data(renz2, rep2, 1000000)
In [27]:
corrs = eig_correlate_matrices(hic_data1, hic_data2, show=True, aspect='auto', normalized=True)
for cor in corrs:
print ' '.join(['%5.3f' % (c) for c in cor]) + '\n'
In [28]:
renz1 = 'HindIII'
renz2 = 'NcoI'
rep1 = 'T60'
rep2 = 'T60'
In [29]:
hic_data1 = my_load_hic_data(renz1, rep1, reso)
hic_data2 = my_load_hic_data(renz2, rep2, reso)
In [30]:
## this part is to "tune" the plot ##
plt.figure(figsize=(9, 6))
axe = plt.subplot()
axe.grid()
axe.set_xticks(range(0, 55, 5))
axe.set_xticklabels(['%d Mb' % int(i * 0.2) if i else '' for i in range(0, 55, 5)], rotation=-45)
#####################################
_ = correlate_matrices(hic_data1, hic_data2, max_dist=50, show=False, axe=axe)
In [31]:
hic_data1 = my_load_hic_data(renz1, rep1, 1000000)
hic_data2 = my_load_hic_data(renz2, rep2, 1000000)
In [33]:
corrs = eig_correlate_matrices(hic_data1, hic_data2, show=True, aspect='auto', normalized=True)
for cor in corrs:
print ' '.join(['%5.3f' % (c) for c in cor]) + '\n'
Once agreed that experiments are similar, they can be merged.
Here is a simple way to merge valid pairs. Arguably we may want to merge unfiltered data but the difference would be minimal specially with non-replicates.
In [34]:
from pytadbit.mapping import merge_2d_beds
In [36]:
! mkdir -p results/fragment/both_T0/
! mkdir -p results/fragment/both_T60/
! mkdir -p results/fragment/both_T0/03_filtering/
! mkdir -p results/fragment/both_T60/03_filtering/
In [37]:
renz1 = 'HindIII'
renz2 = 'NcoI'
rep = 'T60'
hic_data1 = 'results/fragment/{0}_{1}/03_filtering/valid_reads12_{0}_{1}.tsv'.format(renz1, rep)
hic_data2 = 'results/fragment/{0}_{1}/03_filtering/valid_reads12_{0}_{1}.tsv'.format(renz2, rep)
hic_data = 'results/fragment/both_{0}/03_filtering/valid_reads12_{0}.tsv'.format(rep)
merge_2d_beds(hic_data1, hic_data2, hic_data)
Out[37]:
In [38]:
renz1 = 'HindIII'
renz2 = 'NcoI'
rep = 'T0'
hic_data1 = 'results/fragment/{0}_{1}/03_filtering/valid_reads12_{0}_{1}.tsv'.format(renz1, rep)
hic_data2 = 'results/fragment/{0}_{1}/03_filtering/valid_reads12_{0}_{1}.tsv'.format(renz2, rep)
hic_data = 'results/fragment/both_{0}/03_filtering/valid_reads12_{0}.tsv'.format(rep)
merge_2d_beds(hic_data1, hic_data2, hic_data)
Out[38]:
In [44]:
from pytadbit.mapping.analyze import hic_map
from cPickle import dump
In [39]:
! mkdir -p results/fragment/both_T0/04_normalizing
! mkdir -p results/fragment/both_T60/04_normalizing
All in one loop to:
all at diferent resolutions, and for both time points
In [ ]:
for rep in ['T0', 'T60']:
print ' -', rep
for reso in [1000000, 300000, 100000]:
print ' *', reso
# load hic_data
hic_data = load_hic_data_from_reads(
'results/fragment/both_{0}/03_filtering/valid_reads12_{0}.tsv'.format(rep),
reso)
# filter columns
hic_data.filter_columns(draw_hist=False, min_count=10, by_mean=True)
# normalize
hic_data.normalize_hic(iterations=0)
# save biases to reconstruct normalization
out = open('results/fragment/both_{1}/04_normalizing/biases_{0}_{1}.pick'.format(reso, rep), 'w')
dump(hic_data.bias, out)
out.close()
# save filtered out columns
out = open('results/fragment/both_{1}/04_normalizing/bad_columns_{0}_{1}.pick'.format(reso, rep), 'w')
dump(hic_data.bads, out)
out.close()
# save data as raw matrix per chromsome
hic_map(hic_data, by_chrom='intra', normalized=False,
savedata='results/fragment/both_{1}/04_normalizing/{0}_raw'.format(reso, rep))
# save data as normalized matrix per chromosome
hic_map(hic_data, by_chrom='intra', normalized=True,
savedata='results/fragment/both_{1}/04_normalizing/{0}_norm'.format(reso, rep))
# if the resolution is low save the full genomic matrix
if reso > 500000:
hic_map(hic_data, by_chrom=False, normalized=False,
savefig ='results/fragment/both_{1}/04_normalizing/{0}_raw.png'.format(reso, rep),
savedata='results/fragment/both_{1}/04_normalizing/{0}_raw.mat'.format(reso, rep))
hic_map(hic_data, by_chrom=False, normalized=True,
savefig ='results/fragment/both_{1}/04_normalizing/{0}_norm.png'.format(reso, rep) ,
savedata='results/fragment/both_{1}/04_normalizing/{0}_norm.mat'.format(reso, rep))
In [ ]: