In [1]:
from pytadbit.parsers.hic_parser import load_hic_data_from_reads
In [2]:
r_enz_1 = 'HindIII'
r_enz_2 = 'MboI'
reso = 1000000
In [5]:
hic_data_1 = load_hic_data_from_reads(
    'results/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz_1),
    reso)
hic_data_2 = load_hic_data_from_reads(
    'results/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz_2),
    reso)
In [6]:
hic_data_1.filter_columns(draw_hist=True, min_count=10, by_mean=True)
hic_data_2.filter_columns(draw_hist=True, min_count=10, by_mean=True)
In [7]:
print(len(hic_data_1))
print(len(hic_data_1.bads))
print(len(hic_data_2))
print(len(hic_data_2.bads))
In [9]:
# Out of 3102 bins, 269 were flagged as bad columns and removed.
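As a quick sanity check (an illustrative snippet, not part of the original pipeline), the fraction of filtered columns can be computed directly from the counts printed above:
for name, dat in ((r_enz_1, hic_data_1), (r_enz_2, hic_data_2)):
    # .bads holds the indices of the filtered (low-coverage) columns
    print('%s: %d of %d bins filtered (%.1f%%)'
          % (name, len(dat.bads), len(dat), 100.0 * len(dat.bads) / len(dat)))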
In [8]:
hic_data_1.normalize_hic(iterations=10, max_dev=0.1)
hic_data_2.normalize_hic(iterations=10, max_dev=0.1)
In [11]:
# Run the iterative correction (ICE-like normalization); it stops after the given
# number of iterations or once the maximum deviation drops below max_dev.
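For intuition, here is a minimal, self-contained sketch of the iterative-correction idea on a toy matrix (illustrative only; this is not how pytadbit implements normalize_hic internally). Biases are updated from the coverage of each bin until the corrected coverage is roughly uniform; max_dev above plays roughly the role of the convergence tolerance on that deviation.
import numpy as np

# Toy symmetric contact matrix (3 bins), purely illustrative.
raw = np.array([[10.,  5.,  2.],
                [ 5., 40.,  8.],
                [ 2.,  8., 20.]])
bias = np.ones(3)
for _ in range(100):
    corrected = raw / np.outer(bias, bias)   # divide each cell by bias_i * bias_j
    cov = corrected.sum(axis=0)
    cov /= cov.mean()                        # relative coverage of each bin
    bias *= np.sqrt(cov)                     # damped update of the biases
corrected = raw / np.outer(bias, bias)
print(corrected.sum(axis=0))                 # roughly equal coverage after correction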
In [10]:
from pytadbit.mapping.analyze import hic_map
hic_map(hic_data_1, normalized=True, focus='chr18', show=True)
hic_map(hic_data_2, normalized=True, focus='chr18', show=True)
In [12]:
# save and export normalized data
In [17]:
! mkdir -p results/$r_enz_1/04_normalizing
! mkdir -p results/$r_enz_2/04_normalizing
In [18]:
from pickle import dump
In [19]:
out = open('results/{1}/04_normalizing/biases_{0}_{1}.pick'.format(reso, r_enz_1), 'wb')
dump(hic_data_1.bias, out)
out.close()
out = open('results/{1}/04_normalizing/biases_{0}_{1}.pick'.format(reso, r_enz_2), 'wb')
dump(hic_data_2.bias, out)
out.close()
In [20]:
out = open('results/{1}/04_normalizing/bad_columns_{0}_{1}.pick'.format(reso, r_enz_1), 'wb')
dump(hic_data_1.bads, out)
out.close()
out = open('results/{1}/04_normalizing/bad_columns_{0}_{1}.pick'.format(reso, r_enz_2), 'wb')
dump(hic_data_2.bads, out)
out.close()
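The pickled biases and bad columns can be reloaded later without recomputing the normalization; a minimal round-trip check (illustrative) looks like this:
from pickle import load

with open('results/{1}/04_normalizing/biases_{0}_{1}.pick'.format(reso, r_enz_1), 'rb') as f:
    biases_1 = load(f)
with open('results/{1}/04_normalizing/bad_columns_{0}_{1}.pick'.format(reso, r_enz_1), 'rb') as f:
    bads_1 = load(f)
print('%d biases, %d bad columns' % (len(biases_1), len(bads_1)))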
In [21]:
! dryhic3.r results/HindIII/03_filtering/valid_reads12_HindIII.tsv \
results/HindIII/04_normalizing/bad_columns_1000000_HindIII.pick \
HindIII hg38 1000000 results/HindIII/04_normalizing/biases_dryhic_1000000_HindIII.pick
In [22]:
! dryhic3.r results/MboI/03_filtering/valid_reads12_MboI.tsv \
results/MboI/04_normalizing/bad_columns_1000000_MboI.pick \
MboI hg38 1000000 results/MboI/04_normalizing/biases_dryhic_1000000_MboI.pick
In [23]:
bias_dry_path = 'results/{1}/04_normalizing/biases_dryhic_{0}_{1}.pick'
hic_data_1.bias = dict((int(l.split()[0]), float(l.split()[1]))
                       for l in open(bias_dry_path.format(reso, r_enz_1)))
hic_data_2.bias = dict((int(l.split()[0]), float(l.split()[1]))
                       for l in open(bias_dry_path.format(reso, r_enz_2)))
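As a quick consistency check (illustrative, and assuming the dryhic output indexes bins genome-wide starting from 0, the same convention used for the TADbit bad columns), every bin that survived filtering should now have a bias value:
missing = [i for i in range(len(hic_data_1))
           if i not in hic_data_1.bias and i not in hic_data_1.bads]
print('%d non-filtered bins lack a dryhic bias value' % len(missing))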
In [24]:
hic_map(hic_data_1, normalized=True, focus='chr18', show=True)
hic_map(hic_data_2, normalized=True, focus='chr18', show=True)
In [25]:
hic_map(hic_data_1, by_chrom='intra', normalized=False,
        savedata='results/{1}/04_normalizing/{0}_raw'.format(reso, r_enz_1))
hic_map(hic_data_2, by_chrom='intra', normalized=False,
        savedata='results/{1}/04_normalizing/{0}_raw'.format(reso, r_enz_2))
In [26]:
hic_map(hic_data_1, by_chrom='intra', normalized=True,
        savedata='results/{1}/04_normalizing/{0}_norm'.format(reso, r_enz_1))
hic_map(hic_data_2, by_chrom='intra', normalized=True,
        savedata='results/{1}/04_normalizing/{0}_norm'.format(reso, r_enz_2))
In [27]:
if reso >= 300000:
    hic_map(hic_data_1, by_chrom=False, normalized=False,
            savedata='results/{1}/04_normalizing/{0}_raw.mat'.format(reso, r_enz_1))
    hic_map(hic_data_1, by_chrom=False, normalized=True,
            savedata='results/{1}/04_normalizing/{0}_norm.mat'.format(reso, r_enz_1))
    hic_map(hic_data_2, by_chrom=False, normalized=False,
            savedata='results/{1}/04_normalizing/{0}_raw.mat'.format(reso, r_enz_2))
    hic_map(hic_data_2, by_chrom=False, normalized=True,
            savedata='results/{1}/04_normalizing/{0}_norm.mat'.format(reso, r_enz_2))
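The exported genome-wide matrices can be read back for further analysis; the sketch below assumes the .mat file is a plain whitespace-separated numeric matrix (header lines starting with '#', if present, are skipped by np.loadtxt by default), which may need adjusting to the actual export format.
import numpy as np

mat = np.loadtxt('results/{1}/04_normalizing/{0}_norm.mat'.format(reso, r_enz_1))
print(mat.shape)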