In [1]:
from pytadbit.mapping.filter import filter_reads
The max_molecule_length
parameter used to filter-out pseudo-dangling-ends can be extracted from the previous section in the computation of insert size.
The min_distance_to_re
, that affects the detection of random breaks, should be a bit larger in order to contain almost all the fragments.
In [2]:
! mkdir -p results/HindIII/03_filtering
! mkdir -p results/MboI/03_filtering
In [3]:
r_enz = 'HindIII'
In [4]:
# this will last ~10 minutes
masked1 = filter_reads(
'results/{0}/03_filtering/reads12_{0}.tsv'.format(r_enz),
max_molecule_length=750, over_represented=0.005, max_frag_size=100000,
min_frag_size=100, re_proximity=5, min_dist_to_re=1000)
In [5]:
r_enz = 'MboI'
In [6]:
# this will last ~10 minutes
masked2 = filter_reads(
'results/{0}/03_filtering/reads12_{0}.tsv'.format(r_enz),
max_molecule_length=750, over_represented=0.005, max_frag_size=100000,
min_frag_size=100, re_proximity=5, min_dist_to_re=1000)
This generates a dictionary with the different filters and the reads affected by each.
The filters are:
In [7]:
from pytadbit.mapping.filter import apply_filter
In [8]:
r_enz = 'HindIII'
apply_filter('results/fragment/{0}/03_filtering/reads12_{0}.tsv'.format(r_enz),
'results/fragment/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz), masked1,
filters=[1, 2, 3, 4, 9, 10])
Out[8]:
In [9]:
r_enz = 'MboI'
apply_filter('results/fragment/{0}/03_filtering/reads12_{0}.tsv'.format(r_enz),
'results/fragment/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz), masked2,
filters=[1, 2, 3, 4, 9, 10])
Out[9]:
In [10]:
from pytadbit.mapping.analyze import hic_map
In [11]:
r_enz = 'HindIII'
hic_map('results/fragment/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz),
resolution=1000000, show=True)
In [12]:
r_enz = 'MboI'
hic_map('results/fragment/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz),
resolution=1000000, show=True)
Zoom to a single chromosome or a region:
In [10]:
hic_map('results/fragment/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz),
resolution=1000000, show=True, focus='chr1')
In [ ]:
hic_map('results/fragment/{0}/03_filtering/valid_reads12_{0}.tsv'.format(r_enz),
resolution=1000000, show=True, focus=(500, 1000))
In [ ]: