Iterative mapping


In [12]:
# Restriction enzyme for this run; it is substituted into the FASTQ and output
# paths and passed as r_enz to the full_mapping calls further down.
r_enz = "HindIII"

In [4]:
from pytadbit.mapping.full_mapper import full_mapping

In [7]:
# Create the top-level output directory; -p makes this a no-op if it already exists.
! mkdir -p results

In [8]:
# Per-enzyme output directory for the HindIII dataset.
! mkdir -p results/HindIII

In [14]:
# Parent directory of the out_map_dir paths used by the full_mapping calls when
# r_enz is "HindIII"; -p also creates any missing parent directories.
! mkdir -p results/HindIII/01_mapping

In [9]:
# Per-enzyme output directory for MboI; none of the mapping cells visible here
# write to it (r_enz is set to "HindIII").
! mkdir -p results/MboI

In [15]:
# Mapping output directory for MboI, mirroring the HindIII layout; presumably
# used by re-running the mapping cells with r_enz = "MboI" -- TODO confirm.
! mkdir -p results/MboI/01_mapping

In [10]:
# Sanity check: list the working directory to confirm results/ was created.
# Uses the explicit %ls magic rather than a bare `ls`, which only works through
# automagic (and silently stops working if a Python variable named `ls` exists).
%ls


00_fastq_QC.ipynb  01_mapping.ipynb  learning_phyton.ipynb  results/
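full_mapping("/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem", nthreads=2, clean=True, r_enz=r_enz, frag_map=True, out_map_dir="results/{0}/01_mapping/map{0}_r1".format(r_enz), fastq_path="/media/storage/FASTQs/K562_{0}_1.fastq".format(r_enz), windows=((1, 25), (1, 30), (1, 40), (1, 75), (25, 75))) -- This variant of the call is for iterative mapping: the read windows are passed through the `windows` keyword argument. (Note: the TADbit tutorial's iterative-mapping example also sets frag_map=False; confirm before running.)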

In [17]:
# Map read 1 of the K562 library for the enzyme in r_enz against the hg38 GEM index.
# With frag_map=True the log below shows two passes: full-length reads are mapped
# first, then the remaining reads are split at ligation sites and mapped as fragments.
# The call is the last expression so the list of output .map files is displayed.
full_mapping(
    "/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem",  # GEM index, passed positionally
    fastq_path="/media/storage/FASTQs/K562_{0}_1.fastq".format(r_enz),
    out_map_dir="results/{0}/01_mapping/map{0}_r1".format(r_enz),
    r_enz=r_enz,
    frag_map=True,
    nthreads=2,  # appears as -T 2 on the gem-mapper command line in the log
    clean=True   # log shows intermediate files being removed; presumably driven by this flag
)


Preparing FASTQ file
  - conversion to MAP format
Mapping reads in window 1-end...
TO GEM /home/student/tmp/K562_HindIII_1_Vyc9AH
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_1_Vyc9AH -o /home/student/tmp/K562_HindIII_1_Vyc9AH_full_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_1_Vyc9AH
   x removing map /home/student/tmp/K562_HindIII_1_Vyc9AH_full_1-end.map
  - splitting into restriction enzyme (RE) fragments using ligation sites
  - ligation sites are replaced by RE sites to match the reference genome
    * enzyme: HindIII, ligation site: AAGCTAGCTT, RE site: AAGCTT
Preparing MAP file
   x removing pre-GEM input /home/student/tmp/K562_HindIII_1_Vyc9AH_filt_1-end.map
Mapping fragments of remaining reads...
TO GEM /home/student/tmp/K562_HindIII_1_NSIh1z
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_1_NSIh1z -o /home/student/tmp/K562_HindIII_1_NSIh1z_frag_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_1_NSIh1z
   x removing failed to map /home/student/tmp/K562_HindIII_1_Vyc9AH_fail.map
   x removing tmp mapped /home/student/tmp/K562_HindIII_1_NSIh1z_frag_1-end.map
Out[17]:
['results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_full_1-end.map',
 'results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_frag_1-end.map']

In [18]:
# Map read 2 of the K562 library for the enzyme in r_enz against the hg38 GEM index.
# Same settings as the read-1 cell; only the FASTQ file (_2) and the output
# directory (_r2) differ, so both read ends are mapped independently.
# The call is the last expression so the list of output .map files is displayed.
full_mapping(
    "/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem",  # GEM index, passed positionally
    fastq_path="/media/storage/FASTQs/K562_{0}_2.fastq".format(r_enz),
    out_map_dir="results/{0}/01_mapping/map{0}_r2".format(r_enz),
    r_enz=r_enz,
    frag_map=True,
    nthreads=2,  # appears as -T 2 on the gem-mapper command line in the log
    clean=True   # log shows intermediate files being removed; presumably driven by this flag
)


/home/student/.miniconda2/lib/python2.7/site-packages/pytadbit/mapping/full_mapper.py:390: UserWarning: WARNING: only 163 Gb left on tmp_dir: /home/student/tmp

  warn('WARNING: only %d Gb left on tmp_dir: %s\n' % (fspace, temp_dir))
Preparing FASTQ file
  - conversion to MAP format
Mapping reads in window 1-end...
TO GEM /home/student/tmp/K562_HindIII_2_DcTYV4
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_2_DcTYV4 -o /home/student/tmp/K562_HindIII_2_DcTYV4_full_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_2_DcTYV4
   x removing map /home/student/tmp/K562_HindIII_2_DcTYV4_full_1-end.map
  - splitting into restriction enzyme (RE) fragments using ligation sites
  - ligation sites are replaced by RE sites to match the reference genome
    * enzyme: HindIII, ligation site: AAGCTAGCTT, RE site: AAGCTT
Preparing MAP file
   x removing pre-GEM input /home/student/tmp/K562_HindIII_2_DcTYV4_filt_1-end.map
Mapping fragments of remaining reads...
TO GEM /home/student/tmp/K562_HindIII_2_2vd54o
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_2_2vd54o -o /home/student/tmp/K562_HindIII_2_2vd54o_frag_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_2_2vd54o
   x removing failed to map /home/student/tmp/K562_HindIII_2_DcTYV4_fail.map
   x removing tmp mapped /home/student/tmp/K562_HindIII_2_2vd54o_frag_1-end.map
Out[18]:
['results/HindIII/01_mapping/mapHindIII_r2/K562_HindIII_2_full_1-end.map',
 'results/HindIII/01_mapping/mapHindIII_r2/K562_HindIII_2_frag_1-end.map']

In [ ]: