Iterative mapping


In [1]:
r_enz = 'HindIII'

In [3]:
from pytadbit.mapping.full_mapper import full_mapper


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-3-fb0f0b3a8525> in <module>()
----> 1 from pytadbit.mapping.full_mapper import full_mapper

ImportError: cannot import name full_mapper

In [4]:
from pytadbit.mapping.full_mapper import full_mapping

In [15]:
full_mapping('/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem', 
             nthreads=2, clean=True, r_enz=r_enz, 
             frag_map=False, 
             out_map_dir='results/{0}/01_mapping/map{0}_r1'.format(r_enz), 
             fastq_path='/media/storage/FASTQs/K562_{0}_1.fastq'.format(r_enz), 
             windows=((1, 25),(1, 30), (1, 40), (1, 75))


  File "<ipython-input-15-1caba3e27fd6>", line 6
    windows=((1, 25),(1, 30), (1, 40), (1, 75))
                                               ^
SyntaxError: invalid syntax

In [ ]:
full_mapping('/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem', 
             nthreads=2, clean=True, r_enz=r_enz, 
             frag_map=False, 
             out_map_dir='results/{0}/01_mapping/map{0}_r2'.format(r_enz), 
             fastq_path='/media/storage/FASTQs/K562_{0}_2.fastq'.format(r_enz), 
             windows=((1, 25),(1, 30), (1, 40), (1, 75))

fragment based mapping


In [6]:
! mkdir -p results

In [7]:
! mkdir -p results/HindIII

In [10]:
! mkdir -p results/HindIII/01_mapping

In [8]:
! mkdir -p results/MboI

In [11]:
! mkdir -p results/MboI/01_mapping

In [12]:
! ls


00_fastq_QC.ipynb  01_mapping.ipynb  learning_python.ipynb  results  resutls

In [16]:
full_mapping('/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem', 
             nthreads=2, clean=True, r_enz=r_enz, 
             frag_map=True, 
             out_map_dir='results/{0}/01_mapping/map{0}_r1'.format(r_enz), 
             fastq_path='/media/storage/FASTQs/K562_{0}_1.fastq'.format(r_enz))


/home/student/.miniconda2/lib/python2.7/site-packages/pytadbit/mapping/full_mapper.py:390: UserWarning: WARNING: only 153 Gb left on tmp_dir: /home/student/tmp

  warn('WARNING: only %d Gb left on tmp_dir: %s\n' % (fspace, temp_dir))
Preparing FASTQ file
  - conversion to MAP format
Mapping reads in window 1-end...
TO GEM /home/student/tmp/K562_HindIII_1_B1BZJD
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_1_B1BZJD -o /home/student/tmp/K562_HindIII_1_B1BZJD_full_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_1_B1BZJD
   x removing map /home/student/tmp/K562_HindIII_1_B1BZJD_full_1-end.map
  - splitting into restriction enzyme (RE) fragments using ligation sites
  - ligation sites are replaced by RE sites to match the reference genome
    * enzyme: HindIII, ligation site: AAGCTAGCTT, RE site: AAGCTT
Preparing MAP file
   x removing pre-GEM input /home/student/tmp/K562_HindIII_1_B1BZJD_filt_1-end.map
Mapping fragments of remaining reads...
TO GEM /home/student/tmp/K562_HindIII_1_t_Ya_O
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_1_t_Ya_O -o /home/student/tmp/K562_HindIII_1_t_Ya_O_frag_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_1_t_Ya_O
   x removing failed to map /home/student/tmp/K562_HindIII_1_B1BZJD_fail.map
   x removing tmp mapped /home/student/tmp/K562_HindIII_1_t_Ya_O_frag_1-end.map
Out[16]:
['results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_full_1-end.map',
 'results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_frag_1-end.map']

In [17]:
full_mapping('/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem', 
             nthreads=2, clean=True, r_enz=r_enz, 
             frag_map=True, 
             out_map_dir='results/{0}/01_mapping/map{0}_r2'.format(r_enz), 
             fastq_path='/media/storage/FASTQs/K562_{0}_2.fastq'.format(r_enz))


/home/student/.miniconda2/lib/python2.7/site-packages/pytadbit/mapping/full_mapper.py:390: UserWarning: WARNING: only 91 Gb left on tmp_dir: /home/student/tmp

  warn('WARNING: only %d Gb left on tmp_dir: %s\n' % (fspace, temp_dir))
Preparing FASTQ file
  - conversion to MAP format
Mapping reads in window 1-end...
TO GEM /home/student/tmp/K562_HindIII_2_rXuayL
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_2_rXuayL -o /home/student/tmp/K562_HindIII_2_rXuayL_full_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_2_rXuayL
   x removing map /home/student/tmp/K562_HindIII_2_rXuayL_full_1-end.map
  - splitting into restriction enzyme (RE) fragments using ligation sites
  - ligation sites are replaced by RE sites to match the reference genome
    * enzyme: HindIII, ligation site: AAGCTAGCTT, RE site: AAGCTT
Preparing MAP file
   x removing pre-GEM input /home/student/tmp/K562_HindIII_2_rXuayL_filt_1-end.map
Mapping fragments of remaining reads...
TO GEM /home/student/tmp/K562_HindIII_2_ZtqPZ5
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_2_ZtqPZ5 -o /home/student/tmp/K562_HindIII_2_ZtqPZ5_frag_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_2_ZtqPZ5
   x removing failed to map /home/student/tmp/K562_HindIII_2_rXuayL_fail.map
   x removing tmp mapped /home/student/tmp/K562_HindIII_2_ZtqPZ5_frag_1-end.map
Out[17]:
['results/HindIII/01_mapping/mapHindIII_r2/K562_HindIII_2_full_1-end.map',
 'results/HindIII/01_mapping/mapHindIII_r2/K562_HindIII_2_frag_1-end.map']

In [19]:
! head results/HindIII/01_mapping/mapHindIII_r2/K562_HindIII_2_full_1-end.map


NS500645:59:HCL32BGXY:1:11101:14163:1054	CAACCAATTGTCAAACAGAAGAATTTTAAATCTACCTATAAGCTGGAAGCCCCACCCCACTTCAAGTTGTCCCAT	AAAAAEEE/EEEEEEEEEEEEEEEEEEEEEEEEEEEEAEE6EEEEEAAEE<EE/<EEEEEEEEEEEEEEE<A/<<	1	chr2:-:62077857:75
NS500645:59:HCL32BGXY:1:11101:14193:1055	CAGGCTGGTCTCGAACTCCTGGGCTCAAGCGATCCACCCACCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTG	AAAAAEEEEEEEE<AEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEE/EAAEEE	1	chr16:+:14833322:75
NS500645:59:HCL32BGXY:1:11101:10953:1055	GATTCTCAAATTATCTATAACAGATCCATCTTCAGAATCTAAGCACAGAGGGAAGCGATCGGGACATACAAGGGC	AAAAAEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEE	1	chr10:-:2392912:75
NS500645:59:HCL32BGXY:1:11101:16360:1056	GTCACAGCCAGTTGGGAAAACTGGCTAGCCATATGCAGAAAACTGAAACTGGACCCCATCCTTACACCTTGCACA	AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE<EEEEEEEE6EEEEAEEEEAEEEEEEEAEEEEEEEEA	0:1	chr2:+:7141515:3G71
NS500645:59:HCL32BGXY:1:11101:21852:1056	CAGACAGACAAATACTGAAAGAATTCACAACTACCAAGCCAATACTACCAACACTGCTAAAAGGAGCTCTAAATC	AAAAAEEEEEEEEEEEEEEEEEEEEEEE<EEEEEEE<EEAAEEEEEEEEAEEEEEEEEEEEEEEAEEEEEEEEE<	1	chr6:+:23863609:75
NS500645:59:HCL32BGXY:1:11101:17157:1056	CCTCATGGGGACCAAGTCTGTGTGCGAAGTAGGTAGCAACTGGTTCCAGCCCATCTACCTCGGAGCCATGTTCTC	AAAAAEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEAEEEEEEAEEEEEEEEEEA	0:1	chr9:-:114790552:21A53
NS500645:59:HCL32BGXY:1:11101:11342:1057	CTGCTCCTGTGAGTCAGAGTGTGTCATTTCCTCACCTAAAACACTCCAGTGGCTCCACCTCGGTCTTGTGAAGCT	AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEE	1	chr6:-:29924856:75
NS500645:59:HCL32BGXY:1:11101:15358:1057	CATGTTTATGTACTCATATGTATATATGTGTATTTGTGTGTATATATACAGACACTCTGATACACATGCACAAAC	AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE	1	chr2:+:58688316:75
NS500645:59:HCL32BGXY:1:11101:8600:1058	CTATTCTTTATTGTAAGATATCAGAACAGCAGAGCTCAAAGTGCACACGATTTCCCTTCTTATGTCAGGCACTGG	AAAAAAEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEA/EEEEEEEEEEEEEAAEEEE	1	chr5:-:5504049:75
NS500645:59:HCL32BGXY:1:11101:23591:1059	CTTGGCTCATCATCTCAGAGTGTTTGAATATTGAATTTCAAGTAGGAAAGCAGTAGACTGCAACAAGACTAAAAC	//AAA/EEE//<EEEEEEEEEEEEEEEEE/EEE</<EEAE6</EEEE/EEEEE/EAEE<EEEAEEEE/A/EEEEE	0:1	chr18:-:37712209:29C45

In [20]:
! head results/HindIII/01_mapping/mapHindIII_r2/K562_HindIII_2_frag_1-end.map


NS500645:59:HCL32BGXY:1:11101:24830:1055~2~	AAGCTTTTTTTTCCCTTTCTTTCTTTGCCTTTAGGGTTGTAGCAAAAAGGAAGATAA	HHHHHHEEEEEEEAEEEEEEEEEEEEEEEEEEAEEEEAEEEEE/EEE6EEEEEEEEE	1	chr1:+:50841566:57
NS500645:59:HCL32BGXY:1:11101:6039:1056	GGCTTCTTCTCTTTAAGGCAACATAGTATAAAGCTT	AAAAAEEEEEEEEEEEEEEEEEEE/EEAE6HHHHHH	1	chr7:+:128976029:36
NS500645:59:HCL32BGXY:1:11101:6039:1056~2~	AAGCTTTCGAAGGCTAGCTAGGATAGATTTGAAACCCAGCC	HHHHHHEEEEEEEEEEEEEEEEEEEEEEEEEEE/EEEAEEE	0:1	chr7:-:129557448:22G18
NS500645:59:HCL32BGXY:1:11101:19311:1056	ACCTACTTCATAAAAAGCCAAGGATGTAATTTTTTCAGTTTAGTGTAAGCTT	/AAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEHHHHHH	1	chr19:+:41557056:52
NS500645:59:HCL32BGXY:1:11101:19311:1056~2~	AAGCTTAGGGTTCAAGTCTAAACAC	HHHHHHEEEEEEAEEEEEEEEEEEE	1	chr19:+:41066202:25
NS500645:59:HCL32BGXY:1:11101:17064:1056	CAACAATCATATATTTCCTGCAAATGGAAAGGCAAATGGTACAGAGTCCCCAATGTACAAGCTT	AAAAAEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEAHHHHHH	0:1	chr4:+:33593268:40C23
NS500645:59:HCL32BGXY:1:11101:26187:1057	CCCACTTACCACTGAAGTTAAAGCTT	AAAAAEEEA<<AEEEEEEEEHHHHHH	1	chr10:+:63502264:26
NS500645:59:HCL32BGXY:1:11101:26187:1057~2~	AAGCTTCTAAGTGAAGTCTCAAAGCCACAGAATTAAGAAAGGCTGTTTGTT	HHHHHHAEA6EEAAEEE<E/<EA/6/A6A</EA<A/A/AA//<</AA<//E	1	chr10:-:63481487:51
NS500645:59:HCL32BGXY:1:11101:8250:1059	AGCCCGCTCAAGGTCTAAACTCGGCTCGTCCCTGGGCCTTCCACCTACAAGAAGCTT	AAAAAE666EEEEEEEEEAAAEAEEEA/EEEEEAEEE6EEEE/EEEE6EAEHHHHHH	0:1	chr13:+:29695513:3A53
NS500645:59:HCL32BGXY:1:11101:17211:1059	AGTCATAGCAACAACAGATACTTAAGCTT	AAAAAEEEEEEEEEEEEEEEEEEHHHHHH	1	chr2:+:25493831:29

In [ ]: