iterative mapping


In [28]:
r_enz = 'HindIII'

In [29]:
from pytadbit.mapping.full_mapper import full_mapping

In [15]:
full_mapping('media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem'nthreads=2, clean=True, r_enz=r_enz, frag_map=False, out_map_dir='results/{0}/01_mapping/map{0}_r1.format(r_enz)',fastq_path='media/storage/FASTQs/K56_{0}_1.fastq'.format(r_enz),windows=((1,25),(1,30),(1,35),(1,40),(1,45),(1,50)))


  File "<ipython-input-15-8f525816f993>", line 1
    full_mapping('media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem'nthreads=2, clean=True, r_enz=r_enz, frag_map=False, out_map_dir='results/{0}/01_mapping/map{0}_r1.format(r_enz)',fastq_path='media/storage/FASTQs/K56_{0}_1.fastq'.format(r_enz),windows=((1,25),(1,30),(1,35),(1,40),(1,45),(1,50)))
                                                                                      ^
SyntaxError: invalid syntax

fragment based mapping


In [21]:
! mkdir -p results

In [22]:
! mkdir -p results/HindIII

In [23]:
! mkdir -p results/HindIII/01_mapping

In [24]:
! mkdir -p results/MboI

In [26]:
! mkdir -p results/Mbo/01mapping

In [ ]:
full_mapping('/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem', nthreads=2, clean=True, r_enz=r_enz, frag_map=True, out_map_dir='results/{0}/01_mapping/map{0}_r1'.format(r_enz),fastq_path='/media/storage/FASTQs/K562_{0}_1.fastq'.format(r_enz))


/home/student/.miniconda2/lib/python2.7/site-packages/pytadbit/mapping/full_mapper.py:390: UserWarning: WARNING: only 199 Gb left on tmp_dir: /home/student/tmp

  warn('WARNING: only %d Gb left on tmp_dir: %s\n' % (fspace, temp_dir))
Preparing FASTQ file
  - conversion to MAP format
Mapping reads in window 1-end...
TO GEM /home/student/tmp/K562_HindIII_1_qjfa9v
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_1_qjfa9v -o /home/student/tmp/K562_HindIII_1_qjfa9v_full_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_1_qjfa9v
   x removing map /home/student/tmp/K562_HindIII_1_qjfa9v_full_1-end.map
  - splitting into restriction enzyme (RE) fragments using ligation sites
  - ligation sites are replaced by RE sites to match the reference genome
    * enzyme: HindIII, ligation site: AAGCTAGCTT, RE site: AAGCTT
Preparing MAP file
   x removing pre-GEM input /home/student/tmp/K562_HindIII_1_qjfa9v_filt_1-end.map
Mapping fragments of remaining reads...
TO GEM /home/student/tmp/K562_HindIII_1_I8AOK_
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_1_I8AOK_ -o /home/student/tmp/K562_HindIII_1_I8AOK__frag_1-end
Parsing result...
   x removing GEM input /home/student/tmp/K562_HindIII_1_I8AOK_
   x removing failed to map /home/student/tmp/K562_HindIII_1_qjfa9v_fail.map
   x removing tmp mapped /home/student/tmp/K562_HindIII_1_I8AOK__frag_1-end.map
Out[ ]:
['results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_full_1-end.map',
 'results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_frag_1-end.map']

In [35]:
full_mapping('/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem', nthreads=2, clean=True, r_enz=r_enz, frag_map=True, out_map_dir='results/{0}/01_mapping/map{0}_r2'.format(r_enz),fastq_path='/media/storage/FASTQs/K562_{0}_2.fastq'.format(r_enz))


/home/student/.miniconda2/lib/python2.7/site-packages/pytadbit/mapping/full_mapper.py:390: UserWarning: WARNING: only 57 Gb left on tmp_dir: /home/student/tmp

  warn('WARNING: only %d Gb left on tmp_dir: %s\n' % (fspace, temp_dir))
Preparing FASTQ file
  - conversion to MAP format
Mapping reads in window 1-end...
TO GEM /home/student/tmp/K562_HindIII_2_iCDZjg
/usr/local/bin/gem-mapper -I /media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem -q offset-33 -m 0.04 -s 0 --allow-incomplete-strata 0.00 --granularity 10000 --max-decoded-matches 1 --min-decoded-strata 0 --min-insert-size 0 --max-insert-size 0 --min-matched-bases 0.8 --gem-quality-threshold 26 --max-big-indel-length 15 --mismatch-alphabet ACGT -E 0.30 --max-extendable-matches 20 --max-extensions-per-match 1 -e 0.04 -T 2 -i /home/student/tmp/K562_HindIII_2_iCDZjg -o /home/student/tmp/K562_HindIII_2_iCDZjg_full_1-end
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-35-7c72cc2eebff> in <module>()
----> 1 full_mapping('/media/storage/db/reference_genome/Homo_sapiens/hg38/hg38.gem', nthreads=2, clean=True, r_enz=r_enz, frag_map=True, out_map_dir='results/{0}/01_mapping/map{0}_r2'.format(r_enz),fastq_path='/media/storage/FASTQs/K562_{0}_2.fastq'.format(r_enz))

/home/student/.miniconda2/lib/python2.7/site-packages/pytadbit/mapping/full_mapper.pyc in full_mapping(gem_index_path, fastq_path, out_map_dir, r_enz, frag_map, min_seq_len, windows, add_site, clean, get_nread, **kwargs)
    435 
    436         if not skip:
--> 437             gem_mapping(gem_index_path, curr_map, out_map_path, **kwargs)
    438             # parse map file to extract not uniquely mapped reads
    439             print 'Parsing result...'

/home/student/.miniconda2/lib/python2.7/site-packages/pytadbit/mapping/full_mapper.pyc in gem_mapping(gem_index_path, fastq_path, out_map_path, gem_binary, **kwargs)
    328     try:
    329         # check_call(gem_cmd, stdout=PIPE, stderr=PIPE)
--> 330         out, err = Popen(gem_cmd, stdout=PIPE, stderr=PIPE).communicate()
    331     except CalledProcessError as e:
    332         print out

/home/student/.miniconda2/lib/python2.7/subprocess.pyc in communicate(self, input)
    477             return (stdout, stderr)
    478 
--> 479         return self._communicate(input)
    480 
    481 

/home/student/.miniconda2/lib/python2.7/subprocess.pyc in _communicate(self, input)
   1096 
   1097             if _has_poll:
-> 1098                 stdout, stderr = self._communicate_with_poll(input)
   1099             else:
   1100                 stdout, stderr = self._communicate_with_select(input)

/home/student/.miniconda2/lib/python2.7/subprocess.pyc in _communicate_with_poll(self, input)
   1150             while fd2file:
   1151                 try:
-> 1152                     ready = poller.poll()
   1153                 except select.error, e:
   1154                     if e.args[0] == errno.EINTR:

KeyboardInterrupt: 

In [36]:
! 
head results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_full_1-end.map


  File "<ipython-input-36-d0298a47dd9d>", line 2
    head results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_full_1-end.map
               ^
SyntaxError: invalid syntax

In [37]:
! head results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_full_1-end.map


NS500645:59:HCL32BGXY:1:11101:10953:1055	CTGACNTACAGGACAGTTATTTTCCAGACTGAAAATTCTCAAACCCAGTCGTGCAAATGATTCTTCCTGCAGTTG	AAAAA#EEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEAEEEEEEEEEEEEEEEEEEEE	0:1:0	chr10:-:2688535:5G69
NS500645:59:HCL32BGXY:1:11101:24830:1055	CTTTTNTTTGCTAGTCAAAGGCCTAGGCCTTTATCTTCCTTTTTGCTACAACCCTAAAGGCAAAGAAAGAAAGGG	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEE	0:1:0	chr1:-:50841578:5C69
NS500645:59:HCL32BGXY:1:11101:6039:1056	AGGGTNACATAGGTGACATAGCACGTAAGAGCTTGGGTCCTGAAGTTAGATGGGGCTGGGTTTCAAATCTATCCT	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE	0:0:1:0	chr7:+:129557395:5G65C3
NS500645:59:HCL32BGXY:1:11101:21852:1056	CTCTGNTTTGTTTCTTTGTTTGTTATGGTTGTTTTTCAGATAGATATAGTTTCATTACAAAAGAAAATTTGTTAT	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEE	0:0:1:0	chr6:-:23446287:5T68G
NS500645:59:HCL32BGXY:1:11101:19311:1056	ATTTTNTACCTTTTGAGAAACACTGAACTAATAATATTTCTGAGACCTCTTCAAGCTCTGACATGCTGTGTTTAG	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE	0:1:0	chr19:-:41066219:5A69
NS500645:59:HCL32BGXY:1:11101:17157:1056	ATAATNTATTTGCTCTCTTCAGCCTTTTTCCAGTTAGTACTCTCATCAGTAAGGCACATGAAGTGTGAAATTTTG	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE	0:1:0	chr9:+:114790228:5A69
NS500645:59:HCL32BGXY:1:11101:26187:1057	AAACTNATTTTTTTTTTTGCTTTTTTTTCCTTTTAAAAATATGAGCAGTAATCACATTGACTAAACAAACAGCCT	AAAAA#EEEEEEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEEAEEEEEEEEEEEE6EEEEEEEAEEAEEE/EEEE	0:1:0	chr10:+:63481424:5G69
NS500645:59:HCL32BGXY:1:11101:8600:1058	AGAGANCCAAAGCTGGATAAATTTTCTACCTGAGAGCTGGAGTTTCCAAGAATGACAGACAGCCATGGGAAGAAG	AAAAA#EEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEAEEEE	0:1:0	chr5:+:53943740:5A69
NS500645:59:HCL32BGXY:1:11101:8731:1058	ATGAANCTACTACAAGAAAACATTAGGGAAACTCTTCAGGATATTGGTCTGGGCAAAAATTTCTTGAGTAATACC	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE	0:1:0	chr3:+:17104208:5A69
NS500645:59:HCL32BGXY:1:11101:23591:1059	CTATTNGTCTTAATAGTCTGAGTTCCCAAGATAAACAGAACATATACATATATTTATATTATAAGGAATTGGCTC	AAAAA#EEE/EEEEEEEEEEEE6E<EEEEE/EEEAEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEE/EAAEEEE	0:1:0	chr18:+:41009204:5A69

In [38]:
! head results/HindIII/01_mapping/mapHindIII_r1/K562_HindIII_1_frag_1-end.map


NS500645:59:HCL32BGXY:1:11101:14163:1054~2~	AAGCTTCAAGGAAATACATTT	HHHHHHEEEEEEAAEE</EEE	1	chr2:+:62077808:21
NS500645:59:HCL32BGXY:1:11101:4416:1054~2~	AAGCTTCCATTCNTTTGCTTCTGTTGTGTGTTTTTTCTTTTGTTTTTTTTTTGTTTTGGTT	HHHHHHAEEEE/#EEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEE/<EEEEEAE	0:0:1:0	chr16:+:81452828:12T45T2
NS500645:59:HCL32BGXY:1:11101:16360:1056	CGCCTNTCTCCGAGGCCCTGTATTCTGGAATCTTGTGGCTCTCTCTTGGATCCTAAGCTT	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEHHHHHH	0:1:0	chr2:-:6811943:5C54
NS500645:59:HCL32BGXY:1:11101:17064:1056	CATCANTAACTGTATCCAAAAGGAGACGCTGCTTGGGGTGTACCCAGAGAAAACACCAAGCTT	AAAAA#EEEEEEEEEEEEEEEEEEEEE<EEEAEEEEEEEEEEEEAEEEEEEEEEEEEHHHHHH	0:1:0	chr4:-:40244040:5A57
NS500645:59:HCL32BGXY:1:11101:15358:1057	AGAAANTTCTACAGACTCCAGAGTGAAGCTT	AAAAA#EEEEEEEEEEEEEEEEEEEHHHHHH	0:1	chr2:-:89284133:5C25
NS500645:59:HCL32BGXY:1:11101:15358:1057~2~	AAGCTTAAAAGATTCACCAATTACTTGAAAATGTTTGTGCATGTGT	HHHHHHEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE	1	chr2:-:58688377:46
NS500645:59:HCL32BGXY:1:11101:22546:1059	AGCTTNATCATTTCTAGCTTTTGAGGAAAACTGGGAGACATGCGACTGCTCCTTTCACTTGAAAGCTT	AAAAA#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEEHHHHHH	0:1:0	chr14:-:54128571:5A62
NS500645:59:HCL32BGXY:1:11101:6478:1064~2~	AAGCTTCCATTTTATTGAAGTAGTGCACAAAGCTTTCCCTCTTATTGAGGGATTTCTTCA	HHHHHHEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEE<EEEEEEEEEEE	1	chr1:-:118273696:60
NS500645:59:HCL32BGXY:1:11101:25274:1064~2~	AAGCTTTTCAAAACAAAATTAGCTCTTTCAGAATTAAGGCCTTGTTCTCTACAGAGTT	HHHHHHEEEEEEEEAEEEEEEEEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEAEEEEE	1	chrX:-:81223467:58
NS500645:59:HCL32BGXY:1:11101:18285:1064	CCCTGNGTTCCTCTGCCTTTCCGGGACAGGGGGGAAGATAAGATTTCATCATGAGAAAGCTT	AAAAA#EEEAEEEEEAEEEEE<EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEHHHHHH	0:1:0	chr16:+:23130111:5T56

In [ ]: