In [4]:
from Bio import SeqIO
import gzip
You can use either gzip-compressed or uncompressed FASTQ files.
In [5]:
# Gzip-compressed variant: open the three FASTQ files of sample C01.
# Text mode ("rt") is used on purpose: gzip.open(..., "r") returns a binary
# stream, while the FASTQ parser in SeqIO expects a text-mode handle.
in_R1 = gzip.open("./test/C01_R1.fastq.gz", "rt")
in_R2 = gzip.open("./test/C01_R2.fastq.gz", "rt")
in_R3 = gzip.open("./test/C01_R3.fastq.gz", "rt")

# One 'fastq-sanger' record iterator per handle; the handles must stay open
# for as long as the iterators are consumed in the following cells.
R1 = SeqIO.parse(in_R1, 'fastq-sanger')
R2 = SeqIO.parse(in_R2, 'fastq-sanger')
R3 = SeqIO.parse(in_R3, 'fastq-sanger')
In [13]:
# Uncompressed variant: open the three plain FASTQ files of sample C01.
# This rebinds the same names as the gzip cell, so run one cell or the other.
in_R1 = open("./test/C01_R1.fastq", mode="r")
in_R2 = open("./test/C01_R2.fastq", mode="r")
in_R3 = open("./test/C01_R3.fastq", mode="r")

# Wrap each handle in a 'fastq-sanger' record iterator; the handles are left
# open because the iterators are consumed with next() in the cells below.
R1, R2, R3 = (
    SeqIO.parse(handle, 'fastq-sanger') for handle in (in_R1, in_R2, in_R3)
)
In [15]:
# Show the next record of each parser, with a blank line between records.
# next(..., None) gives None instead of raising once a parser is exhausted.
for position, parser in enumerate((R1, R2, R3)):
    if position:
        # Separator goes before every record except the first one.
        print("")
    print(next(parser, None))
In [12]:
# Walk the three parsers in lockstep, pulling one record from each per pass.
# next(..., None) returns None at end of input instead of raising
# StopIteration, so the loop has to test for it explicitly -- without this
# check it would never terminate.
while True:
    read1 = next(R1, None)
    read2 = next(R2, None)
    read3 = next(R3, None)
    if read1 is None or read2 is None or read3 is None:
        # At least one of the files has no more records: stop reading.
        break
HTSeq is a newer parser for many NGS formats that can read fastq.gz files directly. Let's try it.
In [1]:
import HTSeq
In [3]:
# Per-read-type lists of gzip-compressed FASTQ chunks (C01, C02).
# Entries at the same index belong to the same chunk.
R1_list = [
    "./test/C01_R1.fastq.gz",
    "./test/C02_R1.fastq.gz",
]
R2_list = [
    "./test/C01_R2.fastq.gz",
    "./test/C02_R2.fastq.gz",
]
R3_list = [
    "./test/C01_R3.fastq.gz",
    "./test/C02_R3.fastq.gz",
]
# NOTE(review): R4_list repeats the R2 paths -- presumably a stand-in because
# no R4 test files exist; confirm before using on real data.
R4_list = [
    "./test/C01_R2.fastq.gz",
    "./test/C02_R2.fastq.gz",
]
In [10]:
# Sanity check: for every chunk, read the four FASTQ files in parallel and
# print the names of the first three reads from each of them.
#
# The loop variables are called *_path so that they do not overwrite the
# R1/R2/R3 parser objects created in the SeqIO cells, and the fourth reader
# takes its file from R4_list instead of re-reading the R3 file.
for chunk, (r1_path, r2_path, r3_path, r4_path) in enumerate(
        zip(R1_list, R2_list, R3_list, R4_list), start=1):
    print(chunk)
    R1_gen = HTSeq.FastqReader(r1_path, qual_scale="phred")
    R2_gen = HTSeq.FastqReader(r2_path, qual_scale="phred")
    R3_gen = HTSeq.FastqReader(r3_path, qual_scale="phred")
    R4_gen = HTSeq.FastqReader(r4_path, qual_scale="phred")
    for i, (r1, r2, r3, r4) in enumerate(zip(R1_gen, R2_gen, R3_gen, R4_gen)):
        print(r1.name + "\n" + r2.name + "\n" + r3.name + "\n" + r4.name + "\n\n")
        if i >= 2:
            # Three reads per chunk are enough for a visual check.
            break
In [ ]: