In [2]:
# List the files in the notebook's current working directory.
! ls


00_fastq_QC.ipynb  learning_phyton.ipynb

In [3]:
# Print the absolute path of the current working directory.
! pwd


/home/student/Notebooks/Nazaret

In [4]:
# Peek at the first 10 lines (head's default) of the FASTQ file to inspect the
# record layout: '@' header line, sequence, '+' separator, quality string.
! head /media/storage/FASTQs/K562_HindIII_1.fastq


@NS500645:59:HCL32BGXY:1:11101:14163:1054 1:N:0:GCCAAT
CATTCNTAAAGAAAAGAATTTTCAACNCAGAATTTCATATCCAGCCAACTAAGCTAGCTTCAAGGAAATACATTT
+
AAAAA#EEAEEEEEEEEEEAE/EEEE#EEEEEEEEEEEEEEEEAE</EE/EEEEAEEEEEEEEEEEAAEE</EEE
@NS500645:59:HCL32BGXY:1:11101:4416:1054 1:N:0:GCCAAT
CGAGTNAGGGAAGCTAGCTTCCATTCNTTTGCTTCTGTTGTGTGTTTTTTCTTTTGTTTTTTTTTTGTTTTGGTT
+
AAAAA#EEEEEEEEEEEEEEAEEEE/#EEEEEEEEEEEEEEEEEAEEEEEEEEEEEEEEEEEEEEE/<EEEEEAE
@NS500645:59:HCL32BGXY:1:11101:14193:1055 1:N:0:GCCAAT
AAAGTNCCCTGCATGATAGCATTTGTATAAAGTTCAAAACAGACCAAATGGATCTCTAATTTGTAGAAGGTCAGG

In [5]:
# Count the lines in the FASTQ file. Each record spans 4 lines, so the number
# of reads is the line count divided by 4.
! wc -l /media/storage/FASTQs/K562_HindIII_1.fastq


40000000 /media/storage/FASTQs/K562_HindIII_1.fastq

FASTQ quality check using TADbit


In [8]:
from pytadbit.utils.fastq_utils import quality_plot

In [ ]:
# TADbit FASTQ quality check on the K562 HindIII `_1` FASTQ file.
# r_enz names the restriction enzyme of the library; nreads caps how many
# reads are examined.
# NOTE(review): this cell was never executed and repeats the call made in
# the executed cell In [14] -- consider removing one of the two.
quality_plot(
    "/media/storage/FASTQs/K562_HindIII_1.fastq",
    r_enz="HindIII",
    nreads=1000000,
)

A PHRED quality score above 25 is considered acceptable.

A proportion of dangling ends above 5% is considered bad.


In [14]:
# TADbit FASTQ quality check on the K562 HindIII `_1` FASTQ file.
# r_enz names the restriction enzyme of the library; nreads caps how many
# reads are examined. The call is the cell's last expression, so its return
# value is shown as the cell output.
quality_plot(
    "/media/storage/FASTQs/K562_HindIII_1.fastq",
    r_enz="HindIII",
    nreads=1000000,
)


Out[14]:
(0.8355, 28.7747)

In [16]:
# Same TADbit quality check, now on the K562 MboI `_1` FASTQ file, with the
# restriction enzyme switched to MboI to match that library.
quality_plot(
    "/media/storage/FASTQs/K562_MboI_1.fastq",
    r_enz="MboI",
    nreads=1000000,
)


Out[16]:
(2.7736, 35.1285)

Each `Out[...]` tuple returned by `quality_plot` is (% dangling ends, % ligation sites).


In [19]:
# TADbit quality check on the K562 MboI `_2` FASTQ file (presumably the
# second read of the pair -- confirm against the sequencing layout).
# NOTE(review): nreads is 100000 here but 1000000 in the other calls in this
# notebook -- confirm whether the smaller sample is intentional.
quality_plot(
    "/media/storage/FASTQs/K562_MboI_2.fastq",
    r_enz="MboI",
    nreads=100000,
)


Out[19]:
(2.662, 33.142)

In [ ]: