In [1]:
# Activate the pycoQC virtual environment before running the examples below.
# virtualenvwrapper (`workon`) is used here, but a Conda environment can be used instead.
workon pycoQC
(pycoQC)
In [2]:
# Display the command-line help: all available options and their default values.
Fast5_to_seq_summary -h
usage: Fast5_to_seq_summary [-h] [--version] --fast5_dir FAST5_DIR
--seq_summary_fn SEQ_SUMMARY_FN
[--max_fast5 MAX_FAST5] [--threads THREADS]
[--basecall_id BASECALL_ID]
[--fields FIELDS [FIELDS ...]] [--include_path]
[--verbose_level VERBOSE_LEVEL]
Fast5_to_seq_summary generates a sequencing-summary-like file from a directory
containing Fast5 files
optional arguments:
-h, --help show this help message and exit
--version, -v show program's version number and exit
--fast5_dir FAST5_DIR, -f FAST5_DIR
Directory containing fast5 files. Can contain multiple
subdirectories
--seq_summary_fn SEQ_SUMMARY_FN, -s SEQ_SUMMARY_FN
Path of the sequencing summary file in which to write the
data extracted from the fast5 files
--max_fast5 MAX_FAST5
Maximum number of files to try to parse. 0 to
deactivate (default: 0)
--threads THREADS, -t THREADS
Total number of threads to use. 1 thread is used for
the reader and 1 for the writer. Minimum 3 (default:
4)
--basecall_id BASECALL_ID
ID of the basecalling group. Leave at 0 by default,
but if you performed basecalling multiple times on the same
fast5 files, this can be used to indicate the
corresponding group (1, 2 ...) (default: 0)
--fields FIELDS [FIELDS ...]
list of field names corresponding to attributes to try
to fetch from the fast5 files (default: ['read_id',
'run_id', 'channel', 'start_time',
'sequence_length_template', 'mean_qscore_template',
'calibration_strand_genome_template',
'barcode_arrangement'])
--include_path If given, the absolute path to the corresponding file
is added in an extra column (default: False)
--verbose_level VERBOSE_LEVEL
Level of verbosity, from 2 (Chatty) to 0 (Nothing)
(default: 0)
(pycoQC)
In [3]:
# Basic usage with default options: read the fast5 files found in data/ (-f)
# and write the extracted fields to results/summary_sequencing.tsv (-s).
Fast5_to_seq_summary \
-f data/ \
-s results/summary_sequencing.tsv
# Preview the first lines of the generated summary file.
head results/summary_sequencing.tsv
22 reads [00:00, 811.31 reads/s]
Total reads: 22 / Average speed: 543.6 reads/s
(pycoQC) (pycoQC) read_id run_id channel start_time sequence_length_template mean_qscore_template calibration_strand_genome_template
5b7fadd0-c646-4c7b-9800-66ee658a5ca8 40ebe55356ada6c830fa793745ef4c498d896c73 150 37 468 7.608 filtered_out
e6a8e4d0-7b3c-471a-be26-fa7857d12663 40ebe55356ada6c830fa793745ef4c498d896c73 318 15 392 8.304 filtered_out
2c32553e-62c6-4c7a-bf05-249771364f04 40ebe55356ada6c830fa793745ef4c498d896c73 237 11 1151 8.544 filtered_out
f8325de9-a77e-4616-a4a8-69ecf32e1688 40ebe55356ada6c830fa793745ef4c498d896c73 354 16 568 8.206 filtered_out
6af04302-04c8-4d8d-8e87-aa69178b3f24 40ebe55356ada6c830fa793745ef4c498d896c73 36 26 832 8.234 filtered_out
3e81c32a-f2ee-4719-a88d-e0affe93d26f 40ebe55356ada6c830fa793745ef4c498d896c73 348 24 1137 8.124 filtered_out
68804104-71dc-465c-b82d-3a99a4689701 40ebe55356ada6c830fa793745ef4c498d896c73 38 20 1010 8.325 filtered_out
37dfa1d5-5d84-486c-bf47-9eb6438f5645 40ebe55356ada6c830fa793745ef4c498d896c73 410 30 555 8.219 filtered_out
3784283c-47cc-48ac-8d7b-7efd32123b56 40ebe55356ada6c830fa793745ef4c498d896c73 243 20 893 8.54 filtered_out
(pycoQC)
In [4]:
# Same run as the basic example, but with 6 threads (-t) instead of the default 4.
# NOTE: this writes to the same output path as the basic example, so that file is overwritten.
Fast5_to_seq_summary \
-f data/ \
-s results/summary_sequencing.tsv \
-t 6
# Preview the first lines of the generated summary file.
head results/summary_sequencing.tsv
22 reads [00:00, 1129.77 reads/s]
Total reads: 22 / Average speed: 650.82 reads/s
(pycoQC) (pycoQC) read_id run_id channel start_time sequence_length_template mean_qscore_template calibration_strand_genome_template
5b7fadd0-c646-4c7b-9800-66ee658a5ca8 40ebe55356ada6c830fa793745ef4c498d896c73 150 37 468 7.608 filtered_out
e6a8e4d0-7b3c-471a-be26-fa7857d12663 40ebe55356ada6c830fa793745ef4c498d896c73 318 15 392 8.304 filtered_out
f8325de9-a77e-4616-a4a8-69ecf32e1688 40ebe55356ada6c830fa793745ef4c498d896c73 354 16 568 8.206 filtered_out
2c32553e-62c6-4c7a-bf05-249771364f04 40ebe55356ada6c830fa793745ef4c498d896c73 237 11 1151 8.544 filtered_out
6af04302-04c8-4d8d-8e87-aa69178b3f24 40ebe55356ada6c830fa793745ef4c498d896c73 36 26 832 8.234 filtered_out
68804104-71dc-465c-b82d-3a99a4689701 40ebe55356ada6c830fa793745ef4c498d896c73 38 20 1010 8.325 filtered_out
3e81c32a-f2ee-4719-a88d-e0affe93d26f 40ebe55356ada6c830fa793745ef4c498d896c73 348 24 1137 8.124 filtered_out
37dfa1d5-5d84-486c-bf47-9eb6438f5645 40ebe55356ada6c830fa793745ef4c498d896c73 410 30 555 8.219 filtered_out
3784283c-47cc-48ac-8d7b-7efd32123b56 40ebe55356ada6c830fa793745ef4c498d896c73 243 20 893 8.54 filtered_out
(pycoQC)
In [5]:
# Extract a custom list of fast5 attributes (--fields) instead of the default fields,
# and raise the verbosity (--verbose_level 1) to print progress messages and
# per-field found / not-found counts.
Fast5_to_seq_summary \
-f data/ \
-s results/custom_summary_sequencing.tsv \
--verbose_level 1 \
--fields mean_qscore_template called_events duration strand_score
# Preview the first lines of the custom summary file.
head results/custom_summary_sequencing.tsv
Check input data and options
Start processing fast5 files
22 reads [00:00, 821.28 reads/s]
Overall counts valid files: 22
fields found mean_qscore_template: 22
called_events: 22
duration: 22
strand_score: 22
fields not found
Total reads: 22 / Average speed: 464.09 reads/s
(pycoQC) (pycoQC) mean_qscore_template called_events duration strand_score
8.304 1547 23218 -0.0008
8.325 3846 57697 -0.0004
7.608 1615 24233 -0.0007
8.219 2080 31208 -0.0011
7.805 1438 21583 -0.0006
8.544 3740 56107 -0.0003
8.23 2778 51387 -0.0007
8.741 3117 46767 -0.0006
8.987 4235 63526 -0.0003
(pycoQC)
In [6]:
# Default fields plus --include_path, which adds an extra column containing the
# absolute path of the fast5 file each row was extracted from.
# --verbose_level 1 prints progress messages and per-field counts.
Fast5_to_seq_summary \
-f data/ \
-s results/fn_summary_sequencing.tsv \
--verbose_level 1 \
--include_path
# Preview the first lines of the summary file, including the extra path column.
head results/fn_summary_sequencing.tsv
Check input data and options
Start processing fast5 files
22 reads [00:00, 493.99 reads/s]
Overall counts valid files: 22
fields found read_id: 22
run_id: 22
channel: 22
start_time: 22
sequence_length_template: 22
mean_qscore_template: 22
calibration_strand_genome_template: 22
fields not found barcode_arrangement: 22
Total reads: 22 / Average speed: 364.14 reads/s
(pycoQC) (pycoQC) read_id run_id channel start_time sequence_length_template mean_qscore_template calibration_strand_genome_template path
5b7fadd0-c646-4c7b-9800-66ee658a5ca8 40ebe55356ada6c830fa793745ef4c498d896c73 150 37 468 7.608 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_150_strand.fast5
2c32553e-62c6-4c7a-bf05-249771364f04 40ebe55356ada6c830fa793745ef4c498d896c73 237 11 1151 8.544 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_237_strand.fast5
e6a8e4d0-7b3c-471a-be26-fa7857d12663 40ebe55356ada6c830fa793745ef4c498d896c73 318 15 392 8.304 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_318_strand.fast5
f8325de9-a77e-4616-a4a8-69ecf32e1688 40ebe55356ada6c830fa793745ef4c498d896c73 354 16 568 8.206 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_354_strand.fast5
3e81c32a-f2ee-4719-a88d-e0affe93d26f 40ebe55356ada6c830fa793745ef4c498d896c73 348 24 1137 8.124 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_348_strand.fast5
37dfa1d5-5d84-486c-bf47-9eb6438f5645 40ebe55356ada6c830fa793745ef4c498d896c73 410 30 555 8.219 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_410_strand.fast5
9a1c5296-2ab1-4abd-8d50-e059754cf332 40ebe55356ada6c830fa793745ef4c498d896c73 319 33 1235 8.119 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_319_strand.fast5
6af04302-04c8-4d8d-8e87-aa69178b3f24 40ebe55356ada6c830fa793745ef4c498d896c73 36 26 832 8.234 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_36_strand.fast5
0ef36fa0-7c3f-47f5-8410-027f50724701 40ebe55356ada6c830fa793745ef4c498d896c73 108 26 915 9.407 filtered_out /home/aleg/Programming/Packages/pycoQC/docs/Fast5_to_seq_summary/data/20180625_FAH77625_MN23126_sequencing_run_S1_57529_read_10_ch_108_strand.fast5
(pycoQC)
Content source: a-slide/pycoQC
Similar notebooks: