author: josenavasmolina@gmail.com
date: 25 Sep 2017
license: BSD3

adaptor_cleanup.ipynb

Commands to remove the sequences affected by adaptor contamination.


In [ ]:
# Placeholder paths: edit both before running the cells below.
# split_lib_basedir: directory that the bash cell lists; every entry in it is
# expected to contain a `seqs.fna` file.
split_lib_basedir = '/path/to/split_libraries_dirs/'
# adaptor_cleanup_dir: output root; the bash cell creates one sub-directory per
# entry of split_lib_basedir and writes `seqs_to_filter.fna` and
# `filtered_seqs.fna` into it.
adaptor_cleanup_dir = '/path/to/output/cleanup_dir/'

In [ ]:
%%bash -s "$split_lib_basedir" "$adaptor_cleanup_dir"
# Remove adaptor-contaminated reads from every split-library directory.
#
# A %%bash cell runs in a separate process and cannot see the notebook's
# Python variables, so IPython expands them on the magic line above and they
# arrive here as positional arguments:
#   $1 - directory holding one sub-directory per library, each with a seqs.fna
#   $2 - directory under which the per-library results are written
# For each library this writes into $2/<library>/:
#   seqs_to_filter.fna - records whose sequence line contains an adaptor
#   filtered_seqs.fna  - output of filter_fasta.py run with those records
#                        negated (-n), i.e. seqs.fna without them
split_lib_basedir=${1:?split_lib_basedir must be set in the notebook}
adaptor_cleanup_dir=${2:?adaptor_cleanup_dir must be set in the notebook}

# Two adaptor sequences, each followed by its reverse complement.
adaptor_regex='ATCTCGTATGCCGTCTTCTGC|GCAGAAGACGGCATACGAGAT|GTAGTCCGGCTGACTGACT|AGTCAGTCAGCCGGACTAC'

# Glob instead of parsing `ls`, so names with whitespace survive intact.
for lib_path in "${split_lib_basedir%/}"/*
do
    in_fasta=$lib_path/seqs.fna
    if [[ ! -f "$in_fasta" ]]; then
        # Stray files, empty directories, or an unmatched glob end up here.
        printf 'skipping %s: no seqs.fna found\n' "$lib_path" >&2
        continue
    fi

    i=${lib_path##*/}
    out_dir=${adaptor_cleanup_dir%/}/$i
    mkdir -p -- "$out_dir"

    # -B 1 also emits the line preceding each hit, i.e. the FASTA header.
    # NOTE(review): this assumes every record is exactly one header line plus
    # one sequence line - confirm seqs.fna is not line-wrapped.
    # --no-group-separator (GNU grep) suppresses the `--` lines that would
    # otherwise be inserted between non-adjacent hits.
    # grep exits 1 when nothing matches; the resulting empty file is still
    # handed to filter_fasta.py, as before.
    grep -E -B 1 --no-group-separator -- "$adaptor_regex" "$in_fasta" \
        > "$out_dir/seqs_to_filter.fna"

    filter_fasta.py -f "$in_fasta" \
                    -o "$out_dir/filtered_seqs.fna" \
                    -a "$out_dir/seqs_to_filter.fna" \
                    -n
done

In [ ]: