# In [ ]:
#################################
# Raw FASTQ files preprocessing #
#################################
#
# (Latest) library structure (may be changed):
# [Alu primer - 12 bp][Alu sequence - 6 bp][Flank][Adapter 1 - 10 bp][Adapter 2, barcode - 9 bp][Adapter3 - 12 bp]
#
# R1: alu primer mate
# R2: adapter mate
#
#
# Steps:
# - separate reads into good and bad, depending on mistakes in the primer and the adapters
#   and on unwanted elements in the flanks
#   (also keeps the barcodes of good reads in the good R2 file and the location of the mistake in the bad R1 file)
# Import module (main code)
import importlib

import trimmR

# Re-import trimmR so that edits made to the module after its first import are
# picked up without restarting the interpreter/kernel.
# `imp` is deprecated since Python 3.4 and was removed in Python 3.12;
# `importlib.reload` is the supported replacement.
importlib.reload(trimmR)

# Variable parameters
#################################
# The number of permissible errors:
mist = 1
# Primer, ad1 = Adapter 1, ad2 = Adapter 3 (aka Green)
# NOTE(review): the library-structure comment at the top of the file lists
# Adapter 1 as 10 bp and Adapter 3 as 12 bp, but ad1 below is 11 characters
# and ad2 is 8 characters - confirm which of the two is current.
primer = 'GAGCCACCGCGC'
ad1 = 'GCGTGCTGCGG'
ad2 = 'AGGGCGGT'
# Length of barcode
barlen = 9
# List of wrong elements in flank
# NOTE(review): elem_remove is defined here but is not passed to trimmR.main
# below - confirm whether trimmR.main is expected to receive it.
elem_remove = ['ACGT']
# Input FASTQ files folder path.
# NOTE(review): Python file APIs do not expand '~' on their own; presumably
# trimmR expands it (e.g. via os.path.expanduser) - verify, this applies to
# both inputdir and outputdir.
inputdir = '~/data/'
# Output folder for processed FASTQ files.
outputdir = '~/data/processed'
#################################
# Main function: run the preprocessing with the parameters defined above.
trimmR.main(inputdir, outputdir, mist, primer, ad1, ad2, barlen)