In [ ]:
#################################
# Raw FASTQ files preprocessing #
#################################
# 
# Latest library structure (subject to change):
# [Alu primer - 12 bp][Alu sequence - 6 bp][Flank][Adapter 1 - 10 bp][Adapter 2, barcode - 9 bp][Adapter 3 - 12 bp]
# 
# R1: alu primer mate
# R2: adapter mate
# 
# 
# Steps:
# - Trim the Alu primer, the Alu sequence and all adapters from the reads, while saving the Alu sequence and the barcode sequence.


# All imports here, don't touch it unless you are a developer.
import os

#import preprocessing


# Library parameters: lengths, in bp, of the fixed segments of each read.
# They mirror the library structure described at the top of this cell.
ALU_PRIMER_LEN = 12    # Alu primer
ALU_SEQUENCE_LEN = 6   # Alu sequence that follows the primer
ADAPTER_1 = 10         # adapter 1
BARCODE_LEN = 9        # adapter 2, which carries the barcode
ADAPTER_3 = 12         # adapter 3

# NOTE: "~" is expanded here because Python's file APIs (open, os.listdir, ...)
# treat it as a literal directory name; os.path.expanduser is a no-op on
# paths that are already absolute, so expanding early is always safe.

# Input FASTQ files folder path.
RAW_FASTQ_FOLDER = os.path.expanduser("~/data/")
# Output folder for processed FASTQ files.
OUTPUT_FASTQ_FOLDER = os.path.expanduser("~/data/processed")


# preprocessing.process_raw_fastq(RAW_FASTQ_FOLDER, ...)

In [ ]:
######################################
# Mapping flanks to the human genome #
######################################
# 
# Run bwa-mem and save the results to tables with coordinates.
# 


# Paths used by the mapping step. "~" is expanded up front because neither
# Python's file APIs nor subprocess (without a shell) expand it on their own.

# Path to the bwa-mem program.
BWAMEM_PATH = os.path.expanduser("~/programs/bwamem/bwamem")
# Output folder for SAM files.
OUTPUT_SAM_FOLDER = os.path.expanduser("~/data/sam")
# Output folder for the coordinate tables.
OUTPUT_TABLE_FOLDER = os.path.expanduser("~/data/coordtable")


# bwamem.run_bwamem(BWAMEM_PATH, OUTPUT_FASTQ_FOLDER, OUTPUT_SAM_FOLDER, OUTPUT_TABLE_FOLDER)

In [ ]:


In [ ]: