Pathoscope pipeline development

Objective: develop script for processing MiSeq and PGM data using pathoscope

To Do:

  • Check commands
  • PathoID command
  • function for running pipeline

In [1]:
import sys
import time
import subprocess

In [2]:
def pathoqc_command(fastq1, out_dir, path_pathoqc, plat, thread_num, fastq2=False):
    ## log file stores standard out
    log_file = open(out_dir + "/pathoqc"+ time.strftime("-%Y-%m-%d-%H-%M-%S.log"),'w')
    stderr_file = open(out_dir + "/pathoqc"+ time.strftime("-%Y-%m-%d-%H-%M-%S.stder"),'w')
    
    pathoqc_command = ["python",path_pathoqc]
    
    if fastq2:
        pathoqc_command = pathoqc_command + ['-1',fastq1, '-2',fastq2,'-s',plat,'-p',str(thread_num),'-o',out_dir]
        subprocess.call(pathoqc_command, stdout=log_file, stderr=stderr_file)
    else:
        pathoqc_command = pathoqc_command + ['-U',fastq1,'-s',plat,'-p',thread_num,'-o',out_dir]
        subprocess.call(pathoqc_command, stdout=log_file, stderr=stderr_file)

In [3]:
def pathomap_command(ref_path, index_dir, fastq1, out_dir, path_pathoscope,fastq2=False):
    import re
    ## log file stores standard out
    log_file = open(out_dir + "/pathomap"+time.strftime("-%Y-%m-%d-%H-%M-%S.log"),'w')
    stderr_file = open(out_dir + "/pathomap"+ time.strftime("-%Y-%m-%d-%H-%M-%S.stder"),'w')
    
    ## output sam file
    out_sam =  re.sub('[fastq,fq]', 'sam', fastq1)
    
    ## pathoscope command root
    pathomap_command = ["python",path_pathoscope,'MAP','-targetRefFiles',ref_path,'-indexDir',index_dir,'-outDir',out_dir,'-outAlign', out_sam]
    
    if fastq2:
        pathomap_command = pathomap_command + ['-1',fastq1, '-2',fastq2]
        subprocess.call(pathomap_command, stdout=log_file,stderr=stderr_file)
    else:
        pathomap_command = pathomap_command + ['-U',fastq1]
        subprocess.call(pathomap_command, stdout=log_file,stderr=stderr_file)

In [4]:
def pathoid_command(path_pathoscope, input_sam, out_dir):
    # command for running pathoid

    ## log file stores standard out
    log_file = open(out_dir + "/pathoid"+time.strftime("-%Y-%m-%d-%H-%M-%S.log"),'w')
    stderr_file = open(out_dir + "/pathoid"+ time.strftime("-%Y-%m-%d-%H-%M-%S.stder"),'w')
    
    ## pathoscope command root
    pathoid_command = ["python",path_pathoscope,'ID','-alignFile',input_sam,'-fileType',
                       'sam','-outDir',out_dir,'--outMatrix']
    subprocess.call(pathoid_command, stdout=log_file,stderr=stderr_file)

In [6]:
dev_plat='Illumina'
dev_fastq1='/media/nolson/second/current_projects/micro_rm_dev/utilities/pathoqc_v0.1.1/pathoqc/examples/pairedEnd_1.fastq'
dev_fastq2='/media/nolson/second/current_projects/micro_rm_dev/utilities/pathoqc_v0.1.1/pathoqc/examples/pairedEnd_2.fastq'
dev_out_dir = '/media/nolson/second/current_projects/micro_rm_dev/dev/genome_purity/'
path_pathoqc='/media/nolson/second/current_projects/micro_rm_dev/utilities/pathoqc_v0.1.1/pathoqc/pathoqc.py'
thread_num=8
ref_path='/media/nolson/second/current_projects/micro_rm_dev/utilities/patho_utils/micro_rm_patho_db_ti.fa'
index_path='/media/nolson/second/current_projects/micro_rm_dev/utilities/patho_utils/'
path_pathoscope='/media/nolson/second/current_projects/micro_rm_dev/utilities/pathoscope2/pathoscope/pathoscope.py'
trim_fastq1='/media/nolson/second/current_projects/micro_rm_dev/dev/genome_purity/pairedEnd_1_tr.fq'
trim_fastq2= '/media/nolson/second/current_projects/micro_rm_dev/dev/genome_purity/pairedEnd_2_tr.fq'
pathomap_sam = '/media/nolson/second/current_projects/micro_rm_dev/dev/genome_purity/pathomap-appendAlign.sam'

In [7]:
pathoqc_command(plat=dev_plat,fastq1=dev_fastq1,fastq2=dev_fastq2,out_dir=dev_out_dir,path_pathoqc=path_pathoqc,
                thread_num=8)
pathomap_command(ref_path=ref_path,index_dir=index_path,fastq1=dev_fastq1,fastq2=dev_fastq2,out_dir=dev_out_dir, 
                 path_pathoscope=path_pathoscope)
pathoid_command(pathomap_sam, dev_out_dir)

In [ ]: