In [ ]:
import os
import sys
sys.path.append(os.getcwd().replace("notebooks/awsCluster", "src/awsCluster"))
from util import DesignFileLoader
## ---------------------------------------------------------------------------
## User configuration: edit the placeholder values below before running the
## remaining cells.
## ---------------------------------------------------------------------------

## S3 locations: where the input files are read from and where results go.
s3_input_files_address = "s3://path/to/s3_input_files_address"
s3_output_files_address = "s3://path/to/s3_output_files_address"

## Name of the CFNCluster to create (and optionally delete at the end).
your_cluster_name = "cluster_name"

## Private key file used to open the SSH connection to the cluster.
private_key = "/path/to/aws_priate_key.pem"

## Name of the project, passed to the selected pipeline.
project_name = "project_name"

## Pipeline selector. It must be a string: the later cells compare it with
## string literals and call use_case.startswith("2").
##   "1"   : whole-genome sequencing pipeline
##   "2.1" : workflow "star_htseq_workflow" of the RNA sequencing pipeline
##   "2.2" : workflow "kallisto_deseq_workflow" of the RNA sequencing pipeline
##   "2.3" : workflow "star_gatk_workflow" of the RNA sequencing pipeline
##   "3"   : ChIP sequencing pipeline
##   "4"   : miRNA sequencing pipeline
use_case = "1"

## Set to True to have the final cleanup cell delete the cluster.
delete_cfncluster = False
In [ ]:
from cfnCluster import CFNClusterManager, ConnectionManager

## Ask the cluster manager for the cluster named in the configuration cell;
## the value it returns is used below as the hostname of the master node.
master_ip_address = CFNClusterManager.create_cfn_cluster(
    cluster_name=your_cluster_name)

## Open the connection to the master node as "ec2-user" with the configured
## private key. The resulting client is handed to every pipeline cell.
ssh_client = ConnectionManager.connect_master(
    hostname=master_ip_address,
    username="ec2-user",
    private_key_file=private_key)
In [ ]:
from dnaSeq import WGSPipelineManager

## Whole-genome sequencing pipeline: runs only when use_case is "1".
if use_case == "1":
    ## Steps to run. Steps listed for this pipeline:
    ## "fastqc", "bwa-alignment", "post-alignment", "gatk-haplotype"
    analysis_steps = ["fastqc", "bwa-alignment"]

    ## Path of the example design file, derived from the current working
    ## directory by swapping the notebook folder for the data file path.
    design_file = os.getcwd().replace(
        "notebooks/awsCluster",
        "data/awsCluster/dnaseq_design_example.txt")

    ## Read the samples and groups from the design file (DesignFileLoader).
    sample_list, group_list = DesignFileLoader.load_design_file(design_file)

    ## Hand everything to the whole-genome sequencing pipeline manager.
    WGSPipelineManager.execute(
        ssh_client,
        project_name,
        analysis_steps,
        s3_input_files_address,
        sample_list,
        group_list,
        s3_output_files_address)
In [ ]:
from rnaSeq import RNAPipelineManager

## RNA sequencing, STAR + HTSeq workflow: runs only when use_case is "2.1".
if use_case == "2.1":
    workflow = "star_htseq_workflow"

    ## Steps to run. Steps listed for this workflow:
    ## "fastqc", "alignment", "counting"
    analysis_steps = ["fastqc", "alignment", "counting"]

    ## Path of the example design file, derived from the current working
    ## directory by swapping the notebook folder for the data file path.
    design_file = os.getcwd().replace(
        "notebooks/awsCluster",
        "data/awsCluster/rnaseq_design_example.txt")

    ## Read the samples and groups from the design file (DesignFileLoader).
    sample_list, group_list = DesignFileLoader.load_design_file(design_file)

    ## Run the RNA sequencing pipeline with the chosen workflow.
    RNAPipelineManager.run_analysis(
        ssh_client,
        workflow,
        project_name,
        analysis_steps,
        s3_input_files_address,
        sample_list,
        group_list,
        s3_output_files_address)
In [ ]:
from rnaSeq import RNAPipelineManager

## RNA sequencing, Kallisto + DESeq workflow: runs only when use_case is "2.2".
if use_case == "2.2":
    workflow = "kallisto_deseq_workflow"

    ## Steps to run. Steps listed for this workflow:
    ## "fastqc", "alignment", "counting", "differential_calculation"
    analysis_steps = ["fastqc"]

    ## Path of the example design file, derived from the current working
    ## directory by swapping the notebook folder for the data file path.
    design_file = os.getcwd().replace(
        "notebooks/awsCluster",
        "data/awsCluster/rnaseq_design_example.txt")

    ## Read the samples and groups from the design file (DesignFileLoader).
    sample_list, group_list = DesignFileLoader.load_design_file(design_file)

    ## Run the RNA sequencing pipeline with the chosen workflow.
    RNAPipelineManager.run_analysis(
        ssh_client,
        workflow,
        project_name,
        analysis_steps,
        s3_input_files_address,
        sample_list,
        group_list,
        s3_output_files_address)
In [ ]:
from rnaSeq import RNAPipelineManager

## RNA sequencing, STAR + GATK workflow: runs only when use_case is "2.3".
if use_case == "2.3":
    workflow = "star_gatk_workflow"

    ## Steps to run. Steps listed for this workflow:
    ## "fastqc", "alignment", "variant_calling"
    analysis_steps = ["fastqc"]

    ## Path of the example design file, derived from the current working
    ## directory by swapping the notebook folder for the data file path.
    design_file = os.getcwd().replace(
        "notebooks/awsCluster",
        "data/awsCluster/rnaseq_vc_design_example.txt")

    ## Read the samples and groups from the design file (DesignFileLoader).
    sample_list, group_list = DesignFileLoader.load_design_file(design_file)

    ## Run the RNA sequencing pipeline with the chosen workflow.
    RNAPipelineManager.run_analysis(
        ssh_client,
        workflow,
        project_name,
        analysis_steps,
        s3_input_files_address,
        sample_list,
        group_list,
        s3_output_files_address)
In [ ]:
from chipSeq import ChipPipelineManager

## ChIP sequencing pipeline (HOMER workflow): runs only when use_case is "3".
if use_case == "3":
    workflow = "homer_workflow"

    ## Steps to run. Steps listed for this workflow:
    ## "fastqc", "alignment", "make_tag_directory", "make_UCSC_file",
    ## "find_peaks", "annotate_peaks", "pos2bed", "find_motifs_genome"
    analysis_steps = ["annotate_peaks", "pos2bed", "find_motifs_genome"]

    ## Style option: either "factor" or "histone".
    style = "histone"

    ## Genome to use. Currently available genomes: hg18, hg19, mm9, mm10
    genome = "hg19"

    ## Path of the example design file, derived from the current working
    ## directory by swapping the notebook folder for the data file path.
    design_file = os.getcwd().replace(
        "notebooks/awsCluster",
        "data/awsCluster/chipSeq_design_example.txt")

    ## ChIP-seq design files go through their own loader function.
    sample_list, group_list = DesignFileLoader.load_chipseq_design_file(design_file)

    ## Run the ChIP sequencing pipeline.
    ChipPipelineManager.run_analysis(
        ssh_client,
        workflow,
        project_name,
        analysis_steps,
        s3_input_files_address,
        sample_list,
        group_list,
        style,
        genome,
        s3_output_files_address)
In [ ]:
from miRNASeq import SmallRNAPipelineManager

## miRNA (small RNA) sequencing pipeline: runs only when use_case is "4".
if use_case == "4":
    ## Steps to run. Steps listed for this pipeline:
    ## "fastqc", "bowtie-alignment", "counting"
    analysis_steps = ["fastqc", "bowtie-alignment", "counting"]

    ## Path of the example design file, derived from the current working
    ## directory by swapping the notebook folder for the data file path.
    design_file = os.getcwd().replace(
        "notebooks/awsCluster",
        "data/awsCluster/smallrnaseq_design_example.txt")

    ## Read the samples and groups from the design file (DesignFileLoader).
    sample_list, group_list = DesignFileLoader.load_design_file(design_file)

    ## Hand everything to the small RNA pipeline manager.
    SmallRNAPipelineManager.execute(
        ssh_client,
        project_name,
        analysis_steps,
        s3_input_files_address,
        sample_list,
        group_list,
        s3_output_files_address)
In [ ]:
from miRNASeq import SmallRNAPipelineManager
from chipSeq import ChipPipelineManager
from dnaSeq import WGSPipelineManager
from rnaSeq import RNAPipelineManager

## Ask the manager of the selected pipeline for its processing status.
## The selectors are mutually exclusive, so at most one branch runs.
if use_case == "1":
    ## whole-genome sequencing
    WGSPipelineManager.check_processing_status(ssh_client)
elif use_case.startswith("2"):
    ## any of the RNA sequencing workflows ("2.x")
    RNAPipelineManager.check_processing_status(ssh_client)
elif use_case == "3":
    ## ChIP sequencing
    ChipPipelineManager.check_processing_status(ssh_client)
elif use_case == "4":
    ## miRNA sequencing
    SmallRNAPipelineManager.check_processing_status(ssh_client)
In [ ]:
from cfnCluster import CFNClusterManager

## Optional teardown. Deleting the cluster is destructive, so the explicit
## comparison is kept: only a flag value equal to True triggers the deletion.
if delete_cfncluster == True:  # noqa: E712
    CFNClusterManager.delete_cfn_cluster(cluster_name=your_cluster_name)
In [ ]:
from rnaSeq import ConfigureBuilder

## Tool options are given as a list of [option name, value] pairs, with both
## entries written as strings.

## STAR options
star_option_list = [
    ["runThreadN", "1"],
    ["clip3pNbases", "0"],
    ["outFileNamePrefix", "mysam"],
    ["outReadsUnmapped", "None"],
]
ConfigureBuilder.configure_star(star_option_list)

## Kallisto options
kallisto_option_list = [
    ["fragment-length", "50"],
    ["bootstrap-samples", "0"],
    ["seed", "42"],
]
ConfigureBuilder.configure_kallisto(kallisto_option_list)