Snakemake

conda install -c bioconda snakemake
conda install graphviz

In [ ]:
SAMPLES = ['ctl1', 'ctl2']

rule all:
    input:
        'merged.txt'

rule acounts:
    input:
        file='{sample}.fastq'
    output:
        '{sample}_counts.txt'
    run:
        with open(input.file, 'r') as f:
            nc = [str(l.count('A')) for l in f if not l[0]=='@']
        data = ', '.join(nc)+'\n'
        with open(output[0], 'w') as f: f.write(data)

rule merge:
    input:
        counts=expand('{sample}_counts.txt',sample=SAMPLES)
    output:
        'merged.txt'
    shell:
        """
        for f in {input.counts}
        do
			cat $f >> {output}
		done
        """

In [ ]:
snakemake --dag merged.txt | dot -Tsvg > dag.svg

In [1]:
snakemake --name mylittleworkflow.txt


learning.ipynb	scicomp.ipynb	  visualization.ipynb
networks.ipynb	statistics.ipynb  workflows.ipynb

Nextflow


In [2]:
#!/usr/bin/env nextflow
 
params.range = 100
 
/*
 * A trivial Perl script producing a list of numbers pair
 */
process perlTask {
    output:
    stdout randNums
 
    shell:
    '''
    #!/usr/bin/env perl
    use strict;
    use warnings;
 
    my $count;
    my $range = !{params.range};
    for ($count = 0; $count < 10; $count++) {
        print rand($range) . ', ' . rand($range) . "\n";
    }
    '''
}
 
 
/*
 * A Python script task which parses the output of the previous script
 */
process pyTask {
    echo true
 
    input:
    stdin randNums
 
    '''
    #!/usr/bin/env python
    import sys
 
    x = 0
    y = 0
    lines = 0
    for line in sys.stdin:
        items = line.strip().split(",")
        x = x+ float(items[0])
        y = y+ float(items[1])
        lines = lines+1
 
    print "avg: %s - %s" % ( x/lines, y/lines )
    '''
 
}


  File "<ipython-input-2-d94d806386de>", line 8
    * A trivial Perl script producing a list of numbers pair
    ^
IndentationError: unexpected indent

Task:

  • Design a lightweight pipeline module for your architecture (PC, cluster or cloud)!

In [ ]: