In [2]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [3]:
from glob import glob
from os.path import splitext

In [4]:
cd moleculo/


/Users/luizirber/biodata/moleculo

In [8]:
for f in glob("LR6000017-DNA_A01-LRAAA-*_LongRead_500_1499nt.fastq.gz"):
    fbase = splitext(f)[0]
    fcount = fbase + ".kh"
    fdist = fbase + ".dist"
    #!load-into-counting.py -x 1e8 -N 4 -k 32 $fcount $f
    #!abundance-dist.py -s $fcount $f $fdist

In [9]:
!ls -1 *.dist


LR6000017-DNA_A01-LRAAA-1_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-1_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-2_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-2_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-3_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-3_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-4_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-4_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-5_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-5_LongRead_500_1499nt.fastq.dist

In [6]:
distfiles = !ls *.dist
for distfile in distfiles:
    dist = numpy.loadtxt(distfile)
    figure()
    title(splitext(distfile)[0])
    xlabel('k-mer abundance')
    ylabel('# of k-mers with that abundance')
    axis(ymax=10)
    plot(dist[:,0], dist[:,1])



In [ ]: