notebook.community

Edit and run



In [2]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [3]:

    
from glob import glob
from os.path import splitext



In [4]:

    
cd moleculo/









    



/Users/luizirber/biodata/moleculo



In [8]:

    
for f in glob("LR6000017-DNA_A01-LRAAA-*_LongRead_500_1499nt.fastq.gz"):
    fbase = splitext(f)[0]
    fcount = fbase + ".kh"
    fdist = fbase + ".dist"
    #!load-into-counting.py -x 1e8 -N 4 -k 32 $fcount $f
    #!abundance-dist.py -s $fcount $f $fdist



In [9]:

    
!ls -1 *.dist









    



LR6000017-DNA_A01-LRAAA-1_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-1_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-2_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-2_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-3_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-3_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-4_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-4_LongRead_500_1499nt.fastq.dist
LR6000017-DNA_A01-LRAAA-5_LongRead.fastq.dist
LR6000017-DNA_A01-LRAAA-5_LongRead_500_1499nt.fastq.dist



In [6]:

    
distfiles = !ls *.dist
for distfile in distfiles:
    dist = numpy.loadtxt(distfile)
    figure()
    title(splitext(distfile)[0])
    xlabel('k-mer abundance')
    ylabel('# of k-mers with that abundance')
    axis(ymax=10)
    plot(dist[:,0], dist[:,1])



In [ ]: