In [1]:
# Show the entries of ../data whose listing line contains "Cae"; used here to
# locate the Caenorhabditis elegans cDNA FASTA (and check its size and date).
ls -lah ../data | grep Cae


-rw-rw-r-- 1 ilya ilya  12M May  2  2015 Caenorhabditis_elegans.WBcel235.rel79.cdna.all.fa.gz

In [7]:
# Build a kallisto index from the gzipped C. elegans cDNA FASTA.
# -i cele : write the index to a file named "cele" in the current directory;
#           the quant step later loads it with the same -i value.
~/bin/kallisto_linux-v0.43.0/kallisto index -i cele ../data/Caenorhabditis_elegans.WBcel235.rel79.cdna.all.fa.gz


[build] loading fasta file ../data/Caenorhabditis_elegans.WBcel235.rel79.cdna.all.fa.gz
[build] k-mer length: 31
[build] warning: clipped off poly-A tail (longer than 10)
        from 17 target sequences
[build] counting k-mers ... done.
[build] building target de Bruijn graph ...  done 
[build] creating equivalence classes ...  done
[build] target de Bruijn graph has 83150 contigs and contains 28899711 k-mers 


In [16]:
# Show the entries of ../data whose listing line contains "lo" -- the
# per-sample directories (lo03, lo05, ...). Note that grep matches against the
# whole `ls -l` line (owner, date, ...), not just the file name.
ls -lah ../data | grep lo


drwxrwxr-x 2 ilya ilya 4.0K Aug 18 14:19 lo03
drwxrwxr-x 2 ilya ilya 4.0K Aug 18 13:34 lo05
drwxrwxr-x 2 ilya ilya 4.0K Aug 18 13:47 lo09
drwxrwxr-x 2 ilya ilya 4.0K Aug 24 14:20 lo11

In [17]:
# Check that the kallisto index file "cele" exists in the current directory
# and see how large it is.
ls -lah | grep cele


-rw-rw-r-- 1 ilya ilya 587M Aug 24 15:00 cele

In [19]:
# Run kallisto quant for samples
#
# For every directory in $base_dir whose name contains "lo" (lo03, lo05, ...),
# quantify the paired-end reads R1.fastq.gz / R2.fastq.gz against the `cele`
# index with 100 bootstraps on 4 threads, writing the results to
# $base_dir/kallisto/<sample>.
#
# Notes:
#   * Samples are discovered with a glob instead of parsing `ls` output, and
#     every expansion is quoted, so names are never word-split or re-globbed.
#   * The gzipped FASTQ files are passed to kallisto directly (it reads gzip
#     itself). The previous <(zcat ...) process substitutions added two extra
#     processes whose failures were invisible to this script.
#   * A sample with a missing read file, or for which kallisto fails, is
#     reported on stderr and skipped; the remaining samples still run.
#   * Deliberately no `set -e` / `exit`: this is a notebook cell, and either
#     one would terminate the whole shell session.

base_dir="../data"
kallisto="$HOME/bin/kallisto_linux-v0.43.0/kallisto"
index="cele"

for sample_dir in "$base_dir"/*lo*/; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -d "$sample_dir" ]] || continue

    sample_dir=${sample_dir%/}   # drop the trailing slash added by the glob
    sample=${sample_dir##*/}     # directory name only, e.g. "lo03"

    echo "Running kallisto quant for sample $sample"

    read1=$sample_dir/R1.fastq.gz
    read2=$sample_dir/R2.fastq.gz
    if [[ ! -r "$read1" || ! -r "$read2" ]]; then
        echo "Skipping $sample: cannot read $read1 and/or $read2" >&2
        continue
    fi

    output=$base_dir/kallisto/$sample
    # mkdir -p is already a no-op when the directory exists.
    if ! mkdir -p -- "$output"; then
        echo "Skipping $sample: cannot create $output" >&2
        continue
    fi

    if ! "$kallisto" quant \
        -i "$index" \
        -t 4 \
        -o "$output" -b 100 "$read1" "$read2"; then
        echo "kallisto quant failed for sample $sample" >&2
    fi
done


Running kallisto quant for sample lo03

[quant] fragment length distribution will be estimated from the data
[index] k-mer length: 31
[index] number of targets: 32,905
[index] number of k-mers: 28,899,711
[index] number of equivalence classes: 52,215
[quant] running in paired-end mode
[quant] will process pair 1: /dev/fd/63
                             /dev/fd/62
[quant] finding pseudoalignments for the reads ... done
[quant] processed 23,072,281 reads, 21,806,695 reads pseudoaligned
[quant] estimated average fragment length: 157.124
[   em] quantifying the abundances ... done
[   em] the Expectation-Maximization algorithm ran for 1,508 rounds
[bstrp] number of EM bootstraps complete: 100

Running kallisto quant for sample lo05

[quant] fragment length distribution will be estimated from the data
[index] k-mer length: 31
[index] number of targets: 32,905
[index] number of k-mers: 28,899,711
[index] number of equivalence classes: 52,215
[quant] running in paired-end mode
[quant] will process pair 1: /dev/fd/63
                             /dev/fd/62
[quant] finding pseudoalignments for the reads ... done
[quant] processed 9,220,629 reads, 8,155,087 reads pseudoaligned
[quant] estimated average fragment length: 172.331
[   em] quantifying the abundances ... done
[   em] the Expectation-Maximization algorithm ran for 1,351 rounds
[bstrp] number of EM bootstraps complete: 100

Running kallisto quant for sample lo09

[quant] fragment length distribution will be estimated from the data
[index] k-mer length: 31
[index] number of targets: 32,905
[index] number of k-mers: 28,899,711
[index] number of equivalence classes: 52,215
[quant] running in paired-end mode
[quant] will process pair 1: /dev/fd/63
                             /dev/fd/62
[quant] finding pseudoalignments for the reads ... done
[quant] processed 6,489,681 reads, 6,077,363 reads pseudoaligned
[quant] estimated average fragment length: 138.835
[   em] quantifying the abundances ... done
[   em] the Expectation-Maximization algorithm ran for 1,400 rounds
[bstrp] number of EM bootstraps complete: 100

Running kallisto quant for sample lo11

[quant] fragment length distribution will be estimated from the data
[index] k-mer length: 31
[index] number of targets: 32,905
[index] number of k-mers: 28,899,711
[index] number of equivalence classes: 52,215
[quant] running in paired-end mode
[quant] will process pair 1: /dev/fd/63
                             /dev/fd/62
[quant] finding pseudoalignments for the reads ... done
[quant] processed 8,288,222 reads, 7,727,927 reads pseudoaligned
[quant] estimated average fragment length: 119.261
[   em] quantifying the abundances ... done
[   em] the Expectation-Maximization algorithm ran for 1,267 rounds
[bstrp] number of EM bootstraps complete: 100


In [21]:
# Inspect the files kallisto quant wrote for sample lo03
# (abundance.h5, abundance.tsv, run_info.json).
ls -lah ../data/kallisto/lo03


total 15M
drwxrwxr-x 2 ilya ilya 4.0K Aug 24 15:11 .
drwxrwxr-x 6 ilya ilya 4.0K Aug 24 15:10 ..
-rw-rw-r-- 1 ilya ilya  13M Aug 24 15:13 abundance.h5
-rw-rw-r-- 1 ilya ilya 1.1M Aug 24 15:11 abundance.tsv
-rw-rw-r-- 1 ilya ilya  300 Aug 24 15:11 run_info.json

In [22]:
# Preview the first 10 lines of the lo03 abundance table: the header
# (target_id, length, eff_length, est_counts, tpm) plus the first transcripts.
head ../data/kallisto/lo03/abundance.tsv


target_id	length	eff_length	est_counts	tpm
Y110A7A.10	1787	1630.88	2693.83	75.4985
F27C8.1	1940	1783.88	115	2.94661
F07C3.7	1728	1571.88	180	5.23412
F52H2.2	1739	1582.88	621	17.9322
T13A10.10a	1734	1577.88	127	3.67892
T13A10.10b	1598	1441.88	0	0
C55C2.5a	1840	1683.88	76.6547	2.08074
C55C2.5b	1606	1449.88	5.34533	0.168513
C55C2.5c	1671	1514.88	0	0

In [ ]: