In [1]:
# Show the command-line interface of the evaluation script.
./evaluate.py --help


usage: evaluate.py [-h] [-t T] [--mode {Kevlar,GATK,TrioDenovo}] [--cov COV]
                   [--correct CORRECT] [--missing MISSING] [--false FALSE]
                   [--collisions COLLISIONS] [--vartype {SNV,INDEL}]
                   [--minlength MINLENGTH] [--maxlength MAXLENGTH] [--do-all]
                   simvar varcalls

positional arguments:
  simvar                simulated variants in .bed format
  varcalls              VCF file of variant calls

optional arguments:
  -h, --help            show this help message and exit
  -t T, --tolerance T   extend real variants by T nucleotides when querying
                        for overlap with variant calls; default is 10
  --mode {Kevlar,GATK,TrioDenovo}
                        Kevlar|GATK|TrioDenovo
  --cov COV             coverage
  --correct CORRECT     print correct variants to file
  --missing MISSING     print missing variants to file
  --false FALSE         print false variants to file
  --collisions COLLISIONS
                        print calls that match the same variant
  --vartype {SNV,INDEL}
  --minlength MINLENGTH
  --maxlength MAXLENGTH
  --do-all              ignore all other arguments and analyze all data

In [2]:
# Score the Kevlar calls (coverage 30) against the simulated variants, with no
# variant-type filter. Correct, missing and false variants are written to
# calls.CORRECT, calls.MISSING and calls.FALSE respectively.
./evaluate.py \
    --mode=Kevlar \
    --cov=30 \
    --correct=calls.CORRECT \
    --missing=calls.MISSING \
    --false=calls.FALSE \
    14153-refr-calls-denovodb-dedup.bed \
    calls.likescoremin50.varfilt.scored.vcf.gz


Caller Coverage VarType Correct False Missing
Kevlar       30     All     101   115      95

In [3]:
# Same evaluation, restricted to SNVs via --vartype. Correct, missing and
# false variants are written to the calls.snv.* files.
./evaluate.py \
    --mode=Kevlar \
    --cov=30 \
    --vartype=SNV \
    --correct=calls.snv.CORRECT \
    --missing=calls.snv.MISSING \
    --false=calls.snv.FALSE \
    14153-refr-calls-denovodb-dedup.bed \
    calls.likescoremin50.varfilt.scored.vcf.gz


Caller Coverage VarType Correct False Missing
Kevlar       30     SNV      96    51      84

In [4]:
# Same evaluation, restricted to indels via --vartype. No --minlength or
# --maxlength is given. Correct, missing and false variants are written to
# the calls.indel.* files.
./evaluate.py \
    --mode=Kevlar \
    --cov=30 \
    --vartype=INDEL \
    --correct=calls.indel.CORRECT \
    --missing=calls.indel.MISSING \
    --false=calls.indel.FALSE \
    14153-refr-calls-denovodb-dedup.bed \
    calls.likescoremin50.varfilt.scored.vcf.gz


Caller Coverage            VarType Correct False Missing
Kevlar       30  INDEL None-Nonebp       4    65      12

In [5]:
# Turn column 2 of calls.MISSING (apparently "chrom:start-end" -- confirm
# against evaluate.py) into a tab-separated BED file: ':' and '-' both become
# tabs and every line gets a "chr" prefix.
cut -f 2 calls.MISSING \
    | tr ':-' '\t\t' \
    | sed 's/^/chr/' \
    > calls.MISSING.bed

# Count the missing variants that overlap at least one interval in
# REPEATS.bed.gz (-u reports each such -a entry once).
printf 'Repetitive: '
bedtools intersect -wa -u -a calls.MISSING.bed -b REPEATS.bed.gz | wc -l

# Save the missing variants with no overlap at all (-v), then count them and
# preview the first ten.
printf 'Non-repetitive: '
bedtools intersect -wa -v -a calls.MISSING.bed -b REPEATS.bed.gz > calls.MISSING.nonrep.bed
wc -l < calls.MISSING.nonrep.bed
printf '\n'
head -n 10 calls.MISSING.nonrep.bed


Repetitive: 75
Non-repetitive: 20

chrX	18534420	18534430
chr2	196738258	196738268
chr15	67856022	67856032
chr11	58534972	58534982
chr22	37453533	37453543
chr5	107337526	107337536
chr9	140919033	140919043
chr1	198243296	198243306
chr10	56436845	56436855
chr11	78722477	78722487