notebook.community

Edit and run



In [1]:

    
# Execute the shared setup notebook inside this kernel's namespace.
# NOTE(review): the names used below without a local definition (np, os, etl, vcfnp,
# numexpr, pysamstats, allel, plt, log, nnz, tbl_genes, tbl_exons, tbl_regions_1b)
# are presumably provided by that notebook — confirm against shared_setup.ipynb.
%run ../../../shared_setup.ipynb









    



docker image cggh/biipy:v1.6.0



In [2]:

    
# Root directory of the truth data; load_coverage and tabulate_variants_truth
# build their BAM / VCF paths beneath it.
truth_dir = '/data/plasmodium/pfalciparum/pf-crosses/data/evaluation/truth'



In [3]:

    
# Path template of the per-cross GATK callset (.npz); tabulate_variants_gatk
# substitutes the {cross} placeholder.
gatk_callset_fn_template = '/data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/{cross}.gatk.final.npz'



In [4]:

    
def trim_alleles(a, b):

    # SNPs or already trimmed
    if len(a) == 1 or len(b) == 1:
        return a, b
    
    # sort by length, shortest first
    reverse = False
    if len(a) > len(b):
        a, b = b, a
        reverse = True
    
    # pick off reference base
    ref_base = a[0]
    assert a[0] == b[0], (a, b)
    
    # pick off suffix
    a_suffix = a[1:]
    b_suffix = b[1:]
    
    # trim
    if b_suffix.endswith(a_suffix):
        a = ref_base
        b = ref_base + b_suffix[:-1*len(a_suffix)]
        
    if reverse:
        return b, a
    else:
        return a, b



In [5]:

    
def tabulate_variants_gatk(cross, parent):
    """Tabulate the GATK variants at which one sample carries a non-reference allele.

    Loads the callset for `cross` from `gatk_callset_fn_template`, keeps the
    variants for which none of the listed FILTER_* flags is set and for which
    the genotype of sample column `parent` is greater than zero, and returns
    them as a petl table with one row per variant.

    Parameters
    ----------
    cross : str
        Cross identifier substituted into `gatk_callset_fn_template`.
    parent : int
        Column index of the sample within the callset's calldata; the
        corresponding sample name is logged.

    Returns
    -------
    petl table
        Fields CHROM, POS, REF, ALT, svlen, svtype, discovery. ALT is the
        allele selected by the sample's genotype (used as a 1-based index into
        the ALT array), REF/ALT are passed through `trim_alleles`, svlen is
        len(ALT) - len(REF) after trimming, svtype is 'SNP', 'INS' or 'DEL',
        and discovery is always True.
    """
    callset = np.load(gatk_callset_fn_template.format(cross=cross))
    all_variants = callset['variants']

    # variant-level selection: none of these filter flags may be set
    filter_expr = ('~FILTER_CNV & '
                   '~FILTER_DUP_SITE & '
                   '~FILTER_LOW_CONFIDENCE & '
                   '~FILTER_LOW_CONFIDENCE_PARENT & '
                   '~FILTER_MISSING_PARENT & '
                   '~FILTER_NON_CORE & '
                   '~FILTER_NON_MENDELIAN')
    passes_filters = numexpr.evaluate(filter_expr, local_dict=all_variants)

    # sample-level selection: the chosen sample must carry a non-reference allele
    calldata = callset['calldata']
    log(calldata.dtype.names[parent])
    parent_calls = vcfnp.view2d(calldata)['genotype'][:, parent]
    selected = passes_filters & (parent_calls > 0)
    log('n_variants', nnz(selected))

    kept_variants = np.compress(selected, all_variants)
    kept_calls = np.compress(selected, parent_calls)

    def called_alt(alts, row):
        # genotype N (>= 1) refers to the N-th entry of the ALT array
        return alts[row.genotype-1]

    def classify(row):
        if row.svlen == 0:
            return 'SNP'
        return 'INS' if row.svlen > 0 else 'DEL'

    # build the table one step at a time
    tbl = etl.fromarray(kept_variants)
    tbl = tbl.addcolumn('genotype', kept_calls)
    tbl = tbl.cut('CHROM', 'POS', 'REF', 'ALT', 'genotype')
    tbl = tbl.convert('ALT', called_alt, pass_row=True)
    tbl = tbl.cutout('genotype')
    tbl = tbl.convert(['CHROM', 'REF', 'ALT'], lambda v: str(v, 'ascii'))
    tbl = tbl.addfield('svlen', lambda row: len(row.ALT) - len(row.REF))
    # replace REF/ALT/svlen with their trimmed counterparts
    tbl = tbl.addfield('trim', lambda row: trim_alleles(row.REF, row.ALT))
    tbl = tbl.unpack('trim', ['REF_trim', 'ALT_trim'])
    tbl = tbl.addfield('svlen_trim', lambda row: len(row.ALT_trim) - len(row.REF_trim))
    tbl = tbl.cutout('REF', 'ALT', 'svlen')
    tbl = tbl.rename({'REF_trim': 'REF', 'ALT_trim': 'ALT', 'svlen_trim': 'svlen'})
    tbl = tbl.addfield('svtype', classify)
    tbl = tbl.addfield('discovery', True)

    return tbl



In [6]:

    
def load_coverage(alignment_method, assembly, rebuild=False):
    """Return per-position coverage for an assembly alignment, cached on disk.

    The BAM is expected at
    ``<truth_dir>/<alignment_method>/alignment/<assembly>.bam``; the coverage
    array is cached beside it as ``<bam>.coverage.npy``.

    Parameters
    ----------
    alignment_method : str
        Sub-directory of `truth_dir` naming the alignment method.
    assembly : str
        Assembly name, i.e. the BAM file name without the ``.bam`` extension.
    rebuild : bool, optional
        When true, recompute the coverage and overwrite the cache even if a
        cached file already exists.

    Returns
    -------
    array
        Coverage records with fields chrom, pos and reads_all. A cached file
        is returned as a numpy recarray view; a fresh computation returns
        whatever `pysamstats.load_coverage` produced.
    """
    bam_path = os.path.join(truth_dir, alignment_method, 'alignment', assembly + '.bam')
    cache_path = bam_path + '.coverage.npy'

    # fast path: reuse the cached array unless a rebuild was requested
    if os.path.exists(cache_path) and not rebuild:
        return np.load(cache_path).view(np.recarray)

    # slow path: compute from the BAM (padded, 1-based positions) and cache
    coverage = pysamstats.load_coverage(
        bam_path,
        pad=True,
        fields=['chrom', 'pos', 'reads_all'],
        one_based=True,
    )
    np.save(cache_path, coverage)
    return coverage



In [7]:

    
def tabulate_variants_truth(alignment_method, calling_method, assembly):
    """Tabulate the 'truth' variants called from an assembly alignment.

    Reads
    ``<truth_dir>/<alignment_method>/calling/<calling_method>/<assembly>.leftaligned.vcf.gz``
    and returns a petl table restricted to variants that fall inside a region
    of `tbl_regions_1b` whose region_type is 'Core'.

    Parameters
    ----------
    alignment_method : str
        Sub-directory of `truth_dir` naming the alignment method.
    calling_method : str
        Sub-directory under ``calling`` naming the variant caller.
    assembly : str
        Assembly name (VCF file name without ``.leftaligned.vcf.gz``).

    Returns
    -------
    petl table
        Fields CHROM, POS, REF, ALT, svlen, svtype, truth. ALT is the allele
        selected by the genotype of the first sample column (used as a 1-based
        index into the ALT array), REF/ALT are passed through `trim_alleles`,
        svlen is len(ALT) - len(REF) after trimming, svtype is 'SNP', 'INS' or
        'DEL', and truth is always True.
    """
    vcf_path = os.path.join(truth_dir, alignment_method, 'calling', calling_method,
                            assembly + '.leftaligned.vcf.gz')

    # site-level fields
    variants = vcfnp.variants(
        vcf_path,
        fields=['CHROM', 'POS', 'REF', 'ALT'],
        dtypes={'REF': 'S200', 'ALT': 'S200'},
        arities={'ALT': 4},
        cache=False,
    )

    # genotype of the first sample column, read with ploidy 1
    calls = vcfnp.calldata_2d(
        vcf_path,
        fields=['genotype'],
        ploidy=1,
        cache=False,
    )
    genotype = calls['genotype'][:, 0]

    def called_alt(alts, row):
        # NOTE(review): unlike tabulate_variants_gatk, rows with genotype <= 0 are
        # not removed before this lookup, so such a row would index the ALT array
        # from its end — confirm the truth VCFs contain no such calls.
        return alts[row.genotype-1]

    def classify(row):
        if row.svlen == 0:
            return 'SNP'
        return 'INS' if row.svlen > 0 else 'DEL'

    # build the table one step at a time
    tbl = etl.fromarray(variants)
    tbl = tbl.addcolumn('genotype', genotype)
    tbl = tbl.convert('ALT', called_alt, pass_row=True)
    tbl = tbl.cutout('genotype')
    tbl = tbl.convert(['CHROM', 'REF', 'ALT'], lambda v: str(v, 'ascii'))
    tbl = tbl.addfield('svlen', lambda row: len(row.ALT) - len(row.REF))
    # replace REF/ALT/svlen with their trimmed counterparts
    tbl = tbl.addfield('trim', lambda row: trim_alleles(row.REF, row.ALT))
    tbl = tbl.unpack('trim', ['REF_trim', 'ALT_trim'])
    tbl = tbl.addfield('svlen_trim', lambda row: len(row.ALT_trim) - len(row.REF_trim))
    tbl = tbl.cutout('REF', 'ALT', 'svlen')
    tbl = tbl.rename({'REF_trim': 'REF', 'ALT_trim': 'ALT', 'svlen_trim': 'svlen'})
    # annotate each variant with its genome region and keep only 'Core' ones
    tbl = tbl.intervalleftjoin(tbl_regions_1b, lkey='CHROM', rkey='region_chrom',
                               lstart='POS', lstop='POS',
                               rstart='region_start', rstop='region_stop',
                               include_stop=True)
    tbl = tbl.eq('region_type', 'Core')
    tbl = tbl.cutout('region_chrom', 'region_start', 'region_stop', 'region_type', 'region_size')
    tbl = tbl.addfield('svtype', classify)
    tbl = tbl.addfield('truth', True)

    return tbl



In [12]:

    
def join_truth(tbl_discovery, alignment_method, calling_method, assembly, key=('CHROM', 'POS')):
    """Outer-join a discovery table with a truth table and label every row TP/FP/FN.

    Parameters
    ----------
    tbl_discovery : petl table
        Discovery variants; must carry the `key` fields and a 'discovery' field.
    alignment_method, calling_method, assembly : str
        Identify the truth callset (see `tabulate_variants_truth`) and the
        assembly alignment whose coverage is used (see `load_coverage`).
    key : tuple of str, optional
        Fields on which discovery and truth rows are matched.

    Returns
    -------
    petl table
        The joined rows whose truth-alignment coverage (`reads_all`) at
        CHROM/POS equals 1, with truth-side fields prefixed 'truth_' and these
        added fields: truth_coverage; status ('FP' when there is no truth row,
        'FN' when there is no discovery row, otherwise 'TP'); gene (first
        overlapping feature_id from `tbl_genes`, or None); is_coding (whether
        any `tbl_exons` feature overlaps the position). The helper fields
        'discovery' and 'truth' are removed.
    """
    truth = tabulate_variants_truth(alignment_method, calling_method, assembly)

    # coverage of the truth assembly alignment, indexed by (chrom, pos)
    coverage = load_coverage(alignment_method, assembly)
    coverage_index = allel.SortedMultiIndex(coverage['chrom'], coverage['pos'], copy=False)

    def coverage_at(row):
        where = coverage_index.locate_key(row.CHROM.encode('ascii'), row.POS)
        return coverage.reads_all[where]

    def classify(row):
        if row.truth is None:
            return 'FP'
        if row.discovery is None:
            return 'FN'
        return 'TP'

    def first_or_none(values):
        return values[0] if values else None

    # both feature look-ups treat the variant position as a closed point interval
    feature_lookup = dict(value='feature_id',
                          lkey='CHROM', lstart='POS', lstop='POS',
                          rkey='feature_chrom', rstart='feature_start', rstop='feature_stop',
                          include_stop=True)

    tbl = tbl_discovery.outerjoin(truth, key=key, rprefix='truth_')
    tbl = tbl.rename('truth_truth', 'truth')
    # keep only positions where the truth alignment has coverage of exactly 1
    tbl = tbl.addfield('truth_coverage', coverage_at)
    tbl = tbl.eq('truth_coverage', 1)
    tbl = tbl.addfield('status', classify)
    tbl = tbl.cutout('discovery', 'truth')
    # gene annotation
    tbl = tbl.intervaljoinvalues(tbl_genes, **feature_lookup)
    tbl = tbl.rename('feature_id', 'gene')
    tbl = tbl.convert('gene', first_or_none)
    # coding / non-coding annotation
    tbl = tbl.intervaljoinvalues(tbl_exons, **feature_lookup)
    tbl = tbl.rename('feature_id', 'is_coding')
    tbl = tbl.convert('is_coding', lambda v: len(v) > 0)
    tbl = tbl.cache(100000)

    return tbl



In [13]:

    
def _confusion_rates(fp, fn, tp):
    """Return ``(n, fdr, sensitivity)`` for false-positive, false-negative and true-positive counts.

    ``n = fp + fn + tp``, ``fdr = fp / (fp + tp)`` and
    ``sensitivity = tp / (fn + tp)``; a rate whose denominator is zero is
    reported as 0.
    """
    n = fp + fn + tp
    fdr = fp / (fp + tp) if (fp + tp) > 0 else 0
    sens = tp / (fn + tp) if (fn + tp) > 0 else 0
    return n, fdr, sens


def _colour_venn_patch(venn, patch_id, colour):
    """Colour one region of a venn diagram, skipping regions that were not drawn."""
    patch = venn.get_patch_by_id(patch_id)
    # matplotlib-venn hands back None for a region with zero area (e.g. FP == 0)
    if patch is not None:
        patch.set_color(colour)


def analyse_confusion(tbl_joined):
    """Summarise discovery-versus-truth confusion, overall and per variant class.

    Displays a table with one row for all variants and one row each for
    coding SNPs, non-coding SNPs, coding INDELs and non-coding INDELs, giving
    n, TP, FP, FN, FDR and sensitivity. Also draws a 2x2 grid of venn
    diagrams (discovery vs truth), one per variant class, on a new figure.

    Within a class, FP and TP rows are selected by the discovery 'svtype',
    while FN rows are selected by 'truth_svtype' (FN rows have no discovery
    svtype).

    Parameters
    ----------
    tbl_joined : petl table
        Output of `join_truth`; must carry the fields svtype, truth_svtype,
        status and is_coding.

    Returns
    -------
    None
    """
    # imported once, locally, as the original code did
    from matplotlib_venn import venn2

    df = tbl_joined.cut('svtype', 'truth_svtype', 'status', 'is_coding').todataframe()
    status = df.status
    is_coding = df.is_coding
    is_snp = (df.svtype == 'SNP')
    is_indel = (df.svtype == 'INS') | (df.svtype == 'DEL')
    is_truth_snp = (df.truth_svtype == 'SNP')
    is_truth_indel = (df.truth_svtype == 'INS') | (df.truth_svtype == 'DEL')

    rows = [['svtype', 'n', 'TP', 'FP', 'FN', 'FDR', 'sensitivity']]

    # all variants
    fp = nnz(status == 'FP')
    fn = nnz(status == 'FN')
    tp = nnz(status == 'TP')
    n, fdr, sens = _confusion_rates(fp, fn, tp)
    rows.append(['all', n, tp, fp, fn, fdr, sens])

    # (label, region mask, discovery-type mask, truth-type mask), in panel order
    strata = [
        ('SNP coding', is_coding, is_snp, is_truth_snp),
        ('SNP non-coding', ~is_coding, is_snp, is_truth_snp),
        ('INDEL coding', is_coding, is_indel, is_truth_indel),
        ('INDEL non-coding', ~is_coding, is_indel, is_truth_indel),
    ]

    fig = plt.figure(figsize=(8, 6))
    for panel, (label, in_region, is_called_type, is_truth_type) in enumerate(strata, start=1):
        fp = nnz(in_region & is_called_type & (status == 'FP'))
        fn = nnz(in_region & is_truth_type & (status == 'FN'))
        tp = nnz(in_region & is_called_type & (status == 'TP'))
        n, fdr, sens = _confusion_rates(fp, fn, tp)
        rows.append([label, n, tp, fp, fn, fdr, sens])

        ax = fig.add_subplot(2, 2, panel)
        venn = venn2(subsets=[fp, fn, tp], set_labels=['discovery', 'truth'], ax=ax)
        _colour_venn_patch(venn, '10', '#ff4444')  # discovery only (FP)
        _colour_venn_patch(venn, '01', '#4444ff')  # truth only (FN)
        ax.set_title(label)

    etl.wrap(rows).displayall()



In [14]:

    
def confusion_tr_style(row):
    """Return the CSS background style for a table row, chosen by its status.

    'TP' rows are green (#4f4), 'FP' rows red (#f44), 'FN' rows blue (#44f);
    any other status gets a white background.
    """
    if row.status == 'TP':
        colour = '#4f4'
    elif row.status == 'FP':
        colour = '#f44'
    elif row.status == 'FN':
        colour = '#44f'
    else:
        colour = 'white'
    return 'background-color: %s' % colour

3D7 Illumina assembly - cross-check truth



In [15]:

    
# Cross-check of two truth callsets built from the same alignment (bwamem_intractg)
# of the 3D7 assembly: gatk_ug takes the "discovery" role, bcftools_multiallelic the "truth" role.

# Re-label the gatk_ug truth table as a discovery table so that join_truth accepts it.
tbl_variants_check1 = tabulate_variants_truth(alignment_method='bwamem_intractg',
                                              calling_method='gatk_ug',
                                              assembly='garimella_3d7_ERR019061_contigs').cutout('truth').addfield('discovery', True)
# Match on the full allele (CHROM, POS, REF, ALT) rather than on the site alone.
tbl_variants_check2 = join_truth(tbl_variants_check1, 
                                 alignment_method='bwamem_intractg',
                                 calling_method='bcftools_multiallelic',
                                 assembly='garimella_3d7_ERR019061_contigs',
                                 key=('CHROM', 'POS', 'REF', 'ALT'))
# Log the number of joined rows, preview the first FP and FN rows, then summarise.
log(tbl_variants_check2.nrows())
tbl_variants_check2.eq('status', 'FP').display(5, tr_style=confusion_tr_style)
tbl_variants_check2.eq('status', 'FN').display(5, tr_style=confusion_tr_style)
analyse_confusion(tbl_variants_check2)









    



[vcfnp] 2016-03-08 22:44:04.364898 :: caching is disabled
[vcfnp] 2016-03-08 22:44:04.365488 :: building array
[vcfnp] 2016-03-08 22:44:04.393727 :: caching is disabled
[vcfnp] 2016-03-08 22:44:04.394112 :: building array
[vcfnp] 2016-03-08 22:44:04.449992 :: caching is disabled
[vcfnp] 2016-03-08 22:44:04.450529 :: building array
[vcfnp] 2016-03-08 22:44:04.496156 :: caching is disabled
[vcfnp] 2016-03-08 22:44:04.496652 :: building array
2016-03-08 22:44:09.216125 :: 450






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_03_v3
545588
AAAATATATATAAAT
A
-14
DEL
None
None
1
FP
None
False


Pf3D7_04_v3
343440
ACTTTTATTATCGTCAGGTTTG
A
-21
DEL
None
None
1
FP
PF3D7_0406500
True


Pf3D7_04_v3
375115
AATGTAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATG
A
-72
DEL
None
None
1
FP
PF3D7_0407600
True


Pf3D7_09_v3
184870
GTAATACATATAAAAAATTATTAATATATATATAATAAAA
G
-39
DEL
None
None
1
FP
None
False


Pf3D7_10_v3
1418531
TTTTTTTATATTTTTCTATATTTCTTATATTTTTA
T
-34
DEL
None
None
1
FP
None
False



...






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
191069
CA
C
None
None
-1
DEL
1
FN
PF3D7_0104300
True


Pf3D7_01_v3
273111
T
TTATATATATATATATATATATA
None
None
22
INS
1
FN
PF3D7_0106400
False


Pf3D7_01_v3
287205
T
TTATATA
None
None
6
INS
1
FN
PF3D7_0106800
False


Pf3D7_01_v3
288488
A
AT
None
None
1
INS
1
FN
PF3D7_0106800
False


Pf3D7_01_v3
291538
A
AATGAAAAAG
None
None
9
INS
1
FN
PF3D7_0106900
True



...






    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
450
368
6
76
0.016042780748663103
0.8288288288288288


SNP coding
42
34
0
8
0.0
0.8095238095238095


SNP non-coding
219
215
0
4
0.0
0.9817351598173516


INDEL coding
28
17
3
8
0.15
0.68


INDEL non-coding
161
102
3
56
0.02857142857142857
0.6455696202531646



In [16]:

    
# Cross-check on the bwamem_intractg alignment of the 3D7 assembly:
# gatk_ug ("discovery" role) vs bcftools_consensus ("truth" role).

# Re-label the gatk_ug truth table as a discovery table so that join_truth accepts it.
tbl_variants_check1 = tabulate_variants_truth(alignment_method='bwamem_intractg',
                                              calling_method='gatk_ug',
                                              assembly='garimella_3d7_ERR019061_contigs').cutout('truth').addfield('discovery', True)
# Match on the full allele (CHROM, POS, REF, ALT) rather than on the site alone.
tbl_variants_check2 = join_truth(tbl_variants_check1, 
                                 alignment_method='bwamem_intractg',
                                 calling_method='bcftools_consensus',
                                 assembly='garimella_3d7_ERR019061_contigs',
                                 key=('CHROM', 'POS', 'REF', 'ALT'))
# Preview the first FP and FN rows, then summarise.
tbl_variants_check2.eq('status', 'FP').display(5, tr_style=confusion_tr_style)
tbl_variants_check2.eq('status', 'FN').display(5, tr_style=confusion_tr_style)
analyse_confusion(tbl_variants_check2)









    



[vcfnp] 2016-03-08 22:44:10.510801 :: caching is disabled
[vcfnp] 2016-03-08 22:44:10.511466 :: building array
[vcfnp] 2016-03-08 22:44:10.541697 :: caching is disabled
[vcfnp] 2016-03-08 22:44:10.542196 :: building array
[vcfnp] 2016-03-08 22:44:10.601143 :: caching is disabled
[vcfnp] 2016-03-08 22:44:10.601808 :: building array
[vcfnp] 2016-03-08 22:44:10.645569 :: caching is disabled
[vcfnp] 2016-03-08 22:44:10.645963 :: building array






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
555445
TA
T
-1
DEL
None
None
1
FP
None
False


Pf3D7_02_v3
402926
CA
C
-1
DEL
None
None
1
FP
None
False


Pf3D7_02_v3
827872
GA
G
-1
DEL
None
None
1
FP
None
False


Pf3D7_03_v3
545588
AAAATATATATAAAT
A
-14
DEL
None
None
1
FP
None
False


Pf3D7_03_v3
604213
TA
T
-1
DEL
None
None
1
FP
None
False



...






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
273111
T
TTATATATATATATATATATATA
None
None
22
INS
1
FN
PF3D7_0106400
False


Pf3D7_01_v3
287205
T
TTATATA
None
None
6
INS
1
FN
PF3D7_0106800
False


Pf3D7_01_v3
291538
A
AATGAAAAAG
None
None
9
INS
1
FN
PF3D7_0106900
True


Pf3D7_01_v3
294071
C
CATATAT
None
None
6
INS
1
FN
PF3D7_0107000
False


Pf3D7_02_v3
631341
C
CAAAAAAAAAAAAA
None
None
13
INS
1
FN
None
False



...






    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
399
311
63
25
0.16844919786096257
0.9255952380952381


SNP coding
35
34
0
1
0.0
0.9714285714285714


SNP non-coding
215
214
1
0
0.004651162790697674
1.0


INDEL coding
24
8
12
4
0.6
0.6666666666666666


INDEL non-coding
125
55
50
20
0.47619047619047616
0.7333333333333333



In [17]:

    
# Cross-check on the bwamem_intractg alignment of the 3D7 assembly:
# bcftools_consensus ("discovery" role) vs bcftools_multiallelic ("truth" role).

# Re-label the bcftools_consensus truth table as a discovery table so that join_truth accepts it.
tbl_variants_check1 = tabulate_variants_truth(alignment_method='bwamem_intractg',
                                              calling_method='bcftools_consensus',
                                              assembly='garimella_3d7_ERR019061_contigs').cutout('truth').addfield('discovery', True)
# Match on the full allele (CHROM, POS, REF, ALT) rather than on the site alone.
tbl_variants_check2 = join_truth(tbl_variants_check1, 
                                 alignment_method='bwamem_intractg',
                                 calling_method='bcftools_multiallelic',
                                 assembly='garimella_3d7_ERR019061_contigs',
                                 key=('CHROM', 'POS', 'REF', 'ALT'))
# Preview the first FP and FN rows, then summarise.
tbl_variants_check2.eq('status', 'FP').display(5, tr_style=confusion_tr_style)
tbl_variants_check2.eq('status', 'FN').display(5, tr_style=confusion_tr_style)
analyse_confusion(tbl_variants_check2)









    



[vcfnp] 2016-03-08 22:44:15.800168 :: caching is disabled
[vcfnp] 2016-03-08 22:44:15.800721 :: building array
[vcfnp] 2016-03-08 22:44:15.833851 :: caching is disabled
[vcfnp] 2016-03-08 22:44:15.834529 :: building array
[vcfnp] 2016-03-08 22:44:15.889670 :: caching is disabled
[vcfnp] 2016-03-08 22:44:15.890184 :: building array
[vcfnp] 2016-03-08 22:44:15.935920 :: caching is disabled
[vcfnp] 2016-03-08 22:44:15.936373 :: building array






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding












    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
191069
CA
C
None
None
-1
DEL
1
FN
PF3D7_0104300
True


Pf3D7_01_v3
288488
A
AT
None
None
1
INS
1
FN
PF3D7_0106800
False


Pf3D7_01_v3
371593
G
GT
None
None
1
INS
1
FN
None
False


Pf3D7_01_v3
430485
A
AT
None
None
1
INS
1
FN
PF3D7_0111200
False


Pf3D7_01_v3
493069
C
CT
None
None
1
INS
1
FN
None
False



...






    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
444
336
0
108
0.0
0.7567567567567568


SNP coding
42
35
0
7
0.0
0.8333333333333334


SNP non-coding
219
214
0
5
0.0
0.9771689497716894


INDEL coding
25
12
0
13
0.0
0.48


INDEL non-coding
158
75
0
83
0.0
0.47468354430379744

HB3 coding sequences - cross-check truth



In [18]:

    
# Cross-check on the bwamem_intractg alignment of the GenBank HB3 coding sequences:
# gatk_ug ("discovery" role) vs bcftools_multiallelic ("truth" role).

# Re-label the gatk_ug truth table as a discovery table so that join_truth accepts it.
tbl_variants_check1 = tabulate_variants_truth(alignment_method='bwamem_intractg',
                                              calling_method='gatk_ug',
                                              assembly='genbank_hb3_coding_sequences').cutout('truth').addfield('discovery', True)
# Match on the full allele (CHROM, POS, REF, ALT) rather than on the site alone.
tbl_variants_check2 = join_truth(tbl_variants_check1, 
                                 alignment_method='bwamem_intractg',
                                 calling_method='bcftools_multiallelic',
                                 assembly='genbank_hb3_coding_sequences',
                                 key=('CHROM', 'POS', 'REF', 'ALT'))
# Log the number of joined rows and show how they are distributed over genes.
log(tbl_variants_check2.nrows())
tbl_variants_check2.valuecounts('gene').sort('gene').displayall()
# Preview the first FP and FN rows, then summarise.
tbl_variants_check2.eq('status', 'FP').display(5, tr_style=confusion_tr_style)
tbl_variants_check2.eq('status', 'FN').display(5, tr_style=confusion_tr_style)
analyse_confusion(tbl_variants_check2)









    



[vcfnp] 2016-03-08 22:44:21.159411 :: caching is disabled
[vcfnp] 2016-03-08 22:44:21.159834 :: building array
[vcfnp] 2016-03-08 22:44:21.198628 :: caching is disabled
[vcfnp] 2016-03-08 22:44:21.199019 :: building array
[vcfnp] 2016-03-08 22:44:21.236398 :: caching is disabled
[vcfnp] 2016-03-08 22:44:21.236874 :: building array
[vcfnp] 2016-03-08 22:44:21.281014 :: caching is disabled
[vcfnp] 2016-03-08 22:44:21.281470 :: building array
2016-03-08 22:44:30.002365 :: 398






    







0|gene
1|count
2|frequency




None
49
0.12311557788944724


PF3D7_0106300
6
0.01507537688442211


PF3D7_0207300
8
0.020100502512562814


PF3D7_0207400
5
0.01256281407035176


PF3D7_0207500
4
0.010050251256281407


PF3D7_0207600
11
0.02763819095477387


PF3D7_0207700
8
0.020100502512562814


PF3D7_0207800
4
0.010050251256281407


PF3D7_0207900
14
0.035175879396984924


PF3D7_0208000
7
0.017587939698492462


PF3D7_0220800
21
0.052763819095477386


PF3D7_0304600
12
0.03015075376884422


PF3D7_0402300
15
0.03768844221105527


PF3D7_0417200
1
0.002512562814070352


PF3D7_0424200
3
0.007537688442211055


PF3D7_0508000
4
0.010050251256281407


PF3D7_0620400
5
0.01256281407035176


PF3D7_0708400
9
0.022613065326633167


PF3D7_0709100
19
0.04773869346733668


PF3D7_0709300
32
0.08040201005025126


PF3D7_0804800
1
0.002512562814070352


PF3D7_0831600
23
0.05778894472361809


PF3D7_0902800
4
0.010050251256281407


PF3D7_0905400
6
0.01507537688442211


PF3D7_0929400
3
0.007537688442211055


PF3D7_0930300
41
0.10301507537688442


PF3D7_0935800
7
0.017587939698492462


PF3D7_1033800
3
0.007537688442211055


PF3D7_1035200
1
0.002512562814070352


PF3D7_1115700
13
0.032663316582914576


PF3D7_1133400
29
0.0728643216080402


PF3D7_1246100
1
0.002512562814070352


PF3D7_1323500
2
0.005025125628140704


PF3D7_1335000
1
0.002512562814070352


PF3D7_1335100
5
0.01256281407035176


PF3D7_1337200
3
0.007537688442211055


PF3D7_1434200
3
0.007537688442211055


PF3D7_1447900
13
0.032663316582914576


PF3D7_1467300
2
0.005025125628140704










    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_02_v3
841385
AAGGTAATGTATTATATAATAAATATAATATTACAAAATTGTTATCTGCATTTGTTATTTTTATGAAATCATTATAAATCATTATTTAACTTTTTTTTCTTTTTGTTTTTAAT
A
-112
DEL
None
None
1
FP
PF3D7_0220800
True


Pf3D7_02_v3
841657
TGGTATATTATAAAATAGAAACTAAGAAAAAAAATTAAATTAATTGAATATATATATATTTTTATATTGTTTAATTTTTTTTATTTTA
T
-87
DEL
None
None
1
FP
PF3D7_0220800
True


Pf3D7_03_v3
221965
GGGGTTTGCATTTGGGTTTGCATTT
G
-24
DEL
None
None
1
FP
PF3D7_0304600
True


Pf3D7_04_v3
141594
CAAACGATATTGATGAAATAAACGATATTGATGAAAT
C
-36
DEL
None
None
1
FP
PF3D7_0402300
True


Pf3D7_07_v3
382251
T
TGAA
3
INS
None
None
1
FP
PF3D7_0708400
True



...






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
266073
A
ATT
None
None
2
INS
1
FN
PF3D7_0106300
False


Pf3D7_02_v3
290368
G
GATAT
None
None
4
INS
1
FN
PF3D7_0207300
False


Pf3D7_02_v3
296617
A
AATATATATATAT
None
None
12
INS
1
FN
PF3D7_0207400
False


Pf3D7_02_v3
297374
T
TA
None
None
1
INS
1
FN
PF3D7_0207400
False


Pf3D7_02_v3
305614
G
GTATATATATATA
None
None
12
INS
1
FN
PF3D7_0207600
False



...






    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
398
355
11
32
0.030054644808743168
0.917312661498708


SNP coding
272
271
0
1
0.0
0.9963235294117647


SNP non-coding
29
29
0
0
0.0
1.0


INDEL coding
31
11
8
12
0.42105263157894735
0.4782608695652174


INDEL non-coding
66
44
3
19
0.06382978723404255
0.6984126984126984



In [19]:

    
# Cross-check on the bwamem_intractg alignment of the GenBank HB3 coding sequences:
# bcftools_consensus ("discovery" role) vs bcftools_multiallelic ("truth" role).

# Re-label the bcftools_consensus truth table as a discovery table so that join_truth accepts it.
tbl_variants_check1 = tabulate_variants_truth(alignment_method='bwamem_intractg',
                                              calling_method='bcftools_consensus',
                                              assembly='genbank_hb3_coding_sequences').cutout('truth').addfield('discovery', True)
# Match on the full allele (CHROM, POS, REF, ALT) rather than on the site alone.
tbl_variants_check2 = join_truth(tbl_variants_check1, 
                                 alignment_method='bwamem_intractg',
                                 calling_method='bcftools_multiallelic',
                                 assembly='genbank_hb3_coding_sequences',
                                 key=('CHROM', 'POS', 'REF', 'ALT'))
# Log the number of joined rows, preview the first FP and FN rows, then summarise.
log(tbl_variants_check2.nrows())
tbl_variants_check2.eq('status', 'FP').display(5, tr_style=confusion_tr_style)
tbl_variants_check2.eq('status', 'FN').display(5, tr_style=confusion_tr_style)
analyse_confusion(tbl_variants_check2)









    



[vcfnp] 2016-03-08 22:44:30.652491 :: caching is disabled
[vcfnp] 2016-03-08 22:44:30.653398 :: building array
[vcfnp] 2016-03-08 22:44:30.716880 :: caching is disabled
[vcfnp] 2016-03-08 22:44:30.717474 :: building array
[vcfnp] 2016-03-08 22:44:30.782850 :: caching is disabled
[vcfnp] 2016-03-08 22:44:30.783397 :: building array
[vcfnp] 2016-03-08 22:44:30.825447 :: caching is disabled
[vcfnp] 2016-03-08 22:44:30.825928 :: building array
2016-03-08 22:44:35.139932 :: 387






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding












    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
266073
A
ATT
None
None
2
INS
1
FN
PF3D7_0106300
False


Pf3D7_02_v3
297374
T
TA
None
None
1
INS
1
FN
PF3D7_0207400
False


Pf3D7_04_v3
137768
AT
A
None
None
-1
DEL
1
FN
PF3D7_0402300
False


Pf3D7_06_v3
130254
TA
T
None
None
-1
DEL
1
FN
None
False


Pf3D7_06_v3
132107
TA
T
None
None
-1
DEL
1
FN
None
False



...






    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
387
367
0
20
0.0
0.9483204134366925


SNP coding
272
272
0
0
0.0
1.0


SNP non-coding
29
29
0
0
0.0
1.0


INDEL coding
23
19
0
4
0.0
0.8260869565217391


INDEL non-coding
63
47
0
16
0.0
0.746031746031746

3D7 GATK versus Garimella



In [20]:

    
# GATK discovery variants for sample column 0 of the 3d7_hb3 cross; the sample name
# logged in the recorded run is 3D7/PG0051-C/ERR019061.
tbl_variants_3d7_gatk = tabulate_variants_gatk('3d7_hb3', 0)
# Bare expression: display the first rows of the table.
tbl_variants_3d7_gatk









    



2016-03-08 22:44:36.500076 :: 3D7/PG0051-C/ERR019061
2016-03-08 22:44:36.972788 :: n_variants 109






    Out[20]:







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|discovery




Pf3D7_01_v3
108579
A
ATATT
4
INS
True


Pf3D7_01_v3
190317
CATAATA
C
-6
DEL
True


Pf3D7_01_v3
237483
AATAT
A
-4
DEL
True


Pf3D7_01_v3
273111
T
TTATA
4
INS
True


Pf3D7_01_v3
286838
AATATATATATAT
A
-12
DEL
True



...



In [21]:

    
# Compare the 3D7 GATK calls with the bcftools_multiallelic truth calls from the
# Garimella 3D7 assembly, matching on the full allele (CHROM, POS, REF, ALT).
tbl_variants_3d7_gatk_vs_garimella_allele = join_truth(
    tbl_variants_3d7_gatk,
    alignment_method='bwamem_intractg', 
    calling_method='bcftools_multiallelic', 
    assembly='garimella_3d7_ERR019061_contigs',
    key=('CHROM', 'POS', 'REF', 'ALT')
)
# Log the number of joined rows, preview the first FP and FN rows, then summarise.
log(tbl_variants_3d7_gatk_vs_garimella_allele.nrows())
tbl_variants_3d7_gatk_vs_garimella_allele.eq('status', 'FP').display(5, tr_style=confusion_tr_style)
tbl_variants_3d7_gatk_vs_garimella_allele.eq('status', 'FN').display(5, tr_style=confusion_tr_style)
analyse_confusion(tbl_variants_3d7_gatk_vs_garimella_allele)









    



[vcfnp] 2016-03-08 22:44:37.005491 :: caching is disabled
[vcfnp] 2016-03-08 22:44:37.005857 :: building array
[vcfnp] 2016-03-08 22:44:37.041260 :: caching is disabled
[vcfnp] 2016-03-08 22:44:37.041631 :: building array
2016-03-08 22:44:42.410329 :: 467






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
237483
AATAT
A
-4
DEL
None
None
1
FP
None
False


Pf3D7_01_v3
273111
T
TTATA
4
INS
None
None
1
FP
PF3D7_0106400
False


Pf3D7_01_v3
289078
T
TTATATATATATATATA
16
INS
None
None
1
FP
None
False


Pf3D7_01_v3
294390
CATATATAT
C
-8
DEL
None
None
1
FP
PF3D7_0107000
False


Pf3D7_01_v3
319523
AATATAT
A
-6
DEL
None
None
1
FP
None
False



...






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
102198
G
A
None
None
0
SNP
1
FN
PF3D7_0102200
True


Pf3D7_01_v3
191069
CA
C
None
None
-1
DEL
1
FN
PF3D7_0104300
True


Pf3D7_01_v3
202364
G
A
None
None
0
SNP
1
FN
None
False


Pf3D7_01_v3
219230
C
A
None
None
0
SNP
1
FN
None
False


Pf3D7_01_v3
273111
T
TTATATATATATATATATATATA
None
None
22
INS
1
FN
PF3D7_0106400
False



...






    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
467
33
23
411
0.4107142857142857
0.07432432432432433


SNP coding
45
0
3
42
1.0
0.0


SNP non-coding
219
1
0
218
0.0
0.0045662100456621


INDEL coding
27
7
2
18
0.2222222222222222
0.28


INDEL non-coding
176
25
18
133
0.4186046511627907
0.15822784810126583

HB3(1) GATK versus Garimella



In [22]:

    
# GATK discovery variants for sample column 1 of the 3d7_hb3 cross; the sample name
# logged in the recorded run is HB3/PG0052-C/ERR019054.
tbl_variants_hb31_gatk = tabulate_variants_gatk('3d7_hb3', 1)
# Bare expression: display the first rows of the table.
tbl_variants_hb31_gatk









    



2016-03-08 22:44:43.749197 :: HB3/PG0052-C/ERR019054
2016-03-08 22:44:44.234426 :: n_variants 36635






    Out[22]:







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|discovery




Pf3D7_01_v3
93901
AATATATATAT
A
-10
DEL
True


Pf3D7_01_v3
94590
T
TATAC
4
INS
True


Pf3D7_01_v3
94993
CATATAT
C
-6
DEL
True


Pf3D7_01_v3
95518
G
T
0
SNP
True


Pf3D7_01_v3
95621
T
A
0
SNP
True



...



In [23]:

    
# Compare the HB3 GATK calls with the bcftools_multiallelic truth calls from the
# Garimella HB3 assembly, matching on the full allele (CHROM, POS, REF, ALT).
tbl_variants_hb31_gatk_vs_garimella_allele = join_truth(
    tbl_variants_hb31_gatk,
    alignment_method='bwamem_intractg', 
    calling_method='bcftools_multiallelic', 
    assembly='garimella_hb3_ERR019054_contigs',
    key=('CHROM', 'POS', 'REF', 'ALT')
)
# Log the number of joined rows, preview the first FP and FN rows, then summarise.
# In the recorded run the nrows() call was logged almost six minutes after the join started.
log(tbl_variants_hb31_gatk_vs_garimella_allele.nrows())
tbl_variants_hb31_gatk_vs_garimella_allele.eq('status', 'FP').display(5, tr_style=confusion_tr_style)
tbl_variants_hb31_gatk_vs_garimella_allele.eq('status', 'FN').display(5, tr_style=confusion_tr_style)
analyse_confusion(tbl_variants_hb31_gatk_vs_garimella_allele)









    



[vcfnp] 2016-03-08 22:44:44.286170 :: caching is disabled
[vcfnp] 2016-03-08 22:44:44.286552 :: building array
[vcfnp] 2016-03-08 22:44:44.761141 :: caching is disabled
[vcfnp] 2016-03-08 22:44:44.761517 :: building array
2016-03-08 22:50:31.973350 :: 36686






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
103413
A
AAT
2
INS
None
None
1
FP
None
False


Pf3D7_01_v3
117342
AAT
A
-2
DEL
None
None
1
FP
None
False


Pf3D7_01_v3
122104
A
AATATAT
6
INS
None
None
1
FP
None
False


Pf3D7_01_v3
123313
C
CAT
2
INS
None
None
1
FP
None
False


Pf3D7_01_v3
123589
T
TTATA
4
INS
None
None
1
FP
None
False



...






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_svlen
7|truth_svtype
8|truth_coverage
9|status
10|gene
11|is_coding




Pf3D7_01_v3
101759
T
C
None
None
0
SNP
1
FN
PF3D7_0102200
True


Pf3D7_01_v3
101784
A
T
None
None
0
SNP
1
FN
PF3D7_0102200
True


Pf3D7_01_v3
117524
C
CAAAAGAAAAAAAGAA
None
None
15
INS
1
FN
None
False


Pf3D7_01_v3
123223
A
AT
None
None
1
INS
1
FN
None
False


Pf3D7_01_v3
123311
A
C
None
None
0
SNP
1
FN
None
False



...






    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
36686
23302
4705
8679
0.1679937158567501
0.7286201181951784


SNP coding
8884
6425
415
2044
0.06067251461988304
0.7586491911677884


SNP non-coding
6428
3524
115
2789
0.03160208848584776
0.5582132108347854


INDEL coding
3845
2382
486
977
0.1694560669456067
0.7091396248883596


INDEL non-coding
17529
10971
3689
2869
0.2516371077762619
0.7927023121387283

HB3(1) GATK vs GenBank coding sequences



In [24]:

    
# Compare the HB3 GATK calls with the bcftools_consensus truth calls from the GenBank
# HB3 coding sequences, matching on the site (CHROM, POS) only. With this key the truth
# alleles are kept as truth_REF / truth_ALT, and a row counts as TP even when the
# discovery and truth ALT alleles differ.
tbl_variants_hb31_gatk_vs_genbank_site = join_truth(tbl_variants_hb31_gatk,
                                                    alignment_method='bwamem_intractg', 
                                                    calling_method='bcftools_consensus', 
                                                    assembly='genbank_hb3_coding_sequences',
                                                    key=('CHROM', 'POS'))
# Show the first 20 joined rows, coloured by status.
tbl_variants_hb31_gatk_vs_genbank_site.display(20, tr_style=confusion_tr_style)









    



[vcfnp] 2016-03-08 22:50:32.503672 :: caching is disabled
[vcfnp] 2016-03-08 22:50:32.504167 :: building array
[vcfnp] 2016-03-08 22:50:32.537002 :: caching is disabled
[vcfnp] 2016-03-08 22:50:32.537401 :: building array






    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_REF
7|truth_ALT
8|truth_svlen
9|truth_svtype
10|truth_coverage
11|status
12|gene
13|is_coding




Pf3D7_01_v3
265603
CAT
C
-2
DEL
CAT
C
-2
DEL
1
TP
PF3D7_0106300
False


Pf3D7_01_v3
266480
A
T
0
SNP
A
T
0
SNP
1
TP
PF3D7_0106300
True


Pf3D7_01_v3
266640
TCTC
T
-3
DEL
TCTC
T
-3
DEL
1
TP
PF3D7_0106300
True


Pf3D7_01_v3
267777
C
T
0
SNP
C
T
0
SNP
1
TP
PF3D7_0106300
True


Pf3D7_01_v3
269132
C
T
0
SNP
C
T
0
SNP
1
TP
PF3D7_0106300
True


Pf3D7_02_v3
290368
G
GATAT
4
INS
G
GATAT
4
INS
1
TP
PF3D7_0207300
False


Pf3D7_02_v3
290569
T
A
0
SNP
T
A
0
SNP
1
TP
PF3D7_0207300
True


Pf3D7_02_v3
290572
T
A
0
SNP
T
A
0
SNP
1
TP
PF3D7_0207300
True


Pf3D7_02_v3
290667
G
C
0
SNP
G
C
0
SNP
1
TP
PF3D7_0207300
True


Pf3D7_02_v3
291565
A
AATATATATATATAT
14
INS
A
AATATATATATATAT
14
INS
1
TP
PF3D7_0207300
False


Pf3D7_02_v3
291983
C
T
0
SNP
C
T
0
SNP
1
TP
PF3D7_0207300
True


Pf3D7_02_v3
292241
C
CATAT
4
INS
C
CATATAT
6
INS
1
TP
PF3D7_0207300
False


Pf3D7_02_v3
292382
A
T
0
SNP
A
T
0
SNP
1
TP
PF3D7_0207300
True


Pf3D7_02_v3
292765
A
T
0
SNP
A
T
0
SNP
1
TP
None
False


Pf3D7_02_v3
292771
T
A
0
SNP
T
A
0
SNP
1
TP
None
False


Pf3D7_02_v3
292773
None
None
None
None
T
A
0
SNP
1
FN
None
False


Pf3D7_02_v3
292775
None
None
None
None
T
A
0
SNP
1
FN
None
False


Pf3D7_02_v3
296350
AATATATAT
A
-8
DEL
AATATATAT
A
-8
DEL
1
TP
PF3D7_0207400
False


Pf3D7_02_v3
296617
A
AATATATATATATAT
14
INS
A
AATATATATATAT
12
INS
1
TP
PF3D7_0207400
False


Pf3D7_02_v3
297388
None
None
None
None
T
A
0
SNP
1
FN
PF3D7_0207400
False



...



In [25]:

    
# Count joined rows per gene (count and frequency) and show the full table.
(tbl_variants_hb31_gatk_vs_genbank_site
 .valuecounts('gene')
 .displayall())









    







0|gene
1|count
2|frequency




PF3D7_0930300
41
0.09534883720930233


None
38
0.08837209302325581


PF3D7_0709300
32
0.07441860465116279


PF3D7_1133400
29
0.06744186046511629


PF3D7_0220800
28
0.06511627906976744


PF3D7_0831600
25
0.05813953488372093


PF3D7_0709100
18
0.04186046511627907


PF3D7_0402300
15
0.03488372093023256


PF3D7_0207600
15
0.03488372093023256


PF3D7_1115700
15
0.03488372093023256


PF3D7_0207900
15
0.03488372093023256


PF3D7_1335100
13
0.030232558139534883


PF3D7_1448500
12
0.027906976744186046


PF3D7_0304600
11
0.02558139534883721


PF3D7_1447900
11
0.02558139534883721


PF3D7_0207700
10
0.023255813953488372


PF3D7_1337200
8
0.018604651162790697


PF3D7_0708400
8
0.018604651162790697


PF3D7_0207300
8
0.018604651162790697


PF3D7_0208000
7
0.01627906976744186


PF3D7_0935800
7
0.01627906976744186


PF3D7_0207400
6
0.013953488372093023


PF3D7_0905400
6
0.013953488372093023


PF3D7_0106300
5
0.011627906976744186


PF3D7_0207800
5
0.011627906976744186


PF3D7_0620400
5
0.011627906976744186


PF3D7_0902800
4
0.009302325581395349


PF3D7_0808200
4
0.009302325581395349


PF3D7_0508000
4
0.009302325581395349


PF3D7_1434200
4
0.009302325581395349


PF3D7_0207500
4
0.009302325581395349


PF3D7_0929400
3
0.0069767441860465115


PF3D7_0424200
3
0.0069767441860465115


PF3D7_1467300
2
0.004651162790697674


PF3D7_1323500
2
0.004651162790697674


PF3D7_1033800
2
0.004651162790697674


PF3D7_0417200
1
0.002325581395348837


PF3D7_1035200
1
0.002325581395348837


PF3D7_0804800
1
0.002325581395348837


PF3D7_1335000
1
0.002325581395348837


PF3D7_1246100
1
0.002325581395348837



In [26]:

    
# Summarise the site-keyed comparison; the displayed result lists n, TP, FP,
# FN, FDR and sensitivity for all variants and for SNP/INDEL split by
# coding/non-coding.
analyse_confusion(tbl_variants_hb31_gatk_vs_genbank_site)









    







0|svtype
1|n
2|TP
3|FP
4|FN
5|FDR
6|sensitivity




all
430
267
63
100
0.19090909090909092
0.7275204359673024


SNP coding
312
217
40
55
0.1556420233463035
0.7977941176470589


SNP non-coding
32
12
3
17
0.2
0.41379310344827586


INDEL coding
26
7
7
12
0.5
0.3684210526315789


INDEL non-coding
60
31
13
16
0.29545454545454547
0.6595744680851063



In [27]:

    
# Show every row where either the called variant or the truth variant is an
# insertion or deletion (rows with svtype/truth_svtype of None or 'SNP' on
# both sides are dropped).
tbl_variants_hb31_gatk_vs_genbank_site.select(
    lambda row: (
        row.svtype in {'INS', 'DEL'}
        or row.truth_svtype in {'INS', 'DEL'}
    )
).displayall(tr_style=confusion_tr_style)









    







0|CHROM
1|POS
2|REF
3|ALT
4|svlen
5|svtype
6|truth_REF
7|truth_ALT
8|truth_svlen
9|truth_svtype
10|truth_coverage
11|status
12|gene
13|is_coding




Pf3D7_01_v3
265603
CAT
C
-2
DEL
CAT
C
-2
DEL
1
TP
PF3D7_0106300
False


Pf3D7_01_v3
266640
TCTC
T
-3
DEL
TCTC
T
-3
DEL
1
TP
PF3D7_0106300
True


Pf3D7_02_v3
290368
G
GATAT
4
INS
G
GATAT
4
INS
1
TP
PF3D7_0207300
False


Pf3D7_02_v3
291565
A
AATATATATATATAT
14
INS
A
AATATATATATATAT
14
INS
1
TP
PF3D7_0207300
False


Pf3D7_02_v3
292241
C
CATAT
4
INS
C
CATATAT
6
INS
1
TP
PF3D7_0207300
False


Pf3D7_02_v3
296350
AATATATAT
A
-8
DEL
AATATATAT
A
-8
DEL
1
TP
PF3D7_0207400
False


Pf3D7_02_v3
296617
A
AATATATATATATAT
14
INS
A
AATATATATATAT
12
INS
1
TP
PF3D7_0207400
False


Pf3D7_02_v3
297538
CATAT
C
-4
DEL
None
None
None
None
1
FP
PF3D7_0207400
False


Pf3D7_02_v3
297555
ATATAT
A
-5
DEL
None
None
None
None
1
FP
PF3D7_0207400
False


Pf3D7_02_v3
297556
TATA
T
-3
DEL
TATA
T
-3
DEL
1
TP
PF3D7_0207400
False


Pf3D7_02_v3
301090
C
CATAT
4
INS
CATATATATATATAT
C
-14
DEL
1
TP
PF3D7_0207500
False


Pf3D7_02_v3
302334
None
None
None
None
CATATATATAT
C
-10
DEL
1
FN
PF3D7_0207500
False


Pf3D7_02_v3
302466
C
CATAT
4
INS
C
CATATATAT
8
INS
1
TP
PF3D7_0207500
False


Pf3D7_02_v3
305614
G
GTATATATATA
10
INS
G
GTATATATATATA
12
INS
1
TP
PF3D7_0207600
False


Pf3D7_02_v3
306212
None
None
None
None
T
TGAACTTGAACTTGAACTTGAACTA
24
INS
1
FN
PF3D7_0207600
True


Pf3D7_02_v3
306657
None
None
None
None
TGACTTGCTCCCGTACTACCTTGTGGACTTGCTCCCGTACTACCTTGTG
T
-48
DEL
1
FN
PF3D7_0207600
True


Pf3D7_02_v3
306936
None
None
None
None
T
TCTTTTTCTTTTC
12
INS
1
FN
PF3D7_0207600
False


Pf3D7_02_v3
311008
AAT
A
-2
DEL
None
None
None
None
1
FP
PF3D7_0207700
False


Pf3D7_02_v3
311295
None
None
None
None
AATAT
A
-4
DEL
1
FN
PF3D7_0207700
False


Pf3D7_02_v3
315496
None
None
None
None
AATAAATATATATATAT
A
-16
DEL
1
FN
PF3D7_0207800
False


Pf3D7_02_v3
315807
CAT
C
-2
DEL
None
None
None
None
1
FP
PF3D7_0207800
False


Pf3D7_02_v3
316685
None
None
None
None
T
TTA
2
INS
1
FN
PF3D7_0207800
False


Pf3D7_02_v3
319944
None
None
None
None
ATATATATATATATATATATATATATATATATG
A
-32
DEL
1
FN
PF3D7_0207900
False


Pf3D7_02_v3
319946
ATATATATATATATATATATATATATATATG
A
-30
DEL
None
None
None
None
1
FP
PF3D7_0207900
False


Pf3D7_02_v3
320702
G
GATTGTGGTA
9
INS
G
GATTGTGGTA
9
INS
1
TP
PF3D7_0207900
True


Pf3D7_02_v3
321087
CAA
C
-2
DEL
CAA
C
-2
DEL
1
TP
PF3D7_0207900
False


Pf3D7_02_v3
321229
T
TTATATATATATA
12
INS
T
TTATATATATATATA
14
INS
1
TP
PF3D7_0207900
False


Pf3D7_02_v3
324519
A
AAT
2
INS
A
AAT
2
INS
1
TP
PF3D7_0208000
False


Pf3D7_02_v3
324804
TTATA
T
-4
DEL
TTATA
T
-4
DEL
1
TP
PF3D7_0208000
False


Pf3D7_02_v3
325571
A
AAT
2
INS
A
AAT
2
INS
1
TP
PF3D7_0208000
False


Pf3D7_02_v3
841678
CT
C
-1
DEL
None
None
None
None
1
FP
PF3D7_0220800
False


Pf3D7_02_v3
841693
AAATT
A
-4
DEL
None
None
None
None
1
FP
PF3D7_0220800
False


Pf3D7_02_v3
841697
T
TG
1
INS
None
None
None
None
1
FP
PF3D7_0220800
False


Pf3D7_02_v3
841700
T
TATATA
5
INS
None
None
None
None
1
FP
PF3D7_0220800
False


Pf3D7_03_v3
222277
None
None
None
None
G
GTTATTTCCATCTCTTTTATCTTCATCTTTACCTTCACGACCATTATCTCCATTATTA
57
INS
1
FN
PF3D7_0304600
True


Pf3D7_04_v3
137702
None
None
None
None
A
AATATAAACAT
10
INS
1
FN
PF3D7_0402300
False


Pf3D7_04_v3
146262
T
TTCATAA
6
INS
T
TTCATAATCATAATCATAATCATAATCATAA
30
INS
1
TP
PF3D7_0402300
True


Pf3D7_04_v3
1086025
TTTTCATCTTCATCTTCAA
T
-18
DEL
TTTTCATCTTCATCTTCAA
T
-18
DEL
1
TP
PF3D7_0424200
True


Pf3D7_04_v3
1088611
T
TTATTATTAA
9
INS
T
TTATTATTAA
9
INS
1
TP
PF3D7_0424200
True


Pf3D7_06_v3
132115
TTA
T
-2
DEL
None
None
None
None
1
FP
None
False


Pf3D7_06_v3
852707
None
None
None
None
TTCATTATTAATATTA
T
-15
DEL
1
FN
PF3D7_0620400
True


Pf3D7_07_v3
382251
T
TGAA
3
INS
None
None
None
None
1
FP
PF3D7_0708400
True


Pf3D7_07_v3
382289
G
GAGA
3
INS
None
None
None
None
1
FP
PF3D7_0708400
True


Pf3D7_07_v3
383109
None
None
None
None
TA
T
-1
DEL
1
FN
PF3D7_0708400
False


Pf3D7_07_v3
383752
C
CATTTTTTACCTTTT
14
INS
C
CATTTTTTACCTTTT
14
INS
1
TP
PF3D7_0708400
False


Pf3D7_07_v3
408058
TA
T
-1
DEL
None
None
None
None
1
FP
None
False


Pf3D7_07_v3
408063
A
ATTTTT
5
INS
None
None
None
None
1
FP
None
False


Pf3D7_07_v3
408142
TTA
T
-2
DEL
TTA
T
-2
DEL
1
TP
None
False


Pf3D7_07_v3
410359
GCCA
G
-3
DEL
None
None
None
None
1
FP
PF3D7_0709100
True


Pf3D7_07_v3
410432
None
None
None
None
T
TCATCATCAAATAGGTTGTCATCACCAAATACGTTGC
36
INS
1
FN
PF3D7_0709100
True


Pf3D7_07_v3
414662
None
None
None
None
A
ATTTTATTATCATCACAAT
18
INS
1
FN
PF3D7_0709300
True


Pf3D7_07_v3
416763
AATTATTATT
A
-9
DEL
AATTATTATT
A
-9
DEL
1
TP
PF3D7_0709300
True


Pf3D7_07_v3
418149
None
None
None
None
T
TATTACTATGACTATTCATATGAACATTATTATGATCATCCCCATGAAC
48
INS
1
FN
PF3D7_0709300
True


Pf3D7_07_v3
420778
None
None
None
None
T
TCCATACGATTTATTACTA
18
INS
1
FN
PF3D7_0709300
True


Pf3D7_08_v3
278396
None
None
None
None
C
CG
1
INS
1
FN
PF3D7_0804800
True


Pf3D7_08_v3
418224
T
TTTTTTA
6
INS
T
TTTTTTA
6
INS
1
TP
None
False


Pf3D7_08_v3
418696
CATAT
C
-4
DEL
CATAT
C
-4
DEL
1
TP
None
False


Pf3D7_08_v3
418777
C
CATATAT
6
INS
C
CATATAT
6
INS
1
TP
None
False


Pf3D7_08_v3
418829
None
None
None
None
AATATATAT
A
-8
DEL
1
FN
None
False


Pf3D7_09_v3
123727
A
AAAAT
4
INS
A
AAAAT
4
INS
1
TP
PF3D7_0902800
False


Pf3D7_09_v3
124079
A
AATATATATATATAT
14
INS
A
AATATATATATATATATAT
18
INS
1
TP
PF3D7_0902800
False


Pf3D7_09_v3
124874
None
None
None
None
G
GTA
2
INS
1
FN
PF3D7_0902800
False


Pf3D7_10_v3
1350698
None
None
None
None
A
AATATATAT
8
INS
1
FN
None
False


Pf3D7_10_v3
1351023
CTATA
C
-4
DEL
CTATATA
C
-6
DEL
1
TP
None
False


Pf3D7_10_v3
1351723
TTATATA
T
-6
DEL
TTATATA
T
-6
DEL
1
TP
PF3D7_1033800
False


Pf3D7_10_v3
1352424
ATATG
A
-4
DEL
ATATG
A
-4
DEL
1
TP
PF3D7_1033800
False


Pf3D7_12_v3
1485941
TTATATA
T
-6
DEL
TTATATA
T
-6
DEL
1
TP
None
False


Pf3D7_12_v3
1915676
AATATATATATATAT
A
-14
DEL
AATATATATATATAT
A
-14
DEL
1
TP
None
False


Pf3D7_12_v3
1917600
ATATATATATATATTTATT
A
-18
DEL
ATATATATATATATTTATT
A
-18
DEL
1
TP
PF3D7_1246100
False


Pf3D7_13_v3
974578
None
None
None
None
T
TATATTATATATA
12
INS
1
FN
None
False


Pf3D7_13_v3
974756
None
None
None
None
T
TATTTTTTTTTA
11
INS
1
FN
None
False


Pf3D7_13_v3
975154
GTATATATATATATATA
G
-16
DEL
GTATATATATATATATA
G
-16
DEL
1
TP
None
False


Pf3D7_13_v3
975571
None
None
None
None
A
ATGGAAAATAAAAATGACAATG
21
INS
1
FN
PF3D7_1323500
True


Pf3D7_13_v3
1419029
None
None
None
None
T
TTATA
4
INS
1
FN
None
False


Pf3D7_13_v3
1498640
TAAATAATAATAATAA
T
-15
DEL
None
None
None
None
1
FP
PF3D7_1337200
True


Pf3D7_14_v3
1369204
AAATATAATAT
A
-10
DEL
None
None
None
None
1
FP
PF3D7_1434200
False


Pf3D7_14_v3
1369261
None
None
None
None
GATATATATATATATATAT
G
-18
DEL
1
FN
PF3D7_1434200
False


Pf3D7_14_v3
1369466
G
GTATATATA
8
INS
GTA
G
-2
DEL
1
TP
PF3D7_1434200
False


Pf3D7_14_v3
1369652
AATAT
A
-4
DEL
AATATATAT
A
-8
DEL
1
TP
PF3D7_1434200
False


Pf3D7_14_v3
1954838
None
None
None
None
G
GC
1
INS
1
FN
PF3D7_1447900
True


Pf3D7_14_v3
1956720
ATAT
A
-3
DEL
ATAT
A
-3
DEL
1
TP
PF3D7_1447900
True


Pf3D7_14_v3
1957265
None
None
None
None
G
GTTA
3
INS
1
FN
PF3D7_1447900
True


Pf3D7_14_v3
1957957
None
None
None
None
A
AATATATAT
8
INS
1
FN
None
False


Pf3D7_14_v3
1987944
C
CTATTAT
6
INS
None
None
None
None
1
FP
PF3D7_1448500
True


Pf3D7_14_v3
1991837
TTTA
T
-3
DEL
None
None
None
None
1
FP
PF3D7_1448500
True


Pf3D7_14_v3
1994431
AATT
A
-3
DEL
None
None
None
None
1
FP
PF3D7_1448500
True



In [ ]:

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_03_v3	545588	AAAATATATATAAAT	A	-14	DEL	None	None	1	FP	None	False
Pf3D7_04_v3	343440	ACTTTTATTATCGTCAGGTTTG	A	-21	DEL	None	None	1	FP	PF3D7_0406500	True
Pf3D7_04_v3	375115	AATGTAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATGATGAAAATG	A	-72	DEL	None	None	1	FP	PF3D7_0407600	True
Pf3D7_09_v3	184870	GTAATACATATAAAAAATTATTAATATATATATAATAAAA	G	-39	DEL	None	None	1	FP	None	False
Pf3D7_10_v3	1418531	TTTTTTTATATTTTTCTATATTTCTTATATTTTTA	T	-34	DEL	None	None	1	FP	None	False

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_01_v3	191069	CA	C	None	None	-1	DEL	1	FN	PF3D7_0104300	True
Pf3D7_01_v3	273111	T	TTATATATATATATATATATATA	None	None	22	INS	1	FN	PF3D7_0106400	False
Pf3D7_01_v3	287205	T	TTATATA	None	None	6	INS	1	FN	PF3D7_0106800	False
Pf3D7_01_v3	288488	A	AT	None	None	1	INS	1	FN	PF3D7_0106800	False
Pf3D7_01_v3	291538	A	AATGAAAAAG	None	None	9	INS	1	FN	PF3D7_0106900	True

0\|svtype	1\|n	2\|TP	3\|FP	4\|FN	5\|FDR	6\|sensitivity
all	450	368	6	76	0.016042780748663103	0.8288288288288288
SNP coding	42	34	0	8	0.0	0.8095238095238095
SNP non-coding	219	215	0	4	0.0	0.9817351598173516
INDEL coding	28	17	3	8	0.15	0.68
INDEL non-coding	161	102	3	56	0.02857142857142857	0.6455696202531646

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_01_v3	555445	TA	T	-1	DEL	None	None	1	FP	None	False
Pf3D7_02_v3	402926	CA	C	-1	DEL	None	None	1	FP	None	False
Pf3D7_02_v3	827872	GA	G	-1	DEL	None	None	1	FP	None	False
Pf3D7_03_v3	545588	AAAATATATATAAAT	A	-14	DEL	None	None	1	FP	None	False
Pf3D7_03_v3	604213	TA	T	-1	DEL	None	None	1	FP	None	False

0\|svtype	1\|n	2\|TP	3\|FP	4\|FN	5\|FDR	6\|sensitivity
all	399	311	63	25	0.16844919786096257	0.9255952380952381
SNP coding	35	34	0	1	0.0	0.9714285714285714
SNP non-coding	215	214	1	0	0.004651162790697674	1.0
INDEL coding	24	8	12	4	0.6	0.6666666666666666
INDEL non-coding	125	55	50	20	0.47619047619047616	0.7333333333333333

0\|svtype	1\|n	2\|TP	4\|FN	6\|sensitivity
all	444	336	108	0.7567567567567568
SNP coding	42	35	7	0.8333333333333334
SNP non-coding	219	214	5	0.9771689497716894
INDEL coding	25	12	13	0.48
INDEL non-coding	158	75	83	0.47468354430379744

0\|gene	1\|count	2\|frequency
None	49	0.12311557788944724
PF3D7_0106300	6	0.01507537688442211
PF3D7_0207300	8	0.020100502512562814
PF3D7_0207400	5	0.01256281407035176
PF3D7_0207500	4	0.010050251256281407
PF3D7_0207600	11	0.02763819095477387
PF3D7_0207700	8	0.020100502512562814
PF3D7_0207800	4	0.010050251256281407
PF3D7_0207900	14	0.035175879396984924
PF3D7_0208000	7	0.017587939698492462
PF3D7_0220800	21	0.052763819095477386
PF3D7_0304600	12	0.03015075376884422
PF3D7_0402300	15	0.03768844221105527
PF3D7_0417200	1	0.002512562814070352
PF3D7_0424200	3	0.007537688442211055
PF3D7_0508000	4	0.010050251256281407
PF3D7_0620400	5	0.01256281407035176
PF3D7_0708400	9	0.022613065326633167
PF3D7_0709100	19	0.04773869346733668
PF3D7_0709300	32	0.08040201005025126
PF3D7_0804800	1	0.002512562814070352
PF3D7_0831600	23	0.05778894472361809
PF3D7_0902800	4	0.010050251256281407
PF3D7_0905400	6	0.01507537688442211
PF3D7_0929400	3	0.007537688442211055
PF3D7_0930300	41	0.10301507537688442
PF3D7_0935800	7	0.017587939698492462
PF3D7_1033800	3	0.007537688442211055
PF3D7_1035200	1	0.002512562814070352
PF3D7_1115700	13	0.032663316582914576
PF3D7_1133400	29	0.0728643216080402
PF3D7_1246100	1	0.002512562814070352
PF3D7_1323500	2	0.005025125628140704
PF3D7_1335000	1	0.002512562814070352
PF3D7_1335100	5	0.01256281407035176
PF3D7_1337200	3	0.007537688442211055
PF3D7_1434200	3	0.007537688442211055
PF3D7_1447900	13	0.032663316582914576
PF3D7_1467300	2	0.005025125628140704

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_02_v3	841385	AAGGTAATGTATTATATAATAAATATAATATTACAAAATTGTTATCTGCATTTGTTATTTTTATGAAATCATTATAAATCATTATTTAACTTTTTTTTCTTTTTGTTTTTAAT	A	-112	DEL	None	None	1	FP	PF3D7_0220800	True
Pf3D7_02_v3	841657	TGGTATATTATAAAATAGAAACTAAGAAAAAAAATTAAATTAATTGAATATATATATATTTTTATATTGTTTAATTTTTTTTATTTTA	T	-87	DEL	None	None	1	FP	PF3D7_0220800	True
Pf3D7_03_v3	221965	GGGGTTTGCATTTGGGTTTGCATTT	G	-24	DEL	None	None	1	FP	PF3D7_0304600	True
Pf3D7_04_v3	141594	CAAACGATATTGATGAAATAAACGATATTGATGAAAT	C	-36	DEL	None	None	1	FP	PF3D7_0402300	True
Pf3D7_07_v3	382251	T	TGAA	3	INS	None	None	1	FP	PF3D7_0708400	True

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_01_v3	266073	A	ATT	None	None	2	INS	1	FN	PF3D7_0106300	False
Pf3D7_02_v3	290368	G	GATAT	None	None	4	INS	1	FN	PF3D7_0207300	False
Pf3D7_02_v3	296617	A	AATATATATATAT	None	None	12	INS	1	FN	PF3D7_0207400	False
Pf3D7_02_v3	297374	T	TA	None	None	1	INS	1	FN	PF3D7_0207400	False
Pf3D7_02_v3	305614	G	GTATATATATATA	None	None	12	INS	1	FN	PF3D7_0207600	False

0\|svtype	1\|n	2\|TP	3\|FP	4\|FN	5\|FDR	6\|sensitivity
all	398	355	11	32	0.030054644808743168	0.917312661498708
SNP coding	272	271	0	1	0.0	0.9963235294117647
SNP non-coding	29	29	0	0	0.0	1.0
INDEL coding	31	11	8	12	0.42105263157894735	0.4782608695652174
INDEL non-coding	66	44	3	19	0.06382978723404255	0.6984126984126984

0\|svtype	1\|n	2\|TP	4\|FN	6\|sensitivity
all	387	367	20	0.9483204134366925
SNP coding	272	272	0	1.0
SNP non-coding	29	29	0	1.0
INDEL coding	23	19	4	0.8260869565217391
INDEL non-coding	63	47	16	0.746031746031746

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|discovery
Pf3D7_01_v3	108579	A	ATATT	4	INS	True
Pf3D7_01_v3	190317	CATAATA	C	-6	DEL	True
Pf3D7_01_v3	237483	AATAT	A	-4	DEL	True
Pf3D7_01_v3	273111	T	TTATA	4	INS	True
Pf3D7_01_v3	286838	AATATATATATAT	A	-12	DEL	True

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_01_v3	102198	G	A	None	None	0	SNP	1	FN	PF3D7_0102200	True
Pf3D7_01_v3	191069	CA	C	None	None	-1	DEL	1	FN	PF3D7_0104300	True
Pf3D7_01_v3	202364	G	A	None	None	0	SNP	1	FN	None	False
Pf3D7_01_v3	219230	C	A	None	None	0	SNP	1	FN	None	False
Pf3D7_01_v3	273111	T	TTATATATATATATATATATATA	None	None	22	INS	1	FN	PF3D7_0106400	False

0\|svtype	1\|n	2\|TP	3\|FP	4\|FN	5\|FDR	6\|sensitivity
all	467	33	23	411	0.4107142857142857	0.07432432432432433
SNP coding	45	0	3	42	1.0	0.0
SNP non-coding	219	1	0	218	0.0	0.0045662100456621
INDEL coding	27	7	2	18	0.2222222222222222	0.28
INDEL non-coding	176	25	18	133	0.4186046511627907	0.15822784810126583

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|discovery
Pf3D7_01_v3	93901	AATATATATAT	A	-10	DEL	True
Pf3D7_01_v3	94590	T	TATAC	4	INS	True
Pf3D7_01_v3	94993	CATATAT	C	-6	DEL	True
Pf3D7_01_v3	95518	G	T	0	SNP	True
Pf3D7_01_v3	95621	T	A	0	SNP	True

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_01_v3	103413	A	AAT	2	INS	None	None	1	FP	None	False
Pf3D7_01_v3	117342	AAT	A	-2	DEL	None	None	1	FP	None	False
Pf3D7_01_v3	122104	A	AATATAT	6	INS	None	None	1	FP	None	False
Pf3D7_01_v3	123313	C	CAT	2	INS	None	None	1	FP	None	False
Pf3D7_01_v3	123589	T	TTATA	4	INS	None	None	1	FP	None	False

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_svlen	7\|truth_svtype	8\|truth_coverage	9\|status	10\|gene	11\|is_coding
Pf3D7_01_v3	101759	T	C	None	None	0	SNP	1	FN	PF3D7_0102200	True
Pf3D7_01_v3	101784	A	T	None	None	0	SNP	1	FN	PF3D7_0102200	True
Pf3D7_01_v3	117524	C	CAAAAGAAAAAAAGAA	None	None	15	INS	1	FN	None	False
Pf3D7_01_v3	123223	A	AT	None	None	1	INS	1	FN	None	False
Pf3D7_01_v3	123311	A	C	None	None	0	SNP	1	FN	None	False

0\|svtype	1\|n	2\|TP	3\|FP	4\|FN	5\|FDR	6\|sensitivity
all	36686	23302	4705	8679	0.1679937158567501	0.7286201181951784
SNP coding	8884	6425	415	2044	0.06067251461988304	0.7586491911677884
SNP non-coding	6428	3524	115	2789	0.03160208848584776	0.5582132108347854
INDEL coding	3845	2382	486	977	0.1694560669456067	0.7091396248883596
INDEL non-coding	17529	10971	3689	2869	0.2516371077762619	0.7927023121387283

0\|CHROM	1\|POS	2\|REF	3\|ALT	4\|svlen	5\|svtype	6\|truth_REF	7\|truth_ALT	8\|truth_svlen	9\|truth_svtype	10\|truth_coverage	11\|status	12\|gene	13\|is_coding
Pf3D7_01_v3	265603	CAT	C	-2	DEL	CAT	C	-2	DEL	1	TP	PF3D7_0106300	False
Pf3D7_01_v3	266480	A	T	0	SNP	A	T	0	SNP	1	TP	PF3D7_0106300	True
Pf3D7_01_v3	266640	TCTC	T	-3	DEL	TCTC	T	-3	DEL	1	TP	PF3D7_0106300	True
Pf3D7_01_v3	267777	C	T	0	SNP	C	T	0	SNP	1	TP	PF3D7_0106300	True
Pf3D7_01_v3	269132	C	T	0	SNP	C	T	0	SNP	1	TP	PF3D7_0106300	True
Pf3D7_02_v3	290368	G	GATAT	4	INS	G	GATAT	4	INS	1	TP	PF3D7_0207300	False
Pf3D7_02_v3	290569	T	A	0	SNP	T	A	0	SNP	1	TP	PF3D7_0207300	True
Pf3D7_02_v3	290572	T	A	0	SNP	T	A	0	SNP	1	TP	PF3D7_0207300	True
Pf3D7_02_v3	290667	G	C	0	SNP	G	C	0	SNP	1	TP	PF3D7_0207300	True
Pf3D7_02_v3	291565	A	AATATATATATATAT	14	INS	A	AATATATATATATAT	14	INS	1	TP	PF3D7_0207300	False
Pf3D7_02_v3	291983	C	T	0	SNP	C	T	0	SNP	1	TP	PF3D7_0207300	True
Pf3D7_02_v3	292241	C	CATAT	4	INS	C	CATATAT	6	INS	1	TP	PF3D7_0207300	False
Pf3D7_02_v3	292382	A	T	0	SNP	A	T	0	SNP	1	TP	PF3D7_0207300	True
Pf3D7_02_v3	292765	A	T	0	SNP	A	T	0	SNP	1	TP	None	False
Pf3D7_02_v3	292771	T	A	0	SNP	T	A	0	SNP	1	TP	None	False
Pf3D7_02_v3	292773	None	None	None	None	T	A	0	SNP	1	FN	None	False
Pf3D7_02_v3	292775	None	None	None	None	T	A	0	SNP	1	FN	None	False
Pf3D7_02_v3	296350	AATATATAT	A	-8	DEL	AATATATAT	A	-8	DEL	1	TP	PF3D7_0207400	False
Pf3D7_02_v3	296617	A	AATATATATATATAT	14	INS	A	AATATATATATAT	12	INS	1	TP	PF3D7_0207400	False
Pf3D7_02_v3	297388	None	None	None	None	T	A	0	SNP	1	FN	PF3D7_0207400	False

0\|gene	1\|count	2\|frequency
PF3D7_0930300	41	0.09534883720930233
None	38	0.08837209302325581
PF3D7_0709300	32	0.07441860465116279
PF3D7_1133400	29	0.06744186046511629
PF3D7_0220800	28	0.06511627906976744
PF3D7_0831600	25	0.05813953488372093
PF3D7_0709100	18	0.04186046511627907
PF3D7_0402300	15	0.03488372093023256
PF3D7_0207600	15	0.03488372093023256
PF3D7_1115700	15	0.03488372093023256
PF3D7_0207900	15	0.03488372093023256
PF3D7_1335100	13	0.030232558139534883
PF3D7_1448500	12	0.027906976744186046
PF3D7_0304600	11	0.02558139534883721
PF3D7_1447900	11	0.02558139534883721
PF3D7_0207700	10	0.023255813953488372
PF3D7_1337200	8	0.018604651162790697
PF3D7_0708400	8	0.018604651162790697
PF3D7_0207300	8	0.018604651162790697
PF3D7_0208000	7	0.01627906976744186
PF3D7_0935800	7	0.01627906976744186
PF3D7_0207400	6	0.013953488372093023
PF3D7_0905400	6	0.013953488372093023
PF3D7_0106300	5	0.011627906976744186
PF3D7_0207800	5	0.011627906976744186
PF3D7_0620400	5	0.011627906976744186
PF3D7_0902800	4	0.009302325581395349
PF3D7_0808200	4	0.009302325581395349
PF3D7_0508000	4	0.009302325581395349
PF3D7_1434200	4	0.009302325581395349
PF3D7_0207500	4	0.009302325581395349
PF3D7_0929400	3	0.0069767441860465115
PF3D7_0424200	3	0.0069767441860465115
PF3D7_1467300	2	0.004651162790697674
PF3D7_1323500	2	0.004651162790697674
PF3D7_1033800	2	0.004651162790697674
PF3D7_0417200	1	0.002325581395348837
PF3D7_1035200	1	0.002325581395348837
PF3D7_0804800	1	0.002325581395348837
PF3D7_1335000	1	0.002325581395348837
PF3D7_1246100	1	0.002325581395348837

0\|svtype	1\|n	2\|TP	3\|FP	4\|FN	5\|FDR	6\|sensitivity
all	430	267	63	100	0.19090909090909092	0.7275204359673024
SNP coding	312	217	40	55	0.1556420233463035	0.7977941176470589
SNP non-coding	32	12	3	17	0.2	0.41379310344827586
INDEL coding	26	7	7	12	0.5	0.3684210526315789
INDEL non-coding	60	31	13	16	0.29545454545454547	0.6595744680851063