Extract mutations in VGSC

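This notebook extracts data on all mutations in the voltage-gated sodium channel (VGSC) gene, AGAP004707, and annotates their effects on each VGSC transcript model.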

Setup


In [1]:
# run the shared setup notebook; names used below without a local definition
# (allel, etl, veff, pandas, region_vgsc, phase1_ar3, phase1_ar31, geneset_to_pandas)
# are presumably provided by it -- TODO confirm
%run setup.ipynb



In [2]:
# download gene annotations (AgamP4.4 base features, gzipped GFF3) from vectorbase
# into ../data; --no-clobber asks wget not to overwrite an existing copy
!wget \
    --no-clobber \
    -O ../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz \
    https://www.vectorbase.org/download/anopheles-gambiae-pestbasefeaturesagamp44gff3gz


--2018-08-01 15:06:26--  https://www.vectorbase.org/download/anopheles-gambiae-pestbasefeaturesagamp44gff3gz
Resolving www.vectorbase.org (www.vectorbase.org)... 129.74.255.228
Connecting to www.vectorbase.org (www.vectorbase.org)|129.74.255.228|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.vectorbase.org/sites/default/files/ftp/downloads/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz [following]
--2018-08-01 15:06:27--  https://www.vectorbase.org/sites/default/files/ftp/downloads/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz
Reusing existing connection to www.vectorbase.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2688173 (2.6M) [application/x-gzip]
Saving to: ‘../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz’

100%[======================================>] 2,688,173   2.78MB/s   in 0.9s   

2018-08-01 15:06:28 (2.78 MB/s) - ‘../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz’ saved [2688173/2688173]


In [3]:
# download the Davies et al. (2007) gene models (GFF3) into ../data;
# --no-clobber asks wget not to overwrite an existing copy
!wget \
    --no-clobber \
    -O ../data/davies_vgsc_model_20170125.gff3 \
    http://alimanfoo.github.io/assets/davies_vgsc_model_20170125.gff3


--2018-08-01 15:06:33--  http://alimanfoo.github.io/assets/davies_vgsc_model_20170125.gff3
Resolving alimanfoo.github.io (alimanfoo.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to alimanfoo.github.io (alimanfoo.github.io)|185.199.108.153|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 48092 (47K) [application/octet-stream]
Saving to: ‘../data/davies_vgsc_model_20170125.gff3’

100%[======================================>] 48,092      --.-K/s   in 0.009s  

2018-08-01 15:06:34 (4.84 MB/s) - ‘../data/davies_vgsc_model_20170125.gff3’ saved [48092/48092]


In [4]:
# load the vectorbase geneset, keeping only the ID and Parent attributes,
# and convert it in one step so the name is only ever bound to the converted table
geneset_agamp44 = geneset_to_pandas(
    allel.FeatureTable.from_gff3(
        '../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz',
        attributes=['ID', 'Parent'],
    )
)
geneset_agamp44.head()


Out[4]:
seqid source type start end score strand phase ID Parent
0 2L VectorBase chromosome 1 49364325 -1.0 . -1 2L b'.'
1 2L VectorBase gene 157348 186936 -1.0 - -1 AGAP004677 b'.'
2 2L VectorBase mRNA 157348 181305 -1.0 - -1 AGAP004677-RA AGAP004677
3 2L VectorBase three_prime_UTR 157348 157495 -1.0 - -1 b'.' AGAP004677-RA
4 2L VectorBase exon 157348 157623 -1.0 - -1 b'.' AGAP004677-RA

In [5]:
# subset to VGSC; take a copy so the edit below cannot touch the full geneset
geneset_agamp44_vgsc = geneset_agamp44.query(region_vgsc.query_str).copy()
# replace CDS IDs as not informative; assign through .loc instead of writing into
# the array returned by .values, which is not guaranteed to be a view on the frame
geneset_agamp44_vgsc.loc[geneset_agamp44_vgsc['type'] == 'CDS', 'ID'] = ''
geneset_agamp44_vgsc['type'].value_counts()


Out[5]:
exon    93
CDS     93
mRNA     3
gene     1
Name: type, dtype: int64

In [6]:
# load the Davies geneset, keeping only the ID and Parent attributes,
# and convert it in one step so the name is only ever bound to the converted table
geneset_davies = geneset_to_pandas(
    allel.FeatureTable.from_gff3(
        '../data/davies_vgsc_model_20170125.gff3',
        attributes=['ID', 'Parent'],
    )
)
geneset_davies.head()


Out[6]:
seqid source type start end score strand phase ID Parent
0 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C1N2 AGAP004707
1 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C3N2 AGAP004707
2 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C5N2 AGAP004707
3 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C7N2 AGAP004707
4 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C8N2 AGAP004707

In [7]:
# make a combined geneset: the vectorbase VGSC features followed by the Davies models
geneset_vgsc_combined = pandas.concat(
    [geneset_agamp44_vgsc, geneset_davies],
)
# show the transcript (mRNA) features from both sources
geneset_vgsc_combined[geneset_vgsc_combined['type'] == 'mRNA']


Out[7]:
seqid source type start end score strand phase ID Parent
666 2L VectorBase mRNA 2358158 2431617 -1.0 + -1 AGAP004707-RA AGAP004707
729 2L VectorBase mRNA 2358158 2431617 -1.0 + -1 AGAP004707-RB AGAP004707
792 2L VectorBase mRNA 2358158 2431617 -1.0 + -1 AGAP004707-RC AGAP004707
0 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C1N2 AGAP004707
1 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C3N2 AGAP004707
2 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C5N2 AGAP004707
3 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C7N2 AGAP004707
4 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C8N2 AGAP004707
5 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C10N2 AGAP004707
6 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C11N2 AGAP004707
7 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C1N9 AGAP004707
8 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C8N9 AGAP004707
9 2L Davies et al. (2007) mRNA 2358158 2431617 -1.0 + -1 Davies-C1N9ck AGAP004707

In [8]:
# setup a variant annotator for seqid 2L, reading features from both the
# vectorbase and the Davies GFF3 files
# NOTE(review): the reference FASTA is named AgamP3 while the vectorbase GFF3 is
# AgamP4.4 -- presumably both use the same chromosome coordinates; confirm
annotator = veff.Annotator(
    fasta_path='../ngs.sanger.ac.uk/production/ag1000g/phase1/AR3/genome/Anopheles-gambiae-PEST_CHROMOSOMES_AgamP3.fa', 
    gff3_path=['../data/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.4.gff3.gz',
               '../data/davies_vgsc_model_20170125.gff3'],
    seqid='2L'
)

In [9]:
# identify VGSC transcripts: feature IDs of the children of gene AGAP004707
transcript_ids = [
    child.feature_id
    for child in annotator.get_children('AGAP004707')
]
transcript_ids


Out[9]:
['AGAP004707-RA',
 'AGAP004707-RB',
 'AGAP004707-RC',
 'Davies-C1N2',
 'Davies-C3N2',
 'Davies-C5N2',
 'Davies-C7N2',
 'Davies-C8N2',
 'Davies-C10N2',
 'Davies-C11N2',
 'Davies-C1N9',
 'Davies-C8N9',
 'Davies-C1N9ck']

In [10]:
# tabulate Davies exons: CDS features merged on their (start, end) interval,
# with the columns renamed to an exon_* vocabulary
tbl_davies_exons = (
    etl
    .fromdataframe(geneset_davies)
    .eq('type', 'CDS')
    .cutout('Parent', 'source', 'type', 'score', 'strand', 'phase')
    .merge(key=('start', 'end'))
    .rename({
        'seqid': 'exon_seqid',
        'ID': 'exon',
        'start': 'exon_start',
        'end': 'exon_end',
    })
    .movefield('exon_seqid', 0)
)
tbl_davies_exons.displayall()


0|exon_seqid 1|exon_start 2|exon_end 3|exon
2L 2358158 2358304 1
2L 2359640 2359672 2j
2L 2361989 2362144 3
2L 2381065 2381270 4
2L 2382270 2382398 5
2L 2385694 2385785 6
2L 2390129 2390341 7
2L 2390425 2390485 8
2L 2390594 2390738 9
2L 2391156 2391320 10
2L 2399898 2400173 11i+
2L 2401549 2401569 12
2L 2402447 2402509 13a
2L 2403086 2403269 14
2L 2407622 2407818 15
2L 2407894 2407993 16
2L 2408071 2408139 17
2L 2416794 2417071 18b+
2L 2417185 2417358 19
2L 2417637 2417799 20c
2L 2421385 2421547 20d
2L 2422468 2422655 21
2L 2422713 2422920 22
2L 2424207 2424418 23f+
2L 2424237 2424418 23f-
2L 2424651 2424870 24h+
2L 2424729 2424870 24h-
2L 2424946 2425211 25
2L 2425278 2425451 26
2L 2425770 2425892 27k
2L 2427988 2428110 27l
2L 2429097 2429219 28
2L 2429282 2429476 29
2L 2429556 2429801 30
2L 2429872 2430142 31
2L 2430224 2430528 32
2L 2430601 2431617 33

Extract table of variants


In [11]:
# handle on the variant callset exposed by phase1_ar31 (not defined in this notebook)
callset = phase1_ar31.callset
callset


Out[11]:
<zarr.hierarchy.Group '/' read-only>

In [12]:
# what fields are available? (names under the 2L variants group, comma-separated)
print(*callset['2L/variants'], sep=', ')


ABHet, ABHom, AC, AF, ALT, AN, ANN, Accessible, BaseCounts, BaseQRankSum, CHROM, Coverage, CoverageMQ0, DP, DS, Dels, FILTER_FS, FILTER_HRun, FILTER_HighCoverage, FILTER_HighMQ0, FILTER_LowCoverage, FILTER_LowMQ, FILTER_LowQual, FILTER_NoCoverage, FILTER_PASS, FILTER_QD, FILTER_ReadPosRankSum, FILTER_RefN, FILTER_RepeatDUST, FS, HRun, HW, HaplotypeScore, HighCoverage, HighMQ0, InbreedingCoeff, LOF, LowCoverage, LowMQ, LowPairing, MLEAC, MLEAF, MQ, MQ0, MQRankSum, NDA, NMD, NoCoverage, OND, POS, QD, QUAL, REF, RPA, RU, ReadPosRankSum, RefMasked, RefN, RepeatDUST, RepeatMasker, RepeatTRF, STR, VariantType, is_snp, num_alleles, svlen

In [13]:
# what SNPEFF fields are available? (field names of the structured ANN dtype)
print(*callset['2L/variants/ANN'].dtype.names, sep=', ')


Allele, Annotation, Annotation_Impact, Gene_Name, Gene_ID, Feature_Type, Feature_ID, Transcript_BioType, Rank, HGVS_c, HGVS_p, cDNA_pos, cDNA_length, CDS_pos, CDS_length, AA_pos, AA_length, Distance

In [14]:
# sample metadata table
# NOTE(review): this comes from phase1_ar3 while the callset comes from
# phase1_ar31 -- presumably the sample order is the same in both; confirm
samples = phase1_ar3.df_samples
samples.head()


Out[14]:
ox_code src_code sra_sample_accession population country region contributor contact year m_s ... pca_3L_free_pc3 pca_3L_free_pc4 pca_2La_pc1 pca_2La_pc2 pca_2La_pc3 pca_2La_pc4 pca_2Rb_pc1 pca_2Rb_pc2 pca_2Rb_pc3 pca_2Rb_pc4
index
0 AB0085-C BF2-4 ERS223996 BFS Burkina Faso Pala Austin Burt Sam O'Loughlin 2012 S ... -8.290940 -18.542768 -55.511389 32.682143 -1.833739 -0.381984 -38.934301 31.939383 19.345606 7.362004
1 AB0087-C BF3-3 ERS224013 BFM Burkina Faso Bana Austin Burt Sam O'Loughlin 2012 M ... -38.603076 76.915617 -51.856633 27.401249 2.586488 0.643100 -10.072879 29.007266 -21.736087 -30.309562
2 AB0088-C BF3-5 ERS223991 BFM Burkina Faso Bana Austin Burt Sam O'Loughlin 2012 M ... -35.553340 73.985488 -50.942456 28.572207 3.072583 -0.643137 12.281744 22.288417 -43.661301 -51.557140
3 AB0089-C BF3-8 ERS224031 BFM Burkina Faso Bana Austin Burt Sam O'Loughlin 2012 M ... -36.621568 76.453993 -51.169247 29.414975 2.198434 0.013562 -10.664072 29.601333 -16.831559 -25.452854
4 AB0090-C BF3-10 ERS223936 BFM Burkina Faso Bana Austin Burt Sam O'Loughlin 2012 M ... -36.097756 72.036309 -48.607416 26.407532 1.643226 0.102582 12.897113 22.194444 -48.882378 -52.420487

5 rows × 38 columns


In [15]:
def tabulate_variants(callset, snpeff, seqid, start, end, pop_ids, subpops):
    """Build a table of variants for a given callset and genome region.

    The table has one row per alternate allele: a variant with ``num_alleles``
    alleles yields ``num_alleles - 1`` rows, distinguished by the ``ALTIX``
    column (0-based index into ALT).

    Parameters
    ----------
    callset
        Indexed as ``callset[seqid]['variants'][<field>]`` and
        ``callset[seqid]['calldata/genotype']``.
    snpeff
        Indexed as ``snpeff[seqid]['variants']['ANN']``; may be the same
        object as `callset`.
    seqid
        Key of the chromosome/contig within `callset` and `snpeff`.
    start, end
        Region bounds, passed to ``SortedIndex.locate_range`` on POS.
    pop_ids
        Population IDs (keys of `subpops`); one ``AF_<pop>`` column is added
        per ID, in this order.
    subpops
        Mapping of population ID to sample indices, passed to
        ``count_alleles_subpops``.

    Returns
    -------
    A petl table with the variant fields, ``ALTIX``, ``SNPEFF_*`` annotation
    fields, ``AF_<pop>`` alternate allele frequencies and a ``check_allele``
    flag (True when there is no SNPEFF allele or it equals ALT).

    Notes
    -----
    Allele counts are computed with ``max_allele=3``.
    """

    # materialise once: pop_ids is iterated several times below
    pop_ids = list(pop_ids)

    variants = callset[seqid]['variants']
    ann = snpeff[seqid]['variants']['ANN']
    pos = allel.SortedIndex(variants['POS'])
    loc = pos.locate_range(start, end)
    genotype = allel.GenotypeArray(callset[seqid]['calldata/genotype'][loc])
    acs = genotype.count_alleles_subpops(max_allele=3, subpops=subpops)

    # fields emitted ahead of ALTIX (ALT and AC are indexed per alternate allele)
    allele_fields = [
        'CHROM',
        'POS',
        'num_alleles',
        'REF',
        'ALT',
        'AC',
    ]
    # site-level fields copied unchanged onto every allele row
    site_fields = [
        'FILTER_PASS',
        'NoCoverage',
        'LowCoverage',
        'HighCoverage',
        'LowMQ',
        'HighMQ0',
        'RepeatDUST',
        'RepeatMasker',
        'RepeatTRF',
        'FS',
        'HRun',
        'QD',
        'ReadPosRankSum',
    ]
    variants_fields = allele_fields + site_fields
    ann_fields = ['Allele', 'Annotation', 'HGVS_c', 'HGVS_p', 'Feature_ID', 'CDS_pos']
    text_fields = 'CHROM REF ALT Allele Annotation HGVS_c HGVS_p Feature_ID'.split()

    # extract columns; slice the annotations once rather than once per field
    ann_region = ann[loc]
    cols = (
        [variants[f][loc] for f in variants_fields] +
        [ann_region[f] for f in ann_fields] +
        [acs[p].to_frequencies() for p in pop_ids]
    )

    def to_text(v):
        # rows for second and later alleles carry None annotations, and a value
        # may already be text; pass both through instead of relying on the
        # table library to swallow the TypeError from str(v, 'ascii')
        if v is None or isinstance(v, str):
            return v
        return str(v, 'ascii')

    def split_alleles(row):
        for i in range(row.num_alleles - 1):
            # break down alleles
            out = [
                row['CHROM'],
                row['POS'],
                row['num_alleles'],
                row['REF'],
                row['ALT'][i],
                row['AC'][i],
                i,
            ]
            # add in remaining variant annotations
            out += [row[f] for f in site_fields]
            # SNPEFF annotation only applies to first allele
            if i == 0:
                out += [row[f] for f in ann_fields]
            else:
                out += [None] * len(ann_fields)
            # add in population allele frequencies (index 0 is the reference allele)
            out += [row[p][i + 1] for p in pop_ids]
            yield out

    out_header = allele_fields + ['ALTIX'] + site_fields + ann_fields + pop_ids

    tbl = (
        etl
        .fromcolumns(cols, header=variants_fields + ann_fields + pop_ids)
        .rowmapmany(split_alleles, header=out_header, failonerror=True)
        .convert(text_fields, to_text)
        .rename({f: 'SNPEFF_' + f for f in ann_fields})
        .rename({p: 'AF_%s' % p for p in pop_ids})
        .addfield('check_allele', lambda row: row['SNPEFF_Allele'] is None or row['SNPEFF_Allele'] == row['ALT'])
    )

    return tbl

In [16]:
# population IDs for which allele frequencies are tabulated
pop_ids = ['AOM', 'BFM', 'GWA', 'GNS', 'BFS', 'CMS', 'GAS', 'UGS', 'KES']

In [17]:
# map each population ID to the index values of the samples belonging to it
subpops = {
    pop: samples.loc[samples['population'] == pop].index.values.tolist()
    for pop in pop_ids
}

In [18]:
# build a table of variants from phase 1; the same callset supplies both the
# variant fields and the SNPEFF annotations
tbl_variants_phase1 = tabulate_variants(
    callset=callset,
    snpeff=callset,
    seqid=region_vgsc.seqid,
    start=region_vgsc.start,
    end=region_vgsc.end,
    pop_ids=pop_ids,
    subpops=subpops,
)
tbl_variants_phase1


Out[18]:
0|CHROM 1|POS 2|num_alleles 3|REF 4|ALT 5|AC 6|ALTIX 7|FILTER_PASS 8|NoCoverage 9|LowCoverage 10|HighCoverage 11|LowMQ 12|HighMQ0 13|RepeatDUST 14|RepeatMasker 15|RepeatTRF 16|FS 17|HRun 18|QD 19|ReadPosRankSum 20|SNPEFF_Allele 21|SNPEFF_Annotation 22|SNPEFF_HGVS_c 23|SNPEFF_HGVS_p 24|SNPEFF_Feature_ID 25|SNPEFF_CDS_pos 26|AF_AOM 27|AF_BFM 28|AF_GWA 29|AF_GNS 30|AF_BFS 31|AF_CMS 32|AF_GAS 33|AF_UGS 34|AF_KES 35|check_allele
2L 2358254 2 G A 1 0 True 0 0 10 0 0 False False False 9.8672 1 17.547 -0.049988 A missense_variant n.97G>A p.Asp33Asn AGAP004707-RA 97 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True
2L 2358316 2 T G 73 0 True 0 0 15 0 0 False False False 2.4844 0 16.438 1.4219 G intron_variant n.147+12T>G . AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.132727272727 0.0 0.0 0.0 True
2L 2358328 2 T C 2 0 True 0 0 14 0 0 False False False 2.7363 0 16.062 -0.646 C intron_variant n.147+24T>C . AGAP004707-RA -1 0.0 0.00724637681159 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True
2L 2358353 2 C T 1 0 True 0 1 15 0 0 False False False 1.9512 0 9.8594 1.1582 T intron_variant n.147+49C>T . AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True
2L 2358405 2 T A 1 0 True 0 6 14 0 0 False False False 20.844 1 10.859 1.1562 A intron_variant n.147+101T>A . AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True

...

Annotate effects for all transcripts


In [19]:
# effect names reported with an amino acid change
cds_effects = ['NON_SYNONYMOUS_CODING', 'SYNONYMOUS_CODING']
# effect names reported relative to the flanking CDS of an intron
intron_effects = ['INTRONIC', 'SPLICE_CORE', 'SPLICE_REGION']
# every effect retained when annotating variants
selected_effects = [*cds_effects, *intron_effects]

In [20]:
def lpop(l, default=None):
    """Return the first item of `l`, or `default` if `l` has no first item.

    Despite the name, nothing is removed: `l` is left unmodified.
    """
    try:
        first = l[0]
    except IndexError:
        return default
    return first

In [21]:
def transcript_effect(transcript_id):
    """Make a row function summarising a variant's effect on one transcript.

    The returned function reads ``row.VEFF`` and takes the first effect whose
    ``transcript_id`` matches. It returns:

    - ``(effect, aa_change)`` when the effect is in ``cds_effects``;
    - ``(effect, intron_cds_5prime, intron_5prime_dist, intron_cds_3prime,
      intron_3prime_dist)`` when the effect is in ``intron_effects``;
    - ``None`` when there is no matching effect or it is of any other kind.
    """
    def f(row):
        matching = [eff for eff in row.VEFF if eff.transcript_id == transcript_id]
        eff = lpop(matching)
        if not eff:
            return None
        if eff.effect in cds_effects:
            return (eff.effect, eff.aa_change)
        if eff.effect in intron_effects:
            return (
                eff.effect,
                eff.intron_cds_5prime,
                eff.intron_5prime_dist,
                eff.intron_cds_3prime,
                eff.intron_3prime_dist,
            )
        return None
    return f

In [22]:
# join in Davies exon information and collect the selected effects of each
# variant in a temporary VEFF column
tbl_variants_phase1_veff = (
    tbl_variants_phase1
    .intervalleftjoin(
        # don't include shorter exon alternatives
        tbl_davies_exons.select('exon', lambda v: v[-1] != '-'),
        lkey='CHROM', rkey='exon_seqid', lstart='POS', rstart='exon_start', lstop='POS', rstop='exon_end', include_stop=True)
    .cutout('exon_seqid')
    .addfield('VEFF', lambda row: [e for e in annotator.get_effects(chrom=row.CHROM, pos=row.POS, ref=row.REF, alt=row.ALT)
                                   if e.effect in selected_effects])
)

# add one effect column per transcript, however many transcripts there are
# (transcript_effect binds the ID as an argument, so each column gets its own)
for transcript_id in transcript_ids:
    tbl_variants_phase1_veff = tbl_variants_phase1_veff.addfield(
        transcript_id, transcript_effect(transcript_id)
    )

# drop the temporary column, normalise '.' and '' to None, and cache the result
tbl_variants_phase1_eff = (
    tbl_variants_phase1_veff
    .cutout('VEFF')
    .replaceall('.', None)
    .replaceall('', None)
    .cache()
)

In [23]:
# preview the annotated table, limited to 20 rows
tbl_variants_phase1_eff.display(20)


0|CHROM 1|POS 2|num_alleles 3|REF 4|ALT 5|AC 6|ALTIX 7|FILTER_PASS 8|NoCoverage 9|LowCoverage 10|HighCoverage 11|LowMQ 12|HighMQ0 13|RepeatDUST 14|RepeatMasker 15|RepeatTRF 16|FS 17|HRun 18|QD 19|ReadPosRankSum 20|SNPEFF_Allele 21|SNPEFF_Annotation 22|SNPEFF_HGVS_c 23|SNPEFF_HGVS_p 24|SNPEFF_Feature_ID 25|SNPEFF_CDS_pos 26|AF_AOM 27|AF_BFM 28|AF_GWA 29|AF_GNS 30|AF_BFS 31|AF_CMS 32|AF_GAS 33|AF_UGS 34|AF_KES 35|check_allele 36|exon_start 37|exon_end 38|exon 39|AGAP004707-RA 40|AGAP004707-RB 41|AGAP004707-RC 42|Davies-C1N2 43|Davies-C3N2 44|Davies-C5N2 45|Davies-C7N2 46|Davies-C8N2 47|Davies-C10N2 48|Davies-C11N2 49|Davies-C1N9 50|Davies-C8N9 51|Davies-C1N9ck
2L 2358254 2 G A 1 0 True 0 0 10 0 0 False False False 9.8672 1 17.547 -0.049988 A missense_variant n.97G>A p.Asp33Asn AGAP004707-RA 97 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2358158 2358304 1 ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N')
2L 2358316 2 T G 73 0 True 0 0 15 0 0 False False False 2.4844 0 16.438 1.4219 G intron_variant n.147+12T>G None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.132727272727 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 12, 'AGAP004707-PA', -3691) ('INTRONIC', 'AGAP004707-PB', 12, 'AGAP004707-PB', -3691) ('INTRONIC', 'AGAP004707-PC', 12, 'AGAP004707-PC', -3691) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '2j', -1324) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '2j', -1324) ('INTRONIC', '1', 12, '3', -3673)
2L 2358328 2 T C 2 0 True 0 0 14 0 0 False False False 2.7363 0 16.062 -0.646 C intron_variant n.147+24T>C None AGAP004707-RA -1 0.0 0.00724637681159 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 24, 'AGAP004707-PA', -3679) ('INTRONIC', 'AGAP004707-PB', 24, 'AGAP004707-PB', -3679) ('INTRONIC', 'AGAP004707-PC', 24, 'AGAP004707-PC', -3679) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '2j', -1312) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '2j', -1312) ('INTRONIC', '1', 24, '3', -3661)
2L 2358353 2 C T 1 0 True 0 1 15 0 0 False False False 1.9512 0 9.8594 1.1582 T intron_variant n.147+49C>T None AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 49, 'AGAP004707-PA', -3654) ('INTRONIC', 'AGAP004707-PB', 49, 'AGAP004707-PB', -3654) ('INTRONIC', 'AGAP004707-PC', 49, 'AGAP004707-PC', -3654) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '2j', -1287) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '2j', -1287) ('INTRONIC', '1', 49, '3', -3636)
2L 2358405 2 T A 1 0 True 0 6 14 0 0 False False False 20.844 1 10.859 1.1562 A intron_variant n.147+101T>A None AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 101, 'AGAP004707-PA', -3602) ('INTRONIC', 'AGAP004707-PB', 101, 'AGAP004707-PB', -3602) ('INTRONIC', 'AGAP004707-PC', 101, 'AGAP004707-PC', -3602) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '2j', -1235) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '2j', -1235) ('INTRONIC', '1', 101, '3', -3584)
2L 2358441 2 A T 78 0 False 0 6 17 0 0 False False False 2.4805 1 21.703 0.94385 T intron_variant n.147+137A>T None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.0145454545455 0.625 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 137, 'AGAP004707-PA', -3566) ('INTRONIC', 'AGAP004707-PB', 137, 'AGAP004707-PB', -3566) ('INTRONIC', 'AGAP004707-PC', 137, 'AGAP004707-PC', -3566) ('INTRONIC', '1', 137, '3', -3548) ('INTRONIC', '1', 137, '3', -3548) ('INTRONIC', '1', 137, '3', -3548) ('INTRONIC', '1', 137, '3', -3548) ('INTRONIC', '1', 137, '2j', -1199) ('INTRONIC', '1', 137, '3', -3548) ('INTRONIC', '1', 137, '3', -3548) ('INTRONIC', '1', 137, '3', -3548) ('INTRONIC', '1', 137, '2j', -1199) ('INTRONIC', '1', 137, '3', -3548)
2L 2358463 2 G T 5 0 False 0 4 16 0 0 False False False 22.0 0 15.211 -0.42798 T intron_variant n.147+159G>T None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0568181818182 True None None None ('INTRONIC', 'AGAP004707-PA', 159, 'AGAP004707-PA', -3544) ('INTRONIC', 'AGAP004707-PB', 159, 'AGAP004707-PB', -3544) ('INTRONIC', 'AGAP004707-PC', 159, 'AGAP004707-PC', -3544) ('INTRONIC', '1', 159, '3', -3526) ('INTRONIC', '1', 159, '3', -3526) ('INTRONIC', '1', 159, '3', -3526) ('INTRONIC', '1', 159, '3', -3526) ('INTRONIC', '1', 159, '2j', -1177) ('INTRONIC', '1', 159, '3', -3526) ('INTRONIC', '1', 159, '3', -3526) ('INTRONIC', '1', 159, '3', -3526) ('INTRONIC', '1', 159, '2j', -1177) ('INTRONIC', '1', 159, '3', -3526)
2L 2358468 2 A C 150 0 False 0 4 17 0 0 False False False 1.668 0 19.812 -0.198 C intron_variant n.147+164A>C None AGAP004707-RA -1 0.0 0.0 0.0978260869565 0.0 0.0 0.0509090909091 0.0 0.52427184466 0.0568181818182 True None None None ('INTRONIC', 'AGAP004707-PA', 164, 'AGAP004707-PA', -3539) ('INTRONIC', 'AGAP004707-PB', 164, 'AGAP004707-PB', -3539) ('INTRONIC', 'AGAP004707-PC', 164, 'AGAP004707-PC', -3539) ('INTRONIC', '1', 164, '3', -3521) ('INTRONIC', '1', 164, '3', -3521) ('INTRONIC', '1', 164, '3', -3521) ('INTRONIC', '1', 164, '3', -3521) ('INTRONIC', '1', 164, '2j', -1172) ('INTRONIC', '1', 164, '3', -3521) ('INTRONIC', '1', 164, '3', -3521) ('INTRONIC', '1', 164, '3', -3521) ('INTRONIC', '1', 164, '2j', -1172) ('INTRONIC', '1', 164, '3', -3521)
2L 2358501 2 A T 5 0 False 0 4 22 0 0 False False False 11.672 0 14.359 -1.2432 T intron_variant n.147+197A>T None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0568181818182 True None None None ('INTRONIC', 'AGAP004707-PA', 197, 'AGAP004707-PA', -3506) ('INTRONIC', 'AGAP004707-PB', 197, 'AGAP004707-PB', -3506) ('INTRONIC', 'AGAP004707-PC', 197, 'AGAP004707-PC', -3506) ('INTRONIC', '1', 197, '3', -3488) ('INTRONIC', '1', 197, '3', -3488) ('INTRONIC', '1', 197, '3', -3488) ('INTRONIC', '1', 197, '3', -3488) ('INTRONIC', '1', 197, '2j', -1139) ('INTRONIC', '1', 197, '3', -3488) ('INTRONIC', '1', 197, '3', -3488) ('INTRONIC', '1', 197, '3', -3488) ('INTRONIC', '1', 197, '2j', -1139) ('INTRONIC', '1', 197, '3', -3488)
2L 2358536 2 T G 4 0 False 0 3 25 0 0 False False False 4.3203 1 17.234 2.2852 G intron_variant n.147+232T>G None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.00727272727273 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 232, 'AGAP004707-PA', -3471) ('INTRONIC', 'AGAP004707-PB', 232, 'AGAP004707-PB', -3471) ('INTRONIC', 'AGAP004707-PC', 232, 'AGAP004707-PC', -3471) ('INTRONIC', '1', 232, '3', -3453) ('INTRONIC', '1', 232, '3', -3453) ('INTRONIC', '1', 232, '3', -3453) ('INTRONIC', '1', 232, '3', -3453) ('INTRONIC', '1', 232, '2j', -1104) ('INTRONIC', '1', 232, '3', -3453) ('INTRONIC', '1', 232, '3', -3453) ('INTRONIC', '1', 232, '3', -3453) ('INTRONIC', '1', 232, '2j', -1104) ('INTRONIC', '1', 232, '3', -3453)
2L 2358560 2 C T 1 0 False 0 2 21 0 0 False False False 0.0 0 16.109 0.40991 T intron_variant n.147+256C>T None AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 256, 'AGAP004707-PA', -3447) ('INTRONIC', 'AGAP004707-PB', 256, 'AGAP004707-PB', -3447) ('INTRONIC', 'AGAP004707-PC', 256, 'AGAP004707-PC', -3447) ('INTRONIC', '1', 256, '3', -3429) ('INTRONIC', '1', 256, '3', -3429) ('INTRONIC', '1', 256, '3', -3429) ('INTRONIC', '1', 256, '3', -3429) ('INTRONIC', '1', 256, '2j', -1080) ('INTRONIC', '1', 256, '3', -3429) ('INTRONIC', '1', 256, '3', -3429) ('INTRONIC', '1', 256, '3', -3429) ('INTRONIC', '1', 256, '2j', -1080) ('INTRONIC', '1', 256, '3', -3429)
2L 2358581 2 T A 330 0 False 0 2 20 0 0 False False False 0.20801 1 21.109 -0.3811 A intron_variant n.147+277T>A None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.525454545455 0.366071428571 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 277, 'AGAP004707-PA', -3426) ('INTRONIC', 'AGAP004707-PB', 277, 'AGAP004707-PB', -3426) ('INTRONIC', 'AGAP004707-PC', 277, 'AGAP004707-PC', -3426) ('INTRONIC', '1', 277, '3', -3408) ('INTRONIC', '1', 277, '3', -3408) ('INTRONIC', '1', 277, '3', -3408) ('INTRONIC', '1', 277, '3', -3408) ('INTRONIC', '1', 277, '2j', -1059) ('INTRONIC', '1', 277, '3', -3408) ('INTRONIC', '1', 277, '3', -3408) ('INTRONIC', '1', 277, '3', -3408) ('INTRONIC', '1', 277, '2j', -1059) ('INTRONIC', '1', 277, '3', -3408)
2L 2358591 2 G T 4 0 False 0 4 18 0 0 False False False 1.9912 1 15.461 -0.50586 T intron_variant n.147+287G>T None AGAP004707-RA -1 0.0 0.0 0.0326086956522 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 287, 'AGAP004707-PA', -3416) ('INTRONIC', 'AGAP004707-PB', 287, 'AGAP004707-PB', -3416) ('INTRONIC', 'AGAP004707-PC', 287, 'AGAP004707-PC', -3416) ('INTRONIC', '1', 287, '3', -3398) ('INTRONIC', '1', 287, '3', -3398) ('INTRONIC', '1', 287, '3', -3398) ('INTRONIC', '1', 287, '3', -3398) ('INTRONIC', '1', 287, '2j', -1049) ('INTRONIC', '1', 287, '3', -3398) ('INTRONIC', '1', 287, '3', -3398) ('INTRONIC', '1', 287, '3', -3398) ('INTRONIC', '1', 287, '2j', -1049) ('INTRONIC', '1', 287, '3', -3398)
2L 2358621 2 G C 2 0 True 0 8 13 0 0 False False False 7.3477 1 14.0 -1.6348 C intron_variant n.147+317G>C None AGAP004707-RA -1 0.0 0.0 0.0217391304348 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 317, 'AGAP004707-PA', -3386) ('INTRONIC', 'AGAP004707-PB', 317, 'AGAP004707-PB', -3386) ('INTRONIC', 'AGAP004707-PC', 317, 'AGAP004707-PC', -3386) ('INTRONIC', '1', 317, '3', -3368) ('INTRONIC', '1', 317, '3', -3368) ('INTRONIC', '1', 317, '3', -3368) ('INTRONIC', '1', 317, '3', -3368) ('INTRONIC', '1', 317, '2j', -1019) ('INTRONIC', '1', 317, '3', -3368) ('INTRONIC', '1', 317, '3', -3368) ('INTRONIC', '1', 317, '3', -3368) ('INTRONIC', '1', 317, '2j', -1019) ('INTRONIC', '1', 317, '3', -3368)
2L 2358667 2 T A 473 0 True 0 11 10 0 0 False False False 0.56299 0 30.047 0.18506 A intron_variant n.147+363T>A None AGAP004707-RA -1 0.833333333333 0.840579710145 0.0108695652174 1.0 0.993827160494 0.06 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 363, 'AGAP004707-PA', -3340) ('INTRONIC', 'AGAP004707-PB', 363, 'AGAP004707-PB', -3340) ('INTRONIC', 'AGAP004707-PC', 363, 'AGAP004707-PC', -3340) ('INTRONIC', '1', 363, '3', -3322) ('INTRONIC', '1', 363, '3', -3322) ('INTRONIC', '1', 363, '3', -3322) ('INTRONIC', '1', 363, '3', -3322) ('INTRONIC', '1', 363, '2j', -973) ('INTRONIC', '1', 363, '3', -3322) ('INTRONIC', '1', 363, '3', -3322) ('INTRONIC', '1', 363, '3', -3322) ('INTRONIC', '1', 363, '2j', -973) ('INTRONIC', '1', 363, '3', -3322)
2L 2358668 2 G A 7 0 False 0 12 10 0 0 False False False 28.266 0 1.8604 -0.94482 A intron_variant n.147+364G>A None AGAP004707-RA -1 0.00833333333333 0.00724637681159 0.0326086956522 0.0161290322581 0.00617283950617 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 364, 'AGAP004707-PA', -3339) ('INTRONIC', 'AGAP004707-PB', 364, 'AGAP004707-PB', -3339) ('INTRONIC', 'AGAP004707-PC', 364, 'AGAP004707-PC', -3339) ('INTRONIC', '1', 364, '3', -3321) ('INTRONIC', '1', 364, '3', -3321) ('INTRONIC', '1', 364, '3', -3321) ('INTRONIC', '1', 364, '3', -3321) ('INTRONIC', '1', 364, '2j', -972) ('INTRONIC', '1', 364, '3', -3321) ('INTRONIC', '1', 364, '3', -3321) ('INTRONIC', '1', 364, '3', -3321) ('INTRONIC', '1', 364, '2j', -972) ('INTRONIC', '1', 364, '3', -3321)
2L 2358707 2 T C 39 0 True 0 12 15 0 1 False False False 41.875 0 16.781 4.9414 C intron_variant n.147+403T>C None AGAP004707-RA -1 0.0 0.0 0.423913043478 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 403, 'AGAP004707-PA', -3300) ('INTRONIC', 'AGAP004707-PB', 403, 'AGAP004707-PB', -3300) ('INTRONIC', 'AGAP004707-PC', 403, 'AGAP004707-PC', -3300) ('INTRONIC', '1', 403, '3', -3282) ('INTRONIC', '1', 403, '3', -3282) ('INTRONIC', '1', 403, '3', -3282) ('INTRONIC', '1', 403, '3', -3282) ('INTRONIC', '1', 403, '2j', -933) ('INTRONIC', '1', 403, '3', -3282) ('INTRONIC', '1', 403, '3', -3282) ('INTRONIC', '1', 403, '3', -3282) ('INTRONIC', '1', 403, '2j', -933) ('INTRONIC', '1', 403, '3', -3282)
2L 2358709 2 A C 2 0 False 0 12 16 0 1 False False False 4.3906 0 12.797 -1.2656 C intron_variant n.147+405A>C None AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 405, 'AGAP004707-PA', -3298) ('INTRONIC', 'AGAP004707-PB', 405, 'AGAP004707-PB', -3298) ('INTRONIC', 'AGAP004707-PC', 405, 'AGAP004707-PC', -3298) ('INTRONIC', '1', 405, '3', -3280) ('INTRONIC', '1', 405, '3', -3280) ('INTRONIC', '1', 405, '3', -3280) ('INTRONIC', '1', 405, '3', -3280) ('INTRONIC', '1', 405, '2j', -931) ('INTRONIC', '1', 405, '3', -3280) ('INTRONIC', '1', 405, '3', -3280) ('INTRONIC', '1', 405, '3', -3280) ('INTRONIC', '1', 405, '2j', -931) ('INTRONIC', '1', 405, '3', -3280)
2L 2358716 2 T C 1 0 False 0 9 15 0 0 False False False 25.453 0 1.0596 -2.9727 C intron_variant n.147+412T>C None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.00617283950617 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 412, 'AGAP004707-PA', -3291) ('INTRONIC', 'AGAP004707-PB', 412, 'AGAP004707-PB', -3291) ('INTRONIC', 'AGAP004707-PC', 412, 'AGAP004707-PC', -3291) ('INTRONIC', '1', 412, '3', -3273) ('INTRONIC', '1', 412, '3', -3273) ('INTRONIC', '1', 412, '3', -3273) ('INTRONIC', '1', 412, '3', -3273) ('INTRONIC', '1', 412, '2j', -924) ('INTRONIC', '1', 412, '3', -3273) ('INTRONIC', '1', 412, '3', -3273) ('INTRONIC', '1', 412, '3', -3273) ('INTRONIC', '1', 412, '2j', -924) ('INTRONIC', '1', 412, '3', -3273)
2L 2358734 2 C T 2 0 True 0 9 14 0 0 False False False 6.4297 1 16.016 -0.42896 T intron_variant n.147+430C>T None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.00363636363636 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 430, 'AGAP004707-PA', -3273) ('INTRONIC', 'AGAP004707-PB', 430, 'AGAP004707-PB', -3273) ('INTRONIC', 'AGAP004707-PC', 430, 'AGAP004707-PC', -3273) ('INTRONIC', '1', 430, '3', -3255) ('INTRONIC', '1', 430, '3', -3255) ('INTRONIC', '1', 430, '3', -3255) ('INTRONIC', '1', 430, '3', -3255) ('INTRONIC', '1', 430, '2j', -906) ('INTRONIC', '1', 430, '3', -3255) ('INTRONIC', '1', 430, '3', -3255) ('INTRONIC', '1', 430, '3', -3255) ('INTRONIC', '1', 430, '2j', -906) ('INTRONIC', '1', 430, '3', -3255)

...

Inspect missense variants


In [24]:
def simplify_missense_effect(v):
    """Reduce a transcript effect value to its amino-acid change label.

    Parameters
    ----------
    v : sequence or None
        Effect value from a transcript column. Item 0 is the effect type
        and item 1 is the label returned for missense effects (shown as
        e.g. 'D33N' in the rendered table).

    Returns
    -------
    The item at index 1 when the effect type is 'NON_SYNONYMOUS_CODING';
    otherwise the empty string, including when `v` is None or empty.

    """
    # anything that is missing or not a missense effect renders as blank
    if not v or v[0] != 'NON_SYNONYMOUS_CODING':
        return ''
    return v[1]

    
# CSS fragments applied to individual table cells
_CSS_RED = 'background-color: red'
_CSS_ORANGE = 'background-color: orange'


def _cell_styler(css, predicate):
    """Return a function mapping a cell value to `css` if `predicate(value)`
    is truthy, and to the empty string otherwise."""
    def style(v):
        if predicate(v):
            return css
        return ''
    return style


# Per-column cell styles, keyed by column name: each value maps a cell value
# to a CSS string. Red marks a value beyond the column's threshold; orange
# marks sites with more than two alleles.
# NOTE(review): the numeric thresholds presumably mirror the callset's
# site-filter cutoffs -- confirm against the filter definitions.
td_styles = {
    'FILTER_PASS': _cell_styler(_CSS_RED, lambda v: not v),
    'NoCoverage': _cell_styler(_CSS_RED, lambda v: v > 1),
    'LowCoverage': _cell_styler(_CSS_RED, lambda v: v > 76),
    'HighCoverage': _cell_styler(_CSS_RED, lambda v: v > 15),
    'LowMQ': _cell_styler(_CSS_RED, lambda v: v > 76),
    'HighMQ0': _cell_styler(_CSS_RED, lambda v: v > 1),
    'RepeatDUST': _cell_styler(_CSS_RED, lambda v: v),
    'FS': _cell_styler(_CSS_RED, lambda v: v > 60),
    'QD': _cell_styler(_CSS_RED, lambda v: v < 5),
    'ReadPosRankSum': _cell_styler(_CSS_RED, lambda v: v < -8),
    'HRun': _cell_styler(_CSS_RED, lambda v: v > 4),
    'num_alleles': _cell_styler(_CSS_ORANGE, lambda v: v > 2),
}


def tr_style(row):
    """Shade a table row green in proportion to its alternate allele count.

    The opacity is ``row['AC'] / 100`` capped at 1, so rows with an 'AC' of
    100 or more are fully opaque.

    Parameters
    ----------
    row : mapping-like
        Table row supporting lookup of the 'AC' field by name.

    Returns
    -------
    str
        CSS ``background-color`` declaration with the alpha to 3 decimals.

    """
    alpha = min(1, row['AC']/100)
    return 'background-color:rgba(0, 255, 0, %.3f)' % alpha


def _row_has_missense_effect(row):
    """True if any transcript column of `row` holds a NON_SYNONYMOUS_CODING effect."""
    for t in transcript_ids:
        effect = row[t]
        if effect and effect[0] == 'NON_SYNONYMOUS_CODING':
            return True
    return False


# keep variants that are missense in at least one transcript ...
_missense_rows = tbl_variants_phase1_eff.select(_row_has_missense_effect)
# ... then collapse every transcript column to its amino-acid change label
tbl_variants_phase1_missense = _missense_rows.convert(
    transcript_ids, simplify_missense_effect
)
tbl_variants_phase1_missense.displayall(td_styles=td_styles, tr_style=tr_style)


0|CHROM 1|POS 2|num_alleles 3|REF 4|ALT 5|AC 6|ALTIX 7|FILTER_PASS 8|NoCoverage 9|LowCoverage 10|HighCoverage 11|LowMQ 12|HighMQ0 13|RepeatDUST 14|RepeatMasker 15|RepeatTRF 16|FS 17|HRun 18|QD 19|ReadPosRankSum 20|SNPEFF_Allele 21|SNPEFF_Annotation 22|SNPEFF_HGVS_c 23|SNPEFF_HGVS_p 24|SNPEFF_Feature_ID 25|SNPEFF_CDS_pos 26|AF_AOM 27|AF_BFM 28|AF_GWA 29|AF_GNS 30|AF_BFS 31|AF_CMS 32|AF_GAS 33|AF_UGS 34|AF_KES 35|check_allele 36|exon_start 37|exon_end 38|exon 39|AGAP004707-RA 40|AGAP004707-RB 41|AGAP004707-RC 42|Davies-C1N2 43|Davies-C3N2 44|Davies-C5N2 45|Davies-C7N2 46|Davies-C8N2 47|Davies-C10N2 48|Davies-C11N2 49|Davies-C1N9 50|Davies-C8N9 51|Davies-C1N9ck
2L 2358254 2 G A 1 0 True 0 0 10 0 0 False False False 9.8672 1 17.547 -0.049988 A missense_variant n.97G>A p.Asp33Asn AGAP004707-RA 97 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2358158 2358304 1 D33N D33N D33N D33N D33N D33N D33N D33N D33N D33N D33N D33N D33N
2L 2359670 2 G A 7 0 False 1 171 1 1 0 False False False 8.6641 6 14.406 -0.029007 A intron_variant n.147+1366G> None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.0109090909091 0.0 0.0 0.0113636363636 True 2359640 2359672 2j E60K E60K
2L 2362002 2 A T 2 0 True 0 1 3 0 0 False False False 0.5459 0 12.531 -0.55322 T splice_region_variant&intron_varia n.148-5A>T None AGAP004707-RA -1 0.0 0.0144927536232 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2361989 2362144 3 D54V D54V D54V D54V D65V D54V D54V D54V D65V D54V
2L 2362019 2 G T 2 0 True 0 0 6 0 0 False False False 3.9824 0 13.641 0.7749 T missense_variant n.160G>T p.Gly54Cys AGAP004707-RA 160 0.0 0.0144927536232 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2361989 2362144 3 G54C G54C G54C G60C G60C G60C G60C G71C G60C G60C G60C G71C G60C
2L 2362023 2 C T 1 0 True 0 1 3 0 0 False False False 0.0 0 13.477 -1.1611 T missense_variant n.164C>T p.Pro55Leu AGAP004707-RA 164 0.0 0.0 0.0 0.0 0.00617283950617 0.0 0.0 0.0 0.0 True 2361989 2362144 3 P55L P55L P55L P61L P61L P61L P61L P72L P61L P61L P61L P72L P61L
2L 2390168 2 A G 2 0 True 0 2 10 0 0 False False False 0.56982 1 15.219 -0.026001 G missense_variant n.752A>G p.Lys251Arg AGAP004707-RA 752 0.0 0.0 0.0 0.0 0.0 0.0 0.0178571428571 0.0 0.0 True 2390129 2390341 7 K251R K251R K251R K257R K214R K257R K257R K268R K257R K257R K257R K268R K257R
2L 2390177 2 G A 198 0 True 0 3 8 0 0 False False False 0.12695 1 18.625 0.83496 A missense_variant n.761G>A p.Arg254Lys AGAP004707-RA 761 0.0 0.0 0.0 0.0 0.0 0.316363636364 0.214285714286 0.0 0.0 True 2390129 2390341 7 R254K R254K R254K R260K R217K R260K R260K R271K R260K R260K R260K R271K R260K
2L 2390311 2 G A 1 0 True 0 0 10 0 0 False False False 0.0 3 14.07 -0.70996 A missense_variant n.895G>A p.Glu299Lys AGAP004707-RA 895 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2390129 2390341 7 E299K E299K E299K E305K E262K E305K E305K E316K E305K E305K E305K E316K E305K
2L 2390448 2 G A 6 0 True 0 0 15 0 0 False False False 0.71094 0 16.125 -0.65918 A missense_variant n.949G>A p.Gly317Ser AGAP004707-RA 949 0.0 0.0 0.0 0.0 0.0 0.0109090909091 0.0 0.0 0.0 True 2390425 2390485 8 G317S G317S G317S G323S G280S G323S G323S G334S G323S G323S G323S G334S G323S
2L 2391228 3 G C 10 0 True 0 0 12 0 0 False False False 2.0352 0 14.867 -1.1777 C missense_variant n.1204G>C p.Val402Leu AGAP004707-RA 1204 0.0 0.0724637681159 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2391156 2391320 10 V402L V402L V402L V408L V365L V408L V419L V408L V408L V408L V419L V408L
2L 2391228 3 G T 9 1 True 0 0 12 0 0 False False False 2.0352 0 14.867 -1.1777 None None None None None None 0.0 0.0652173913043 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2391156 2391320 10 V402L V402L V402L V408L V365L V408L V419L V408L V408L V408L V419L V408L
2L 2399997 2 G C 38 0 True 0 1 7 0 0 False False False 13.359 0 15.688 0.11798 C missense_variant n.1396G>C p.Asp466His AGAP004707-RA 1396 0.0 0.0 0.0 0.0 0.0 0.0690909090909 0.0 0.0 0.0 True 2399898 2400173 11i+ D466H D466H D466H D472H D429H D417H D472H D483H D472H D472H D472H D483H D472H
2L 2400071 3 G A 16 0 True 0 0 8 0 0 False False False 5.6875 0 16.969 1.3232 A missense_variant n.1470G>A p.Met490Ile AGAP004707-RA 1470 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.181818181818 True 2399898 2400173 11i+ M490I M490I M490I M496I M453I M441I M496I M507I M496I M496I M496I M507I M496I
2L 2400071 3 G T 2 1 True 0 0 8 0 0 False False False 5.6875 0 16.969 1.3232 None None None None None None 0.0 0.0 0.0 0.0 0.0 0.00363636363636 0.0 0.0 0.0 True 2399898 2400173 11i+ M490I M490I M490I M496I M453I M441I M496I M507I M496I M496I M496I M507I M496I
2L 2400076 2 T A 2 0 True 0 0 9 0 0 False False False 1.1055 1 10.469 -0.54199 A missense_variant n.1475T>A p.Ile492Asn AGAP004707-RA 1475 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00970873786408 0.0 True 2399898 2400173 11i+ I492N I492N I492N I498N I455N I443N I498N I509N I498N I498N I498N I509N I498N
2L 2402484 2 G T 1 0 True 0 4 13 0 0 False False False 1.8857 1 14.719 0.075989 T missense_variant n.1610G>T p.Arg537Leu AGAP004707-RA 1610 0.0 0.0 0.0 0.0 0.0 0.0 0.00892857142857 0.0 0.0 True 2402447 2402509 13a R537L R537L R537L R550L R550L R550L
2L 2402508 2 A T 1 0 True 1 3 15 0 0 False False False 3.0098 0 11.156 -0.041992 T missense_variant&splice_region_var n.1634A>T p.Gln545Leu AGAP004707-RA 1634 0.0 0.0 0.0 0.0 0.00617283950617 0.0 0.0 0.0 0.0 True 2402447 2402509 13a Q545L Q545L Q545L Q558L Q558L Q558L
2L 2403206 2 G A 1 0 False 0 1 16 0 0 False False False 4.8047 0 15.477 -0.20398 A missense_variant n.1756G>A p.Ala586Thr AGAP004707-RA 1756 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2403086 2403269 14 A586T A586T A586T A599T A528T A516T A571T A582T A571T A571T A599T A582T A599T
2L 2407811 2 G A 1 0 True 0 2 14 0 0 False False False 0.0 1 10.789 -0.86914 A missense_variant n.2009G>A p.Arg670Gln AGAP004707-RA 2009 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True 2407622 2407818 15 R670Q R670Q R670Q R683Q R612Q R600Q R655Q R666Q R655Q R655Q R683Q R666Q R683Q
2L 2407912 2 A T 1 0 True 0 0 6 0 0 False False False 0.0 1 22.172 0.5332 T missense_variant n.2035A>T p.Thr679Ser AGAP004707-RA 2035 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2407894 2407993 16 T679S T679S T679S T692S T621S T609S T664S T675S T664S T664S T692S T675S T692S
2L 2407987 2 A G 1 0 True 0 2 7 0 0 False False False 0.0 0 14.102 1.0859 G missense_variant n.2110A>G p.Met704Val AGAP004707-RA 2110 0.0 0.00724637681159 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2407894 2407993 16 M704V M704V M704V M717V M646V M634V M689V M700V M689V M689V M717V M700V M717V
2L 2408125 2 C A 1 0 False 0 252 2 1 0 False False False 2.0879 2 7.0586 0.5918 A missense_variant n.2171C>A p.Ala724Glu AGAP004707-RA 2171 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2408071 2408139 17 A724E A724E A724E A737E A666E A654E A709E A720E A709E A709E A737E A720E A737E
2L 2408136 2 G A 1 0 False 0 318 2 1 0 False False False 2.0879 0 1.8799 -2.0742 A missense_variant n.2182G>A p.Gly728Arg AGAP004707-RA 2182 0.00833333333333 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2408071 2408139 17 G728R G728R G728R G741R G670R G658R G713R G724R G713R G713R G741R G724R G741R
2L 2416980 2 C T 32 0 True 0 2 11 0 0 False False False 2.0449 0 17.703 0.37305 T missense_variant n.2372C>T p.Thr791Met AGAP004707-RA 2372 0.0 0.0144927536232 0.0 0.129032258065 0.135802469136 0.0 0.0 0.0 0.0 True 2416794 2417071 18b+ T791M T791M T791M T804M T733M T721M T776M T787M T776M T776M T804M T787M T804M
2L 2417206 2 A C 1 0 True 0 15 3 0 0 False False False 0.0 2 9.4297 0.31299 C missense_variant n.2485A>C p.Ile829Leu AGAP004707-RA 2485 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2417185 2417358 19 I829L I829L I829L I842L I771L I759L I814L I825L I814L I814L I842L I825L I842L
2L 2417231 2 C T 1 0 True 0 38 1 0 0 False False False 3.6992 0 13.688 -0.46802 T missense_variant n.2510C>T p.Ala837Val AGAP004707-RA 2510 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2417185 2417358 19 A837V A837V A837V A850V A779V A767V A822V A833V A822V A822V A850V A833V A850V
2L 2417772 2 A T 1 0 True 0 2 10 0 0 False False False 7.707 1 16.156 -0.035004 T missense_variant n.2773A>T p.Met925Leu AGAP004707-RA 2773 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00485436893204 0.0 True 2417637 2417799 20c M925L M925L M938L
2L 2421454 2 A G 1 0 True 0 0 13 0 0 False False False 3.8008 1 14.094 0.88916 G intron_variant n.2801-1014A None AGAP004707-RA -1 0.0 0.00724637681159 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2421385 2421547 20d M903V M916V M845V M833V M888V M899V M888V M888V M916V M899V
2L 2422486 2 C A 1 0 True 0 1 6 0 0 False False False 4.168 0 12.102 -0.45605 A missense_variant n.2819C>A p.Pro940His AGAP004707-RA 2819 0.0 0.0 0.0 0.0 0.00617283950617 0.0 0.0 0.0 0.0 True 2422468 2422655 21 P940H P940H P940H P953H P882H P870H P925H P936H P925H P925H P953H P936H P953H
2L 2422651 2 T C 430 0 True 0 2 10 0 0 False False False 0.95117 0 27.203 -0.92822 C missense_variant n.2984T>C p.Leu995Ser AGAP004707-RA 2984 0.0 0.0 0.0 0.0 0.0 0.154545454545 0.642857142857 1.0 0.761363636364 True 2422468 2422655 21 L995S L995S L995S L1008S L937S L925S L980S L991S L980S L980S L1008S L991S L1008S
2L 2422652 2 A T 775 0 True 0 2 9 0 0 False False False 0.73291 3 29.047 0.19104 T missense_variant n.2985A>T p.Leu995Phe AGAP004707-RA 2985 0.858333333333 0.847826086957 0.0 1.0 1.0 0.529090909091 0.357142857143 0.0 0.0 True 2422468 2422655 21 L995F L995F L995F L1008F L937F L925F L980F L991F L980F L980F L1008F L991F L1008F
2L 2422875 2 G A 1 0 False 0 3 17 0 0 False False False 0.71289 0 14.867 -1.6621 A missense_variant n.3151G>A p.Val1051Ile AGAP004707-RA 3151 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2422713 2422920 22 V1051I V1051I V1051I V1064I V993I V981I V1036I V1047I V1036I V1036I V1064I V1047I V1064I
2L 2424383 2 G T 1 0 True 0 1 14 0 0 False False False 2.8906 0 18.156 -0.31494 T missense_variant n.3373G>T p.Ala1125Ser AGAP004707-RA 3373 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True 2424207 2424418 23f+ A1125S A1125S A1125S A1138S A1067S A1045S A1100S A1121S A1110S A1110S A1138S A1121S A1138S
2L 2424384 2 C T 11 0 True 0 1 14 0 0 False False False 5.4141 0 14.172 -2.3242 T missense_variant n.3374C>T p.Ala1125Val AGAP004707-RA 3374 0.0916666666667 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2424207 2424418 23f+ A1125V A1125V A1125V A1138V A1067V A1045V A1100V A1121V A1110V A1110V A1138V A1121V A1138V
2L 2424401 2 A G 2 0 False 0 0 16 0 0 False False False 4.1133 0 14.742 -1.7324 G missense_variant n.3391A>G p.Ile1131Val AGAP004707-RA 3391 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.00892857142857 0.0 0.0 True 2424207 2424418 23f+ I1131V I1131V I1131V I1144V I1073V I1051V I1106V I1127V I1116V I1116V I1144V I1127V I1144V
2L 2424720 2 T G 11 0 False 0 2 8 0 0 False False False 1953.0 1 0.090027 1.6211 G missense_variant n.3478T>G p.Ser1160Ala AGAP004707-RA 3478 0.00833333333333 0.0144927536232 0.0108695652174 0.0 0.0 0.0109090909091 0.00892857142857 0.0 0.0 True 2424651 2424870 24h+ S1160A S1173A S1156A S1173A
2L 2425077 2 G A 5 0 True 0 0 14 0 0 False False False 0.66992 0 14.891 -0.61816 A missense_variant n.3760G>A p.Val1254Ile AGAP004707-RA 3760 0.0 0.0 0.054347826087 0.0 0.0 0.0 0.0 0.0 0.0 True 2424946 2425211 25 V1254I V1228I V1228I V1241I V1170I V1148I V1203I V1224I V1213I V1213I V1267I V1250I V1267I
2L 2425291 2 T C 1 0 True 0 0 10 0 0 False False False 4.4258 0 15.32 -0.38403 C missense_variant n.3908T>C p.Val1303Ala AGAP004707-RA 3908 0.0 0.0 0.0 0.0 0.00617283950617 0.0 0.0 0.0 0.0 True 2425278 2425451 26 V1303A V1277A V1277A V1290A V1219A V1197A V1252A V1273A V1262A V1262A V1316A V1299A V1316A
2L 2425417 2 A T 3 0 True 0 0 13 0 0 False False False 0.39111 1 14.141 0.104 T missense_variant n.4034A>T p.Asn1345Ile AGAP004707-RA 4034 0.0 0.0217391304348 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2425278 2425451 26 N1345I N1319I N1319I N1332I N1261I N1239I N1294I N1315I N1304I N1304I N1358I N1341I N1358I
2L 2428015 2 C T 1 0 True 0 3 12 0 0 False False False 0.66895 3 13.711 -0.55322 T missense_variant n.4096C>T p.Leu1366Phe AGAP004707-RA 4096 0.0 0.0 0.0 0.0 0.00617283950617 0.0 0.0 0.0 0.0 True 2427988 2428110 27l L1366F L1340F L1340F L1353F L1282F L1260F L1315F L1336F L1325F L1325F L1379F L1362F
2L 2429617 2 T C 19 0 True 0 2 12 0 0 False False False 1.4502 0 15.531 -0.98291 C missense_variant n.4580T>C p.Ile1527Thr AGAP004707-RA 4580 0.0 0.13768115942 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2429556 2429801 30 I1527T I1501T I1501T I1511T I1440T I1418T I1473T I1494T I1483T I1483T I1537T I1520T I1537T
2L 2429622 2 T C 1 0 True 0 1 15 0 0 False False False 10.922 1 14.977 -0.098022 C missense_variant n.4585T>C p.Phe1529Leu AGAP004707-RA 4585 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00485436893204 0.0 True 2429556 2429801 30 F1529L F1503L F1503L F1513L F1442L F1420L F1475L F1496L F1485L F1485L F1539L F1522L F1539L
2L 2429745 2 A T 110 0 False 0 0 17 0 0 False False False 0.4541 1 16.484 1.9131 T missense_variant n.4708A>T p.Asn1570Tyr AGAP004707-RA 4708 0.0 0.260869565217 0.0 0.0967741935484 0.216049382716 0.06 0.0 0.0 0.0 True 2429556 2429801 30 N1570Y N1544Y N1544Y N1554Y N1483Y N1461Y N1516Y N1537Y N1526Y N1526Y N1580Y N1563Y N1580Y
2L 2429788 2 T C 2 0 False 0 0 24 0 0 False False False 6.2383 0 19.406 -0.46509 C missense_variant n.4751T>C p.Ile1584Thr AGAP004707-RA 4751 0.0 0.0 0.0 0.0 0.0123456790123 0.0 0.0 0.0 0.0 True 2429556 2429801 30 I1584T I1558T I1558T I1568T I1497T I1475T I1530T I1551T I1540T I1540T I1594T I1577T I1594T
2L 2429897 2 A G 11 0 True 0 1 13 0 0 False False False 3.5234 1 16.906 -0.56006 G missense_variant n.4790A>G p.Glu1597Gly AGAP004707-RA 4790 0.0 0.0 0.0 0.0645161290323 0.0432098765432 0.0 0.0 0.0 0.0 True 2429872 2430142 31 E1597G E1571G E1571G E1581G E1510G E1488G E1543G E1564G E1553G E1553G E1607G E1590G E1607G
2L 2429915 2 A C 7 0 True 0 1 13 0 0 False False False 5.2461 0 16.406 -1.6729 C missense_variant n.4808A>C p.Lys1603Thr AGAP004707-RA 4808 0.0 0.0507246376812 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2429872 2430142 31 K1603T K1577T K1577T K1587T K1516T K1494T K1549T K1570T K1559T K1559T K1613T K1596T K1613T
2L 2429958 2 A C 1 0 True 0 1 14 0 0 False False False 1.0488 0 12.539 -1.8672 C missense_variant n.4851A>C p.Leu1617Phe AGAP004707-RA 4851 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2429872 2430142 31 L1617F L1591F L1591F L1601F L1530F L1508F L1563F L1584F L1573F L1573F L1627F L1610F L1627F
2L 2430106 2 T A 3 0 False 0 0 18 0 0 False False False 1.6719 0 15.719 -1.3877 A missense_variant n.4999T>A p.Leu1667Met AGAP004707-RA 4999 0.0 0.0217391304348 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2429872 2430142 31 L1667M L1641M L1641M L1651M L1580M L1558M L1613M L1634M L1623M L1623M L1677M L1660M L1677M
2L 2430229 2 G A 4 0 False 0 1 18 0 0 False False False 20.531 0 14.383 0.22498 A missense_variant n.5041G>A p.Val1681Ile AGAP004707-RA 5041 0.0 0.0 0.0 0.0 0.0 0.00727272727273 0.0 0.0 0.0 True 2430224 2430528 32 V1681I V1655I V1655I V1665I V1594I V1572I V1627I V1648I V1637I V1637I V1691I V1674I V1691I
2L 2430236 2 G T 7 0 False 0 1 17 0 0 False False False 1.0479 1 14.352 -0.62988 T missense_variant n.5048G>T p.Ser1683Ile AGAP004707-RA 5048 0.0 0.0 0.0 0.0 0.0 0.0127272727273 0.0 0.0 0.0 True 2430224 2430528 32 S1683I S1657I S1657I S1667I S1596I S1574I S1629I S1650I S1639I S1639I S1693I S1676I S1693I
2L 2430424 2 G T 28 0 True 0 1 10 0 0 False False False 4.4297 3 17.094 -0.71924 T missense_variant n.5236G>T p.Ala1746Ser AGAP004707-RA 5236 0.0 0.0 0.0 0.112903225806 0.12962962963 0.0 0.0 0.0 0.0 True 2430224 2430528 32 A1746S A1720S A1720S A1730S A1659S A1637S A1692S A1713S A1702S A1702S A1756S A1739S A1756S
2L 2430451 2 C T 1 0 True 0 2 10 0 0 False False False 0.56689 0 16.047 0.098022 T missense_variant n.5263C>T p.His1755Tyr AGAP004707-RA 5263 0.0 0.0 0.0 0.0 0.0 0.0 0.00892857142857 0.0 0.0 True 2430224 2430528 32 H1755Y H1729Y H1729Y H1739Y H1668Y H1646Y H1701Y H1722Y H1711Y H1711Y H1765Y H1748Y H1765Y
2L 2430817 2 G A 13 0 True 0 0 9 0 0 False False False 1.3701 0 14.648 -0.66504 A missense_variant n.5557G>A p.Val1853Ile AGAP004707-RA 5557 0.0 0.0 0.0 0.0806451612903 0.0493827160494 0.0 0.0 0.0 0.0 True 2430601 2431617 33 V1853I V1827I V1827I V1837I V1766I V1744I V1799I V1820I V1809I V1809I V1863I V1846I V1863I
2L 2430863 2 T C 52 0 True 0 1 8 0 0 False False False 1.4785 0 15.812 0.78809 C missense_variant n.5603T>C p.Ile1868Thr AGAP004707-RA 5603 0.0 0.0 0.0 0.177419354839 0.253086419753 0.0 0.0 0.0 0.0 True 2430601 2431617 33 I1868T I1842T I1842T I1852T I1781T I1759T I1814T I1835T I1824T I1824T I1878T I1861T I1878T
2L 2430880 2 C T 29 0 True 0 1 10 0 0 False False False 19.312 1 16.547 -1.8408 T missense_variant n.5620C>T p.Pro1874Ser AGAP004707-RA 5620 0.0 0.210144927536 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2430601 2431617 33 P1874S P1848S P1848S P1858S P1787S P1765S P1820S P1841S P1830S P1830S P1884S P1867S P1884S
2L 2430881 2 C T 80 0 True 0 1 9 0 0 False False False 2.4453 1 18.562 -1.4189 T missense_variant n.5621C>T p.Pro1874Leu AGAP004707-RA 5621 0.0 0.0724637681159 0.0 0.451612903226 0.259259259259 0.0 0.0 0.0 0.0 True 2430601 2431617 33 P1874L P1848L P1848L P1858L P1787L P1765L P1820L P1841L P1830L P1830L P1884L P1867L P1884L
2L 2431019 2 T C 12 0 True 0 0 5 0 0 False False False 0.85107 1 13.523 -1.6211 C missense_variant n.5759T>C p.Phe1920Ser AGAP004707-RA 5759 0.0 0.0 0.0 0.0 0.0 0.0145454545455 0.0357142857143 0.0 0.0 True 2430601 2431617 33 F1920S F1894S F1894S F1904S F1833S F1811S F1866S F1887S F1876S F1876S F1930S F1913S F1930S
2L 2431061 2 C T 16 0 True 0 0 8 0 0 False False False 1.5 1 16.688 -0.46802 T missense_variant n.5801C>T p.Ala1934Val AGAP004707-RA 5801 0.0 0.115942028986 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2430601 2431617 33 A1934V A1908V A1908V A1918V A1847V A1825V A1880V A1901V A1890V A1890V A1944V A1927V A1944V
2L 2431079 2 T C 44 0 True 0 0 6 0 0 False False False 3.4004 0 15.953 2.1738 C missense_variant n.5819T>C p.Ile1940Thr AGAP004707-RA 5819 0.0 0.0434782608696 0.0 0.0 0.0 0.0690909090909 0.0 0.0 0.0 True 2430601 2431617 33 I1940T I1914T I1914T I1924T I1853T I1831T I1886T I1907T I1896T I1896T I1950T I1933T I1950T
2L 2431232 2 G A 1 0 True 0 0 7 0 0 False False False 3.1719 2 12.328 -0.80713 A missense_variant n.5972G>A p.Gly1991Glu AGAP004707-RA 5972 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True 2430601 2431617 33 G1991E G1965E G1965E G1975E G1904E G1882E G1937E G1958E G1947E G1947E G2001E G1984E G2001E
2L 2431331 2 C A 1 0 True 0 1 5 0 0 False False False 4.1406 2 12.797 -0.031006 A missense_variant n.6071C>A p.Thr2024Lys AGAP004707-RA 6071 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2430601 2431617 33 T2024K T1998K T1998K T2008K T1937K T1915K T1970K T1991K T1980K T1980K T2034K T2017K T2034K
2L 2431417 2 A G 2 0 True 0 0 5 0 0 False False False 10.93 0 13.789 -1.9297 G missense_variant n.6157A>G p.Ile2053Val AGAP004707-RA 6157 0.0 0.0 0.0 0.0 0.0 0.00363636363636 0.0 0.0 0.0 True 2430601 2431617 33 I2053V I2027V I2027V I2037V I1966V I1944V I1999V I2020V I2009V I2009V I2063V I2046V I2063V
2L 2431487 2 G C 5 0 True 0 1 6 0 0 False False False 0.69678 1 14.32 -0.78418 C missense_variant n.6227G>C p.Ser2076Thr AGAP004707-RA 6227 0.0 0.0 0.0 0.0 0.0 0.00909090909091 0.0 0.0 0.0 True 2430601 2431617 33 S2076T S2050T S2050T S2060T S1989T S1967T S2022T S2043T S2032T S2032T S2086T S2069T S2086T

Inspect splice site variants


In [25]:
def simplify_intron_effect(v):
    """Reduce a splice effect value to its nearer (reference, offset) pair.

    Parameters
    ----------
    v : sequence or None
        Effect value from a transcript column. Item 0 is the effect type;
        items 1-2 and items 3-4 are two (reference, offset) pairs, as in
        ``('INTRONIC', '1', 405, '3', -3280)`` from the rendered table.

    Returns
    -------
    For 'SPLICE_REGION' or 'SPLICE_CORE' effects, the (reference, offset)
    pair whose offset has the smaller absolute value; on a tie the second
    pair is returned. For anything else, including None or an empty value,
    the empty string.

    """
    if not v or v[0] not in ['SPLICE_REGION', 'SPLICE_CORE']:
        return ''
    first_is_nearer = math.fabs(v[2]) < math.fabs(v[4])
    return (v[1], v[2]) if first_is_nearer else (v[3], v[4])

    
# CSS fragments applied to individual table cells
_CSS_RED = 'background-color: red'
_CSS_ORANGE = 'background-color: orange'


def _cell_styler(css, predicate):
    """Return a function mapping a cell value to `css` if `predicate(value)`
    is truthy, and to the empty string otherwise."""
    def style(v):
        if predicate(v):
            return css
        return ''
    return style


# Per-column cell styles for the splice-variant table, keyed by column name:
# each value maps a cell value to a CSS string. Red marks a value beyond the
# column's threshold; orange marks sites with more than two alleles.
# NOTE(review): the numeric thresholds presumably mirror the callset's
# site-filter cutoffs -- confirm against the filter definitions.
td_styles = {
    'FILTER_PASS': _cell_styler(_CSS_RED, lambda v: not v),
    'NoCoverage': _cell_styler(_CSS_RED, lambda v: v > 1),
    'LowCoverage': _cell_styler(_CSS_RED, lambda v: v > 76),
    'HighCoverage': _cell_styler(_CSS_RED, lambda v: v > 15),
    'LowMQ': _cell_styler(_CSS_RED, lambda v: v > 76),
    'HighMQ0': _cell_styler(_CSS_RED, lambda v: v > 1),
    'RepeatDUST': _cell_styler(_CSS_RED, lambda v: v),
    'FS': _cell_styler(_CSS_RED, lambda v: v > 60),
    'QD': _cell_styler(_CSS_RED, lambda v: v < 5),
    'ReadPosRankSum': _cell_styler(_CSS_RED, lambda v: v < -8),
    'HRun': _cell_styler(_CSS_RED, lambda v: v > 4),
    'num_alleles': _cell_styler(_CSS_ORANGE, lambda v: v > 2),
}


def tr_style(row):
    """Return a green row background whose opacity scales with `row['AC']`.

    The opacity is the 'AC' value divided by 100, capped at 1, and rendered
    to three decimal places inside an rgba() CSS colour.
    """
    alpha = min(1, row['AC'] / 100)
    return 'background-color:rgba(0, 255, 0, %.3f)' % alpha


def _has_splice_effect(row):
    """Return True if any transcript column of `row` holds a splice effect."""
    for transcript_id in transcript_ids:
        effect = row[transcript_id]
        if effect and effect[0] in ['SPLICE_REGION', 'SPLICE_CORE']:
            return True
    return False


# keep only variants with a splice effect in at least one transcript, then
# pass every transcript column through simplify_intron_effect for display
tbl_splice_rows = tbl_variants_phase1_eff.select(_has_splice_effect)
tbl_variants_phase1_splice = tbl_splice_rows.convert(transcript_ids, simplify_intron_effect)
tbl_variants_phase1_splice.displayall(td_styles=td_styles, tr_style=tr_style)


0|CHROM 1|POS 2|num_alleles 3|REF 4|ALT 5|AC 6|ALTIX 7|FILTER_PASS 8|NoCoverage 9|LowCoverage 10|HighCoverage 11|LowMQ 12|HighMQ0 13|RepeatDUST 14|RepeatMasker 15|RepeatTRF 16|FS 17|HRun 18|QD 19|ReadPosRankSum 20|SNPEFF_Allele 21|SNPEFF_Annotation 22|SNPEFF_HGVS_c 23|SNPEFF_HGVS_p 24|SNPEFF_Feature_ID 25|SNPEFF_CDS_pos 26|AF_AOM 27|AF_BFM 28|AF_GWA 29|AF_GNS 30|AF_BFS 31|AF_CMS 32|AF_GAS 33|AF_UGS 34|AF_KES 35|check_allele 36|exon_start 37|exon_end 38|exon 39|AGAP004707-RA 40|AGAP004707-RB 41|AGAP004707-RC 42|Davies-C1N2 43|Davies-C3N2 44|Davies-C5N2 45|Davies-C7N2 46|Davies-C8N2 47|Davies-C10N2 48|Davies-C11N2 49|Davies-C1N9 50|Davies-C8N9 51|Davies-C1N9ck
2L 2362002 2 A T 2 0 True 0 1 3 0 0 False False False 0.5459 0 12.531 -0.55322 T splice_region_variant&intron_varia n.148-5A>T None AGAP004707-RA -1 0.0 0.0144927536232 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2361989 2362144 3 ('AGAP004707-PA', -5) ('AGAP004707-PB', -5) ('AGAP004707-PC', -5)
2L 2362003 2 C T 2 0 True 0 1 3 0 0 False False False 0.50195 0 14.062 0.024994 T splice_region_variant&intron_varia n.148-4C>T None AGAP004707-RA -1 0.0 0.0144927536232 0.0 0.0 0.0 0.0 0.0 0.0 0.0 True 2361989 2362144 3 ('AGAP004707-PA', -4) ('AGAP004707-PB', -4) ('AGAP004707-PC', -4)
2L 2382263 2 A G 166 0 True 0 45 1 0 0 False False False 5.957 0 25.375 -2.8809 G splice_region_variant&intron_varia n.492-7A>G None AGAP004707-RA -1 0.00833333333333 0.0 0.0 0.0 0.0 0.0 0.0 0.47572815534 0.761363636364 True None None None ('AGAP004707-PA', -7) ('AGAP004707-PB', -7) ('AGAP004707-PC', -7) ('5', -7) ('5', -7) ('5', -7) ('5', -7) ('5', -7) ('5', -7) ('5', -7) ('5', -7) ('5', -7)
2L 2390126 2 C T 2 0 True 0 2 11 0 0 False False False 3.4746 0 14.32 -1.0264 T splice_region_variant&intron_varia n.713-3C>T None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.00363636363636 0.0 0.0 0.0 True None None None ('AGAP004707-PA', -3) ('AGAP004707-PB', -3) ('AGAP004707-PC', -3) ('7', -3) ('7', -3) ('7', -3) ('7', -3) ('7', -3) ('7', -3) ('7', -3) ('7', -3) ('7', -3) ('7', -3)
2L 2400176 2 A G 1 0 True 0 0 7 0 0 False False False 0.0 0 22.203 0.74316 G splice_region_variant&intron_varia n.1572+3A>G None AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('AGAP004707-PA', 3) ('AGAP004707-PB', 3) ('AGAP004707-PC', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3) ('11i+', 3)
2L 2407888 2 T C 4 0 True 0 2 9 0 0 False False False 5.7578 0 16.281 -0.76416 C splice_region_variant&intron_varia n.2017-6T>C None AGAP004707-RA -1 0.0 0.0 0.0434782608696 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('AGAP004707-PA', -6) ('AGAP004707-PB', -6) ('AGAP004707-PC', -6) ('16', -6) ('16', -6) ('16', -6) ('16', -6) ('16', -6) ('16', -6) ('16', -6) ('16', -6) ('16', -6) ('16', -6)
2L 2417362 2 A G 496 0 False 5 712 0 0 0 False False False 63.062 1 28.844 1.251 G splice_region_variant&intron_varia n.2637+4A>G None AGAP004707-RA -1 0.838983050847 0.840579710145 0.0666666666667 1.0 1.0 0.0967153284672 0.0 0.0 0.0 True None None None ('AGAP004707-PA', 4) ('AGAP004707-PB', 4) ('AGAP004707-PC', 4) ('19', 4) ('19', 4) ('19', 4) ('19', 4) ('19', 4) ('19', 4) ('19', 4) ('19', 4) ('19', 4) ('19', 4)
2L 2425766 2 T A 79 0 True 0 1 9 0 0 False False False 9.9062 0 21.391 1.6143 A intron_variant n.4068+315T> None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.0145454545455 0.633928571429 0.0 0.0 True None None None ('27k', -4)
2L 2429868 2 C A 2 0 True 0 0 14 0 0 False False False 8.5469 0 14.961 -0.014 A splice_region_variant&intron_varia n.4765-4C>A None AGAP004707-RA -1 0.0 0.0 0.0217391304348 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('AGAP004707-PA', -4) ('AGAP004707-PB', -4) ('AGAP004707-PC', -4) ('31', -4) ('31', -4) ('31', -4) ('31', -4) ('31', -4) ('31', -4) ('31', -4) ('31', -4) ('31', -4) ('31', -4)

Write out variants to file


In [26]:
def _effect_to_text(v):
    """Join the items of an effect into a single colon-separated string."""
    return ':'.join(str(item) for item in v)


# the pickle is teed off before the conversion, so it receives the rows as
# they are; the CSV receives the flattened effects with None written as 'NA'
tbl_teed = tbl_variants_phase1_eff.teepickle('../data/tbl_variants_phase1.pkl')
tbl_flat = tbl_teed.convert(transcript_ids, _effect_to_text)
tbl_csv_ready = tbl_flat.replaceall(None, 'NA')
tbl_csv_ready.tocsv('../data/tbl_variants_phase1.csv')

In [27]:
# check OK: read the pickle written above back in and display it
etl.frompickle('../data/tbl_variants_phase1.pkl')


Out[27]:
0|CHROM 1|POS 2|num_alleles 3|REF 4|ALT 5|AC 6|ALTIX 7|FILTER_PASS 8|NoCoverage 9|LowCoverage 10|HighCoverage 11|LowMQ 12|HighMQ0 13|RepeatDUST 14|RepeatMasker 15|RepeatTRF 16|FS 17|HRun 18|QD 19|ReadPosRankSum 20|SNPEFF_Allele 21|SNPEFF_Annotation 22|SNPEFF_HGVS_c 23|SNPEFF_HGVS_p 24|SNPEFF_Feature_ID 25|SNPEFF_CDS_pos 26|AF_AOM 27|AF_BFM 28|AF_GWA 29|AF_GNS 30|AF_BFS 31|AF_CMS 32|AF_GAS 33|AF_UGS 34|AF_KES 35|check_allele 36|exon_start 37|exon_end 38|exon 39|AGAP004707-RA 40|AGAP004707-RB 41|AGAP004707-RC 42|Davies-C1N2 43|Davies-C3N2 44|Davies-C5N2 45|Davies-C7N2 46|Davies-C8N2 47|Davies-C10N2 48|Davies-C11N2 49|Davies-C1N9 50|Davies-C8N9 51|Davies-C1N9ck
2L 2358254 2 G A 1 0 True 0 0 10 0 0 False False False 9.8672 1 17.547 -0.049988 A missense_variant n.97G>A p.Asp33Asn AGAP004707-RA 97 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2358158 2358304 1 ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N') ('NON_SYNONYMOUS_CODING', 'D33N')
2L 2358316 2 T G 73 0 True 0 0 15 0 0 False False False 2.4844 0 16.438 1.4219 G intron_variant n.147+12T>G None AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.132727272727 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 12, 'AGAP004707-PA', -3691) ('INTRONIC', 'AGAP004707-PB', 12, 'AGAP004707-PB', -3691) ('INTRONIC', 'AGAP004707-PC', 12, 'AGAP004707-PC', -3691) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '2j', -1324) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '3', -3673) ('INTRONIC', '1', 12, '2j', -1324) ('INTRONIC', '1', 12, '3', -3673)
2L 2358328 2 T C 2 0 True 0 0 14 0 0 False False False 2.7363 0 16.062 -0.646 C intron_variant n.147+24T>C None AGAP004707-RA -1 0.0 0.00724637681159 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 24, 'AGAP004707-PA', -3679) ('INTRONIC', 'AGAP004707-PB', 24, 'AGAP004707-PB', -3679) ('INTRONIC', 'AGAP004707-PC', 24, 'AGAP004707-PC', -3679) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '2j', -1312) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '3', -3661) ('INTRONIC', '1', 24, '2j', -1312) ('INTRONIC', '1', 24, '3', -3661)
2L 2358353 2 C T 1 0 True 0 1 15 0 0 False False False 1.9512 0 9.8594 1.1582 T intron_variant n.147+49C>T None AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 49, 'AGAP004707-PA', -3654) ('INTRONIC', 'AGAP004707-PB', 49, 'AGAP004707-PB', -3654) ('INTRONIC', 'AGAP004707-PC', 49, 'AGAP004707-PC', -3654) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '2j', -1287) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '3', -3636) ('INTRONIC', '1', 49, '2j', -1287) ('INTRONIC', '1', 49, '3', -3636)
2L 2358405 2 T A 1 0 True 0 6 14 0 0 False False False 20.844 1 10.859 1.1562 A intron_variant n.147+101T>A None AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True None None None ('INTRONIC', 'AGAP004707-PA', 101, 'AGAP004707-PA', -3602) ('INTRONIC', 'AGAP004707-PB', 101, 'AGAP004707-PB', -3602) ('INTRONIC', 'AGAP004707-PC', 101, 'AGAP004707-PC', -3602) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '2j', -1235) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '3', -3584) ('INTRONIC', '1', 101, '2j', -1235) ('INTRONIC', '1', 101, '3', -3584)

...


In [28]:
# check OK: read the CSV file back in and display it
etl.fromcsv('../data/tbl_variants_phase1.csv')


Out[28]:
0|CHROM 1|POS 2|num_alleles 3|REF 4|ALT 5|AC 6|ALTIX 7|FILTER_PASS 8|NoCoverage 9|LowCoverage 10|HighCoverage 11|LowMQ 12|HighMQ0 13|RepeatDUST 14|RepeatMasker 15|RepeatTRF 16|FS 17|HRun 18|QD 19|ReadPosRankSum 20|SNPEFF_Allele 21|SNPEFF_Annotation 22|SNPEFF_HGVS_c 23|SNPEFF_HGVS_p 24|SNPEFF_Feature_ID 25|SNPEFF_CDS_pos 26|AF_AOM 27|AF_BFM 28|AF_GWA 29|AF_GNS 30|AF_BFS 31|AF_CMS 32|AF_GAS 33|AF_UGS 34|AF_KES 35|check_allele 36|exon_start 37|exon_end 38|exon 39|AGAP004707-RA 40|AGAP004707-RB 41|AGAP004707-RC 42|Davies-C1N2 43|Davies-C3N2 44|Davies-C5N2 45|Davies-C7N2 46|Davies-C8N2 47|Davies-C10N2 48|Davies-C11N2 49|Davies-C1N9 50|Davies-C8N9 51|Davies-C1N9ck
2L 2358254 2 G A 1 0 True 0 0 10 0 0 False False False 9.8672 1 17.547 -0.049988 A missense_variant n.97G>A p.Asp33Asn AGAP004707-RA 97 0.0 0.0 0.0 0.0 0.0 0.00181818181818 0.0 0.0 0.0 True 2358158 2358304 1 NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N NON_SYNONYMOUS_CODING:D33N
2L 2358316 2 T G 73 0 True 0 0 15 0 0 False False False 2.4844 0 16.438 1.4219 G intron_variant n.147+12T>G NA AGAP004707-RA -1 0.0 0.0 0.0 0.0 0.0 0.132727272727 0.0 0.0 0.0 True NA NA NA INTRONIC:AGAP004707-PA:12:AGAP004707-PA:-3691 INTRONIC:AGAP004707-PB:12:AGAP004707-PB:-3691 INTRONIC:AGAP004707-PC:12:AGAP004707-PC:-3691 INTRONIC:1:12:3:-3673 INTRONIC:1:12:3:-3673 INTRONIC:1:12:3:-3673 INTRONIC:1:12:3:-3673 INTRONIC:1:12:2j:-1324 INTRONIC:1:12:3:-3673 INTRONIC:1:12:3:-3673 INTRONIC:1:12:3:-3673 INTRONIC:1:12:2j:-1324 INTRONIC:1:12:3:-3673
2L 2358328 2 T C 2 0 True 0 0 14 0 0 False False False 2.7363 0 16.062 -0.646 C intron_variant n.147+24T>C NA AGAP004707-RA -1 0.0 0.00724637681159 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True NA NA NA INTRONIC:AGAP004707-PA:24:AGAP004707-PA:-3679 INTRONIC:AGAP004707-PB:24:AGAP004707-PB:-3679 INTRONIC:AGAP004707-PC:24:AGAP004707-PC:-3679 INTRONIC:1:24:3:-3661 INTRONIC:1:24:3:-3661 INTRONIC:1:24:3:-3661 INTRONIC:1:24:3:-3661 INTRONIC:1:24:2j:-1312 INTRONIC:1:24:3:-3661 INTRONIC:1:24:3:-3661 INTRONIC:1:24:3:-3661 INTRONIC:1:24:2j:-1312 INTRONIC:1:24:3:-3661
2L 2358353 2 C T 1 0 True 0 1 15 0 0 False False False 1.9512 0 9.8594 1.1582 T intron_variant n.147+49C>T NA AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True NA NA NA INTRONIC:AGAP004707-PA:49:AGAP004707-PA:-3654 INTRONIC:AGAP004707-PB:49:AGAP004707-PB:-3654 INTRONIC:AGAP004707-PC:49:AGAP004707-PC:-3654 INTRONIC:1:49:3:-3636 INTRONIC:1:49:3:-3636 INTRONIC:1:49:3:-3636 INTRONIC:1:49:3:-3636 INTRONIC:1:49:2j:-1287 INTRONIC:1:49:3:-3636 INTRONIC:1:49:3:-3636 INTRONIC:1:49:3:-3636 INTRONIC:1:49:2j:-1287 INTRONIC:1:49:3:-3636
2L 2358405 2 T A 1 0 True 0 6 14 0 0 False False False 20.844 1 10.859 1.1562 A intron_variant n.147+101T>A NA AGAP004707-RA -1 0.0 0.0 0.0108695652174 0.0 0.0 0.0 0.0 0.0 0.0 True NA NA NA INTRONIC:AGAP004707-PA:101:AGAP004707-PA:-3602 INTRONIC:AGAP004707-PB:101:AGAP004707-PB:-3602 INTRONIC:AGAP004707-PC:101:AGAP004707-PC:-3602 INTRONIC:1:101:3:-3584 INTRONIC:1:101:3:-3584 INTRONIC:1:101:3:-3584 INTRONIC:1:101:3:-3584 INTRONIC:1:101:2j:-1235 INTRONIC:1:101:3:-3584 INTRONIC:1:101:3:-3584 INTRONIC:1:101:3:-3584 INTRONIC:1:101:2j:-1235 INTRONIC:1:101:3:-3584

...


In [ ]: