In [1]:
import collections

In [3]:
# Scratch check: build a throwaway two-field namedtuple and display its repr.
Foo = collections.namedtuple('Foo', ('bar', 'baz'))
foo = Foo(None, None)
foo


Out[3]:
Foo(bar=None, baz=None)

In [4]:
# -1 is truthy, so a -1 sentinel has to be tested explicitly (e.g. `>= 0`),
# not with a plain truth test.
bool(-1)


Out[4]:
True

In [6]:
# Check how `%` behaves for a negative multiple of 3 (relevant to the
# `% 3` codon-phase arithmetic used further down).
-3 % 3


Out[6]:
0

In [129]:
from __future__ import division
import numpy as np
import petl.interactive as etl
import petlx.all
import collections
import operator
import pyfasta
from Bio.Seq import Seq


# Location of a reference allele relative to one overlapping exon and to the
# coding sequence of that exon's transcript. `ref_cds_start` / `ref_cds_stop`
# are zero-based offsets into the coding sequence; EffectPredictor sets either
# of them to -1 when the allele runs past the corresponding edge of the exon.
CodingPosition = collections.namedtuple(
    'CodingPosition',
    'gene_id gene_start gene_stop '
    'transcript_id transcript_start transcript_stop '
    'exon_id exon_start exon_stop '
    'strand '
    'chrom '
    'pos '
    'ref '
    'ref_start '
    'ref_stop '
    'ref_cds_start '
    'ref_cds_stop'
)

# A CodingPosition extended with the alternate allele and the reference and
# alternate codon sequences it produces.
CodonChange = collections.namedtuple(
    'CodonChange',
    CodingPosition._fields + tuple('alt ref_codon alt_codon codon_change'.split())
)

# A CodonChange extended with the translated amino acids and the one-based
# amino acid position.
AminoAcidChange = collections.namedtuple(
    'AminoAcidChange',
    CodonChange._fields + tuple('aa_pos ref_aa alt_aa aa_change'.split())
)


class EffectPredictor(object):
    """Predict the coding consequences of variants.

    Combines a reference genome (FASTA, read via pyfasta) with genome
    annotations (GFF3, read via petl) to locate a variant relative to the
    coding sequence of overlapping transcripts and to derive the resulting
    codon and amino acid changes.

    All coordinates are one-based and inclusive (like GFF3).

    """

    def __init__(self, fasta_fn, gff3_fn,
                 gff3_id_attribute='ID',
                 gff3_parent_attribute='Parent',
                 gff3_gene_types=('gene', 'pseudogene'),
                 gff3_transcript_types=('mRNA', 'pseudogenic_transcript'),
                 gff3_exon_types=('CDS', 'pseudogenic_exon')):
        """Create a variant effect predictor.

        Parameters
        ----------

        fasta_fn : string
            Path to reference genome FASTA file.
        gff3_fn : string
            Path to genome annotations GFF3 file.
        gff3_id_attribute : string, optional
            Name of attribute containing feature identifier.
        gff3_parent_attribute : string, optional
            Name of attribute containing parent feature identifier.
        gff3_gene_types : sequence of strings, optional
            Feature types to consider as genes.
        gff3_transcript_types : sequence of strings, optional
            Feature types to consider as transcripts.
        gff3_exon_types : sequence of strings, optional
            Feature types to consider as exons.

        """

        # store parameters
        # N.B., the defaults are tuples rather than lists so that no mutable
        # object is shared between instances; lists are still accepted
        self._fasta_fn = fasta_fn
        self._gff3_fn = gff3_fn
        self._gff3_id_attribute = gff3_id_attribute
        self._gff3_parent_attribute = gff3_parent_attribute
        self._gff3_gene_types = gff3_gene_types
        self._gff3_transcript_types = gff3_transcript_types
        self._gff3_exon_types = gff3_exon_types

        # setup reference sequence
        self._fasta = pyfasta.Fasta(fasta_fn)

        # setup access to GFF3 as a table
        self._tbl_features = (etl
            .fromgff3(gff3_fn)
            .unpackdict('attributes', [gff3_id_attribute, gff3_parent_attribute])
            .rename({gff3_id_attribute: 'feature_id', gff3_parent_attribute: 'parent_id'})
        )
        # TODO remove hack for testing
        # N.B., this restricts every lookup below to a single chromosome
        self._tbl_features = self._tbl_features.eq('seqid', 'Pf3D7_01_v3')
        # index features by ID
        self._lkp_feature = self._tbl_features.recordlookupone('feature_id')
        # index features by parent ID
        self._lkp_children = self._tbl_features.recordlookup('parent_id')
        # index features by genomic location
        self._interval_trees = self._tbl_features.facetintervalrecordlookup('seqid', 'start', 'end', proximity=1)


    def get_feature(self, feature_id):
        """Look up a feature by ID."""
        return self._lkp_feature[feature_id]


    def get_feature_children(self, feature_id):
        """Look up the children of a feature."""
        return self._lkp_children[feature_id]


    def find_features(self, chrom, start, stop=None):
        """Look up features overlapping a given genomic location.

        If `stop` is not given, a single position (`start`) is queried.

        """
        if stop is None:
            stop = start
        return self._interval_trees[chrom].find(start, stop)


    def _get_sequence(self, chrom, start, stop):
        """Fetch reference sequence for a one-based inclusive range, lower-cased."""
        seq = self._fasta.sequence({'chr': chrom,
                                    'start': start,
                                    'stop': stop})
        return str(seq).lower()


    def _check_reference_allele(self, chrom, ref_start, ref_stop, ref):
        """Assert that the reference allele matches the reference sequence."""
        ref_seq = self._get_sequence(chrom, ref_start, ref_stop)
        assert ref_seq == ref.lower(), 'reference allele does not match reference sequence, expected %r, found %r' % (ref_seq, ref.lower())


    def get_coding_positions(self, chrom, pos, ref):
        """Calculate position of the reference allele relative to the start
        of gene coding sequences.

        One record is generated for each exon overlapping the reference
        allele. `ref_cds_start` and `ref_cds_stop` are zero-based offsets into
        the transcript's coding sequence (exons concatenated in transcription
        order); either is -1 if the allele extends beyond that edge of the
        exon.

        Parameters
        ----------

        chrom : string
            Chromosome.
        pos : int
            Position.
        ref : string
            Reference allele.

        Returns
        -------

        Generator of `CodingPosition`s.

        Raises
        ------

        AssertionError
            If the reference allele does not match the reference sequence.

        """

        # ensure types and case
        ref = str(ref).upper()

        # obtain start and end coordinates of the reference allele
        # N.B., use one-based inclusive coordinate system (like GFF3) throughout
        ref_start = pos
        ref_stop = pos + len(ref) - 1

        # check the reference allele matches the reference sequence
        self._check_reference_allele(chrom, ref_start, ref_stop, ref)

        # find overlapping features
        overlapping_features = self.find_features(chrom, ref_start, ref_stop)

        # filter to find overlapping exons
        overlapping_exons = [f for f in overlapping_features
                             if f.type in self._gff3_exon_types]

        for overlapping_exon in overlapping_exons:

            strand = overlapping_exon.strand
            transcript = self.get_feature(overlapping_exon.parent_id)
            gene = self.get_feature(transcript.parent_id)

            # find all exons in transcript
            # N.B., a transcript may have children that are not exons (e.g.,
            # UTRs), which must not contribute to the coding sequence offset
            exons = [f for f in self.get_feature_children(transcript.feature_id)
                     if f.type in self._gff3_exon_types]

            # sort exons in order of transcription
            if strand == '+':
                exons = sorted(exons, key=operator.itemgetter('start'))
            else:
                exons = sorted(exons, key=operator.itemgetter('end'), reverse=True)

            # find index of overlapping exon in all exons
            # N.B., match on coordinates rather than ID, because the segments
            # of a multi-part feature may share a single ID in GFF3
            exon_spans = [(exon.start, exon.end) for exon in exons]
            exon_index = exon_spans.index((overlapping_exon.start, overlapping_exon.end))

            # find offset, i.e., total length of all preceding exons
            offset = sum([exon.end - exon.start + 1 for exon in exons[:exon_index]])

            if strand == '+':
                starts_outside = ref_start < overlapping_exon.start
                stops_outside = ref_stop > overlapping_exon.end
                start_distance = ref_start - overlapping_exon.start
                stop_distance = ref_stop - overlapping_exon.start
            else:
                # N.B., have to think back-to-front
                starts_outside = ref_stop > overlapping_exon.end
                stops_outside = ref_start < overlapping_exon.start
                start_distance = overlapping_exon.end - ref_stop
                stop_distance = overlapping_exon.end - ref_start

            # find ref cds start and stop positions relative to overlapping exon
            ref_cds_start = -1 if starts_outside else offset + start_distance
            ref_cds_stop = -1 if stops_outside else offset + stop_distance

            yield CodingPosition(gene.feature_id, gene.start, gene.end,
                                 transcript.feature_id, transcript.start, transcript.end,
                                 overlapping_exon.feature_id, overlapping_exon.start, overlapping_exon.end,
                                 strand,
                                 chrom,
                                 pos,
                                 ref,
                                 ref_start,
                                 ref_stop,
                                 ref_cds_start,
                                 ref_cds_stop)


    def get_codon_changes(self, chrom, pos, ref, alt):
        """Calculate the codon changes resulting from a given variant.

        Only coding positions where the reference allele lies entirely within
        an exon are considered. In the codon strings, the allele is upper case
        and the flanking reference bases are lower case.

        NOTE(review): flanking bases are read directly from the genomic
        sequence, so a codon spanning an exon boundary would pick up bases
        from outside the exon -- confirm whether this needs handling.

        Parameters
        ----------

        chrom : string
            Chromosome.
        pos : int
            Position.
        ref : string
            Reference allele.
        alt : string
            Alternate allele.

        Returns
        -------

        Generator of `CodonChange`s.

        """

        # ensure types and case
        ref = str(ref).upper()
        alt = str(alt).upper()

        for coding_position in self.get_coding_positions(chrom, pos, ref):

            # convenience
            strand = coding_position.strand
            ref_start = coding_position.ref_start
            ref_stop = coding_position.ref_stop
            ref_cds_start = coding_position.ref_cds_start
            ref_cds_stop = coding_position.ref_cds_stop

            # skip alleles that are not entirely within the exon
            if ref_cds_start < 0 or ref_cds_stop < 0:
                continue

            # calculate position of reference allele start within codon
            ref_start_phase = ref_cds_start % 3

            if strand == '+':

                # obtain any previous nucleotides to complete the first codon
                prefix = self._get_sequence(chrom,
                                            ref_start - ref_start_phase,
                                            ref_start - 1)

                # begin constructing reference and alternate codon sequences
                ref_codon = prefix + ref
                alt_codon = prefix + alt

                # obtain any subsequent nucleotides to complete the last codon
                if len(ref_codon) % 3:
                    ref_stop_phase = len(ref_codon) % 3
                    ref_codon += self._get_sequence(chrom,
                                                    ref_stop + 1,
                                                    ref_stop + 3 - ref_stop_phase)

                if len(alt_codon) % 3:
                    alt_stop_phase = len(alt_codon) % 3
                    alt_codon += self._get_sequence(chrom,
                                                    ref_stop + 1,
                                                    ref_stop + 3 - alt_stop_phase)

            else:

                # N.B., we are on the reverse strand, so position reported for
                # variant is actually position at the *end* of the reference allele
                # which is particularly important for deletions

                # we will construct everything for the forward strand (i.e., back-to-front)
                # then take reverse complement afterwards at the end of this code block

                # obtain any previous nucleotides to complete the first codon
                prefix = self._get_sequence(chrom,
                                            ref_stop + 1,
                                            ref_stop + ref_start_phase)

                # begin constructing reference and alternate codon sequences
                ref_codon = ref + prefix
                alt_codon = alt + prefix

                # obtain any subsequent nucleotides to complete the last codon
                if len(ref_codon) % 3:
                    ref_stop_phase = len(ref_codon) % 3
                    suffix = self._get_sequence(chrom,
                                                ref_start - 3 + ref_stop_phase,
                                                ref_start - 1)
                    ref_codon = suffix + ref_codon

                if len(alt_codon) % 3:
                    alt_stop_phase = len(alt_codon) % 3
                    suffix = self._get_sequence(chrom,
                                                ref_start - 3 + alt_stop_phase,
                                                ref_start - 1)
                    alt_codon = suffix + alt_codon

                # take reverse complement
                ref_codon = str(Seq(ref_codon).reverse_complement())
                alt_codon = str(Seq(alt_codon).reverse_complement())

            codon_change = '%s/%s' % (ref_codon, alt_codon)
            yield CodonChange(*(coding_position + (alt, ref_codon, alt_codon, codon_change)))


    def get_amino_acid_changes(self, chrom, pos, ref, alt):
        """Calculate the amino acid changes resulting from a given variant.

        Parameters
        ----------

        chrom : string
            Chromosome.
        pos : int
            Position.
        ref : string
            Reference allele.
        alt : string
            Alternate allele.

        Returns
        -------

        Generator of `AminoAcidChange`s.

        """

        for codon_change in self.get_codon_changes(chrom, pos, ref, alt):

            ref_aa = str(Seq(codon_change.ref_codon).translate())
            alt_aa = str(Seq(codon_change.alt_codon).translate())
            # one-based position of the first affected amino acid
            aa_pos = (codon_change.ref_cds_start // 3) + 1
            aa_change = '%s%s%s' % (ref_aa, aa_pos, alt_aa)

            yield AminoAcidChange(*(codon_change + (aa_pos, ref_aa, alt_aa, aa_change)))


    def get_amino_acid_change(self, chrom, pos, ref, alt, transcript_id):
        """Calculate the amino acid change resulting from a given variant
        with respect to a single transcript.

        NOTE(review): the original implementation was an unfinished stub; the
        behaviour for alleles that are not entirely within an exon has been
        inferred from how `get_effects` uses the result -- confirm.

        Parameters
        ----------

        chrom : string
            Chromosome.
        pos : int
            Position.
        ref : string
            Reference allele.
        alt : string
            Alternate allele.
        transcript_id : string
            Identifier of the transcript.

        Returns
        -------

        An `AminoAcidChange`. If the reference allele lies entirely within an
        exon of the transcript, all fields are populated. If it overlaps an
        exon boundary, the coding position fields are populated (with
        `ref_cds_start` or `ref_cds_stop` equal to -1) and the codon and amino
        acid fields are None. If it does not overlap any exon, the exon, codon
        and amino acid fields are None and both `ref_cds_start` and
        `ref_cds_stop` are -1.

        Raises
        ------

        AssertionError
            If the reference allele does not match the reference sequence.
        Exception
            If the reference allele is not entirely within the transcript.

        """

        # ensure types and case
        ref = str(ref).upper()
        alt = str(alt).upper()

        # look up transcript
        transcript = self.get_feature(transcript_id)

        # obtain start and end coordinates of the reference allele
        # N.B., use one-based inclusive coordinate system (like GFF3) throughout
        ref_start = pos
        ref_stop = pos + len(ref) - 1

        # check the reference allele matches the reference sequence
        self._check_reference_allele(chrom, ref_start, ref_stop, ref)

        if ref_start < transcript.start or ref_stop > transcript.end:
            raise Exception('reference allele does not overlap transcript or overlaps transcript boundary')

        # reference allele entirely within an exon of the transcript
        for aa_change in self.get_amino_acid_changes(chrom, pos, ref, alt):
            if aa_change.transcript_id == transcript_id:
                return aa_change

        # reference allele overlaps an exon boundary, codons are undetermined
        n_undetermined = len(AminoAcidChange._fields) - len(CodingPosition._fields) - 1
        for coding_position in self.get_coding_positions(chrom, pos, ref):
            if coding_position.transcript_id == transcript_id:
                return AminoAcidChange(*(coding_position + (alt,) + (None,) * n_undetermined))

        # reference allele within the transcript but outside all exons
        gene = self.get_feature(transcript.parent_id)
        values = dict.fromkeys(AminoAcidChange._fields)
        values.update(gene_id=gene.feature_id,
                      gene_start=gene.start,
                      gene_stop=gene.end,
                      transcript_id=transcript.feature_id,
                      transcript_start=transcript.start,
                      transcript_stop=transcript.end,
                      strand=transcript.strand,
                      chrom=chrom,
                      pos=pos,
                      ref=ref,
                      ref_start=ref_start,
                      ref_stop=ref_stop,
                      ref_cds_start=-1,
                      ref_cds_stop=-1,
                      alt=alt)
        return AminoAcidChange(**values)


    def get_effects(self, chrom, pos, ref, alt):
        """Classify the effects of a given variant on overlapping genes.

        Work in progress: only SNPs entirely within an exon are classified in
        detail; INDELs and alleles overlapping an exon or transcript boundary
        are reported as not implemented.

        NOTE(review): the `Effect*` result types used below are not defined in
        this class and are expected to be available at module level -- confirm
        where they are defined.

        Parameters
        ----------

        chrom : string
            Chromosome.
        pos : int
            Position.
        ref : string
            Reference allele.
        alt : string
            Alternate allele.

        Returns
        -------

        Generator of effects.

        Raises
        ------

        AssertionError
            If the reference allele does not match the reference sequence.

        """

        # ensure types and case
        ref = str(ref).upper()
        alt = str(alt).upper()

        # obtain start and end coordinates of the reference allele
        # N.B., use one-based inclusive coordinate system (like GFF3) throughout
        ref_start = pos
        ref_stop = pos + len(ref) - 1

        # check the reference allele matches the reference sequence
        self._check_reference_allele(chrom, ref_start, ref_stop, ref)

        # find overlapping genome features
        overlapping_features = self.find_features(chrom, ref_start, ref_stop)

        # filter to find overlapping genes
        genes = [f for f in overlapping_features
                 if f.type in self._gff3_gene_types]

        if not genes:
            yield EffectIntergenic()
            return

        for gene in genes:

            # N.B., the children lookup has no entry for a childless feature
            try:
                children = self.get_feature_children(gene.feature_id)
            except KeyError:
                children = []

            transcripts = [f for f in children
                           if f.type in self._gff3_transcript_types]

            if not transcripts:
                yield EffectIntragenic(gene.feature_id)
                continue

            for transcript in transcripts:

                if ref_start < transcript.start or ref_stop > transcript.end:
                    # allele is not entirely within the transcript
                    yield EffectNotImplemented(gene.feature_id, transcript.feature_id)
                    continue

                aa_change = self.get_amino_acid_change(chrom, pos, ref, alt, transcript.feature_id)
                ref_cds_start = aa_change.ref_cds_start
                ref_cds_stop = aa_change.ref_cds_stop

                if ref_cds_start < 0 and ref_cds_stop < 0:

                    yield EffectIntronic(*aa_change)

                elif ref_cds_start < 0 or ref_cds_stop < 0:

                    # allele overlaps an exon boundary
                    yield EffectNotImplemented(*aa_change)

                elif len(ref) == len(alt) == 1:

                    # SNPs
                    if aa_change.ref_aa == aa_change.alt_aa:

                        yield EffectSynonymousCoding(*aa_change)

                    elif aa_change.ref_aa == 'M' and ref_cds_start == 0:

                        yield EffectStartLost(*aa_change)

                    elif aa_change.ref_aa == '*':

                        yield EffectStopLost(*aa_change)

                    elif aa_change.alt_aa == '*':

                        yield EffectStopGained(*aa_change)

                    else:

                        yield EffectNonSynonymousCoding(*aa_change)

                else:

                    # TODO INDELs
                    yield EffectNotImplemented(*aa_change)

In [229]:
eff._tbl_features.counts('type').displayall()


type count frequency
CDS 374 0.406080347448
polypeptide 148 0.160694896851
gene 141 0.153094462541
mRNA 135 0.14657980456
polypeptide_motif 39 0.042345276873
pseudogenic_exon 36 0.0390879478827
pseudogene 13 0.014115092291
pseudogenic_transcript 13 0.014115092291
repeat_region 12 0.0130293159609
rRNA 5 0.00542888165038
ncRNA 4 0.0043431053203
centromere 1 0.00108577633008

Sandbox


In [130]:
# Reference genome and annotations; the paths are relative to this notebook.
fasta_fn = '../../../data/genome/sanger/version3/September_2012/Pf3D7_v3.fa'
gff3_fn = '../../../data/genome/sanger/version3/September_2012/Pf3D7_v3.gff'
# Constructing the predictor opens the FASTA and builds the feature lookups
# (by ID, by parent ID and by genomic location) from the GFF3.
eff = EffectPredictor(fasta_fn, gff3_fn)

In [131]:
import pprint
pp = pprint.PrettyPrinter(indent=1, width=80, depth=3)

def dbg(results):
    """Pretty-print an iterable of namedtuples as lists of (field, value) pairs.

    The items are materialised with `list()` so that the printed structure is
    a plain list of tuples regardless of what `_asdict().items()` returns on
    the running Python version.

    """
    pp.pprint([list(x._asdict().items()) for x in results])

In [132]:
eff._tbl_features.eq('type', 'gene').rowslice(15, 19)


Out[132]:
seqid source type start end score strand phase feature_id parent_id
Pf3D7_01_v3 chado gene 98819 102282 . + . PF3D7_0102200 None
Pf3D7_01_v3 chado gene 104704 105209 . + . PF3D7_0102300 None
Pf3D7_01_v3 chado gene 110750 115799 . - . PF3D7_0102500 None
Pf3D7_01_v3 chado gene 119041 121249 . - . PF3D7_0102600 None

In [133]:
eff._fasta['Pf3D7_01_v3'][98815:98824]


Out[133]:
u'attatgaga'

Debug coding positions


In [134]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 98818, 'TAT'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98818),
  ('ref', 'TAT'),
  ('ref_start', 98818),
  ('ref_stop', 98820),
  ('ref_cds_start', -1),
  ('ref_cds_stop', 1)]]

In [135]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 98819, 'A'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98819),
  ('ref', 'A'),
  ('ref_start', 98819),
  ('ref_stop', 98819),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 0)]]

In [136]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 98820, 'T'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98820),
  ('ref', 'T'),
  ('ref_start', 98820),
  ('ref_stop', 98820),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1)]]

In [137]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 98821, 'G'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98821),
  ('ref', 'G'),
  ('ref_start', 98821),
  ('ref_stop', 98821),
  ('ref_cds_start', 2),
  ('ref_cds_stop', 2)]]

In [138]:
eff._fasta['Pf3D7_01_v3'][99010:99016]


Out[138]:
u'aatgta'

In [139]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 99013, 'T'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 99013),
  ('ref', 'T'),
  ('ref_start', 99013),
  ('ref_stop', 99013),
  ('ref_cds_start', 194),
  ('ref_cds_stop', 194)]]

In [140]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 99013, 'TG'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 99013),
  ('ref', 'TG'),
  ('ref_start', 99013),
  ('ref_stop', 99014),
  ('ref_cds_start', 194),
  ('ref_cds_stop', -1)]]

In [141]:
eff._fasta['Pf3D7_01_v3'][115796:115799]


Out[141]:
u'cat'

In [142]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 115799, 'T'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115799),
  ('ref', 'T'),
  ('ref_start', 115799),
  ('ref_stop', 115799),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 0)]]

In [143]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 115798, 'A'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115798),
  ('ref', 'A'),
  ('ref_start', 115798),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1)]]

In [144]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 115797, 'C'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115797),
  ('ref', 'C'),
  ('ref_start', 115797),
  ('ref_stop', 115797),
  ('ref_cds_start', 2),
  ('ref_cds_stop', 2)]]

In [145]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 115799, 'TA'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115799),
  ('ref', 'TA'),
  ('ref_start', 115799),
  ('ref_stop', 115800),
  ('ref_cds_start', -1),
  ('ref_cds_stop', 0)]]

In [146]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 111338, 'T'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 111338),
  ('ref', 'T'),
  ('ref_start', 111338),
  ('ref_stop', 111338),
  ('ref_cds_start', 4461),
  ('ref_cds_stop', 4461)]]

In [147]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 111337, 'C'))


[]

In [148]:
dbg(eff.get_coding_positions('Pf3D7_01_v3', 111337, 'CT'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 111337),
  ('ref', 'CT'),
  ('ref_start', 111337),
  ('ref_stop', 111338),
  ('ref_cds_start', 4461),
  ('ref_cds_stop', -1)]]

Debug amino acid changes


In [149]:
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98819, 'A', 'T'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98819),
  ('ref', 'A'),
  ('ref_start', 98819),
  ('ref_stop', 98819),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 0),
  ('alt', 'T'),
  ('ref_codon', 'Atg'),
  ('alt_codon', 'Ttg'),
  ('codon_change', 'Atg/Ttg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'L'),
  ('aa_change', 'M1L')]]

In [150]:
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98820, 'T', 'C'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98820),
  ('ref', 'T'),
  ('ref_start', 98820),
  ('ref_stop', 98820),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1),
  ('alt', 'C'),
  ('ref_codon', 'aTg'),
  ('alt_codon', 'aCg'),
  ('codon_change', 'aTg/aCg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'T'),
  ('aa_change', 'M1T')]]

In [152]:
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98821, 'G', 'C'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98821),
  ('ref', 'G'),
  ('ref_start', 98821),
  ('ref_stop', 98821),
  ('ref_cds_start', 2),
  ('ref_cds_stop', 2),
  ('alt', 'C'),
  ('ref_codon', 'atG'),
  ('alt_codon', 'atC'),
  ('codon_change', 'atG/atC'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'I'),
  ('aa_change', 'M1I')]]

In [153]:
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98822, 'A', 'C'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98822),
  ('ref', 'A'),
  ('ref_start', 98822),
  ('ref_stop', 98822),
  ('ref_cds_start', 3),
  ('ref_cds_stop', 3),
  ('alt', 'C'),
  ('ref_codon', 'Aga'),
  ('alt_codon', 'Cga'),
  ('codon_change', 'Aga/Cga'),
  ('aa_pos', 2),
  ('ref_aa', 'R'),
  ('alt_aa', 'R'),
  ('aa_change', 'R2R')]]

In [154]:
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98819, 'A', 'ATTT'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98819),
  ('ref', 'A'),
  ('ref_start', 98819),
  ('ref_stop', 98819),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 0),
  ('alt', 'ATTT'),
  ('ref_codon', 'Atg'),
  ('alt_codon', 'ATTTtg'),
  ('codon_change', 'Atg/ATTTtg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'IL'),
  ('aa_change', 'M1IL')]]

In [155]:
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98820, 'T', 'TAAA'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98820),
  ('ref', 'T'),
  ('ref_start', 98820),
  ('ref_stop', 98820),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1),
  ('alt', 'TAAA'),
  ('ref_codon', 'aTg'),
  ('alt_codon', 'aTAAAg'),
  ('codon_change', 'aTg/aTAAAg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'IK'),
  ('aa_change', 'M1IK')]]

In [156]:
# Two-base deletion ('ATG' -> 'A'); the ref allele covers the whole first codon (CDS offsets 0-2).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98819, 'ATG', 'A'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98819),
  ('ref', 'ATG'),
  ('ref_start', 98819),
  ('ref_stop', 98821),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 2),
  ('alt', 'A'),
  ('ref_codon', 'ATG'),
  ('alt_codon', 'Aag'),
  ('codon_change', 'ATG/Aag'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'K'),
  ('aa_change', 'M1K')]]

In [158]:
# Three-base deletion ('TGAG' -> 'T'); the ref allele spans codons 1 and 2 (CDS offsets 1-4, ref_aa 'MR').
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98820, 'TGAG', 'T'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98820),
  ('ref', 'TGAG'),
  ('ref_start', 98820),
  ('ref_stop', 98823),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 4),
  ('alt', 'T'),
  ('ref_codon', 'aTGAGa'),
  ('alt_codon', 'aTa'),
  ('codon_change', 'aTGAGa/aTa'),
  ('aa_pos', 1),
  ('ref_aa', 'MR'),
  ('alt_aa', 'I'),
  ('aa_change', 'MR1I')]]

In [159]:
# Two-base substitution ('AT' -> 'GG') contained within the first codon (CDS offsets 0-1).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98819, 'AT', 'GG'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98819),
  ('ref', 'AT'),
  ('ref_start', 98819),
  ('ref_stop', 98820),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 1),
  ('alt', 'GG'),
  ('ref_codon', 'ATg'),
  ('alt_codon', 'GGg'),
  ('codon_change', 'ATg/GGg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'G'),
  ('aa_change', 'M1G')]]

In [160]:
# Two-base substitution ('TG' -> 'CC') over the last two bases of the first codon (CDS offsets 1-2).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98820, 'TG', 'CC'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98820),
  ('ref', 'TG'),
  ('ref_start', 98820),
  ('ref_stop', 98821),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 2),
  ('alt', 'CC'),
  ('ref_codon', 'aTG'),
  ('alt_codon', 'aCC'),
  ('codon_change', 'aTG/aCC'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'T'),
  ('aa_change', 'M1T')]]

In [161]:
# Two-base substitution ('GA' -> 'CC') straddling the boundary between codons 1 and 2 (CDS offsets 2-3).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 98821, 'GA', 'CC'))


[[('gene_id', 'PF3D7_0102200'),
  ('gene_start', 98819),
  ('gene_stop', 102282),
  ('transcript_id', 'PF3D7_0102200.1'),
  ('transcript_start', 98819),
  ('transcript_stop', 102282),
  ('exon_id', 'PF3D7_0102200.1:exon:1'),
  ('exon_start', 98819),
  ('exon_stop', 99013),
  ('strand', '+'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 98821),
  ('ref', 'GA'),
  ('ref_start', 98821),
  ('ref_stop', 98822),
  ('ref_cds_start', 2),
  ('ref_cds_stop', 3),
  ('alt', 'CC'),
  ('ref_codon', 'atGAga'),
  ('alt_codon', 'atCCga'),
  ('codon_change', 'atGAga/atCCga'),
  ('aa_pos', 1),
  ('ref_aa', 'MR'),
  ('alt_aa', 'IR'),
  ('aa_change', 'MR1IR')]]

In [199]:
# Peek at the reference bases around position 257826; the slice is 0-based, so index 257825 holds the 'g' at that position.
eff._fasta['Pf3D7_01_v3'][257823:257830]


Out[199]:
u'ttgatta'

In [198]:
# 15-base insertion after 257826 in a '-' strand gene (PF3D7_0106000); output reports Q240NNNNNQ.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 257826, 'G', 'GATTATTATTATTATT'))


[[('gene_id', 'PF3D7_0106000'),
  ('gene_start', 255775),
  ('gene_stop', 258543),
  ('transcript_id', 'PF3D7_0106000.1'),
  ('transcript_start', 255775),
  ('transcript_stop', 258543),
  ('exon_id', 'PF3D7_0106000.1:exon:1'),
  ('exon_start', 255775),
  ('exon_stop', 258543),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 257826),
  ('ref', 'G'),
  ('ref_start', 257826),
  ('ref_stop', 257826),
  ('ref_cds_start', 717),
  ('ref_cds_stop', 717),
  ('alt', 'GATTATTATTATTATT'),
  ('ref_codon', 'Caa'),
  ('alt_codon', 'AATAATAATAATAATCaa'),
  ('codon_change', 'Caa/AATAATAATAATAATCaa'),
  ('aa_pos', 240),
  ('ref_aa', 'Q'),
  ('alt_aa', 'NNNNNQ'),
  ('aa_change', 'Q240NNNNNQ')]]

In [197]:
# Side-by-side comparison of the SnpEff annotation stored in the VCF (EFF)
# with the codon / amino-acid change computed by `eff` for the same variant.
vcf_fn = '../../../data/public/20141022/3d7_hb3.combined.final.vcf.gz'


def _head_or_none(values):
    """Return the first element of `values`, or None when it is empty or None."""
    if values:
        return values[0]
    return None


def _predict_for_first_alt(row):
    """List the amino-acid changes predicted for the row's first ALT allele."""
    return list(eff.get_amino_acid_changes(row.CHROM, row.POS, row.REF, row.ALT[0]))


def _eff_piece(delimiter, index):
    """Build a row function returning piece `index` of EFF split on `delimiter`.

    The returned function yields None when the row has no EFF value.
    """
    def pick(row):
        if row.EFF is None:
            return None
        return row.EFF.split(delimiter)[index]
    return pick


def _my_eff_attr(attr):
    """Build a row function returning attribute `attr` of my_eff.

    The returned function yields None when the row has no my_eff value.
    """
    def pick(row):
        if row.my_eff is None:
            return None
        return getattr(row.my_eff, attr)
    return pick


tbl = (
    etl
    .fromvcf(vcf_fn, samples=None)
    .unpackinfo('EFF')
    .addfield('my_eff', _predict_for_first_alt)
    # keep only the first reported effect from each source
    .convert(['EFF', 'my_eff'], _head_or_none)
    .addfield('eff_effect', _eff_piece('(', 0))
    .addfield('eff_codon_change', _eff_piece('|', 2))
    .addfield('my_codon_change', _my_eff_attr('codon_change'))
    .addfield('eff_a_change', _eff_piece('|', 3))
    .addfield('my_aa_change', _my_eff_attr('aa_change'))
    .cutout('ID', 'my_eff', 'QUAL', 'FILTER', 'EFF')
    # drop variants SnpEff places outside coding sequence
    .selectnotin('eff_effect', ['INTERGENIC', 'INTRON'])
)
tbl.display(200)


CHROM POS REF ALT eff_effect eff_codon_change my_codon_change eff_a_change my_aa_change
Pf3D7_01_v3 95518 G [T] NON_SYNONYMOUS_CODING aGa/aTa aGa/aTa R156I R156I
Pf3D7_01_v3 95621 T [A] NON_SYNONYMOUS_CODING caT/caA caT/caA H190Q H190Q
Pf3D7_01_v3 95632 G [A] NON_SYNONYMOUS_CODING tGt/tAt tGt/tAt C194Y C194Y
Pf3D7_01_v3 95641 G [A] NON_SYNONYMOUS_CODING tGt/tAt tGt/tAt C197Y C197Y
Pf3D7_01_v3 95680 A [G] NON_SYNONYMOUS_CODING aAt/aGt aAt/aGt N210S N210S
Pf3D7_01_v3 95685 G [T] NON_SYNONYMOUS_CODING Gat/Tat Gat/Tat D212Y D212Y
Pf3D7_01_v3 95686 A [C] NON_SYNONYMOUS_CODING gAt/gCt gAt/gCt D212A D212A
Pf3D7_01_v3 95710 G [C] NON_SYNONYMOUS_CODING tGt/tCt tGt/tCt C220S C220S
Pf3D7_01_v3 95715 A [G] NON_SYNONYMOUS_CODING Agt/Ggt Agt/Ggt S222G S222G
Pf3D7_01_v3 95716 G [T] NON_SYNONYMOUS_CODING aGt/aTt aGt/aTt S222I S222I
Pf3D7_01_v3 95742 G [A] NON_SYNONYMOUS_CODING Gct/Act Gct/Act A231T A231T
Pf3D7_01_v3 95754 G [T] NON_SYNONYMOUS_CODING Gct/Tct Gct/Tct A235S A235S
Pf3D7_01_v3 98868 A [G] NON_SYNONYMOUS_CODING gAt/gGt gAt/gGt D17G D17G
Pf3D7_01_v3 101269 G [T] NON_SYNONYMOUS_CODING Gta/Tta Gta/Tta V749L V749L
Pf3D7_01_v3 101790 AAAT [A] CODON_DELETION aat/- gaAAAT/gaA N923- EN922E
Pf3D7_01_v3 107756 T [G] STOP_LOST tAa/tCa tAa/tCa *198S *198S
Pf3D7_01_v3 107823 T [C] NON_SYNONYMOUS_CODING Atg/Gtg Atg/Gtg M176V M176V
Pf3D7_01_v3 114473 T [G] NON_SYNONYMOUS_CODING Aaa/Caa Aaa/Caa K443Q K443Q
Pf3D7_01_v3 120736 T [A] NON_SYNONYMOUS_CODING aaA/aaT aaA/aaT K116N K116N
Pf3D7_01_v3 127256 AATT [A] CODON_DELETION aat/- AATTgt/Tgt N295- NC295C
Pf3D7_01_v3 127692 G [GTTATTA] CODON_CHANGE_PLUS_CODON_INSERTION aac/aTAATAAac aaC/aaTAATAAC N150IIN N150NNN
Pf3D7_01_v3 127725 G [A] SYNONYMOUS_CODING aaC/aaT aaC/aaT N139 N139N
Pf3D7_01_v3 130339 C [T] NON_SYNONYMOUS_CODING Gca/Aca Gca/Aca A168T A168T
Pf3D7_01_v3 135874 ATTTTCCTTTTCC [A] CODON_CHANGE_PLUS_CODON_DELETION aaggaaaaggaaaat/aat aaGGAAAAGGAAAAT/aaT KEKEN1124N KEKEN1124N
Pf3D7_01_v3 136422 G [T] NON_SYNONYMOUS_CODING Caa/Aaa Caa/Aaa Q946K Q946K
Pf3D7_01_v3 137201 ATAT [A] CODON_CHANGE_PLUS_CODON_DELETION aatata/ata aATATa/aTa NI685I NI685I
Pf3D7_01_v3 137258 G [A] NON_SYNONYMOUS_CODING aCg/aTg aCg/aTg T667M T667M
Pf3D7_01_v3 138966 G [C] NON_SYNONYMOUS_CODING Caa/Gaa Caa/Gaa Q98E Q98E
Pf3D7_01_v3 139191 C [T] NON_SYNONYMOUS_CODING Gca/Aca Gca/Aca A23T A23T
Pf3D7_01_v3 145560 A [T] SYNONYMOUS_CODING tcT/tcA tcT/tcA S279 S279S
Pf3D7_01_v3 146124 TTTA [T] CODON_CHANGE_PLUS_CODON_DELETION aataaa/aaa aaTAAA/aaA NK90K NK90K
Pf3D7_01_v3 147952 T [TTTTTAC] CODON_INSERTION -/GTAAAA aAt/aGTAAAAAt -1609VK N1609SKN
Pf3D7_01_v3 148490 A [C] NON_SYNONYMOUS_CODING Tca/Gca Tca/Gca S1430A S1430A
Pf3D7_01_v3 148768 TCACGGTGGAAATTATCAC [T] CODON_CHANGE_PLUS_CODON_DELETION ggtgataatttccaccgtgat/gat gGTGATAATTTCCACCGTGAt/gAt GDNFHRD1331D GDNFHRD1331D
Pf3D7_01_v3 148917 A [T] NON_SYNONYMOUS_CODING aaT/aaA aaT/aaA N1287K N1287K
Pf3D7_01_v3 155281 A [T] NON_SYNONYMOUS_CODING tAt/tTt tAt/tTt Y369F Y369F
Pf3D7_01_v3 155819 A [AAATAATAAT] CODON_INSERTION -/AATAATAAT ctA/ctAAATAATAAT -549NNN L548LNNN
Pf3D7_01_v3 155877 G [A] NON_SYNONYMOUS_CODING Gac/Aac Gac/Aac D568N D568N
Pf3D7_01_v3 155977 G [A] NON_SYNONYMOUS_CODING gGc/gAc gGc/gAc G601D G601D
Pf3D7_01_v3 155978 C [T] SYNONYMOUS_CODING ggC/ggT ggC/ggT G601 G601G
Pf3D7_01_v3 156910 A [G] NON_SYNONYMOUS_CODING Agt/Ggt Agt/Ggt S867G S867G
Pf3D7_01_v3 159562 A [G] SYNONYMOUS_CODING ttA/ttG ttA/ttG L1623 L1623L
Pf3D7_01_v3 161439 C [CAAT] CODON_INSERTION -/AAT gaC/gaCAAT -104N D103DN
Pf3D7_01_v3 161640 T [C] SYNONYMOUS_CODING acT/acC acT/acC T170 T170T
Pf3D7_01_v3 161865 G [T] NON_SYNONYMOUS_CODING aaG/aaT aaG/aaT K245N K245N
Pf3D7_01_v3 162124 C [CATGATCAAGTAAAAAATAAACATGATCAAGTAAAAAATAAAGATGATAAAATAAAAAATAAAG] CODON_INSERTION cat/cATGATCAAGTAAAAAATAAACATGATCAAGTAAAAAATAAAGATGATAAAATAAAAAATAAAGat Cat/CATGATCAAGTAAAAAATAAACATGATCAAGTAAAAAATAAAGATGATAAAATAAAAAATAAAGat H332HDQVKNKHDQVKNKDDKIKNKD H332HDQVKNKHDQVKNKDDKIKNKD
Pf3D7_01_v3 162967 C [CATAATA] CODON_INSERTION cat/cATAATAat Cat/CATAATAat H613HNN H613HNN
Pf3D7_01_v3 162981 C [G] NON_SYNONYMOUS_CODING atC/atG atC/atG I617M I617M
Pf3D7_01_v3 163145 A [T] NON_SYNONYMOUS_CODING cAt/cTt cAt/cTt H672L H672L
Pf3D7_01_v3 164206 G [GTAAACAAAAATATAAATA] CODON_INSERTION gta/gTAAACAAAAATATAAATAta Gta/GTAAACAAAAATATAAATAta V1026VNKNINI V1026VNKNINI
Pf3D7_01_v3 167312 A [G] NON_SYNONYMOUS_CODING tAt/tGt tAt/tGt Y267C Y267C
Pf3D7_01_v3 173982 C [CAAT] CODON_INSERTION -/AAT taC/taCAAT -374N Y373YN
Pf3D7_01_v3 178726 G [A] NON_SYNONYMOUS_CODING Gtt/Att Gtt/Att V214I V214I
Pf3D7_01_v3 179265 A [G] NON_SYNONYMOUS_CODING gAa/gGa gAa/gGa E284G E284G
Pf3D7_01_v3 179346 G [A] NON_SYNONYMOUS_CODING gGa/gAa gGa/gAa G311E G311E
Pf3D7_01_v3 179347 A [G] SYNONYMOUS_CODING ggA/ggG ggA/ggG G311 G311G
Pf3D7_01_v3 179969 G [A] NON_SYNONYMOUS_CODING Gaa/Aaa Gaa/Aaa E519K E519K
Pf3D7_01_v3 180034 A [T] NON_SYNONYMOUS_CODING gaA/gaT gaA/gaT E540D E540D
Pf3D7_01_v3 180063 C [A] NON_SYNONYMOUS_CODING aCt/aAt aCt/aAt T550N T550N
Pf3D7_01_v3 180073 C [G] SYNONYMOUS_CODING gtC/gtG gtC/gtG V553 V553V
Pf3D7_01_v3 180075 A [G] NON_SYNONYMOUS_CODING aAa/aGa aAa/aGa K554R K554R
Pf3D7_01_v3 180076 A [G] SYNONYMOUS_CODING aaA/aaG aaA/aaG K554 K554K
Pf3D7_01_v3 180077 A [G] NON_SYNONYMOUS_CODING Aaa/Gaa Aaa/Gaa K555E K555E
Pf3D7_01_v3 180078 A [C] NON_SYNONYMOUS_CODING aAa/aCa aAa/aCa K555T K555T
Pf3D7_01_v3 180159 C [A] NON_SYNONYMOUS_CODING aCt/aAt aCt/aAt T582N T582N
Pf3D7_01_v3 180160 T [A] SYNONYMOUS_CODING acT/acA acT/acA T582 T582T
Pf3D7_01_v3 180161 A [G] NON_SYNONYMOUS_CODING Acg/Gcg Acg/Gcg T583A T583A
Pf3D7_01_v3 180162 C [A] NON_SYNONYMOUS_CODING aCg/aAg aCg/aAg T583K T583K
Pf3D7_01_v3 180170 A [C] NON_SYNONYMOUS_CODING Aaa/Caa Aaa/Caa K586Q K586Q
Pf3D7_01_v3 180174 T [G] NON_SYNONYMOUS_CODING aTt/aGt aTt/aGt I587S I587S
Pf3D7_01_v3 180175 T [G] NON_SYNONYMOUS_CODING atT/atG atT/atG I587M I587M
Pf3D7_01_v3 180181 T [C] SYNONYMOUS_CODING ttT/ttC ttT/ttC F589 F589F
Pf3D7_01_v3 180186 A [G] NON_SYNONYMOUS_CODING gAa/gGa gAa/gGa E591G E591G
Pf3D7_01_v3 180192 A [T] NON_SYNONYMOUS_CODING aAt/aTt aAt/aTt N593I N593I
Pf3D7_01_v3 180217 C [T] SYNONYMOUS_CODING atC/atT atC/atT I601 I601I
Pf3D7_01_v3 180230 G [C] NON_SYNONYMOUS_CODING Gag/Cag Gag/Cag E606Q E606Q
Pf3D7_01_v3 180233 A [G] NON_SYNONYMOUS_CODING Aat/Gat Aat/Gat N607D N607D
Pf3D7_01_v3 180277 G [A] SYNONYMOUS_CODING caG/caA caG/caA Q621 Q621Q
Pf3D7_01_v3 180285 G [T] NON_SYNONYMOUS_CODING aGa/aTa aGa/aTa R624I R624I
Pf3D7_01_v3 180288 A [C] NON_SYNONYMOUS_CODING aAt/aCt aAt/aCt N625T N625T
Pf3D7_01_v3 180297 A [G] NON_SYNONYMOUS_CODING gAt/gGt gAt/gGt D628G D628G
Pf3D7_01_v3 180299 G [C] NON_SYNONYMOUS_CODING Gga/Cga Gga/Cga G629R G629R
Pf3D7_01_v3 180304 G [A] NON_SYNONYMOUS_CODING atG/atA atG/atA M630I M630I
Pf3D7_01_v3 180305 G [A] NON_SYNONYMOUS_CODING Gat/Aat Gat/Aat D631N D631N
Pf3D7_01_v3 180306 A [T] NON_SYNONYMOUS_CODING gAt/gTt gAt/gTt D631V D631V
Pf3D7_01_v3 180309 A [C] NON_SYNONYMOUS_CODING gAa/gCa gAa/gCa E632A E632A
Pf3D7_01_v3 180311 C [A] NON_SYNONYMOUS_CODING Cat/Aat Cat/Aat H633N H633N
Pf3D7_01_v3 183844 G [C] NON_SYNONYMOUS_CODING Caa/Gaa Caa/Gaa Q127E Q127E
Pf3D7_01_v3 183845 A [T] SYNONYMOUS_CODING gcT/gcA gcT/gcA A126 A126A
Pf3D7_01_v3 190317 CATAATA [C] CODON_DELETION cataataat/cat CATAATAat/Cat HNN17H HNN17H
Pf3D7_01_v3 190616 A [G] SYNONYMOUS_CODING acA/acG acA/acG T116 T116T
Pf3D7_01_v3 190862 CAATAAT [C] CODON_DELETION aataat/- aaCAATAAT/aaC NN199- NNN198N
Pf3D7_01_v3 190944 G [GATAGTATTAATAATAGTATTAATA] CODON_INSERTION gat/gATAGTATTAATAATAGTATTAATAat Gat/GATAGTATTAATAATAGTATTAATAat D226DSINNSINN D226DSINNSINN
Pf3D7_01_v3 191685 TATA [T] CODON_DELETION tataat/tat TATAat/Tat YN473Y YN473Y
Pf3D7_01_v3 192111 T [C] NON_SYNONYMOUS_CODING Tat/Cat Tat/Cat Y615H Y615H
Pf3D7_01_v3 192605 A [T] NON_SYNONYMOUS_CODING aaA/aaT aaA/aaT K779N K779N
Pf3D7_01_v3 192608 GGGT [A] CODON_DELETION ggt/- aaGGGT/aaA G781- KG780K
Pf3D7_01_v3 192617 T [C] SYNONYMOUS_CODING gaT/gaC gaT/gaC D783 D783D
Pf3D7_01_v3 192618 A [G] NON_SYNONYMOUS_CODING Aat/Gat Aat/Gat N784D N784D
Pf3D7_01_v3 192621 A [G] NON_SYNONYMOUS_CODING Aaa/Gaa Aaa/Gaa K785E K785E
Pf3D7_01_v3 192623 A [AGATGACGATGAC] CODON_INSERTION -/GATGACGATGAC aaA/aaAGATGACGATGAC -786DDDD K785KDDDD
Pf3D7_01_v3 193667 G [C] NON_SYNONYMOUS_CODING agG/agC agG/agC R1133S R1133S
Pf3D7_01_v3 194117 T [C] SYNONYMOUS_CODING ttT/ttC ttT/ttC F1283 F1283F
Pf3D7_01_v3 194843 CAAATATGAA [C] CODON_DELETION aaatatgaa/- gaCAAATATGAA/gaC KYE1526- DKYE1525D
Pf3D7_01_v3 195383 A [AAAC] CODON_INSERTION -/AAC aaA/aaAAAC -1706N K1705KN
Pf3D7_01_v3 196010 A [C] NON_SYNONYMOUS_CODING aaA/aaC aaA/aaC K1914N K1914N
Pf3D7_01_v3 196011 G [A] NON_SYNONYMOUS_CODING Gaa/Aaa Gaa/Aaa E1915K E1915K
Pf3D7_01_v3 196981 G [A] NON_SYNONYMOUS_CODING aGa/aAa aGa/aAa R2238K R2238K
Pf3D7_01_v3 199812 G [C] NON_SYNONYMOUS_CODING cGt/cCt cGt/cCt R3104P R3104P
Pf3D7_01_v3 199847 C [G] NON_SYNONYMOUS_CODING Cat/Gat Cat/Gat H3116D H3116D
Pf3D7_01_v3 199848 A [G] NON_SYNONYMOUS_CODING cAt/cGt cAt/cGt H3116R H3116R
Pf3D7_01_v3 199851 C [T] NON_SYNONYMOUS_CODING gCa/gTa gCa/gTa A3117V A3117V
Pf3D7_01_v3 199855 C [T] SYNONYMOUS_CODING aaC/aaT aaC/aaT N3118 N3118N
Pf3D7_01_v3 199863 G [A] NON_SYNONYMOUS_CODING aGt/aAt aGt/aAt S3121N S3121N
Pf3D7_01_v3 205066 G [A] SYNONYMOUS_CODING ttC/ttT ttC/ttT F108 F108F
Pf3D7_01_v3 206406 A [AAAAGATGAT] CODON_INSERTION -/AAAGATGAT gaA/gaAAAAGATGAT -80KDD E79EKDD
Pf3D7_01_v3 207421 A [AATGATAATTATGATAATAATAATTATGATAATTATGATAATAATAATTATGATAATT] CODON_INSERTION aat/aATGATAATTATGATAATAATAATTATGATAATTATGATAATAATAATTATGATAATTat Aat/AATGATAATTATGATAATAATAATTATGATAATTATGATAATAATAATTATGATAATTat N418NDNYDNNNYDNYDNNNYDNY N418NDNYDNNNYDNYDNNNYDNY
Pf3D7_01_v3 207736 T [C] NON_SYNONYMOUS_CODING Tat/Cat Tat/Cat Y523H Y523H
Pf3D7_01_v3 207960 A [C] NON_SYNONYMOUS_CODING aaA/aaC aaA/aaC K597N K597N
Pf3D7_01_v3 207962 T [A] NON_SYNONYMOUS_CODING aTt/aAt aTt/aAt I598N I598N
Pf3D7_01_v3 208014 A [T] NON_SYNONYMOUS_CODING aaA/aaT aaA/aaT K615N K615N
Pf3D7_01_v3 216754 T [G] NON_SYNONYMOUS_CODING Tac/Gac Tac/Gac Y327D Y327D
Pf3D7_01_v3 216756 C [CAATAATAATAATAAT] CODON_INSERTION -/AATAATAATAATAAT taC/taCAATAATAATAATAAT -328NNNNN Y327YNNNNN
Pf3D7_01_v3 227206 C [T] NON_SYNONYMOUS_CODING aGa/aAa aGa/aAa R27K R27K
Pf3D7_01_v3 233811 G [A] NON_SYNONYMOUS_CODING tCa/tTa tCa/tTa S134L S134L
Pf3D7_01_v3 239206 T [TATAATAATA] CODON_INSERTION tat/tATAATAATAat Tat/TATAATAATAat Y181YNNN Y181YNNN
Pf3D7_01_v3 239511 G [GACAAATGATGTGAAA] CODON_INSERTION -/ACAAATGATGTGAAA aaG/aaGACAAATGATGTGAAA -283TNDVK K282KTNDVK
Pf3D7_01_v3 241843 C [G] NON_SYNONYMOUS_CODING Caa/Gaa Caa/Gaa Q1060E Q1060E
Pf3D7_01_v3 242535 TAATATTCAA [T] CODON_DELETION aatattcaa/- caTAATATTCAA/caT NIQ1291- HNIQ1290H
Pf3D7_01_v3 243925 A [G] NON_SYNONYMOUS_CODING Aat/Gat Aat/Gat N1754D N1754D
Pf3D7_01_v3 244758 C [CAATAATAATAATAATAATAATAAT] CODON_INSERTION -/AATAATAATAATAATAATAATAAT gaC/gaCAATAATAATAATAATAATAATAAT -2032NNNNNNNN D2031DNNNNNNNN
Pf3D7_01_v3 246384 CAAT [C] CODON_DELETION aat/- aaCAAT/aaC N2574- NN2573N
Pf3D7_01_v3 247606 T [TATAATA] CODON_INSERTION tat/tATAATAat Tat/TATAATAat Y2981YNN Y2981YNN
Pf3D7_01_v3 247675 TATACAAATC [T] CODON_DELETION tatacaaatcat/tat TATACAAATCat/Tat YTNH3004Y YTNH3004Y
Pf3D7_01_v3 257826 G [GATTATTATTATTATT] CODON_CHANGE_PLUS_CODON_INSERTION aat/aaAATAATAATAATAATt Caa/AATAATAATAATAATCaa N239KIIIII Q240NNNNNQ
Pf3D7_01_v3 263684 A [C] NON_SYNONYMOUS_CODING Tat/Gat Tat/Gat Y102D Y102D
Pf3D7_01_v3 266480 A [T] SYNONYMOUS_CODING atT/atA atT/atA I898 I898I
Pf3D7_01_v3 266640 TCTC [T] CODON_CHANGE_PLUS_CODON_DELETION ggagat/gat gGAGAt/gAt GD844D GD844D
Pf3D7_01_v3 267777 C [T] NON_SYNONYMOUS_CODING aGt/aAt aGt/aAt S466N S466N
Pf3D7_01_v3 269132 C [T] SYNONYMOUS_CODING gaG/gaA gaG/gaA E14 E14E
Pf3D7_01_v3 274550 C [A] NON_SYNONYMOUS_CODING aCc/aAc aCc/aAc T79N T79N
Pf3D7_01_v3 275206 C [G] NON_SYNONYMOUS_CODING Cat/Gat Cat/Gat H298D H298D
Pf3D7_01_v3 275655 T [C] SYNONYMOUS_CODING aaT/aaC aaT/aaC N447 N447N
Pf3D7_01_v3 276134 A [C] NON_SYNONYMOUS_CODING aAt/aCt aAt/aCt N607T N607T
Pf3D7_01_v3 276137 A [C] NON_SYNONYMOUS_CODING aAt/aCt aAt/aCt N608T N608T
Pf3D7_01_v3 276845 G [A] NON_SYNONYMOUS_CODING aGt/aAt aGt/aAt S844N S844N
Pf3D7_01_v3 281975 A [T] NON_SYNONYMOUS_CODING cAt/cTt cAt/cTt H274L H274L
Pf3D7_01_v3 282004 G [A] NON_SYNONYMOUS_CODING Gca/Aca Gca/Aca A284T A284T
Pf3D7_01_v3 282010 TATGATA [T] CODON_DELETION tatgataat/tat TATGATAat/Tat YDN286Y YDN286Y
Pf3D7_01_v3 282425 GTGATGGAGATGATGACGATGATGATGTAGGTGATGATAACGTTGA [G] CODON_DELETION ggtgatggagatgatgacgatgatgatgtaggtgatgataacgttgat/ggt gGTGATGGAGATGATGACGATGATGATGTAGGTGATGATAACGTTGAt/gGt GDGDDDDDDVGDDNVD424G GDGDDDDDDVGDDNVD424G
Pf3D7_01_v3 282546 TGATAAC [T] CODON_DELETION gataac/- gaTGATAAC/gaT DN465- DDN464D
Pf3D7_01_v3 282549 TAAC [T] CODON_DELETION aac/- gaTAAC/gaT N466- DN465D
Pf3D7_01_v3 282555 TGAC [T] CODON_DELETION gac/- gaTGAC/gaT D468- DD467D
Pf3D7_01_v3 282558 C [T] SYNONYMOUS_CODING gaC/gaT gaC/gaT D468 D468D
Pf3D7_01_v3 282597 CGATGACGATGAT [C] CODON_DELETION gatgacgatgat/- aaCGATGACGATGAT/aaC DDDD482- NDDDD481N
Pf3D7_01_v3 282609 T [TAAC] CODON_INSERTION -/AAC gaT/gaTAAC -486N D485DN
Pf3D7_01_v3 283144 C [G] NON_SYNONYMOUS_CODING Cac/Gac Cac/Gac H664D H664D
Pf3D7_01_v3 283203 G [GAT] FRAME_SHIFT -/AT gaG/gaGATg -684? E683EM
Pf3D7_01_v3 283273 A [AATGATGATGATGATGATGATG] CODON_INSERTION aat/aATGATGATGATGATGATGATGat Aat/AATGATGATGATGATGATGATGat N707NDDDDDDD N707NDDDDDDD
Pf3D7_01_v3 283356 C [CGATGATGATGAAAATGAT] CODON_INSERTION -/GATGATGATGAAAATGAT ggC/ggCGATGATGATGAAAATGAT -735DDDEND G734GDDDEND
Pf3D7_01_v3 283406 T [TTGA] CODON_INSERTION att/atTGAt aTt/aTTGAt I751ID I751ID
Pf3D7_01_v3 288303 AATATATATATAT [AATATATATAT, A] None None None None None
Pf3D7_01_v3 291547 G [A] NON_SYNONYMOUS_CODING Gat/Aat Gat/Aat D388N D388N
Pf3D7_01_v3 306237 C [A] SYNONYMOUS_CODING acG/acT acG/acT T1007 T1007T
Pf3D7_01_v3 307173 G [A] SYNONYMOUS_CODING gaC/gaT gaC/gaT D695 D695D
Pf3D7_01_v3 308600 C [T] NON_SYNONYMOUS_CODING Gat/Aat Gat/Aat D220N D220N
Pf3D7_01_v3 308617 TCCTC [T] FRAME_SHIFT - GAGGAt/Atg -213 ED213M
Pf3D7_01_v3 314881 A [T] NON_SYNONYMOUS_CODING aaA/aaT aaA/aaT K88N K88N
Pf3D7_01_v3 315518 TATA [T] CODON_DELETION tataat/tat TATAat/Tat YN301Y YN301Y
Pf3D7_01_v3 316004 GAAAATGTAA [G] CODON_DELETION gaaaatgtaaaa/gaa GAAAATGTAAaa/Gaa ENVK463E ENVK463E
Pf3D7_01_v3 316513 T [G] NON_SYNONYMOUS_CODING aaT/aaG aaT/aaG N632K N632K
Pf3D7_01_v3 318006 ATATCATGTT [A] CODON_DELETION aatatcatgttt/aat aATATCATGTTt/aAt NIMF1130N NIMF1130N
Pf3D7_01_v3 318011 ATGTT [A] FRAME_SHIFT - ATGTTt/Ata -1132 MF1132I
Pf3D7_01_v3 320629 CAATAAT [C, CAATAATAATAATAATAATAAT] CODON_DELETION aataat/- aaCAATAAT/aaC NN155- NNN154N
Pf3D7_01_v3 322875 A [AATAATATTTATC] CODON_INSERTION aat/aATAATATTTATCat Aat/AATAATATTTATCat N169NNIYH N169NNIYH
Pf3D7_01_v3 323333 TGAA [T] CODON_DELETION gaa/- aaTGAA/aaT E322- NE321N
Pf3D7_01_v3 324213 G [GATA] CODON_INSERTION gat/gATAat Gat/GATAat D615DN D615DN
Pf3D7_01_v3 325302 G [A] NON_SYNONYMOUS_CODING Gaa/Aaa Gaa/Aaa E978K E978K
Pf3D7_01_v3 335111 CATATATATATATAT [C] FRAME_SHIFT - None -83 None
Pf3D7_01_v3 335390 ATT [A] FRAME_SHIFT - None -176 None
Pf3D7_01_v3 338214 TGATGAAGATGATGATGATGATGAA [T] CODON_DELETION gatgaagatgatgatgatgatgaa/- gaTGATGAAGATGATGATGATGATGAA/gaT DEDDDDDE350- DDEDDDDDE349D
Pf3D7_01_v3 338340 A [AGATGACGAGGATTATGACGATGATGAT] CODON_INSERTION -/GATGACGAGGATTATGACGATGATGAT gaA/gaAGATGACGAGGATTATGACGATGATGAT -392DDEDYDDDD E391EDDEDYDDDD
Pf3D7_01_v3 338598 C [CAAT] CODON_INSERTION -/AAT atC/atCAAT -478N I477IN
Pf3D7_01_v3 339060 T [TAAC] CODON_INSERTION -/AAC aaT/aaTAAC -632N N631NN
Pf3D7_01_v3 339074 G [A] NON_SYNONYMOUS_CODING aGt/aAt aGt/aAt S636N S636N
Pf3D7_01_v3 339436 A [G] NON_SYNONYMOUS_CODING Att/Gtt Att/Gtt I757V I757V
Pf3D7_01_v3 339504 A [C] SYNONYMOUS_CODING acA/acC acA/acC T779 T779T
Pf3D7_01_v3 340513 ATTC [A] CODON_CHANGE_PLUS_CODON_DELETION attcat/aat ATTCat/Aat IH1116N IH1116N
Pf3D7_01_v3 341173 A [G] NON_SYNONYMOUS_CODING Atg/Gtg Atg/Gtg M1336V M1336V
Pf3D7_01_v3 341176 A [G] NON_SYNONYMOUS_CODING Aac/Gac Aac/Gac N1337D N1337D
Pf3D7_01_v3 341183 A [C] NON_SYNONYMOUS_CODING aAc/aCc aAc/aCc N1339T N1339T
Pf3D7_01_v3 342021 A [AAAT] CODON_INSERTION -/AAT aaA/aaAAAT -1619N K1618KN
Pf3D7_01_v3 342450 CAAT [C] CODON_DELETION aat/- agCAAT/agC N1762- SN1761S
Pf3D7_01_v3 342833 T [TTAATAA] CODON_INSERTION att/atTAATAAt aTt/aTTAATAAt I1889INN I1889INN
Pf3D7_01_v3 343374 CAAT [C] CODON_DELETION aat/- aaCAAT/aaC N2070- NN2069N
Pf3D7_01_v3 343596 A [AAATAAT] CODON_INSERTION -/AATAAT gaA/gaAAATAAT -2144NN E2143ENN
Pf3D7_01_v3 345060 C [CAT] INTRAGENIC None None
Pf3D7_01_v3 345066 C [T] INTRAGENIC None None
Pf3D7_01_v3 345730 A [G] NON_SYNONYMOUS_CODING Tca/Cca Tca/Cca S130P S130P
Pf3D7_01_v3 349247 C [T] SYNONYMOUS_CODING gaC/gaT gaC/gaT D26 D26D

...


In [200]:
# Show the first 20 rows of the feature table whose type is 'gene'.
eff._tbl_features.eq('type', 'gene').display(20)


seqid source type start end score strand phase feature_id parent_id
Pf3D7_01_v3 chado gene 29510 37126 . + . PF3D7_0100100 None
Pf3D7_01_v3 chado gene 38982 40207 . - . PF3D7_0100200 None
Pf3D7_01_v3 chado gene 42367 46507 . - . PF3D7_0100300 None
Pf3D7_01_v3 chado gene 50363 51636 . + . PF3D7_0100400 None
Pf3D7_01_v3 chado gene 53778 55006 . - . PF3D7_0100600 None
Pf3D7_01_v3 chado gene 56690 56893 . - . PF3D7_0100700 None
Pf3D7_01_v3 chado gene 59772 61003 . + . PF3D7_0100800 None
Pf3D7_01_v3 chado gene 62187 63400 . - . PF3D7_0100900 None
Pf3D7_01_v3 chado gene 65817 66989 . - . PF3D7_0101000 None
Pf3D7_01_v3 chado gene 69304 70417 . - . PF3D7_0101100 None
Pf3D7_01_v3 chado gene 71624 72426 . + . PF3D7_0101200 None
Pf3D7_01_v3 chado gene 74563 75366 . + . PF3D7_0101300 None
Pf3D7_01_v3 chado gene 81765 83106 . - . PF3D7_0101600 None
Pf3D7_01_v3 chado gene 87203 88177 . - . PF3D7_0101800 None
Pf3D7_01_v3 chado gene 90242 91420 . - . PF3D7_0101900 None
Pf3D7_01_v3 chado gene 98819 102282 . + . PF3D7_0102200 None
Pf3D7_01_v3 chado gene 104704 105209 . + . PF3D7_0102300 None
Pf3D7_01_v3 chado gene 110750 115799 . - . PF3D7_0102500 None
Pf3D7_01_v3 chado gene 119041 121249 . - . PF3D7_0102600 None
Pf3D7_01_v3 chado gene 124517 125484 . + . PF3D7_0102700 None

...


In [201]:
# Forward-strand reference bases around 115799, the end coordinate of the '-' strand gene PF3D7_0102500.
Seq(eff._fasta['Pf3D7_01_v3'][115793:115802])


Out[201]:
Seq(u'tttcatatt', Alphabet())

In [202]:
# Reverse complement of the same window, i.e. the sequence as read along the '-' strand.
Seq(eff._fasta['Pf3D7_01_v3'][115793:115802]).reverse_complement()


Out[202]:
Seq('aatatgaaa', Alphabet())

In [206]:
# Coding position of 115799 (the gene's end coordinate) on the '-' strand; reported as CDS offset 0.
dbg(eff.get_coding_positions('Pf3D7_01_v3', 115799, 'T'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115799),
  ('ref', 'T'),
  ('ref_start', 115799),
  ('ref_stop', 115799),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 0)]]

In [208]:
# One base lower in genome coordinates (115798) on the '-' strand; reported as CDS offset 1.
dbg(eff.get_coding_positions('Pf3D7_01_v3', 115798, 'A'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115798),
  ('ref', 'A'),
  ('ref_start', 115798),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1)]]

In [209]:
# Two bases lower in genome coordinates (115797) on the '-' strand; reported as CDS offset 2.
dbg(eff.get_coding_positions('Pf3D7_01_v3', 115797, 'C'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115797),
  ('ref', 'C'),
  ('ref_start', 115797),
  ('ref_stop', 115797),
  ('ref_cds_start', 2),
  ('ref_cds_stop', 2)]]

In [212]:
# SNP T>C at 115799 on the '-' strand; the codon change is reported on the coding strand (Atg/Gtg, M1V).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115799, 'T', 'C'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115799),
  ('ref', 'T'),
  ('ref_start', 115799),
  ('ref_stop', 115799),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 0),
  ('alt', 'C'),
  ('ref_codon', 'Atg'),
  ('alt_codon', 'Gtg'),
  ('codon_change', 'Atg/Gtg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'V'),
  ('aa_change', 'M1V')]]

In [213]:
# SNP with a non-nucleotide alt 'X' at the second codon base; output shows alt_codon 'aXg' and alt_aa 'X'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115798, 'A', 'X'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115798),
  ('ref', 'A'),
  ('ref_start', 115798),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1),
  ('alt', 'X'),
  ('ref_codon', 'aTg'),
  ('alt_codon', 'aXg'),
  ('codon_change', 'aTg/aXg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'X'),
  ('aa_change', 'M1X')]]

In [214]:
# SNP with a non-nucleotide alt 'X' at the third codon base; output shows alt_codon 'atX' and alt_aa 'X'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115797, 'C', 'X'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115797),
  ('ref', 'C'),
  ('ref_start', 115797),
  ('ref_stop', 115797),
  ('ref_cds_start', 2),
  ('ref_cds_stop', 2),
  ('alt', 'X'),
  ('ref_codon', 'atG'),
  ('alt_codon', 'atX'),
  ('codon_change', 'atG/atX'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'X'),
  ('aa_change', 'M1X')]]

In [216]:
# SNP T>C at 115796, the first base of codon 2 on the '-' strand (Aaa/Gaa, K2E).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115796, 'T', 'C'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115796),
  ('ref', 'T'),
  ('ref_start', 115796),
  ('ref_stop', 115796),
  ('ref_cds_start', 3),
  ('ref_cds_stop', 3),
  ('alt', 'C'),
  ('ref_codon', 'Aaa'),
  ('alt_codon', 'Gaa'),
  ('codon_change', 'Aaa/Gaa'),
  ('aa_pos', 2),
  ('ref_aa', 'K'),
  ('alt_aa', 'E'),
  ('aa_change', 'K2E')]]

In [218]:
# Insertion of 'CGG' after 115799 on the '-' strand; reported as 'CCG' placed before the first codon (Atg/CCGAtg, M1PM).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115799, 'T', 'TCGG'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115799),
  ('ref', 'T'),
  ('ref_start', 115799),
  ('ref_stop', 115799),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 0),
  ('alt', 'TCGG'),
  ('ref_codon', 'Atg'),
  ('alt_codon', 'CCGAtg'),
  ('codon_change', 'Atg/CCGAtg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'PM'),
  ('aa_change', 'M1PM')]]

In [219]:
# Single-base insertion ('A' -> 'AN') at 115798 on the '-' strand; output shows alt_codon 'aNT' and alt_aa 'X'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115798, 'A', 'AN'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115798),
  ('ref', 'A'),
  ('ref_start', 115798),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1),
  ('alt', 'AN'),
  ('ref_codon', 'aTg'),
  ('alt_codon', 'aNT'),
  ('codon_change', 'aTg/aNT'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'X'),
  ('aa_change', 'M1X')]]

In [220]:
# Two-base insertion ('A' -> 'AXX'); output shows alt_codon 'aXXTga' translated as 'X*'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115798, 'A', 'AXX'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115798),
  ('ref', 'A'),
  ('ref_start', 115798),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1),
  ('alt', 'AXX'),
  ('ref_codon', 'aTg'),
  ('alt_codon', 'aXXTga'),
  ('codon_change', 'aTg/aXXTga'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'X*'),
  ('aa_change', 'M1X*')]]

In [221]:
# Three-base insertion ('A' -> 'AXXX'); output shows alt_codon 'aXXXTg' translated as 'XX'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115798, 'A', 'AXXX'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115798),
  ('ref', 'A'),
  ('ref_start', 115798),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 1),
  ('alt', 'AXXX'),
  ('ref_codon', 'aTg'),
  ('alt_codon', 'aXXXTg'),
  ('codon_change', 'aTg/aXXXTg'),
  ('aa_pos', 1),
  ('ref_aa', 'M'),
  ('alt_aa', 'XX'),
  ('aa_change', 'M1XX')]]

In [222]:
# Three-base deletion ('TCAT' -> 'T'); the ref allele spans CDS offsets 0-3 on the '-' strand (ATGAaa/Aaa, MK1K).
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115796, 'TCAT', 'T'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115796),
  ('ref', 'TCAT'),
  ('ref_start', 115796),
  ('ref_stop', 115799),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 3),
  ('alt', 'T'),
  ('ref_codon', 'ATGAaa'),
  ('alt_codon', 'Aaa'),
  ('codon_change', 'ATGAaa/Aaa'),
  ('aa_pos', 1),
  ('ref_aa', 'MK'),
  ('alt_aa', 'K'),
  ('aa_change', 'MK1K')]]

In [223]:
# Deletion case: ALT 'TCA' is REF 'TCAT' with its final character removed
# (length change of one). The recorded output for this cell reports
# alt_codon 'TGA' and alt_aa '*'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115796, 'TCAT', 'TCA'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115796),
  ('ref', 'TCAT'),
  ('ref_start', 115796),
  ('ref_stop', 115799),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 3),
  ('alt', 'TCA'),
  ('ref_codon', 'ATGAaa'),
  ('alt_codon', 'TGA'),
  ('codon_change', 'ATGAaa/TGA'),
  ('aa_pos', 1),
  ('ref_aa', 'MK'),
  ('alt_aa', '*'),
  ('aa_change', 'MK1*')]]

In [224]:
# Deletion case: ALT 'TC' is REF 'TCAT' with its final two characters removed.
# The recorded output for this cell reports alt_codon 'GAa' and alt_aa 'E'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115796, 'TCAT', 'TC'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115796),
  ('ref', 'TCAT'),
  ('ref_start', 115796),
  ('ref_stop', 115799),
  ('ref_cds_start', 0),
  ('ref_cds_stop', 3),
  ('alt', 'TC'),
  ('ref_codon', 'ATGAaa'),
  ('alt_codon', 'GAa'),
  ('codon_change', 'ATGAaa/GAa'),
  ('aa_pos', 1),
  ('ref_aa', 'MK'),
  ('alt_aa', 'E'),
  ('aa_change', 'MK1E')]]

In [225]:
# Deletion case: ALT 'T' drops the two characters 'CA' that follow the shared
# leading 'T' of REF 'TCA'. The recorded output for this cell reports
# ref_cds_start 1, ref_cds_stop 3, alt_codon 'aAa' and alt_aa 'K'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115796, 'TCA', 'T'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115796),
  ('ref', 'TCA'),
  ('ref_start', 115796),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 3),
  ('alt', 'T'),
  ('ref_codon', 'aTGAaa'),
  ('alt_codon', 'aAa'),
  ('codon_change', 'aTGAaa/aAa'),
  ('aa_pos', 1),
  ('ref_aa', 'MK'),
  ('alt_aa', 'K'),
  ('aa_change', 'MK1K')]]

In [226]:
# Deletion case: ALT 'T' drops the single character 'C' that follows the
# shared leading 'T' of REF 'TC'. The recorded output for this cell reports
# ref_cds_start 2, ref_cds_stop 3, alt_codon 'atA' and alt_aa 'I'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115796, 'TC', 'T'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115796),
  ('ref', 'TC'),
  ('ref_start', 115796),
  ('ref_stop', 115797),
  ('ref_cds_start', 2),
  ('ref_cds_stop', 3),
  ('alt', 'T'),
  ('ref_codon', 'atGAaa'),
  ('alt_codon', 'atA'),
  ('codon_change', 'atGAaa/atA'),
  ('aa_pos', 1),
  ('ref_aa', 'MK'),
  ('alt_aa', 'I'),
  ('aa_change', 'MK1I')]]

In [227]:
# Deletion case anchored one position earlier (115795): ALT 'T' drops the three
# characters 'TCA' that follow the shared leading 'T' of REF 'TTCA'. The
# recorded output for this cell reports ref_cds_start 1, ref_cds_stop 4,
# alt_codon 'aAa' and alt_aa 'K'.
dbg(eff.get_amino_acid_changes('Pf3D7_01_v3', 115795, 'TTCA', 'T'))


[[('gene_id', 'PF3D7_0102500'),
  ('gene_start', 110750),
  ('gene_stop', 115799),
  ('transcript_id', 'PF3D7_0102500.1'),
  ('transcript_start', 110750),
  ('transcript_stop', 115799),
  ('exon_id', 'PF3D7_0102500.1:exon:4'),
  ('exon_start', 111338),
  ('exon_stop', 115799),
  ('strand', '-'),
  ('chrom', 'Pf3D7_01_v3'),
  ('pos', 115795),
  ('ref', 'TTCA'),
  ('ref_start', 115795),
  ('ref_stop', 115798),
  ('ref_cds_start', 1),
  ('ref_cds_stop', 4),
  ('alt', 'T'),
  ('ref_codon', 'aTGAAa'),
  ('alt_codon', 'aAa'),
  ('codon_change', 'aTGAAa/aAa'),
  ('aa_pos', 1),
  ('ref_aa', 'MK'),
  ('alt_aa', 'K'),
  ('aa_change', 'MK1K')]]