In [1]:
import sys
import numpy as np
import bcolz
import numexpr
import humanize
import h5py
import tempfile
# import logging
# import imp
# imp.reload(logging)
# logging.basicConfig(level=logging.DEBUG)
# logger = logging.getLogger()
# logger.debug('logging initialised')
def binarysize(n):
    """Render the byte count ``n`` as a human-readable size string.

    Delegates to ``humanize.naturalsize`` with ``binary=True``, i.e. the
    result uses binary (1024-based) suffixes such as KiB / MiB.
    """
    size_text = humanize.naturalsize(n, binary=True)
    return size_text

# Load (or reload) the memory_profiler IPython extension.
%reload_ext memory_profiler
# Put the directory two levels up first on the import path so that `allel` is
# resolved from there — presumably the local source checkout (TODO confirm).
sys.path.insert(0, '../..')
# Autoreload mode 1: only modules registered with %aimport are reloaded.
# %aimport also performs the import itself, which is how the top-level name
# `allel` becomes available below without an explicit `import allel`.
%reload_ext autoreload
%autoreload 1
%aimport allel.model
%aimport allel.bcolz
%aimport allel.io

In [2]:
# Parse the plain-text GFF3 fixture into a FeatureTable and display it.
gff_path = '../../fixture/sample.gff'
ft = allel.model.FeatureTable.from_gff3(gff_path)
ft


Out[2]:
seqid source type start end score strand phase
0 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1
1 b'apidb|MAL2' b'ApiDB' b'supercontig' 1 947102 -1 b'+' -1
2 b'apidb|MAL3' b'ApiDB' b'supercontig' 1 1060087 -1 b'+' -1
3 b'apidb|MAL4' b'ApiDB' b'supercontig' 1 1204112 -1 b'+' -1
4 b'apidb|MAL5' b'ApiDB' b'supercontig' 1 1343552 -1 b'+' -1

In [3]:
# Load the '.sorted.gff.gz' variant of the fixture, with no region filter.
sorted_gz_path = '../../fixture/sample.sorted.gff.gz'
ftz = allel.model.FeatureTable.from_gff3(sorted_gz_path)
ftz


Out[3]:
seqid source type start end score strand phase
0 b'apidb|API_IRAB' b'ApiDB' b'supercontig' 1 34242 -1 b'+' -1
1 b'apidb|M76611' b'ApiDB' b'supercontig' 1 5967 -1 b'+' -1
2 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1
3 b'apidb|MAL1' b'ApiDB' b'CDS' 29733 34985 -1 b'+' 0
4 b'apidb|MAL1' b'ApiDB' b'exon' 29733 34985 -1 b'+' -1

In [4]:
# Restrict loading to one sequence through `region`; every row in the saved
# output has seqid b'apidb|MAL1'.
ftr = allel.model.FeatureTable.from_gff3(
    '../../fixture/sample.sorted.gff.gz',
    region='apidb|MAL1',
)
ftr


Out[4]:
seqid source type start end score strand phase
0 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1
1 b'apidb|MAL1' b'ApiDB' b'CDS' 29733 34985 -1 b'+' 0
2 b'apidb|MAL1' b'ApiDB' b'exon' 29733 34985 -1 b'+' -1
3 b'apidb|MAL1' b'ApiDB' b'gene' 29733 37349 -1 b'+' -1
4 b'apidb|MAL1' b'ApiDB' b'mRNA' 29733 37349 -1 b'+' -1

In [5]:
# `region` here also carries a coordinate range ("seqid:start-end").
# NOTE(review): the saved output suggests features overlapping the range are
# returned (the whole-contig supercontig row is included) — confirm.
ftrb = allel.model.FeatureTable.from_gff3(
    '../../fixture/sample.sorted.gff.gz',
    region='apidb|MAL1:50000-100000',
)
ftrb


Out[5]:
seqid source type start end score strand phase
0 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1
1 b'apidb|MAL1' b'ApiDB' b'CDS' 50586 50639 -1 b'+' 0
2 b'apidb|MAL1' b'ApiDB' b'exon' 50586 50639 -1 b'+' -1
3 b'apidb|MAL1' b'ApiDB' b'gene' 50586 51859 -1 b'+' -1
4 b'apidb|MAL1' b'ApiDB' b'mRNA' 50586 51859 -1 b'+' -1

In [6]:
# Request the 'ID' and 'Parent' attributes; they appear as two extra columns
# after the eight standard ones in the saved output.
fta = allel.model.FeatureTable.from_gff3(
    '../../fixture/sample.gff',
    attributes=('ID', 'Parent'),
)
fta


Out[6]:
seqid source type start end score strand phase ID Parent
0 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1 b'apidb|MAL1' b'.'
1 b'apidb|MAL2' b'ApiDB' b'supercontig' 1 947102 -1 b'+' -1 b'apidb|MAL2' b'.'
2 b'apidb|MAL3' b'ApiDB' b'supercontig' 1 1060087 -1 b'+' -1 b'apidb|MAL3' b'.'
3 b'apidb|MAL4' b'ApiDB' b'supercontig' 1 1204112 -1 b'+' -1 b'apidb|MAL4' b'.'
4 b'apidb|MAL5' b'ApiDB' b'supercontig' 1 1343552 -1 b'+' -1 b'apidb|MAL5' b'.'

In [7]:
# Inspect the table's structured dtype, including the ID / Parent attribute fields.
fta.dtype


Out[7]:
dtype([('seqid', 'S15'), ('source', 'S5'), ('type', 'S11'), ('start', '<i8'), ('end', '<i8'), ('score', '<i8'), ('strand', 'S1'), ('phase', '<i8'), ('ID', 'S21'), ('Parent', 'S20')])

In [8]:
# Load the same plain-text fixture through the bcolz-backed FeatureCTable.
gff_path = '../../fixture/sample.gff'
ftc = allel.bcolz.FeatureCTable.from_gff3(gff_path)
ftc


Out[8]:
seqid source type start end score strand phase
0 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1
1 b'apidb|MAL2' b'ApiDB' b'supercontig' 1 947102 -1 b'+' -1
2 b'apidb|MAL3' b'ApiDB' b'supercontig' 1 1060087 -1 b'+' -1
3 b'apidb|MAL4' b'ApiDB' b'supercontig' 1 1204112 -1 b'+' -1
4 b'apidb|MAL5' b'ApiDB' b'supercontig' 1 1343552 -1 b'+' -1

In [9]:
# Show the underlying ctable stored on the wrapper's `ctbl` attribute.
# NOTE(review): the saved output lists all 177 rows; consider displaying a
# slice or just the summary if the full listing is not needed.
ftc.ctbl


Out[9]:
ctable((177,), [('seqid', 'S15'), ('source', 'S5'), ('type', 'S11'), ('start', '<i8'), ('end', '<i8'), ('score', '<i8'), ('strand', 'S1'), ('phase', '<i8')])
  nbytes: 11.06 KB; cbytes: 2.00 MB; ratio: 0.01
  cparams := cparams(clevel=5, shuffle=True, cname='blosclz')
[(b'apidb|MAL1', b'ApiDB', b'supercontig', 1, 643292, -1, b'+', -1)
 (b'apidb|MAL2', b'ApiDB', b'supercontig', 1, 947102, -1, b'+', -1)
 (b'apidb|MAL3', b'ApiDB', b'supercontig', 1, 1060087, -1, b'+', -1)
 (b'apidb|MAL4', b'ApiDB', b'supercontig', 1, 1204112, -1, b'+', -1)
 (b'apidb|MAL5', b'ApiDB', b'supercontig', 1, 1343552, -1, b'+', -1)
 (b'apidb|MAL6', b'ApiDB', b'supercontig', 1, 1418244, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'supercontig', 1, 1501717, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'supercontig', 1, 1419563, -1, b'+', -1)
 (b'apidb|MAL9', b'ApiDB', b'supercontig', 1, 1541723, -1, b'+', -1)
 (b'apidb|MAL10', b'ApiDB', b'supercontig', 1, 1687655, -1, b'+', -1)
 (b'apidb|MAL11', b'ApiDB', b'supercontig', 1, 2038337, -1, b'+', -1)
 (b'apidb|MAL12', b'ApiDB', b'supercontig', 1, 2271478, -1, b'+', -1)
 (b'apidb|MAL13', b'ApiDB', b'supercontig', 1, 2895605, -1, b'+', -1)
 (b'apidb|MAL14', b'ApiDB', b'supercontig', 1, 3291871, -1, b'+', -1)
 (b'apidb|M76611', b'ApiDB', b'supercontig', 1, 5967, -1, b'+', -1)
 (b'apidb|PfNF54', b'ApiDB', b'supercontig', 1, 5967, -1, b'+', -1)
 (b'apidb|X95275', b'ApiDB', b'supercontig', 1, 15421, -1, b'+', -1)
 (b'apidb|X95276', b'ApiDB', b'supercontig', 1, 14009, -1, b'+', -1)
 (b'apidb|API_IRAB', b'ApiDB', b'supercontig', 1, 34242, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'supercontig', 1, 5967, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'gene', 1933, 3471, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'mRNA', 1933, 3471, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'CDS', 1933, 3471, -1, b'+', 0)
 (b'apidb|NC_002375', b'ApiDB', b'exon', 1933, 3471, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'gene', 3492, 4622, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'mRNA', 3492, 4622, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'CDS', 3492, 4622, -1, b'+', 0)
 (b'apidb|NC_002375', b'ApiDB', b'exon', 3492, 4622, -1, b'+', -1)
 (b'apidb|MAL14', b'ApiDB', b'gene', 1889356, 1889472, -1, b'+', -1)
 (b'apidb|MAL14', b'ApiDB', b'mRNA', 1889356, 1889472, -1, b'+', -1)
 (b'apidb|MAL14', b'ApiDB', b'CDS', 1889356, 1889472, -1, b'+', 0)
 (b'apidb|MAL14', b'ApiDB', b'exon', 1889356, 1889472, -1, b'+', -1)
 (b'apidb|NC_002375', b'ApiDB', b'gene', 734, 1573, -1, b'-', -1)
 (b'apidb|NC_002375', b'ApiDB', b'mRNA', 734, 1573, -1, b'-', -1)
 (b'apidb|NC_002375', b'ApiDB', b'CDS', 734, 1573, -1, b'-', 0)
 (b'apidb|NC_002375', b'ApiDB', b'exon', 734, 1573, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 474888, 477036, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'rRNA', 474888, 477036, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'exon', 474888, 477036, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 478428, 482531, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'rRNA', 478428, 482531, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'exon', 478428, 482531, -1, b'+', -1)
 (b'apidb|MAL5', b'ApiDB', b'gene', 1289594, 1291685, -1, b'+', -1)
 (b'apidb|MAL5', b'ApiDB', b'rRNA', 1289594, 1291685, -1, b'+', -1)
 (b'apidb|MAL5', b'ApiDB', b'exon', 1289594, 1291685, -1, b'+', -1)
 (b'apidb|MAL5', b'ApiDB', b'gene', 1292403, 1296192, -1, b'+', -1)
 (b'apidb|MAL5', b'ApiDB', b'rRNA', 1292403, 1296192, -1, b'+', -1)
 (b'apidb|MAL5', b'ApiDB', b'exon', 1292403, 1296192, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'gene', 121104, 122236, -1, b'-', -1)
 (b'apidb|MAL7', b'ApiDB', b'mRNA', 121104, 122236, -1, b'-', -1)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 122168, 122236, -1, b'-', 0)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 121836, 121959, -1, b'-', 0)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 121104, 121690, -1, b'-', 2)
 (b'apidb|MAL7', b'ApiDB', b'exon', 122168, 122236, -1, b'-', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 121836, 121959, -1, b'-', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 121104, 121690, -1, b'-', -1)
 (b'apidb|MAL7', b'ApiDB', b'gene', 123448, 124216, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'mRNA', 123448, 124216, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 123448, 123516, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 123593, 124216, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'exon', 123448, 123516, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 123593, 124216, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'gene', 126425, 127236, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'mRNA', 126425, 127236, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 126425, 126493, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 126598, 127236, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'exon', 126425, 126493, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 126598, 127236, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'gene', 140158, 141050, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'mRNA', 140158, 141050, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 140158, 140226, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 140367, 141050, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'exon', 140158, 140226, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 140367, 141050, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'gene', 144051, 146252, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'mRNA', 144051, 146252, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 144051, 144236, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 144422, 145151, -1, b'+', 0)
 (b'apidb|MAL7', b'ApiDB', b'CDS', 145327, 146252, -1, b'+', 2)
 (b'apidb|MAL7', b'ApiDB', b'exon', 144051, 144236, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 144422, 145151, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 145327, 146252, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'gene', 1139137, 1139564, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'rRNA', 1139137, 1139564, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 1139137, 1139564, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'gene', 1141946, 1144480, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'rRNA', 1141946, 1144480, -1, b'+', -1)
 (b'apidb|MAL7', b'ApiDB', b'exon', 1141946, 1144480, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1326215, 1332258, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1326215, 1332258, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1326215, 1327079, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1327247, 1327473, -1, b'+', 2)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1327585, 1332258, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1326215, 1327079, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1327247, 1327473, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1327585, 1332258, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1322488, 1323896, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1322488, 1323896, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1322488, 1322694, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1322823, 1323896, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1322488, 1322694, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1322823, 1323896, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1308124, 1308995, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1308124, 1308995, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1308124, 1308234, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1308450, 1308995, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1308124, 1308234, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1308450, 1308995, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1304394, 1305884, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1304394, 1305884, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1304394, 1304543, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1304664, 1305884, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1304394, 1304543, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1304664, 1305884, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1270206, 1270718, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1270206, 1270718, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1270206, 1270718, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1270206, 1270718, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1261119, 1267769, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1261119, 1267769, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1266176, 1267769, -1, b'-', 0)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1264040, 1266037, -1, b'-', 2)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1261119, 1263919, -1, b'-', 2)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1266176, 1267769, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1264040, 1266037, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1261119, 1263919, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1258562, 1260112, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1258562, 1260112, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1258842, 1260112, -1, b'-', 0)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1258562, 1258607, -1, b'-', 1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1258842, 1260112, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1258562, 1258607, -1, b'-', -1)
 (b'apidb|MAL8', b'ApiDB', b'gene', 1257626, 1258096, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'mRNA', 1257626, 1258096, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1257626, 1257659, -1, b'+', 0)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1257751, 1257913, -1, b'+', 2)
 (b'apidb|MAL8', b'ApiDB', b'CDS', 1258009, 1258096, -1, b'+', 1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1257626, 1257659, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1257751, 1257913, -1, b'+', -1)
 (b'apidb|MAL8', b'ApiDB', b'exon', 1258009, 1258096, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 29733, 37349, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'mRNA', 29733, 37349, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 29733, 34985, -1, b'+', 0)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 36111, 37349, -1, b'+', 0)
 (b'apidb|MAL1', b'ApiDB', b'exon', 29733, 34985, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'exon', 36111, 37349, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 39205, 40430, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'mRNA', 39205, 40430, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 40377, 40430, -1, b'-', 0)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 39205, 40146, -1, b'-', 0)
 (b'apidb|MAL1', b'ApiDB', b'exon', 40377, 40430, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'exon', 39205, 40146, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 42590, 46730, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'mRNA', 42590, 46730, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 43998, 46730, -1, b'-', 0)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 42590, 43840, -1, b'-', 0)
 (b'apidb|MAL1', b'ApiDB', b'exon', 43998, 46730, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'exon', 42590, 43840, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 50586, 51859, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'mRNA', 50586, 51859, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 50586, 50639, -1, b'+', 0)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 50795, 51859, -1, b'+', 0)
 (b'apidb|MAL1', b'ApiDB', b'exon', 50586, 50639, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'exon', 50795, 51859, -1, b'+', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 53392, 53503, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'mRNA', 53392, 53503, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 53392, 53503, -1, b'-', 0)
 (b'apidb|MAL1', b'ApiDB', b'exon', 53392, 53503, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 54001, 55229, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'mRNA', 54001, 55229, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 55161, 55229, -1, b'-', 0)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 54001, 55011, -1, b'-', 0)
 (b'apidb|MAL1', b'ApiDB', b'exon', 55161, 55229, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'exon', 54001, 55011, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'gene', 56913, 57116, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'mRNA', 56913, 57116, -1, b'-', -1)
 (b'apidb|MAL1', b'ApiDB', b'CDS', 56913, 57116, -1, b'-', 0)]

In [10]:
# FeatureCTable loaded from the '.sorted.gff.gz' variant of the fixture.
sorted_gz_path = '../../fixture/sample.sorted.gff.gz'
ftcz = allel.bcolz.FeatureCTable.from_gff3(sorted_gz_path)
ftcz


Out[10]:
seqid source type start end score strand phase
0 b'apidb|API_IRAB' b'ApiDB' b'supercontig' 1 34242 -1 b'+' -1
1 b'apidb|M76611' b'ApiDB' b'supercontig' 1 5967 -1 b'+' -1
2 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1
3 b'apidb|MAL1' b'ApiDB' b'CDS' 29733 34985 -1 b'+' 0
4 b'apidb|MAL1' b'ApiDB' b'exon' 29733 34985 -1 b'+' -1

In [11]:
# FeatureCTable with the 'ID' and 'Parent' attributes requested as extra columns.
ftca = allel.bcolz.FeatureCTable.from_gff3(
    '../../fixture/sample.gff',
    attributes=('ID', 'Parent'),
)
ftca


Out[11]:
seqid source type start end score strand phase ID Parent
0 b'apidb|MAL1' b'ApiDB' b'supercontig' 1 643292 -1 b'+' -1 b'apidb|MAL1' b'.'
1 b'apidb|MAL2' b'ApiDB' b'supercontig' 1 947102 -1 b'+' -1 b'apidb|MAL2' b'.'
2 b'apidb|MAL3' b'ApiDB' b'supercontig' 1 1060087 -1 b'+' -1 b'apidb|MAL3' b'.'
3 b'apidb|MAL4' b'ApiDB' b'supercontig' 1 1204112 -1 b'+' -1 b'apidb|MAL4' b'.'
4 b'apidb|MAL5' b'ApiDB' b'supercontig' 1 1343552 -1 b'+' -1 b'apidb|MAL5' b'.'

In [12]:
# Time loading the plain-text fixture into a FeatureTable (no attributes).
%timeit allel.model.FeatureTable.from_gff3('../../fixture/sample.gff')


1000 loops, best of 3: 817 µs per loop

In [13]:
# Time the same load through FeatureCTable, for comparison with the cell above.
%timeit allel.bcolz.FeatureCTable.from_gff3('../../fixture/sample.gff')


100 loops, best of 3: 2.06 ms per loop

In [14]:
# Time the FeatureTable load again, this time also extracting the ID / Parent attributes.
%timeit allel.model.FeatureTable.from_gff3('../../fixture/sample.gff', attributes=('ID', 'Parent'))


100 loops, best of 3: 4.46 ms per loop

In [16]:
# Select the gene features on apidb|MAL1, then convert them to a boolean mask
# of length 1000000.
# NOTE(review): presumably True marks positions covered by a selected feature — confirm.
mal1_genes = ft.query("(seqid == b'apidb|MAL1') & (type == b'gene')")
m = mal1_genes.to_mask(1000000)
m


Out[16]:
array([False, False, False, ..., False, False, False], dtype=bool)

In [18]:
# Summarise the mask: total length, number of True entries, number of False entries.
n_true = np.count_nonzero(m)
n_false = np.count_nonzero(~m)
m.size, n_true, n_false


Out[18]:
(1000000, 22056, 977944)

In [19]:
# Build the same mask from the FeatureCTable, for comparison with `m`.
mal1_genes_c = ftc.query("(seqid == b'apidb|MAL1') & (type == b'gene')")
m2 = mal1_genes_c.to_mask(1000000)
m2


Out[19]:
array([False, False, False, ..., False, False, False], dtype=bool)

In [20]:
# Summarise the ctable-derived mask: total length, True count, False count.
n_true_c = np.count_nonzero(m2)
n_false_c = np.count_nonzero(~m2)
m2.size, n_true_c, n_false_c


Out[20]:
(1000000, 22056, 977944)