In [1]:
# Development setup. The parent directory is inserted at the front of sys.path,
# so modules found there take precedence on import
# (presumably this is the local allel source checkout — TODO confirm).
import sys
sys.path.insert(0, '../')
import numpy as np
# Enable IPython's autoreload extension. Mode 1 reloads only the modules
# registered with %aimport below before each cell is executed.
%reload_ext autoreload
%autoreload 1
%aimport allel
%aimport allel.model.ndarray
%aimport allel.model.chunked
%aimport allel.model.dask
# Display the version string of the allel package that was actually imported.
allel.__version__
Out[1]:
In [2]:
# 40 genotype calls alternating between [0, 1] and [-1, -1].
vector_calls = [[0, 1], [-1, -1]] * 20
g = allel.GenotypeVector(vector_calls)

# One boolean phase flag per call, alternating True / False.
vector_phase_flags = [True, False] * 20
g.is_phased = np.array(vector_phase_flags, dtype=bool)

# Last expression of the cell: shown through the notebook's display machinery.
g
Out[2]:
In [3]:
# Abbreviated display of the vector; the two positional arguments are
# presumably the truncation threshold and number of edge items — TODO confirm.
g.display(30, 10)
In [4]:
# Display the vector through displayall() (presumably without truncation).
g.displayall()
In [5]:
# Plain-text repr() of the vector.
print(repr(g))
In [6]:
# Plain-text str() of the vector.
print(g)
In [7]:
# String form produced by to_str() with argument 40.
print(g.to_str(40))
In [8]:
# Concatenate the vector with itself using the method's default axis.
g.concatenate(g)
Out[8]:
In [9]:
# Concatenate the vector with itself with axis=1 given explicitly.
g.concatenate(g, axis=1)
Out[9]:
In [10]:
# NOTE: rebinds `g` (previously a GenotypeVector) to a GenotypeArray.
# Each row holds 40 calls alternating between [0, 1] and [-1, -1]; 20 identical rows.
row_calls = [[0, 1], [-1, -1]] * 20
g = allel.GenotypeArray([row_calls] * 20)

# Phase flags: within each row only every fourth call is flagged True.
row_phase_flags = [True, False, False, False] * 10
g.is_phased = np.array([row_phase_flags] * 20, dtype=bool)

# Mask: within each row every fifth call is flagged (1 -> True after the bool cast).
row_mask_flags = [1, 0, 0, 0, 0] * 8
g.mask = np.array([row_mask_flags] * 20, dtype=bool)
In [11]:
# Plain-text str() of the genotype array (phase flags and mask are set at this point).
print(g)
In [12]:
# Plain-text repr() of the genotype array.
print(repr(g))
In [13]:
# Notebook display of the array (last expression of the cell).
g
Out[13]:
In [14]:
# Abbreviated display with explicit row/column thresholds and edge-item counts.
g.display(row_threshold=10, row_edgeitems=5, col_threshold=20, col_edgeitems=10)
In [15]:
# Display through displayall() (presumably without truncation).
g.displayall()
In [16]:
# Index 0 along the first axis.
g[0]
Out[16]:
In [17]:
# Index 4 along the second axis, for all rows.
g[:, 4]
Out[17]:
In [18]:
# Same selection as the previous cell, shown through displayall().
g[:, 4].displayall()
In [19]:
# Clear the mask assigned earlier by setting it to None, then display the array again.
g.mask = None
g
Out[19]:
In [20]:
# displayall() of the array now that the mask is None.
g.displayall()
In [21]:
# Concatenate the array with itself using the method's default axis.
g.concatenate(g)
Out[21]:
In [22]:
# Concatenate the array with itself with axis=1 given explicitly.
g.concatenate(g, axis=1)
Out[22]:
In [23]:
# Result of to_n_alt() on the genotype array
# (presumably the number of non-reference alleles per call — TODO confirm).
g.to_n_alt()
Out[23]:
In [24]:
# Convert the genotype array to its haplotype representation; kept as `h` for the cells below.
h = g.to_haplotypes()
In [25]:
# Notebook display of the haplotype object.
h
Out[25]:
In [26]:
# Plain-text repr() of the haplotype object.
print(repr(h))
In [27]:
# Plain-text str() of the haplotype object.
print(h)
In [28]:
# Abbreviated display; the positional arguments are presumably display
# threshold / edge-item settings — TODO confirm their meaning.
h.display(20, 40)
In [29]:
# Display through displayall() (presumably without truncation).
h.displayall()
In [30]:
# Concatenate the haplotype object with itself using the default axis.
h.concatenate(h)
Out[30]:
In [31]:
# Concatenate the haplotype object with itself with axis=1 given explicitly.
h.concatenate(h, axis=1)
Out[31]:
In [32]:
# Allele counts computed from the in-memory genotype array; kept as `ac`.
# NOTE: `ac` is reassigned later in the notebook, after `g` has been edited in place.
ac = g.count_alleles()
ac
Out[32]:
In [33]:
# Display through displayall() (presumably without truncation).
ac.displayall()
In [34]:
# Plain-text str() of the allele counts.
print(ac)
In [35]:
# Plain-text repr() of the allele counts.
print(repr(ac))
In [36]:
# Arithmetic with a scalar operand.
ac + 5
Out[36]:
In [37]:
# Arithmetic with another allele counts object (here: itself).
ac + ac
Out[37]:
In [38]:
# Concatenate the allele counts with themselves using the default axis.
ac.concatenate(ac)
Out[38]:
In [39]:
# Concatenate the allele counts with themselves with axis=1 given explicitly.
ac.concatenate(ac, axis=1)
Out[39]:
In [40]:
# Per-genotype allele counts derived from `g`; kept as `gac` for the cells below.
gac = g.to_allele_counts()
gac
Out[40]:
In [41]:
# Plain-text str() of the genotype allele counts.
print(gac)
In [42]:
# Plain-text repr() of the genotype allele counts.
print(repr(gac))
In [43]:
# In-place edit of `g`: assigns 2 at index 0 of the second axis for every row.
# `gac` above was computed BEFORE this assignment; every later cell that reads
# `g` (including the chunked and dask wrappers built from it) sees the edited data.
g[:, 0] = 2
In [44]:
# Recompute allele counts from the edited `g`; the result is displayed but not stored,
# so `gac` still refers to the object created before the edit.
g.to_allele_counts()
Out[44]:
In [45]:
# First five entries of `gac` along the first axis.
gac[:5]
Out[45]:
In [46]:
# Index 0 along the first axis.
gac[0]
Out[46]:
In [47]:
# Index 0 along the second axis, for all rows.
gac[:, 0]
Out[47]:
In [48]:
# is_called() on a slice of the first three rows.
gac[:3].is_called()
Out[48]:
In [49]:
# is_called() on a single row.
gac[0].is_called()
Out[49]:
In [50]:
# Build a synthetic variant table with random positions and random REF/ALT bases.
# A seeded generator is used instead of NumPy's global RNG so that
# Restart Kernel -> Run All reproduces exactly the same table.
rng = np.random.RandomState(42)
# np.unique sorts and de-duplicates the draws, so the table may have fewer than 50000 rows.
pos = np.unique(rng.randint(0, 1000000, size=50000))
# REF and ALT are drawn independently, so they can coincide on a given row;
# that is acceptable for display/demo purposes.
ref = rng.choice([b'A', b'C', b'T', b'G'], size=pos.shape[0])
alt = rng.choice([b'A', b'C', b'T', b'G'], size=pos.shape[0])
# Pack the three columns into a record array with named fields and wrap it.
ra = np.rec.fromarrays([pos, ref, alt], names=['POS', 'REF', 'ALT'])
vtbl = allel.VariantTable(ra)
vtbl
Out[50]:
In [51]:
# First five rows of the variant table.
vtbl[:5]
Out[51]:
In [52]:
# First twenty rows, shown through displayall() (presumably without truncation).
vtbl[:20].displayall()
In [53]:
# Select rows 0..4 by explicit index list.
vtbl.take(list(range(5)))
Out[53]:
In [54]:
# Select a random subset of rows with a boolean mask (each row kept with probability 1/2).
# The mask comes from a dedicated seeded generator so the selection is identical on
# every run and does not depend on what earlier cells drew from NumPy's global RNG.
keep = np.random.RandomState(0).randint(0, 2, size=vtbl.shape[0]).astype(bool)
vtbl.compress(keep)
Out[54]:
In [55]:
# Concatenate the variant table with itself.
vtbl.concatenate(vtbl)
Out[55]:
In [56]:
# Select a subset of columns by passing a list of field names.
vtbl[['POS', 'ALT']]
Out[56]:
In [57]:
# Plain-text str() of the variant table.
print(vtbl)
In [58]:
# Plain-text repr() of the variant table.
print(repr(vtbl))
In [59]:
# Wrap the (already edited) in-memory array `g` in a GenotypeChunkedArray and
# take a copy via .copy(); kept as `gc` for the cells below.
# NOTE(review): `gc` is also the name of a standard-library module; it is not
# imported in the visible cells, but the name would shadow it if it were.
gc = allel.GenotypeChunkedArray(g).copy()
gc
Out[59]:
In [60]:
# Plain-text str() of the chunked array.
print(gc)
In [61]:
# Plain-text repr() of the chunked array.
print(repr(gc))
In [62]:
# First five entries along the first axis.
gc[:5]
Out[62]:
In [63]:
# First five entries along the second axis, for all rows.
gc[:, :5]
Out[63]:
In [64]:
# Index 5 along the first axis.
gc[5]
Out[64]:
In [65]:
# Index 5 along the second axis, for all rows.
gc[:, 5]
Out[65]:
In [66]:
# Select entries 0, 2 and 4 along axis 0; result kept as `x` (not used again in the visible cells).
x = gc.take([0, 2, 4], axis=0)
x
Out[66]:
In [67]:
# Select entries 0, 1 and 2 along axis 1.
gc.take([0, 1, 2], axis=1)
Out[67]:
In [68]:
# Result of to_n_alt() on the chunked array; kept as `gcna`.
gcna = gc.to_n_alt()
gcna
Out[68]:
In [69]:
# Element-wise comparison of the to_n_alt() result against 0.
gcna > 0
Out[69]:
In [70]:
# Haplotype representation of the chunked array; kept as `hc`.
hc = gc.to_haplotypes()
hc
Out[70]:
In [71]:
# Select entries 0, 1 and 2 along axis 1 of the chunked haplotypes.
hc.take([0, 1, 2], axis=1)
Out[71]:
In [72]:
# Recompute allele counts from the in-memory array `g`. This rebinds `ac`,
# which was first assigned before `g` was edited in place with `g[:, 0] = 2`.
ac = g.count_alleles()
ac
Out[72]:
In [73]:
# Allele counts computed from the chunked array, for comparison with `ac` above.
acc = gc.count_alleles()
acc
Out[73]:
In [74]:
# is_called() on the chunked array.
gc.is_called()
Out[74]:
In [75]:
# to_n_alt() on the chunked array (same call as the earlier `gcna` cell; result not stored here).
gc.to_n_alt()
Out[75]:
In [76]:
# Wrap the in-memory variant table in a VariantChunkedTable and take a copy via .copy().
vctbl = allel.VariantChunkedTable(vtbl).copy()
vctbl
Out[76]:
In [77]:
# Full slice of the chunked table.
vctbl[:]
Out[77]:
In [78]:
# Display `g` again: this is the state the dask-backed wrappers below are built from.
g
Out[78]:
In [79]:
# Wrap the in-memory array `g` in a GenotypeDaskArray; kept as `gd`.
gd = allel.GenotypeDaskArray(g)
In [80]:
# Check whether the dask wrapper has no mask set.
gd.mask is None
Out[80]:
In [81]:
# Check whether the dask wrapper has no phase flags set.
gd.is_phased is None
Out[81]:
In [82]:
# Plain-text repr() of the dask wrapper.
print(repr(gd))
In [83]:
# Plain-text str() of the dask wrapper.
print(gd)
In [84]:
# Notebook display of the dask wrapper.
gd
Out[84]:
In [85]:
# The object exposed by the wrapper's `values` attribute (presumably the underlying dask array).
gd.values
Out[85]:
In [86]:
# The `name` attribute of that underlying object.
gd.values.name
Out[86]:
In [87]:
# Full slice of the dask wrapper.
gd[:]
Out[87]:
In [88]:
# First five entries along the first axis.
gd[:5]
Out[88]:
In [89]:
# Call compute() on the wrapper (presumably evaluates the lazy array into memory — TODO confirm).
gd.compute()
Out[89]:
In [90]:
# Concatenate the dask wrapper with itself using the default axis.
gd.concatenate(gd)
Out[90]:
In [91]:
# Concatenate the dask wrapper with itself with axis=1 given explicitly.
gd.concatenate(gd, axis=1)
Out[91]:
In [92]:
# Haplotype representation of the dask-backed genotypes; kept as `hd`.
hd = gd.to_haplotypes()
hd
Out[92]:
In [93]:
# compute() on the dask-backed haplotypes.
hd.compute()
Out[93]:
In [94]:
# Allele counts from the dask-backed genotypes; kept as `acd`.
acd = gd.count_alleles()
acd
Out[94]:
In [95]:
# compute() on the dask-backed allele counts.
acd.compute()
Out[95]:
In [96]:
# Index 4 along the first axis of the dask wrapper.
gd[4]
Out[96]:
In [97]:
# compute() on that single-index selection.
gd[4].compute()
Out[97]:
In [98]:
# Arithmetic between two dask-backed allele count objects (here: itself).
acd + acd
Out[98]:
In [99]:
# compute() on the result of that addition.
(acd + acd).compute()
Out[99]:
In [100]:
# Build a GenotypeAlleleCountsDaskArray from freshly computed per-genotype allele
# counts of `g` (i.e. from the edited `g`, not from the earlier `gac` object).
gacd = allel.GenotypeAlleleCountsDaskArray(g.to_allele_counts())
gacd
Out[100]:
In [101]:
# compute() on the dask-backed genotype allele counts.
gacd.compute()
Out[101]:
In [102]:
# Allele counts derived from the dask-backed genotype allele counts.
gacd.count_alleles()
Out[102]:
In [103]:
# compute() on those allele counts.
gacd.count_alleles().compute()
Out[103]:
In [ ]:
In [ ]: