In [1]:
import sys
sys.path.insert(0, '../..')
from allel.io_vcf_read import read_vcf, read_vcf_chunks, vcf_to_npz, vcf_to_hdf5, vcf_to_zarr
# from allel.opt.io_vcf_read import (iter_vcf, 
#                                    CalldataParser_parse, 
#                                    GenotypeInt8Parser_parse, 
#                                    ParserContext_next, 
#                                    BufferedReader_read
#                                   )

# Input files, given as relative paths (resolved against the kernel's working directory).
# prof_vcf_fn: the larger VCF that the profiling file is built from in the next cell.
prof_vcf_fn = '../../profdata/2L_2358158_2431617.vcf'
# sample_vcf_fn: small fixture VCF, printed in full further down, used for the worked examples.
sample_vcf_fn = '../../fixture/sample.vcf'

In [2]:
# create a slightly larger profiling file
!cat {prof_vcf_fn} > ../../profdata/prof_gt.vcf
!for i in `seq 1 100`; do cat {prof_vcf_fn} | grep -v '^#' >> ../../profdata/prof_gt.vcf; done
!gzip -f ../../profdata/prof_gt.vcf

In [3]:
!cat {prof_vcf_fn} | wc -l


1979

In [4]:
!zcat ../../profdata/prof_gt.vcf.gz | wc -l


198679

In [5]:
!cat {sample_vcf_fn}


##fileformat=VCFv4.0
##fileDate=20090805
##source=myImputationProgramV3.1
##reference=1000GenomesPilot-NCBI36
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FILTER=<ID=q10,Description="Quality below 10">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
##ALT=<ID=CNV,Description="Copy number variable region">
#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
19	111	.	A	C	9.6	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
19	112	.	A	G	10	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:.:56,60	0|0:48:4:51,51	0/0:61:2:.,.
20	1234567	microsat1	G	GA,GAC	50	PASS	NS=3;DP=9;AA=G;AN=6;AC=3,1	GT:GQ:DP	0/1:.:4	0/2:17:2	./.:40:3
20	1235237	.	T	.	.	.	.	GT	0/0	0|0	./.
X	10	rsTest	AC	A,ATG	10	PASS	.	GT	0	0/1	0|2

In [6]:
# Read the sample VCF in chunks of 3 variants, requesting every field ('*').
# `types` overrides the dtype of selected fields and `numbers` the number of
# values stored per variant for multi-valued fields; both are reflected in the
# array dtypes/shapes displayed in the cells that follow.
# NOTE(review): ALT gets no explicit type here; the ALT output shown later has
# dtype '|S1', so the alleles 'GA,GAC' from the file appear as b'G', b'G' —
# confirm this truncation is acceptable for the demo.
headers, chunks = read_vcf_chunks(sample_vcf_fn, buffer_size=2**15, chunk_length=3, 
                                  fields='*',
                                  types={'CHROM': 'S4', 'ID': 'S20', 'DP': 'i8', 'AF': 'f8', 'HQ': 'i1', 'GQ': 'i1'},
                                  numbers={'ALT': 2, 'AF': 2, 'AA': 2}
                                 )
# sample names parsed from the #CHROM header line
headers.samples


Out[6]:
['NA00001', 'NA00002', 'NA00003']

In [7]:
headers.filters


Out[7]:
{'q10': {'Description': 'Quality below 10', 'ID': 'q10'},
 's50': {'Description': 'Less than 50% of samples have data', 'ID': 's50'}}

In [8]:
headers.infos


Out[8]:
{'AA': {'Description': 'Ancestral Allele',
  'ID': 'AA',
  'Number': '1',
  'Type': 'String'},
 'AC': {'Description': 'Allele count in genotypes, for each ALT allele, in the same order as listed',
  'ID': 'AC',
  'Number': '.',
  'Type': 'Integer'},
 'AF': {'Description': 'Allele Frequency',
  'ID': 'AF',
  'Number': '.',
  'Type': 'Float'},
 'AN': {'Description': 'Total number of alleles in called genotypes',
  'ID': 'AN',
  'Number': '1',
  'Type': 'Integer'},
 'DB': {'Description': 'dbSNP membership, build 129',
  'ID': 'DB',
  'Number': '0',
  'Type': 'Flag'},
 'DP': {'Description': 'Total Depth',
  'ID': 'DP',
  'Number': '1',
  'Type': 'Integer'},
 'H2': {'Description': 'HapMap2 membership',
  'ID': 'H2',
  'Number': '0',
  'Type': 'Flag'},
 'NS': {'Description': 'Number of Samples With Data',
  'ID': 'NS',
  'Number': '1',
  'Type': 'Integer'}}

In [9]:
headers.formats


Out[9]:
{'DP': {'Description': 'Read Depth',
  'ID': 'DP',
  'Number': '1',
  'Type': 'Integer'},
 'GQ': {'Description': 'Genotype Quality',
  'ID': 'GQ',
  'Number': '1',
  'Type': 'Integer'},
 'GT': {'Description': 'Genotype',
  'ID': 'GT',
  'Number': '1',
  'Type': 'String'},
 'HQ': {'Description': 'Haplotype Quality',
  'ID': 'HQ',
  'Number': '2',
  'Type': 'Integer'}}

In [10]:
# Materialise the chunks into a list so they can be counted and indexed
# repeatedly in the cells below (presumably read_vcf_chunks yields them
# lazily — TODO confirm).
chunks = list(chunks)
len(chunks)


Out[10]:
3

In [11]:
sorted(chunks[0].keys())


Out[11]:
['calldata/DP',
 'calldata/GQ',
 'calldata/GT',
 'calldata/HQ',
 'variants/AA',
 'variants/AC',
 'variants/AF',
 'variants/ALT',
 'variants/AN',
 'variants/CHROM',
 'variants/DB',
 'variants/DP',
 'variants/FILTER_PASS',
 'variants/FILTER_q10',
 'variants/FILTER_s50',
 'variants/H2',
 'variants/ID',
 'variants/NS',
 'variants/POS',
 'variants/QUAL',
 'variants/REF']

In [12]:
# total number of variants across all chunks
sum(chunk['variants/CHROM'].shape[0] for chunk in chunks)


Out[12]:
9

In [13]:
chunks[0]['variants/CHROM']


Out[13]:
array([b'19', b'19', b'20'], 
      dtype='|S4')

In [14]:
chunks[-1]['variants/CHROM']


Out[14]:
array([b'20', b'20', b'X'], 
      dtype='|S4')

In [15]:
chunks[0]['variants/POS']


Out[15]:
array([  111,   112, 14370], dtype=int32)

In [16]:
chunks[-1]['variants/POS']


Out[16]:
array([1234567, 1235237,      10], dtype=int32)

In [17]:
chunks[0]['variants/ID']


Out[17]:
array([b'.', b'.', b'rs6054257'], 
      dtype='|S20')

In [18]:
chunks[0]['variants/REF']


Out[18]:
array([b'A', b'A', b'G'], 
      dtype='|S1')

In [19]:
chunks[0]['variants/ALT']


Out[19]:
array([[b'C', b''],
       [b'G', b''],
       [b'A', b'']], 
      dtype='|S1')

In [20]:
chunks[-1]['variants/ALT']


Out[20]:
array([[b'G', b'G'],
       [b'.', b''],
       [b'A', b'A']], 
      dtype='|S1')

In [21]:
chunks[0]['variants/QUAL']


Out[21]:
array([  9.60000038,  10.        ,  29.        ], dtype=float32)

In [22]:
chunks[0]['variants/FILTER_PASS']


Out[22]:
array([False, False,  True], dtype=bool)

In [23]:
chunks[0]['variants/NS']


Out[23]:
array([-1, -1,  3], dtype=int32)

In [24]:
chunks[0]['variants/DP']


Out[24]:
array([-1, -1, 14])

In [25]:
chunks[0]['variants/AF']


Out[25]:
array([[ nan,  nan],
       [ nan,  nan],
       [ 0.5,  nan]])

In [26]:
chunks[-1]['variants/AC']


Out[26]:
array([[ 3,  1, -1],
       [-1, -1, -1],
       [-1, -1, -1]], dtype=int32)

In [27]:
chunks[0]['variants/NS']


Out[27]:
array([-1, -1,  3], dtype=int32)

In [28]:
chunks[0]['variants/AN']


Out[28]:
array([-1, -1, -1], dtype=int32)

In [29]:
chunks[0]['variants/DB']


Out[29]:
array([False, False,  True], dtype=bool)

In [30]:
chunks[0]['variants/H2']


Out[30]:
array([False, False,  True], dtype=bool)

In [31]:
chunks[1]['variants/AA']


Out[31]:
array([[b'', b''],
       [b'T', b''],
       [b'T', b'']], 
      dtype='|S12')

In [32]:
chunks[0]['calldata/GT']


Out[32]:
array([[[0, 0],
        [0, 0],
        [0, 1]],

       [[0, 0],
        [0, 0],
        [0, 1]],

       [[0, 0],
        [1, 0],
        [1, 1]]], dtype=int8)

In [33]:
chunks[0]['calldata/GQ']


Out[33]:
array([[-1, -1, -1],
       [-1, -1, -1],
       [48, 48, 43]], dtype=int8)

In [34]:
chunks[0]['calldata/HQ']


Out[34]:
array([[[10, 10],
        [10, 10],
        [ 3,  3]],

       [[10, 10],
        [10, 10],
        [ 3,  3]],

       [[51, 51],
        [51, 51],
        [-1, -1]]], dtype=int8)

In [35]:
chunks[0]['calldata/DP']


Out[35]:
array([[-1, -1, -1],
       [-1, -1, -1],
       [ 1,  8,  5]], dtype=int16)

In [36]:
# Read the whole sample VCF in one call without a `fields` argument; the keys
# listed below are what this default selection returns.
callset = read_vcf(sample_vcf_fn, buffer_size=2**15, chunk_length=1000)
sorted(callset.keys())


Out[36]:
['calldata/GT',
 'samples',
 'variants/ALT',
 'variants/CHROM',
 'variants/FILTER_PASS',
 'variants/ID',
 'variants/POS',
 'variants/QUAL',
 'variants/REF']

In [37]:
# Request only the FILTER field; as the next cells show, it is returned as one
# boolean 'variants/FILTER_*' array per filter (PASS, q10, s50).
callset = read_vcf(sample_vcf_fn, fields='FILTER',
                   buffer_size=2**15, chunk_length=1000)

In [38]:
sorted(callset.keys())


Out[38]:
['variants/FILTER_PASS', 'variants/FILTER_q10', 'variants/FILTER_s50']

In [39]:
callset['variants/FILTER_q10']


Out[39]:
array([False, False, False,  True, False, False, False, False, False], dtype=bool)

In [40]:
callset['variants/FILTER_s50']


Out[40]:
array([False, False, False, False, False, False, False, False, False], dtype=bool)

In [41]:
callset = read_vcf(sample_vcf_fn, fields='variants/*')

In [42]:
# Read every field, but keep a single value per variant for ALT and AA
# (numbers=1); the arrays displayed below are then 1-D, e.g. shape (9,) for ALT.
callset = read_vcf(sample_vcf_fn, fields='*', numbers=dict(ALT=1, AA=1),
                   buffer_size=2**15, chunk_length=1000)
sorted(callset.keys())


Out[42]:
['calldata/DP',
 'calldata/GQ',
 'calldata/GT',
 'calldata/HQ',
 'samples',
 'variants/AA',
 'variants/AC',
 'variants/AF',
 'variants/ALT',
 'variants/AN',
 'variants/CHROM',
 'variants/DB',
 'variants/DP',
 'variants/FILTER_PASS',
 'variants/FILTER_q10',
 'variants/FILTER_s50',
 'variants/H2',
 'variants/ID',
 'variants/NS',
 'variants/POS',
 'variants/QUAL',
 'variants/REF']

In [43]:
callset['variants/ALT'].shape


Out[43]:
(9,)

In [44]:
callset['variants/ALT']


Out[44]:
array([b'C', b'G', b'A', b'A', b'G', b'.', b'G', b'.', b'A'], 
      dtype='|S1')

In [45]:
callset['variants/AA']


Out[45]:
array([b'', b'', b'', b'', b'T', b'T', b'G', b'', b''], 
      dtype='|S12')

In [46]:
callset = read_vcf(prof_vcf_fn, buffer_size=2**15, chunk_length=1000)

In [47]:
sorted(callset.keys())


Out[47]:
['calldata/GT',
 'samples',
 'variants/ALT',
 'variants/CHROM',
 'variants/FILTER_PASS',
 'variants/ID',
 'variants/POS',
 'variants/QUAL',
 'variants/REF']

In [48]:
callset['samples']


Out[48]:
array([b'AB0085-C', b'AB0087-C', b'AB0088-C', b'AB0089-C', b'AB0090-C',
       b'AB0091-C', b'AB0092-C', b'AB0094-C', b'AB0095-C', b'AB0097-C',
       b'AB0098-C', b'AB0099-C', b'AB0100-C', b'AB0101-C', b'AB0103-C',
       b'AB0104-C', b'AB0109-C', b'AB0110-C', b'AB0111-C', b'AB0112-C',
       b'AB0113-C', b'AB0114-C', b'AB0117-C', b'AB0119-C', b'AB0122-C',
       b'AB0123-C', b'AB0124-C', b'AB0126-C', b'AB0127-C', b'AB0128-C',
       b'AB0129-C', b'AB0130-C', b'AB0133-C', b'AB0134-C', b'AB0135-C',
       b'AB0136-C', b'AB0137-C', b'AB0138-C', b'AB0139-C', b'AB0140-C',
       b'AB0142-C', b'AB0143-C', b'AB0145-C', b'AB0146-C', b'AB0147-C',
       b'AB0148-C', b'AB0151-C', b'AB0153-C', b'AB0155-C', b'AB0157-C',
       b'AB0158-C', b'AB0159-C', b'AB0160-C', b'AB0161-C', b'AB0164-C',
       b'AB0166-C', b'AB0169-C', b'AB0170-C', b'AB0171-C', b'AB0172-C',
       b'AB0173-C', b'AB0174-C', b'AB0175-C', b'AB0176-C', b'AB0177-C',
       b'AB0178-C', b'AB0179-C', b'AB0181-C', b'AB0182-C', b'AB0183-C',
       b'AB0184-C', b'AB0185-C', b'AB0186-C', b'AB0187-C', b'AB0188-C',
       b'AB0189-C', b'AB0190-C', b'AB0191-C', b'AB0192-C', b'AB0197-C',
       b'AB0198-C', b'AB0199-C', b'AB0201-C', b'AB0202-C', b'AB0203-C',
       b'AB0204-C', b'AB0205-C', b'AB0206-C', b'AB0207-C', b'AB0208-C',
       b'AB0209-C', b'AB0210-C', b'AB0211-C', b'AB0212-C', b'AB0213-C',
       b'AB0217-C', b'AB0219-C', b'AB0221-C', b'AB0222-C', b'AB0223-C',
       b'AB0224-C', b'AB0226-C', b'AB0227-C', b'AB0228-C', b'AB0229-C',
       b'AB0231-C', b'AB0233-C', b'AB0234-C', b'AB0235-C', b'AB0236-C',
       b'AB0237-C', b'AB0238-C', b'AB0239-C', b'AB0240-C', b'AB0241-C',
       b'AB0242-C', b'AB0243-C', b'AB0244-C', b'AB0246-C', b'AB0249-C',
       b'AB0250-C', b'AB0251-C', b'AB0252-C', b'AB0253-C', b'AB0256-C',
       b'AB0257-C', b'AB0258-C', b'AB0260-C', b'AB0261-C', b'AB0262-C',
       b'AB0263-C', b'AB0264-C', b'AB0265-C', b'AB0266-C', b'AB0267-C',
       b'AB0268-C', b'AB0270-C', b'AB0271-C', b'AB0272-C', b'AB0273-C',
       b'AB0274-C', b'AB0276-C', b'AB0277-C', b'AB0278-C', b'AB0279-C',
       b'AB0280-C', b'AB0281-C', b'AB0282-C', b'AB0283-C', b'AB0284-C',
       b'AC0090-C', b'AC0091-C', b'AC0092-C', b'AC0093-C', b'AC0094-C',
       b'AC0095-C', b'AC0096-C', b'AC0097-C', b'AC0098-C', b'AC0099-C',
       b'AC0100-C', b'AC0101-C', b'AC0102-C', b'AC0103-C', b'AC0104-C',
       b'AC0106-C', b'AC0107-C', b'AC0108-C', b'AC0109-C', b'AC0110-C',
       b'AC0111-C', b'AC0112-C', b'AC0113-C', b'AC0114-C', b'AC0115-C',
       b'AC0116-C', b'AC0117-C', b'AC0118-C', b'AC0119-C', b'AC0120-C',
       b'AC0121-C', b'AC0122-C', b'AC0123-C', b'AC0124-C', b'AC0125-C',
       b'AC0126-C', b'AC0127-C', b'AC0128-C', b'AC0129-C', b'AC0130-C',
       b'AC0131-C', b'AC0132-C', b'AC0133-C', b'AC0135-C', b'AC0136-C',
       b'AC0137-C', b'AC0138-C', b'AC0139-C', b'AC0140-C', b'AC0142-C',
       b'AC0143-C', b'AC0144-C', b'AC0145-C', b'AC0147-C', b'AC0148-C',
       b'AC0149-C', b'AC0150-C', b'AC0151-C', b'AC0152-C', b'AC0153-C',
       b'AC0154-C', b'AC0156-C', b'AC0158-C', b'AC0159-C', b'AC0160-C',
       b'AC0161-C', b'AC0162-C', b'AC0163-C', b'AC0164-C', b'AC0166-C',
       b'AC0167-C', b'AC0168-C', b'AC0169-C', b'AC0170-C', b'AC0171-C',
       b'AC0172-C', b'AC0173-C', b'AC0174-C', b'AC0176-C', b'AC0178-C',
       b'AC0179-C', b'AC0180-C', b'AC0181-C', b'AC0182-C', b'AC0183-C',
       b'AC0184-C', b'AC0186-C', b'AC0187-C', b'AC0188-C', b'AC0189-C',
       b'AC0190-C', b'AC0191-C', b'AC0192-C', b'AC0193-C', b'AC0194-C',
       b'AC0195-C', b'AC0196-C', b'AC0197-C', b'AC0199-C', b'AC0200-C',
       b'AC0201-C', b'AC0202-C', b'AC0203-C', b'AJ0023-C', b'AJ0024-C',
       b'AJ0032-C', b'AJ0035-C', b'AJ0036-C', b'AJ0039-C', b'AJ0043-C',
       b'AJ0044-C', b'AJ0045-C', b'AJ0047-C', b'AJ0051-C', b'AJ0052-C',
       b'AJ0056-C', b'AJ0061-C', b'AJ0063-C', b'AJ0064-C', b'AJ0066-C',
       b'AJ0070-C', b'AJ0071-C', b'AJ0072-C', b'AJ0074-C', b'AJ0075-C',
       b'AJ0076-C', b'AJ0077-C', b'AJ0078-C', b'AJ0081-C', b'AJ0084-C',
       b'AJ0085-C', b'AJ0086-C', b'AJ0088-C', b'AJ0090-C', b'AJ0092-C',
       b'AJ0093-C', b'AJ0096-C', b'AJ0097-C', b'AJ0098-C', b'AJ0100-C',
       b'AJ0101-C', b'AJ0102-C', b'AJ0103-C', b'AJ0105-C', b'AJ0107-C',
       b'AJ0109-C', b'AJ0113-C', b'AJ0115-C', b'AJ0116-C', b'AK0065-C',
       b'AK0066-C', b'AK0067-C', b'AK0068-C', b'AK0069-C', b'AK0070-C',
       b'AK0072-C', b'AK0073-C', b'AK0074-C', b'AK0075-C', b'AK0076-C',
       b'AK0077-C', b'AK0078-C', b'AK0079-C', b'AK0080-C', b'AK0081-C',
       b'AK0082-C', b'AK0085-C', b'AK0086-C', b'AK0087-C', b'AK0088-C',
       b'AK0089-C', b'AK0090-C', b'AK0091-C', b'AK0092-C', b'AK0093-C',
       b'AK0094-C', b'AK0095-C', b'AK0096-C', b'AK0098-C', b'AK0099-C',
       b'AK0100-C', b'AK0101-C', b'AK0102-C', b'AK0103-C', b'AK0104-C',
       b'AK0105-C', b'AK0106-C', b'AK0108-C', b'AK0109-C', b'AK0110-C',
       b'AK0116-C', b'AK0119-C', b'AK0127-C', b'AN0007-C', b'AN0008-C',
       b'AN0009-C', b'AN0010-C', b'AN0011-C', b'AN0012-C', b'AN0014-C',
       b'AN0016-C', b'AN0017-C', b'AN0018-C', b'AN0019-C', b'AN0020-C',
       b'AN0022-C', b'AN0023-C', b'AN0024-C', b'AN0025-C', b'AN0026-C',
       b'AN0027-C', b'AN0028-C', b'AN0029-C', b'AN0030-C', b'AN0031-C',
       b'AN0032-C', b'AN0033-C', b'AN0034-C', b'AN0035-C', b'AN0036-C',
       b'AN0037-C', b'AN0038-C', b'AN0039-C', b'AN0040-C', b'AN0041-C',
       b'AN0042-C', b'AN0043-C', b'AN0045-C', b'AN0046-C', b'AN0047-C',
       b'AN0048-C', b'AN0049-C', b'AN0050-C', b'AN0051-C', b'AN0053-C',
       b'AN0054-C', b'AN0055-C', b'AN0056-C', b'AN0057-C', b'AN0058-C',
       b'AN0059-C', b'AN0060-C', b'AN0063-C', b'AN0064-C', b'AN0065-C',
       b'AN0066-C', b'AN0067-C', b'AN0068-C', b'AN0069-C', b'AN0070-C',
       b'AN0071-C', b'AN0072-C', b'AN0073-C', b'AN0074-C', b'AN0075-C',
       b'AN0076-C', b'AN0077-C', b'AN0079-C', b'AN0080-C', b'AN0081-C',
       b'AN0082-C', b'AN0083-C', b'AN0084-C', b'AN0085-C', b'AN0086-C',
       b'AN0087-C', b'AN0088-C', b'AN0089-C', b'AN0090-C', b'AN0091-C',
       b'AN0092-C', b'AN0093-C', b'AN0094-C', b'AN0095-C', b'AN0096-C',
       b'AN0097-C', b'AN0098-C', b'AN0099-C', b'AN0100-C', b'AN0101-C',
       b'AN0102-C', b'AN0103-C', b'AN0104-C', b'AN0105-C', b'AN0106-C',
       b'AN0107-C', b'AN0108-C', b'AN0109-C', b'AN0111-C', b'AN0112-C',
       b'AN0113-C', b'AN0114-C', b'AN0115-C', b'AN0117-C', b'AN0120-C',
       b'AN0121-C', b'AN0122-C', b'AN0123-C', b'AN0124-C', b'AN0125-C',
       b'AN0126-C', b'AN0127-C', b'AN0128-C', b'AN0129-C', b'AN0130-C',
       b'AN0131-C', b'AN0132-C', b'AN0134-C', b'AN0135-C', b'AN0136-C',
       b'AN0137-C', b'AN0138-C', b'AN0139-C', b'AN0140-C', b'AN0141-C',
       b'AN0143-C', b'AN0147-C', b'AN0149-C', b'AN0151-C', b'AN0152-C',
       b'AN0153-C', b'AN0154-C', b'AN0155-C', b'AN0156-C', b'AN0157-C',
       b'AN0158-C', b'AN0159-C', b'AN0160-C', b'AN0162-C', b'AN0163-C',
       b'AN0164-C', b'AN0165-C', b'AN0166-C', b'AN0167-C', b'AN0168-C',
       b'AN0169-C', b'AN0170-C', b'AN0171-C', b'AN0172-C', b'AN0173-C',
       b'AN0174-C', b'AN0175-C', b'AN0176-C', b'AN0177-C', b'AN0178-C',
       b'AN0179-C', b'AN0180-C', b'AN0181-C', b'AN0182-C', b'AN0183-C',
       b'AN0184-C', b'AN0185-C', b'AN0186-C', b'AN0187-C', b'AN0188-C',
       b'AN0189-C', b'AN0190-C', b'AN0191-C', b'AN0192-C', b'AN0193-C',
       b'AN0194-C', b'AN0196-C', b'AN0197-C', b'AN0198-C', b'AN0199-C',
       b'AN0200-C', b'AN0201-C', b'AN0202-C', b'AN0203-C', b'AN0204-C',
       b'AN0205-C', b'AN0206-C', b'AN0207-C', b'AN0208-C', b'AN0209-C',
       b'AN0210-C', b'AN0212-C', b'AN0213-C', b'AN0214-C', b'AN0215-C',
       b'AN0217-C', b'AN0218-C', b'AN0219-C', b'AN0220-C', b'AN0221-C',
       b'AN0222-C', b'AN0223-C', b'AN0224-C', b'AN0225-C', b'AN0226-C',
       b'AN0227-C', b'AN0228-C', b'AN0229-C', b'AN0230-C', b'AN0231-C',
       b'AN0233-C', b'AN0234-C', b'AN0235-C', b'AN0236-C', b'AN0237-C',
       b'AN0238-C', b'AN0239-C', b'AN0240-C', b'AN0241-C', b'AN0242-C',
       b'AN0243-C', b'AN0244-C', b'AN0245-C', b'AN0246-C', b'AN0247-C',
       b'AN0248-C', b'AN0250-C', b'AN0251-C', b'AN0252-C', b'AN0253-C',
       b'AN0254-C', b'AN0255-C', b'AN0256-C', b'AN0258-C', b'AN0259-C',
       b'AN0260-C', b'AN0261-C', b'AN0262-C', b'AN0263-C', b'AN0264-C',
       b'AN0266-C', b'AN0267-C', b'AN0268-C', b'AN0269-C', b'AN0270-C',
       b'AN0272-C', b'AN0275-C', b'AN0276-C', b'AN0277-C', b'AN0280-C',
       b'AN0282-C', b'AN0283-C', b'AN0284-C', b'AN0285-C', b'AN0286-C',
       b'AN0287-C', b'AN0288-C', b'AN0290-C', b'AN0291-C', b'AN0292-C',
       b'AN0294-C', b'AN0295-C', b'AN0296-C', b'AN0297-C', b'AN0298-C',
       b'AN0299-C', b'AN0300-C', b'AN0301-C', b'AN0303-C', b'AN0304-C',
       b'AN0305-C', b'AN0307-C', b'AN0308-C', b'AN0309-C', b'AN0310-C',
       b'AN0312-C', b'AN0313-C', b'AN0314-C', b'AN0315-C', b'AN0317-C',
       b'AN0318-C', b'AN0319-C', b'AN0321-C', b'AR0007-C', b'AR0008-C',
       b'AR0009-C', b'AR0010-C', b'AR0011-C', b'AR0012-C', b'AR0014-C',
       b'AR0015-C', b'AR0017-C', b'AR0019-C', b'AR0020-C', b'AR0021-C',
       b'AR0022-C', b'AR0023-C', b'AR0024-C', b'AR0026-C', b'AR0027-C',
       b'AR0034-C', b'AR0035-C', b'AR0042-C', b'AR0043-C', b'AR0045-C',
       b'AR0047-C', b'AR0049-C', b'AR0050-C', b'AR0051-C', b'AR0053-C',
       b'AR0054-C', b'AR0057-C', b'AR0059-C', b'AR0061-C', b'AR0062-C',
       b'AR0063-C', b'AR0065-C', b'AR0066-C', b'AR0069-C', b'AR0070-C',
       b'AR0071-C', b'AR0072-C', b'AR0073-C', b'AR0074-C', b'AR0075-C',
       b'AR0076-C', b'AR0078-C', b'AR0079-C', b'AR0080-C', b'AR0081-C',
       b'AR0083-C', b'AR0084-C', b'AR0086-C', b'AR0087-C', b'AR0089-C',
       b'AR0090-C', b'AR0092-C', b'AR0093-C', b'AR0095-C', b'AR0096-C',
       b'AR0098-C', b'AR0099-C', b'AR0100-C', b'AS0001-C', b'AS0002-C',
       b'AS0003-C', b'AS0004-C', b'AS0006-C', b'AS0007-C', b'AS0008-C',
       b'AS0009-C', b'AS0010-C', b'AS0011-C', b'AS0012-C', b'AS0013-C',
       b'AS0014-C', b'AS0015-C', b'AS0016-C', b'AS0017-C', b'AS0018-C',
       b'AS0019-C', b'AS0020-C', b'AS0021-C', b'AS0022-C', b'AS0024-C',
       b'AS0026-C', b'AS0028-C', b'AS0030-C', b'AS0032-C', b'AS0033-C',
       b'AS0034-C', b'AS0035-C', b'AS0036-C', b'AS0037-C', b'AS0039-C',
       b'AS0042-C', b'AS0044-C', b'AS0045-C', b'AS0047-C', b'AS0049-C',
       b'AS0052-C', b'AS0053-C', b'AS0054-C', b'AS0055-C', b'AS0056-C',
       b'AS0058-C', b'AS0059-C', b'AS0064-C', b'AS0065-C', b'AS0066-C',
       b'AS0068-C', b'AS0069-C', b'AS0070-C', b'AS0071-C', b'AS0072-C',
       b'AS0073-C', b'AS0074-C', b'AS0076-C', b'AS0077-C', b'AV0001-C',
       b'AV0002-C', b'AV0003-C', b'AV0004-C', b'AV0005-C', b'AV0007-C',
       b'AV0008-C', b'AV0009-C', b'AV0010-C', b'AV0011-C', b'AV0012-C',
       b'AV0013-C', b'AV0014-C', b'AV0015-C', b'AV0018-C', b'AV0024-C',
       b'AV0026-C', b'AV0027-C', b'AV0029-C', b'AV0030-C', b'AV0031-C',
       b'AV0032-C', b'AV0033-C', b'AV0034-C', b'AV0035-C', b'AV0036-C',
       b'AV0039-C', b'AV0041-C', b'AV0044-C', b'AV0045-C', b'AV0047-C',
       b'AD0231-C', b'AD0232-C', b'AD0254-C', b'AD0255-C', b'AD0305-C',
       b'AD0306-C', b'AD0347-C', b'AD0348-C'], 
      dtype='|S8')

In [49]:
callset['calldata/GT'].shape


Out[49]:
(1967, 773, 2)

In [50]:
callset['calldata/GT'].shape


Out[50]:
(1967, 773, 2)

In [51]:
callset['variants/CHROM']


Out[51]:
array([b'2L', b'2L', b'2L', ..., b'2L', b'2L', b'2L'], 
      dtype='|S12')

In [52]:
callset['variants/POS']


Out[52]:
array([2353212, 2353223, 2353234, ..., 2436558, 2436585, 2436615], dtype=int32)

In [53]:
callset['variants/REF']


Out[53]:
array([b'G', b'T', b'G', ..., b'G', b'A', b'C'], 
      dtype='|S1')

In [54]:
callset['variants/ALT']


Out[54]:
array([[b'A', b'', b''],
       [b'G', b'', b''],
       [b'C', b'', b''],
       ..., 
       [b'A', b'', b''],
       [b'C', b'', b''],
       [b'A', b'', b'']], 
      dtype='|S1')

Format conversion


In [55]:
npz_fn = 'sample.npz'
vcf_to_npz(sample_vcf_fn, npz_fn, fields='*', chunk_length=3, overwrite=False)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-55-fe36617e2a57> in <module>()
      1 npz_fn = 'sample.npz'
----> 2 vcf_to_npz(sample_vcf_fn, npz_fn, fields='*', chunk_length=3, overwrite=False)

/home/aliman/src/github/cggh/scikit-allel/allel/io_vcf_read.py in vcf_to_npz(input_path, output_path, compressed, overwrite, fields, types, numbers, buffer_size, chunk_length)
    176     if not overwrite and os.path.exists(output_path):
    177         # TODO right exception class?
--> 178         raise ValueError('file exists at path %r; use overwrite=True to replace' % output_path)
    179 
    180     # read all data into memory

ValueError: file exists at path 'sample.npz'; use overwrite=True to replace

In [56]:
vcf_to_npz(sample_vcf_fn, npz_fn, fields='*', chunk_length=3, overwrite=True)

In [57]:
!ls -lh {npz_fn}


-rw-r--r-- 1 aliman aliman 4.3K Jun  8 15:59 sample.npz

In [58]:
import numpy as np

In [59]:
# Load the .npz archive written above. As the repr below shows, the result is
# an NpzFile rather than a plain dict; arrays are retrieved by key in the
# following cells.
callset = np.load(npz_fn)
callset


Out[59]:
<numpy.lib.npyio.NpzFile at 0x7f196851eba8>

In [60]:
sorted(callset.keys())


Out[60]:
['calldata/DP',
 'calldata/GQ',
 'calldata/GT',
 'calldata/HQ',
 'samples',
 'variants/AA',
 'variants/AC',
 'variants/AF',
 'variants/ALT',
 'variants/AN',
 'variants/CHROM',
 'variants/DB',
 'variants/DP',
 'variants/FILTER_PASS',
 'variants/FILTER_q10',
 'variants/FILTER_s50',
 'variants/H2',
 'variants/ID',
 'variants/NS',
 'variants/POS',
 'variants/QUAL',
 'variants/REF']

In [61]:
callset['samples']


Out[61]:
array([b'NA00001', b'NA00002', b'NA00003'], 
      dtype='|S7')

In [62]:
callset['variants/POS']


Out[62]:
array([    111,     112,   14370,   17330, 1110696, 1230237, 1234567,
       1235237,      10], dtype=int32)

In [63]:
callset['variants/CHROM']


Out[63]:
array([b'19', b'19', b'20', b'20', b'20', b'20', b'20', b'20', b'X'], 
      dtype='|S12')

In [64]:
callset['calldata/GT']


Out[64]:
array([[[ 0,  0],
        [ 0,  0],
        [ 0,  1]],

       [[ 0,  0],
        [ 0,  0],
        [ 0,  1]],

       [[ 0,  0],
        [ 1,  0],
        [ 1,  1]],

       [[ 0,  0],
        [ 0,  1],
        [ 0,  0]],

       [[ 1,  2],
        [ 2,  1],
        [ 2,  2]],

       [[ 0,  0],
        [ 0,  0],
        [ 0,  0]],

       [[ 0,  1],
        [ 0,  2],
        [-1, -1]],

       [[ 0,  0],
        [ 0,  0],
        [-1, -1]],

       [[ 0, -1],
        [ 0,  1],
        [ 0,  2]]], dtype=int8)

In [65]:
hdf5_fn = 'sample.h5'
vcf_to_hdf5(sample_vcf_fn, hdf5_fn, fields='*', chunk_length=3)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-65-ddbb299760ca> in <module>()
      1 hdf5_fn = 'sample.h5'
----> 2 vcf_to_hdf5(sample_vcf_fn, hdf5_fn, fields='*', chunk_length=3)

/home/aliman/src/github/cggh/scikit-allel/allel/io_vcf_read.py in vcf_to_hdf5(input_path, output_path, group, compression, compression_opts, shuffle, overwrite, fields, types, numbers, buffer_size, chunk_length, chunk_width)
    299                 else:
    300                     # TODO right exception class?
--> 301                     raise ValueError('dataset exists at path %r; use overwrite=True to replace' % name)
    302             root[group].create_dataset(name, data=np.array(headers.samples).astype('S'),
    303                                        chunks=None)

ValueError: dataset exists at path 'samples'; use overwrite=True to replace

In [66]:
vcf_to_hdf5(sample_vcf_fn, hdf5_fn, fields='*', chunk_length=3, overwrite=True)

In [67]:
!ls -lh {hdf5_fn}


-rw-r--r-- 1 aliman aliman 88K Jun  8 15:59 sample.h5

In [68]:
!h5ls {hdf5_fn}


calldata                 Group
samples                  Dataset {3}
variants                 Group

In [69]:
!h5ls {hdf5_fn}/variants


AA                       Dataset {9/Inf}
AC                       Dataset {9/Inf, 3}
AF                       Dataset {9/Inf, 3}
ALT                      Dataset {9/Inf, 3}
AN                       Dataset {9/Inf}
CHROM                    Dataset {9/Inf}
DB                       Dataset {9/Inf}
DP                       Dataset {9/Inf}
FILTER                   Dataset {9/Inf, 1}
FILTER_PASS              Dataset {9/Inf}
FILTER_q10               Dataset {9/Inf}
FILTER_s50               Dataset {9/Inf}
H2                       Dataset {9/Inf}
ID                       Dataset {9/Inf}
NS                       Dataset {9/Inf}
POS                      Dataset {9/Inf}
QUAL                     Dataset {9/Inf}
REF                      Dataset {9/Inf}

In [70]:
!h5ls {hdf5_fn}/variants/CHROM


CHROM                    Dataset {9/Inf}

In [71]:
!h5ls {hdf5_fn}/calldata


DP                       Dataset {9/Inf, 3}
GQ                       Dataset {9/Inf, 3}
GT                       Dataset {9/Inf, 3, 2}
HQ                       Dataset {9/Inf, 3, 2}

In [72]:
!h5ls {hdf5_fn}/calldata/GT


GT                       Dataset {9/Inf, 3, 2}

In [73]:
import h5py

In [74]:
# Print a few datasets read back from the HDF5 file (opened read-only).
with h5py.File(hdf5_fn, mode='r') as h5f:
    for dataset_path in ('samples', 'variants/CHROM', 'variants/POS', 'calldata/GT'):
        print(h5f[dataset_path][:])


[b'NA00001' b'NA00002' b'NA00003']
[b'19' b'19' b'20' b'20' b'20' b'20' b'20' b'20' b'X']
[    111     112   14370   17330 1110696 1230237 1234567 1235237      10]
[[[ 0  0]
  [ 0  0]
  [ 0  1]]

 [[ 0  0]
  [ 0  0]
  [ 0  1]]

 [[ 0  0]
  [ 1  0]
  [ 1  1]]

 [[ 0  0]
  [ 0  1]
  [ 0  0]]

 [[ 1  2]
  [ 2  1]
  [ 2  2]]

 [[ 0  0]
  [ 0  0]
  [ 0  0]]

 [[ 0  1]
  [ 0  2]
  [-1 -1]]

 [[ 0  0]
  [ 0  0]
  [-1 -1]]

 [[ 0 -1]
  [ 0  1]
  [ 0  2]]]

In [75]:
zarr_fn = 'sample.zarr'
# Demonstrate the overwrite guard for Zarr output. `overwrite` is left at its
# default here, so when 'sample.zarr' already contains the arrays (e.g. from an
# earlier run of this notebook) the call fails. With the zarr version used for
# this run the failure surfaces as a KeyError raised inside zarr; ValueError is
# accepted as well on the assumption that other zarr releases may signal the
# same condition with a different exception class — TODO confirm.
# The error is caught and reported rather than left as an uncaught traceback,
# so that "Restart & Run All" continues past this cell.
try:
    vcf_to_zarr(sample_vcf_fn, zarr_fn, fields='*', chunk_length=3)
except (KeyError, ValueError) as err:
    print('%s: %s' % (type(err).__name__, err))


KeyError: "path 'samples' contains an array"

In [76]:
vcf_to_zarr(sample_vcf_fn, zarr_fn, fields='*', chunk_length=3, overwrite=True)

In [77]:
import zarr
# Open the store written by vcf_to_zarr above. mode='r' because this section only
# inspects the data: the default mode ('a') would silently create an empty group
# if the store were missing, whereas read-only mode raises instead.
# zarr_fn is the same path variable passed to vcf_to_zarr in the previous cell.
callset = zarr.open_group(zarr_fn, mode='r')
callset


Out[77]:
Group(/, 3)
  arrays: 1; samples
  groups: 2; calldata, variants
  store: DirectoryStore

In [78]:
# Read the whole root-level `samples` array into memory.
callset['samples'][:]


Out[78]:
array([b'NA00001', b'NA00002', b'NA00003'], 
      dtype='|S7')

In [79]:
# Show the summary of the `variants` sub-group.
callset['variants']


Out[79]:
Group(/variants, 18)
  arrays: 18; AA, AC, AF, ALT, AN, CHROM, DB, DP, FILTER, FILTER_PASS, FILTE...
  store: DirectoryStore

In [80]:
# Load every variant position.
callset['variants/POS'][:]


Out[80]:
array([    111,     112,   14370,   17330, 1110696, 1230237, 1234567,
       1235237,      10], dtype=int32)

In [81]:
# Load the chromosome name of every variant.
callset['variants/CHROM'][:]


Out[81]:
array([b'19', b'19', b'20', b'20', b'20', b'20', b'20', b'20', b'X'], 
      dtype='|S12')

In [82]:
# Show the summary of the `calldata` sub-group.
callset['calldata']


Out[82]:
Group(/calldata, 4)
  arrays: 4; DP, GQ, GT, HQ
  store: DirectoryStore

In [83]:
# Load the full GT (genotype) array.
callset['calldata/GT'][:]


Out[83]:
array([[[ 0,  0],
        [ 0,  0],
        [ 0,  1]],

       [[ 0,  0],
        [ 0,  0],
        [ 0,  1]],

       [[ 0,  0],
        [ 1,  0],
        [ 1,  1]],

       [[ 0,  0],
        [ 0,  1],
        [ 0,  0]],

       [[ 1,  2],
        [ 2,  1],
        [ 2,  2]],

       [[ 0,  0],
        [ 0,  0],
        [ 0,  0]],

       [[ 0,  1],
        [ 0,  2],
        [-1, -1]],

       [[ 0,  0],
        [ 0,  0],
        [-1, -1]],

       [[ 0, -1],
        [ 0,  1],
        [ 0,  2]]], dtype=int8)

In [84]:
# Load the full GQ (genotype quality) array.
callset['calldata/GQ'][:]


Out[84]:
array([[-1, -1, -1],
       [-1, -1, -1],
       [48, 48, 43],
       [49,  3, 41],
       [21,  2, 35],
       [54, 48, 61],
       [-1, 17, 40],
       [-1, -1, -1],
       [-1, -1, -1]], dtype=int8)

Profiling


In [1]:
# Profiling setup: standard-library imports first.
import cProfile
import sys

# Put '../..' at the front of the import path; this has to run before the
# `allel` import below so that the import searches that directory first.
sys.path.insert(0, '../..')
from allel.io_vcf_read import (
    read_vcf,
    vcf_to_hdf5,
    vcf_to_npz,
    vcf_to_zarr,
)

# Input VCF files used by the profiling cells.
prof_vcf_fn = '../../profdata/2L_2358158_2431617.vcf'
sample_vcf_fn = '../../fixture/sample.vcf'

In [2]:
%time read_vcf(prof_vcf_fn, fields='*', chunk_length=1000)


CPU times: user 92 ms, sys: 8 ms, total: 100 ms
Wall time: 94.3 ms
Out[2]:
{'calldata/GT': array([[[0, 0],
         [0, 0],
         [0, 0],
         ..., 
         [0, 0],
         [0, 0],
         [0, 0]],
 
        [[0, 0],
         [0, 0],
         [0, 0],
         ..., 
         [0, 0],
         [0, 0],
         [0, 0]],
 
        [[0, 0],
         [0, 0],
         [0, 0],
         ..., 
         [0, 0],
         [0, 0],
         [0, 0]],
 
        ..., 
        [[0, 0],
         [0, 0],
         [0, 0],
         ..., 
         [0, 0],
         [0, 0],
         [0, 0]],
 
        [[1, 1],
         [0, 1],
         [1, 1],
         ..., 
         [0, 0],
         [1, 1],
         [0, 1]],
 
        [[0, 0],
         [0, 0],
         [0, 0],
         ..., 
         [0, 0],
         [0, 0],
         [0, 0]]], dtype=int8),
 'samples': array(['AB0085-C', 'AB0087-C', 'AB0088-C', 'AB0089-C', 'AB0090-C',
        'AB0091-C', 'AB0092-C', 'AB0094-C', 'AB0095-C', 'AB0097-C',
        'AB0098-C', 'AB0099-C', 'AB0100-C', 'AB0101-C', 'AB0103-C',
        'AB0104-C', 'AB0109-C', 'AB0110-C', 'AB0111-C', 'AB0112-C',
        'AB0113-C', 'AB0114-C', 'AB0117-C', 'AB0119-C', 'AB0122-C',
        'AB0123-C', 'AB0124-C', 'AB0126-C', 'AB0127-C', 'AB0128-C',
        'AB0129-C', 'AB0130-C', 'AB0133-C', 'AB0134-C', 'AB0135-C',
        'AB0136-C', 'AB0137-C', 'AB0138-C', 'AB0139-C', 'AB0140-C',
        'AB0142-C', 'AB0143-C', 'AB0145-C', 'AB0146-C', 'AB0147-C',
        'AB0148-C', 'AB0151-C', 'AB0153-C', 'AB0155-C', 'AB0157-C',
        'AB0158-C', 'AB0159-C', 'AB0160-C', 'AB0161-C', 'AB0164-C',
        'AB0166-C', 'AB0169-C', 'AB0170-C', 'AB0171-C', 'AB0172-C',
        'AB0173-C', 'AB0174-C', 'AB0175-C', 'AB0176-C', 'AB0177-C',
        'AB0178-C', 'AB0179-C', 'AB0181-C', 'AB0182-C', 'AB0183-C',
        'AB0184-C', 'AB0185-C', 'AB0186-C', 'AB0187-C', 'AB0188-C',
        'AB0189-C', 'AB0190-C', 'AB0191-C', 'AB0192-C', 'AB0197-C',
        'AB0198-C', 'AB0199-C', 'AB0201-C', 'AB0202-C', 'AB0203-C',
        'AB0204-C', 'AB0205-C', 'AB0206-C', 'AB0207-C', 'AB0208-C',
        'AB0209-C', 'AB0210-C', 'AB0211-C', 'AB0212-C', 'AB0213-C',
        'AB0217-C', 'AB0219-C', 'AB0221-C', 'AB0222-C', 'AB0223-C',
        'AB0224-C', 'AB0226-C', 'AB0227-C', 'AB0228-C', 'AB0229-C',
        'AB0231-C', 'AB0233-C', 'AB0234-C', 'AB0235-C', 'AB0236-C',
        'AB0237-C', 'AB0238-C', 'AB0239-C', 'AB0240-C', 'AB0241-C',
        'AB0242-C', 'AB0243-C', 'AB0244-C', 'AB0246-C', 'AB0249-C',
        'AB0250-C', 'AB0251-C', 'AB0252-C', 'AB0253-C', 'AB0256-C',
        'AB0257-C', 'AB0258-C', 'AB0260-C', 'AB0261-C', 'AB0262-C',
        'AB0263-C', 'AB0264-C', 'AB0265-C', 'AB0266-C', 'AB0267-C',
        'AB0268-C', 'AB0270-C', 'AB0271-C', 'AB0272-C', 'AB0273-C',
        'AB0274-C', 'AB0276-C', 'AB0277-C', 'AB0278-C', 'AB0279-C',
        'AB0280-C', 'AB0281-C', 'AB0282-C', 'AB0283-C', 'AB0284-C',
        'AC0090-C', 'AC0091-C', 'AC0092-C', 'AC0093-C', 'AC0094-C',
        'AC0095-C', 'AC0096-C', 'AC0097-C', 'AC0098-C', 'AC0099-C',
        'AC0100-C', 'AC0101-C', 'AC0102-C', 'AC0103-C', 'AC0104-C',
        'AC0106-C', 'AC0107-C', 'AC0108-C', 'AC0109-C', 'AC0110-C',
        'AC0111-C', 'AC0112-C', 'AC0113-C', 'AC0114-C', 'AC0115-C',
        'AC0116-C', 'AC0117-C', 'AC0118-C', 'AC0119-C', 'AC0120-C',
        'AC0121-C', 'AC0122-C', 'AC0123-C', 'AC0124-C', 'AC0125-C',
        'AC0126-C', 'AC0127-C', 'AC0128-C', 'AC0129-C', 'AC0130-C',
        'AC0131-C', 'AC0132-C', 'AC0133-C', 'AC0135-C', 'AC0136-C',
        'AC0137-C', 'AC0138-C', 'AC0139-C', 'AC0140-C', 'AC0142-C',
        'AC0143-C', 'AC0144-C', 'AC0145-C', 'AC0147-C', 'AC0148-C',
        'AC0149-C', 'AC0150-C', 'AC0151-C', 'AC0152-C', 'AC0153-C',
        'AC0154-C', 'AC0156-C', 'AC0158-C', 'AC0159-C', 'AC0160-C',
        'AC0161-C', 'AC0162-C', 'AC0163-C', 'AC0164-C', 'AC0166-C',
        'AC0167-C', 'AC0168-C', 'AC0169-C', 'AC0170-C', 'AC0171-C',
        'AC0172-C', 'AC0173-C', 'AC0174-C', 'AC0176-C', 'AC0178-C',
        'AC0179-C', 'AC0180-C', 'AC0181-C', 'AC0182-C', 'AC0183-C',
        'AC0184-C', 'AC0186-C', 'AC0187-C', 'AC0188-C', 'AC0189-C',
        'AC0190-C', 'AC0191-C', 'AC0192-C', 'AC0193-C', 'AC0194-C',
        'AC0195-C', 'AC0196-C', 'AC0197-C', 'AC0199-C', 'AC0200-C',
        'AC0201-C', 'AC0202-C', 'AC0203-C', 'AJ0023-C', 'AJ0024-C',
        'AJ0032-C', 'AJ0035-C', 'AJ0036-C', 'AJ0039-C', 'AJ0043-C',
        'AJ0044-C', 'AJ0045-C', 'AJ0047-C', 'AJ0051-C', 'AJ0052-C',
        'AJ0056-C', 'AJ0061-C', 'AJ0063-C', 'AJ0064-C', 'AJ0066-C',
        'AJ0070-C', 'AJ0071-C', 'AJ0072-C', 'AJ0074-C', 'AJ0075-C',
        'AJ0076-C', 'AJ0077-C', 'AJ0078-C', 'AJ0081-C', 'AJ0084-C',
        'AJ0085-C', 'AJ0086-C', 'AJ0088-C', 'AJ0090-C', 'AJ0092-C',
        'AJ0093-C', 'AJ0096-C', 'AJ0097-C', 'AJ0098-C', 'AJ0100-C',
        'AJ0101-C', 'AJ0102-C', 'AJ0103-C', 'AJ0105-C', 'AJ0107-C',
        'AJ0109-C', 'AJ0113-C', 'AJ0115-C', 'AJ0116-C', 'AK0065-C',
        'AK0066-C', 'AK0067-C', 'AK0068-C', 'AK0069-C', 'AK0070-C',
        'AK0072-C', 'AK0073-C', 'AK0074-C', 'AK0075-C', 'AK0076-C',
        'AK0077-C', 'AK0078-C', 'AK0079-C', 'AK0080-C', 'AK0081-C',
        'AK0082-C', 'AK0085-C', 'AK0086-C', 'AK0087-C', 'AK0088-C',
        'AK0089-C', 'AK0090-C', 'AK0091-C', 'AK0092-C', 'AK0093-C',
        'AK0094-C', 'AK0095-C', 'AK0096-C', 'AK0098-C', 'AK0099-C',
        'AK0100-C', 'AK0101-C', 'AK0102-C', 'AK0103-C', 'AK0104-C',
        'AK0105-C', 'AK0106-C', 'AK0108-C', 'AK0109-C', 'AK0110-C',
        'AK0116-C', 'AK0119-C', 'AK0127-C', 'AN0007-C', 'AN0008-C',
        'AN0009-C', 'AN0010-C', 'AN0011-C', 'AN0012-C', 'AN0014-C',
        'AN0016-C', 'AN0017-C', 'AN0018-C', 'AN0019-C', 'AN0020-C',
        'AN0022-C', 'AN0023-C', 'AN0024-C', 'AN0025-C', 'AN0026-C',
        'AN0027-C', 'AN0028-C', 'AN0029-C', 'AN0030-C', 'AN0031-C',
        'AN0032-C', 'AN0033-C', 'AN0034-C', 'AN0035-C', 'AN0036-C',
        'AN0037-C', 'AN0038-C', 'AN0039-C', 'AN0040-C', 'AN0041-C',
        'AN0042-C', 'AN0043-C', 'AN0045-C', 'AN0046-C', 'AN0047-C',
        'AN0048-C', 'AN0049-C', 'AN0050-C', 'AN0051-C', 'AN0053-C',
        'AN0054-C', 'AN0055-C', 'AN0056-C', 'AN0057-C', 'AN0058-C',
        'AN0059-C', 'AN0060-C', 'AN0063-C', 'AN0064-C', 'AN0065-C',
        'AN0066-C', 'AN0067-C', 'AN0068-C', 'AN0069-C', 'AN0070-C',
        'AN0071-C', 'AN0072-C', 'AN0073-C', 'AN0074-C', 'AN0075-C',
        'AN0076-C', 'AN0077-C', 'AN0079-C', 'AN0080-C', 'AN0081-C',
        'AN0082-C', 'AN0083-C', 'AN0084-C', 'AN0085-C', 'AN0086-C',
        'AN0087-C', 'AN0088-C', 'AN0089-C', 'AN0090-C', 'AN0091-C',
        'AN0092-C', 'AN0093-C', 'AN0094-C', 'AN0095-C', 'AN0096-C',
        'AN0097-C', 'AN0098-C', 'AN0099-C', 'AN0100-C', 'AN0101-C',
        'AN0102-C', 'AN0103-C', 'AN0104-C', 'AN0105-C', 'AN0106-C',
        'AN0107-C', 'AN0108-C', 'AN0109-C', 'AN0111-C', 'AN0112-C',
        'AN0113-C', 'AN0114-C', 'AN0115-C', 'AN0117-C', 'AN0120-C',
        'AN0121-C', 'AN0122-C', 'AN0123-C', 'AN0124-C', 'AN0125-C',
        'AN0126-C', 'AN0127-C', 'AN0128-C', 'AN0129-C', 'AN0130-C',
        'AN0131-C', 'AN0132-C', 'AN0134-C', 'AN0135-C', 'AN0136-C',
        'AN0137-C', 'AN0138-C', 'AN0139-C', 'AN0140-C', 'AN0141-C',
        'AN0143-C', 'AN0147-C', 'AN0149-C', 'AN0151-C', 'AN0152-C',
        'AN0153-C', 'AN0154-C', 'AN0155-C', 'AN0156-C', 'AN0157-C',
        'AN0158-C', 'AN0159-C', 'AN0160-C', 'AN0162-C', 'AN0163-C',
        'AN0164-C', 'AN0165-C', 'AN0166-C', 'AN0167-C', 'AN0168-C',
        'AN0169-C', 'AN0170-C', 'AN0171-C', 'AN0172-C', 'AN0173-C',
        'AN0174-C', 'AN0175-C', 'AN0176-C', 'AN0177-C', 'AN0178-C',
        'AN0179-C', 'AN0180-C', 'AN0181-C', 'AN0182-C', 'AN0183-C',
        'AN0184-C', 'AN0185-C', 'AN0186-C', 'AN0187-C', 'AN0188-C',
        'AN0189-C', 'AN0190-C', 'AN0191-C', 'AN0192-C', 'AN0193-C',
        'AN0194-C', 'AN0196-C', 'AN0197-C', 'AN0198-C', 'AN0199-C',
        'AN0200-C', 'AN0201-C', 'AN0202-C', 'AN0203-C', 'AN0204-C',
        'AN0205-C', 'AN0206-C', 'AN0207-C', 'AN0208-C', 'AN0209-C',
        'AN0210-C', 'AN0212-C', 'AN0213-C', 'AN0214-C', 'AN0215-C',
        'AN0217-C', 'AN0218-C', 'AN0219-C', 'AN0220-C', 'AN0221-C',
        'AN0222-C', 'AN0223-C', 'AN0224-C', 'AN0225-C', 'AN0226-C',
        'AN0227-C', 'AN0228-C', 'AN0229-C', 'AN0230-C', 'AN0231-C',
        'AN0233-C', 'AN0234-C', 'AN0235-C', 'AN0236-C', 'AN0237-C',
        'AN0238-C', 'AN0239-C', 'AN0240-C', 'AN0241-C', 'AN0242-C',
        'AN0243-C', 'AN0244-C', 'AN0245-C', 'AN0246-C', 'AN0247-C',
        'AN0248-C', 'AN0250-C', 'AN0251-C', 'AN0252-C', 'AN0253-C',
        'AN0254-C', 'AN0255-C', 'AN0256-C', 'AN0258-C', 'AN0259-C',
        'AN0260-C', 'AN0261-C', 'AN0262-C', 'AN0263-C', 'AN0264-C',
        'AN0266-C', 'AN0267-C', 'AN0268-C', 'AN0269-C', 'AN0270-C',
        'AN0272-C', 'AN0275-C', 'AN0276-C', 'AN0277-C', 'AN0280-C',
        'AN0282-C', 'AN0283-C', 'AN0284-C', 'AN0285-C', 'AN0286-C',
        'AN0287-C', 'AN0288-C', 'AN0290-C', 'AN0291-C', 'AN0292-C',
        'AN0294-C', 'AN0295-C', 'AN0296-C', 'AN0297-C', 'AN0298-C',
        'AN0299-C', 'AN0300-C', 'AN0301-C', 'AN0303-C', 'AN0304-C',
        'AN0305-C', 'AN0307-C', 'AN0308-C', 'AN0309-C', 'AN0310-C',
        'AN0312-C', 'AN0313-C', 'AN0314-C', 'AN0315-C', 'AN0317-C',
        'AN0318-C', 'AN0319-C', 'AN0321-C', 'AR0007-C', 'AR0008-C',
        'AR0009-C', 'AR0010-C', 'AR0011-C', 'AR0012-C', 'AR0014-C',
        'AR0015-C', 'AR0017-C', 'AR0019-C', 'AR0020-C', 'AR0021-C',
        'AR0022-C', 'AR0023-C', 'AR0024-C', 'AR0026-C', 'AR0027-C',
        'AR0034-C', 'AR0035-C', 'AR0042-C', 'AR0043-C', 'AR0045-C',
        'AR0047-C', 'AR0049-C', 'AR0050-C', 'AR0051-C', 'AR0053-C',
        'AR0054-C', 'AR0057-C', 'AR0059-C', 'AR0061-C', 'AR0062-C',
        'AR0063-C', 'AR0065-C', 'AR0066-C', 'AR0069-C', 'AR0070-C',
        'AR0071-C', 'AR0072-C', 'AR0073-C', 'AR0074-C', 'AR0075-C',
        'AR0076-C', 'AR0078-C', 'AR0079-C', 'AR0080-C', 'AR0081-C',
        'AR0083-C', 'AR0084-C', 'AR0086-C', 'AR0087-C', 'AR0089-C',
        'AR0090-C', 'AR0092-C', 'AR0093-C', 'AR0095-C', 'AR0096-C',
        'AR0098-C', 'AR0099-C', 'AR0100-C', 'AS0001-C', 'AS0002-C',
        'AS0003-C', 'AS0004-C', 'AS0006-C', 'AS0007-C', 'AS0008-C',
        'AS0009-C', 'AS0010-C', 'AS0011-C', 'AS0012-C', 'AS0013-C',
        'AS0014-C', 'AS0015-C', 'AS0016-C', 'AS0017-C', 'AS0018-C',
        'AS0019-C', 'AS0020-C', 'AS0021-C', 'AS0022-C', 'AS0024-C',
        'AS0026-C', 'AS0028-C', 'AS0030-C', 'AS0032-C', 'AS0033-C',
        'AS0034-C', 'AS0035-C', 'AS0036-C', 'AS0037-C', 'AS0039-C',
        'AS0042-C', 'AS0044-C', 'AS0045-C', 'AS0047-C', 'AS0049-C',
        'AS0052-C', 'AS0053-C', 'AS0054-C', 'AS0055-C', 'AS0056-C',
        'AS0058-C', 'AS0059-C', 'AS0064-C', 'AS0065-C', 'AS0066-C',
        'AS0068-C', 'AS0069-C', 'AS0070-C', 'AS0071-C', 'AS0072-C',
        'AS0073-C', 'AS0074-C', 'AS0076-C', 'AS0077-C', 'AV0001-C',
        'AV0002-C', 'AV0003-C', 'AV0004-C', 'AV0005-C', 'AV0007-C',
        'AV0008-C', 'AV0009-C', 'AV0010-C', 'AV0011-C', 'AV0012-C',
        'AV0013-C', 'AV0014-C', 'AV0015-C', 'AV0018-C', 'AV0024-C',
        'AV0026-C', 'AV0027-C', 'AV0029-C', 'AV0030-C', 'AV0031-C',
        'AV0032-C', 'AV0033-C', 'AV0034-C', 'AV0035-C', 'AV0036-C',
        'AV0039-C', 'AV0041-C', 'AV0044-C', 'AV0045-C', 'AV0047-C',
        'AD0231-C', 'AD0232-C', 'AD0254-C', 'AD0255-C', 'AD0305-C',
        'AD0306-C', 'AD0347-C', 'AD0348-C'], dtype=object),
 'variants/AC': array([[-1, -1, -1],
        [-1, -1, -1],
        [-1, -1, -1],
        ..., 
        [-1, -1, -1],
        [-1, -1, -1],
        [-1, -1, -1]], dtype=int32),
 'variants/ALT': array([['A', '', ''],
        ['G', '', ''],
        ['C', '', ''],
        ..., 
        ['A', '', ''],
        ['C', '', ''],
        ['A', '', '']], dtype=object),
 'variants/CHROM': array(['2L', '2L', '2L', ..., '2L', '2L', '2L'], dtype=object),
 'variants/FILTER_PASS': array([False, False, False, ..., False, False, False], dtype=bool),
 'variants/ID': array(['.', '.', '.', ..., '.', '.', '.'], dtype=object),
 'variants/POS': array([2353212, 2353223, 2353234, ..., 2436558, 2436585, 2436615], dtype=int32),
 'variants/QUAL': array([ 0.,  0.,  0., ...,  0.,  0.,  0.], dtype=float32),
 'variants/REF': array(['G', 'T', 'G', ..., 'G', 'A', 'C'], dtype=object),
 'variants/numalt': array([1, 1, 1, ..., 1, 1, 1], dtype=int32),
 'variants/svlen': array([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ..., 
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]], dtype=int32)}

In [3]:
# Time a read with n_threads=1 and block_length=100; the result is bound to `_`
# so it is not echoed.
%time _ = read_vcf(prof_vcf_fn, n_threads=1, chunk_length=1000, block_length=100)


CPU times: user 116 ms, sys: 4 ms, total: 120 ms
Wall time: 208 ms

In [4]:
!cat {prof_vcf_fn} | wc -l


1979

In [5]:
#!zcat ../../profdata/prof_gt.vcf.gz | wc -l

In [6]:
# Repeated timing of a read with the default fields.
%timeit _ = read_vcf(prof_vcf_fn, chunk_length=1000)


10 loops, best of 3: 70.4 ms per loop

In [7]:
# Profile a single read_vcf call; sort='time' orders the report by internal time.
cProfile.run('read_vcf(prof_vcf_fn, chunk_length=1000)', sort='time')


         184 function calls in 0.093 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.089    0.089    0.089    0.089 io_vcf_read.py:261(<listcomp>)
        1    0.002    0.002    0.003    0.003 io_vcf_read.py:1433(_iter_vcf_stream)
        8    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.concatenate}
        1    0.000    0.000    0.093    0.093 <string>:1(<module>)
        1    0.000    0.000    0.003    0.003 io_vcf_read.py:857(iter_vcf_chunks)
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1492(_read_vcf_headers)
       17    0.000    0.000    0.000    0.000 {method 'split' of 'str' objects}
        1    0.000    0.000    0.093    0.093 io_vcf_read.py:191(read_vcf)
       12    0.000    0.000    0.000    0.000 {method 'readline' of 'allel.opt.io_vcf_read.FileInputStream' objects}
        1    0.000    0.000    0.093    0.093 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {built-in method numpy.core.multiarray.array}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1403(_normalize_samples)
        1    0.000    0.000    0.000    0.000 {built-in method io.open}
        7    0.000    0.000    0.000    0.000 io_vcf_read.py:1193(_normalize_type)
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1235(_normalize_types)
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1331(_normalize_numbers)
        1    0.000    0.000    0.000    0.000 {built-in method numpy.core.multiarray.zeros}
        1    0.000    0.000    0.000    0.000 {method 'fill' of 'numpy.ndarray' objects}
       60    0.000    0.000    0.000    0.000 {method 'startswith' of 'str' objects}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1112(_add_all_fixed_variants_fields)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sorted}
        2    0.000    0.000    0.000    0.000 {method 'match' of '_sre.SRE_Pattern' objects}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1385(_normalize_fills)
        1    0.000    0.000    0.000    0.000 <string>:12(__new__)
       20    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
        8    0.000    0.000    0.000    0.000 io_vcf_read.py:276(<listcomp>)
        2    0.000    0.000    0.000    0.000 {method 'groups' of '_sre.SRE_Match' objects}
        1    0.000    0.000    0.000    0.000 {built-in method __new__ of type object at 0x7fc74547cd20}
        3    0.000    0.000    0.000    0.000 {method 'items' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'endswith' of 'str' objects}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1336(<dictcomp>)
        1    0.000    0.000    0.000    0.000 {method 'strip' of 'str' objects}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:51(_prep_fields_param)
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1240(<dictcomp>)
        1    0.000    0.000    0.000    0.000 {method 'keys' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
       15    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
        3    0.000    0.000    0.000    0.000 {built-in method builtins.len}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:1389(<dictcomp>)



In [8]:
# from allel.opt.io_vcf_read import vcf_genotype_parse
# import line_profiler
# l = line_profiler.LineProfiler()
# l.add_function(vcf_genotype_parse)
# l.runcall(read_vcf, prof_vcf_fn, buffer_size=2**15, chunk_length=1000)
# l.print_stats()

In [ ]:


In [9]:
# Time a VCF -> .npz conversion with chunk_length=200.
# overwrite=True is passed so the cell can be re-run when prof.npz already exists
# (NOTE(review): presumed from the parameter name; confirm in allel.io_vcf_read).
%time vcf_to_npz(prof_vcf_fn, 'prof.npz', chunk_length=200, overwrite=True)


CPU times: user 196 ms, sys: 8 ms, total: 204 ms
Wall time: 199 ms

In [10]:
# Time a VCF -> HDF5 conversion with the same chunk_length as the .npz run.
%time vcf_to_hdf5(prof_vcf_fn, 'prof.h5', chunk_length=200, overwrite=True)


CPU times: user 164 ms, sys: 8 ms, total: 172 ms
Wall time: 167 ms

In [11]:
# Time a VCF -> Zarr conversion with the same chunk_length as the .npz run.
%time vcf_to_zarr(prof_vcf_fn, 'prof.zarr', chunk_length=200, overwrite=True)


CPU times: user 196 ms, sys: 28 ms, total: 224 ms
Wall time: 229 ms

In [12]:
!zcat ../../profdata/prof_gt.vcf.gz | wc -l


198679

In [12]:
# Baseline for the thread-count comparison: gzipped profiling file, n_threads
# left at its default, progress logged to stderr.
%time _ = read_vcf('../../profdata/prof_gt.vcf.gz', log=sys.stderr)


[read_vcf] 65536 rows in 3.19s; chunk in 3.19s (20547 rows/s); 2L:2383306
[read_vcf] 131072 rows in 6.32s; chunk in 3.13s (20955 rows/s); 2L:2403926
[read_vcf] 196608 rows in 9.42s; chunk in 3.11s (21085 rows/s); 2L:2434126
[read_vcf] 198667 rows in 9.61s; chunk in 0.18s (11297 rows/s); :0
[read_vcf] all done (20677 rows/s)
CPU times: user 9.71 s, sys: 64 ms, total: 9.77 s
Wall time: 9.76 s

In [13]:
# Same read with n_threads=1.
%time _ = read_vcf('../../profdata/prof_gt.vcf.gz', n_threads=1, log=sys.stderr)


[read_vcf] 65536 rows in 3.71s; chunk in 3.71s (17685 rows/s); 2L:2383306
[read_vcf] 131072 rows in 7.36s; chunk in 3.65s (17947 rows/s); 2L:2403926
[read_vcf] 196608 rows in 11.02s; chunk in 3.66s (17903 rows/s); 2L:2434126
[read_vcf] 198667 rows in 11.22s; chunk in 0.20s (10187 rows/s); :0
[read_vcf] all done (17550 rows/s)
CPU times: user 11.4 s, sys: 128 ms, total: 11.5 s
Wall time: 11.5 s

In [14]:
_ = read_vcf('../../profdata/prof_gt.vcf.gz', n_threads=2, log=sys.stderr)


[read_vcf] 65536 rows in 3.66s; chunk in 3.66s (17894 rows/s); 2L:2383306
[read_vcf] 131072 rows in 7.28s; chunk in 3.62s (18092 rows/s); 2L:2403926
[read_vcf] 196608 rows in 10.90s; chunk in 3.62s (18116 rows/s); 2L:2434126
[read_vcf] 198667 rows in 11.11s; chunk in 0.20s (10075 rows/s); :0
[read_vcf] all done (17728 rows/s)

In [14]:
# Same read with n_threads=3.
%time _ = read_vcf('../../profdata/prof_gt.vcf.gz', n_threads=3, log=sys.stderr)


[read_vcf] 65536 rows in 1.67s; chunk in 1.67s (39302 rows/s); 2L:2383306
[read_vcf] 131072 rows in 3.32s; chunk in 1.65s (39739 rows/s); 2L:2403926
[read_vcf] 196608 rows in 5.16s; chunk in 1.84s (35526 rows/s); 2L:2434126
[read_vcf] 198667 rows in 5.37s; chunk in 0.21s (9895 rows/s); :0
[read_vcf] all done (35779 rows/s)
CPU times: user 12.1 s, sys: 92 ms, total: 12.2 s
Wall time: 5.73 s

In [15]:
# Same read with n_threads=4.
%time _ = read_vcf('../../profdata/prof_gt.vcf.gz', n_threads=4, log=sys.stderr)


[read_vcf] 65536 rows in 2.26s; chunk in 2.26s (28991 rows/s); 2L:2383306
[read_vcf] 131072 rows in 4.51s; chunk in 2.25s (29074 rows/s); 2L:2403926
[read_vcf] 196608 rows in 6.46s; chunk in 1.95s (33637 rows/s); 2L:2434126
[read_vcf] 198667 rows in 6.66s; chunk in 0.20s (10390 rows/s); :0
[read_vcf] all done (29477 rows/s)
CPU times: user 14.8 s, sys: 112 ms, total: 14.9 s
Wall time: 6.91 s

In [16]:
# Same read with n_threads=8.
%time _ = read_vcf('../../profdata/prof_gt.vcf.gz', n_threads=8, log=sys.stderr)


[read_vcf] 65536 rows in 1.95s; chunk in 1.95s (33672 rows/s); 2L:2383306
[read_vcf] 131072 rows in 3.64s; chunk in 1.69s (38788 rows/s); 2L:2403926
[read_vcf] 196608 rows in 5.87s; chunk in 2.24s (29291 rows/s); 2L:2434126
[read_vcf] 198667 rows in 6.13s; chunk in 0.26s (7874 rows/s); :0
[read_vcf] all done (31343 rows/s)
CPU times: user 14.1 s, sys: 124 ms, total: 14.2 s
Wall time: 6.57 s

In [23]:
# Time converting the gzipped profiling file to HDF5 with n_threads left at its default.
%time vcf_to_hdf5('../../profdata/prof_gt.vcf.gz', '../../profdata/prof_gt.h5', overwrite=True, log=sys.stderr)


[vcf_to_hdf5] 65536 rows in 3.11s; chunk in 3.11s (21104 rows/s); 2L:2383306
[vcf_to_hdf5] 131072 rows in 6.93s; chunk in 3.82s (17143 rows/s); 2L:2403926
[vcf_to_hdf5] 196608 rows in 10.78s; chunk in 3.85s (17034 rows/s); 2L:2434126
[vcf_to_hdf5] 198667 rows in 11.60s; chunk in 0.83s (2482 rows/s); 2L:2436615
[vcf_to_hdf5] all done (16397 rows/s)
CPU times: user 12.1 s, sys: 100 ms, total: 12.2 s
Wall time: 12.2 s

In [24]:
# Same HDF5 conversion with n_threads=2.
%time vcf_to_hdf5('../../profdata/prof_gt.vcf.gz', '../../profdata/prof_gt.h5', n_threads=2, overwrite=True, log=sys.stderr)


[vcf_to_hdf5] 65536 rows in 2.40s; chunk in 2.40s (27332 rows/s); 2L:2383306
[vcf_to_hdf5] 131072 rows in 5.42s; chunk in 3.02s (21714 rows/s); 2L:2403926
[vcf_to_hdf5] 196608 rows in 8.42s; chunk in 3.00s (21842 rows/s); 2L:2434126
[vcf_to_hdf5] 198667 rows in 9.23s; chunk in 0.81s (2545 rows/s); 2L:2436615
[vcf_to_hdf5] all done (20347 rows/s)
CPU times: user 20.1 s, sys: 116 ms, total: 20.2 s
Wall time: 9.86 s

In [25]:
# Time converting the gzipped profiling file to Zarr with n_threads left at its default.
%time vcf_to_zarr('../../profdata/prof_gt.vcf.gz', '../../profdata/prof_gt.zarr', overwrite=True, log=sys.stderr)


[vcf_to_zarr] 65536 rows in 3.10s; chunk in 3.10s (21118 rows/s); 2L:2383306
[vcf_to_zarr] 131072 rows in 6.23s; chunk in 3.13s (20937 rows/s); 2L:2403926
[vcf_to_zarr] 196608 rows in 9.32s; chunk in 3.09s (21241 rows/s); 2L:2434126
[vcf_to_zarr] 198667 rows in 9.56s; chunk in 0.24s (8454 rows/s); 2L:2436615
[vcf_to_zarr] all done (20699 rows/s)
CPU times: user 9.71 s, sys: 140 ms, total: 9.85 s
Wall time: 9.69 s

In [29]:
# Same Zarr conversion with n_threads=2.
%time vcf_to_zarr('../../profdata/prof_gt.vcf.gz', '../../profdata/prof_gt.zarr', n_threads=2, overwrite=True, log=sys.stderr)


[vcf_to_zarr] 65536 rows in 1.68s; chunk in 1.68s (39087 rows/s); 2L:2383306
[vcf_to_zarr] 131072 rows in 3.39s; chunk in 1.71s (38350 rows/s); 2L:2403926
[vcf_to_zarr] 196608 rows in 5.07s; chunk in 1.69s (38845 rows/s); 2L:2434126
[vcf_to_zarr] 198667 rows in 5.33s; chunk in 0.26s (7916 rows/s); 2L:2436615
[vcf_to_zarr] all done (35716 rows/s)
CPU times: user 11.5 s, sys: 124 ms, total: 11.6 s
Wall time: 5.65 s

In [19]:
%time read_vcf('../../profdata/accessibility.X.vcf.gz', chunk_length=500000, log=sys.stderr, region='X:5000000-7000000')


../../allel/io_vcf_read.py:676: UserWarning: exception occurred attempting tabix (Could not load .tbi/.csi index of ../../profdata/accessibility.X.vcf.gz); falling back to scanning to region
  'scanning to region' % e)
[read_vcf] 500000 rows in 7.04s; chunk in 7.04s (71057 rows/s); X:5499999
[read_vcf] 1000000 rows in 7.72s; chunk in 0.69s (728332 rows/s); X:5999999
[read_vcf] 1500000 rows in 8.43s; chunk in 0.71s (708055 rows/s); X:6499999
[read_vcf] 2000000 rows in 9.14s; chunk in 0.71s (705014 rows/s); X:6999999
[read_vcf] 2000001 rows in 9.14s; chunk in 0.00s (322 rows/s); X:7000001
[read_vcf] all done (218765 rows/s)
CPU times: user 9.1 s, sys: 72 ms, total: 9.18 s
Wall time: 9.18 s
Out[19]:
{'samples': array([], 
       dtype='|S32'), 'variants/ALT': array([[b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        ..., 
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T']], 
       dtype='|S1'), 'variants/CHROM': array([b'X', b'X', b'X', ..., b'X', b'X', b'X'], 
       dtype='|S12'), 'variants/FILTER_PASS': array([ True,  True,  True, ...,  True,  True,  True], dtype=bool), 'variants/ID': array([b'.', b'.', b'.', ..., b'.', b'.', b'.'], 
       dtype='|S12'), 'variants/POS': array([5000000, 5000001, 5000002, ..., 6999998, 6999999, 7000000], dtype=int32), 'variants/QUAL': array([ nan,  nan,  nan, ...,  nan,  nan,  nan], dtype=float32), 'variants/REF': array([b'C', b'C', b'C', ..., b'C', b'T', b'G'], 
       dtype='|S1')}

In [5]:
%time read_vcf('../../profdata/accessibility.X.vcf.gz', chunk_length=500000, log=sys.stderr)


[read_vcf] 500000 rows in 0.77s; chunk in 0.77s (648096 rows/s); X:500000
[read_vcf] 1000000 rows in 1.49s; chunk in 0.72s (697566 rows/s); X:1000000
[read_vcf] 1500000 rows in 2.21s; chunk in 0.72s (692344 rows/s); X:1500000
[read_vcf] 2000000 rows in 2.93s; chunk in 0.72s (691420 rows/s); X:2000000
[read_vcf] 2500000 rows in 3.64s; chunk in 0.71s (706240 rows/s); X:2500000
[read_vcf] 3000000 rows in 4.34s; chunk in 0.70s (716039 rows/s); X:3000000
[read_vcf] 3500000 rows in 5.06s; chunk in 0.72s (694415 rows/s); X:3500000
[read_vcf] 4000000 rows in 5.76s; chunk in 0.70s (709976 rows/s); X:4000000
[read_vcf] 4500000 rows in 6.46s; chunk in 0.69s (720302 rows/s); X:4500000
[read_vcf] 5000000 rows in 7.16s; chunk in 0.71s (708804 rows/s); X:5000000
[read_vcf] 5500000 rows in 7.86s; chunk in 0.70s (717294 rows/s); X:5500000
[read_vcf] 6000000 rows in 8.56s; chunk in 0.70s (712266 rows/s); X:6000000
[read_vcf] 6500000 rows in 9.29s; chunk in 0.73s (689261 rows/s); X:6500000
[read_vcf] 7000000 rows in 10.00s; chunk in 0.72s (698657 rows/s); X:7000000
[read_vcf] 7500000 rows in 10.76s; chunk in 0.75s (665124 rows/s); X:7500000
[read_vcf] 8000000 rows in 11.60s; chunk in 0.85s (591536 rows/s); X:8000000
[read_vcf] 8500000 rows in 12.78s; chunk in 1.18s (423418 rows/s); X:8500000
[read_vcf] 9000000 rows in 13.80s; chunk in 1.02s (489201 rows/s); X:9000000
[read_vcf] 9500000 rows in 14.55s; chunk in 0.75s (669570 rows/s); X:9500000
[read_vcf] 10000000 rows in 15.35s; chunk in 0.80s (628579 rows/s); X:10000000
[read_vcf] 10500000 rows in 16.08s; chunk in 0.73s (681576 rows/s); X:10500000
[read_vcf] 11000000 rows in 16.86s; chunk in 0.78s (636963 rows/s); X:11000000
[read_vcf] 11500000 rows in 17.64s; chunk in 0.77s (645603 rows/s); X:11500000
[read_vcf] 12000000 rows in 18.48s; chunk in 0.84s (591883 rows/s); X:12000000
[read_vcf] 12500000 rows in 19.29s; chunk in 0.81s (617510 rows/s); X:12500000
[read_vcf] 13000000 rows in 20.12s; chunk in 0.83s (603020 rows/s); X:13000000
[read_vcf] 13500000 rows in 20.90s; chunk in 0.78s (642415 rows/s); X:13500000
[read_vcf] 14000000 rows in 21.67s; chunk in 0.77s (651698 rows/s); X:14000000
[read_vcf] 14500000 rows in 22.39s; chunk in 0.72s (692180 rows/s); X:14500000
[read_vcf] 15000000 rows in 23.13s; chunk in 0.74s (677016 rows/s); X:15000000
[read_vcf] 15500000 rows in 23.90s; chunk in 0.77s (649790 rows/s); X:15500000
[read_vcf] 16000000 rows in 24.74s; chunk in 0.84s (592426 rows/s); X:16000000
[read_vcf] 16500000 rows in 25.47s; chunk in 0.73s (685892 rows/s); X:16500000
[read_vcf] 17000000 rows in 26.23s; chunk in 0.76s (656731 rows/s); X:17000000
[read_vcf] 17500000 rows in 26.99s; chunk in 0.75s (664302 rows/s); X:17500000
[read_vcf] 18000000 rows in 27.70s; chunk in 0.71s (699343 rows/s); X:18000000
[read_vcf] 18500000 rows in 28.42s; chunk in 0.72s (695785 rows/s); X:18500000
[read_vcf] 19000000 rows in 29.19s; chunk in 0.77s (650063 rows/s); X:19000000
[read_vcf] 19500000 rows in 29.97s; chunk in 0.78s (638425 rows/s); X:19500000
[read_vcf] 20000000 rows in 30.75s; chunk in 0.78s (641042 rows/s); X:20000000
[read_vcf] 20500000 rows in 31.70s; chunk in 0.95s (524657 rows/s); X:20500000
[read_vcf] 21000000 rows in 32.67s; chunk in 0.97s (517538 rows/s); X:21000000
[read_vcf] 21500000 rows in 33.53s; chunk in 0.86s (583308 rows/s); X:21500000
[read_vcf] 22000000 rows in 34.44s; chunk in 0.91s (549253 rows/s); X:22000000
[read_vcf] 22500000 rows in 35.29s; chunk in 0.86s (584019 rows/s); X:22500000
[read_vcf] 23000000 rows in 36.14s; chunk in 0.84s (592837 rows/s); X:23000000
[read_vcf] 23500000 rows in 37.03s; chunk in 0.89s (561498 rows/s); X:23500000
[read_vcf] 24000000 rows in 37.88s; chunk in 0.85s (585176 rows/s); X:24000000
CPU times: user 38.4 s, sys: 396 ms, total: 38.8 s
Wall time: 38.7 s
[read_vcf] 24393108 rows in 38.57s; chunk in 0.69s (569157 rows/s); X:24393108
[read_vcf] all done (632384 rows/s)
Out[5]:
{'samples': array([], 
       dtype='|S32'), 'variants/ALT': array([[b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        ..., 
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T']], 
       dtype='|S1'), 'variants/CHROM': array([b'X', b'X', b'X', ..., b'X', b'X', b'X'], 
       dtype='|S12'), 'variants/FILTER_PASS': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/ID': array([b'.', b'.', b'.', ..., b'.', b'.', b'.'], 
       dtype='|S12'), 'variants/POS': array([       1,        2,        3, ..., 24393106, 24393107, 24393108], dtype=int32), 'variants/QUAL': array([ nan,  nan,  nan, ...,  nan,  nan,  nan], dtype=float32), 'variants/REF': array([b'G', b'C', b'G', ..., b'T', b'G', b'G'], 
       dtype='|S1')}

In [10]:
%time read_vcf('../../profdata/accessibility.X.vcf.gz', chunk_length=500000, block_length=50000, n_threads=2, log=sys.stderr)


[read_vcf] 500000 rows in 0.76s; chunk in 0.76s (655865 rows/s); X:500000
[read_vcf] 1000000 rows in 1.47s; chunk in 0.70s (711281 rows/s); X:1000000
[read_vcf] 1500000 rows in 2.41s; chunk in 0.95s (528492 rows/s); X:1500000
[read_vcf] 2000000 rows in 3.15s; chunk in 0.74s (678544 rows/s); X:2000000
[read_vcf] 2500000 rows in 3.85s; chunk in 0.70s (712446 rows/s); X:2500000
[read_vcf] 3000000 rows in 4.55s; chunk in 0.70s (717525 rows/s); X:3000000
[read_vcf] 3500000 rows in 5.26s; chunk in 0.71s (705292 rows/s); X:3500000
[read_vcf] 4000000 rows in 5.96s; chunk in 0.70s (713554 rows/s); X:4000000
[read_vcf] 4500000 rows in 6.68s; chunk in 0.72s (691443 rows/s); X:4500000
[read_vcf] 5000000 rows in 7.41s; chunk in 0.73s (687898 rows/s); X:5000000
[read_vcf] 5500000 rows in 8.20s; chunk in 0.79s (630294 rows/s); X:5500000
[read_vcf] 6000000 rows in 8.93s; chunk in 0.73s (687695 rows/s); X:6000000
[read_vcf] 6500000 rows in 10.13s; chunk in 1.20s (415256 rows/s); X:6500000
[read_vcf] 7000000 rows in 11.30s; chunk in 1.17s (428159 rows/s); X:7000000
[read_vcf] 7500000 rows in 12.01s; chunk in 0.71s (701757 rows/s); X:7500000
[read_vcf] 8000000 rows in 12.72s; chunk in 0.71s (701151 rows/s); X:8000000
[read_vcf] 8500000 rows in 13.44s; chunk in 0.72s (698297 rows/s); X:8500000
[read_vcf] 9000000 rows in 14.18s; chunk in 0.73s (680422 rows/s); X:9000000
[read_vcf] 9500000 rows in 14.88s; chunk in 0.71s (709041 rows/s); X:9500000
[read_vcf] 10000000 rows in 15.61s; chunk in 0.73s (681562 rows/s); X:10000000
[read_vcf] 10500000 rows in 16.35s; chunk in 0.74s (678061 rows/s); X:10500000
[read_vcf] 11000000 rows in 17.08s; chunk in 0.73s (686943 rows/s); X:11000000
[read_vcf] 11500000 rows in 17.80s; chunk in 0.72s (690488 rows/s); X:11500000
[read_vcf] 12000000 rows in 18.56s; chunk in 0.75s (663181 rows/s); X:12000000
[read_vcf] 12500000 rows in 19.34s; chunk in 0.78s (641062 rows/s); X:12500000
[read_vcf] 13000000 rows in 20.11s; chunk in 0.77s (648421 rows/s); X:13000000
[read_vcf] 13500000 rows in 21.10s; chunk in 0.99s (504343 rows/s); X:13500000
[read_vcf] 14000000 rows in 21.81s; chunk in 0.71s (702544 rows/s); X:14000000
[read_vcf] 14500000 rows in 22.54s; chunk in 0.72s (690832 rows/s); X:14500000
[read_vcf] 15000000 rows in 23.25s; chunk in 0.72s (698183 rows/s); X:15000000
[read_vcf] 15500000 rows in 24.00s; chunk in 0.74s (671663 rows/s); X:15500000
[read_vcf] 16000000 rows in 24.74s; chunk in 0.74s (672330 rows/s); X:16000000
[read_vcf] 16500000 rows in 25.91s; chunk in 1.17s (428465 rows/s); X:16500000
[read_vcf] 17000000 rows in 27.12s; chunk in 1.22s (410367 rows/s); X:17000000
[read_vcf] 17500000 rows in 28.37s; chunk in 1.25s (400492 rows/s); X:17500000
[read_vcf] 18000000 rows in 29.39s; chunk in 1.02s (490749 rows/s); X:18000000
[read_vcf] 18500000 rows in 30.24s; chunk in 0.85s (591632 rows/s); X:18500000
[read_vcf] 19000000 rows in 31.10s; chunk in 0.87s (577033 rows/s); X:19000000
[read_vcf] 19500000 rows in 31.93s; chunk in 0.83s (603421 rows/s); X:19500000
[read_vcf] 20000000 rows in 32.69s; chunk in 0.76s (658829 rows/s); X:20000000
[read_vcf] 20500000 rows in 33.55s; chunk in 0.86s (583646 rows/s); X:20500000
[read_vcf] 21000000 rows in 34.39s; chunk in 0.84s (596808 rows/s); X:21000000
[read_vcf] 21500000 rows in 35.24s; chunk in 0.86s (584716 rows/s); X:21500000
[read_vcf] 22000000 rows in 36.54s; chunk in 1.30s (386071 rows/s); X:22000000
[read_vcf] 22500000 rows in 37.88s; chunk in 1.35s (370707 rows/s); X:22500000
[read_vcf] 23000000 rows in 39.35s; chunk in 1.47s (341209 rows/s); X:23000000
[read_vcf] 23500000 rows in 40.65s; chunk in 1.30s (384288 rows/s); X:23500000
[read_vcf] 24000000 rows in 41.50s; chunk in 0.85s (589081 rows/s); X:24000000
[read_vcf] 24393108 rows in 42.41s; chunk in 0.91s (430549 rows/s); X:24393108
[read_vcf] all done (574129 rows/s)
CPU times: user 55.8 s, sys: 556 ms, total: 56.3 s
Wall time: 42.7 s
Out[10]:
{'samples': array([], 
       dtype='|S32'), 'variants/ALT': array([[b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        ..., 
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T']], 
       dtype='|S1'), 'variants/CHROM': array([b'X', b'X', b'X', ..., b'X', b'X', b'X'], 
       dtype='|S12'), 'variants/FILTER_PASS': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/ID': array([b'.', b'.', b'.', ..., b'.', b'.', b'.'], 
       dtype='|S12'), 'variants/POS': array([       1,        2,        3, ..., 24393106, 24393107, 24393108], dtype=int32), 'variants/QUAL': array([ nan,  nan,  nan, ...,  nan,  nan,  nan], dtype=float32), 'variants/REF': array([b'G', b'C', b'G', ..., b'T', b'G', b'G'], 
       dtype='|S1')}

In [11]:
# Time a full read of the same file requesting every field (fields='*'),
# without passing any block_length / n_threads options.
# NOTE: %time is a line magic, so the call must stay on a single line.
%time read_vcf('../../profdata/accessibility.X.vcf.gz', fields='*', chunk_length=500000, log=sys.stderr)


[read_vcf] 500000 rows in 1.39s; chunk in 1.39s (360821 rows/s); X:500000
[read_vcf] 1000000 rows in 2.79s; chunk in 1.41s (355866 rows/s); X:1000000
[read_vcf] 1500000 rows in 3.75s; chunk in 0.96s (522496 rows/s); X:1500000
[read_vcf] 2000000 rows in 4.72s; chunk in 0.97s (515406 rows/s); X:2000000
[read_vcf] 2500000 rows in 5.73s; chunk in 1.01s (494436 rows/s); X:2500000
[read_vcf] 3000000 rows in 6.87s; chunk in 1.14s (438925 rows/s); X:3000000
[read_vcf] 3500000 rows in 8.22s; chunk in 1.35s (371203 rows/s); X:3500000
[read_vcf] 4000000 rows in 9.20s; chunk in 0.99s (506288 rows/s); X:4000000
[read_vcf] 4500000 rows in 10.18s; chunk in 0.98s (512664 rows/s); X:4500000
[read_vcf] 5000000 rows in 11.16s; chunk in 0.98s (508986 rows/s); X:5000000
[read_vcf] 5500000 rows in 12.13s; chunk in 0.97s (513810 rows/s); X:5500000
[read_vcf] 6000000 rows in 13.10s; chunk in 0.97s (516498 rows/s); X:6000000
[read_vcf] 6500000 rows in 14.11s; chunk in 1.00s (497744 rows/s); X:6500000
[read_vcf] 7000000 rows in 15.12s; chunk in 1.01s (493001 rows/s); X:7000000
[read_vcf] 7500000 rows in 16.15s; chunk in 1.03s (487527 rows/s); X:7500000
[read_vcf] 8000000 rows in 17.23s; chunk in 1.09s (459740 rows/s); X:8000000
[read_vcf] 8500000 rows in 18.29s; chunk in 1.06s (473213 rows/s); X:8500000
[read_vcf] 9000000 rows in 19.43s; chunk in 1.14s (440146 rows/s); X:9000000
[read_vcf] 9500000 rows in 20.45s; chunk in 1.02s (490024 rows/s); X:9500000
[read_vcf] 10000000 rows in 21.44s; chunk in 1.00s (500984 rows/s); X:10000000
[read_vcf] 10500000 rows in 22.56s; chunk in 1.11s (449063 rows/s); X:10500000
[read_vcf] 11000000 rows in 23.67s; chunk in 1.11s (449976 rows/s); X:11000000
[read_vcf] 11500000 rows in 24.83s; chunk in 1.16s (432273 rows/s); X:11500000
[read_vcf] 12000000 rows in 26.04s; chunk in 1.21s (413103 rows/s); X:12000000
[read_vcf] 12500000 rows in 27.08s; chunk in 1.05s (476899 rows/s); X:12500000
[read_vcf] 13000000 rows in 28.10s; chunk in 1.02s (490641 rows/s); X:13000000
[read_vcf] 13500000 rows in 29.10s; chunk in 1.00s (502433 rows/s); X:13500000
[read_vcf] 14000000 rows in 30.07s; chunk in 0.97s (515785 rows/s); X:14000000
[read_vcf] 14500000 rows in 31.04s; chunk in 0.97s (514577 rows/s); X:14500000
[read_vcf] 15000000 rows in 32.02s; chunk in 0.98s (511272 rows/s); X:15000000
[read_vcf] 15500000 rows in 33.13s; chunk in 1.11s (450218 rows/s); X:15500000
[read_vcf] 16000000 rows in 34.16s; chunk in 1.04s (482419 rows/s); X:16000000
[read_vcf] 16500000 rows in 35.30s; chunk in 1.14s (439949 rows/s); X:16500000
[read_vcf] 17000000 rows in 36.44s; chunk in 1.14s (438688 rows/s); X:17000000
[read_vcf] 17500000 rows in 37.47s; chunk in 1.03s (485042 rows/s); X:17500000
[read_vcf] 18000000 rows in 38.45s; chunk in 0.98s (511517 rows/s); X:18000000
[read_vcf] 18500000 rows in 39.53s; chunk in 1.08s (463024 rows/s); X:18500000
[read_vcf] 19000000 rows in 40.57s; chunk in 1.04s (479909 rows/s); X:19000000
[read_vcf] 19500000 rows in 41.95s; chunk in 1.38s (362035 rows/s); X:19500000
[read_vcf] 20000000 rows in 43.05s; chunk in 1.10s (454497 rows/s); X:20000000
[read_vcf] 20500000 rows in 44.23s; chunk in 1.18s (424404 rows/s); X:20500000
[read_vcf] 21000000 rows in 45.39s; chunk in 1.16s (430268 rows/s); X:21000000
[read_vcf] 21500000 rows in 46.61s; chunk in 1.22s (409108 rows/s); X:21500000
[read_vcf] 22000000 rows in 47.78s; chunk in 1.16s (429737 rows/s); X:22000000
[read_vcf] 22500000 rows in 48.95s; chunk in 1.17s (428309 rows/s); X:22500000
[read_vcf] 23000000 rows in 50.12s; chunk in 1.17s (426046 rows/s); X:23000000
[read_vcf] 23500000 rows in 51.31s; chunk in 1.19s (419717 rows/s); X:23500000
[read_vcf] 24000000 rows in 52.49s; chunk in 1.18s (423805 rows/s); X:24000000
[read_vcf] 24393108 rows in 53.40s; chunk in 0.91s (432403 rows/s); X:24393108
[read_vcf] all done (456805 rows/s)
CPU times: user 53.2 s, sys: 792 ms, total: 54 s
Wall time: 54 s
Out[11]:
{'samples': array([], 
       dtype='|S32'), 'variants/ALT': array([[b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        ..., 
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T'],
        [b'A', b'C', b'T']], 
       dtype='|S1'), 'variants/Accessible': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/CHROM': array([b'X', b'X', b'X', ..., b'X', b'X', b'X'], 
       dtype='|S12'), 'variants/Coverage': array([10955, 11176, 11579, ...,  3969,  3848,  1743], dtype=int32), 'variants/CoverageMQ0': array([   0,    0,    0, ..., 3933, 3821, 1727], dtype=int32), 'variants/FILTER_HighCoverage': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/FILTER_HighMQ0': array([False, False, False, ...,  True,  True,  True], dtype=bool), 'variants/FILTER_LowCoverage': array([ True,  True,  True, ...,  True,  True,  True], dtype=bool), 'variants/FILTER_LowMQ': array([ True,  True,  True, ...,  True,  True,  True], dtype=bool), 'variants/FILTER_NoCoverage': array([False, False, False, ...,  True,  True,  True], dtype=bool), 'variants/FILTER_PASS': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/FILTER_RefN': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/FILTER_RepeatDUST': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/HighCoverage': array([0, 0, 0, ..., 0, 0, 0], dtype=int32), 'variants/HighMQ0': array([  0,   0,   0, ..., 657, 636, 563], dtype=int32), 'variants/ID': array([b'.', b'.', b'.', ..., b'.', b'.', b'.'], 
       dtype='|S12'), 'variants/LowCoverage': array([505, 485, 437, ..., 644, 622, 564], dtype=int32), 'variants/LowMQ': array([622, 491, 319, ..., 659, 637, 565], dtype=int32), 'variants/LowPairing': array([ 18,  33,  64, ..., 621, 620, 538], dtype=int32), 'variants/NoCoverage': array([  0,   0,   0, ..., 106, 128, 200], dtype=int32), 'variants/POS': array([       1,        2,        3, ..., 24393106, 24393107, 24393108], dtype=int32), 'variants/QUAL': array([ nan,  nan,  nan, ...,  nan,  nan,  nan], dtype=float32), 'variants/REF': array([b'G', b'C', b'G', ..., b'T', b'G', b'G'], 
       dtype='|S1'), 'variants/RefMasked': array([ True,  True,  True, ...,  True,  True,  True], dtype=bool), 'variants/RefN': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/RepeatDUST': array([False, False, False, ..., False, False, False], dtype=bool), 'variants/RepeatMasker': array([ True,  True,  True, ...,  True,  True,  True], dtype=bool), 'variants/RepeatTRF': array([False, False, False, ..., False, False, False], dtype=bool)}

In [9]:
# Time conversion of the X accessibility VCF to a Zarr store using the default
# field selection. buffer_size=2**15 is 32768 (presumably the input read
# buffer in bytes -- TODO confirm); overwrite=True presumably replaces any
# output left by a previous run.
%time vcf_to_zarr('../../profdata/accessibility.X.vcf.gz', '../../profdata/accessibility.X.zarr', buffer_size=2**15, chunk_length=50000, overwrite=True)


CPU times: user 38.2 s, sys: 1.08 s, total: 39.3 s
Wall time: 37.9 s

In [10]:
# Same VCF -> Zarr conversion as the previous cell, but requesting every field
# (fields='*') so the two timings can be compared. Writes to the same output
# path, so it replaces the store produced by the previous cell.
%time vcf_to_zarr('../../profdata/accessibility.X.vcf.gz', '../../profdata/accessibility.X.zarr', fields='*', buffer_size=2**15, chunk_length=50000, overwrite=True)


CPU times: user 1min 2s, sys: 2.85 s, total: 1min 5s
Wall time: 1min 1s

In [37]:
# Count every line (header lines plus variant records) in the gzipped
# ag1000g partial VCF, as a size reference for the timings that follow.
!zcat ../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz | wc -l


39984

In [12]:
# Time conversion of the ag1000g partial VCF to Zarr with every field
# (fields='*'), logging progress to stderr per 10,000-row chunk.
# No block_length / n_threads are passed, so the function defaults apply.
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields='*', chunk_length=10000, overwrite=True, log=sys.stderr)


[vcf_to_zarr] 10000 rows in 4.51s; chunk in 4.51s (2217 rows/s); 2L:98451
[vcf_to_zarr] 20000 rows in 9.22s; chunk in 4.71s (2125 rows/s); 2L:196622
[vcf_to_zarr] 30000 rows in 13.71s; chunk in 4.49s (2226 rows/s); 2L:301246
[vcf_to_zarr] 39894 rows in 18.73s; chunk in 5.02s (1969 rows/s); 2L:399982
CPU times: user 20.2 s, sys: 400 ms, total: 20.6 s
Wall time: 19.2 s
[vcf_to_zarr] all done (2087 rows/s)

In [17]:
# Repeat the all-fields ag1000g conversion with block_length=1000 and
# n_threads=4 to compare wall time against the previous cell.
# (Presumably each 10,000-row chunk is parsed as 1,000-row blocks spread
# across 4 threads -- TODO confirm against the vcf_to_zarr docstring.)
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields='*', chunk_length=10000, block_length=1000, n_threads=4, overwrite=True, log=sys.stderr)


[vcf_to_zarr] 10000 rows in 3.01s; chunk in 3.01s (3321 rows/s); 2L:98451
[vcf_to_zarr] 20000 rows in 6.26s; chunk in 3.25s (3074 rows/s); 2L:196622
[vcf_to_zarr] 30000 rows in 9.38s; chunk in 3.12s (3203 rows/s); 2L:301246
[vcf_to_zarr] 39894 rows in 12.68s; chunk in 3.29s (3004 rows/s); 2L:399982
CPU times: user 22.3 s, sys: 400 ms, total: 22.7 s
Wall time: 13.2 s
[vcf_to_zarr] all done (3039 rows/s)

In [18]:
# Time the ag1000g conversion restricted to fields='INFO' with n_threads=1,
# writing to the same Zarr path as the two cells above (overwrite=True).
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields='INFO', chunk_length=10000, block_length=1000, n_threads=1, overwrite=True, log=sys.stderr)


[vcf_to_zarr] 10000 rows in 2.97s; chunk in 2.97s (3369 rows/s); 2L:98451
[vcf_to_zarr] 20000 rows in 5.64s; chunk in 2.67s (3741 rows/s); 2L:196622
[vcf_to_zarr] 30000 rows in 8.20s; chunk in 2.56s (3903 rows/s); 2L:301246
CPU times: user 11 s, sys: 60 ms, total: 11 s
Wall time: 11 s
[vcf_to_zarr] 39894 rows in 10.97s; chunk in 2.77s (3574 rows/s); 2L:399982
[vcf_to_zarr] all done (3627 rows/s)

In [19]:
# Time an in-memory read of the same file with fields='INFO' and n_threads=1,
# for comparison with the vcf_to_zarr timing in the previous cell.
# The returned dict of arrays is displayed as the cell output.
%time read_vcf('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', fields='INFO', chunk_length=10000, n_threads=1, log=sys.stderr)


[read_vcf] 10000 rows in 3.03s; chunk in 3.03s (3297 rows/s); 2L:98451
[read_vcf] 20000 rows in 5.65s; chunk in 2.61s (3827 rows/s); 2L:196622
[read_vcf] 30000 rows in 8.18s; chunk in 2.54s (3938 rows/s); 2L:301246
CPU times: user 10.8 s, sys: 64 ms, total: 10.9 s
Wall time: 10.9 s
[read_vcf] 39894 rows in 10.88s; chunk in 2.69s (3672 rows/s); 2L:399982
[read_vcf] all done (3667 rows/s)
Out[19]:
{'variants/ABHet': array([        nan,         nan,  0.667     , ...,         nan,
         0.64999998,  0.755     ], dtype=float32),
 'variants/ABHom': array([ 0.801     ,  0.74699998,  0.97600001, ...,         nan,
         0.99900001,  0.93300003], dtype=float32),
 'variants/AC': array([[ 4, -1, -1],
        [38, -1, -1],
        [ 8, -1, -1],
        ..., 
        [ 1,  1, -1],
        [ 4, -1, -1],
        [15, -1, -1]], dtype=int32),
 'variants/AF': array([[ 0.14300001,         nan,         nan],
        [ 0.54299998,         nan,         nan],
        [ 0.068     ,         nan,         nan],
        ..., 
        [ 0.0006536 ,  0.0006536 ,         nan],
        [ 0.002614  ,         nan,         nan],
        [ 0.009804  ,         nan,         nan]], dtype=float32),
 'variants/AN': array([  28,   70,  118, ..., 1530, 1530, 1530], dtype=int32),
 'variants/ANN': array([b'T|intergenic', b'A|intergenic', b'A|intergenic', ...,
        b'A|intergenic', b'A|intergenic', b'G|intergenic'], 
       dtype='|S12'),
 'variants/Accessible': array([False, False, False, ..., False, False, False], dtype=bool),
 'variants/BaseCounts': array([[    3,  4541,     2,  2526],
        [ 4713,     2,  3916,     4],
        [   48,     1,  8911,     6],
        ..., 
        [   40, 31716,     5,    32],
        [  167,     4, 31361,    14],
        [27816,    15,  2167,    50]], dtype=int32),
 'variants/BaseQRankSum': array([-2.00200009, -1.52600002,  0.294     , ..., -5.63100004,
        -3.30200005,  3.13499999], dtype=float32),
 'variants/Coverage': array([ 7198,  9242, 10797, ..., 30252, 29969, 28441], dtype=int32),
 'variants/CoverageMQ0': array([ 7161,  9153, 10577, ...,  1234,   978,   320], dtype=int32),
 'variants/DP': array([  118,   344,   669, ..., 29243, 28995, 26846], dtype=int32),
 'variants/DS': array([False, False, False, ..., False, False, False], dtype=bool),
 'variants/Dels': array([ 0.,  0.,  0., ...,  0.,  0.,  0.], dtype=float32),
 'variants/FS': array([  0.        ,   0.        ,   0.        , ...,   0.        ,
          9.67800045,  67.66300201], dtype=float32),
 'variants/HRun': array([ 0,  1,  0, ..., -1,  1,  1], dtype=int32),
 'variants/HW': array([  41.70000076,  151.8999939 ,   45.90000153, ...,    0.        ,
           0.        ,    0.        ], dtype=float32),
 'variants/HaplotypeScore': array([ 0.0432    ,  0.0549    ,  0.0828    , ...,  1.06560004,
         1.38800001,  1.33159995], dtype=float32),
 'variants/HighCoverage': array([ 1,  1,  1, ..., 20, 17, 10], dtype=int32),
 'variants/HighMQ0': array([763, 765, 764, ...,  50,  29,   2], dtype=int32),
 'variants/InbreedingCoeff': array([ 0.19760001,  0.36289999,  0.0037    , ...,  0.0016    ,
        -0.0059    , -0.0119    ], dtype=float32),
 'variants/LOF': array([b'', b'', b'', ..., b'', b'', b''], 
       dtype='|S12'),
 'variants/LowCoverage': array([701, 589, 464, ...,   4,   5,   9], dtype=int32),
 'variants/LowMQ': array([763, 765, 764, ...,   0,   0,   0], dtype=int32),
 'variants/LowPairing': array([551, 664, 723, ...,   5,   2,   2], dtype=int32),
 'variants/MLEAC': array([[-1, -1, -1],
        [-1, -1, -1],
        [-1, -1, -1],
        ..., 
        [-1, -1, -1],
        [-1, -1, -1],
        [-1, -1, -1]], dtype=int32),
 'variants/MLEAF': array([[ nan,  nan,  nan],
        [ nan,  nan,  nan],
        [ nan,  nan,  nan],
        ..., 
        [ nan,  nan,  nan],
        [ nan,  nan,  nan],
        [ nan,  nan,  nan]], dtype=float32),
 'variants/MQ': array([  1.55999994,   2.32999992,   2.77999997, ...,  45.52999878,
         45.86000061,  46.75      ], dtype=float32),
 'variants/MQ0': array([7022, 8515, 8708, ..., 1167,  934,  335], dtype=int32),
 'variants/MQRankSum': array([ -1.477     ,  -4.51399994,   1.75699997, ...,   0.35800001,
          2.15400004, -69.93099976], dtype=float32),
 'variants/NDA': array([1, 1, 1, ..., 2, 1, 2], dtype=int32),
 'variants/NMD': array([b'', b'', b'', ..., b'', b'', b''], 
       dtype='|S12'),
 'variants/NoCoverage': array([2, 0, 1, ..., 0, 0, 0], dtype=int32),
 'variants/OND': array([ 0.198     ,  0.26100001,  0.022     , ...,         nan,
         0.001423  ,  0.068     ], dtype=float32),
 'variants/QD': array([ 1.66999996,  2.1099999 ,  4.11999989, ...,  7.61000013,
         9.31999969,  0.14      ], dtype=float32),
 'variants/RPA': array([-1, -1, -1, ..., -1, -1, -1], dtype=int32),
 'variants/RU': array([b'', b'', b'', ..., b'', b'', b''], 
       dtype='|S12'),
 'variants/ReadPosRankSum': array([ -0.49200001,  -1.60300004,  -1.08399999, ...,  -1.421     ,
          2.796     , -10.38899994], dtype=float32),
 'variants/RefMasked': array([ True,  True,  True, ..., False, False, False], dtype=bool),
 'variants/RefN': array([False, False, False, ..., False, False, False], dtype=bool),
 'variants/RepeatDUST': array([False, False, False, ..., False, False, False], dtype=bool),
 'variants/RepeatMasker': array([ True,  True,  True, ..., False, False, False], dtype=bool),
 'variants/RepeatTRF': array([False, False, False, ...,  True,  True,  True], dtype=bool),
 'variants/STR': array([False, False, False, ..., False, False, False], dtype=bool),
 'variants/VariantType': array([b'SNP', b'SNP', b'SNP', ..., b'MULTIALLELIC', b'SNP', b'SNP'], 
       dtype='|S12')}

Check region and tabix


In [1]:
# Setup for the region/tabix checks (execution count restarts at 1 here).
import sys
# Put the repository root first on sys.path so that `allel` is imported from
# the working tree two directories up.
sys.path.insert(0, '../..')
from allel.io_vcf_read import read_vcf

In [2]:
# Region query on the ag1000g partial VCF, loading only CHROM and POS.
# No `tabix` argument is given, so the function's default is used.
read_vcf(
    '../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz',
    region='2L:1-100000',
    fields=['CHROM', 'POS'],
    chunk_length=10000,
    log=sys.stderr,
)


[read_vcf] 10000 rows in 0.96s; chunk in 0.96s (10450 rows/s); 2L:98451
[read_vcf] 10301 rows in 0.99s; chunk in 0.03s (8646 rows/s); :0
[read_vcf] all done (10378 rows/s)
Out[2]:
{'variants/CHROM': array([b'2L', b'2L', b'2L', ..., b'2L', b'2L', b'2L'], 
       dtype='|S12'),
 'variants/POS': array([  103,   163,   192, ..., 99993, 99996, 99997], dtype=int32)}

In [3]:
# Region query on the X accessibility VCF with the default `tabix` setting.
# In the recorded run no .tbi/.csi index could be loaded for this file, so
# read_vcf warned and fell back to scanning forward to the region.
read_vcf(
    '../../profdata/accessibility.X.vcf.gz',
    region='X:1000000-2000000',
    fields=['CHROM', 'POS'],
    chunk_length=100000,
    log=sys.stderr,
)


../../allel/io_vcf_read.py:678: UserWarning: exception occurred attempting tabix (Could not load .tbi/.csi index of ../../profdata/accessibility.X.vcf.gz); falling back to scanning to region
  'scanning to region' % e)
[read_vcf] 100000 rows in 1.43s; chunk in 1.43s (69955 rows/s); X:1099999
[read_vcf] 200000 rows in 1.59s; chunk in 0.16s (636490 rows/s); X:1199999
[read_vcf] 300000 rows in 1.74s; chunk in 0.15s (646018 rows/s); X:1299999
[read_vcf] 400000 rows in 1.90s; chunk in 0.16s (625514 rows/s); X:1399999
[read_vcf] 500000 rows in 2.06s; chunk in 0.16s (628348 rows/s); X:1499999
[read_vcf] 600000 rows in 2.22s; chunk in 0.16s (644555 rows/s); X:1599999
[read_vcf] 700000 rows in 2.37s; chunk in 0.15s (645166 rows/s); X:1699999
[read_vcf] 800000 rows in 2.53s; chunk in 0.16s (634715 rows/s); X:1799999
[read_vcf] 900000 rows in 2.70s; chunk in 0.17s (597541 rows/s); X:1899999
[read_vcf] 1000000 rows in 2.85s; chunk in 0.15s (647056 rows/s); X:1999999
[read_vcf] 1000001 rows in 2.85s; chunk in 0.00s (1495 rows/s); X:2000001
[read_vcf] all done (350738 rows/s)
Out[3]:
{'variants/CHROM': array([b'X', b'X', b'X', ..., b'X', b'X', b'X'], 
       dtype='|S12'),
 'variants/POS': array([1000000, 1000001, 1000002, ..., 1999998, 1999999, 2000000], dtype=int32)}

In [4]:
# Same region query as the previous cell, but with tabix=None.
# Presumably this skips the tabix attempt and scans the file directly; the
# recorded output shows no fallback warning -- TODO confirm in the docstring.
read_vcf(
    '../../profdata/accessibility.X.vcf.gz',
    region='X:1000000-2000000',
    tabix=None,
    fields=['CHROM', 'POS'],
    chunk_length=100000,
    log=sys.stderr,
)


[read_vcf] 100000 rows in 1.46s; chunk in 1.46s (68691 rows/s); X:1099999
[read_vcf] 200000 rows in 1.62s; chunk in 0.16s (626667 rows/s); X:1199999
[read_vcf] 300000 rows in 1.77s; chunk in 0.16s (638279 rows/s); X:1299999
[read_vcf] 400000 rows in 1.93s; chunk in 0.16s (614899 rows/s); X:1399999
[read_vcf] 500000 rows in 2.10s; chunk in 0.17s (591599 rows/s); X:1499999
[read_vcf] 600000 rows in 2.27s; chunk in 0.17s (605465 rows/s); X:1599999
[read_vcf] 700000 rows in 2.43s; chunk in 0.16s (611707 rows/s); X:1699999
[read_vcf] 800000 rows in 2.60s; chunk in 0.17s (595219 rows/s); X:1799999
[read_vcf] 900000 rows in 2.77s; chunk in 0.17s (587222 rows/s); X:1899999
[read_vcf] 1000000 rows in 2.95s; chunk in 0.18s (561287 rows/s); X:1999999
[read_vcf] 1000001 rows in 2.95s; chunk in 0.00s (782 rows/s); X:2000001
[read_vcf] all done (338868 rows/s)
Out[4]:
{'variants/CHROM': array([b'X', b'X', b'X', ..., b'X', b'X', b'X'], 
       dtype='|S12'),
 'variants/POS': array([1000000, 1000001, 1000002, ..., 1999998, 1999999, 2000000], dtype=int32)}

Profile INFO


In [1]:
# Setup for this section (execution count restarts at 1 here).
import sys
# Put the repository root first on sys.path so that `allel` is imported from
# the working tree two directories up.
sys.path.insert(0, '../..')
import cProfile
from allel.io_vcf_read import read_vcf, vcf_to_npz, vcf_to_hdf5, vcf_to_zarr, ANNTransformer, vcf_to_csv, \
    vcf_to_dataframe, vcf_to_recarray
# Disabled: direct imports of parser internals from allel.opt.io_vcf_read.
# from allel.opt.io_vcf_read import (iter_vcf, 
#                                    CalldataParser_parse, 
#                                    GenotypeInt8Parser_parse, 
#                                    ParserContext_next, 
#                                    BufferedReader_read
#                                  )

# Paths (relative to this notebook) of the small fixture VCF and of a
# profiling VCF.
sample_vcf_fn = '../../fixture/sample.vcf'
prof_vcf_fn = '../../profdata/2L_2358158_2431617.vcf'

In [2]:
# Export every field for X:1000000-2000000 of the accessibility VCF to a
# tab-separated file, logging per-chunk progress to stderr.
vcf_to_csv(
    '../../profdata/accessibility.X.vcf.gz',
    '../../profdata/accessibility.X.tsv',
    region='X:1000000-2000000',
    tabix=None,
    fields='*',
    sep='\t',
    chunk_length=100000,
    log=sys.stderr,
)


[vcf_to_csv] 100000 rows in 1.47s; chunk in 1.47s (68026 rows/s); X:1099999
[vcf_to_csv] 200000 rows in 2.90s; chunk in 1.43s (69884 rows/s); X:1199999
[vcf_to_csv] 300000 rows in 4.27s; chunk in 1.37s (73235 rows/s); X:1299999
[vcf_to_csv] 400000 rows in 5.63s; chunk in 1.36s (73475 rows/s); X:1399999
[vcf_to_csv] 500000 rows in 6.98s; chunk in 1.36s (73719 rows/s); X:1499999
[vcf_to_csv] 600000 rows in 8.35s; chunk in 1.36s (73444 rows/s); X:1599999
[vcf_to_csv] 700000 rows in 9.70s; chunk in 1.36s (73627 rows/s); X:1699999
[vcf_to_csv] 800000 rows in 11.11s; chunk in 1.41s (71133 rows/s); X:1799999
[vcf_to_csv] 900000 rows in 12.49s; chunk in 1.38s (72488 rows/s); X:1899999
[vcf_to_csv] 1000000 rows in 13.87s; chunk in 1.38s (72423 rows/s); X:1999999
[vcf_to_csv] 1000001 rows in 15.05s; chunk in 1.18s (0 rows/s); X:2000001
[vcf_to_csv] all done (66422 rows/s)

In [3]:
# Show the header row and first records of the TSV at the path the
# vcf_to_csv cell wrote to, as a quick visual check of the export.
!head ../../profdata/accessibility.X.tsv


CHROM	POS	ID	REF	ALT_1	ALT_2	ALT_3	QUAL	HighCoverage	RepeatTRF	Coverage	LowPairing	Accessible	CoverageMQ0	RefN	HighMQ0	NoCoverage	LowMQ	RepeatMasker	RefMasked	LowCoverage	RepeatDUST	FILTER_PASS	FILTER_HighCoverage	FILTER_HighMQ0	FILTER_NoCoverage	FILTER_LowMQ	FILTER_LowCoverage	FILTER_RefN	FILTER_RepeatDUST	numalt	svlen_1	svlen_2	svlen_3
X	1000000	.	T	A	C	T		3	False	25835	2	True	2	False	0	0	0	False	False	1	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000001	.	C	A	C	T		2	False	25854	2	True	2	False	0	0	0	False	False	1	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000002	.	A	A	C	T		2	False	25708	2	True	2	False	0	0	0	False	False	1	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000003	.	C	A	C	T		2	False	25662	2	True	2	False	0	0	0	False	False	1	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000004	.	A	A	C	T		2	False	25626	2	True	2	False	0	0	0	False	False	1	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000005	.	G	A	C	T		2	False	25656	2	True	2	False	0	0	0	False	False	1	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000006	.	G	A	C	T		2	False	25479	2	True	2	False	0	0	0	False	False	2	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000007	.	C	A	C	T		2	False	25469	2	True	2	False	0	0	0	False	False	2	False	True	False	False	False	False	False	False	False	4	0	0	0
X	1000008	.	T	A	C	T		2	False	25489	1	True	2	False	0	0	0	False	False	3	False	True	False	False	False	False	False	False	False	4	0	0	0

In [2]:
# Load every field for X:1000000-1500000 of the accessibility VCF as a
# DataFrame and preview the first rows.
df = vcf_to_dataframe(
    '../../profdata/accessibility.X.vcf.gz',
    region='X:1000000-1500000',
    tabix=None,
    fields='*',
    chunk_length=100000,
    log=sys.stderr,
)
df.head()


[vcf_to_dataframe] 100000 rows in 1.46s; chunk in 1.46s (68271 rows/s); X:1099999
[vcf_to_dataframe] 200000 rows in 1.68s; chunk in 0.21s (466964 rows/s); X:1199999
[vcf_to_dataframe] 300000 rows in 1.89s; chunk in 0.21s (466957 rows/s); X:1299999
[vcf_to_dataframe] 400000 rows in 2.11s; chunk in 0.21s (471392 rows/s); X:1399999
[vcf_to_dataframe] 500000 rows in 2.31s; chunk in 0.21s (486027 rows/s); X:1499999
[vcf_to_dataframe] 500001 rows in 2.31s; chunk in 0.00s (336 rows/s); X:1500001
[vcf_to_dataframe] all done (216024 rows/s)
Out[2]:
CHROM POS ID REF ALT_1 ALT_2 ALT_3 QUAL RefMasked LowMQ ... FILTER_HighMQ0 FILTER_NoCoverage FILTER_RefN FILTER_RepeatDUST FILTER_LowCoverage FILTER_HighCoverage numalt svlen_1 svlen_2 svlen_3
0 X 1000000 . T A C T NaN False 0 ... False False False False False False 4 0 0 0
1 X 1000001 . C A C T NaN False 0 ... False False False False False False 4 0 0 0
2 X 1000002 . A A C T NaN False 0 ... False False False False False False 4 0 0 0
3 X 1000003 . C A C T NaN False 0 ... False False False False False False 4 0 0 0
4 X 1000004 . A A C T NaN False 0 ... False False False False False False 4 0 0 0

5 rows × 34 columns


In [3]:
# Load the same region and fields as a NumPy record array and display it.
ra = vcf_to_recarray(
    '../../profdata/accessibility.X.vcf.gz',
    region='X:1000000-1500000',
    tabix=None,
    fields='*',
    chunk_length=100000,
    log=sys.stderr,
)
ra


[vcf_to_recarray] 100000 rows in 1.54s; chunk in 1.54s (64981 rows/s); X:1099999
[vcf_to_recarray] 200000 rows in 1.85s; chunk in 0.31s (317530 rows/s); X:1199999
[vcf_to_recarray] 300000 rows in 2.09s; chunk in 0.24s (418168 rows/s); X:1299999
[vcf_to_recarray] 400000 rows in 2.31s; chunk in 0.21s (469614 rows/s); X:1399999
[vcf_to_recarray] 500000 rows in 2.51s; chunk in 0.21s (478443 rows/s); X:1499999
[vcf_to_recarray] 500001 rows in 2.52s; chunk in 0.00s (278 rows/s); X:1500001
[vcf_to_recarray] all done (198483 rows/s)
Out[3]:
array([ (b'X', 1000000, b'.', b'T', b'A', b'C', b'T', nan, False, 0, 0, True, 2, False, False, 2, False, 1, 0, False, 25835, 3, True, False, False, False, False, False, False, False, 4, 0, 0, 0),
       (b'X', 1000001, b'.', b'C', b'A', b'C', b'T', nan, False, 0, 0, True, 2, False, False, 2, False, 1, 0, False, 25854, 2, True, False, False, False, False, False, False, False, 4, 0, 0, 0),
       (b'X', 1000002, b'.', b'A', b'A', b'C', b'T', nan, False, 0, 0, True, 2, False, False, 2, False, 1, 0, False, 25708, 2, True, False, False, False, False, False, False, False, 4, 0, 0, 0),
       ...,
       (b'X', 1499998, b'.', b'G', b'A', b'C', b'T', nan, False, 0, 0, True, 12, False, False, 4, False, 9, 0, False, 21716, 0, True, False, False, False, False, False, False, False, 4, 0, 0, 0),
       (b'X', 1499999, b'.', b'C', b'A', b'C', b'T', nan, False, 0, 0, True, 12, False, False, 4, False, 11, 0, False, 21698, 0, True, False, False, False, False, False, False, False, 4, 0, 0, 0),
       (b'X', 1500000, b'.', b'G', b'A', b'C', b'T', nan, False, 0, 0, True, 12, False, False, 4, False, 12, 0, False, 21756, 0, True, False, False, False, False, False, False, False, 4, 0, 0, 0)], 
      dtype=(numpy.record, [('CHROM', 'S12'), ('POS', '<i4'), ('ID', 'S12'), ('REF', 'S30'), ('ALT_1', 'S30'), ('ALT_2', 'S30'), ('ALT_3', 'S30'), ('QUAL', '<f4'), ('RefMasked', '?'), ('LowMQ', '<i4'), ('HighMQ0', '<i4'), ('Accessible', '?'), ('CoverageMQ0', '<i4'), ('RepeatDUST', '?'), ('RepeatMasker', '?'), ('LowPairing', '<i4'), ('RepeatTRF', '?'), ('LowCoverage', '<i4'), ('NoCoverage', '<i4'), ('RefN', '?'), ('Coverage', '<i4'), ('HighCoverage', '<i4'), ('FILTER_PASS', '?'), ('FILTER_LowMQ', '?'), ('FILTER_HighMQ0', '?'), ('FILTER_NoCoverage', '?'), ('FILTER_RefN', '?'), ('FILTER_RepeatDUST', '?'), ('FILTER_LowCoverage', '?'), ('FILTER_HighCoverage', '?'), ('numalt', '<i4'), ('svlen_1', '<i4'), ('svlen_2', '<i4'), ('svlen_3', '<i4')]))

In [4]:
# Wrap the record array `ra` in an allel.VariantTable so it is displayed as a table.
# NOTE(review): `ra` is not defined in this cell; it must come from an earlier
# cell (presumably the vcf_to_recarray call whose output precedes this cell) -- confirm.
# NOTE(review): this import sits mid-notebook; consider moving it to the first cell.
import allel
allel.VariantTable(ra)


Out[4]:
<VariantTable shape=(500001,) dtype=(numpy.record, [('CHROM', 'S12'), ('POS', '<i4'), ('ID', 'S12'), ('REF', 'S30'), ('ALT_1', 'S30'), ('ALT_2', 'S30'), ('ALT_3', 'S30'), ('QUAL', '<f4'), ('RefMasked', '?'), ('LowMQ', '<i4'), ('HighMQ0', '<i4'), ('Accessible', '?'), ('CoverageMQ0', '<i4'), ('RepeatDUST', '?'), ('RepeatMasker', '?'), ('LowPairing', '<i4'), ('RepeatTRF', '?'), ('LowCoverage', '<i4'), ('NoCoverage', '<i4'), ('RefN', '?'), ('Coverage', '<i4'), ('HighCoverage', '<i4'), ('FILTER_PASS', '?'), ('FILTER_LowMQ', '?'), ('FILTER_HighMQ0', '?'), ('FILTER_NoCoverage', '?'), ('FILTER_RefN', '?'), ('FILTER_RepeatDUST', '?'), ('FILTER_LowCoverage', '?'), ('FILTER_HighCoverage', '?'), ('numalt', '<i4'), ('svlen_1', '<i4'), ('svlen_2', '<i4'), ('svlen_3', '<i4')])>
CHROMPOSIDREFALT_1ALT_2ALT_3QUALRefMaskedLowMQHighMQ0AccessibleCoverageMQ0RepeatDUSTRepeatMaskerLowPairingRepeatTRFLowCoverageNoCoverageRefNCoverageHighCoverageFILTER_PASSFILTER_LowMQFILTER_HighMQ0FILTER_NoCoverageFILTER_RefNFILTER_RepeatDUSTFILTER_LowCoverageFILTER_HighCoveragenumaltsvlen_1svlen_2svlen_3
0b'X'1000000b'.'b'T'b'A'b'C'b'T'nanFalse00True2FalseFalse2False10False258353TrueFalseFalseFalseFalseFalseFalseFalse4000
1b'X'1000001b'.'b'C'b'A'b'C'b'T'nanFalse00True2FalseFalse2False10False258542TrueFalseFalseFalseFalseFalseFalseFalse4000
2b'X'1000002b'.'b'A'b'A'b'C'b'T'nanFalse00True2FalseFalse2False10False257082TrueFalseFalseFalseFalseFalseFalseFalse4000
......
499998b'X'1499998b'.'b'G'b'A'b'C'b'T'nanFalse00True12FalseFalse4False90False217160TrueFalseFalseFalseFalseFalseFalseFalse4000
499999b'X'1499999b'.'b'C'b'A'b'C'b'T'nanFalse00True12FalseFalse4False110False216980TrueFalseFalseFalseFalseFalseFalseFalse4000
500000b'X'1500000b'.'b'G'b'A'b'C'b'T'nanFalse00True12FalseFalse4False120False217560TrueFalseFalseFalseFalseFalseFalseFalse4000

In [2]:
# Read only the ANN field, stored as fixed-width 200-byte strings ('S200'),
# in chunks of 10000 rows with progress logged to stderr.
# The result is a dict with the single key 'variants/ANN'.
read_vcf('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', fields='ANN', types={'ANN': 'S200'}, chunk_length=10000, log=sys.stderr)


[read_vcf] 10000 rows in 2.54s; chunk in 2.54s (3934 rows/s); 2L:98451
[read_vcf] 20000 rows in 5.02s; chunk in 2.48s (4031 rows/s); 2L:196622
[read_vcf] 30000 rows in 7.44s; chunk in 2.41s (4144 rows/s); 2L:301246
[read_vcf] 39894 rows in 10.02s; chunk in 2.58s (3835 rows/s); :0
[read_vcf] all done (3983 rows/s)
Out[2]:
{'variants/ANN': array([ b'T|intergenic_region|MODIFIER|AGAP004677|AGAP004677|intergenic_region|AGAP004677|||||||||',
        b'A|intergenic_region|MODIFIER|AGAP004677|AGAP004677|intergenic_region|AGAP004677|||||||||',
        b'A|intergenic_region|MODIFIER|AGAP004677|AGAP004677|intergenic_region|AGAP004677|||||||||',
        ...,
        b'A|intergenic_region|MODIFIER|AGAP004681-AGAP004682|AGAP004681-AGAP004682|intergenic_region|AGAP004681-AGAP004682|||||||||',
        b'A|intergenic_region|MODIFIER|AGAP004681-AGAP004682|AGAP004681-AGAP004682|intergenic_region|AGAP004681-AGAP004682|||||||||',
        b'G|intergenic_region|MODIFIER|AGAP004681-AGAP004682|AGAP004681-AGAP004682|intergenic_region|AGAP004681-AGAP004682|||||||||'], 
       dtype='|S200')}

In [3]:
# Read the ANN field again, this time passing an ANNTransformer so the result
# holds separate 'variants/ANN_*' arrays (Allele, Annotation, Gene_ID, ...).
# NOTE(review): ANNTransformer is not among the names imported in the first
# cell of this notebook -- confirm it is in scope before this cell runs.
# NOTE(review): the recorded output shows Gene_ID/Gene_Name/Feature_ID as 'S14',
# with values cut to b'AGAP004681-AGA'; presumably the transformer's default
# string width truncates longer IDs -- confirm whether wider types are needed.
read_vcf('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', fields='ANN', transformers=[ANNTransformer()], chunk_length=10000, log=sys.stderr)


[read_vcf] 10000 rows in 2.55s; chunk in 2.55s (3915 rows/s); 2L:98451
[read_vcf] 20000 rows in 5.05s; chunk in 2.50s (4002 rows/s); 2L:196622
[read_vcf] 30000 rows in 7.48s; chunk in 2.43s (4112 rows/s); 2L:301246
[read_vcf] 39894 rows in 10.07s; chunk in 2.59s (3821 rows/s); :0
[read_vcf] all done (3960 rows/s)
Out[3]:
{'variants/ANN_AA': array([[-1, -1],
        [-1, -1],
        [-1, -1],
        ..., 
        [-1, -1],
        [-1, -1],
        [-1, -1]], dtype=int32),
 'variants/ANN_Allele': array([b'T', b'A', b'A', ..., b'A', b'A', b'G'], 
       dtype='|S1'),
 'variants/ANN_Annotation': array([b'intergenic_region', b'intergenic_region', b'intergenic_region',
        ..., b'intergenic_region', b'intergenic_region',
        b'intergenic_region'], 
       dtype='|S34'),
 'variants/ANN_Annotation_Impact': array([b'MODIFIER', b'MODIFIER', b'MODIFIER', ..., b'MODIFIER',
        b'MODIFIER', b'MODIFIER'], 
       dtype='|S8'),
 'variants/ANN_CDS': array([[-1, -1],
        [-1, -1],
        [-1, -1],
        ..., 
        [-1, -1],
        [-1, -1],
        [-1, -1]], dtype=int32),
 'variants/ANN_Distance': array([-1, -1, -1, ..., -1, -1, -1], dtype=int32),
 'variants/ANN_Feature_ID': array([b'AGAP004677', b'AGAP004677', b'AGAP004677', ..., b'AGAP004681-AGA',
        b'AGAP004681-AGA', b'AGAP004681-AGA'], 
       dtype='|S14'),
 'variants/ANN_Feature_Type': array([b'intergenic_region', b'intergenic_region', b'intergenic_region',
        ..., b'intergenic_region', b'intergenic_region',
        b'intergenic_region'], 
       dtype='|S20'),
 'variants/ANN_Gene_ID': array([b'AGAP004677', b'AGAP004677', b'AGAP004677', ..., b'AGAP004681-AGA',
        b'AGAP004681-AGA', b'AGAP004681-AGA'], 
       dtype='|S14'),
 'variants/ANN_Gene_Name': array([b'AGAP004677', b'AGAP004677', b'AGAP004677', ..., b'AGAP004681-AGA',
        b'AGAP004681-AGA', b'AGAP004681-AGA'], 
       dtype='|S14'),
 'variants/ANN_HGVS_c': array([b'', b'', b'', ..., b'', b'', b''], 
       dtype='|S16'),
 'variants/ANN_HGVS_p': array([b'', b'', b'', ..., b'', b'', b''], 
       dtype='|S16'),
 'variants/ANN_Rank': array([[-1, -1],
        [-1, -1],
        [-1, -1],
        ..., 
        [-1, -1],
        [-1, -1],
        [-1, -1]], dtype=int8),
 'variants/ANN_Transcript_BioType': array([b'', b'', b'', ..., b'', b'', b''], 
       dtype='|S20'),
 'variants/ANN_cDNA': array([[-1, -1],
        [-1, -1],
        [-1, -1],
        ..., 
        [-1, -1],
        [-1, -1],
        [-1, -1]], dtype=int32)}

In [ ]:


In [ ]:


In [ ]:


In [41]:
# Time a VCF -> Zarr conversion of all fields (fields='*') with n_threads=4,
# chunk_length=10000 and block_length=1000. overwrite=True is passed, and the
# output path is the same one used by the other vcf_to_zarr timing cells.
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields='*', buffer_size=2**15, chunk_length=10000, block_length=1000, n_threads=4, overwrite=True)


CPU times: user 24.8 s, sys: 248 ms, total: 25.1 s
Wall time: 14.5 s

In [ ]:


In [17]:
# Time a VCF -> Zarr conversion restricted to the CHROM and POS fields
# (chunk_length=50000, no n_threads argument given).
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields=['CHROM', 'POS'], buffer_size=2**15, chunk_length=50000, overwrite=True)


CPU times: user 10.5 s, sys: 32 ms, total: 10.5 s
Wall time: 10.5 s

In [2]:
# Time a VCF -> Zarr conversion of every calldata field (fields='calldata/*').
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields='calldata/*', buffer_size=2**15, chunk_length=50000, overwrite=True)


CPU times: user 19.6 s, sys: 1.09 s, total: 20.7 s
Wall time: 18.8 s

In [3]:
# Time a VCF -> Zarr conversion of the genotype field only (fields='calldata/GT').
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields='calldata/GT', buffer_size=2**15, chunk_length=50000, overwrite=True)


CPU times: user 12.5 s, sys: 60 ms, total: 12.6 s
Wall time: 12.5 s

In [4]:
# Time a VCF -> Zarr conversion of all fields (fields='*') with chunk_length=50000
# and no n_threads argument. The cells below open the store at this output path.
%time vcf_to_zarr('../../profdata/ag1000g.phase1.ar3.2L.partial.vcf.gz', '../../profdata/ag1000g.phase1.ar3.2L.partial.zarr', fields='*', buffer_size=2**15, chunk_length=50000, overwrite=True)


CPU times: user 20.5 s, sys: 1 s, total: 21.5 s
Wall time: 19.6 s

In [15]:
# Open the Zarr store at the path the vcf_to_zarr cells write to and display
# its root group (the recorded output lists two subgroups: calldata, variants).
# NOTE(review): this import sits mid-notebook; consider moving it to the first cell.
import zarr
callset = zarr.open_group('../../profdata/ag1000g.phase1.ar3.2L.partial.zarr')
callset


Out[15]:
Group(/, 2)
  groups: 2; calldata, variants
  store: DirectoryStore

In [18]:
# Read the whole variants/ABHet array via [:]; the recorded result is a
# float32 NumPy array in which some entries are nan.
callset['variants']['ABHet'][:]


Out[18]:
array([        nan,         nan,  0.667     , ...,         nan,
        0.64999998,  0.755     ], dtype=float32)

In [19]:
# Display the calldata group to list the arrays it contains.
callset['calldata']


Out[19]:
Group(/calldata, 7)
  arrays: 7; AB, AD, DP, GQ, GT, MQ0, PL
  store: DirectoryStore

In [20]:
# Display the calldata/GT array object itself (no [:] slice), which shows its
# summary: shape, dtype, chunks, compressor and storage ratio.
callset['calldata/GT']


Out[20]:
Array(/calldata/GT, (39894, 765, 2), int8, chunks=(50000, 64, 2), order=C)
  nbytes: 58.2M; nbytes_stored: 7.2M; ratio: 8.1; initialized: 12/12
  compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
  store: DirectoryStore

In [21]:
# Read the entire calldata/GT array via [:]; the recorded result is an int8
# NumPy array in which -1 appears for many calls.
# NOTE(review): presumably -1 marks a missing allele call -- confirm against the reader's docs.
callset['calldata/GT'][:]


Out[21]:
array([[[-1, -1],
        [-1, -1],
        [-1, -1],
        ..., 
        [ 0,  0],
        [-1, -1],
        [-1, -1]],

       [[-1, -1],
        [-1, -1],
        [-1, -1],
        ..., 
        [-1, -1],
        [-1, -1],
        [-1, -1]],

       [[-1, -1],
        [-1, -1],
        [-1, -1],
        ..., 
        [-1, -1],
        [-1, -1],
        [-1, -1]],

       ..., 
       [[ 0,  0],
        [ 0,  0],
        [ 0,  0],
        ..., 
        [ 0,  0],
        [ 0,  0],
        [ 0,  0]],

       [[ 0,  0],
        [ 0,  0],
        [ 0,  0],
        ..., 
        [ 0,  0],
        [ 0,  0],
        [ 0,  0]],

       [[ 0,  0],
        [ 0,  0],
        [ 0,  0],
        ..., 
        [ 0,  0],
        [ 0,  0],
        [ 0,  0]]], dtype=int8)

In [ ]:


In [15]:
# Back-of-envelope arithmetic: 20,000,000 / 20,000 = 1,000, then / 60 ~= 16.7.
# NOTE(review): presumably (total rows) / (rows per second) converted to
# minutes -- confirm, and consider naming the two constants.
(20000000/20000)/60


Out[15]:
16.666666666666668

In [16]:
# Profile one vcf_to_zarr run on prof_vcf_fn (writing to 'prof.zarr') and print
# the statistics sorted by internal time (sort='time').
# NOTE(review): cProfile is not imported in the first cell of this notebook --
# confirm an earlier cell imports it.
cProfile.run("vcf_to_zarr(prof_vcf_fn, 'prof.zarr', buffer_size=2**15, chunk_length=1000, overwrite=True)", sort='time')


         71850 function calls (70986 primitive calls) in 0.191 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.040    0.040    0.191    0.191 io_vcf_read.py:360(vcf_to_zarr)
       76    0.033    0.000    0.056    0.001 {built-in method builtins.next}
      377    0.023    0.000    0.023    0.000 {method 'decompress' of 'zlib.Decompress' objects}
      473    0.010    0.000    0.010    0.000 {built-in method zlib.crc32}
8157/7773    0.010    0.000    0.052    0.000 {method 'read' of '_io.BufferedReader' objects}
     7744    0.009    0.000    0.066    0.000 gzip.py:269(read)
       12    0.007    0.001    0.031    0.003 io_vcf_read.py:829(_binary_readline)
     7744    0.004    0.000    0.006    0.000 _compression.py:12(_check_not_closed)
      649    0.004    0.000    0.004    0.000 {built-in method posix.stat}
      219    0.002    0.000    0.002    0.000 {built-in method posix.unlink}
      378    0.002    0.000    0.040    0.000 gzip.py:436(read)
     7744    0.002    0.000    0.002    0.000 gzip.py:296(closed)
       40    0.002    0.000    0.002    0.000 {zarr.blosc.compress}
      105    0.002    0.000    0.002    0.000 {built-in method io.open}
      378    0.002    0.000    0.042    0.000 _compression.py:66(readinto)
      949    0.002    0.000    0.002    0.000 gzip.py:80(read)
        1    0.001    0.001    0.033    0.033 io_vcf_read.py:790(_read_vcf)
       84    0.001    0.000    0.001    0.000 {built-in method posix.open}
       75    0.001    0.000    0.016    0.000 storage.py:614(__setitem__)
       75    0.001    0.000    0.001    0.000 {method '__exit__' of '_io._IOBase' objects}
      489    0.001    0.000    0.002    0.000 posixpath.py:71(join)
1930/1450    0.001    0.000    0.002    0.000 encoder.py:332(_iterencode_dict)
       41    0.001    0.000    0.013    0.000 core.py:700(_chunk_setitem_nosync)
     8461    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
      145    0.001    0.000    0.001    0.000 util.py:251(normalize_storage_path)
       50    0.001    0.000    0.003    0.000 storage.py:606(__getitem__)
       75    0.001    0.000    0.001    0.000 {built-in method posix.rename}
       76    0.001    0.000    0.001    0.000 posixpath.py:319(normpath)
     3047    0.001    0.000    0.001    0.000 {built-in method builtins.isinstance}
        9    0.001    0.000    0.005    0.001 shutil.py:396(_rmtree_safe_fd)
       72    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.array}
      600    0.001    0.000    0.001    0.000 random.py:229(_randbelow)
       17    0.001    0.000    0.015    0.001 core.py:496(__setitem__)
      376    0.001    0.000    0.011    0.000 gzip.py:487(_add_read_data)
      600    0.000    0.000    0.001    0.000 random.py:259(choice)
      471    0.000    0.000    0.001    0.000 gzip.py:93(prepend)
       75    0.000    0.000    0.001    0.000 functools.py:43(update_wrapper)
       75    0.000    0.000    0.005    0.000 tempfile.py:249(_mkstemp_inner)
       75    0.000    0.000    0.002    0.000 tempfile.py:473(__getattr__)
       34    0.000    0.000    0.001    0.000 encoder.py:203(iterencode)
      794    0.000    0.000    0.000    0.000 {method 'startswith' of 'str' objects}
      675    0.000    0.000    0.000    0.000 {built-in method builtins.getattr}
     3847    0.000    0.000    0.000    0.000 {built-in method builtins.len}
       75    0.000    0.000    0.007    0.000 tempfile.py:522(NamedTemporaryFile)
       95    0.000    0.000    0.001    0.000 gzip.py:491(_read_eof)
       97    0.000    0.000    0.001    0.000 gzip.py:403(_read_gzip_header)
     1450    0.000    0.000    0.002    0.000 encoder.py:411(_iterencode)
       29    0.000    0.000    0.000    0.000 decoder.py:345(raw_decode)
      649    0.000    0.000    0.001    0.000 posixpath.py:39(_get_sep)
       34    0.000    0.000    0.003    0.000 encoder.py:181(encode)
       75    0.000    0.000    0.000    0.000 {method 'write' of '_io.BufferedWriter' objects}
      380    0.000    0.000    0.001    0.000 gzip.py:387(_read_exact)
       16    0.000    0.000    0.022    0.001 core.py:1001(_append_nosync)
      185    0.000    0.000    0.002    0.000 genericpath.py:27(isfile)
       84    0.000    0.000    0.000    0.000 posixpath.py:99(split)
      221    0.000    0.000    0.000    0.000 {method 'split' of 'str' objects}
       16    0.000    0.000    0.000    0.000 {built-in method posix.remove}
       96    0.000    0.000    0.000    0.000 {built-in method zlib.decompressobj}
        9    0.000    0.000    0.011    0.001 storage.py:261(_init_array_metadata)
       25    0.000    0.000    0.001    0.000 meta.py:16(decode_array_metadata)
       50    0.000    0.000    0.000    0.000 {built-in method builtins.sorted}
        9    0.000    0.000    0.000    0.000 {built-in method posix.listdir}
       75    0.000    0.000    0.002    0.000 tempfile.py:160(<listcomp>)
      243    0.000    0.000    0.000    0.000 {method 'join' of 'str' objects}
       25    0.000    0.000    0.000    0.000 ufunclike.py:54(isposinf)
      218    0.000    0.000    0.000    0.000 encoder.py:276(_iterencode_list)
       75    0.000    0.000    0.002    0.000 tempfile.py:157(__next__)
       16    0.000    0.000    0.007    0.000 core.py:925(_resize_nosync)
       25    0.000    0.000    0.001    0.000 meta.py:126(encode_fill_value)
       16    0.000    0.000    0.007    0.000 core.py:154(_flush_metadata_nosync)
       12    0.000    0.000    0.000    0.000 {method 'join' of 'bytes' objects}
        1    0.000    0.000    0.191    0.191 {built-in method builtins.exec}
      160    0.000    0.000    0.001    0.000 genericpath.py:16(exists)
     1090    0.000    0.000    0.000    0.000 {method 'getrandbits' of '_random.Random' objects}
      285    0.000    0.000    0.000    0.000 {built-in method _struct.unpack}
       60    0.000    0.000    0.001    0.000 storage.py:657(__contains__)
       34    0.000    0.000    0.003    0.000 __init__.py:182(dumps)
       25    0.000    0.000    0.004    0.000 meta.py:42(encode_array_metadata)
        9    0.000    0.000    0.000    0.000 {built-in method posix.mkdir}
       21    0.000    0.000    0.000    0.000 {method 'fill' of 'numpy.ndarray' objects}
       85    0.000    0.000    0.001    0.000 genericpath.py:39(isdir)
       25    0.000    0.000    0.003    0.000 core.py:79(__init__)
        4    0.000    0.000    0.002    0.001 hierarchy.py:77(__init__)
        1    0.000    0.000    0.031    0.031 io_vcf_read.py:859(read_vcf_headers)
       25    0.000    0.000    0.002    0.000 core.py:116(_load_metadata_nosync)
      136    0.000    0.000    0.000    0.000 core.py:595(<genexpr>)
      378    0.000    0.000    0.000    0.000 {method 'cast' of 'memoryview' objects}
       29    0.000    0.000    0.001    0.000 decoder.py:334(decode)
      136    0.000    0.000    0.000    0.000 core.py:611(<genexpr>)
       38    0.000    0.000    0.000    0.000 core.py:312(<genexpr>)
       71    0.000    0.000    0.000    0.000 {built-in method numpy.core.multiarray.empty}
      150    0.000    0.000    0.000    0.000 tempfile.py:439(close)
       75    0.000    0.000    0.001    0.000 tempfile.py:500(__exit__)
       41    0.000    0.000    0.000    0.000 util.py:109(is_total_slice)
      490    0.000    0.000    0.000    0.000 {method 'endswith' of 'str' objects}
       38    0.000    0.000    0.000    0.000 core.py:938(<genexpr>)
       25    0.000    0.000    0.000    0.000 ufunclike.py:116(isneginf)
       75    0.000    0.000    0.000    0.000 tempfile.py:467(__init__)
       78    0.000    0.000    0.000    0.000 numeric.py:2064(isscalar)
        9    0.000    0.000    0.000    0.000 {built-in method posix.rmdir}
       76    0.000    0.000    0.001    0.000 posixpath.py:356(abspath)
       75    0.000    0.000    0.000    0.000 tempfile.py:97(_infer_return_type)
       60    0.000    0.000    0.000    0.000 {method 'match' of '_sre.SRE_Pattern' objects}
       39    0.000    0.000    0.001    0.000 storage.py:45(contains_array)
       41    0.000    0.000    0.013    0.000 core.py:677(_chunk_setitem)
        9    0.000    0.000    0.000    0.000 {built-in method posix.close}
      119    0.000    0.000    0.000    0.000 util.py:278(<listcomp>)
       17    0.000    0.000    0.000    0.000 util.py:201(<listcomp>)
        2    0.000    0.000    0.025    0.012 io_vcf_read.py:351(_zarr_store_chunk)
      108    0.000    0.000    0.000    0.000 util.py:124(<genexpr>)
      375    0.000    0.000    0.000    0.000 {built-in method builtins.setattr}
       75    0.000    0.000    0.000    0.000 tempfile.py:146(rng)
       17    0.000    0.000    0.003    0.000 hierarchy.py:266(__getitem__)
        2    0.000    0.000    0.001    0.001 hierarchy.py:501(_require_group_nosync)
       29    0.000    0.000    0.001    0.000 __init__.py:271(loads)
        4    0.000    0.000    0.000    0.000 meta.py:83(decode_group_metadata)
       97    0.000    0.000    0.000    0.000 gzip.py:383(_init_read)
       75    0.000    0.000    0.000    0.000 functools.py:73(wraps)
       17    0.000    0.000    0.000    0.000 util.py:169(normalize_array_selection)
       40    0.000    0.000    0.002    0.000 codecs.py:351(encode)
       41    0.000    0.000    0.002    0.000 core.py:796(_encode_chunk)
       16    0.000    0.000    0.000    0.000 util.py:208(normalize_resize_args)
      394    0.000    0.000    0.000    0.000 {built-in method _json.encode_basestring_ascii}
      600    0.000    0.000    0.000    0.000 {method 'bit_length' of 'int' objects}
        1    0.000    0.000    0.012    0.012 io_vcf_read.py:322(_zarr_setup_datasets)
        9    0.000    0.000    0.005    0.001 shutil.py:445(rmtree)
      120    0.000    0.000    0.000    0.000 {method 'replace' of 'str' objects}
       41    0.000    0.000    0.000    0.000 core.py:769(_chunk_key)
       76    0.000    0.000    0.000    0.000 posixpath.py:61(isabs)
       75    0.000    0.000    0.000    0.000 tempfile.py:118(_sanitize_params)
       24    0.000    0.000    0.000    0.000 codecs.py:357(get_config)
       25    0.000    0.000    0.000    0.000 meta.py:78(decode_dtype)
       25    0.000    0.000    0.000    0.000 meta.py:58(encode_dtype)
       25    0.000    0.000    0.000    0.000 encoder.py:258(_make_iterencode)
        1    0.000    0.000    0.000    0.000 {method 'astype' of 'numpy.ndarray' objects}
       28    0.000    0.000    0.000    0.000 hierarchy.py:234(_item_path)
       75    0.000    0.000    0.000    0.000 tempfile.py:494(__enter__)
        9    0.000    0.000    0.013    0.001 hierarchy.py:573(_create_dataset_nosync)
        1    0.000    0.000    0.191    0.191 <string>:1(<module>)
        9    0.000    0.000    0.001    0.000 storage.py:128(_require_parent_group)
      190    0.000    0.000    0.000    0.000 {built-in method builtins.max}
       24    0.000    0.000    0.000    0.000 codecs.py:92(from_config)
       21    0.000    0.000    0.001    0.000 storage.py:53(contains_group)
       24    0.000    0.000    0.000    0.000 codecs.py:20(get_codec)
       40    0.000    0.000    0.000    0.000 threading.py:1224(current_thread)
        9    0.000    0.000    0.013    0.001 creation.py:17(create)
       75    0.000    0.000    0.000    0.000 tempfile.py:505(close)
       84    0.000    0.000    0.000    0.000 {method 'rfind' of 'str' objects}
      192    0.000    0.000    0.000    0.000 {built-in method builtins.min}
       41    0.000    0.000    0.000    0.000 core.py:591(<listcomp>)
       23    0.000    0.000    0.000    0.000 util.py:133(normalize_axis_selection)
       50    0.000    0.000    0.000    0.000 numeric.py:414(asarray)
       84    0.000    0.000    0.000    0.000 {method 'rstrip' of 'str' objects}
       58    0.000    0.000    0.000    0.000 {method 'encode' of 'str' objects}
       75    0.000    0.000    0.000    0.000 {method '__enter__' of '_io._IOBase' objects}
      121    0.000    0.000    0.000    0.000 {built-in method builtins.hasattr}
       75    0.000    0.000    0.000    0.000 tempfile.py:481(func_wrapper)
       75    0.000    0.000    0.000    0.000 tempfile.py:449(__del__)
      296    0.000    0.000    0.000    0.000 encoder.py:352(<lambda>)
       75    0.000    0.000    0.000    0.000 tempfile.py:424(__init__)
       25    0.000    0.000    0.000    0.000 encoder.py:104(__init__)
       24    0.000    0.000    0.000    0.000 codecs.py:344(__init__)
       16    0.000    0.000    0.022    0.001 core.py:865(_synchronized_op)
       11    0.000    0.000    0.015    0.001 hierarchy.py:417(_write_op)
       61    0.000    0.000    0.000    0.000 {built-in method builtins.all}
        1    0.000    0.000    0.000    0.000 gzip.py:123(__init__)
      245    0.000    0.000    0.000    0.000 {built-in method _stat.S_ISDIR}
        9    0.000    0.000    0.005    0.001 storage.py:694(rmdir)
       16    0.000    0.000    0.000    0.000 core.py:310(_cdata_shape)
        9    0.000    0.000    0.000    0.000 util.py:12(normalize_shape)
       29    0.000    0.000    0.000    0.000 attrs.py:13(__init__)
       16    0.000    0.000    0.000    0.000 core.py:942(<listcomp>)
        1    0.000    0.000    0.002    0.002 hierarchy.py:817(open_group)
      132    0.000    0.000    0.000    0.000 {built-in method _stat.S_ISREG}
        9    0.000    0.000    0.012    0.001 storage.py:141(init_array)
       78    0.000    0.000    0.000    0.000 storage.py:36(_path_to_prefix)
      119    0.000    0.000    0.000    0.000 {built-in method builtins.any}
       16    0.000    0.000    0.022    0.001 core.py:880(_write_op)
       16    0.000    0.000    0.022    0.001 core.py:953(append)
        1    0.000    0.000    0.000    0.000 _compression.py:39(__init__)
        9    0.000    0.000    0.000    0.000 util.py:78(normalize_chunks)
       19    0.000    0.000    0.000    0.000 numeric.py:535(ascontiguousarray)
        1    0.000    0.000    0.000    0.000 storage.py:597(__init__)
       55    0.000    0.000    0.000    0.000 {method 'get' of 'dict' objects}
       75    0.000    0.000    0.000    0.000 {method 'update' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 util.py:34(guess_chunks)
       75    0.000    0.000    0.000    0.000 {method 'close' of '_io.BufferedWriter' objects}
        1    0.000    0.000    0.000    0.000 gzip.py:376(__init__)
       17    0.000    0.000    0.000    0.000 util.py:198(get_chunk_range)
       38    0.000    0.000    0.000    0.000 util.py:223(<genexpr>)
        1    0.000    0.000    0.000    0.000 {built-in method posix.getcwd}
        9    0.000    0.000    0.013    0.001 hierarchy.py:518(create_dataset)
       38    0.000    0.000    0.000    0.000 core.py:1030(<genexpr>)
       40    0.000    0.000    0.000    0.000 core.py:574(<genexpr>)
        9    0.000    0.000    0.000    0.000 {built-in method posix.lstat}
       25    0.000    0.000    0.000    0.000 meta.py:112(decode_fill_value)
       38    0.000    0.000    0.000    0.000 core.py:1020(<genexpr>)
       25    0.000    0.000    0.002    0.000 core.py:107(_load_metadata)
       75    0.000    0.000    0.000    0.000 tempfile.py:235(_get_candidate_names)
        1    0.000    0.000    0.033    0.033 io_vcf_read.py:420(read_vcf_chunks)
       40    0.000    0.000    0.000    0.000 threading.py:1298(main_thread)
        9    0.000    0.000    0.005    0.001 storage.py:69(rmdir)
        2    0.000    0.000    0.000    0.000 {method 'reduce' of 'numpy.ufunc' objects}
        1    0.000    0.000    0.000    0.000 hierarchy.py:751(_handle_store_arg)
       25    0.000    0.000    0.000    0.000 meta.py:65(_decode_dtype_descr)
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:644(normalize_types)
        1    0.000    0.000    0.000    0.000 gzip.py:20(open)
        9    0.000    0.000    0.000    0.000 os.py:216(makedirs)
       22    0.000    0.000    0.000    0.000 core.py:1008(<genexpr>)
        9    0.000    0.000    0.000    0.000 util.py:244(normalize_order)
       40    0.000    0.000    0.000    0.000 util.py:185(<genexpr>)
        2    0.000    0.000    0.001    0.001 hierarchy.py:473(require_group)
       75    0.000    0.000    0.000    0.000 {built-in method posix.getpid}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:542(add_all_fixed_variants_fields)
       99    0.000    0.000    0.000    0.000 {built-in method builtins.id}
        9    0.000    0.000    0.000    0.000 creation.py:104(_handle_kwargs)
        9    0.000    0.000    0.000    0.000 {built-in method posix.fstat}
       67    0.000    0.000    0.000    0.000 core.py:943(<genexpr>)
        9    0.000    0.000    0.000    0.000 storage.py:661(__eq__)
       58    0.000    0.000    0.000    0.000 {method 'end' of '_sre.SRE_Match' objects}
        7    0.000    0.000    0.000    0.000 io_vcf_read.py:609(normalize_type)
        1    0.000    0.000    0.001    0.001 creation.py:267(array)
        2    0.000    0.000    0.000    0.000 fromnumeric.py:1851(product)
       40    0.000    0.000    0.000    0.000 {built-in method _thread.get_ident}
       19    0.000    0.000    0.000    0.000 util.py:102(<genexpr>)
       51    0.000    0.000    0.000    0.000 {method 'items' of 'dict' objects}
        2    0.000    0.000    0.000    0.000 {method 'groups' of '_sre.SRE_Match' objects}
       24    0.000    0.000    0.000    0.000 {method 'pop' of 'dict' objects}
       21    0.000    0.000    0.000    0.000 util.py:23(<genexpr>)
        1    0.000    0.000    0.000    0.000 gzip.py:74(__init__)
       22    0.000    0.000    0.000    0.000 core.py:1010(<genexpr>)
        1    0.000    0.000    0.000    0.000 <string>:12(__new__)
        9    0.000    0.000    0.000    0.000 creation.py:95(_handle_store_arg)
       16    0.000    0.000    0.000    0.000 core.py:150(_refresh_metadata_nosync)
        9    0.000    0.000    0.000    0.000 genericpath.py:81(samestat)
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:736(normalize_numbers)
        2    0.000    0.000    0.000    0.000 util.py:75(<genexpr>)
        9    0.000    0.000    0.000    0.000 {method 'upper' of 'str' objects}
        1    0.000    0.000    0.000    0.000 creation.py:247(_get_shape_chunks)
        8    0.000    0.000    0.000    0.000 {method 'add' of 'set' objects}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:83(_prep_fields_arg)
        1    0.000    0.000    0.000    0.000 {built-in method __new__ of type object at 0x7f6f46712d20}
        1    0.000    0.000    0.000    0.000 {method 'keys' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 _compression.py:150(tell)
        1    0.000    0.000    0.000    0.000 _compression.py:36(readable)
        9    0.000    0.000    0.000    0.000 {method 'setdefault' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:742(<dictcomp>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 io_vcf_read.py:650(<dictcomp>)



In [103]:
# Profile one vcf_to_hdf5 run on prof_vcf_fn (writing to 'prof.h5') with the
# %prun magic. No statistics are recorded in this cell's saved output.
%prun vcf_to_hdf5(prof_vcf_fn, 'prof.h5', buffer_size=2**15, chunk_length=1000, overwrite=True)


 

In [11]:
# Line-by-line profile of read_vcf on the profiling VCF, tracing selected
# functions from the compiled allel.opt.io_vcf_read module.
import line_profiler

# The notebook's first cell has the import of these functions commented out, so
# they must be brought into scope here; without this the add_function() calls
# below raise NameError on a fresh kernel (Restart & Run All).
from allel.opt.io_vcf_read import iter_vcf, GenotypeInt8Parser_parse

l = line_profiler.LineProfiler()

# Functions to trace. The commented-out entries are further candidates; to
# enable one, uncomment it AND import the corresponding name first.
# l.add_function(_read_vcf)
l.add_function(iter_vcf)
# l.add_function(CalldataParser_parse)
l.add_function(GenotypeInt8Parser_parse)
# l.add_function(ParserContext_next)
# l.add_function(BufferedReader_read)

# Run read_vcf under the profiler, then print the per-line timing report.
l.runcall(read_vcf, prof_vcf_fn, buffer_size=2**15, chunk_length=1000)
l.print_stats()


Timer unit: 1e-06 s

Total time: 23.9421 s
File: /home/aliman/src/github/cggh/scikit-allel/allel/opt/io_vcf_read.pyx
Function: iter_vcf at line 71

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    71                                           def iter_vcf(binary_file, int buffer_size, int chunk_length, int temp_max_size, headers, fields,
    72                                                        types, numbers):
    73                                               cdef:
    74                                                   ParserContext context
    75                                                   Parser chrom_parser
    76                                                   Parser pos_parser
    77                                                   Parser id_parser
    78                                                   Parser ref_parser
    79                                                   Parser alt_parser
    80                                                   Parser qual_parser
    81                                                   Parser filter_parser
    82                                                   Parser info_parser
    83                                                   Parser format_parser
    84                                                   Parser calldata_parser
    85                                           
    86                                               # setup output
    87                                               # TODO yield chunks
    88         1            2      2.0      0.0      chunks = []
    89                                           
    90                                               # setup reader
    91         1          253    253.0      0.0      reader = BufferedReader(binary_file, buffer_size=buffer_size)
    92                                           
    93                                               # setup context
    94         1            4      4.0      0.0      n_samples = len(headers.samples)
    95         1            4      4.0      0.0      context = ParserContext(reader, temp_max_size=temp_max_size, n_samples=n_samples)
    96                                           
    97                                               # read in first character
    98         1            2      2.0      0.0      ParserContext_next(context)
    99                                           
   100                                               # copy so we don't modify someone else's data
   101         1            2      2.0      0.0      fields = set(fields)
   102                                           
   103                                               # setup CHROM parser
   104         1            1      1.0      0.0      if CHROM_FIELD in fields:
   105         2           29     14.5      0.0          chrom_parser = StringParser(field=CHROM_FIELD, chunk_length=chunk_length,
   106         1            1      1.0      0.0                                      dtype=types[CHROM_FIELD])
   107         1            2      2.0      0.0          fields.remove(CHROM_FIELD)
   108                                               else:
   109                                                   chrom_parser = SkipChromParser()
   110                                           
   111                                               # setup POS parser
   112         1            1      1.0      0.0      if POS_FIELD in fields:
   113                                                   # TODO user-provided type
   114         1           10     10.0      0.0          pos_parser = PosInt32Parser(chunk_length=chunk_length)
   115         1            2      2.0      0.0          fields.remove(POS_FIELD)
   116                                               else:
   117                                                   pos_parser = SkipPosParser()
   118                                           
   119                                               # setup ID parser
   120         1            1      1.0      0.0      if ID_FIELD in fields:
   121         2           11      5.5      0.0          id_parser = StringParser(field=ID_FIELD, chunk_length=chunk_length,
   122         1            1      1.0      0.0                                   dtype=types[ID_FIELD])
   123         1            1      1.0      0.0          fields.remove(ID_FIELD)
   124                                               else:
   125                                                   id_parser = SkipParser()
   126                                           
   127                                               # setup REF parser
   128         1            1      1.0      0.0      if REF_FIELD in fields:
   129         2           10      5.0      0.0          ref_parser = StringParser(field=REF_FIELD, chunk_length=chunk_length,
   130         1            1      1.0      0.0                                    dtype=types[REF_FIELD])
   131         1            1      1.0      0.0          fields.remove(REF_FIELD)
   132                                               else:
   133                                                   ref_parser = SkipParser()
   134                                           
   135                                               # setup ALT parser
   136         1            1      1.0      0.0      if ALT_FIELD in fields:
   137         1            0      0.0      0.0          t = types[ALT_FIELD]
   138         1            1      1.0      0.0          n = numbers[ALT_FIELD]
   139         1           15     15.0      0.0          alt_parser = AltParser(chunk_length=chunk_length, dtype=t, number=n)
   140         1            1      1.0      0.0          fields.remove(ALT_FIELD)
   141                                               else:
   142                                                   alt_parser = SkipParser()
   143                                           
   144                                               # setup QUAL parser
   145         1            1      1.0      0.0      if QUAL_FIELD in fields:
   146                                                   # TODO user-provided type
   147         1           12     12.0      0.0          qual_parser = QualFloat32Parser(chunk_length=chunk_length, fill=-1)
   148         1            1      1.0      0.0          fields.remove(QUAL_FIELD)
   149                                               else:
   150                                                   qual_parser = SkipParser()
   151                                           
   152                                               # setup FILTER parser
   153         1            1      1.0      0.0      filters = list()
   154         3            3      1.0      0.0      for field in list(fields):
   155         2            3      1.5      0.0          if field.startswith('variants/FILTER_'):
   156         1            2      2.0      0.0              filter = field[16:].encode('ascii')
   157         1            1      1.0      0.0              filters.append(filter)
   158         1            1      1.0      0.0              fields.remove(field)
   159         1            1      1.0      0.0      if filters:
   160         1           12     12.0      0.0          filter_parser = FilterParser(chunk_length=chunk_length, filters=filters)
   161                                               else:
   162                                                   filter_parser = SkipParser()
   163                                           
   164                                               # setup INFO parsers
   165         1            1      1.0      0.0      infos = list()
   166         1            0      0.0      0.0      info_types = dict()
   167         1            1      1.0      0.0      info_numbers = dict()
   168                                               # assume any variants fields left are INFO
   169         2            2      1.0      0.0      for field in list(fields):
   170         1            2      2.0      0.0          group, name = field.split('/')
   171         1            1      1.0      0.0          if group == 'variants':
   172                                                       key = name.encode('ascii')
   173                                                       infos.append(key)
   174                                                       fields.remove(field)
   175                                                       info_types[key] = types[field]
   176                                                       info_numbers[key] = numbers[field]
   177         1            1      1.0      0.0      if infos:
   178                                                   info_parser = InfoParser(chunk_length=chunk_length, infos=infos, types=info_types,
   179                                                                            numbers=info_numbers)
   180                                               else:
   181         1            1      1.0      0.0          info_parser = SkipParser()
   182                                           
   183                                               # setup FORMAT and calldata parsers
   184         1            0      0.0      0.0      formats = list()
   185         1            1      1.0      0.0      format_types = dict()
   186         1            1      1.0      0.0      format_numbers = dict()
   187         2            1      0.5      0.0      for field in list(fields):
   188         1            4      4.0      0.0          group, name = field.split('/')
   189         1            1      1.0      0.0          if group == 'calldata':
   190         1            2      2.0      0.0              key = name.encode('ascii')
   191         1            1      1.0      0.0              formats.append(key)
   192         1            1      1.0      0.0              fields.remove(field)
   193         1            1      1.0      0.0              format_types[key] = types[field]
   194         1            1      1.0      0.0              format_numbers[key] = numbers[field]
   195         1            1      1.0      0.0      if formats:
   196         1            2      2.0      0.0          format_parser = FormatParser()
   197         2         1498    749.0      0.0          calldata_parser = CalldataParser(chunk_length=chunk_length,
   198         1            1      1.0      0.0                                           formats=formats, types=format_types,
   199         1            0      0.0      0.0                                           numbers=format_numbers,
   200         1            1      1.0      0.0                                           n_samples=context.n_samples,
   201                                                                                    ploidy=2)
   202                                               else:
   203                                                   format_parser = SkipParser()
   204                                                   calldata_parser = SkipParser()
   205                                           
   206         1            1      1.0      0.0      if fields:
   207                                                   # shouldn't ever be any left over
   208                                                   raise RuntimeError('unexpected fields left over: %r' % set(fields))
   209                                           
   210         1            1      1.0      0.0      while True:
   211                                           
   212     19671         9382      0.5      0.0          if context.c == 0:
   213         1            0      0.0      0.0              break
   214                                           
   215     19670         9411      0.5      0.0          elif context.state == ParserState.CHROM:
   216      1967         7764      3.9      0.0              chrom_parser.parse(context)
   217      1967         1031      0.5      0.0              context.state = ParserState.POS
   218                                           
   219     17703         8456      0.5      0.0          elif context.state == ParserState.POS:
   220      1967        17697      9.0      0.1              pos_parser.parse(context)
   221      1967         1091      0.6      0.0              context.state = ParserState.ID
   222                                           
   223     15736         7584      0.5      0.0          elif context.state == ParserState.ID:
   224      1967         4719      2.4      0.0              id_parser.parse(context)
   225      1967         1077      0.5      0.0              context.state = ParserState.REF
   226                                           
   227     13769         6644      0.5      0.0          elif context.state == ParserState.REF:
   228      1967         4863      2.5      0.0              ref_parser.parse(context)
   229      1967         1097      0.6      0.0              context.state = ParserState.ALT
   230                                           
   231     11802         5623      0.5      0.0          elif context.state == ParserState.ALT:
   232      1967         6807      3.5      0.0              alt_parser.parse(context)
   233      1967         1030      0.5      0.0              context.state = ParserState.QUAL
   234                                           
   235      9835         4785      0.5      0.0          elif context.state == ParserState.QUAL:
   236      1967         8932      4.5      0.0              qual_parser.parse(context)
   237      1967         1079      0.5      0.0              context.state = ParserState.FILTER
   238                                           
   239      7868         3943      0.5      0.0          elif context.state == ParserState.FILTER:
   240      1967         6563      3.3      0.0              filter_parser.parse(context)
   241      1967         1081      0.5      0.0              context.state = ParserState.INFO
   242                                           
   243      5901         3024      0.5      0.0          elif context.state == ParserState.INFO:
   244                                                       # debug(context.variant_index, 'parse INFO')
   245      1967         4709      2.4      0.0              info_parser.parse(context)
   246      1967         1108      0.6      0.0              context.state = ParserState.FORMAT
   247                                           
   248      3934         2063      0.5      0.0          elif context.state == ParserState.FORMAT:
   249      1967        10955      5.6      0.0              format_parser.parse(context)
   250      1967         1179      0.6      0.0              context.state = ParserState.CALLDATA
   251                                           
   252      1967          953      0.5      0.0          elif context.state == ParserState.CALLDATA:
   253      1967     23788570  12093.8     99.4              calldata_parser.parse(context)
   254      1967         1093      0.6      0.0              context.state = ParserState.CHROM
   255                                           
   256                                                       # setup next variant
   257                                                       # debug('setup next variant')
   258      1967          973      0.5      0.0              context.variant_index += 1
   259      1967          922      0.5      0.0              if context.chunk_variant_index < chunk_length - 1:
   260      1966          986      0.5      0.0                  context.chunk_variant_index += 1
   261                                           
   262                                                       else:
   263                                           
   264                                                           # build chunk for output
   265         1            2      2.0      0.0                  chunk = dict()
   266         1           47     47.0      0.0                  chrom_parser.mkchunk(chunk)
   267         1           11     11.0      0.0                  pos_parser.mkchunk(chunk)
   268         1           12     12.0      0.0                  id_parser.mkchunk(chunk)
   269         1            9      9.0      0.0                  ref_parser.mkchunk(chunk)
   270         1           16     16.0      0.0                  alt_parser.mkchunk(chunk)
   271         1           10     10.0      0.0                  qual_parser.mkchunk(chunk)
   272         1           16     16.0      0.0                  filter_parser.mkchunk(chunk)
   273         1            1      1.0      0.0                  info_parser.mkchunk(chunk)
   274         1         1460   1460.0      0.0                  calldata_parser.mkchunk(chunk)
   275                                                           # TODO yield
   276         1            2      2.0      0.0                  chunks.append(chunk)
   277                                           
   278                                                           # setup next chunk
   279         1            1      1.0      0.0                  context.chunk_variant_index = 0
   280                                           
   281                                                   else:
   282                                                       # shouldn't ever happen
   283                                                       raise RuntimeError('unexpected parser state')
   284                                           
   285                                               # left-over chunk
   286         1            1      1.0      0.0      limit = context.chunk_variant_index
   287         1            0      0.0      0.0      if limit > 0:
   288         1            1      1.0      0.0          chunk = dict()
   289         1           27     27.0      0.0          chrom_parser.mkchunk(chunk, limit=limit)
   290         1           10     10.0      0.0          pos_parser.mkchunk(chunk, limit=limit)
   291         1            9      9.0      0.0          id_parser.mkchunk(chunk, limit=limit)
   292         1            7      7.0      0.0          ref_parser.mkchunk(chunk, limit=limit)
   293         1           15     15.0      0.0          alt_parser.mkchunk(chunk, limit=limit)
   294         1           18     18.0      0.0          qual_parser.mkchunk(chunk, limit=limit)
   295         1           14     14.0      0.0          filter_parser.mkchunk(chunk, limit=limit)
   296         1            1      1.0      0.0          info_parser.mkchunk(chunk, limit=limit)
   297         1         1256   1256.0      0.0          calldata_parser.mkchunk(chunk, limit=limit)
   298                                                   # TODO yield
   299         1            1      1.0      0.0          chunks.append(chunk)
   300                                           
   301                                               # TODO yield
   302         1           17     17.0      0.0      return chunks

Total time: 13.673 s
File: /home/aliman/src/github/cggh/scikit-allel/allel/opt/io_vcf_read.pyx
Function: GenotypeInt8Parser_parse at line 1452

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
  1452                                           cpdef inline void GenotypeInt8Parser_parse(GenotypeInt8Parser self, ParserContext context):
  1453                                               cdef:
  1454   1520491       418450      0.3      3.1          int allele_index = 0
  1455                                               # debug('GenotypeInt8Parser_parse')
  1456                                           
  1457                                               # reset temporary buffer
  1458   1520491       563282      0.4      4.1      temp_clear(context)
  1459                                           
  1460   1520491       317756      0.2      2.3      while True:
  1461                                           
  1462   6081964      1218841      0.2      8.9          if context.c == PERIOD:
  1463                                                       pass
  1464                                           
  1465   6081964      1249002      0.2      9.1          elif context.c == SLASH or context.c == PIPE:
  1466                                           
  1467   1520491      1531151      1.0     11.2              GenotypeInt8Parser_store(self, context, allele_index)
  1468   1520491       309492      0.2      2.3              allele_index += 1
  1469   1520491       572537      0.4      4.2              temp_clear(context)
  1470                                           
  1471   4561473       970231      0.2      7.1          elif context.c == COLON or context.c == TAB or context.c == NEWLINE:
  1472                                           
  1473   1520491      1500280      1.0     11.0              GenotypeInt8Parser_store(self, context, allele_index)
  1474   1520491       309290      0.2      2.3              break
  1475                                           
  1476                                                   else:
  1477                                           
  1478   3040982      1399506      0.5     10.2              temp_append(context)
  1479                                           
  1480   4561473      3313201      0.7     24.2          ParserContext_next(context)
  1481                                           
  1482                                               # debug(context.variant_index, context.sample_index, 'GT',
  1483                                               #       self.values[context.chunk_variant_index, context.sample_index])
  1484                                           
  1485                                           
  1486                                           cdef inline void GenotypeInt8Parser_store(GenotypeInt8Parser self, ParserContext context,
  1487                                                                                     int allele_index):
  1488                                               cdef:
  1489                                                   long allele
  1490                                                   char* str_end
  1491                                           
  1492                                               if allele_index >= self.ploidy:
  1493                                                   # more alleles than we've made room for, ignore
  1494                                                   return
  1495                                           
  1496                                               if context.temp_size == 0:
  1497                                                   # empty allele - note strictly kosher
  1498                                                   return
  1499                                           
  1500                                               # attempt to parse allele
  1501                                               allele = temp_strtol(context, -1)
  1502                                           
  1503                                               # store value
  1504                                               self.memory[context.chunk_variant_index, context.sample_index, allele_index] = allele
  1505                                           
  1506                                           
  1507                                           cdef class SkipInfoParser(Parser):
  1508                                           
  1509                                               def __cinit__(self):
  1510                                                   pass
  1511                                           
  1512                                               cdef parse(self, ParserContext context):
  1513                                                   # debug(context.variant_index, 'SkipInfoParser.parse', bytes([context.c]))
  1514                                                   SkipInfoParser_parse(self, context)


In [26]:
import vcfnp

In [28]:
# Time a single run of vcfnp's calldata extraction, restricted to the
# 'genotype' field, on the profiling VCF (prof_vcf_fn, set in the first cell).
# NOTE(review): presumably this is a baseline to compare against the allel
# readers profiled elsewhere in this notebook -- confirm.
# NOTE(review): because the call is the cell's last expression, the whole
# result array is echoed as Out[...]; if only the timing matters, consider
# binding the result to a name so the repr is not dumped into the notebook.
%time vcfnp.calldata(prof_vcf_fn, fields=('genotype',))


[vcfnp] 2017-05-24 14:49:30.651145 :: caching is disabled
[vcfnp] 2017-05-24 14:49:30.651788 :: building array
CPU times: user 4.31 s, sys: 0 ns, total: 4.31 s
Wall time: 4.3 s
Out[28]:
array([ (([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([1, 0],), ([1, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],)),
       (([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 0],), ([0, 1],), ([0, 0],), ([0, 1],), ([0, 0],), ([1, 1],), ([0, 0],), ([1, 1],), ([0, 1],), ([0, 1],), ([1, 0],), ([1, 1],), ([0, 0],), ([1, 1],), ([0, 0],), ([1, 1],), ([1, 1],), ([0, 1],), ([0, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([0, 0],), ([0, 1],), ([0, 1],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([1, 1],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],)),
       (([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],)),
       ...,
       (([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([1, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],)),
       (([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 0],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 0],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), 
([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 0],), ([0, 1],), ([0, 1],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 0],), ([0, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([0, 1],), ([0, 1],), ([1, 0],), ([1, 1],), ([0, 1],), ([0, 1],), ([1, 0],), ([1, 1],), ([1, 1],), ([0, 0],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 0],), ([1, 0],), ([0, 1],), ([1, 0],), ([0, 0],), ([1, 0],), ([1, 1],), ([0, 0],), ([1, 0],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 
1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), 
([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 0],), ([0, 1],), ([0, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([0, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([0, 0],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 
1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([1, 1],), ([0, 0],), ([0, 1],), ([0, 0],), ([1, 0],), ([0, 0],), ([1, 1],), ([0, 1],)),
       (([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 1],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), 
([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 
0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],), ([0, 0],))], 
      dtype=[('AB0085-C', [('genotype', 'i1', (2,))]), ('AB0087-C', [('genotype', 'i1', (2,))]), ('AB0088-C', [('genotype', 'i1', (2,))]), ('AB0089-C', [('genotype', 'i1', (2,))]), ('AB0090-C', [('genotype', 'i1', (2,))]), ('AB0091-C', [('genotype', 'i1', (2,))]), ('AB0092-C', [('genotype', 'i1', (2,))]), ('AB0094-C', [('genotype', 'i1', (2,))]), ('AB0095-C', [('genotype', 'i1', (2,))]), ('AB0097-C', [('genotype', 'i1', (2,))]), ('AB0098-C', [('genotype', 'i1', (2,))]), ('AB0099-C', [('genotype', 'i1', (2,))]), ('AB0100-C', [('genotype', 'i1', (2,))]), ('AB0101-C', [('genotype', 'i1', (2,))]), ('AB0103-C', [('genotype', 'i1', (2,))]), ('AB0104-C', [('genotype', 'i1', (2,))]), ('AB0109-C', [('genotype', 'i1', (2,))]), ('AB0110-C', [('genotype', 'i1', (2,))]), ('AB0111-C', [('genotype', 'i1', (2,))]), ('AB0112-C', [('genotype', 'i1', (2,))]), ('AB0113-C', [('genotype', 'i1', (2,))]), ('AB0114-C', [('genotype', 'i1', (2,))]), ('AB0117-C', [('genotype', 'i1', (2,))]), ('AB0119-C', [('genotype', 'i1', (2,))]), ('AB0122-C', [('genotype', 'i1', (2,))]), ('AB0123-C', [('genotype', 'i1', (2,))]), ('AB0124-C', [('genotype', 'i1', (2,))]), ('AB0126-C', [('genotype', 'i1', (2,))]), ('AB0127-C', [('genotype', 'i1', (2,))]), ('AB0128-C', [('genotype', 'i1', (2,))]), ('AB0129-C', [('genotype', 'i1', (2,))]), ('AB0130-C', [('genotype', 'i1', (2,))]), ('AB0133-C', [('genotype', 'i1', (2,))]), ('AB0134-C', [('genotype', 'i1', (2,))]), ('AB0135-C', [('genotype', 'i1', (2,))]), ('AB0136-C', [('genotype', 'i1', (2,))]), ('AB0137-C', [('genotype', 'i1', (2,))]), ('AB0138-C', [('genotype', 'i1', (2,))]), ('AB0139-C', [('genotype', 'i1', (2,))]), ('AB0140-C', [('genotype', 'i1', (2,))]), ('AB0142-C', [('genotype', 'i1', (2,))]), ('AB0143-C', [('genotype', 'i1', (2,))]), ('AB0145-C', [('genotype', 'i1', (2,))]), ('AB0146-C', [('genotype', 'i1', (2,))]), ('AB0147-C', [('genotype', 'i1', (2,))]), ('AB0148-C', [('genotype', 'i1', (2,))]), ('AB0151-C', [('genotype', 'i1', (2,))]), ('AB0153-C', 
[('genotype', 'i1', (2,))]), ('AB0155-C', [('genotype', 'i1', (2,))]), ('AB0157-C', [('genotype', 'i1', (2,))]), ('AB0158-C', [('genotype', 'i1', (2,))]), ('AB0159-C', [('genotype', 'i1', (2,))]), ('AB0160-C', [('genotype', 'i1', (2,))]), ('AB0161-C', [('genotype', 'i1', (2,))]), ('AB0164-C', [('genotype', 'i1', (2,))]), ('AB0166-C', [('genotype', 'i1', (2,))]), ('AB0169-C', [('genotype', 'i1', (2,))]), ('AB0170-C', [('genotype', 'i1', (2,))]), ('AB0171-C', [('genotype', 'i1', (2,))]), ('AB0172-C', [('genotype', 'i1', (2,))]), ('AB0173-C', [('genotype', 'i1', (2,))]), ('AB0174-C', [('genotype', 'i1', (2,))]), ('AB0175-C', [('genotype', 'i1', (2,))]), ('AB0176-C', [('genotype', 'i1', (2,))]), ('AB0177-C', [('genotype', 'i1', (2,))]), ('AB0178-C', [('genotype', 'i1', (2,))]), ('AB0179-C', [('genotype', 'i1', (2,))]), ('AB0181-C', [('genotype', 'i1', (2,))]), ('AB0182-C', [('genotype', 'i1', (2,))]), ('AB0183-C', [('genotype', 'i1', (2,))]), ('AB0184-C', [('genotype', 'i1', (2,))]), ('AB0185-C', [('genotype', 'i1', (2,))]), ('AB0186-C', [('genotype', 'i1', (2,))]), ('AB0187-C', [('genotype', 'i1', (2,))]), ('AB0188-C', [('genotype', 'i1', (2,))]), ('AB0189-C', [('genotype', 'i1', (2,))]), ('AB0190-C', [('genotype', 'i1', (2,))]), ('AB0191-C', [('genotype', 'i1', (2,))]), ('AB0192-C', [('genotype', 'i1', (2,))]), ('AB0197-C', [('genotype', 'i1', (2,))]), ('AB0198-C', [('genotype', 'i1', (2,))]), ('AB0199-C', [('genotype', 'i1', (2,))]), ('AB0201-C', [('genotype', 'i1', (2,))]), ('AB0202-C', [('genotype', 'i1', (2,))]), ('AB0203-C', [('genotype', 'i1', (2,))]), ('AB0204-C', [('genotype', 'i1', (2,))]), ('AB0205-C', [('genotype', 'i1', (2,))]), ('AB0206-C', [('genotype', 'i1', (2,))]), ('AB0207-C', [('genotype', 'i1', (2,))]), ('AB0208-C', [('genotype', 'i1', (2,))]), ('AB0209-C', [('genotype', 'i1', (2,))]), ('AB0210-C', [('genotype', 'i1', (2,))]), ('AB0211-C', [('genotype', 'i1', (2,))]), ('AB0212-C', [('genotype', 'i1', (2,))]), ('AB0213-C', [('genotype', 'i1', 
(2,))]), ('AB0217-C', [('genotype', 'i1', (2,))]), ('AB0219-C', [('genotype', 'i1', (2,))]), ('AB0221-C', [('genotype', 'i1', (2,))]), ('AB0222-C', [('genotype', 'i1', (2,))]), ('AB0223-C', [('genotype', 'i1', (2,))]), ('AB0224-C', [('genotype', 'i1', (2,))]), ('AB0226-C', [('genotype', 'i1', (2,))]), ('AB0227-C', [('genotype', 'i1', (2,))]), ('AB0228-C', [('genotype', 'i1', (2,))]), ('AB0229-C', [('genotype', 'i1', (2,))]), ('AB0231-C', [('genotype', 'i1', (2,))]), ('AB0233-C', [('genotype', 'i1', (2,))]), ('AB0234-C', [('genotype', 'i1', (2,))]), ('AB0235-C', [('genotype', 'i1', (2,))]), ('AB0236-C', [('genotype', 'i1', (2,))]), ('AB0237-C', [('genotype', 'i1', (2,))]), ('AB0238-C', [('genotype', 'i1', (2,))]), ('AB0239-C', [('genotype', 'i1', (2,))]), ('AB0240-C', [('genotype', 'i1', (2,))]), ('AB0241-C', [('genotype', 'i1', (2,))]), ('AB0242-C', [('genotype', 'i1', (2,))]), ('AB0243-C', [('genotype', 'i1', (2,))]), ('AB0244-C', [('genotype', 'i1', (2,))]), ('AB0246-C', [('genotype', 'i1', (2,))]), ('AB0249-C', [('genotype', 'i1', (2,))]), ('AB0250-C', [('genotype', 'i1', (2,))]), ('AB0251-C', [('genotype', 'i1', (2,))]), ('AB0252-C', [('genotype', 'i1', (2,))]), ('AB0253-C', [('genotype', 'i1', (2,))]), ('AB0256-C', [('genotype', 'i1', (2,))]), ('AB0257-C', [('genotype', 'i1', (2,))]), ('AB0258-C', [('genotype', 'i1', (2,))]), ('AB0260-C', [('genotype', 'i1', (2,))]), ('AB0261-C', [('genotype', 'i1', (2,))]), ('AB0262-C', [('genotype', 'i1', (2,))]), ('AB0263-C', [('genotype', 'i1', (2,))]), ('AB0264-C', [('genotype', 'i1', (2,))]), ('AB0265-C', [('genotype', 'i1', (2,))]), ('AB0266-C', [('genotype', 'i1', (2,))]), ('AB0267-C', [('genotype', 'i1', (2,))]), ('AB0268-C', [('genotype', 'i1', (2,))]), ('AB0270-C', [('genotype', 'i1', (2,))]), ('AB0271-C', [('genotype', 'i1', (2,))]), ('AB0272-C', [('genotype', 'i1', (2,))]), ('AB0273-C', [('genotype', 'i1', (2,))]), ('AB0274-C', [('genotype', 'i1', (2,))]), ('AB0276-C', [('genotype', 'i1', (2,))]), ('AB0277-C', 
[('genotype', 'i1', (2,))]), ('AB0278-C', [('genotype', 'i1', (2,))]), ('AB0279-C', [('genotype', 'i1', (2,))]), ('AB0280-C', [('genotype', 'i1', (2,))]), ('AB0281-C', [('genotype', 'i1', (2,))]), ('AB0282-C', [('genotype', 'i1', (2,))]), ('AB0283-C', [('genotype', 'i1', (2,))]), ('AB0284-C', [('genotype', 'i1', (2,))]), ('AC0090-C', [('genotype', 'i1', (2,))]), ('AC0091-C', [('genotype', 'i1', (2,))]), ('AC0092-C', [('genotype', 'i1', (2,))]), ('AC0093-C', [('genotype', 'i1', (2,))]), ('AC0094-C', [('genotype', 'i1', (2,))]), ('AC0095-C', [('genotype', 'i1', (2,))]), ('AC0096-C', [('genotype', 'i1', (2,))]), ('AC0097-C', [('genotype', 'i1', (2,))]), ('AC0098-C', [('genotype', 'i1', (2,))]), ('AC0099-C', [('genotype', 'i1', (2,))]), ('AC0100-C', [('genotype', 'i1', (2,))]), ('AC0101-C', [('genotype', 'i1', (2,))]), ('AC0102-C', [('genotype', 'i1', (2,))]), ('AC0103-C', [('genotype', 'i1', (2,))]), ('AC0104-C', [('genotype', 'i1', (2,))]), ('AC0106-C', [('genotype', 'i1', (2,))]), ('AC0107-C', [('genotype', 'i1', (2,))]), ('AC0108-C', [('genotype', 'i1', (2,))]), ('AC0109-C', [('genotype', 'i1', (2,))]), ('AC0110-C', [('genotype', 'i1', (2,))]), ('AC0111-C', [('genotype', 'i1', (2,))]), ('AC0112-C', [('genotype', 'i1', (2,))]), ('AC0113-C', [('genotype', 'i1', (2,))]), ('AC0114-C', [('genotype', 'i1', (2,))]), ('AC0115-C', [('genotype', 'i1', (2,))]), ('AC0116-C', [('genotype', 'i1', (2,))]), ('AC0117-C', [('genotype', 'i1', (2,))]), ('AC0118-C', [('genotype', 'i1', (2,))]), ('AC0119-C', [('genotype', 'i1', (2,))]), ('AC0120-C', [('genotype', 'i1', (2,))]), ('AC0121-C', [('genotype', 'i1', (2,))]), ('AC0122-C', [('genotype', 'i1', (2,))]), ('AC0123-C', [('genotype', 'i1', (2,))]), ('AC0124-C', [('genotype', 'i1', (2,))]), ('AC0125-C', [('genotype', 'i1', (2,))]), ('AC0126-C', [('genotype', 'i1', (2,))]), ('AC0127-C', [('genotype', 'i1', (2,))]), ('AC0128-C', [('genotype', 'i1', (2,))]), ('AC0129-C', [('genotype', 'i1', (2,))]), ('AC0130-C', [('genotype', 'i1', 
(2,))]), ('AC0131-C', [('genotype', 'i1', (2,))]), ('AC0132-C', [('genotype', 'i1', (2,))]), ('AC0133-C', [('genotype', 'i1', (2,))]), ('AC0135-C', [('genotype', 'i1', (2,))]), ('AC0136-C', [('genotype', 'i1', (2,))]), ('AC0137-C', [('genotype', 'i1', (2,))]), ('AC0138-C', [('genotype', 'i1', (2,))]), ('AC0139-C', [('genotype', 'i1', (2,))]), ('AC0140-C', [('genotype', 'i1', (2,))]), ('AC0142-C', [('genotype', 'i1', (2,))]), ('AC0143-C', [('genotype', 'i1', (2,))]), ('AC0144-C', [('genotype', 'i1', (2,))]), ('AC0145-C', [('genotype', 'i1', (2,))]), ('AC0147-C', [('genotype', 'i1', (2,))]), ('AC0148-C', [('genotype', 'i1', (2,))]), ('AC0149-C', [('genotype', 'i1', (2,))]), ('AC0150-C', [('genotype', 'i1', (2,))]), ('AC0151-C', [('genotype', 'i1', (2,))]), ('AC0152-C', [('genotype', 'i1', (2,))]), ('AC0153-C', [('genotype', 'i1', (2,))]), ('AC0154-C', [('genotype', 'i1', (2,))]), ('AC0156-C', [('genotype', 'i1', (2,))]), ('AC0158-C', [('genotype', 'i1', (2,))]), ('AC0159-C', [('genotype', 'i1', (2,))]), ('AC0160-C', [('genotype', 'i1', (2,))]), ('AC0161-C', [('genotype', 'i1', (2,))]), ('AC0162-C', [('genotype', 'i1', (2,))]), ('AC0163-C', [('genotype', 'i1', (2,))]), ('AC0164-C', [('genotype', 'i1', (2,))]), ('AC0166-C', [('genotype', 'i1', (2,))]), ('AC0167-C', [('genotype', 'i1', (2,))]), ('AC0168-C', [('genotype', 'i1', (2,))]), ('AC0169-C', [('genotype', 'i1', (2,))]), ('AC0170-C', [('genotype', 'i1', (2,))]), ('AC0171-C', [('genotype', 'i1', (2,))]), ('AC0172-C', [('genotype', 'i1', (2,))]), ('AC0173-C', [('genotype', 'i1', (2,))]), ('AC0174-C', [('genotype', 'i1', (2,))]), ('AC0176-C', [('genotype', 'i1', (2,))]), ('AC0178-C', [('genotype', 'i1', (2,))]), ('AC0179-C', [('genotype', 'i1', (2,))]), ('AC0180-C', [('genotype', 'i1', (2,))]), ('AC0181-C', [('genotype', 'i1', (2,))]), ('AC0182-C', [('genotype', 'i1', (2,))]), ('AC0183-C', [('genotype', 'i1', (2,))]), ('AC0184-C', [('genotype', 'i1', (2,))]), ('AC0186-C', [('genotype', 'i1', (2,))]), ('AC0187-C', 
[('genotype', 'i1', (2,))]), ('AC0188-C', [('genotype', 'i1', (2,))]), ('AC0189-C', [('genotype', 'i1', (2,))]), ('AC0190-C', [('genotype', 'i1', (2,))]), ('AC0191-C', [('genotype', 'i1', (2,))]), ('AC0192-C', [('genotype', 'i1', (2,))]), ('AC0193-C', [('genotype', 'i1', (2,))]), ('AC0194-C', [('genotype', 'i1', (2,))]), ('AC0195-C', [('genotype', 'i1', (2,))]), ('AC0196-C', [('genotype', 'i1', (2,))]), ('AC0197-C', [('genotype', 'i1', (2,))]), ('AC0199-C', [('genotype', 'i1', (2,))]), ('AC0200-C', [('genotype', 'i1', (2,))]), ('AC0201-C', [('genotype', 'i1', (2,))]), ('AC0202-C', [('genotype', 'i1', (2,))]), ('AC0203-C', [('genotype', 'i1', (2,))]), ('AJ0023-C', [('genotype', 'i1', (2,))]), ('AJ0024-C', [('genotype', 'i1', (2,))]), ('AJ0032-C', [('genotype', 'i1', (2,))]), ('AJ0035-C', [('genotype', 'i1', (2,))]), ('AJ0036-C', [('genotype', 'i1', (2,))]), ('AJ0039-C', [('genotype', 'i1', (2,))]), ('AJ0043-C', [('genotype', 'i1', (2,))]), ('AJ0044-C', [('genotype', 'i1', (2,))]), ('AJ0045-C', [('genotype', 'i1', (2,))]), ('AJ0047-C', [('genotype', 'i1', (2,))]), ('AJ0051-C', [('genotype', 'i1', (2,))]), ('AJ0052-C', [('genotype', 'i1', (2,))]), ('AJ0056-C', [('genotype', 'i1', (2,))]), ('AJ0061-C', [('genotype', 'i1', (2,))]), ('AJ0063-C', [('genotype', 'i1', (2,))]), ('AJ0064-C', [('genotype', 'i1', (2,))]), ('AJ0066-C', [('genotype', 'i1', (2,))]), ('AJ0070-C', [('genotype', 'i1', (2,))]), ('AJ0071-C', [('genotype', 'i1', (2,))]), ('AJ0072-C', [('genotype', 'i1', (2,))]), ('AJ0074-C', [('genotype', 'i1', (2,))]), ('AJ0075-C', [('genotype', 'i1', (2,))]), ('AJ0076-C', [('genotype', 'i1', (2,))]), ('AJ0077-C', [('genotype', 'i1', (2,))]), ('AJ0078-C', [('genotype', 'i1', (2,))]), ('AJ0081-C', [('genotype', 'i1', (2,))]), ('AJ0084-C', [('genotype', 'i1', (2,))]), ('AJ0085-C', [('genotype', 'i1', (2,))]), ('AJ0086-C', [('genotype', 'i1', (2,))]), ('AJ0088-C', [('genotype', 'i1', (2,))]), ('AJ0090-C', [('genotype', 'i1', (2,))]), ('AJ0092-C', [('genotype', 'i1', 
(2,))]), ('AJ0093-C', [('genotype', 'i1', (2,))]), ('AJ0096-C', [('genotype', 'i1', (2,))]), ('AJ0097-C', [('genotype', 'i1', (2,))]), ('AJ0098-C', [('genotype', 'i1', (2,))]), ('AJ0100-C', [('genotype', 'i1', (2,))]), ('AJ0101-C', [('genotype', 'i1', (2,))]), ('AJ0102-C', [('genotype', 'i1', (2,))]), ('AJ0103-C', [('genotype', 'i1', (2,))]), ('AJ0105-C', [('genotype', 'i1', (2,))]), ('AJ0107-C', [('genotype', 'i1', (2,))]), ('AJ0109-C', [('genotype', 'i1', (2,))]), ('AJ0113-C', [('genotype', 'i1', (2,))]), ('AJ0115-C', [('genotype', 'i1', (2,))]), ('AJ0116-C', [('genotype', 'i1', (2,))]), ('AK0065-C', [('genotype', 'i1', (2,))]), ('AK0066-C', [('genotype', 'i1', (2,))]), ('AK0067-C', [('genotype', 'i1', (2,))]), ('AK0068-C', [('genotype', 'i1', (2,))]), ('AK0069-C', [('genotype', 'i1', (2,))]), ('AK0070-C', [('genotype', 'i1', (2,))]), ('AK0072-C', [('genotype', 'i1', (2,))]), ('AK0073-C', [('genotype', 'i1', (2,))]), ('AK0074-C', [('genotype', 'i1', (2,))]), ('AK0075-C', [('genotype', 'i1', (2,))]), ('AK0076-C', [('genotype', 'i1', (2,))]), ('AK0077-C', [('genotype', 'i1', (2,))]), ('AK0078-C', [('genotype', 'i1', (2,))]), ('AK0079-C', [('genotype', 'i1', (2,))]), ('AK0080-C', [('genotype', 'i1', (2,))]), ('AK0081-C', [('genotype', 'i1', (2,))]), ('AK0082-C', [('genotype', 'i1', (2,))]), ('AK0085-C', [('genotype', 'i1', (2,))]), ('AK0086-C', [('genotype', 'i1', (2,))]), ('AK0087-C', [('genotype', 'i1', (2,))]), ('AK0088-C', [('genotype', 'i1', (2,))]), ('AK0089-C', [('genotype', 'i1', (2,))]), ('AK0090-C', [('genotype', 'i1', (2,))]), ('AK0091-C', [('genotype', 'i1', (2,))]), ('AK0092-C', [('genotype', 'i1', (2,))]), ('AK0093-C', [('genotype', 'i1', (2,))]), ('AK0094-C', [('genotype', 'i1', (2,))]), ('AK0095-C', [('genotype', 'i1', (2,))]), ('AK0096-C', [('genotype', 'i1', (2,))]), ('AK0098-C', [('genotype', 'i1', (2,))]), ('AK0099-C', [('genotype', 'i1', (2,))]), ('AK0100-C', [('genotype', 'i1', (2,))]), ('AK0101-C', [('genotype', 'i1', (2,))]), ('AK0102-C', 
[('genotype', 'i1', (2,))]), ('AK0103-C', [('genotype', 'i1', (2,))]), ('AK0104-C', [('genotype', 'i1', (2,))]), ('AK0105-C', [('genotype', 'i1', (2,))]), ('AK0106-C', [('genotype', 'i1', (2,))]), ('AK0108-C', [('genotype', 'i1', (2,))]), ('AK0109-C', [('genotype', 'i1', (2,))]), ('AK0110-C', [('genotype', 'i1', (2,))]), ('AK0116-C', [('genotype', 'i1', (2,))]), ('AK0119-C', [('genotype', 'i1', (2,))]), ('AK0127-C', [('genotype', 'i1', (2,))]), ('AN0007-C', [('genotype', 'i1', (2,))]), ('AN0008-C', [('genotype', 'i1', (2,))]), ('AN0009-C', [('genotype', 'i1', (2,))]), ('AN0010-C', [('genotype', 'i1', (2,))]), ('AN0011-C', [('genotype', 'i1', (2,))]), ('AN0012-C', [('genotype', 'i1', (2,))]), ('AN0014-C', [('genotype', 'i1', (2,))]), ('AN0016-C', [('genotype', 'i1', (2,))]), ('AN0017-C', [('genotype', 'i1', (2,))]), ('AN0018-C', [('genotype', 'i1', (2,))]), ('AN0019-C', [('genotype', 'i1', (2,))]), ('AN0020-C', [('genotype', 'i1', (2,))]), ('AN0022-C', [('genotype', 'i1', (2,))]), ('AN0023-C', [('genotype', 'i1', (2,))]), ('AN0024-C', [('genotype', 'i1', (2,))]), ('AN0025-C', [('genotype', 'i1', (2,))]), ('AN0026-C', [('genotype', 'i1', (2,))]), ('AN0027-C', [('genotype', 'i1', (2,))]), ('AN0028-C', [('genotype', 'i1', (2,))]), ('AN0029-C', [('genotype', 'i1', (2,))]), ('AN0030-C', [('genotype', 'i1', (2,))]), ('AN0031-C', [('genotype', 'i1', (2,))]), ('AN0032-C', [('genotype', 'i1', (2,))]), ('AN0033-C', [('genotype', 'i1', (2,))]), ('AN0034-C', [('genotype', 'i1', (2,))]), ('AN0035-C', [('genotype', 'i1', (2,))]), ('AN0036-C', [('genotype', 'i1', (2,))]), ('AN0037-C', [('genotype', 'i1', (2,))]), ('AN0038-C', [('genotype', 'i1', (2,))]), ('AN0039-C', [('genotype', 'i1', (2,))]), ('AN0040-C', [('genotype', 'i1', (2,))]), ('AN0041-C', [('genotype', 'i1', (2,))]), ('AN0042-C', [('genotype', 'i1', (2,))]), ('AN0043-C', [('genotype', 'i1', (2,))]), ('AN0045-C', [('genotype', 'i1', (2,))]), ('AN0046-C', [('genotype', 'i1', (2,))]), ('AN0047-C', [('genotype', 'i1', 
(2,))]), ('AN0048-C', [('genotype', 'i1', (2,))]), ('AN0049-C', [('genotype', 'i1', (2,))]), ('AN0050-C', [('genotype', 'i1', (2,))]), ('AN0051-C', [('genotype', 'i1', (2,))]), ('AN0053-C', [('genotype', 'i1', (2,))]), ('AN0054-C', [('genotype', 'i1', (2,))]), ('AN0055-C', [('genotype', 'i1', (2,))]), ('AN0056-C', [('genotype', 'i1', (2,))]), ('AN0057-C', [('genotype', 'i1', (2,))]), ('AN0058-C', [('genotype', 'i1', (2,))]), ('AN0059-C', [('genotype', 'i1', (2,))]), ('AN0060-C', [('genotype', 'i1', (2,))]), ('AN0063-C', [('genotype', 'i1', (2,))]), ('AN0064-C', [('genotype', 'i1', (2,))]), ('AN0065-C', [('genotype', 'i1', (2,))]), ('AN0066-C', [('genotype', 'i1', (2,))]), ('AN0067-C', [('genotype', 'i1', (2,))]), ('AN0068-C', [('genotype', 'i1', (2,))]), ('AN0069-C', [('genotype', 'i1', (2,))]), ('AN0070-C', [('genotype', 'i1', (2,))]), ('AN0071-C', [('genotype', 'i1', (2,))]), ('AN0072-C', [('genotype', 'i1', (2,))]), ('AN0073-C', [('genotype', 'i1', (2,))]), ('AN0074-C', [('genotype', 'i1', (2,))]), ('AN0075-C', [('genotype', 'i1', (2,))]), ('AN0076-C', [('genotype', 'i1', (2,))]), ('AN0077-C', [('genotype', 'i1', (2,))]), ('AN0079-C', [('genotype', 'i1', (2,))]), ('AN0080-C', [('genotype', 'i1', (2,))]), ('AN0081-C', [('genotype', 'i1', (2,))]), ('AN0082-C', [('genotype', 'i1', (2,))]), ('AN0083-C', [('genotype', 'i1', (2,))]), ('AN0084-C', [('genotype', 'i1', (2,))]), ('AN0085-C', [('genotype', 'i1', (2,))]), ('AN0086-C', [('genotype', 'i1', (2,))]), ('AN0087-C', [('genotype', 'i1', (2,))]), ('AN0088-C', [('genotype', 'i1', (2,))]), ('AN0089-C', [('genotype', 'i1', (2,))]), ('AN0090-C', [('genotype', 'i1', (2,))]), ('AN0091-C', [('genotype', 'i1', (2,))]), ('AN0092-C', [('genotype', 'i1', (2,))]), ('AN0093-C', [('genotype', 'i1', (2,))]), ('AN0094-C', [('genotype', 'i1', (2,))]), ('AN0095-C', [('genotype', 'i1', (2,))]), ('AN0096-C', [('genotype', 'i1', (2,))]), ('AN0097-C', [('genotype', 'i1', (2,))]), ('AN0098-C', [('genotype', 'i1', (2,))]), ('AN0099-C', 
[('genotype', 'i1', (2,))]), ('AN0100-C', [('genotype', 'i1', (2,))]), ('AN0101-C', [('genotype', 'i1', (2,))]), ('AN0102-C', [('genotype', 'i1', (2,))]), ('AN0103-C', [('genotype', 'i1', (2,))]), ('AN0104-C', [('genotype', 'i1', (2,))]), ('AN0105-C', [('genotype', 'i1', (2,))]), ('AN0106-C', [('genotype', 'i1', (2,))]), ('AN0107-C', [('genotype', 'i1', (2,))]), ('AN0108-C', [('genotype', 'i1', (2,))]), ('AN0109-C', [('genotype', 'i1', (2,))]), ('AN0111-C', [('genotype', 'i1', (2,))]), ('AN0112-C', [('genotype', 'i1', (2,))]), ('AN0113-C', [('genotype', 'i1', (2,))]), ('AN0114-C', [('genotype', 'i1', (2,))]), ('AN0115-C', [('genotype', 'i1', (2,))]), ('AN0117-C', [('genotype', 'i1', (2,))]), ('AN0120-C', [('genotype', 'i1', (2,))]), ('AN0121-C', [('genotype', 'i1', (2,))]), ('AN0122-C', [('genotype', 'i1', (2,))]), ('AN0123-C', [('genotype', 'i1', (2,))]), ('AN0124-C', [('genotype', 'i1', (2,))]), ('AN0125-C', [('genotype', 'i1', (2,))]), ('AN0126-C', [('genotype', 'i1', (2,))]), ('AN0127-C', [('genotype', 'i1', (2,))]), ('AN0128-C', [('genotype', 'i1', (2,))]), ('AN0129-C', [('genotype', 'i1', (2,))]), ('AN0130-C', [('genotype', 'i1', (2,))]), ('AN0131-C', [('genotype', 'i1', (2,))]), ('AN0132-C', [('genotype', 'i1', (2,))]), ('AN0134-C', [('genotype', 'i1', (2,))]), ('AN0135-C', [('genotype', 'i1', (2,))]), ('AN0136-C', [('genotype', 'i1', (2,))]), ('AN0137-C', [('genotype', 'i1', (2,))]), ('AN0138-C', [('genotype', 'i1', (2,))]), ('AN0139-C', [('genotype', 'i1', (2,))]), ('AN0140-C', [('genotype', 'i1', (2,))]), ('AN0141-C', [('genotype', 'i1', (2,))]), ('AN0143-C', [('genotype', 'i1', (2,))]), ('AN0147-C', [('genotype', 'i1', (2,))]), ('AN0149-C', [('genotype', 'i1', (2,))]), ('AN0151-C', [('genotype', 'i1', (2,))]), ('AN0152-C', [('genotype', 'i1', (2,))]), ('AN0153-C', [('genotype', 'i1', (2,))]), ('AN0154-C', [('genotype', 'i1', (2,))]), ('AN0155-C', [('genotype', 'i1', (2,))]), ('AN0156-C', [('genotype', 'i1', (2,))]), ('AN0157-C', [('genotype', 'i1', 
(2,))]), ('AN0158-C', [('genotype', 'i1', (2,))]), ('AN0159-C', [('genotype', 'i1', (2,))]), ('AN0160-C', [('genotype', 'i1', (2,))]), ('AN0162-C', [('genotype', 'i1', (2,))]), ('AN0163-C', [('genotype', 'i1', (2,))]), ('AN0164-C', [('genotype', 'i1', (2,))]), ('AN0165-C', [('genotype', 'i1', (2,))]), ('AN0166-C', [('genotype', 'i1', (2,))]), ('AN0167-C', [('genotype', 'i1', (2,))]), ('AN0168-C', [('genotype', 'i1', (2,))]), ('AN0169-C', [('genotype', 'i1', (2,))]), ('AN0170-C', [('genotype', 'i1', (2,))]), ('AN0171-C', [('genotype', 'i1', (2,))]), ('AN0172-C', [('genotype', 'i1', (2,))]), ('AN0173-C', [('genotype', 'i1', (2,))]), ('AN0174-C', [('genotype', 'i1', (2,))]), ('AN0175-C', [('genotype', 'i1', (2,))]), ('AN0176-C', [('genotype', 'i1', (2,))]), ('AN0177-C', [('genotype', 'i1', (2,))]), ('AN0178-C', [('genotype', 'i1', (2,))]), ('AN0179-C', [('genotype', 'i1', (2,))]), ('AN0180-C', [('genotype', 'i1', (2,))]), ('AN0181-C', [('genotype', 'i1', (2,))]), ('AN0182-C', [('genotype', 'i1', (2,))]), ('AN0183-C', [('genotype', 'i1', (2,))]), ('AN0184-C', [('genotype', 'i1', (2,))]), ('AN0185-C', [('genotype', 'i1', (2,))]), ('AN0186-C', [('genotype', 'i1', (2,))]), ('AN0187-C', [('genotype', 'i1', (2,))]), ('AN0188-C', [('genotype', 'i1', (2,))]), ('AN0189-C', [('genotype', 'i1', (2,))]), ('AN0190-C', [('genotype', 'i1', (2,))]), ('AN0191-C', [('genotype', 'i1', (2,))]), ('AN0192-C', [('genotype', 'i1', (2,))]), ('AN0193-C', [('genotype', 'i1', (2,))]), ('AN0194-C', [('genotype', 'i1', (2,))]), ('AN0196-C', [('genotype', 'i1', (2,))]), ('AN0197-C', [('genotype', 'i1', (2,))]), ('AN0198-C', [('genotype', 'i1', (2,))]), ('AN0199-C', [('genotype', 'i1', (2,))]), ('AN0200-C', [('genotype', 'i1', (2,))]), ('AN0201-C', [('genotype', 'i1', (2,))]), ('AN0202-C', [('genotype', 'i1', (2,))]), ('AN0203-C', [('genotype', 'i1', (2,))]), ('AN0204-C', [('genotype', 'i1', (2,))]), ('AN0205-C', [('genotype', 'i1', (2,))]), ('AN0206-C', [('genotype', 'i1', (2,))]), ('AN0207-C', 
[('genotype', 'i1', (2,))]), ('AN0208-C', [('genotype', 'i1', (2,))]), ('AN0209-C', [('genotype', 'i1', (2,))]), ('AN0210-C', [('genotype', 'i1', (2,))]), ('AN0212-C', [('genotype', 'i1', (2,))]), ('AN0213-C', [('genotype', 'i1', (2,))]), ('AN0214-C', [('genotype', 'i1', (2,))]), ('AN0215-C', [('genotype', 'i1', (2,))]), ('AN0217-C', [('genotype', 'i1', (2,))]), ('AN0218-C', [('genotype', 'i1', (2,))]), ('AN0219-C', [('genotype', 'i1', (2,))]), ('AN0220-C', [('genotype', 'i1', (2,))]), ('AN0221-C', [('genotype', 'i1', (2,))]), ('AN0222-C', [('genotype', 'i1', (2,))]), ('AN0223-C', [('genotype', 'i1', (2,))]), ('AN0224-C', [('genotype', 'i1', (2,))]), ('AN0225-C', [('genotype', 'i1', (2,))]), ('AN0226-C', [('genotype', 'i1', (2,))]), ('AN0227-C', [('genotype', 'i1', (2,))]), ('AN0228-C', [('genotype', 'i1', (2,))]), ('AN0229-C', [('genotype', 'i1', (2,))]), ('AN0230-C', [('genotype', 'i1', (2,))]), ('AN0231-C', [('genotype', 'i1', (2,))]), ('AN0233-C', [('genotype', 'i1', (2,))]), ('AN0234-C', [('genotype', 'i1', (2,))]), ('AN0235-C', [('genotype', 'i1', (2,))]), ('AN0236-C', [('genotype', 'i1', (2,))]), ('AN0237-C', [('genotype', 'i1', (2,))]), ('AN0238-C', [('genotype', 'i1', (2,))]), ('AN0239-C', [('genotype', 'i1', (2,))]), ('AN0240-C', [('genotype', 'i1', (2,))]), ('AN0241-C', [('genotype', 'i1', (2,))]), ('AN0242-C', [('genotype', 'i1', (2,))]), ('AN0243-C', [('genotype', 'i1', (2,))]), ('AN0244-C', [('genotype', 'i1', (2,))]), ('AN0245-C', [('genotype', 'i1', (2,))]), ('AN0246-C', [('genotype', 'i1', (2,))]), ('AN0247-C', [('genotype', 'i1', (2,))]), ('AN0248-C', [('genotype', 'i1', (2,))]), ('AN0250-C', [('genotype', 'i1', (2,))]), ('AN0251-C', [('genotype', 'i1', (2,))]), ('AN0252-C', [('genotype', 'i1', (2,))]), ('AN0253-C', [('genotype', 'i1', (2,))]), ('AN0254-C', [('genotype', 'i1', (2,))]), ('AN0255-C', [('genotype', 'i1', (2,))]), ('AN0256-C', [('genotype', 'i1', (2,))]), ('AN0258-C', [('genotype', 'i1', (2,))]), ('AN0259-C', [('genotype', 'i1', 
(2,))]), ('AN0260-C', [('genotype', 'i1', (2,))]), ('AN0261-C', [('genotype', 'i1', (2,))]), ('AN0262-C', [('genotype', 'i1', (2,))]), ('AN0263-C', [('genotype', 'i1', (2,))]), ('AN0264-C', [('genotype', 'i1', (2,))]), ('AN0266-C', [('genotype', 'i1', (2,))]), ('AN0267-C', [('genotype', 'i1', (2,))]), ('AN0268-C', [('genotype', 'i1', (2,))]), ('AN0269-C', [('genotype', 'i1', (2,))]), ('AN0270-C', [('genotype', 'i1', (2,))]), ('AN0272-C', [('genotype', 'i1', (2,))]), ('AN0275-C', [('genotype', 'i1', (2,))]), ('AN0276-C', [('genotype', 'i1', (2,))]), ('AN0277-C', [('genotype', 'i1', (2,))]), ('AN0280-C', [('genotype', 'i1', (2,))]), ('AN0282-C', [('genotype', 'i1', (2,))]), ('AN0283-C', [('genotype', 'i1', (2,))]), ('AN0284-C', [('genotype', 'i1', (2,))]), ('AN0285-C', [('genotype', 'i1', (2,))]), ('AN0286-C', [('genotype', 'i1', (2,))]), ('AN0287-C', [('genotype', 'i1', (2,))]), ('AN0288-C', [('genotype', 'i1', (2,))]), ('AN0290-C', [('genotype', 'i1', (2,))]), ('AN0291-C', [('genotype', 'i1', (2,))]), ('AN0292-C', [('genotype', 'i1', (2,))]), ('AN0294-C', [('genotype', 'i1', (2,))]), ('AN0295-C', [('genotype', 'i1', (2,))]), ('AN0296-C', [('genotype', 'i1', (2,))]), ('AN0297-C', [('genotype', 'i1', (2,))]), ('AN0298-C', [('genotype', 'i1', (2,))]), ('AN0299-C', [('genotype', 'i1', (2,))]), ('AN0300-C', [('genotype', 'i1', (2,))]), ('AN0301-C', [('genotype', 'i1', (2,))]), ('AN0303-C', [('genotype', 'i1', (2,))]), ('AN0304-C', [('genotype', 'i1', (2,))]), ('AN0305-C', [('genotype', 'i1', (2,))]), ('AN0307-C', [('genotype', 'i1', (2,))]), ('AN0308-C', [('genotype', 'i1', (2,))]), ('AN0309-C', [('genotype', 'i1', (2,))]), ('AN0310-C', [('genotype', 'i1', (2,))]), ('AN0312-C', [('genotype', 'i1', (2,))]), ('AN0313-C', [('genotype', 'i1', (2,))]), ('AN0314-C', [('genotype', 'i1', (2,))]), ('AN0315-C', [('genotype', 'i1', (2,))]), ('AN0317-C', [('genotype', 'i1', (2,))]), ('AN0318-C', [('genotype', 'i1', (2,))]), ('AN0319-C', [('genotype', 'i1', (2,))]), ('AN0321-C', 
[('genotype', 'i1', (2,))]), ('AR0007-C', [('genotype', 'i1', (2,))]), ('AR0008-C', [('genotype', 'i1', (2,))]), ('AR0009-C', [('genotype', 'i1', (2,))]), ('AR0010-C', [('genotype', 'i1', (2,))]), ('AR0011-C', [('genotype', 'i1', (2,))]), ('AR0012-C', [('genotype', 'i1', (2,))]), ('AR0014-C', [('genotype', 'i1', (2,))]), ('AR0015-C', [('genotype', 'i1', (2,))]), ('AR0017-C', [('genotype', 'i1', (2,))]), ('AR0019-C', [('genotype', 'i1', (2,))]), ('AR0020-C', [('genotype', 'i1', (2,))]), ('AR0021-C', [('genotype', 'i1', (2,))]), ('AR0022-C', [('genotype', 'i1', (2,))]), ('AR0023-C', [('genotype', 'i1', (2,))]), ('AR0024-C', [('genotype', 'i1', (2,))]), ('AR0026-C', [('genotype', 'i1', (2,))]), ('AR0027-C', [('genotype', 'i1', (2,))]), ('AR0034-C', [('genotype', 'i1', (2,))]), ('AR0035-C', [('genotype', 'i1', (2,))]), ('AR0042-C', [('genotype', 'i1', (2,))]), ('AR0043-C', [('genotype', 'i1', (2,))]), ('AR0045-C', [('genotype', 'i1', (2,))]), ('AR0047-C', [('genotype', 'i1', (2,))]), ('AR0049-C', [('genotype', 'i1', (2,))]), ('AR0050-C', [('genotype', 'i1', (2,))]), ('AR0051-C', [('genotype', 'i1', (2,))]), ('AR0053-C', [('genotype', 'i1', (2,))]), ('AR0054-C', [('genotype', 'i1', (2,))]), ('AR0057-C', [('genotype', 'i1', (2,))]), ('AR0059-C', [('genotype', 'i1', (2,))]), ('AR0061-C', [('genotype', 'i1', (2,))]), ('AR0062-C', [('genotype', 'i1', (2,))]), ('AR0063-C', [('genotype', 'i1', (2,))]), ('AR0065-C', [('genotype', 'i1', (2,))]), ('AR0066-C', [('genotype', 'i1', (2,))]), ('AR0069-C', [('genotype', 'i1', (2,))]), ('AR0070-C', [('genotype', 'i1', (2,))]), ('AR0071-C', [('genotype', 'i1', (2,))]), ('AR0072-C', [('genotype', 'i1', (2,))]), ('AR0073-C', [('genotype', 'i1', (2,))]), ('AR0074-C', [('genotype', 'i1', (2,))]), ('AR0075-C', [('genotype', 'i1', (2,))]), ('AR0076-C', [('genotype', 'i1', (2,))]), ('AR0078-C', [('genotype', 'i1', (2,))]), ('AR0079-C', [('genotype', 'i1', (2,))]), ('AR0080-C', [('genotype', 'i1', (2,))]), ('AR0081-C', [('genotype', 'i1', 
(2,))]), ('AR0083-C', [('genotype', 'i1', (2,))]), ('AR0084-C', [('genotype', 'i1', (2,))]), ('AR0086-C', [('genotype', 'i1', (2,))]), ('AR0087-C', [('genotype', 'i1', (2,))]), ('AR0089-C', [('genotype', 'i1', (2,))]), ('AR0090-C', [('genotype', 'i1', (2,))]), ('AR0092-C', [('genotype', 'i1', (2,))]), ('AR0093-C', [('genotype', 'i1', (2,))]), ('AR0095-C', [('genotype', 'i1', (2,))]), ('AR0096-C', [('genotype', 'i1', (2,))]), ('AR0098-C', [('genotype', 'i1', (2,))]), ('AR0099-C', [('genotype', 'i1', (2,))]), ('AR0100-C', [('genotype', 'i1', (2,))]), ('AS0001-C', [('genotype', 'i1', (2,))]), ('AS0002-C', [('genotype', 'i1', (2,))]), ('AS0003-C', [('genotype', 'i1', (2,))]), ('AS0004-C', [('genotype', 'i1', (2,))]), ('AS0006-C', [('genotype', 'i1', (2,))]), ('AS0007-C', [('genotype', 'i1', (2,))]), ('AS0008-C', [('genotype', 'i1', (2,))]), ('AS0009-C', [('genotype', 'i1', (2,))]), ('AS0010-C', [('genotype', 'i1', (2,))]), ('AS0011-C', [('genotype', 'i1', (2,))]), ('AS0012-C', [('genotype', 'i1', (2,))]), ('AS0013-C', [('genotype', 'i1', (2,))]), ('AS0014-C', [('genotype', 'i1', (2,))]), ('AS0015-C', [('genotype', 'i1', (2,))]), ('AS0016-C', [('genotype', 'i1', (2,))]), ('AS0017-C', [('genotype', 'i1', (2,))]), ('AS0018-C', [('genotype', 'i1', (2,))]), ('AS0019-C', [('genotype', 'i1', (2,))]), ('AS0020-C', [('genotype', 'i1', (2,))]), ('AS0021-C', [('genotype', 'i1', (2,))]), ('AS0022-C', [('genotype', 'i1', (2,))]), ('AS0024-C', [('genotype', 'i1', (2,))]), ('AS0026-C', [('genotype', 'i1', (2,))]), ('AS0028-C', [('genotype', 'i1', (2,))]), ('AS0030-C', [('genotype', 'i1', (2,))]), ('AS0032-C', [('genotype', 'i1', (2,))]), ('AS0033-C', [('genotype', 'i1', (2,))]), ('AS0034-C', [('genotype', 'i1', (2,))]), ('AS0035-C', [('genotype', 'i1', (2,))]), ('AS0036-C', [('genotype', 'i1', (2,))]), ('AS0037-C', [('genotype', 'i1', (2,))]), ('AS0039-C', [('genotype', 'i1', (2,))]), ('AS0042-C', [('genotype', 'i1', (2,))]), ('AS0044-C', [('genotype', 'i1', (2,))]), ('AS0045-C', 
[('genotype', 'i1', (2,))]), ('AS0047-C', [('genotype', 'i1', (2,))]), ('AS0049-C', [('genotype', 'i1', (2,))]), ('AS0052-C', [('genotype', 'i1', (2,))]), ('AS0053-C', [('genotype', 'i1', (2,))]), ('AS0054-C', [('genotype', 'i1', (2,))]), ('AS0055-C', [('genotype', 'i1', (2,))]), ('AS0056-C', [('genotype', 'i1', (2,))]), ('AS0058-C', [('genotype', 'i1', (2,))]), ('AS0059-C', [('genotype', 'i1', (2,))]), ('AS0064-C', [('genotype', 'i1', (2,))]), ('AS0065-C', [('genotype', 'i1', (2,))]), ('AS0066-C', [('genotype', 'i1', (2,))]), ('AS0068-C', [('genotype', 'i1', (2,))]), ('AS0069-C', [('genotype', 'i1', (2,))]), ('AS0070-C', [('genotype', 'i1', (2,))]), ('AS0071-C', [('genotype', 'i1', (2,))]), ('AS0072-C', [('genotype', 'i1', (2,))]), ('AS0073-C', [('genotype', 'i1', (2,))]), ('AS0074-C', [('genotype', 'i1', (2,))]), ('AS0076-C', [('genotype', 'i1', (2,))]), ('AS0077-C', [('genotype', 'i1', (2,))]), ('AV0001-C', [('genotype', 'i1', (2,))]), ('AV0002-C', [('genotype', 'i1', (2,))]), ('AV0003-C', [('genotype', 'i1', (2,))]), ('AV0004-C', [('genotype', 'i1', (2,))]), ('AV0005-C', [('genotype', 'i1', (2,))]), ('AV0007-C', [('genotype', 'i1', (2,))]), ('AV0008-C', [('genotype', 'i1', (2,))]), ('AV0009-C', [('genotype', 'i1', (2,))]), ('AV0010-C', [('genotype', 'i1', (2,))]), ('AV0011-C', [('genotype', 'i1', (2,))]), ('AV0012-C', [('genotype', 'i1', (2,))]), ('AV0013-C', [('genotype', 'i1', (2,))]), ('AV0014-C', [('genotype', 'i1', (2,))]), ('AV0015-C', [('genotype', 'i1', (2,))]), ('AV0018-C', [('genotype', 'i1', (2,))]), ('AV0024-C', [('genotype', 'i1', (2,))]), ('AV0026-C', [('genotype', 'i1', (2,))]), ('AV0027-C', [('genotype', 'i1', (2,))]), ('AV0029-C', [('genotype', 'i1', (2,))]), ('AV0030-C', [('genotype', 'i1', (2,))]), ('AV0031-C', [('genotype', 'i1', (2,))]), ('AV0032-C', [('genotype', 'i1', (2,))]), ('AV0033-C', [('genotype', 'i1', (2,))]), ('AV0034-C', [('genotype', 'i1', (2,))]), ('AV0035-C', [('genotype', 'i1', (2,))]), ('AV0036-C', [('genotype', 'i1', 
(2,))]), ('AV0039-C', [('genotype', 'i1', (2,))]), ('AV0041-C', [('genotype', 'i1', (2,))]), ('AV0044-C', [('genotype', 'i1', (2,))]), ('AV0045-C', [('genotype', 'i1', (2,))]), ('AV0047-C', [('genotype', 'i1', (2,))]), ('AD0231-C', [('genotype', 'i1', (2,))]), ('AD0232-C', [('genotype', 'i1', (2,))]), ('AD0254-C', [('genotype', 'i1', (2,))]), ('AD0255-C', [('genotype', 'i1', (2,))]), ('AD0305-C', [('genotype', 'i1', (2,))]), ('AD0306-C', [('genotype', 'i1', (2,))]), ('AD0347-C', [('genotype', 'i1', (2,))]), ('AD0348-C', [('genotype', 'i1', (2,))])])

In [29]:
# Ratio of two measurements (about 48x). Presumably timings in seconds taken from
# earlier cells — TODO confirm which runs produced 4.3 and 0.09.
4.3 / 0.09


Out[29]:
47.77777777777778

Legacy


In [ ]:
# Unexecuted legacy call (the cell has no execution count).
# NOTE(review): neither `vcf_block_read` nor `vcf_fn` is defined in the cells visible
# here — confirm both exist before running.
vcf_block_read(vcf_fn, buffer_size=2**15, block_size=2**25)

In [3]:
# Single timed run with a tiny buffer (buffer_size=10); the following cells repeat
# the same call with larger buffer sizes for comparison.
%time spike_read_len(vcf_fn, buffer_size=10)


CPU times: user 700 ms, sys: 0 ns, total: 700 ms
Wall time: 697 ms
Out[3]:
6140661

In [4]:
# Buffer-size sweep: same call with buffer_size=100, measured with %timeit.
%timeit spike_read_len(vcf_fn, buffer_size=100)


10 loops, best of 3: 105 ms per loop

In [5]:
# Buffer-size sweep: buffer_size=1000.
%timeit spike_read_len(vcf_fn, buffer_size=1000)


10 loops, best of 3: 50 ms per loop

In [6]:
# Buffer-size sweep: buffer_size=2**15 (32768), the largest value tried here.
%timeit spike_read_len(vcf_fn, buffer_size=2**15)


10 loops, best of 3: 39 ms per loop

In [7]:
# Buffer-size sweep: buffer_size=2**12 (4096), between the 1000 and 2**15 settings.
%timeit spike_read_len(vcf_fn, buffer_size=2**12)


10 loops, best of 3: 45.3 ms per loop

In [8]:
import cProfile

In [9]:
# Function-level profile of one run with the 2**15 buffer; sort='time' orders the
# report by each function's internal time (the `tottime` column).
cProfile.run('spike_read_len(vcf_fn, buffer_size=2**15)', sort='time')


         6146762 function calls (6146566 primitive calls) in 0.941 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.584    0.584    0.941    0.941 io_vcf.pyx:90(spike_read_len)
  6140662    0.322    0.000    0.356    0.000 io_vcf.pyx:74(BufferedInputStream_next)
      189    0.019    0.000    0.019    0.000 {method 'decompress' of 'zlib.Decompress' objects}
      285    0.008    0.000    0.008    0.000 {built-in method zlib.crc32}
  385/189    0.001    0.000    0.033    0.000 {method 'read' of '_io.BufferedReader' objects}
      190    0.001    0.000    0.031    0.000 gzip.py:436(read)
      190    0.001    0.000    0.033    0.000 _compression.py:66(readinto)
      761    0.001    0.000    0.001    0.000 gzip.py:80(read)
      189    0.001    0.000    0.034    0.000 gzip.py:269(read)
      189    0.000    0.000    0.035    0.000 io_vcf.pyx:56(BufferedInputStream_fill_buffer)
       97    0.000    0.000    0.001    0.000 gzip.py:403(_read_gzip_header)
       95    0.000    0.000    0.001    0.000 gzip.py:491(_read_eof)
      188    0.000    0.000    0.008    0.000 gzip.py:487(_add_read_data)
      380    0.000    0.000    0.001    0.000 gzip.py:387(_read_exact)
      283    0.000    0.000    0.000    0.000 gzip.py:93(prepend)
       96    0.000    0.000    0.000    0.000 {built-in method zlib.decompressobj}
      189    0.000    0.000    0.000    0.000 _compression.py:12(_check_not_closed)
     1610    0.000    0.000    0.000    0.000 {built-in method builtins.len}
      285    0.000    0.000    0.000    0.000 {built-in method _struct.unpack}
        1    0.000    0.000    0.941    0.941 {built-in method builtins.exec}
      190    0.000    0.000    0.000    0.000 {method 'cast' of 'memoryview' objects}
        1    0.000    0.000    0.000    0.000 {built-in method io.open}
       97    0.000    0.000    0.000    0.000 gzip.py:383(_init_read)
      191    0.000    0.000    0.000    0.000 gzip.py:296(closed)
        1    0.000    0.000    0.000    0.000 gzip.py:123(__init__)
        1    0.000    0.000    0.000    0.000 gzip.py:20(open)
        1    0.000    0.000    0.000    0.000 gzip.py:376(__init__)
        2    0.000    0.000    0.000    0.000 {method 'close' of '_io.BufferedReader' objects}
        1    0.000    0.000    0.000    0.000 _compression.py:39(__init__)
        1    0.000    0.000    0.000    0.000 gzip.py:74(__init__)
        1    0.000    0.000    0.941    0.941 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 _compression.py:59(close)
        1    0.000    0.000    0.000    0.000 gzip.py:300(close)
        1    0.000    0.000    0.001    0.001 io_vcf.pyx:44(__cinit__)
        1    0.000    0.000    0.000    0.000 {method 'replace' of 'str' objects}
        1    0.000    0.000    0.000    0.000 _compression.py:150(tell)
        1    0.000    0.000    0.000    0.000 {method 'startswith' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
        1    0.000    0.000    0.000    0.000 _compression.py:36(readable)
        1    0.000    0.000    0.000    0.000 {function DecompressReader.close at 0x7f1ca6ab9c80}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}



In [10]:
import line_profiler

# Line-level profile of one spike_read_len call. The entry point and the two
# buffered-stream helpers are registered up front via the constructor, which
# registers each positional function exactly as add_function() would.
l = line_profiler.LineProfiler(
    spike_read_len,
    BufferedInputStream_next,
    BufferedInputStream_fill_buffer,
)
l.runcall(spike_read_len, vcf_fn, buffer_size=2**14)
l.print_stats()

Legacy


In [ ]:
# NOTE(review): this binds the CLineProfiler class itself to `l` (there are no call
# parentheses), so `l` is not a profiler instance after this cell. The cell was
# never executed; confirm whether an instance was intended.
l = line_profiler.CLineProfiler

In [3]:
# Show 2**15 as a plain integer (the value passed as buffer_size in other cells).
2**15


Out[3]:
32768

In [ ]:
# Call io_vcf.vcf_block_read with a 2**16 buffer and block_size=1000, keeping the
# result in `blocks`. The text recorded under this cell is debug tracing that
# mentions HeaderParser_parse.
# NOTE(review): `io_vcf` and `vcf_fn` are not defined in the cells visible here —
# confirm before re-running.
blocks = io_vcf.vcf_block_read(vcf_fn, buffer_size=2**16, block_size=1000)


HeaderParser_parse 37873392 35
20
b'##fileformat=VCFv4.1'
HeaderParser_parse 37873413 35
60
b'##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'
HeaderParser_parse 37873474 35
124
b'##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele countin genotypes, for each ALT allele, in the same order aslisted">'
HeaderParser_parse 37873599 35
32
b'##contig=<ID=2L,length=49364325>'
HeaderParser_parse 37873632 35
32
b'##contig=<ID=2R,length=61545105>'
HeaderParser_parse 37873665 35
32
b'##contig=<ID=3L,length=41963435>'
HeaderParser_parse 37873698 35
32
b'##contig=<ID=3R,length=53200684>'
HeaderParser_parse 37873731 35
34
b'##contig=<ID=UNKN,length=42389979>'
HeaderParser_parse 37873766 35
31
b'##contig=<ID=X,length=24393108>'
HeaderParser_parse 37873798 35
38
b'##contig=<ID=Y_unplaced,length=237045>'
HeaderParser_parse 37873837 35
106
b'##reference=file:///data/anopheles/ag1000g/data/genome/AgamP3/Anopheles-gambiae-PEST_CHROMOSOMES_AgamP3.fa'
HeaderParser_parse 37873944 35
7002

In [ ]: