In [1]:
%run ../../shared_setup.ipynb
In [2]:
# load variation data, excluding duplicate samples and samples showing
# excessive recombination
sample_exclusions = dup_samples.copy()
for cross in excessive_recomb_samples:
    sample_exclusions[cross] += excessive_recomb_samples[cross]
callsets = load_callsets(COMBINED_CALLSET_FN_TEMPLATE,
                         sample_exclusions=sample_exclusions,
                         variant_filter='FILTER_PASS',
                         call_filter=combined_conf_calls)
samples = {cross: callsets[cross]['calldata'].dtype.names
           for cross in CROSSES}
# the first two samples in each callset are the parents of the cross
progeny = {cross: samples[cross][2:] for cross in CROSSES}
n_progeny = {cross: len(progeny[cross]) for cross in CROSSES}
print(n_progeny)
print(np.sum(list(n_progeny.values())))
To infer CO events we first mask out genotypes on blocks shorter than 10 kb, so that short gene-conversion tracts do not interrupt the long parental haplotype blocks whose switches we count as crossovers.
In [3]:
min_co_block_size = 10000
callsets_co = {cross: filter_calls(callsets[cross],
                                   min_haplen_calls(min_co_block_size))
               for cross in CROSSES}
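For orientation, here is a minimal sketch of what a call filter like min_haplen_calls presumably returns, assuming (purely for illustration) that each call carries a haplotype-block length annotation named haplen; the real helper is defined in the shared setup and may work differently:

# hypothetical sketch only -- the real min_haplen_calls lives in the shared
# setup; 'haplen' is an assumed per-call annotation, not a confirmed field
def min_haplen_calls_sketch(min_len):
    def call_filter(calls):
        # keep only calls whose inheritance block spans at least min_len bp
        return calls['haplen'] >= min_len
    return call_filter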
In [4]:
def tabulate_crossovers(cross):
    variants, calldata, _ = unpack_callset(callsets_co[cross])
    tbl = (
        tabulate_switches(variants, calldata)
        .addfield('cross', cross)
        .rename({'pos': 'co_pos_mid', 'lpos': 'co_pos_min',
                 'rpos': 'co_pos_max', 'range': 'co_pos_range'})
        # map the 'from'/'to' inheritance codes onto parent names taken
        # from the cross identifier
        .addfield('co_from_parent', lambda r: r.cross.split('_')[r['from'] - 7])
        .addfield('co_to_parent', lambda r: r.cross.split('_')[r['to'] - 7])
        .cutout('from', 'to')
    )
    return etl.wrap(tbl)
tbl_co = (etl
    .cat(*[tabulate_crossovers(cross) for cross in CROSSES])
    .sort(key=('chrom', 'co_pos_mid'))
)
tbl_co.totsv(os.path.join(PUBLIC_DIR, 'tbl_co.txt'))
tbl_co.topickle(os.path.join(PUBLIC_DIR, 'tbl_co.pickle'))
display_with_nrows(tbl_co, caption='CO events')
Start by tabulating all the short inheritance blocks individually; these are the candidate gene-conversion tracts.
In [5]:
def tabulate_short_inheritance_blocks(cross):
    variants, calldata, _ = unpack_callset(callsets[cross])
    _, _, tbl_blocks = haplotypes(variants, calldata)
    tbl = (
        tbl_blocks
        # keep short blocks that have called inheritance on both sides,
        # i.e., blocks not sitting at the edge of a haplotype
        .select(lambda r: r.length_min < min_co_block_size
                          and r.nxt_inheritance != -1
                          and r.prv_inheritance != -1)
        .addfield('cross', cross)
        .addfield('is_complex', False)
        .addfield('blocks', 1)
    )
    return tbl

tbl_short_blocks = (etl
    .cat(*[tabulate_short_inheritance_blocks(cross) for cross in CROSSES])
    .sort(key=('sample', 'chrom', 'start_min'))
)
display_with_nrows(tbl_short_blocks, caption='short inheritance blocks')
In [6]:
tbl_short_blocks.valuecounts('sample').head(10)
In [7]:
df_sb = tbl_short_blocks.valuecounts('sample').todataframe()
plt.hist(df_sb['count']);
Now combine adjacent blocks together: consecutive short blocks where the stop of one coincides with the start of the next (comparing midpoint coordinates) are merged into a single, possibly complex, conversion tract.
In [8]:
class MergeAdjacentBlocks(object):
    """Merge runs of adjacent blocks (same sample and chromosome, with the
    stop of one block coinciding with the start of the next) into single
    conversion tracts."""

    def __init__(self, source):
        self.source = source

    def __iter__(self):
        tbl = etl.wrap(self.source)
        it = iter(tbl.records())
        yield ['sample', 'cross', 'chrom',
               'start_min', 'start_mid', 'start_max',
               'stop_min', 'stop_mid', 'stop_max',
               'length_min', 'length_mid', 'length_max',
               'support', 'is_complex', 'blocks']
        # start accumulating from the first block
        cur = next(it)
        sample = cur.sample
        cross = cur.cross
        chrom = cur.chrom
        start_min = cur.start_min
        start_mid = cur.start_mid
        start_max = cur.start_max
        stop_min = cur.stop_min
        stop_mid = cur.stop_mid
        stop_max = cur.stop_max
        length_min = cur.length_min
        length_mid = cur.length_mid
        length_max = cur.length_max
        support = cur.support
        is_complex = cur.is_complex
        blocks = cur.blocks
        for cur in it:
            # is the current block adjacent to the one being accumulated?
            if sample == cur.sample and chrom == cur.chrom and stop_mid == cur.start_mid:
                # yes, merge: extend the stop coordinates, recompute lengths,
                # accumulate support, and flag the tract as complex
                stop_min = cur.stop_min
                stop_mid = cur.stop_mid
                stop_max = cur.stop_max
                support += cur.support
                length_min = stop_min - start_max
                length_mid = stop_mid - start_mid
                length_max = stop_max - start_min
                is_complex = True
                blocks += 1
            else:
                # no, yield the accumulated tract ...
                yield (sample, cross, chrom, start_min, start_mid, start_max,
                       stop_min, stop_mid, stop_max,
                       length_min, length_mid, length_max,
                       support, is_complex, blocks)
                # ... and start accumulating from the current block
                sample = cur.sample
                cross = cur.cross
                chrom = cur.chrom
                start_min = cur.start_min
                start_mid = cur.start_mid
                start_max = cur.start_max
                stop_min = cur.stop_min
                stop_mid = cur.stop_mid
                stop_max = cur.stop_max
                length_min = cur.length_min
                length_mid = cur.length_mid
                length_max = cur.length_max
                support = cur.support
                is_complex = cur.is_complex
                blocks = cur.blocks
        # yield the final accumulated tract
        yield (sample, cross, chrom, start_min, start_mid, start_max,
               stop_min, stop_mid, stop_max,
               length_min, length_mid, length_max,
               support, is_complex, blocks)
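As a quick sanity check on the merge logic, here it is run on a toy table (sample, cross and coordinate values are made up for illustration): the first two blocks abut on the same chromosome and should collapse into a single complex tract with summed support, while the third, on another chromosome, passes through unchanged.

toy_blocks = etl.wrap([
    ['sample', 'cross', 'chrom', 'start_min', 'start_mid', 'start_max',
     'stop_min', 'stop_mid', 'stop_max', 'length_min', 'length_mid',
     'length_max', 'support', 'is_complex', 'blocks'],
    ['s1', '3d7_hb3', 'chr1', 100, 150, 200, 800, 900, 1000, 600, 750, 900, 2, False, 1],
    ['s1', '3d7_hb3', 'chr1', 800, 900, 1000, 1500, 1600, 1700, 500, 700, 900, 3, False, 1],
    ['s1', '3d7_hb3', 'chr2', 100, 150, 200, 800, 900, 1000, 600, 750, 900, 1, False, 1],
])
# expect two rows out: the chr1 blocks merged (support=5, blocks=2,
# is_complex=True), the chr2 block unchanged
etl.wrap(MergeAdjacentBlocks(toy_blocks)).displayall()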
In [9]:
tbl_conversion_tracts = etl.wrap(
    MergeAdjacentBlocks(tbl_short_blocks.cutout('prv_inheritance', 'inheritance', 'nxt_inheritance'))
)
display_with_nrows(tbl_conversion_tracts, caption='conversion tracts')
In [10]:
tbl_conversion_tracts.valuecounts('sample').head()
In [11]:
tbl_conversion_tracts.valuecounts('is_complex')
In [12]:
tbl_conversion_tracts.valuecounts('blocks')
In [13]:
X = tbl_conversion_tracts.valuecounts('sample').values('count').list()
plt.hist(X);
In [14]:
# keep only tracts with robust support: more than one supporting site
# and a minimum length above 100 bp
tbl_tracts_robust = tbl_conversion_tracts.select(lambda r: r.support > 1 and r.length_min > 100)
display_with_nrows(tbl_tracts_robust, caption='conversion tracts with robust support')
In [15]:
tbl_tracts_robust.valuecounts('is_complex')
In [16]:
tbl_tracts_robust.valuecounts('blocks')
In [17]:
tbl_tracts_robust.valuecounts('sample').head(5).display()
tbl_tracts_robust.valuecounts('sample').tail(5).display()
In [18]:
X = tbl_tracts_robust.valuecounts('sample').values('count').list()
plt.hist(X, bins=10);
In [19]:
X = tbl_tracts_robust.values('length_min').list()
plt.hist(X, bins=60);
Now figure out which conversion tracts are associated with COs and which are NCOs, by interval-joining the tracts against the CO events for the same sample and chromosome: tracts overlapping a CO interval are CO-associated, the rest are NCOs.
In [20]:
tbl_tracts_differentiated = (
    tbl_tracts_robust
    .addfield('facet', lambda r: '%s_%s' % (r.sample, r.chrom))
    .intervalleftjoin(tbl_co.addfield('facet', lambda r: '%s_%s' % (r.sample, r.chrom)),
                      lkey='facet', rkey='facet',
                      lstart='start_min', lstop='stop_max',
                      rstart='co_pos_min', rstop='co_pos_max')
    # drop the facet columns and other redundant joined columns (by position)
    .cutout(15, 16, 17, 22, 23, 24, 25)
    .rename({
        'start_min': 'tract_start_min',
        'start_mid': 'tract_start_mid',
        'start_max': 'tract_start_max',
        'stop_min': 'tract_stop_min',
        'stop_mid': 'tract_stop_mid',
        'stop_max': 'tract_stop_max',
        'length_min': 'tract_length_min',
        'length_mid': 'tract_length_mid',
        'length_max': 'tract_length_max',
        'support': 'tract_support',
        'is_complex': 'tract_is_complex',
        'blocks': 'tract_blocks',
    })
    # tracts with no overlapping CO event have None in the joined columns
    .addfield('tract_type', lambda r: 'NCO' if r.co_pos_min is None else 'CO')
)
tbl_tracts_differentiated.topickle(os.path.join(PUBLIC_DIR, 'tbl_conversion_tracts.pickle'))
tbl_tracts_differentiated.totsv(os.path.join(PUBLIC_DIR, 'tbl_conversion_tracts.txt'))
display_with_nrows(tbl_tracts_differentiated, caption='differentiated conversion tracts')
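To make the classification rule concrete, here is the same join on a tiny made-up table (facets and coordinates are hypothetical): a tract overlapping a CO interval on its facet picks up the CO columns and is labelled CO, while a tract with no overlap gets None in the joined columns and is labelled NCO.

toy_tracts = etl.wrap([['facet', 'start_min', 'stop_max'],
                       ['s1_chr1', 100, 200],
                       ['s1_chr1', 5000, 5100]])
toy_cos = etl.wrap([['facet', 'co_pos_min', 'co_pos_max'],
                    ['s1_chr1', 150, 160]])
(toy_tracts
 # first tract overlaps the CO interval [150, 160] -> 'CO';
 # second tract has no overlap -> joined columns are None -> 'NCO'
 .intervalleftjoin(toy_cos, lkey='facet', rkey='facet',
                   lstart='start_min', lstop='stop_max',
                   rstart='co_pos_min', rstop='co_pos_max')
 .addfield('tract_type', lambda r: 'NCO' if r.co_pos_min is None else 'CO')
 .displayall())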
In [21]:
tbl_tracts_differentiated.valuecounts('tract_type')
In [22]:
tbl_tracts_co = tbl_tracts_differentiated.eq('tract_type', 'CO')
display_with_nrows(tbl_tracts_co, caption='CO conversion tracts')
In [23]:
# drop the CO coordinate columns, which are all None for NCO tracts
tbl_tracts_nco = tbl_tracts_differentiated.eq('tract_type', 'NCO').cutout(15, 16, 17, 18, 19)
display_with_nrows(tbl_tracts_nco, caption='NCO conversion tracts')
In [24]:
tbl_tracts_nco.valuecounts('tract_is_complex')
In [25]:
# take a closer look at unusually long conversion tracts
tbl_tracts_differentiated.gt('tract_length_min', 15000).displayall()