In [1]:
%run ../../shared_setup.ipynb


docker image cggh/biipy:v1.6.0

In [2]:
def impute_inheritance_nearest(inheritance, pos, pos_impute):
    """Impute inheritance at unknown positions, by copying from
    nearest neighbouring position where inheritance is known.

    Parameters
    ----------

    inheritance : array_like, int, shape (n_variants, n_gametes)
        An array of integers coding the allelic inheritance state at the
        known positions.
    pos : array_like, int, shape (n_variants,)
        Array of genomic positions at which `inheritance` was determined.
    pos_impute : array_like, int
        Array of positions at which to impute inheritance.

    Returns
    -------

    imputed_inheritance : ndarray, int
        An array of integers coding the imputed allelic inheritance.

    """

    # check inputs
    inheritance = np.asarray(inheritance)
    assert inheritance.ndim == 2
    pos = np.asarray(pos)
    assert pos.ndim == 1
    pos_impute = np.asarray(pos_impute)
    assert pos_impute.ndim == 1
    n_variants = pos.size
    assert inheritance.shape[0] == n_variants

    # find indices of neighbouring variants
    indices_left = np.clip(np.searchsorted(pos, pos_impute), 0, n_variants - 1)
    indices_right = np.clip(indices_left + 1, 0, n_variants - 1)
    inh_left = np.take(inheritance, indices_left, axis=0)
    inh_right = np.take(inheritance, indices_right, axis=0)

    # find positions of neighbouring variants
    pos_left = np.take(pos, indices_left)
    pos_right = np.take(pos, indices_right)

    # compute distance to neighbours
    dist_left = np.abs(pos_impute - pos_left)
    dist_right = np.abs(pos_right - pos_impute)

    # build output
    out = np.zeros_like(inh_left)
    out[(dist_left <= dist_right)] = inh_left[(dist_left <= dist_right)]
    out[(dist_left > dist_right)] = inh_right[(dist_left > dist_right)]

    return out

In [3]:
def impute_inheritance_neighbours(inheritance, pos, pos_impute):
    """Impute inheritance at unknown positions, by copying from
    neighbouring position where inheritance is known.

    Parameters
    ----------

    inheritance : array_like, int, shape (n_variants, n_gametes)
        An array of integers coding the allelic inheritance state at the
        known positions.
    pos : array_like, int, shape (n_variants,)
        Array of genomic positions at which `inheritance` was determined.
    pos_impute : array_like, int
        Array of positions at which to impute inheritance.

    Returns
    -------

    imputed_inheritance : ndarray, int
        An array of integers coding the imputed allelic inheritance.

    """

    # check inputs
    inheritance = np.asarray(inheritance)
    assert inheritance.ndim == 2
    pos = np.asarray(pos)
    assert pos.ndim == 1
    pos_impute = np.asarray(pos_impute)
    assert pos_impute.ndim == 1
    n_variants = pos.size
    assert inheritance.shape[0] == n_variants

    # find indices of neighbouring variants
    indices_left = np.clip(np.searchsorted(pos, pos_impute), 0, n_variants - 1)
    indices_right = np.clip(indices_left + 1, 0, n_variants - 1)
    inh_left = np.take(inheritance, indices_left, axis=0)
    inh_right = np.take(inheritance, indices_right, axis=0)

    # build output
    pos_left = np.take(pos, indices_left)
    out = np.zeros_like(inh_left)
    out[inh_left == inh_right] = inh_left[inh_left == inh_right]
    out[pos_left == pos_impute] = inh_left[pos_left == pos_impute]

    return out

In [4]:
gatk = load_callsets(GATK_CALLSET_FN_TEMPLATE,
                     variant_filter='FILTER_PASS',
                     call_filter=gatk_conf_calls, 
                     sample_exclusions=excessive_recomb_samples)


2016-03-11 22:17:08.908942 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/3d7_hb3.gatk.final.npz
2016-03-11 22:17:09.973368 :: filter variants: excluding 155288 (80.9%) retaining 36598 (19.1%) of 191886 variants
2016-03-11 22:17:10.011131 :: filter calls: excluding 1413 (0.2%) retaining 767145 (99.8%) of 768558 calls
2016-03-11 22:17:10.012411 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/hb3_dd2.gatk.final.npz
2016-03-11 22:17:12.661668 :: filter variants: excluding 345466 (92.2%) retaining 29067 (7.8%) of 374533 variants
2016-03-11 22:17:12.678783 :: filter samples: excluding ['SC01/PG0025-C/ERR019045'] including ['HB3/PG0004-CW/ERR012788', 'DD2/PG0008-CW/ERR012840', '1BB5/PG0023-C/ERR015449', '3BA6/PG0022-Cx/ERR126027', '3BD5/PG0024-C/ERR019053', '7C101/PG0074-C/ERR019048', '7C111/PG0038-C/ERR015457', '7C12/PG0035-Cx/ERR037704', '7C126/PG0047-C/ERR015452', '7C140/PG0039-C/ERR015454', '7C159/PG0040-Cx/ERR107475', '7C16/PG0036-C/ERR015455', '7C170/PG0041-C/ERR015446', '7C183/PG0042-C/ERR015448', '7C188/PG0030-C/ERR019046', '7C20/PG0037-C/ERR015451', '7C3/PG0034-C/ERR019047', '7C408/PG0031-C/ERR015458', '7C421/PG0043-C/ERR015459', '7C424/PG0044-C/ERR019043', '7C46/PG0046-Cx/ERR107476', '7C7/PG0048-C/ERR019049', 'B1SD/PG0015-C/ERR019044', 'B4R3/PG0018-C/ERR019042', 'CH3_116/PG0032-Cx/ERR037703', 'CH3_61/PG0033-Cx/ERR175544', 'D43/PG0029-Cx/ERR107474', 'GC03/PG0021-C/ERR015447', 'GC06/PG0028-C/ERR015456', 'QC01/PG0017-C/ERR019050', 'QC13/PG0016-C/ERR012895', 'QC23/PG0045-C/ERR012892', 'QC34/PG0026-C/ERR015453', 'SC05/PG0019-C/ERR019051', 'TC05/PG0027-C/ERR015450', 'TC08/PG0020-C/ERR019052']
2016-03-11 22:17:12.719247 :: filter calls: excluding 12521 (1.2%) retaining 1033891 (98.8%) of 1046412 calls
2016-03-11 22:17:12.720968 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/7g8_gb4.gatk.final.npz
2016-03-11 22:17:15.599949 :: filter variants: excluding 358058 (92.8%) retaining 27709 (7.2%) of 385767 variants
2016-03-11 22:17:15.614945 :: filter samples: excluding ['D2/PG0094-CW/ERR045632'] including ['7G8/PG0083-C/ERR027099', 'GB4/PG0084-C/ERR027100', 'AL2/PG0103-CW/ERR045627', 'AUD/PG0112-C/ERR029406', 'AUD/PG0112-CW/ERR045639', 'DAN/PG0098-C/ERR027110', 'DEV/PG0081-CW/ERR045633', 'JB12/PG0099-C/ERR029146', 'JB8/PG0087-C/ERR029091', 'JC3/PG0077-CW/ERR045636', 'JC9/PG0111-C/ERR029409', 'JC9/PG0111-CW/ERR045634', 'JE11/PG0100-C/ERR029404', 'JE11/PG0100-CW/ERR045630', 'JF6/PG0079-C/ERR027102', 'JF6/PG0079-CW/ERR045637', 'JON/PG0107-C/ERR029408', 'KA6/PG0091-C/ERR027117', 'KB8/PG0104-C/ERR029148', 'KB8/PG0104-CW/ERR045642', 'KH7/PG0088-C/ERR027111', 'LA10/PG0086-C/ERR029090', 'LA10/PG0086-CW/ERR045629', 'NF10/PG0096-C/ERR027108', 'NIC/PG0095-C/ERR027107', 'NIC/PG0095-CW/ERR045631', 'QF5/PG0078-C/ERR029092', 'QF5/PG0078-CW/ERR045638', 'TF1/PG0080-C/ERR027103', 'WC4/PG0082-C/ERR029093', 'WE2/PG0085-C/ERR027101', 'WF12/PG0097-C/ERR027109', 'XB3/PG0093-C/ERR029105', 'XD8/PG0105-C/ERR029144', 'XD8/PG0105-CW/ERR045628', 'XE7/PG0106-C/ERR029407', 'XF12/PG0102-C/ERR029143', 'XF12/PG0102-CW/ERR045635', 'XG10/PG0109-C/ERR029405']
2016-03-11 22:17:15.654904 :: filter calls: excluding 7422 (0.7%) retaining 1073229 (99.3%) of 1080651 calls

In [5]:
cortex = load_callsets(CORTEX_CALLSET_FN_TEMPLATE, 
                       variant_filter='FILTER_PASS',
                       call_filter=cortex_conf_calls, 
                       sample_exclusions=excessive_recomb_samples)


2016-03-11 22:17:15.660207 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/3d7_hb3.cortex.final.npz
2016-03-11 22:17:15.962775 :: filter variants: excluding 29576 (52.0%) retaining 27296 (48.0%) of 56872 variants
2016-03-11 22:17:15.976984 :: filter calls: excluding 4849 (0.8%) retaining 568367 (99.2%) of 573216 calls
2016-03-11 22:17:15.978027 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/hb3_dd2.cortex.final.npz
2016-03-11 22:17:16.586636 :: filter variants: excluding 80272 (75.3%) retaining 26293 (24.7%) of 106565 variants
2016-03-11 22:17:16.597752 :: filter samples: excluding ['SC01/PG0025-C/ERR019045'] including ['HB3/PG0004-CW/ERR012788', 'DD2/PG0008-CW/ERR012840', '1BB5/PG0023-C/ERR015449', '3BA6/PG0022-Cx/ERR126027', '3BD5/PG0024-C/ERR019053', '7C101/PG0074-C/ERR019048', '7C111/PG0038-C/ERR015457', '7C12/PG0035-Cx/ERR037704', '7C126/PG0047-C/ERR015452', '7C140/PG0039-C/ERR015454', '7C159/PG0040-Cx/ERR107475', '7C16/PG0036-C/ERR015455', '7C170/PG0041-C/ERR015446', '7C183/PG0042-C/ERR015448', '7C188/PG0030-C/ERR019046', '7C20/PG0037-C/ERR015451', '7C3/PG0034-C/ERR019047', '7C408/PG0031-C/ERR015458', '7C421/PG0043-C/ERR015459', '7C424/PG0044-C/ERR019043', '7C46/PG0046-Cx/ERR107476', '7C7/PG0048-C/ERR019049', 'B1SD/PG0015-C/ERR019044', 'B4R3/PG0018-C/ERR019042', 'CH3_116/PG0032-Cx/ERR037703', 'CH3_61/PG0033-Cx/ERR175544', 'D43/PG0029-Cx/ERR107474', 'GC03/PG0021-C/ERR015447', 'GC06/PG0028-C/ERR015456', 'QC01/PG0017-C/ERR019050', 'QC13/PG0016-C/ERR012895', 'QC23/PG0045-C/ERR012892', 'QC34/PG0026-C/ERR015453', 'SC05/PG0019-C/ERR019051', 'TC05/PG0027-C/ERR015450', 'TC08/PG0020-C/ERR019052']
2016-03-11 22:17:16.624607 :: filter calls: excluding 37204 (3.9%) retaining 909344 (96.1%) of 946548 calls
2016-03-11 22:17:16.626471 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/7g8_gb4.cortex.final.npz
2016-03-11 22:17:17.307010 :: filter variants: excluding 85362 (78.5%) retaining 23394 (21.5%) of 108756 variants
2016-03-11 22:17:17.317186 :: filter samples: excluding ['D2/PG0094-CW/ERR045632'] including ['7G8/PG0083-C/ERR027099', 'GB4/PG0084-C/ERR027100', 'AL2/PG0103-CW/ERR045627', 'AUD/PG0112-C/ERR029406', 'AUD/PG0112-CW/ERR045639', 'DAN/PG0098-C/ERR027110', 'DEV/PG0081-CW/ERR045633', 'JB12/PG0099-C/ERR029146', 'JB8/PG0087-C/ERR029091', 'JC3/PG0077-CW/ERR045636', 'JC9/PG0111-C/ERR029409', 'JC9/PG0111-CW/ERR045634', 'JE11/PG0100-C/ERR029404', 'JE11/PG0100-CW/ERR045630', 'JF6/PG0079-C/ERR027102', 'JF6/PG0079-CW/ERR045637', 'JON/PG0107-C/ERR029408', 'KA6/PG0091-C/ERR027117', 'KB8/PG0104-C/ERR029148', 'KB8/PG0104-CW/ERR045642', 'KH7/PG0088-C/ERR027111', 'LA10/PG0086-C/ERR029090', 'LA10/PG0086-CW/ERR045629', 'NF10/PG0096-C/ERR027108', 'NIC/PG0095-C/ERR027107', 'NIC/PG0095-CW/ERR045631', 'QF5/PG0078-C/ERR029092', 'QF5/PG0078-CW/ERR045638', 'TF1/PG0080-C/ERR027103', 'WC4/PG0082-C/ERR029093', 'WE2/PG0085-C/ERR027101', 'WF12/PG0097-C/ERR027109', 'XB3/PG0093-C/ERR029105', 'XD8/PG0105-C/ERR029144', 'XD8/PG0105-CW/ERR045628', 'XE7/PG0106-C/ERR029407', 'XF12/PG0102-C/ERR029143', 'XF12/PG0102-CW/ERR045635', 'XG10/PG0109-C/ERR029405']
2016-03-11 22:17:17.339276 :: filter calls: excluding 26051 (2.9%) retaining 886315 (97.1%) of 912366 calls

In [6]:
combined = load_callsets(COMBINED_CALLSET_FN_TEMPLATE, 
                         variant_filter='FILTER_PASS',
                         call_filter=combined_conf_calls, 
                         sample_exclusions=excessive_recomb_samples)


2016-03-11 22:17:17.344703 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/3d7_hb3.combined.final.npz
2016-03-11 22:17:17.674727 :: filter variants: excluding 157 (0.4%) retaining 42087 (99.6%) of 42244 variants
2016-03-11 22:17:17.714452 :: filter calls: excluding 2439 (0.3%) retaining 881388 (99.7%) of 883827 calls
2016-03-11 22:17:17.715266 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/hb3_dd2.combined.final.npz
2016-03-11 22:17:18.158869 :: filter variants: excluding 450 (1.2%) retaining 36461 (98.8%) of 36911 variants
2016-03-11 22:17:18.178627 :: filter samples: excluding ['SC01/PG0025-C/ERR019045'] including ['HB3/PG0004-CW/ERR012788', 'DD2/PG0008-CW/ERR012840', '1BB5/PG0023-C/ERR015449', '3BA6/PG0022-Cx/ERR126027', '3BD5/PG0024-C/ERR019053', '7C101/PG0074-C/ERR019048', '7C111/PG0038-C/ERR015457', '7C12/PG0035-Cx/ERR037704', '7C126/PG0047-C/ERR015452', '7C140/PG0039-C/ERR015454', '7C159/PG0040-Cx/ERR107475', '7C16/PG0036-C/ERR015455', '7C170/PG0041-C/ERR015446', '7C183/PG0042-C/ERR015448', '7C188/PG0030-C/ERR019046', '7C20/PG0037-C/ERR015451', '7C3/PG0034-C/ERR019047', '7C408/PG0031-C/ERR015458', '7C421/PG0043-C/ERR015459', '7C424/PG0044-C/ERR019043', '7C46/PG0046-Cx/ERR107476', '7C7/PG0048-C/ERR019049', 'B1SD/PG0015-C/ERR019044', 'B4R3/PG0018-C/ERR019042', 'CH3_116/PG0032-Cx/ERR037703', 'CH3_61/PG0033-Cx/ERR175544', 'D43/PG0029-Cx/ERR107474', 'GC03/PG0021-C/ERR015447', 'GC06/PG0028-C/ERR015456', 'QC01/PG0017-C/ERR019050', 'QC13/PG0016-C/ERR012895', 'QC23/PG0045-C/ERR012892', 'QC34/PG0026-C/ERR015453', 'SC05/PG0019-C/ERR019051', 'TC05/PG0027-C/ERR015450', 'TC08/PG0020-C/ERR019052']
2016-03-11 22:17:18.261214 :: filter calls: excluding 28934 (2.2%) retaining 1283662 (97.8%) of 1312596 calls
2016-03-11 22:17:18.263265 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/7g8_gb4.combined.final.npz
2016-03-11 22:17:18.668250 :: filter variants: excluding 304 (0.9%) retaining 34471 (99.1%) of 34775 variants
2016-03-11 22:17:18.709106 :: filter samples: excluding ['D2/PG0094-CW/ERR045632'] including ['7G8/PG0083-C/ERR027099', 'GB4/PG0084-C/ERR027100', 'AL2/PG0103-CW/ERR045627', 'AUD/PG0112-C/ERR029406', 'AUD/PG0112-CW/ERR045639', 'DAN/PG0098-C/ERR027110', 'DEV/PG0081-CW/ERR045633', 'JB12/PG0099-C/ERR029146', 'JB8/PG0087-C/ERR029091', 'JC3/PG0077-CW/ERR045636', 'JC9/PG0111-C/ERR029409', 'JC9/PG0111-CW/ERR045634', 'JE11/PG0100-C/ERR029404', 'JE11/PG0100-CW/ERR045630', 'JF6/PG0079-C/ERR027102', 'JF6/PG0079-CW/ERR045637', 'JON/PG0107-C/ERR029408', 'KA6/PG0091-C/ERR027117', 'KB8/PG0104-C/ERR029148', 'KB8/PG0104-CW/ERR045642', 'KH7/PG0088-C/ERR027111', 'LA10/PG0086-C/ERR029090', 'LA10/PG0086-CW/ERR045629', 'NF10/PG0096-C/ERR027108', 'NIC/PG0095-C/ERR027107', 'NIC/PG0095-CW/ERR045631', 'QF5/PG0078-C/ERR029092', 'QF5/PG0078-CW/ERR045638', 'TF1/PG0080-C/ERR027103', 'WC4/PG0082-C/ERR029093', 'WE2/PG0085-C/ERR027101', 'WF12/PG0097-C/ERR027109', 'XB3/PG0093-C/ERR029105', 'XD8/PG0105-C/ERR029144', 'XD8/PG0105-CW/ERR045628', 'XE7/PG0106-C/ERR029407', 'XF12/PG0102-C/ERR029143', 'XF12/PG0102-CW/ERR045635', 'XG10/PG0109-C/ERR029405']
2016-03-11 22:17:18.786947 :: filter calls: excluding 19937 (1.5%) retaining 1324432 (98.5%) of 1344369 calls

In [7]:
combined_snp = filter_callsets(combined, variant_filter='is_snp')


2016-03-11 22:17:18.794832 :: filter variants: excluding 21576 (59.2%) retaining 14885 (40.8%) of 36461 variants
2016-03-11 22:17:18.805219 :: filter variants: excluding 26699 (63.4%) retaining 15388 (36.6%) of 42087 variants
2016-03-11 22:17:18.816272 :: filter variants: excluding 20079 (58.2%) retaining 14392 (41.8%) of 34471 variants

In [8]:
combined_indel = filter_callsets(combined, variant_filter='~is_snp')


2016-03-11 22:17:18.836806 :: filter variants: excluding 14885 (40.8%) retaining 21576 (59.2%) of 36461 variants
2016-03-11 22:17:18.855402 :: filter variants: excluding 15388 (36.6%) retaining 26699 (63.4%) of 42087 variants
2016-03-11 22:17:18.874794 :: filter variants: excluding 14392 (41.8%) retaining 20079 (58.2%) of 34471 variants

Compare GATK and Cortex


In [9]:
def compare_inheritance_chrom(cs1, cs2, chrom, mode='nearest'):

    # filter to chromosome
    cs1 = filter_variants(cs1, query="CHROM == b'%s'" % chrom, verbose=False)
    cs2 = filter_variants(cs2, query="CHROM == b'%s'" % chrom, verbose=False)

    # unpack callsets
    v1, _, c2d1 = unpack_callset(cs1)
    pos1 = v1['POS']
    g1 = c2d1['genotype']
    inh1 = inheritance(g1)
    v2, _, c2d2 = unpack_callset(cs2)
    pos2 = v2['POS']
    g2 = c2d2['genotype']
    inh2 = inheritance(g2)

    # impute inheritance
    if mode == 'nearest':
        inh2_imputed = impute_inheritance_nearest(inh1, pos1, pos2)
    else:
        inh2_imputed = impute_inheritance_neighbours(inh1, pos1, pos2)

    # calculate discordance
    is_both_called = (((inh2 == INHERITANCE_PARENT1) | (inh2 == INHERITANCE_PARENT2)) & 
                      ((inh2_imputed == INHERITANCE_PARENT1) | (inh2_imputed == INHERITANCE_PARENT2)))
    is_discord = (inh2 != inh2_imputed) & is_both_called
    n_discord = nnz(is_discord)
    is_concord = (inh2 == inh2_imputed) & is_both_called
    n_concord = nnz(is_concord)

    return n_discord, n_concord


def compare_inheritance(cs1, cs2, mode='nearest'):
    n_discord = n_concord = 0
    for chrom in CHROMOSOMES:
        chrom = str(chrom, 'ascii')
        n_discord_chrom, n_concord_chrom = compare_inheritance_chrom(cs1, cs2, chrom, mode=mode)
        n_discord += n_discord_chrom
        n_concord += n_concord_chrom
    return n_discord, n_concord

In [10]:
cross = '3d7_hb3'
compare_inheritance(gatk[cross], cortex[cross])


Out[10]:
(122, 566873)

In [11]:
cross = '3d7_hb3'
compare_inheritance(gatk[cross], cortex[cross], mode='neighbours')


Out[11]:
(105, 566686)

In [12]:
cross = '3d7_hb3'
compare_inheritance(cortex[cross], gatk[cross])


Out[12]:
(532, 757842)

In [13]:
cross = '3d7_hb3'
compare_inheritance(cortex[cross], gatk[cross], mode='neighbours')


Out[13]:
(445, 754578)

In [14]:
cross = 'hb3_dd2'
compare_inheritance(gatk[cross], cortex[cross])


Out[14]:
(1004, 892397)

In [15]:
cross = 'hb3_dd2'
compare_inheritance(cortex[cross], gatk[cross])


Out[15]:
(1864, 989800)

In [16]:
cross = '7g8_gb4'
compare_inheritance(gatk[cross], cortex[cross])


Out[16]:
(903, 873740)

In [17]:
cross = '7g8_gb4'
compare_inheritance(cortex[cross], gatk[cross])


Out[17]:
(1967, 1034709)

In [18]:
cross = '3d7_hb3'
compare_inheritance(combined_snp[cross], combined_indel[cross])


Out[18]:
(1333, 555591)

In [19]:
cross = '3d7_hb3'
compare_inheritance(combined_indel[cross], combined_snp[cross])


Out[19]:
(313, 320936)

In [20]:
cross = 'hb3_dd2'
compare_inheritance(combined_snp[cross], combined_indel[cross])


Out[20]:
(1870, 737117)

In [21]:
cross = 'hb3_dd2'
compare_inheritance(combined_indel[cross], combined_snp[cross])


Out[21]:
(706, 516653)

In [22]:
cross = '7g8_gb4'
compare_inheritance(combined_snp[cross], combined_indel[cross])


Out[22]:
(1923, 754946)

In [23]:
cross = '7g8_gb4'
compare_inheritance(combined_indel[cross], combined_snp[cross])


Out[23]:
(810, 545899)

In [25]:
tbl = [['cross', 'Cortex|GATK', 'GATK|Cortex', 'INDELs|SNPs', 'SNPs|INDELs']]
mode = 'nearest'
for cross in CROSSES:
    row = [LABELS[cross]]
    n_discord, n_concord = compare_inheritance(gatk[cross], cortex[cross], mode=mode)
    val = '%s/%s' % (n_discord, n_discord + n_concord)
    row.append(val)
    n_discord, n_concord = compare_inheritance(cortex[cross], gatk[cross], mode=mode)
    val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
    row.append(val)
    n_discord, n_concord = compare_inheritance(combined_snp[cross], combined_indel[cross], mode=mode)
    val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
    row.append(val)
    n_discord, n_concord = compare_inheritance(combined_indel[cross], combined_snp[cross], mode=mode)
    val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
    row.append(val)
    tbl += [row]
etl.wrap(tbl).displayall(index_header=False)


cross Cortex|GATK GATK|Cortex INDELs|SNPs SNPs|INDELs
3D7 x HB3 122/566995 532/758374 (0.0702%) 1333/556924 (0.2394%) 313/321249 (0.0974%)
HB3 x Dd2 1004/893401 1864/991664 (0.1880%) 1870/738987 (0.2530%) 706/517359 (0.1365%)
7G8 x GB4 903/874643 1967/1036676 (0.1897%) 1923/756869 (0.2541%) 810/546709 (0.1482%)

In [26]:
tbl = [['cross', 'Cortex|GATK', 'GATK|Cortex', 'INDELs|SNPs', 'SNPs|INDELs']]
mode = 'neighbours'
for cross in CROSSES:
    row = [LABELS[cross]]
    n_discord, n_concord = compare_inheritance(gatk[cross], cortex[cross], mode=mode)
    val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
    row.append(val)
    n_discord, n_concord = compare_inheritance(cortex[cross], gatk[cross], mode=mode)
    val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
    row.append(val)
    n_discord, n_concord = compare_inheritance(combined_snp[cross], combined_indel[cross], mode=mode)
    val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
    row.append(val)
    n_discord, n_concord = compare_inheritance(combined_indel[cross], combined_snp[cross], mode=mode)
    val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
    row.append(val)
    tbl += [row]
etl.wrap(tbl).displayall(index_header=False)


cross Cortex|GATK GATK|Cortex INDELs|SNPs SNPs|INDELs
3D7 x HB3 105/566791 (0.0185%) 445/755023 (0.0589%) 1175/554522 (0.2119%) 260/319822 (0.0813%)
HB3 x Dd2 456/890426 (0.0512%) 1694/978162 (0.1732%) 1625/729606 (0.2227%) 507/503379 (0.1007%)
7G8 x GB4 533/872324 (0.0611%) 1748/1021572 (0.1711%) 1647/749455 (0.2198%) 589/534830 (0.1101%)

In [26]:
def fig_inheritance(fig, callsets, cross, chrom, filename=None, step=100):
    
    callset = callsets[cross]
    
    ax = plt.subplot2grid((9, 1), (0, 0), rowspan=6)
    # don't include parents
    n_samples = len(callset['calldata'].dtype.names)
    plot_inheritance(ax, callset, chrom, sample_indices=range(2, n_samples))
    ax.set_xticks([])
#    ax.set_title('%s, chromosome %s' % (LABELS[cross], int(chrom[6:8])), fontsize=8)
    ax.set_ylabel('progeny clones', rotation=90)
    ax.yaxis.tick_left()
    ax.set_yticklabels([])
    
    ax = plt.subplot2grid((9, 1), (6, 0))
    plot_variants_locator(ax, callset, chrom, step=step)
    #ax.set_xticklabels([])
    ax.set_xticks([])
    
    ax = plt.subplot2grid((9, 1), (7, 0))
    plot_accessibility(ax, chrom, cross, linewidth=.5)
    ax.set_ylabel('accessibility', rotation=0, ha='right', va='center')
    
    ax = plt.subplot2grid((9, 1), (8, 0))
    plot_genes(ax, chrom)
    # ax.set_xticks([])
    # ax.spines['bottom'].set_visible(False)
    ax.set_ylabel('genes', rotation=0, ha='right', va='center')
    ax.set_xlabel('position (bp)')
    
    fig.subplots_adjust(hspace=0.02, left=.12, top=.9, right=.98, bottom=.13)

In [27]:
chrom = 'Pf3D7_12_v3'
callsets = combined_snp
cross = 'hb3_dd2'
fig = plt.figure(figsize=(7, 4))
fig.suptitle('%s, chromosome %s, combined callset, SNPs' % (LABELS[cross], int(chrom[6:8])), 
             fontsize=10, fontweight='bold')
fig_inheritance(fig, callsets, cross, chrom, step=10)
fn = '../../artwork/supp/inheritance_combined_snps.{dpi}.{fmt}'
for fmt in 'jpeg', 'png':
    for dpi in 120, 300:
        fig.savefig(fn.format(dpi=dpi, fmt=fmt), dpi=dpi, jpeg_quality=100)



In [28]:
chrom = 'Pf3D7_12_v3'
callsets = combined_indel
cross = 'hb3_dd2'
fig = plt.figure(figsize=(7, 4))
fig.suptitle('%s, chromosome %s, combined callset, INDELs' % (LABELS[cross], int(chrom[6:8])), 
             fontsize=10, fontweight='bold')
fig_inheritance(fig, callsets, cross, chrom, step=10)
fn = '../../artwork/supp/inheritance_combined_indel.{dpi}.{fmt}'
for fmt in 'jpeg', 'png':
    for dpi in 120, 300:
        fig.savefig(fn.format(dpi=dpi, fmt=fmt), dpi=dpi, jpeg_quality=100)



In [ ]: