Locating recombinant haplotypes - demo

This notebook has a demo of the function "locate_recombinants" from the hapclust_utils notebook. The function identifies haplotypes with evidence for recombination based on the 4-gamete test.


In [25]:
%run setup.ipynb
import hapclust
%matplotlib inline



In [2]:
callset = h5py.File('../data/ag1000g.phase1.AR3.1.haplotypes.specific_regions.2L_2358158_2431617.h5',
                    mode='r')
region_vgsc = SeqFeature('2L', 2358158, 2431617)
genotypes = allel.GenotypeArray(callset['2L/calldata/genotype'])
haplotypes = genotypes.to_haplotypes()
pos = allel.SortedIndex(callset['2L/variants/POS'])
loc = pos.locate_range(region_vgsc.start, region_vgsc.end)
h_vgsc = haplotypes[loc]
pos_995S = 2422651
pos_995F = 2422652
loc_995S = haplotypes[pos.locate_key(pos_995S)] == 1
loc_995F = haplotypes[pos.locate_key(pos_995F)] == 1
h_vgsc_995F = h_vgsc.compress(loc_995F, axis=1)
h_vgsc_995S = h_vgsc.compress(loc_995S, axis=1)
sample_ids = callset['2L']['samples'][:]
hap_ids = np.array(list(itertools.chain(*[[s + b'a', s + b'b'] for s in sample_ids])))
hap_ids_995F = hap_ids[loc_995F]
hap_ids_995S = hap_ids[loc_995S]
tbl_haplotypes = etl.fromtsv('../data/ag1000g.phase1.AR3.1.haplotypes.meta.txt')
hap_pops = np.array(tbl_haplotypes.values('population'))
hap_pops_995S = hap_pops[loc_995S]
hap_pops_995F = hap_pops[loc_995F]
# need to use named colors for graphviz
pop_colors = {
    'AOM': 'brown',
    'BFM': 'firebrick1',
    'GWA': 'goldenrod1',
    'GNS': 'cadetblue1',
    'BFS': 'deepskyblue',
    'CMS': 'dodgerblue3',
    'UGS': 'palegreen',
    'GAS': 'olivedrab',
    'KES': 'grey47',
    'colony': 'black'
}
hap_colors = np.array([pop_colors[p] for p in hap_pops])
hap_colors_995S = np.array([pop_colors[p] for p in hap_pops_995S])
hap_colors_995F = np.array([pop_colors[p] for p in hap_pops_995F])

tbl_variant_labels = (
    etl
    .frompickle('../data/tbl_variants_phase1.pkl')
    .eq('num_alleles', 2)
    .cut('POS', 'REF', 'ALT', 'AGAP004707-RA')
    .convert('AGAP004707-RA', lambda v: v[1] if v[0] == 'NON_SYNONYMOUS_CODING' else '')
    .addfield('label', lambda row: row['AGAP004707-RA'] if row['AGAP004707-RA'] else '%s:%s>%s' % (row.POS, row.REF, row.ALT))
)
tbl_variant_labels


Out[2]:
0|POS 1|REF 2|ALT 3|AGAP004707-RA 4|label
2358254 G A D33N D33N
2358316 T G 2358316:T>G
2358328 T C 2358328:T>C
2358353 C T 2358353:C>T
2358405 T A 2358405:T>A

...


In [3]:
pos2label = tbl_variant_labels.lookupone('POS', 'label')
pos2label[pos_995F]


Out[3]:
'L995F'

In [4]:
pos2label_coding = tbl_variant_labels.lookupone('POS', 'AGAP004707-RA')
pos2label_coding[pos_995F]


Out[4]:
'L995F'

In [5]:
variant_labels = np.array([pos2label.get(p, '') for p in pos], dtype=object)
variant_labels_vgsc = variant_labels[loc]
variant_labels_vgsc


Out[5]:
array(['D33N', '2358316:T>G', '2358328:T>C', ..., '2431518:C>A',
       '2431527:G>C', '2431542:T>C'], dtype=object)

In [6]:
variant_labels_coding = np.array([pos2label_coding.get(p, '') for p in pos], dtype=object)
variant_labels_coding_vgsc = variant_labels_coding[loc]
variant_labels_coding_vgsc


Out[6]:
array(['D33N', '', '', ..., '', '', ''], dtype=object)

L995S


In [9]:
cut_height = 2
fig, ax_dend, ax_freq, cluster_spans_995S, leaf_obs_995S = hapclust.fig_haplotypes_clustered(
    h_vgsc_995S, cut_height=cut_height, dpi=150, 
    highlight_clusters=5, label_clusters=5)

In [10]:
cluster_idx = 2
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995S.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_coding_vgsc, fontsize=6, show_node_labels='count')


Out[10]:
%3 0 85 1 0->1 2 0->2 3 0->3 4 2 0->4 I492N 6 6 0->6 7 0->7 8 0->8 9 2 0->9 10 0->10 11 0->11 M925L 12 0->12 13 0->13 5 2 5->6 5->9

In [12]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', 
    variant_labels=variant_labels_coding_vgsc, fontsize=6, show_node_labels='count')


221 1473 [ 2  2  6 96] (0, 0) {60, 13}
221 1473 [ 2  2  6 96] (0, 1) {88, 34}
found 2 solutions; min recombinant haplotypes: 2
Out[12]:
%3 0 85 1 0->1 2 0->2 3 0->3 4 2 0->4 I492N 5 6 0->5 6 0->6 7 0->7 8 2 0->8 9 0->9 10 0->10 M925L 11 0->11 12 0->12

In [13]:
cluster_idx = 9
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995S.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_coding_vgsc, fontsize=6, show_node_labels='count')


Out[13]:
%3 0 8 6 3 0->6 1 11 3 1->3 4 4 1->4 V1681I 5 2 1->5 S2076T 1->6 7 1->7 L1617F 2 2->3 2->6 8 2 5->8 6->8 S2076T

In [14]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', 
    variant_labels=variant_labels_coding_vgsc, fontsize=6, show_node_labels='count')


364 431 [18  1 13  1] (0, 1) {9}
364 431 [18  1 13  1] (1, 1) {5}
364 1714 [17  2 12  2] (0, 1) {19, 14}
364 1714 [17  2 12  2] (1, 1) {26, 29}
found 4 solutions; min recombinant haplotypes: 3
Out[14]:
%3 0 8 4 3 0->4 1 11 3 4 1->3 V1681I 1->4 5 1->5 L1617F 2 2->4 6 2 4->6 S2076T

In [15]:
cluster_idx = 12
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995S.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_coding_vgsc, fontsize=6, show_node_labels='count')


Out[15]:
%3 0 26 2 3 0->2 1 6 1->2

In [16]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', 
    variant_labels=variant_labels_coding_vgsc, fontsize=6, show_node_labels='count')


found 1 solutions; min recombinant haplotypes: 0
Out[16]:
%3 0 26 2 3 0->2 1 6 1->2

In [17]:
cluster_idx = 9, 12
cluster_hap_indices = list()
for i in cluster_idx:
    _, _, hix = cluster_spans_995S[i]
    cluster_hap_indices.extend(hix)
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995S.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[17]:
%3 0 8 anon_0_10_0 0->anon_0_10_0 2408423:C>T 1 11 3 1->3 2381529:T>C 4 4 1->4 V1681I 5 2 1->5 S2076T 6 3 1->6 2376361:T>G 7 1->7 L1617F anon_1_9_0 1->anon_1_9_0 2408423:C>T 2 3->2 2376361:T>G 8 2 5->8 2376361:T>G 6->0 2366674:G>T 6->2 2381529:T>C 6->8 S2076T anon_6_11_0 6->anon_6_11_0 2408423:C>T 9 26 11 3 9->11 2376361:T>G 10 6 11->10 2366674:G>T anon_0_10_1 anon_0_10_1->anon_0_10_0 2408677:A>C anon_0_10_2 anon_0_10_2->anon_0_10_1 2408751:T>A anon_0_10_3 anon_0_10_3->anon_0_10_2 2408788:G>A anon_0_10_4 anon_0_10_3->anon_0_10_4 2410255:T>C anon_0_10_5 anon_0_10_4->anon_0_10_5 2410272:C>T anon_0_10_6 anon_0_10_6->10 2422250:C>T anon_0_10_6->anon_0_10_5 2415838:G>A anon_1_9_1 anon_1_9_1->anon_1_9_0 2408677:A>C anon_1_9_2 anon_1_9_2->anon_1_9_1 2408751:T>A anon_1_9_3 anon_1_9_3->anon_1_9_2 2408788:G>A anon_1_9_4 anon_1_9_3->anon_1_9_4 2410255:T>C anon_1_9_5 anon_1_9_4->anon_1_9_5 2410272:C>T anon_1_9_6 anon_1_9_6->9 2422250:C>T anon_1_9_6->anon_1_9_5 2415838:G>A anon_6_11_1 anon_6_11_1->anon_6_11_0 2408677:A>C anon_6_11_2 anon_6_11_2->anon_6_11_1 2408751:T>A anon_6_11_3 anon_6_11_3->anon_6_11_2 2408788:G>A anon_6_11_4 anon_6_11_3->anon_6_11_4 2410255:T>C anon_6_11_5 anon_6_11_4->anon_6_11_5 2410272:C>T anon_6_11_6 anon_6_11_6->11 2422250:C>T anon_6_11_6->anon_6_11_5 2415838:G>A

In [18]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


209 1227 [25 29  8  6] (1, 1) {34, 37, 39, 41, 56, 60}
209 1248 [29 25  6  8] (1, 0) {34, 37, 39, 41, 56, 60}
209 1251 [29 25  6  8] (1, 0) {34, 37, 39, 41, 56, 60}
209 1253 [29 25  6  8] (1, 0) {34, 37, 39, 41, 56, 60}
209 1286 [25 29  8  6] (1, 1) {34, 37, 39, 41, 56, 60}
209 1287 [25 29  8  6] (1, 1) {34, 37, 39, 41, 56, 60}
209 1421 [29 25  6  8] (1, 0) {34, 37, 39, 41, 56, 60}
209 1525 [25 29  8  6] (1, 1) {34, 37, 39, 41, 56, 60}
364 431 [44  1 22  1] (0, 1) {9}
364 431 [44  1 22  1] (1, 1) {5}
364 1227 [19 26 14  9] (1, 1) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1248 [26 19  9 14] (1, 0) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1251 [26 19  9 14] (1, 0) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1253 [26 19  9 14] (1, 0) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1286 [19 26 14  9] (1, 1) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1287 [19 26 14  9] (1, 1) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1421 [26 19  9 14] (1, 0) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1525 [19 26 14  9] (1, 1) {34, 37, 39, 41, 51, 53, 55, 56, 60}
364 1714 [43  2 21  2] (0, 1) {19, 14}
364 1714 [43  2 21  2] (1, 1) {26, 29}
found 4 solutions; min recombinant haplotypes: 12
Out[18]:
%3 0 8 1 11 3 4 1->3 V1681I 4 3 1->4 2376361:T>G 5 1->5 L1617F anon_1_7_0 1->anon_1_7_0 2408423:C>T 2 4->0 2366674:G>T 4->2 2381529:T>C 6 2 4->6 S2076T 7 26 anon_1_7_1 anon_1_7_1->anon_1_7_0 2408677:A>C anon_1_7_2 anon_1_7_2->anon_1_7_1 2408751:T>A anon_1_7_3 anon_1_7_3->anon_1_7_2 2408788:G>A anon_1_7_4 anon_1_7_3->anon_1_7_4 2410255:T>C anon_1_7_5 anon_1_7_4->anon_1_7_5 2410272:C>T anon_1_7_6 anon_1_7_6->7 2422250:C>T anon_1_7_6->anon_1_7_5 2415838:G>A

In [19]:
cluster_idx = 14
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995S.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[19]:
%3 0 55 4 9 0->4 2366965:T>G 5 0->5 2383492:G>A 7 0->7 2380627:C>A 8 0->8 2359596:T>C 9 0->9 K251R 1 2 6 1->2 2395858:T>G 6 1->6 2392571:T>C 2->0 2392571:T>C 3 2 2->3 2366965:T>G 3->4 2392571:T>C 6->0 2395858:T>G

In [20]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


224 895 [ 7 60  2  9] (1, 0) {24, 20}
895 1031 [ 1  8  1 68] (0, 0) {16}
895 1031 [ 1  8  1 68] (1, 0) {36}
found 2 solutions; min recombinant haplotypes: 3
Out[20]:
%3 0 55 2 9 0->2 2366965:T>G 3 0->3 2383492:G>A 5 0->5 2380627:C>A 6 0->6 2359596:T>C 7 0->7 K251R 1 6 1->0 2392571:T>C 4 4->0 2395858:T>G

L995F


In [23]:
cut_height = 4
fig, ax_dend, ax_freq, cluster_spans_995F, leaf_obs_995F = hapclust.fig_haplotypes_clustered(
    h_vgsc_995F, cut_height=cut_height, dpi=150, 
    highlight_clusters=5, label_clusters=5)

In [24]:
cluster_idx = 4
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995F.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[24]:
%3 0 10 2 0->2 2366732:C>A 3 0->3 2385769:G>T anon_0_1_0 0->anon_0_1_0 2392697:G>A anon_0_4_0 0->anon_0_4_0 2366788:C>T 1 4 anon_0_1_0->1 2407710:C>T anon_0_4_0->4 2428746:A>T

In [27]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


found 1 solutions; min recombinant haplotypes: 0
Out[27]:
%3 0 10 2 0->2 2366732:C>A 3 0->3 2385769:G>T anon_0_1_0 0->anon_0_1_0 2392697:G>A anon_0_4_0 0->anon_0_4_0 2366788:C>T 1 4 anon_0_1_0->1 2407710:C>T anon_0_4_0->4 2428746:A>T

In [28]:
cluster_idx = 7
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995F.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[28]:
%3 0 11 8 0->8 2408941:T>C 1 15 1->0 F1920S 2 3 1->2 2403731:A>T 5 1->5 R537L 6 1->6 H1755Y 7 1->7 2417853:A>C 9 1->9 K251R anon_1_3_0 1->anon_1_3_0 2400716:A>C anon_2_4_0 2->anon_2_4_0 2390558:G>A 3 4 anon_1_3_1 anon_1_3_0->anon_1_3_1 2408043:T>A anon_1_3_1->3 2414435:C>A anon_2_4_1 anon_2_4_0->anon_2_4_1 2426280:G>T anon_2_4_1->4 2428660:A>G

In [29]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


found 1 solutions; min recombinant haplotypes: 0
Out[29]:
%3 0 11 8 0->8 2408941:T>C 1 15 1->0 F1920S 2 3 1->2 2403731:A>T 5 1->5 R537L 6 1->6 H1755Y 7 1->7 2417853:A>C 9 1->9 K251R anon_1_3_0 1->anon_1_3_0 2400716:A>C anon_2_4_0 2->anon_2_4_0 2390558:G>A 3 4 anon_1_3_1 anon_1_3_0->anon_1_3_1 2408043:T>A anon_1_3_1->3 2414435:C>A anon_2_4_1 anon_2_4_0->anon_2_4_1 2426280:G>T anon_2_4_1->4 2428660:A>G

In [30]:
cluster_idx = 8
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995F.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[30]:
%3 0 35 4 0->4 2361692:T>C 5 0->5 2381794:G>T 1 6 3 1->3 2383854:C>A 2 6 6 6->1 S1683I 6->2 G317S anon_0_6_0 6->anon_0_6_0 I1940T anon_0_6_0->0 D466H

In [32]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


found 1 solutions; min recombinant haplotypes: 0
Out[32]:
%3 0 35 4 0->4 2361692:T>C 5 0->5 2381794:G>T 1 6 3 1->3 2383854:C>A 2 6 6 6->1 S1683I 6->2 G317S anon_0_6_0 6->anon_0_6_0 I1940T anon_0_6_0->0 D466H

In [33]:
# look at clusters 7 and 8 together
cluster_idx = 7, 8
cluster_hap_indices = list()
for i in cluster_idx:
    _, _, hix = cluster_spans_995F[i]
    cluster_hap_indices.extend(hix)
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995F.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[33]:
%3 0 11 8 0->8 2408941:T>C 1 15 1->0 F1920S 2 3 1->2 2403731:A>T 5 1->5 R537L 6 1->6 H1755Y 7 1->7 2417853:A>C 9 1->9 K251R anon_1_3_0 1->anon_1_3_0 2400716:A>C anon_2_4_0 2->anon_2_4_0 2390558:G>A 3 4 10 35 14 10->14 2361692:T>C 15 10->15 2381794:G>T 11 6 13 11->13 2383854:C>A 12 6 16 16->11 S1683I 16->12 G317S anon_10_16_0 16->anon_10_16_0 I1940T anon_1_3_1 anon_1_3_0->anon_1_3_1 2408043:T>A anon_1_3_1->3 2414435:C>A anon_1_16_0 anon_1_16_0->1 2358808:G>C anon_1_16_1 anon_1_16_0->anon_1_16_1 2407174:T>A anon_1_16_2 anon_1_16_2->16 2426521:A>T anon_1_16_2->anon_1_16_1 2411665:T>C anon_2_4_1 anon_2_4_0->anon_2_4_1 2426280:G>T anon_2_4_1->4 2428660:A>G anon_10_16_0->10 D466H

In [34]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


found 1 solutions; min recombinant haplotypes: 0
Out[34]:
%3 0 11 8 0->8 2408941:T>C 1 15 1->0 F1920S 2 3 1->2 2403731:A>T 5 1->5 R537L 6 1->6 H1755Y 7 1->7 2417853:A>C 9 1->9 K251R anon_1_3_0 1->anon_1_3_0 2400716:A>C anon_2_4_0 2->anon_2_4_0 2390558:G>A 3 4 10 35 14 10->14 2361692:T>C 15 10->15 2381794:G>T 11 6 13 11->13 2383854:C>A 12 6 16 16->11 S1683I 16->12 G317S anon_10_16_0 16->anon_10_16_0 I1940T anon_1_3_1 anon_1_3_0->anon_1_3_1 2408043:T>A anon_1_3_1->3 2414435:C>A anon_1_16_0 anon_1_16_0->1 2358808:G>C anon_1_16_1 anon_1_16_0->anon_1_16_1 2407174:T>A anon_1_16_2 anon_1_16_2->16 2426521:A>T anon_1_16_2->anon_1_16_1 2411665:T>C anon_2_4_1 anon_2_4_0->anon_2_4_1 2426280:G>T anon_2_4_1->4 2428660:A>G anon_10_16_0->10 D466H

In [35]:
cluster_idx = 12
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995F.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[35]:
%3 0 162 1 0->1 2362303:A>C 2 0->2 2389372:C>A 4 0->4 2421360:C>T 5 0->5 2359542:T>A 7 0->7 2362376:G>T 13 0->13 2394724:C>T 14 0->14 A586T 15 0->15 2409249:T>G 17 0->17 2375592:T>A 19 2 0->19 2366965:T>G 20 0->20 2426618:T>A 21 0->21 2417855:C>T 22 0->22 2425747:C>G 23 2 0->23 2374818:A>G 24 0->24 2359611:C>A 26 0->26 A837V 27 0->27 2393383:A>G 28 0->28 2404453:C>A anon_0_6_0 0->anon_0_6_0 2360542:T>G anon_0_12_0 0->anon_0_12_0 2407594:A>C anon_0_18_0 0->anon_0_18_0 2415035:G>T anon_0_25_0 0->anon_0_25_0 2380868:T>G anon_2_8_0 2->anon_2_8_0 2389737:G>A 3 3->0 2407178:G>A 6 8 9 2 9->0 R254K 10 11 11->0 2417678:T>C anon_11_16_0 11->anon_11_16_0 2396175:A>T 12 16 18 25 25->anon_0_25_0 2402774:T>A anon_0_6_1 anon_0_6_0->anon_0_6_1 2417114:T>A anon_0_6_1->6 2417148:C>T anon_0_10_0 anon_0_10_0->0 2392093:A>T anon_0_10_0->10 2399127:G>A anon_0_12_1 anon_0_12_0->anon_0_12_1 2407598:C>G anon_0_12_1->12 2417108:A>G anon_0_18_0->18 2420686:G>A anon_2_8_1 anon_2_8_0->anon_2_8_1 2423538:A>G anon_2_8_1->8 2423850:G>T anon_11_16_0->16 2401282:C>T

In [36]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]))
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


found 1 solutions; min recombinant haplotypes: 0
Out[36]:
%3 0 162 1 0->1 2362303:A>C 2 0->2 2389372:C>A 4 0->4 2421360:C>T 5 0->5 2359542:T>A 7 0->7 2362376:G>T 13 0->13 2394724:C>T 14 0->14 A586T 15 0->15 2409249:T>G 17 0->17 2375592:T>A 19 2 0->19 2366965:T>G 20 0->20 2426618:T>A 21 0->21 2417855:C>T 22 0->22 2425747:C>G 23 2 0->23 2374818:A>G 24 0->24 2359611:C>A 26 0->26 A837V 27 0->27 2393383:A>G 28 0->28 2404453:C>A anon_0_6_0 0->anon_0_6_0 2360542:T>G anon_0_12_0 0->anon_0_12_0 2407594:A>C anon_0_18_0 0->anon_0_18_0 2415035:G>T anon_0_25_0 0->anon_0_25_0 2380868:T>G anon_2_8_0 2->anon_2_8_0 2389737:G>A 3 3->0 2407178:G>A 6 8 9 2 9->0 R254K 10 11 11->0 2417678:T>C anon_11_16_0 11->anon_11_16_0 2396175:A>T 12 16 18 25 25->anon_0_25_0 2402774:T>A anon_0_6_1 anon_0_6_0->anon_0_6_1 2417114:T>A anon_0_6_1->6 2417148:C>T anon_0_10_0 anon_0_10_0->0 2392093:A>T anon_0_10_0->10 2399127:G>A anon_0_12_1 anon_0_12_0->anon_0_12_1 2407598:C>G anon_0_12_1->12 2417108:A>G anon_0_18_0->18 2420686:G>A anon_2_8_1 anon_2_8_0->anon_2_8_1 2423538:A>G anon_2_8_1->8 2423850:G>T anon_11_16_0->16 2401282:C>T

In [37]:
cluster_idx = 16
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_colors = hap_colors_995F.take(cluster_hap_indices)
hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors, network_method='mjn', 
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


Out[37]:
%3 0 50 24 0->24 P55L 58 0->58 2415120:T>A 1 2 15 60 2->60 P1874S 3 90 3->1 2381352:C>A 10 3->10 2384174:T>A 16 3->16 M704V 18 3->18 2360189:T>C 21 3->21 2428653:T>C 25 3->25 2359582:A>G 29 3->29 V1303A 30 3->30 2421715:C>T 34 3->34 2361414:A>C 36 3->36 2395858:T>G 41 3->41 2366965:T>G 44 3->44 2392973:A>G 46 3->46 2392142:G>T 56 3->56 2408896:C>T 59 3->59 2415120:T>A anon_3_35_0 3->anon_3_35_0 2395098:A>G anon_3_40_0 3->anon_3_40_0 2360106:G>A anon_3_45_0 3->anon_3_45_0 2360304:G>A 4 78 55 4->55 2366963:G>A 5 25 39 5->39 2366538:C>A 5->60 A1934V 6 93 6->0 I1868T 6->2 A1934V 6->3 N1570Y 6->4 P1874L 6->5 P1874S 7 7 6->7 K1603T 8 3 6->8 L1667M 13 5 6->13 I1940T 14 3 6->14 T791M 17 3 6->17 N1345I 19 10 6->19 E1597G 26 2 6->26 I1584T 31 12 6->31 V1853I 47 6->47 2382263:A>G 50 5 6->50 2366965:T>G 51 6->51 2388293:C>T anon_6_27_0 6->anon_6_27_0 2362002:A>T anon_6_49_0 6->anon_6_49_0 2359422:C>G 57 3 8->57 2415120:T>A 9 11 11->4 2358667:T>A 12 28 13->28 2421454:A>G 15 25 14->15 A1746S 61 14->61 V1853I 15->9 2360225:G>A 15->12 2382425:A>T anon_15_23_0 15->anon_15_23_0 2360354:C>T 33 19->33 2376264:A>G 20 22 22->0 2374779:A>T 23 27 2 31->61 T791M 32 32->5 2358667:T>A 35 37 38 38->5 2362259:C>G 40 42 43 43->3 2362259:C>G 45 46->42 2360151:T>A 48 3 48->6 2358667:T>A 48->11 P1874L 48->32 P1874S 49 49->anon_6_49_0 2417127:T>A 50->41 N1570Y 52 50->52 2403047:T>A 53 50->53 2426642:T>C 54 54->6 2410330:C>T 60->37 2425696:G>C anon_20_61_0 61->anon_20_61_0 L1366F anon_3_35_0->35 2407846:C>G anon_3_40_0->40 P940H anon_3_45_0->45 2394110:C>T anon_6_27_1 anon_6_27_0->anon_6_27_1 2362003:C>T anon_6_27_1->27 G54C anon_15_23_1 anon_15_23_0->anon_15_23_1 Q545L anon_15_23_1->23 2415877:C>T anon_20_61_0->20 2384348:T>G

In [38]:
idx_rec = hapclust.locate_recombinants(cluster_haps, debug=True)
print('found', len(idx_rec), 'solutions; min recombinant haplotypes:', len(idx_rec[0]), [len(s) for s in idx_rec])
idx_norec = [i for i in range(cluster_haps.shape[1]) if i not in idx_rec[0]]
cluster_haps_norec = cluster_haps.take(idx_norec, axis=1)
cluster_hap_colors_norec = cluster_hap_colors.take(idx_norec)
hapclust.graph_haplotype_network(
    cluster_haps_norec, hap_colors=cluster_hap_colors_norec, network_method='mjn', max_dist=10,
    variant_labels=variant_labels_vgsc, fontsize=6, show_node_labels='count')


6 1698 [  4   1 442  28] (0, 1) {172}
6 1699 [  4   1 391  79] (0, 1) {39}
147 1674 [  1   1 364 109] (0, 0) {243}
147 1674 [  1   1 364 109] (0, 1) {291}
147 1698 [  1   1 445  28] (0, 0) {291}
147 1698 [  1   1 445  28] (0, 1) {243}
224 1674 [358 109   7   1] (1, 1) {282}
1379 1674 [361 109   4   1] (1, 1) {474}
1379 1681 [467   3   2   3] (1, 0) {473, 474}
1379 1697 [418  52   4   1] (1, 1) {473}
1433 1695 [431  12  31   1] (1, 1) {87}
1698 1704 [431  15  28   1] (1, 1) {242}
found 4 solutions; min recombinant haplotypes: 8 [8, 8, 9, 9]
Out[38]:
%3 0 50 22 0->22 P55L 1 2 15 3 90 3->1 2381352:C>A 10 3->10 2384174:T>A 15 3->15 M704V 17 3->17 2360189:T>C 19 3->19 2428653:T>C 23 3->23 2359582:A>G 27 3->27 V1303A 28 3->28 2421715:C>T 31 3->31 2361414:A>C 33 3->33 2395858:T>G 38 3->38 2392973:A>G 40 3->40 2392142:G>T 50 3->50 2408896:C>T anon_3_32_0 3->anon_3_32_0 2395098:A>G anon_3_36_0 3->anon_3_36_0 2360106:G>A anon_3_39_0 3->anon_3_39_0 2360304:G>A 4 78 49 4->49 2366963:G>A 5 25 35 5->35 2366538:C>A 6 93 6->0 I1868T 6->2 A1934V 6->3 N1570Y 6->4 P1874L 6->5 P1874S 7 7 6->7 K1603T 8 3 6->8 L1667M 12 5 6->12 I1940T 13 3 6->13 T791M 16 3 6->16 N1345I 18 10 6->18 E1597G 24 2 6->24 I1584T 29 12 6->29 V1853I 41 6->41 2382263:A>G 44 5 6->44 2366965:T>G 45 6->45 2388293:C>T anon_6_25_0 6->anon_6_25_0 2362002:A>T anon_6_43_0 6->anon_6_43_0 2359422:C>G 51 3 8->51 2415120:T>A 9 11 26 12->26 2421454:A>G 14 25 13->14 A1746S 14->9 2360225:G>A 14->11 2382425:A>T anon_14_21_0 14->anon_14_21_0 2360354:C>T 30 18->30 2376264:A>G 20 20->0 2374779:A>T 21 25 2 32 34 34->5 2362259:C>G 36 37 39 40->37 2360151:T>A 42 3 42->6 2358667:T>A 43 43->anon_6_43_0 2417127:T>A 46 44->46 2403047:T>A 47 44->47 2426642:T>C 48 48->6 2410330:C>T anon_3_32_0->32 2407846:C>G anon_3_36_0->36 P940H anon_3_39_0->39 2394110:C>T anon_6_25_1 anon_6_25_0->anon_6_25_1 2362003:C>T anon_6_25_1->25 G54C anon_14_21_1 anon_14_21_0->anon_14_21_1 Q545L anon_14_21_1->21 2415877:C>T

In [ ]: