Haplotype clustering demo

This notebook demonstrates how to use the haplotype clustering utilities.


In [1]:
%run setup.ipynb
%matplotlib inline
import hapclust
# %reload_ext autoreload
# %autoreload 1
# %aimport hapclust


Dummy data


In [2]:
# Toy haplotype array used by the cells below: 5 rows (one per variant, given
# the labels 'A'-'E' further down) by 5 columns (one per haplotype).
h = allel.HaplotypeArray([[0, 0, 1, 1, 0],
                          [0, 0, 1, 0, 1],
                          [0, 0, 0, 1, 1],
                          [0, 0, 0, 1, 0],
                          [0, 0, 0, 0, 1]])

In [3]:
# 'msn' network over every haplotype except the last column of `h`,
# with node labels shown and edges annotated with the variant labels.
graph = hapclust.graph_haplotype_network(
    h[:, :-1],
    network_method='msn',
    debug=False,
    show_node_labels=True,
    node_size_factor=0.2,
    anon_width=0.4,
    variant_labels=['A', 'B', 'C', 'D', 'E'],
)
graph


Out[3]:
%3 0 0 anon_0_1_0 0->anon_0_1_0 A anon_0_2_0 0->anon_0_2_0 A 1 1 2 2 anon_0_1_0->1 B anon_0_2_1 anon_0_2_0->anon_0_2_1 C anon_0_2_1->2 D anon_1_2_0 anon_1_2_0->1 B anon_1_2_1 anon_1_2_0->anon_1_2_1 C anon_1_2_1->2 D

In [4]:
# Same subset of haplotypes (last column of `h` dropped), this time with
# network_method='mjn'.
graph = hapclust.graph_haplotype_network(
    h[:, :-1],
    network_method='mjn',
    debug=False,
    show_node_labels=True,
    node_size_factor=0.2,
    anon_width=0.4,
    variant_labels=['A', 'B', 'C', 'D', 'E'],
)
graph


Out[4]:
%3 0 0 3 3 0->3 A 1 1 2 2 3->1 B anon_2_3_0 3->anon_2_3_0 D anon_2_3_0->2 C

In [5]:
# 'msn' network over all haplotypes in `h`; the cell displays the returned graph.
hapclust.graph_haplotype_network(
    h,
    network_method='msn',
    debug=False,
    show_node_labels=True,
    node_size_factor=0.2,
    anon_width=0.4,
    variant_labels=['A', 'B', 'C', 'D', 'E'],
)


Out[5]:
%3 0 0 anon_0_1_0 0->anon_0_1_0 A anon_0_2_0 0->anon_0_2_0 A anon_0_3_0 0->anon_0_3_0 B 1 1 2 2 3 3 anon_0_1_0->1 B anon_0_2_1 anon_0_2_0->anon_0_2_1 C anon_0_2_1->2 D anon_0_3_1 anon_0_3_0->anon_0_3_1 C anon_0_3_1->3 E anon_1_2_0 anon_1_2_0->1 B anon_1_2_1 anon_1_2_0->anon_1_2_1 C anon_1_2_1->2 D anon_1_3_0 anon_1_3_0->1 A anon_1_3_1 anon_1_3_0->anon_1_3_1 C anon_1_3_1->3 E

In [6]:
# 'mjn' network over all haplotypes in `h`; the cell displays the returned graph.
hapclust.graph_haplotype_network(
    h,
    network_method='mjn',
    debug=False,
    show_node_labels=True,
    node_size_factor=0.2,
    anon_width=0.4,
    variant_labels=['A', 'B', 'C', 'D', 'E'],
)


Out[6]:
%3 0 0 4 4 0->4 A 5 5 0->5 B 6 6 0->6 C 1 1 7 7 1->7 C 2 2 3 3 4->1 B 8 8 4->8 C 5->1 A 9 9 5->9 C 6->8 A 6->9 B 8->2 D 8->7 B 9->3 E 9->7 A

In [7]:
# Locate candidate recombinants in the toy data. The result is a list of sets
# of indices; the cells below treat each set as one alternative solution and
# index the haplotype (column) axis of `h` with it.
idx_rec = hapclust.locate_recombinants(h, debug=False)
idx_rec


Out[7]:
[{3},
 {2},
 {4},
 {3, 4},
 {3, 4},
 {2, 3},
 {2, 3},
 {3, 4},
 {2, 3},
 {2, 4},
 {2, 4},
 {2, 4},
 {2, 3},
 {2, 3},
 {2, 3},
 {3, 4},
 {2, 4},
 {2, 4},
 {2, 4},
 {3, 4},
 {3, 4},
 {2, 3, 4},
 {2, 3, 4},
 {2, 3, 4},
 {2, 3, 4},
 {2, 3, 4},
 {2, 3, 4}]

In [8]:
# How many possible solutions? (One list entry per candidate set of recombinants.)
len(idx_rec)


Out[8]:
27

In [9]:
# Pick the first solution; the non-recombinants are all remaining haplotype
# (column) indices of `h`, kept in ascending order.
idx_norec = sorted(set(range(h.shape[1])).difference(idx_rec[0]))
idx_norec


Out[9]:
[0, 1, 2, 4]

In [10]:
# 'mjn' network restricted to the non-recombinant haplotypes selected above.
hapclust.graph_haplotype_network(
    h[:, idx_norec],
    network_method='mjn',
    debug=False,
    show_node_labels=True,
    node_size_factor=0.2,
    anon_width=0.4,
    variant_labels=['A', 'B', 'C', 'D', 'E'],
)


Out[10]:
%3 0 0 3 3 0->3 B 1 1 2 2 3->1 A anon_2_3_0 3->anon_2_3_0 E anon_2_3_0->2 C

Setup data


In [11]:
# --- Callset ---------------------------------------------------------------
# Phased callset (`phase1_ar31` is presumably provided by setup.ipynb).
# To read the region-specific file directly instead:
# callset = h5py.File('../data/ag1000g.phase1.AR3.1.haplotypes.specific_regions.2L_2358158_2431617.h5',
#                     mode='r')
callset = phase1_ar31.callset_phased

# --- Region and positions of interest --------------------------------------
region_vgsc = SeqFeature('2L', 2358158, 2431617)
pos_995S = 2422651
pos_995F = 2422652

# --- Haplotypes within the region ------------------------------------------
# Dask-backed genotypes viewed as haplotypes; only the region slice is
# materialised via .compute().
genotypes = allel.GenotypeDaskArray(callset['2L/calldata/genotype'])
haplotypes = genotypes.to_haplotypes()
pos = allel.SortedIndex(callset['2L/variants/POS'])
loc = pos.locate_range(region_vgsc.start, region_vgsc.end)
h_vgsc = haplotypes[loc].compute()

# --- Carriers of each allele -----------------------------------------------
# Boolean masks over haplotypes: True where the call at the given position is 1.
loc_995S = haplotypes[pos.locate_key(pos_995S)] == 1
loc_995F = haplotypes[pos.locate_key(pos_995F)] == 1
h_vgsc_995S = h_vgsc.compress(loc_995S, axis=1)
h_vgsc_995F = h_vgsc.compress(loc_995F, axis=1)

# --- Haplotype identifiers -------------------------------------------------
# Two IDs per sample: the sample ID suffixed with b'a' and with b'b'.
sample_ids = callset['2L']['samples'][:]
hap_ids = np.array(list(itertools.chain.from_iterable(
    (sample_id + b'a', sample_id + b'b') for sample_id in sample_ids
)))
hap_ids_995S = hap_ids[loc_995S]
hap_ids_995F = hap_ids[loc_995F]

# --- Haplotype populations -------------------------------------------------
# tbl_haplotypes = etl.fromtsv('../data/ag1000g.phase1.AR3.1.haplotypes.meta.txt')
tbl_haplotypes = phase1_ar31.tbl_haplotypes
hap_pops = np.array(tbl_haplotypes.values('population'))
hap_pops_995S = hap_pops[loc_995S]
hap_pops_995F = hap_pops[loc_995F]

# --- Colours ---------------------------------------------------------------
# need to use named colors for graphviz
pop_colors = {
    'AOM': 'brown',
    'BFM': 'firebrick1',
    'GWA': 'goldenrod1',
    'GNS': 'cadetblue1',
    'BFS': 'deepskyblue',
    'CMS': 'dodgerblue3',
    'UGS': 'palegreen',
    'GAS': 'olivedrab',
    'KES': 'grey47',
    'colony': 'black'
}
hap_colors = np.array([pop_colors[pop] for pop in hap_pops])
hap_colors_995S = np.array([pop_colors[pop] for pop in hap_pops_995S])
hap_colors_995F = np.array([pop_colors[pop] for pop in hap_pops_995F])

In [12]:
def _label_from_effect(effect):
    # Keep the second element (e.g. 'D33N') when the first element marks a
    # non-synonymous coding effect; blank out everything else.
    if effect[0] == 'NON_SYNONYMOUS_CODING':
        return effect[1]
    return ''


# Position -> label table for variants with exactly two alleles.
# NOTE(review): frompickle unpickles a local project file; do not point it at
# untrusted data.
tbl_variant_labels = (
    etl
    .frompickle('../data/tbl_variants_phase1.pkl')
    .eq('num_alleles', 2)
    .cut('POS', 'AGAP004707-RA')
    .convert('AGAP004707-RA', _label_from_effect)
    .rename('AGAP004707-RA', 'label')
)
tbl_variant_labels


Out[12]:
0|POS 1|label
2358254 D33N
2358316
2358328
2358353
2358405

...


In [13]:
# Lookup from variant position ('POS') to its label; sanity-check it on the
# 995F position defined in the setup cell.
pos2label = tbl_variant_labels.lookupone('POS', 'label')
pos2label[pos_995F]


Out[13]:
'L995F'

In [14]:
# One label per entry of `pos` ('' where the position has no label), then the
# subset for the region slice `loc`; show the first five as a check.
variant_labels = np.array(
    [pos2label.get(position, '') for position in pos],
    dtype=object,
)
variant_labels_vgsc = variant_labels[loc]
variant_labels_vgsc[:5]


Out[14]:
array(['D33N', '', '', '', ''], dtype=object)

Hierarchical clustering

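The cells below walk through the plotting options of `hapclust.fig_haplotypes_clustered`, using the L995S haplotypes as the example.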


In [15]:
# Default plot:
# - cuts the tree at height 2 (so max distance within each cluster is 1),
# - highlights all clusters,
# - labels all clusters.
# The trailing ';' suppresses the repr of the returned value.
hapclust.fig_haplotypes_clustered(h_vgsc_995S, dpi=150);



In [16]:
# Change the orientation of the plot (here orientation='left').
hapclust.fig_haplotypes_clustered(h_vgsc_995S, orientation='left', dpi=150);



In [17]:
# Try a different cut height (5 instead of the default of 2).
hapclust.fig_haplotypes_clustered(h_vgsc_995S, cut_height=5, dpi=150);



In [18]:
# Choose to highlight only clusters above a certain size, by passing an
# integer as highlight_clusters.
hapclust.fig_haplotypes_clustered(h_vgsc_995S, dpi=150, highlight_clusters=5);



In [19]:
# Manually choose which clusters to highlight, by passing a list as
# highlight_clusters.
hapclust.fig_haplotypes_clustered(h_vgsc_995S, dpi=150, highlight_clusters=[2, 9]);



In [20]:
# Turn off cluster labels with label_clusters=False (highlighting is kept).
hapclust.fig_haplotypes_clustered(h_vgsc_995S, dpi=150, highlight_clusters=5, label_clusters=False);



In [21]:
# Supply your own highlight colours and transparency.
hapclust.fig_haplotypes_clustered(
    h_vgsc_995S,
    dpi=150,
    highlight_clusters=5,
    label_clusters=False,
    highlight_colors=['red', 'green', 'blue', 'cyan', 'magenta', 'yellow'],
    highlight_alpha=0.8,
);


Return values


In [22]:
# What does this function return? Five values, unpacked here as: the figure,
# the two axes (named for the dendrogram and the frequency panel), the cluster
# spans, and the leaf-to-observation mapping. The last two are explored in the
# sections further down.
fig, ax_dend, ax_freq, cluster_spans, leaf_obs = hapclust.fig_haplotypes_clustered(
    h_vgsc_995S, dpi=150, highlight_clusters=5, label_clusters=5)


Customising axes


In [23]:
# The returned axes can be customised after the call, e.g. to set a title and
# axis labels. The cluster spans and leaf mapping from this call are kept
# under *_995S names for the sections below.
cut_height = 2
(fig, ax_dend, ax_freq,
 cluster_spans_995S, leaf_obs_995S) = hapclust.fig_haplotypes_clustered(
    h_vgsc_995S,
    cut_height=cut_height,
    dpi=150,
    highlight_clusters=5,
    label_clusters=5,
)
ax_dend.set_title('Haplotype structure (L995S)')
ax_dend.set_ylabel('Distance (no. SNPs)')
ax_freq.set_ylabel('Haplotype frequency');


Accessing information about clusters


In [24]:
# cluster_spans is useful for accessing information about each cluster.
# Each entry is a 3-tuple: (dendrogram start, dendrogram stop, array of
# haplotype indices in the cluster).
cluster_spans_995S


Out[24]:
[(0, 1, array([85])),
 (1, 2, array([16])),
 (2,
  16,
  array([  0,   1,   3,   4,   5,   7,  11,  15,  17,  19,  21,  23,  25,
          28,  29,  33,  35,  36,  37,  38,  39,  40,  41,  42,  43,  45,
          47,  50,  51,  55,  57,  58,  61,  62,  64,  65,  71,  73,  75,
          77,  78,  82,  83,  86,  87,  88,  89,  90,  94,  95, 100, 101,
         103, 107, 108, 111, 114, 115, 119, 121, 122, 123, 126, 127, 128,
         129, 131, 133, 135, 136, 139, 140, 141, 144, 145, 147, 148, 149,
         150, 156, 158, 159, 163, 164, 165, 167, 169, 170, 172, 173, 176,
         177, 181, 183, 184, 185, 186, 187, 188, 189, 193, 194, 197, 199,
         200, 203])),
 (16, 17, array([316, 346])),
 (17, 18, array([369])),
 (18, 19, array([277])),
 (19, 20, array([294])),
 (20, 21, array([278])),
 (21, 22, array([327])),
 (22,
  31,
  array([275, 281, 283, 284, 286, 287, 288, 291, 293, 299, 303, 304, 306,
         307, 311, 318, 320, 321, 322, 325, 326, 328, 329, 332, 336, 337,
         344, 345, 347, 349, 351, 352, 353, 357])),
 (31, 32, array([274])),
 (32,
  36,
  array([273, 276, 279, 280, 282, 285, 289, 290, 292, 295, 297, 298, 300,
         301, 302, 309, 310, 312, 313, 314, 315, 317, 323, 324, 330, 331,
         333, 334, 335, 338, 339, 340, 342, 348, 350, 355])),
 (36, 37, array([403])),
 (37,
  47,
  array([296, 305, 308, 319, 341, 343, 354, 356, 358, 359, 360, 361, 362,
         363, 364, 365, 366, 367, 368, 370, 371, 372, 373, 374, 375, 376,
         377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
         390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402,
         404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416,
         417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429])),
 (47, 48, array([258])),
 (48, 49, array([227])),
 (49, 50, array([112])),
 (50,
  61,
  array([  2,   6,   8,   9,  10,  12,  13,  14,  18,  20,  22,  24,  26,
          27,  30,  31,  32,  34,  44,  46,  48,  49,  52,  53,  54,  56,
          59,  60,  63,  66,  67,  68,  69,  70,  72,  74,  76,  79,  80,
          81,  84,  91,  92,  93,  96,  97,  98,  99, 102, 104, 105, 106,
         109, 110, 113, 116, 117, 118, 120, 124, 125, 130, 132, 134, 137,
         138, 142, 143, 146, 151, 152, 153, 154, 155, 157, 160, 161, 162,
         166, 168, 171, 174, 175, 178, 179, 180, 182, 190, 191, 192, 195,
         196, 198, 201, 202, 204, 205, 206, 207, 208, 209, 210, 211, 212,
         213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225,
         226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
         240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
         253, 254, 255, 256, 257, 259, 260, 261, 262, 263, 264, 265, 266,
         267, 268, 269, 270, 271, 272]))]

In [25]:
# E.g., cluster labelled "17" in the plot: the label is used directly as the
# index into cluster_spans_995S.
cluster_idx = 17
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]

In [26]:
# These are the positions in the dendrogram where the selected cluster starts
# and stops:
dend_start, dend_stop


Out[26]:
(50, 61)

In [27]:
# These are the indices of the haplotypes in the selected cluster:
cluster_hap_indices


Out[27]:
array([  2,   6,   8,   9,  10,  12,  13,  14,  18,  20,  22,  24,  26,
        27,  30,  31,  32,  34,  44,  46,  48,  49,  52,  53,  54,  56,
        59,  60,  63,  66,  67,  68,  69,  70,  72,  74,  76,  79,  80,
        81,  84,  91,  92,  93,  96,  97,  98,  99, 102, 104, 105, 106,
       109, 110, 113, 116, 117, 118, 120, 124, 125, 130, 132, 134, 137,
       138, 142, 143, 146, 151, 152, 153, 154, 155, 157, 160, 161, 162,
       166, 168, 171, 174, 175, 178, 179, 180, 182, 190, 191, 192, 195,
       196, 198, 201, 202, 204, 205, 206, 207, 208, 209, 210, 211, 212,
       213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225,
       226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
       240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
       253, 254, 255, 256, 257, 259, 260, 261, 262, 263, 264, 265, 266,
       267, 268, 269, 270, 271, 272])

In [28]:
# How many haplotypes are in the selected cluster?
len(cluster_hap_indices)


Out[28]:
162

In [29]:
# N.B., the indices are relative to the haplotype array passed into the
# plotting function (h_vgsc_995S here), so index that same array.
# To extract only the haplotypes in this cluster, take them along axis 1:
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_haps


Out[29]:
<HaplotypeArray shape=(1710, 162) dtype=int8>
01234...157158159160161
000000...00000
100000...00000
200000...00000
......
170700000...00000
170800000...00000
170900000...00000

Outputting haplotype data in other formats


In [30]:
# Haplotype IDs for the cluster members, selected with the same indices (and
# therefore in the same order) as the columns of cluster_haps.
cluster_hap_ids = hap_ids_995S.take(cluster_hap_indices)

In [31]:
# Cast the allele codes to one-character byte strings ('S1') and transpose, so
# that each row corresponds to one haplotype of the cluster.
sequences = cluster_haps.astype('S1').T

In [32]:
# Write one FASTA record per haplotype in the cluster; the cut height and the
# cluster index are embedded in the output file name.
allel.io.write_fasta(
    '../data/demo.hapclust.995S.cut{}.cluster{}.fasta'.format(cut_height, cluster_idx),
    sequences=list(sequences),
    names=cluster_hap_ids,
    mode='w',
    width=80,
)

Mapping dendrogram leaves onto haplotypes


In [33]:
# leaf_obs can also be useful: it maps each leaf of the dendrogram onto the
# indices of the original observations (haplotypes) it contains.
# You need this if you ever want to plot the haplotypes themselves.

# E.g., the first leaf of the dendrogram contains these haplotypes:
leaf_obs_995S[0]


Out[33]:
[85]

In [34]:
# E.g., the 8th leaf of the dendrogram (index 7) contains these haplotypes:
leaf_obs_995S[7]


Out[34]:
[55]

In [35]:
# Build a haplotype array whose columns match the dendrogram leaves.

# One representative index per leaf is enough, because all haplotypes within
# a leaf are identical.
indices = [leaf[0] for leaf in leaf_obs_995S]

# Unique haplotypes, in the order in which they appear in the dendrogram.
h_vgsc_995S_dend = h_vgsc_995S.take(indices, axis=1)
h_vgsc_995S_dend


Out[35]:
<HaplotypeArray shape=(1710, 61) dtype=int8>
01234...5657585960
000000...00000
100000...00000
200000...00000
......
170700000...00000
170800000...00000
170900000...00000

Haplotype networks


In [36]:
# Make a network of all L995S haplotypes, using network_method='mst' and
# otherwise default options.
graph = hapclust.graph_haplotype_network(h_vgsc_995S, network_method='mst')
graph


Out[36]:
%3 0 1 0->1 3 1->3 5 1->5 7 1->7 9 1->9 10 1->10 11 1->11 14 1->14 18 1->18 20 1->20 22 1->22 anon_1_15_0 1->anon_1_15_0 2 13 2->13 19 2->19 23 2->23 27 2->27 28 2->28 4 4->2 21 4->21 6 anon_6_1_0 6->anon_6_1_0 8 12 12->1 12->8 15 16 anon_16_4_0 16->anon_16_4_0 17 17->2 25 23->25 24 anon_24_2_0 24->anon_24_2_0 26 anon_26_2_0 26->anon_26_2_0 29 29->23 30 40 30->40 43 30->43 47 30->47 31 anon_31_30_0 31->anon_31_30_0 32 anon_32_34_0 32->anon_32_34_0 33 34 35 38 35->38 36 anon_36_33_0 36->anon_36_33_0 37 50 37->50 54 37->54 55 37->55 56 37->56 57 37->57 59 37->59 60 37->60 anon_37_58_0 37->anon_37_58_0 39 39->32 41 49 41->49 42 44 44->32 44->35 48 44->48 45 45->32 46 anon_46_32_0 46->anon_46_32_0 48->41 51 50->51 52 53 55->53 58 anon_1_15_1 anon_1_15_0->anon_1_15_1 anon_1_15_2 anon_1_15_1->anon_1_15_2 anon_1_15_2->15 anon_6_1_0->1 anon_16_4_0->4 anon_24_2_0->2 anon_26_2_0->2 anon_31_30_1 anon_31_30_0->anon_31_30_1 anon_31_30_2 anon_31_30_1->anon_31_30_2 anon_31_30_3 anon_31_30_2->anon_31_30_3 anon_31_30_3->30 anon_32_34_1 anon_32_34_0->anon_32_34_1 anon_32_34_2 anon_32_34_1->anon_32_34_2 anon_32_34_2->34 anon_36_33_0->33 anon_37_58_1 anon_37_58_0->anon_37_58_1 anon_37_58_2 anon_37_58_1->anon_37_58_2 anon_37_58_2->58 anon_46_32_1 anon_46_32_0->anon_46_32_1 anon_46_32_1->32

In [37]:
# Add some color: hap_colors gives one named color per haplotype (derived from
# its population in the setup cell).
graph = hapclust.graph_haplotype_network(h_vgsc_995S, hap_colors=hap_colors_995S, network_method='mst')
graph


Out[37]:
%3 0 1 0->1 3 1->3 5 1->5 7 1->7 9 1->9 10 1->10 11 1->11 14 1->14 18 1->18 20 1->20 22 1->22 anon_1_15_0 1->anon_1_15_0 2 13 2->13 19 2->19 23 2->23 27 2->27 28 2->28 4 4->2 21 4->21 6 anon_6_1_0 6->anon_6_1_0 8 12 12->1 12->8 15 16 anon_16_4_0 16->anon_16_4_0 17 17->2 25 23->25 24 anon_24_2_0 24->anon_24_2_0 26 anon_26_2_0 26->anon_26_2_0 29 29->23 30 40 30->40 43 30->43 47 30->47 31 anon_31_30_0 31->anon_31_30_0 32 anon_32_34_0 32->anon_32_34_0 33 34 35 38 35->38 36 anon_36_33_0 36->anon_36_33_0 37 50 37->50 54 37->54 55 37->55 56 37->56 57 37->57 59 37->59 60 37->60 anon_37_58_0 37->anon_37_58_0 39 39->32 41 49 41->49 42 44 44->32 44->35 48 44->48 45 45->32 46 anon_46_32_0 46->anon_46_32_0 48->41 51 50->51 52 53 55->53 58 anon_1_15_1 anon_1_15_0->anon_1_15_1 anon_1_15_2 anon_1_15_1->anon_1_15_2 anon_1_15_2->15 anon_6_1_0->1 anon_16_4_0->4 anon_24_2_0->2 anon_26_2_0->2 anon_31_30_1 anon_31_30_0->anon_31_30_1 anon_31_30_2 anon_31_30_1->anon_31_30_2 anon_31_30_3 anon_31_30_2->anon_31_30_3 anon_31_30_3->30 anon_32_34_1 anon_32_34_0->anon_32_34_1 anon_32_34_2 anon_32_34_1->anon_32_34_2 anon_32_34_2->34 anon_36_33_0->33 anon_37_58_1 anon_37_58_0->anon_37_58_1 anon_37_58_2 anon_37_58_1->anon_37_58_2 anon_37_58_2->58 anon_46_32_1 anon_46_32_0->anon_46_32_1 anon_46_32_1->32

In [38]:
# Try a different network building method: 'msn' instead of 'mst'.
graph = hapclust.graph_haplotype_network(h_vgsc_995S, hap_colors=hap_colors_995S, network_method='msn')
graph


Out[38]:
%3 0 1 0->1 3 1->3 5 1->5 7 1->7 9 1->9 10 1->10 11 1->11 12 1->12 14 1->14 18 1->18 20 1->20 22 1->22 anon_1_6_0 1->anon_1_6_0 anon_1_15_0 1->anon_1_15_0 2 4 2->4 13 2->13 17 2->17 19 2->19 23 2->23 27 2->27 28 2->28 anon_2_24_0 2->anon_2_24_0 anon_2_26_0 2->anon_2_26_0 21 4->21 anon_4_16_0 4->anon_4_16_0 6 8 8->9 8->12 15 16 25 23->25 29 23->29 24 26 30 40 30->40 43 30->43 47 30->47 anon_30_31_0 30->anon_30_31_0 31 32 38 32->38 39 32->39 41 32->41 44 32->44 45 32->45 anon_32_34_0 32->anon_32_34_0 anon_32_46_0 32->anon_32_46_0 33 anon_33_36_0 33->anon_33_36_0 34 35 35->38 35->44 36 37 50 37->50 54 37->54 55 37->55 56 37->56 57 37->57 59 37->59 60 37->60 anon_37_58_0 37->anon_37_58_0 48 41->48 49 41->49 42 44->48 46 51 50->51 53 50->53 52 53->55 58 anon_1_6_0->6 anon_1_15_1 anon_1_15_0->anon_1_15_1 anon_1_15_2 anon_1_15_1->anon_1_15_2 anon_1_15_2->15 anon_2_24_0->24 anon_2_26_0->26 anon_4_16_0->16 anon_30_31_1 anon_30_31_0->anon_30_31_1 anon_30_31_2 anon_30_31_1->anon_30_31_2 anon_30_31_3 anon_30_31_2->anon_30_31_3 anon_30_31_3->31 anon_32_34_1 anon_32_34_0->anon_32_34_1 anon_32_34_2 anon_32_34_1->anon_32_34_2 anon_32_34_2->34 anon_32_46_1 anon_32_46_0->anon_32_46_1 anon_32_46_1->46 anon_33_36_0->36 anon_37_58_1 anon_37_58_0->anon_37_58_1 anon_37_58_2 anon_37_58_1->anon_37_58_2 anon_37_58_2->58

In [39]:
# Try a third network building method: 'mjn'.
graph = hapclust.graph_haplotype_network(h_vgsc_995S, hap_colors=hap_colors_995S, network_method='mjn')
graph


Out[39]:
%3 0 1 0->1 3 1->3 5 1->5 7 1->7 9 1->9 10 1->10 11 1->11 12 1->12 14 1->14 18 1->18 20 1->20 22 1->22 anon_1_6_0 1->anon_1_6_0 anon_1_15_0 1->anon_1_15_0 2 4 2->4 13 2->13 17 2->17 19 2->19 23 2->23 27 2->27 28 2->28 anon_2_24_0 2->anon_2_24_0 anon_2_26_0 2->anon_2_26_0 21 4->21 anon_4_16_0 4->anon_4_16_0 6 8 8->9 8->12 15 16 25 23->25 29 23->29 24 26 30 40 30->40 43 30->43 47 30->47 anon_30_31_0 30->anon_30_31_0 31 32 38 32->38 39 32->39 41 32->41 44 32->44 45 32->45 61 32->61 33 anon_33_36_0 33->anon_33_36_0 34 anon_34_61_0 34->anon_34_61_0 35 35->38 35->44 36 37 50 37->50 54 37->54 55 37->55 56 37->56 57 37->57 59 37->59 60 37->60 anon_37_58_0 37->anon_37_58_0 48 41->48 49 41->49 42 44->48 46 anon_46_61_0 46->anon_46_61_0 51 50->51 53 50->53 52 53->55 58 anon_1_6_0->6 anon_1_15_1 anon_1_15_0->anon_1_15_1 anon_1_15_2 anon_1_15_1->anon_1_15_2 anon_1_15_2->15 anon_2_24_0->24 anon_2_26_0->26 anon_4_16_0->16 anon_30_31_1 anon_30_31_0->anon_30_31_1 anon_30_31_2 anon_30_31_1->anon_30_31_2 anon_30_31_3 anon_30_31_2->anon_30_31_3 anon_30_31_3->31 anon_33_36_0->36 anon_34_61_1 anon_34_61_0->anon_34_61_1 anon_34_61_1->61 anon_37_58_1 anon_37_58_0->anon_37_58_1 anon_37_58_2 anon_37_58_1->anon_37_58_2 anon_37_58_2->58 anon_46_61_0->61

In [40]:
# Change the maximum connection distance (max_dist=10) for the 'mst' network.
graph = hapclust.graph_haplotype_network(
    h_vgsc_995S,
    hap_colors=hap_colors_995S,
    max_dist=10,
    network_method='mst',
)
graph


Out[40]:
%3 0 1 0->1 3 1->3 5 1->5 7 1->7 9 1->9 10 1->10 11 1->11 14 1->14 18 1->18 20 1->20 22 1->22 anon_1_15_0 1->anon_1_15_0 2 13 2->13 19 2->19 23 2->23 27 2->27 28 2->28 4 4->2 21 4->21 6 anon_6_1_0 6->anon_6_1_0 8 12 12->1 12->8 15 16 anon_16_4_0 16->anon_16_4_0 17 17->2 25 23->25 24 anon_24_2_0 24->anon_24_2_0 26 anon_26_2_0 26->anon_26_2_0 29 29->23 30 40 30->40 43 30->43 47 30->47 31 anon_31_30_0 31->anon_31_30_0 32 anon_32_30_0 32->anon_32_30_0 anon_32_34_0 32->anon_32_34_0 33 34 35 38 35->38 36 anon_36_33_0 36->anon_36_33_0 37 50 37->50 54 37->54 55 37->55 56 37->56 57 37->57 59 37->59 60 37->60 anon_37_58_0 37->anon_37_58_0 39 39->32 41 49 41->49 42 44 44->32 44->35 48 44->48 45 45->32 46 anon_46_32_0 46->anon_46_32_0 48->41 51 50->51 52 53 55->53 58 anon_1_15_1 anon_1_15_0->anon_1_15_1 anon_1_15_2 anon_1_15_1->anon_1_15_2 anon_1_15_2->15 anon_6_1_0->1 anon_16_4_0->4 anon_24_2_0->2 anon_26_2_0->2 anon_31_30_1 anon_31_30_0->anon_31_30_1 anon_31_30_2 anon_31_30_1->anon_31_30_2 anon_31_30_3 anon_31_30_2->anon_31_30_3 anon_31_30_3->30 anon_32_30_1 anon_32_30_0->anon_32_30_1 anon_32_30_2 anon_32_30_1->anon_32_30_2 anon_32_30_3 anon_32_30_2->anon_32_30_3 anon_32_30_4 anon_32_30_3->anon_32_30_4 anon_32_30_5 anon_32_30_4->anon_32_30_5 anon_32_30_6 anon_32_30_5->anon_32_30_6 anon_32_30_6->30 anon_32_34_1 anon_32_34_0->anon_32_34_1 anon_32_34_2 anon_32_34_1->anon_32_34_2 anon_32_34_2->34 anon_36_33_0->33 anon_37_58_1 anon_37_58_0->anon_37_58_1 anon_37_58_2 anon_37_58_1->anon_37_58_2 anon_37_58_2->58 anon_46_32_1 anon_46_32_0->anon_46_32_1 anon_46_32_1->32

In [41]:
# Same maximum connection distance (max_dist=10), with the 'msn' method.
graph = hapclust.graph_haplotype_network(
    h_vgsc_995S,
    hap_colors=hap_colors_995S,
    network_method='msn',
    max_dist=10,
)
graph


Out[41]:
%3 0 1 0->1 3 1->3 5 1->5 7 1->7 9 1->9 10 1->10 11 1->11 12 1->12 14 1->14 18 1->18 20 1->20 22 1->22 anon_1_6_0 1->anon_1_6_0 anon_1_15_0 1->anon_1_15_0 2 4 2->4 13 2->13 17 2->17 19 2->19 23 2->23 27 2->27 28 2->28 anon_2_24_0 2->anon_2_24_0 anon_2_26_0 2->anon_2_26_0 21 4->21 anon_4_16_0 4->anon_4_16_0 6 8 8->9 8->12 15 16 25 23->25 29 23->29 24 26 30 40 30->40 43 30->43 47 30->47 anon_30_31_0 30->anon_30_31_0 anon_30_32_0 30->anon_30_32_0 31 32 38 32->38 39 32->39 41 32->41 44 32->44 45 32->45 anon_32_34_0 32->anon_32_34_0 anon_32_46_0 32->anon_32_46_0 33 anon_33_36_0 33->anon_33_36_0 34 35 35->38 35->44 36 37 50 37->50 54 37->54 55 37->55 56 37->56 57 37->57 59 37->59 60 37->60 anon_37_58_0 37->anon_37_58_0 48 41->48 49 41->49 42 anon_43_44_0 43->anon_43_44_0 44->48 46 51 50->51 53 50->53 52 53->55 58 anon_1_6_0->6 anon_1_15_1 anon_1_15_0->anon_1_15_1 anon_1_15_2 anon_1_15_1->anon_1_15_2 anon_1_15_2->15 anon_2_24_0->24 anon_2_26_0->26 anon_4_16_0->16 anon_30_31_1 anon_30_31_0->anon_30_31_1 anon_30_31_2 anon_30_31_1->anon_30_31_2 anon_30_31_3 anon_30_31_2->anon_30_31_3 anon_30_31_3->31 anon_30_32_1 anon_30_32_0->anon_30_32_1 anon_30_32_2 anon_30_32_1->anon_30_32_2 anon_30_32_3 anon_30_32_2->anon_30_32_3 anon_30_32_4 anon_30_32_3->anon_30_32_4 anon_30_32_5 anon_30_32_4->anon_30_32_5 anon_30_32_6 anon_30_32_5->anon_30_32_6 anon_30_32_6->32 anon_32_34_1 anon_32_34_0->anon_32_34_1 anon_32_34_2 anon_32_34_1->anon_32_34_2 anon_32_34_2->34 anon_32_46_1 anon_32_46_0->anon_32_46_1 anon_32_46_1->46 anon_33_36_0->36 anon_37_58_1 anon_37_58_0->anon_37_58_1 anon_37_58_2 anon_37_58_1->anon_37_58_2 anon_37_58_2->58 anon_43_44_1 anon_43_44_0->anon_43_44_1 anon_43_44_2 anon_43_44_1->anon_43_44_2 anon_43_44_3 anon_43_44_2->anon_43_44_3 anon_43_44_4 anon_43_44_3->anon_43_44_4 anon_43_44_5 anon_43_44_4->anon_43_44_5 anon_43_44_6 anon_43_44_5->anon_43_44_6 anon_43_44_6->44

In [42]:
# Same maximum connection distance (max_dist=10), with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    h_vgsc_995S,
    hap_colors=hap_colors_995S,
    network_method='mjn',
    max_dist=10,
)
graph


Out[42]:
%3 0 1 0->1 3 1->3 5 1->5 7 1->7 9 1->9 10 1->10 11 1->11 12 1->12 14 1->14 18 1->18 20 1->20 22 1->22 anon_1_6_0 1->anon_1_6_0 anon_1_15_0 1->anon_1_15_0 2 4 2->4 13 2->13 17 2->17 19 2->19 23 2->23 27 2->27 28 2->28 anon_2_24_0 2->anon_2_24_0 anon_2_26_0 2->anon_2_26_0 21 4->21 anon_4_16_0 4->anon_4_16_0 6 8 8->9 8->12 15 16 25 23->25 29 23->29 24 26 30 40 30->40 43 30->43 47 30->47 anon_30_31_0 30->anon_30_31_0 anon_30_32_0 30->anon_30_32_0 31 32 38 32->38 39 32->39 41 32->41 44 32->44 45 32->45 61 32->61 33 anon_33_36_0 33->anon_33_36_0 34 anon_34_61_0 34->anon_34_61_0 35 35->38 35->44 36 37 50 37->50 54 37->54 55 37->55 56 37->56 57 37->57 59 37->59 60 37->60 anon_37_58_0 37->anon_37_58_0 48 41->48 49 41->49 42 anon_43_44_0 43->anon_43_44_0 44->48 46 anon_46_61_0 46->anon_46_61_0 51 50->51 53 50->53 52 53->55 58 anon_1_6_0->6 anon_1_15_1 anon_1_15_0->anon_1_15_1 anon_1_15_2 anon_1_15_1->anon_1_15_2 anon_1_15_2->15 anon_2_24_0->24 anon_2_26_0->26 anon_4_16_0->16 anon_30_31_1 anon_30_31_0->anon_30_31_1 anon_30_31_2 anon_30_31_1->anon_30_31_2 anon_30_31_3 anon_30_31_2->anon_30_31_3 anon_30_31_3->31 anon_30_32_1 anon_30_32_0->anon_30_32_1 anon_30_32_2 anon_30_32_1->anon_30_32_2 anon_30_32_3 anon_30_32_2->anon_30_32_3 anon_30_32_4 anon_30_32_3->anon_30_32_4 anon_30_32_5 anon_30_32_4->anon_30_32_5 anon_30_32_6 anon_30_32_5->anon_30_32_6 anon_30_32_6->32 anon_33_36_0->36 anon_34_61_1 anon_34_61_0->anon_34_61_1 anon_34_61_1->61 anon_37_58_1 anon_37_58_0->anon_37_58_1 anon_37_58_2 anon_37_58_1->anon_37_58_2 anon_37_58_2->58 anon_46_61_0->61 anon_43_44_1 anon_43_44_0->anon_43_44_1 anon_43_44_2 anon_43_44_1->anon_43_44_2 anon_43_44_3 anon_43_44_2->anon_43_44_3 anon_43_44_4 anon_43_44_3->anon_43_44_4 anon_43_44_5 anon_43_44_4->anon_43_44_5 anon_43_44_6 anon_43_44_5->anon_43_44_6 anon_43_44_6->44

In [43]:
# Network for a single cluster taken from the L995S dendrogram (cluster 2).
# Pull out the cluster's haplotypes, look up their populations and colours,
# then draw an 'msn' network with variant labels on the edges.
cluster_idx = 2
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995S[cluster_hap_indices]
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[43]:
%3 0 1 0->1 2 1->2 3 1->3 4 1->4 I492N 6 1->6 7 1->7 8 1->8 9 1->9 10 1->10 11 1->11 M925L 12 1->12 13 1->13 5 5->6 5->9

In [44]:
# Same cluster as selected in the previous cell, drawn with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[44]:
%3 0 1 0->1 2 1->2 3 1->3 4 1->4 I492N 6 1->6 7 1->7 8 1->8 9 1->9 10 1->10 11 1->11 M925L 12 1->12 13 1->13 5 5->6 5->9

In [45]:
# Network for a single cluster taken from the L995S dendrogram (cluster 9).
# Pull out the cluster's haplotypes, look up their populations and colours,
# then draw an 'msn' network with variant labels on the edges.
cluster_idx = 9
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995S[cluster_hap_indices]
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[45]:
%3 0 2 0->2 3 0->3 4 0->4 S2076T 5 0->5 6 0->6 L1617F 1 1->2 1->5 7 4->7 8 4->8 5->7 S2076T

In [46]:
# Same cluster as selected in the previous cell, drawn with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[46]:
%3 0 2 0->2 3 0->3 4 0->4 S2076T 5 0->5 6 0->6 L1617F 1 1->2 1->5 7 4->7 8 4->8 5->7 S2076T

In [47]:
# Network for a single cluster taken from the L995S dendrogram (cluster 11).
# Pull out the cluster's haplotypes, look up their populations and colours,
# then draw an 'msn' network with variant labels on the edges.
cluster_idx = 11
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995S[cluster_hap_indices]
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[47]:
%3 0 1 0->1 2 0->2 3 0->3

In [48]:
# Same cluster as selected in the previous cell, drawn with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[48]:
%3 0 1 0->1 2 0->2 3 0->3

In [49]:
# Network for a single cluster taken from the L995S dendrogram (cluster 13).
# Pull out the cluster's haplotypes, look up their populations and colours,
# then draw an 'msn' network with variant labels on the edges.
cluster_idx = 13
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995S[cluster_hap_indices]
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[49]:
%3 0 1 0->1 4 0->4 5 0->5 6 0->6 7 0->7 8 0->8 9 0->9 K251R 2 1->2 3 1->3 3->5

In [50]:
# Same cluster as selected in the previous cell, drawn with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[50]:
%3 0 1 0->1 4 0->4 5 0->5 6 0->6 7 0->7 8 0->8 9 0->9 K251R 2 1->2 3 1->3 3->5

In [51]:
# Network for a single cluster taken from the L995S dendrogram (cluster 17).
# Pull out the cluster's haplotypes, look up their populations and colours,
# then draw an 'msn' network with variant labels on the edges.
cluster_idx = 17
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995S[cluster_idx]
cluster_haps = h_vgsc_995S.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995S[cluster_hap_indices]
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[51]:
%3 0 1 0->1 2 0->2 3 0->3 F1529L 4 0->4 6 0->6 8 0->8 9 0->9 5 1->5 7 6->7 10 6->10

In [52]:
# Same cluster as selected in the previous cell, drawn with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[52]:
%3 0 1 0->1 2 0->2 3 0->3 F1529L 4 0->4 6 0->6 8 0->8 9 0->9 5 1->5 7 6->7 10 6->10

In [53]:
# Repeat the clustering figure for the L995F haplotypes, cutting the tree at
# height 4 and drawing it with orientation='left'. Note that this rebinds
# `cut_height`, `fig`, `ax_dend` and `ax_freq`.
cut_height = 4
(fig, ax_dend, ax_freq,
 cluster_spans_995F, leaf_obs_995F) = hapclust.fig_haplotypes_clustered(
    h_vgsc_995F,
    orientation='left',
    cut_height=cut_height,
    dpi=150,
    highlight_clusters=5,
    label_clusters=5,
)



In [54]:
# 'mst' network of all L995F haplotypes, coloured by population, with
# max_dist=4.
graph = hapclust.graph_haplotype_network(
    h_vgsc_995F,
    hap_colors=hap_colors_995F,
    max_dist=4,
    network_method='mst',
)
graph


Out[54]:
%3 0 1 3 1->3 11 1->11 18 1->18 22 1->22 2 anon_2_15_0 2->anon_2_15_0 3->0 3->2 5 3->5 8 3->8 13 3->13 14 3->14 17 3->17 21 3->21 26 3->26 29 3->29 75 3->75 76 3->76 93 3->93 95 3->95 100 3->100 101 3->101 119 3->119 4 4->3 32 4->32 36 4->36 10 5->10 118 5->118 6 6->3 7 12 7->12 31 8->31 53 8->53 9 9->3 16 13->16 28 13->28 14->12 15 19 19->3 33 19->33 20 anon_20_14_0 20->anon_20_14_0 23 23->1 24 anon_24_12_0 24->anon_24_12_0 25 25->3 27 anon_27_3_0 27->anon_27_3_0 30 30->19 34 35 37 38 38->4 39 39->4 40 41 42 anon_42_3_0 42->anon_42_3_0 43 48 43->48 49 43->49 55 43->55 59 43->59 64 43->64 68 43->68 77 43->77 78 43->78 79 43->79 86 43->86 91 43->91 94 43->94 113 43->113 anon_43_63_0 43->anon_43_63_0 anon_43_81_0 43->anon_43_81_0 44 44->3 45 80 45->80 85 45->85 anon_45_61_0 45->anon_45_61_0 46 51 46->51 62 46->62 anon_46_45_0 46->anon_46_45_0 47 54 47->54 50 52 50->52 112 50->112 115 50->115 117 50->117 anon_50_46_0 50->anon_50_46_0 54->50 116 54->116 56 57 anon_57_83_0 57->anon_57_83_0 58 60 61 63 65 65->50 66 67 anon_67_52_0 67->anon_67_52_0 69 69->57 70 70->43 71 anon_71_43_0 71->anon_71_43_0 72 72->43 73 74 74->73 anon_74_53_0 74->anon_74_53_0 81 82 83 84 anon_84_43_0 84->anon_84_43_0 87 88 88->45 89 89->53 90 anon_90_57_0 90->anon_90_57_0 91->56 92 96 97 98 99 102 103 104 105 106 107 108 109 110 111 114 114->50 anon_2_15_0->15 anon_20_14_0->14 anon_24_12_1 anon_24_12_0->anon_24_12_1 anon_24_12_1->12 anon_27_3_1 anon_27_3_0->anon_27_3_1 anon_27_3_1->3 anon_42_3_0->3 anon_43_63_1 anon_43_63_0->anon_43_63_1 anon_43_63_2 anon_43_63_1->anon_43_63_2 anon_43_63_2->63 anon_43_81_0->81 anon_45_61_1 anon_45_61_0->anon_45_61_1 anon_45_61_2 anon_45_61_1->anon_45_61_2 anon_45_61_2->61 anon_46_45_0->45 anon_50_46_1 anon_50_46_0->anon_50_46_1 anon_50_46_2 anon_50_46_1->anon_50_46_2 anon_50_46_2->46 anon_57_83_0->83 anon_67_52_1 anon_67_52_0->anon_67_52_1 anon_67_52_1->52 anon_71_43_1 anon_71_43_0->anon_71_43_1 anon_71_43_1->43 anon_74_53_0->53 anon_84_43_1 
anon_84_43_0->anon_84_43_1 anon_84_43_2 anon_84_43_1->anon_84_43_2 anon_84_43_2->43 anon_90_57_1 anon_90_57_0->anon_90_57_1 anon_90_57_2 anon_90_57_1->anon_90_57_2 anon_90_57_2->57

In [55]:
# Haplotype network for the 995F haplotypes using the 'msn' method.
graph = hapclust.graph_haplotype_network(
    h_vgsc_995F,
    hap_colors=hap_colors_995F,
    max_dist=4,
    network_method='msn',
)
graph


Out[55]:
%3 0 3 0->3 1 1->3 11 1->11 18 1->18 22 1->22 23 1->23 2 2->3 31 2->31 anon_2_15_0 2->anon_2_15_0 4 3->4 5 3->5 6 3->6 8 3->8 9 3->9 13 3->13 14 3->14 17 3->17 19 3->19 21 3->21 25 3->25 26 3->26 29 3->29 44 3->44 75 3->75 76 3->76 93 3->93 95 3->95 100 3->100 101 3->101 119 3->119 anon_3_27_0 3->anon_3_27_0 anon_3_42_0 3->anon_3_42_0 32 4->32 36 4->36 38 4->38 39 4->39 10 5->10 118 5->118 7 12 7->12 8->10 8->31 53 8->53 12->14 anon_12_24_0 12->anon_12_24_0 16 13->16 28 13->28 anon_14_20_0 14->anon_14_20_0 15 30 19->30 33 19->33 20 24 27 34 35 37 40 41 42 43 48 43->48 49 43->49 55 43->55 59 43->59 64 43->64 68 43->68 70 43->70 72 43->72 77 43->77 78 43->78 79 43->79 86 43->86 91 43->91 94 43->94 113 43->113 anon_43_63_0 43->anon_43_63_0 anon_43_71_0 43->anon_43_71_0 anon_43_81_0 43->anon_43_81_0 anon_43_84_0 43->anon_43_84_0 45 80 45->80 85 45->85 88 45->88 anon_45_46_0 45->anon_45_46_0 anon_45_61_0 45->anon_45_61_0 46 51 46->51 62 46->62 anon_46_50_0 46->anon_46_50_0 47 54 47->54 50 52 50->52 50->54 65 50->65 112 50->112 114 50->114 115 50->115 117 50->117 anon_52_67_0 52->anon_52_67_0 89 53->89 anon_53_74_0 53->anon_53_74_0 116 54->116 56 56->91 57 69 57->69 anon_57_83_0 57->anon_57_83_0 anon_57_90_0 57->anon_57_90_0 58 60 61 63 66 67 71 73 74 73->74 81 82 83 84 87 90 92 96 97 98 99 102 103 104 105 106 107 108 109 110 111 anon_2_15_0->15 anon_3_27_1 anon_3_27_0->anon_3_27_1 anon_3_27_1->27 anon_3_42_0->42 anon_12_24_1 anon_12_24_0->anon_12_24_1 anon_12_24_1->24 anon_14_20_0->20 anon_43_63_1 anon_43_63_0->anon_43_63_1 anon_43_63_2 anon_43_63_1->anon_43_63_2 anon_43_63_2->63 anon_43_71_1 anon_43_71_0->anon_43_71_1 anon_43_71_1->71 anon_43_81_0->81 anon_43_84_1 anon_43_84_0->anon_43_84_1 anon_43_84_2 anon_43_84_1->anon_43_84_2 anon_43_84_2->84 anon_45_46_0->46 anon_45_61_1 anon_45_61_0->anon_45_61_1 anon_45_61_2 anon_45_61_1->anon_45_61_2 anon_45_61_2->61 anon_46_50_1 anon_46_50_0->anon_46_50_1 anon_46_50_2 anon_46_50_1->anon_46_50_2 anon_46_50_2->50 anon_52_67_1 
anon_52_67_0->anon_52_67_1 anon_52_67_1->67 anon_53_74_0->74 anon_57_83_0->83 anon_57_90_1 anon_57_90_0->anon_57_90_1 anon_57_90_2 anon_57_90_1->anon_57_90_2 anon_57_90_2->90

In [56]:
# Haplotype network for the 995F haplotypes using the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    h_vgsc_995F,
    hap_colors=hap_colors_995F,
    max_dist=4,
    network_method='mjn',
)
graph


Out[56]:
%3 0 3 0->3 1 1->3 11 1->11 18 1->18 22 1->22 23 1->23 2 2->3 31 2->31 anon_2_15_0 2->anon_2_15_0 4 3->4 5 3->5 6 3->6 8 3->8 9 3->9 13 3->13 14 3->14 17 3->17 19 3->19 21 3->21 25 3->25 26 3->26 29 3->29 44 3->44 75 3->75 76 3->76 93 3->93 95 3->95 100 3->100 101 3->101 119 3->119 anon_3_27_0 3->anon_3_27_0 anon_3_42_0 3->anon_3_42_0 32 4->32 36 4->36 38 4->38 39 4->39 10 5->10 118 5->118 7 12 7->12 8->10 8->31 53 8->53 12->14 anon_12_24_0 12->anon_12_24_0 16 13->16 28 13->28 anon_14_20_0 14->anon_14_20_0 15 30 19->30 33 19->33 20 24 27 34 35 37 40 41 42 43 48 43->48 49 43->49 55 43->55 59 43->59 64 43->64 68 43->68 70 43->70 72 43->72 77 43->77 78 43->78 79 43->79 86 43->86 91 43->91 94 43->94 113 43->113 anon_43_63_0 43->anon_43_63_0 anon_43_71_0 43->anon_43_71_0 anon_43_81_0 43->anon_43_81_0 anon_43_84_0 43->anon_43_84_0 45 80 45->80 85 45->85 88 45->88 anon_45_46_0 45->anon_45_46_0 anon_45_61_0 45->anon_45_61_0 46 51 46->51 62 46->62 anon_46_50_0 46->anon_46_50_0 47 54 47->54 50 52 50->52 50->54 65 50->65 112 50->112 114 50->114 115 50->115 117 50->117 anon_52_67_0 52->anon_52_67_0 89 53->89 anon_53_74_0 53->anon_53_74_0 116 54->116 56 56->91 57 69 57->69 anon_57_83_0 57->anon_57_83_0 anon_57_90_0 57->anon_57_90_0 58 60 61 63 66 67 71 73 74 73->74 81 82 83 84 87 90 92 96 97 98 99 102 103 104 105 106 107 108 109 110 111 anon_2_15_0->15 anon_3_27_1 anon_3_27_0->anon_3_27_1 anon_3_27_1->27 anon_3_42_0->42 anon_12_24_1 anon_12_24_0->anon_12_24_1 anon_12_24_1->24 anon_14_20_0->20 anon_43_63_1 anon_43_63_0->anon_43_63_1 anon_43_63_2 anon_43_63_1->anon_43_63_2 anon_43_63_2->63 anon_43_71_1 anon_43_71_0->anon_43_71_1 anon_43_71_1->71 anon_43_81_0->81 anon_43_84_1 anon_43_84_0->anon_43_84_1 anon_43_84_2 anon_43_84_1->anon_43_84_2 anon_43_84_2->84 anon_45_46_0->46 anon_45_61_1 anon_45_61_0->anon_45_61_1 anon_45_61_2 anon_45_61_1->anon_45_61_2 anon_45_61_2->61 anon_46_50_1 anon_46_50_0->anon_46_50_1 anon_46_50_2 anon_46_50_1->anon_46_50_2 anon_46_50_2->50 anon_52_67_1 
anon_52_67_0->anon_52_67_1 anon_52_67_1->67 anon_53_74_0->74 anon_57_83_0->83 anon_57_90_1 anon_57_90_0->anon_57_90_1 anon_57_90_2 anon_57_90_1->anon_57_90_2 anon_57_90_2->90

In [57]:
# Network ('msn') for one cluster picked out of the spans extracted earlier
# from the dendrogram.
cluster_idx = 15
# Each span unpacks into three items; the last holds the haplotype indices.
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
# Restrict haplotypes (axis 1) and their population labels to this cluster.
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995F[cluster_hap_indices]
# One colour per haplotype, looked up from its population.
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[57]:
%3 0 3 0->3 1 2 1->2 1->3 F1920S 4 1->4 6 1->6 R537L 7 1->7 H1755Y 8 1->8 10 1->10 K251R anon_2_5_0 2->anon_2_5_0 9 3->9 5 anon_2_5_1 anon_2_5_0->anon_2_5_1 anon_2_5_1->5

In [58]:
# Re-draw the current cluster_haps selection with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[58]:
%3 0 3 0->3 1 2 1->2 1->3 F1920S 4 1->4 6 1->6 R537L 7 1->7 H1755Y 8 1->8 10 1->10 K251R anon_2_5_0 2->anon_2_5_0 9 3->9 5 anon_2_5_1 anon_2_5_0->anon_2_5_1 anon_2_5_1->5

In [59]:
# Network ('msn') for one cluster picked out of the spans extracted earlier
# from the dendrogram.
cluster_idx = 16
# Each span unpacks into three items; the last holds the haplotype indices.
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
# Restrict haplotypes (axis 1) and their population labels to this cluster.
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995F[cluster_hap_indices]
# One colour per haplotype, looked up from its population.
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[59]:
%3 0 4 0->4 5 0->5 6 0->6 1 2 1->2 G317S 3 1->3 anon_0_1_0 1->anon_0_1_0 I1940T anon_0_1_0->0 D466H

In [60]:
# Re-draw the current cluster_haps selection with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[60]:
%3 0 4 0->4 5 0->5 6 0->6 1 2 1->2 G317S 3 1->3 anon_0_1_0 1->anon_0_1_0 I1940T anon_0_1_0->0 D466H

In [61]:
# Network ('msn') for one cluster picked out of the spans extracted earlier
# from the dendrogram.
cluster_idx = 20
# Each span unpacks into three items; the last holds the haplotype indices.
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
# Restrict haplotypes (axis 1) and their population labels to this cluster.
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995F[cluster_hap_indices]
# One colour per haplotype, looked up from its population.
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[61]:
%3 0 1 0->1 2 0->2 3 0->3 5 0->5 6 0->6 7 0->7 8 0->8 10 0->10 12 0->12 13 0->13 15 0->15 16 0->16 17 0->17 A837V 18 0->18 anon_0_9_0 0->anon_0_9_0 anon_0_14_0 0->anon_0_14_0 4 4->16 9 11 11->0 R254K 14 anon_0_9_1 anon_0_9_0->anon_0_9_1 anon_0_9_1->9 anon_0_14_0->14

In [62]:
# Re-draw the current cluster_haps selection with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[62]:
%3 0 1 0->1 2 0->2 3 0->3 5 0->5 6 0->6 7 0->7 8 0->8 10 0->10 12 0->12 13 0->13 15 0->15 16 0->16 17 0->17 A837V 18 0->18 anon_0_9_0 0->anon_0_9_0 anon_0_14_0 0->anon_0_14_0 4 4->16 9 11 11->0 R254K 14 anon_0_9_1 anon_0_9_0->anon_0_9_1 anon_0_9_1->9 anon_0_14_0->14

In [63]:
# plot a network for two of the clusters together
cluster_hap_indices = []
for cluster_idx in (15, 16):
    # Each span is a 3-item sequence whose last item holds the haplotype
    # indices. Index it directly instead of unpacking into `_`: assigning to
    # `_` at notebook top level shadows IPython's "last output" variable.
    cidx = cluster_spans_995F[cluster_idx][2]
    cluster_hap_indices.extend(cidx)
# Restrict haplotypes (axis 1) and their population labels to the pooled
# indices of both clusters.
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995F[cluster_hap_indices]
# One colour per haplotype, looked up from its population.
cluster_hap_colors = np.array([pop_colors[p] for p in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps, hap_colors=cluster_hap_colors,
    network_method='msn', variant_labels=variant_labels_vgsc, fontsize=8)
graph


Out[63]:
%3 0 3 0->3 1 2 1->2 1->3 F1920S 4 1->4 6 1->6 R537L 7 1->7 H1755Y 8 1->8 10 1->10 K251R anon_1_12_0 1->anon_1_12_0 anon_2_5_0 2->anon_2_5_0 9 3->9 5 11 15 11->15 16 11->16 17 11->17 12 13 12->13 G317S 14 12->14 anon_11_12_0 12->anon_11_12_0 I1940T anon_1_12_1 anon_1_12_0->anon_1_12_1 anon_1_12_2 anon_1_12_1->anon_1_12_2 anon_1_12_2->12 anon_2_5_1 anon_2_5_0->anon_2_5_1 anon_2_5_1->5 anon_11_12_0->11 D466H

In [64]:
# Re-draw the current cluster_haps selection with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[64]:
%3 0 3 0->3 1 2 1->2 1->3 F1920S 4 1->4 6 1->6 R537L 7 1->7 H1755Y 8 1->8 10 1->10 K251R anon_1_12_0 1->anon_1_12_0 anon_2_5_0 2->anon_2_5_0 9 3->9 5 11 15 11->15 16 11->16 17 11->17 12 13 12->13 G317S 14 12->14 anon_11_12_0 12->anon_11_12_0 I1940T anon_1_12_1 anon_1_12_0->anon_1_12_1 anon_1_12_2 anon_1_12_1->anon_1_12_2 anon_1_12_2->12 anon_2_5_1 anon_2_5_0->anon_2_5_1 anon_2_5_1->5 anon_11_12_0->11 D466H

In [65]:
# Network ('msn') for one cluster picked out of the spans extracted earlier
# from the dendrogram.
cluster_idx = 24
# Each span unpacks into three items; the last holds the haplotype indices.
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
# Restrict haplotypes (axis 1) and their population labels to this cluster.
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995F[cluster_hap_indices]
# One colour per haplotype, looked up from its population.
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[65]:
%3 0 1 0->1 anon_0_2_0 0->anon_0_2_0 2 anon_0_2_0->2

In [66]:
# Re-draw the current cluster_haps selection with the 'mjn' method.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[66]:
%3 0 1 0->1 anon_0_2_0 0->anon_0_2_0 2 anon_0_2_0->2

In [67]:
# Network ('mst') for one cluster picked out of the spans extracted earlier
# from the dendrogram, this time with node labels shown.
cluster_idx = 33
# Each span unpacks into three items; the last holds the haplotype indices.
dend_start, dend_stop, cluster_hap_indices = cluster_spans_995F[cluster_idx]
# Restrict haplotypes (axis 1) and their population labels to this cluster.
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995F[cluster_hap_indices]
# One colour per haplotype, looked up from its population.
cluster_hap_colors = np.array([pop_colors[pop] for pop in cluster_hap_pops])
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mst',
    edge_weight=10,
    overlap=False,
    show_node_labels=True,
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[67]:
%3 0 0 1 1 22 22 1->22 2 2 31 31 2->31 3 3 3->0 3->1 I1868T 3->2 A1934V 4 4 3->4 P1874S 5 5 3->5 P1874L 6 6 3->6 K1603T 8 8 3->8 9 9 3->9 13 13 3->13 I1940T 14 14 3->14 T791M 17 17 3->17 N1345I 19 19 3->19 E1597G 21 21 3->21 V1853I 25 25 3->25 P55L 29 29 3->29 V1303A 38 38 3->38 I829L 46 46 3->46 anon_27_3_1 3->anon_27_3_1 G54C anon_37_3_0 3->anon_37_3_0 P940H 32 32 4->32 34 34 4->34 35 35 4->35 36 36 4->36 49 49 5->49 7 7 10 10 8->10 P1874L 11 11 11->1 12 12 12->7 16 16 13->16 M704V 28 28 13->28 14->12 A1746S anon_14_20_0 14->anon_14_20_0 15 15 anon_15_2_0 15->anon_15_2_0 18 18 18->1 30 30 19->30 33 33 19->33 20 20 23 23 23->1 24 24 anon_24_12_0 24->anon_24_12_0 26 26 26->3 27 27 anon_27_3_0 27->anon_27_3_0 37 37 37->anon_37_3_0 39 39 39->8 40 40 41 41 40->41 anon_41_39_0 41->anon_41_39_0 42 42 42->3 43 43 43->3 44 44 44->39 45 45 45->3 47 47 47->3 48 48 48->3 50 50 50->3 anon_14_20_0->20 L1366F anon_15_2_0->2 anon_24_12_1 anon_24_12_1->12 anon_24_12_1->anon_24_12_0 Q545L anon_27_3_0->anon_27_3_1 anon_41_39_0->39

In [68]:
# Does the choice of network method really matter? Draw the current
# cluster_haps selection with 'msn'.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='msn',
    edge_weight=9,
    overlap=False,
    show_node_labels=True,
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[68]:
%3 0 0 3 3 0->3 1 1 11 11 1->11 18 18 1->18 22 22 1->22 23 23 1->23 2 2 31 31 2->31 anon_2_15_0 2->anon_2_15_0 3->1 I1868T 3->2 A1934V 4 4 3->4 P1874S 5 5 3->5 P1874L 6 6 3->6 K1603T 8 8 3->8 9 9 3->9 13 13 3->13 I1940T 14 14 3->14 T791M 17 17 3->17 N1345I 19 19 3->19 E1597G 21 21 3->21 V1853I 25 25 3->25 P55L 26 26 3->26 29 29 3->29 V1303A 38 38 3->38 I829L 42 42 3->42 43 43 3->43 45 45 3->45 46 46 3->46 47 47 3->47 48 48 3->48 50 50 3->50 anon_3_27_0 3->anon_3_27_0 anon_3_37_0 3->anon_3_37_0 32 32 4->32 34 34 4->34 35 35 4->35 36 36 4->36 10 10 5->10 49 49 5->49 7 7 12 12 7->12 8->10 P1874L 8->31 A1934V 39 39 8->39 anon_12_24_0 12->anon_12_24_0 16 16 13->16 M704V 28 28 13->28 14->12 A1746S anon_14_20_0 14->anon_14_20_0 15 15 30 30 19->30 33 33 19->33 20 20 24 24 27 27 37 37 44 44 39->44 anon_39_41_0 39->anon_39_41_0 40 40 41 41 40->41 anon_2_15_0->15 anon_3_27_1 anon_3_27_0->anon_3_27_1 anon_3_27_1->27 G54C anon_3_37_0->37 P940H anon_12_24_1 anon_12_24_0->anon_12_24_1 Q545L anon_12_24_1->24 anon_14_20_0->20 L1366F anon_39_41_0->41

In [69]:
# Does the choice of network method really matter? Draw the current
# cluster_haps selection with 'mjn'.
graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    edge_weight=8,
    overlap=False,
    show_node_labels=True,
    variant_labels=variant_labels_vgsc,
    fontsize=8,
)
graph


Out[69]:
%3 0 0 3 3 0->3 1 1 11 11 1->11 18 18 1->18 22 22 1->22 23 23 1->23 2 2 31 31 2->31 anon_2_15_0 2->anon_2_15_0 3->1 I1868T 3->2 A1934V 4 4 3->4 P1874S 5 5 3->5 P1874L 6 6 3->6 K1603T 8 8 3->8 9 9 3->9 13 13 3->13 I1940T 14 14 3->14 T791M 17 17 3->17 N1345I 19 19 3->19 E1597G 21 21 3->21 V1853I 25 25 3->25 P55L 26 26 3->26 29 29 3->29 V1303A 38 38 3->38 I829L 42 42 3->42 43 43 3->43 45 45 3->45 46 46 3->46 47 47 3->47 48 48 3->48 50 50 3->50 anon_3_27_0 3->anon_3_27_0 anon_3_37_0 3->anon_3_37_0 32 32 4->32 34 34 4->34 35 35 4->35 36 36 4->36 10 10 5->10 49 49 5->49 7 7 12 12 7->12 8->10 P1874L 8->31 A1934V 39 39 8->39 anon_12_24_0 12->anon_12_24_0 16 16 13->16 M704V 28 28 13->28 14->12 A1746S anon_14_20_0 14->anon_14_20_0 15 15 30 30 19->30 33 33 19->33 20 20 24 24 27 27 37 37 44 44 39->44 anon_39_41_0 39->anon_39_41_0 40 40 41 41 40->41 anon_2_15_0->15 anon_3_27_1 anon_3_27_0->anon_3_27_1 anon_3_27_1->27 G54C anon_3_37_0->37 P940H anon_12_24_1 anon_12_24_0->anon_12_24_1 Q545L anon_12_24_1->24 anon_14_20_0->20 L1366F anon_39_41_0->41

In [70]:
# ...yes I think it does.

In [71]:
# Just for fun: one 'msn' network over h_vgsc as a whole rather than a
# single cluster.
graph = hapclust.graph_haplotype_network(
    h_vgsc,
    hap_colors=hap_colors,
    network_method='msn',
    variant_labels=variant_labels_vgsc,
    fontsize=7,
)
graph


Out[71]:
%3 0 4 0->4 1 15 1->15 23 1->23 27 1->27 28 1->28 2 7 2->7 3 38 3->38 anon_3_19_0 3->anon_3_19_0 4->1 I1868T 4->3 A1934V 5 4->5 P1874S 6 4->6 P1874L 8 4->8 K1603T 11 4->11 13 4->13 17 4->17 I1940T 18 4->18 T791M 22 4->22 N1345I 24 4->24 E1597G 26 4->26 V1853I 30 4->30 P55L 31 4->31 35 4->35 V1303A 189 4->189 I829L 317 4->317 320 4->320 381 4->381 387 4->387 398 4->398 399 4->399 439 4->439 anon_4_32_0 4->anon_4_32_0 anon_4_56_0 4->anon_4_56_0 anon_4_418_0 4->anon_4_418_0 39 5->39 45 5->45 48 5->48 49 5->49 14 6->14 438 6->438 anon_7_52_0 7->anon_7_52_0 9 16 9->16 10 12 10->12 11->14 P1874L 11->38 A1934V 207 11->207 33 12->33 anon_12_20_0 12->anon_12_20_0 anon_12_37_0 12->anon_12_37_0 anon_16_29_0 16->anon_16_29_0 21 17->21 M704V 34 17->34 18->16 A1746S anon_18_25_0 18->anon_18_25_0 19 20 36 24->36 40 24->40 25 29 32 37 41 42 43 44 46 47 50 51 52 53 54 55 56 57 58 57->58 60 58->60 62 58->62 64 58->64 I492N 66 58->66 67 58->67 68 58->68 69 58->69 71 58->71 75 58->75 M925L 77 58->77 79 58->79 anon_58_63_0 58->anon_58_63_0 anon_58_72_0 58->anon_58_72_0 anon_58_281_0 58->anon_58_281_0 59 61 59->61 70 59->70 74 59->74 F1529L 76 59->76 173 59->173 181 59->181 182 59->182 anon_59_178_0 59->anon_59_178_0 anon_59_180_0 59->anon_59_180_0 78 61->78 anon_61_73_0 61->anon_61_73_0 63 65 65->66 65->69 72 73 80 anon_80_90_0 80->anon_80_90_0 anon_80_133_0 80->anon_80_133_0 81 82 anon_82_124_0 82->anon_82_124_0 83 84 85 86 anon_86_121_0 86->anon_86_121_0 87 88 89 90 anon_90_133_0 90->anon_90_133_0 91 92 anon_92_276_0 92->anon_92_276_0 anon_92_353_0 92->anon_92_353_0 93 94 anon_94_170_0 94->anon_94_170_0 95 96 anon_96_146_0 96->anon_96_146_0 97 98 99 100 101 102 103 104 105 106 107 108 anon_108_190_0 108->anon_108_190_0 109 anon_109_120_0 109->anon_109_120_0 anon_109_123_0 109->anon_109_123_0 anon_109_124_0 109->anon_109_124_0 110 anon_110_149_0 110->anon_110_149_0 111 112 161 112->161 113 114 115 116 117 118 anon_118_124_0 118->anon_118_124_0 119 120 anon_120_124_0 
120->anon_120_124_0 anon_120_149_0 120->anon_120_149_0 121 124 121->124 122 123 anon_124_149_0 124->anon_124_149_0 125 anon_125_161_0 125->anon_125_161_0 126 127 128 129 130 131 132 anon_132_158_0 132->anon_132_158_0 133 134 anon_134_143_0 134->anon_134_143_0 135 136 137 138 139 140 141 142 143 144 145 146 147 257 147->257 148 148->161 149 150 151 152 153 anon_153_161_0 153->anon_153_161_0 154 anon_154_281_0 154->anon_154_281_0 155 155->161 156 157 anon_157_257_0 157->anon_157_257_0 158 159 160 anon_160_263_0 160->anon_160_263_0 anon_161_163_0 161->anon_161_163_0 162 163 164 165 166 167 168 169 170 171 172 174 172->174 175 172->175 anon_172_176_0 172->anon_172_176_0 179 173->179 184 173->184 176 177 183 177->183 178 180 185 302 185->302 312 185->312 344 185->344 anon_185_197_0 185->anon_185_197_0 186 199 186->199 200 186->200 210 186->210 239 186->239 266 186->266 277 186->277 289 186->289 299 186->299 325 186->325 330 186->330 359 186->359 375 186->375 382 186->382 A837V 431 186->431 anon_186_265_0 186->anon_186_265_0 anon_186_291_0 186->anon_186_291_0 anon_186_334_0 186->anon_186_334_0 anon_186_356_0 186->anon_186_356_0 187 anon_187_231_0 187->anon_187_231_0 anon_187_288_0 187->anon_187_288_0 188 281 188->281 190 anon_190_264_0 190->anon_190_264_0 191 333 191->333 358 191->358 368 191->368 anon_191_242_0 191->anon_191_242_0 192 193 anon_193_311_0 193->anon_193_311_0 194 219 194->219 340 194->340 anon_194_257_0 194->anon_194_257_0 195 203 195->203 G317S 250 195->250 anon_191_195_0 195->anon_191_195_0 I1940T anon_195_201_0 195->anon_195_201_0 196 209 196->209 197 198 anon_198_309_0 198->anon_198_309_0 201 204 201->204 201->209 F1920S 267 201->267 427 201->427 R537L 432 201->432 H1755Y 433 201->433 437 201->437 K251R 202 anon_202_255_0 202->anon_202_255_0 anon_204_273_0 204->anon_204_273_0 205 205->201 L995F 341 205->341 anon_205_232_0 205->anon_205_232_0 anon_205_234_0 205->anon_205_234_0 206 272 206->272 287 206->287 305 206->305 S2076T 316 206->316 321 206->321 
L1617F anon_206_214_0 206->anon_206_214_0 anon_206_329_0 206->anon_206_329_0 370 207->370 anon_207_315_0 207->anon_207_315_0 208 anon_208_261_0 208->anon_208_261_0 434 209->434 211 249 211->249 212 376 212->376 anon_212_217_0 212->anon_212_217_0 213 213->375 214 215 216 anon_216_276_0 216->anon_216_276_0 anon_216_279_0 216->anon_216_279_0 anon_216_348_0 216->anon_216_348_0 217 218 anon_218_229_0 218->anon_218_229_0 anon_218_237_0 218->anon_218_237_0 anon_218_271_0 218->anon_218_271_0 anon_218_275_0 218->anon_218_275_0 anon_218_278_0 218->anon_218_278_0 anon_218_345_0 218->anon_218_345_0 220 anon_220_271_0 220->anon_220_271_0 221 anon_221_249_0 221->anon_221_249_0 222 anon_222_279_0 222->anon_222_279_0 anon_222_348_0 222->anon_222_348_0 223 224 286 224->286 anon_224_355_0 224->anon_224_355_0 anon_224_371_0 224->anon_224_371_0 225 226 227 227->249 228 229 anon_229_275_0 229->anon_229_275_0 anon_229_278_0 229->anon_229_278_0 anon_229_345_0 229->anon_229_345_0 230 231 anon_231_271_0 231->anon_231_271_0 anon_231_347_0 231->anon_231_347_0 232 anon_232_234_0 232->anon_232_234_0 233 anon_233_263_0 233->anon_233_263_0 234 234->195 L995F anon_234_346_0 234->anon_234_346_0 anon_234_374_0 234->anon_234_374_0 235 236 anon_236_311_0 236->anon_236_311_0 237 anon_237_275_0 237->anon_237_275_0 anon_237_278_0 237->anon_237_278_0 anon_237_345_0 237->anon_237_345_0 238 240 241 242 243 244 anon_244_296_0 244->anon_244_296_0 245 anon_245_380_0 245->anon_245_380_0 246 247 248 258 248->258 365 248->365 284 249->284 295 249->295 379 249->379 anon_249_296_0 249->anon_249_296_0 anon_249_303_0 249->anon_249_303_0 anon_249_304_0 249->anon_249_304_0 251 308 251->308 R254K 322 251->322 L995F 252 253 254 254->272 254->316 255 256 anon_257_300_0 257->anon_257_300_0 259 260 261 262 421 262->421 425 262->425 426 262->426 428 262->428 429 262->429 435 262->435 436 262->436 K251R anon_262_430_0 262->anon_262_430_0 263 264 265 268 anon_268_271_0 268->anon_268_271_0 269 270 271 anon_231_271_2 
271->anon_231_271_2 L995F anon_271_323_0 271->anon_271_323_0 anon_271_363_0 271->anon_271_363_0 anon_271_275_0 271->anon_271_275_0 anon_271_278_0 271->anon_271_278_0 anon_271_345_0 271->anon_271_345_0 273 274 275 276 anon_276_324_0 276->anon_276_324_0 anon_276_336_0 276->anon_276_336_0 anon_276_353_0 276->anon_276_353_0 anon_276_373_0 276->anon_276_373_0 anon_276_279_0 276->anon_276_279_0 anon_276_348_0 276->anon_276_348_0 278 279 anon_279_348_0 279->anon_279_348_0 280 282 281->282 364 281->364 anon_281_298_0 281->anon_281_298_0 283 anon_283_361_0 283->anon_283_361_0 306 284->306 285 297 285->297 288 290 291 292 293 294 326 294->326 296 298 300 301 303 304 350 305->350 385 305->385 307 315 307->315 308->186 L995F 308->208 L995S 314 308->314 369 308->369 309 310 311 313 324 313->324 316->350 S2076T 318 anon_318_351_0 318->anon_318_351_0 319 322->186 R254K 323 327 328 329 331 332 334 335 336 337 338 339 384 339->384 342 343 345 346 347 348 349 anon_349_354_0 349->anon_349_354_0 351 352 353 353->207 L995F 353->310 L995S anon_353_362_0 353->anon_353_362_0 354 355 356 357 360 361 362 363 366 367 371 372 373 386 373->386 374 377 378 380 383 383->4 L995F 388 389 390 391 392 393 394 395 396 397 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 422 421->422 424 421->424 423 424->426 430 440 441 442 anon_3_19_0->19 anon_4_32_1 anon_4_32_0->anon_4_32_1 anon_4_32_1->32 G54C anon_4_56_0->56 P940H anon_4_418_1 anon_4_418_0->anon_4_418_1 anon_4_418_2 anon_4_418_1->anon_4_418_2 anon_4_418_3 anon_4_418_2->anon_4_418_3 anon_4_418_3->418 anon_7_52_0->52 anon_12_20_0->20 anon_12_37_1 anon_12_37_0->anon_12_37_1 anon_12_37_2 anon_12_37_1->anon_12_37_2 anon_12_37_3 anon_12_37_2->anon_12_37_3 anon_12_37_3->37 anon_16_29_1 anon_16_29_0->anon_16_29_1 Q545L anon_16_29_1->29 anon_18_25_0->25 L1366F anon_58_63_0->63 anon_58_72_1 anon_58_72_0->anon_58_72_1 anon_58_72_2 anon_58_72_1->anon_58_72_2 anon_58_72_2->72 anon_58_281_1 anon_58_281_0->anon_58_281_1 
anon_58_281_2 anon_58_281_2->281 anon_58_281_2->anon_58_281_1 L995S anon_59_178_0->178 anon_59_180_0->180 anon_61_73_0->73 anon_80_90_1 anon_80_90_0->anon_80_90_1 anon_80_90_2 anon_80_90_1->anon_80_90_2 anon_80_90_3 anon_80_90_2->anon_80_90_3 anon_80_90_3->90 anon_82_124_1 anon_82_124_0->anon_82_124_1 anon_82_124_2 anon_82_124_1->anon_82_124_2 anon_82_124_3 anon_82_124_2->anon_82_124_3 anon_82_124_3->124 anon_86_121_0->121 anon_90_133_1 anon_90_133_0->anon_90_133_1 anon_90_133_2 anon_90_133_1->anon_90_133_2 anon_90_133_2->133 anon_92_276_1 anon_92_276_0->anon_92_276_1 anon_92_276_2 anon_92_276_1->anon_92_276_2 anon_92_276_3 anon_92_276_2->anon_92_276_3 anon_92_276_3->276 anon_94_170_0->170 anon_96_146_1 anon_96_146_0->anon_96_146_1 anon_96_146_1->146 A1125S anon_108_190_1 anon_108_190_0->anon_108_190_1 anon_108_190_2 anon_108_190_1->anon_108_190_2 anon_108_190_2->190 anon_109_120_1 anon_109_120_0->anon_109_120_1 anon_109_120_2 anon_109_120_1->anon_109_120_2 anon_109_120_2->120 anon_109_123_1 anon_109_123_0->anon_109_123_1 anon_109_123_2 anon_109_123_1->anon_109_123_2 anon_109_123_2->123 V1254I anon_110_149_1 anon_110_149_0->anon_110_149_1 anon_110_149_2 anon_110_149_1->anon_110_149_2 anon_110_149_3 anon_110_149_2->anon_110_149_3 V1254I anon_110_149_3->149 anon_118_124_1 anon_118_124_0->anon_118_124_1 anon_118_124_2 anon_118_124_1->anon_118_124_2 anon_118_124_2->124 anon_120_124_0->124 anon_120_149_1 anon_120_149_0->anon_120_149_1 anon_120_149_2 anon_120_149_1->anon_120_149_2 anon_120_149_2->149 V1254I anon_125_161_1 anon_125_161_0->anon_125_161_1 anon_125_161_1->161 anon_132_158_1 anon_132_158_0->anon_132_158_1 anon_132_158_2 anon_132_158_1->anon_132_158_2 anon_132_158_3 anon_132_158_2->anon_132_158_3 anon_132_158_3->158 anon_134_143_1 anon_134_143_0->anon_134_143_1 anon_134_143_2 anon_134_143_1->anon_134_143_2 anon_134_143_3 anon_134_143_2->anon_134_143_3 anon_134_143_3->143 anon_153_161_0->161 anon_154_281_1 anon_154_281_0->anon_154_281_1 anon_154_281_1->281 
anon_157_257_1 anon_157_257_0->anon_157_257_1 anon_157_257_1->257 anon_160_263_1 anon_160_263_0->anon_160_263_1 anon_160_263_2 anon_160_263_1->anon_160_263_2 anon_160_263_3 anon_160_263_2->anon_160_263_3 anon_160_263_3->263 anon_161_163_1 anon_161_163_0->anon_161_163_1 anon_161_163_1->163 anon_172_176_0->176 anon_185_197_1 anon_185_197_0->anon_185_197_1 anon_185_197_2 anon_185_197_1->anon_185_197_2 anon_185_197_3 anon_185_197_2->anon_185_197_3 anon_185_197_3->197 anon_186_265_1 anon_186_265_0->anon_186_265_1 anon_186_265_2 anon_186_265_1->anon_186_265_2 anon_186_265_2->265 anon_186_291_1 anon_186_291_0->anon_186_291_1 anon_186_291_1->291 anon_186_334_0->334 anon_186_356_1 anon_186_356_0->anon_186_356_1 anon_186_356_2 anon_186_356_1->anon_186_356_2 anon_186_356_2->356 anon_187_231_1 anon_187_231_0->anon_187_231_1 anon_187_231_2 anon_187_231_1->anon_187_231_2 anon_187_231_3 anon_187_231_2->anon_187_231_3 L995F anon_187_231_3->231 anon_187_288_1 anon_187_288_0->anon_187_288_1 anon_187_288_1->288 anon_190_264_1 anon_190_264_0->anon_190_264_1 anon_190_264_1->264 anon_191_195_0->191 D466H anon_191_242_1 anon_191_242_0->anon_191_242_1 anon_191_242_2 anon_191_242_1->anon_191_242_2 anon_191_242_2->242 anon_193_311_1 anon_193_311_0->anon_193_311_1 anon_193_311_2 anon_193_311_1->anon_193_311_2 anon_193_311_3 anon_193_311_2->anon_193_311_3 anon_193_311_3->311 anon_194_257_0->257 anon_195_201_1 anon_195_201_0->anon_195_201_1 anon_195_201_2 anon_195_201_1->anon_195_201_2 anon_195_201_2->201 anon_198_309_1 anon_198_309_0->anon_198_309_1 anon_198_309_2 anon_198_309_1->anon_198_309_2 anon_198_309_3 anon_198_309_2->anon_198_309_3 anon_198_309_3->309 anon_202_255_1 anon_202_255_0->anon_202_255_1 anon_202_255_2 anon_202_255_1->anon_202_255_2 anon_202_255_3 anon_202_255_2->anon_202_255_3 anon_202_255_3->255 anon_204_273_1 anon_204_273_0->anon_204_273_1 anon_204_273_1->273 anon_205_232_1 anon_205_232_0->anon_205_232_1 anon_205_232_2 anon_205_232_1->anon_205_232_2 anon_205_232_3 
anon_205_232_2->anon_205_232_3 anon_205_232_3->232 anon_206_214_1 anon_206_214_0->anon_206_214_1 anon_206_214_2 anon_206_214_1->anon_206_214_2 anon_206_214_2->214 anon_206_329_1 anon_206_329_0->anon_206_329_1 anon_206_329_1->329 anon_207_315_0->315 anon_208_261_0->261 anon_212_217_0->217 anon_216_276_0->276 anon_216_279_1 anon_216_279_0->anon_216_279_1 anon_216_279_2 anon_216_279_1->anon_216_279_2 anon_216_279_3 anon_216_279_2->anon_216_279_3 anon_216_279_3->279 anon_218_229_0->229 anon_218_237_0->237 anon_218_271_0->271 anon_218_275_1 anon_218_275_0->anon_218_275_1 anon_218_275_2 anon_218_275_1->anon_218_275_2 anon_218_275_3 anon_218_275_2->anon_218_275_3 anon_218_275_3->275 anon_218_278_1 anon_218_278_0->anon_218_278_1 anon_218_278_1->278 anon_218_345_1 anon_218_345_0->anon_218_345_1 anon_218_345_1->345 anon_220_271_1 anon_220_271_0->anon_220_271_1 anon_220_271_2 anon_220_271_1->anon_220_271_2 anon_220_271_2->271 anon_221_249_0->249 anon_222_279_1 anon_222_279_0->anon_222_279_1 anon_222_279_2 anon_222_279_1->anon_222_279_2 anon_222_279_2->279 anon_224_355_0->355 anon_224_371_1 anon_224_371_0->anon_224_371_1 anon_224_371_2 anon_224_371_1->anon_224_371_2 anon_224_371_2->371 anon_231_271_1 anon_231_271_0->anon_231_271_1 anon_231_271_1->anon_231_271_2 anon_231_347_1 anon_231_347_0->anon_231_347_1 anon_231_347_2 anon_231_347_2->347 anon_231_347_2->anon_231_347_1 L995F anon_233_263_0->263 anon_234_346_0->346 anon_234_374_1 anon_234_374_0->anon_234_374_1 anon_234_374_2 anon_234_374_1->anon_234_374_2 anon_234_374_2->374 anon_236_311_1 anon_236_311_0->anon_236_311_1 anon_236_311_1->311 anon_244_296_1 anon_244_296_0->anon_244_296_1 anon_244_296_1->296 anon_245_380_0->380 anon_249_296_1 anon_249_296_0->anon_249_296_1 anon_249_296_1->296 anon_249_303_0->303 anon_249_304_1 anon_249_304_0->anon_249_304_1 anon_249_304_1->304 anon_257_300_0->300 anon_262_430_1 anon_262_430_0->anon_262_430_1 anon_262_430_2 anon_262_430_1->anon_262_430_2 anon_262_430_2->430 anon_268_271_1 
anon_268_271_0->anon_268_271_1 anon_268_271_2 anon_268_271_1->anon_268_271_2 anon_268_271_3 anon_268_271_2->anon_268_271_3 anon_268_271_3->271 anon_271_323_0->323 anon_271_363_1 anon_271_363_0->anon_271_363_1 anon_271_363_2 anon_271_363_1->anon_271_363_2 anon_271_363_3 anon_271_363_2->anon_271_363_3 anon_271_363_3->363 anon_276_324_0->324 anon_276_336_0->336 T679S anon_276_353_0->353 anon_276_373_1 anon_276_373_0->anon_276_373_1 anon_276_373_2 anon_276_373_1->anon_276_373_2 anon_276_373_3 anon_276_373_2->anon_276_373_3 anon_276_373_3->373 anon_279_348_0->348 anon_281_298_0->298 anon_283_361_0->361 anon_318_351_1 anon_318_351_0->anon_318_351_1 anon_318_351_2 anon_318_351_1->anon_318_351_2 anon_318_351_3 anon_318_351_2->anon_318_351_3 anon_318_351_3->351 anon_349_354_1 anon_349_354_0->anon_349_354_1 anon_349_354_2 anon_349_354_1->anon_349_354_2 anon_349_354_2->354 anon_353_362_0->362 anon_80_133_1 anon_80_133_0->anon_80_133_1 anon_80_133_2 anon_80_133_1->anon_80_133_2 anon_80_133_3 anon_80_133_2->anon_80_133_3 anon_80_133_3->133 anon_92_353_1 anon_92_353_0->anon_92_353_1 anon_92_353_2 anon_92_353_1->anon_92_353_2 anon_92_353_3 anon_92_353_2->anon_92_353_3 anon_92_353_3->353 anon_109_124_1 anon_109_124_0->anon_109_124_1 anon_109_124_2 anon_109_124_1->anon_109_124_2 anon_109_124_2->124 anon_124_149_1 anon_124_149_0->anon_124_149_1 anon_124_149_2 anon_124_149_1->anon_124_149_2 anon_124_149_2->149 V1254I anon_205_234_1 anon_205_234_0->anon_205_234_1 anon_205_234_2 anon_205_234_1->anon_205_234_2 anon_205_234_2->234 anon_216_348_1 anon_216_348_0->anon_216_348_1 anon_216_348_2 anon_216_348_1->anon_216_348_2 anon_216_348_3 anon_216_348_2->anon_216_348_3 anon_216_348_3->348 anon_222_348_1 anon_222_348_0->anon_222_348_1 anon_222_348_2 anon_222_348_1->anon_222_348_2 anon_222_348_2->348 anon_229_275_1 anon_229_275_0->anon_229_275_1 anon_229_275_2 anon_229_275_1->anon_229_275_2 anon_229_275_3 anon_229_275_2->anon_229_275_3 anon_229_275_3->275 anon_229_278_1 
anon_229_278_0->anon_229_278_1 anon_229_278_1->278 anon_229_345_1 anon_229_345_0->anon_229_345_1 anon_229_345_1->345 anon_232_234_1 anon_232_234_0->anon_232_234_1 anon_232_234_2 anon_232_234_1->anon_232_234_2 anon_232_234_3 anon_232_234_2->anon_232_234_3 anon_232_234_3->234 anon_237_275_1 anon_237_275_0->anon_237_275_1 anon_237_275_2 anon_237_275_1->anon_237_275_2 anon_237_275_3 anon_237_275_2->anon_237_275_3 anon_237_275_3->275 anon_237_278_1 anon_237_278_0->anon_237_278_1 anon_237_278_1->278 anon_237_345_1 anon_237_345_0->anon_237_345_1 anon_237_345_1->345 anon_271_275_1 anon_271_275_0->anon_271_275_1 anon_271_275_2 anon_271_275_1->anon_271_275_2 anon_271_275_3 anon_271_275_2->anon_271_275_3 anon_271_275_3->275 anon_271_278_1 anon_271_278_0->anon_271_278_1 anon_271_278_1->278 anon_271_345_1 anon_271_345_0->anon_271_345_1 anon_271_345_1->345 anon_276_279_1 anon_276_279_0->anon_276_279_1 anon_276_279_2 anon_276_279_1->anon_276_279_2 anon_276_279_3 anon_276_279_2->anon_276_279_3 anon_276_279_3->279 anon_276_348_1 anon_276_348_0->anon_276_348_1 anon_276_348_2 anon_276_348_1->anon_276_348_2 anon_276_348_3 anon_276_348_2->anon_276_348_3 anon_276_348_3->348

In [72]:
# Build the haplotype network for h_vgsc with the 'mjn' network method and
# the smaller font size; the graph object is the cell's displayed result.
graph = hapclust.graph_haplotype_network(
    h_vgsc,
    hap_colors=hap_colors,
    network_method='mjn',
    variant_labels=variant_labels_vgsc,
    fontsize=7,
)
graph


Out[72]:
%3 0 4 0->4 1 15 1->15 23 1->23 27 1->27 28 1->28 2 7 2->7 3 38 3->38 anon_3_19_0 3->anon_3_19_0 4->1 I1868T 4->3 A1934V 5 4->5 P1874S 6 4->6 P1874L 8 4->8 K1603T 11 4->11 13 4->13 17 4->17 I1940T 18 4->18 T791M 22 4->22 N1345I 24 4->24 E1597G 26 4->26 V1853I 30 4->30 P55L 31 4->31 35 4->35 V1303A 189 4->189 I829L 317 4->317 320 4->320 381 4->381 387 4->387 398 4->398 399 4->399 439 4->439 anon_4_32_0 4->anon_4_32_0 anon_4_56_0 4->anon_4_56_0 anon_4_418_0 4->anon_4_418_0 39 5->39 45 5->45 48 5->48 49 5->49 14 6->14 438 6->438 anon_7_52_0 7->anon_7_52_0 9 16 9->16 10 12 10->12 11->14 P1874L 11->38 A1934V 207 11->207 33 12->33 anon_12_20_0 12->anon_12_20_0 anon_12_37_0 12->anon_12_37_0 anon_16_29_0 16->anon_16_29_0 21 17->21 M704V 34 17->34 18->16 A1746S anon_18_25_0 18->anon_18_25_0 19 20 36 24->36 40 24->40 25 29 32 37 41 42 43 44 46 47 50 51 52 53 54 55 56 57 58 57->58 60 58->60 62 58->62 64 58->64 I492N 66 58->66 67 58->67 68 58->68 69 58->69 71 58->71 75 58->75 M925L 77 58->77 79 58->79 anon_58_63_0 58->anon_58_63_0 anon_58_72_0 58->anon_58_72_0 anon_58_281_0 58->anon_58_281_0 59 61 59->61 70 59->70 74 59->74 F1529L 76 59->76 173 59->173 181 59->181 182 59->182 anon_59_178_0 59->anon_59_178_0 anon_59_180_0 59->anon_59_180_0 78 61->78 anon_61_73_0 61->anon_61_73_0 63 65 65->66 65->69 72 73 80 anon_80_443_0 80->anon_80_443_0 81 82 anon_82_124_0 82->anon_82_124_0 83 84 85 86 anon_86_121_0 86->anon_86_121_0 87 88 89 90 anon_90_443_0 90->anon_90_443_0 91 92 anon_92_444_0 92->anon_92_444_0 93 94 anon_94_170_0 94->anon_94_170_0 95 96 anon_96_146_0 96->anon_96_146_0 97 98 99 100 101 102 103 104 105 106 107 108 anon_108_190_0 108->anon_108_190_0 109 anon_109_445_0 109->anon_109_445_0 110 anon_110_447_0 110->anon_110_447_0 111 112 161 112->161 113 114 115 116 117 118 anon_118_124_0 118->anon_118_124_0 119 120 446 120->446 121 124 121->124 122 123 anon_123_445_0 123->anon_123_445_0 124->446 125 anon_125_161_0 125->anon_125_161_0 126 127 128 129 130 131 132 anon_132_158_0 
132->anon_132_158_0 133 anon_133_443_0 133->anon_133_443_0 134 anon_134_143_0 134->anon_134_143_0 135 136 137 138 139 140 141 142 143 144 145 146 147 257 147->257 148 148->161 149 150 151 152 153 anon_153_161_0 153->anon_153_161_0 154 anon_154_281_0 154->anon_154_281_0 155 155->161 156 157 anon_157_257_0 157->anon_157_257_0 158 159 160 anon_160_263_0 160->anon_160_263_0 anon_161_163_0 161->anon_161_163_0 162 163 164 165 166 167 168 169 170 171 172 174 172->174 175 172->175 anon_172_176_0 172->anon_172_176_0 179 173->179 184 173->184 176 177 183 177->183 anon_177_457_0 177->anon_177_457_0 178 180 185 302 185->302 312 185->312 344 185->344 anon_185_197_0 185->anon_185_197_0 186 199 186->199 200 186->200 210 186->210 239 186->239 266 186->266 277 186->277 289 186->289 299 186->299 325 186->325 330 186->330 359 186->359 375 186->375 382 186->382 A837V 431 186->431 anon_186_265_0 186->anon_186_265_0 anon_186_291_0 186->anon_186_291_0 anon_186_334_0 186->anon_186_334_0 anon_186_356_0 186->anon_186_356_0 187 449 187->449 188 281 188->281 190 anon_190_264_0 190->anon_190_264_0 191 333 191->333 358 191->358 368 191->368 anon_191_242_0 191->anon_191_242_0 192 193 anon_193_457_0 193->anon_193_457_0 194 219 194->219 340 194->340 anon_194_257_0 194->anon_194_257_0 195 203 195->203 G317S 250 195->250 anon_191_195_0 195->anon_191_195_0 I1940T 196 209 196->209 197 198 anon_198_309_0 198->anon_198_309_0 201 204 201->204 201->209 F1920S 267 201->267 427 201->427 R537L 432 201->432 H1755Y 433 201->433 437 201->437 K251R 202 anon_202_255_0 202->anon_202_255_0 anon_204_273_0 204->anon_204_273_0 205 205->201 L995F 341 205->341 anon_205_451_0 205->anon_205_451_0 206 272 206->272 287 206->287 305 206->305 S2076T 316 206->316 321 206->321 L1617F 453 206->453 370 207->370 anon_207_315_0 207->anon_207_315_0 208 anon_208_261_0 208->anon_208_261_0 434 209->434 211 249 211->249 212 376 212->376 anon_212_217_0 212->anon_212_217_0 213 213->375 214 anon_214_453_0 214->anon_214_453_0 215 
anon_215_457_0 215->anon_215_457_0 216 454 216->454 217 218 456 218->456 220 anon_220_271_0 220->anon_220_271_0 221 anon_221_249_0 221->anon_221_249_0 222 anon_222_455_0 222->anon_222_455_0 223 224 286 224->286 anon_224_355_0 224->anon_224_355_0 anon_224_371_0 224->anon_224_371_0 225 226 227 227->249 228 229 229->456 230 231 232 anon_232_451_0 232->anon_232_451_0 233 anon_233_263_0 233->anon_233_263_0 234 234->195 L995F 452 234->452 anon_234_346_0 234->anon_234_346_0 235 236 anon_236_450_0 236->anon_236_450_0 237 237->456 238 240 241 242 243 244 anon_244_296_0 244->anon_244_296_0 245 anon_245_380_0 245->anon_245_380_0 246 247 anon_247_459_0 247->anon_247_459_0 248 258 248->258 365 248->365 284 249->284 295 249->295 379 249->379 458 249->458 anon_249_303_0 249->anon_249_303_0 anon_249_304_0 249->anon_249_304_0 251 308 251->308 R254K 322 251->322 L995F 252 253 254 254->272 254->316 255 256 anon_257_300_0 257->anon_257_300_0 259 260 261 262 421 262->421 425 262->425 426 262->426 428 262->428 429 262->429 435 262->435 436 262->436 K251R anon_262_430_0 262->anon_262_430_0 263 264 265 268 anon_268_271_0 268->anon_268_271_0 269 270 271 271->456 anon_271_323_0 271->anon_271_323_0 anon_271_363_0 271->anon_271_363_0 anon_271_448_0 271->anon_271_448_0 273 274 275 anon_275_456_0 275->anon_275_456_0 276 444 276->444 276->454 anon_276_324_0 276->anon_276_324_0 anon_276_336_0 276->anon_276_336_0 anon_276_373_0 276->anon_276_373_0 278 anon_278_456_0 278->anon_278_456_0 279 455 279->455 280 282 281->282 364 281->364 anon_281_298_0 281->anon_281_298_0 283 anon_283_361_0 283->anon_283_361_0 306 284->306 285 297 285->297 288 anon_288_449_0 288->anon_288_449_0 290 291 292 293 294 326 294->326 296 anon_296_458_0 296->anon_296_458_0 298 300 301 303 304 350 305->350 385 305->385 307 315 307->315 308->186 L995F 308->208 L995S 314 308->314 369 308->369 309 310 311 450 311->450 313 324 313->324 316->350 S2076T 318 anon_318_351_0 318->anon_318_351_0 319 322->186 R254K 323 327 328 329 
anon_329_453_0 329->anon_329_453_0 331 332 334 335 336 337 338 339 384 339->384 342 343 345 anon_345_456_0 345->anon_345_456_0 346 347 anon_347_448_0 347->anon_347_448_0 348 348->455 349 anon_349_354_0 349->anon_349_354_0 351 352 anon_352_459_0 352->anon_352_459_0 353 353->207 L995F 353->310 L995S 353->444 anon_353_362_0 353->anon_353_362_0 354 355 356 357 360 361 362 363 anon_364_457_0 364->anon_364_457_0 366 367 371 372 373 386 373->386 374 anon_374_452_0 374->anon_374_452_0 377 378 380 383 383->4 L995F 388 389 390 391 392 393 394 395 396 397 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 422 421->422 424 421->424 423 424->426 430 440 441 442 443 445 445->446 445->anon_123_445_0 V1254I anon_446_447_0 446->anon_446_447_0 447 447->149 V1254I 448 448->231 L995F anon_448_449_0 448->anon_448_449_0 457 450->457 451 451->452 anon_451_459_0 451->anon_451_459_0 anon_454_455_0 454->anon_454_455_0 459 458->459 anon_3_19_0->19 anon_4_32_1 anon_4_32_0->anon_4_32_1 anon_4_32_1->32 G54C anon_4_56_0->56 P940H anon_4_418_1 anon_4_418_0->anon_4_418_1 anon_4_418_2 anon_4_418_1->anon_4_418_2 anon_4_418_3 anon_4_418_2->anon_4_418_3 anon_4_418_3->418 anon_7_52_0->52 anon_12_20_0->20 anon_12_37_1 anon_12_37_0->anon_12_37_1 anon_12_37_2 anon_12_37_1->anon_12_37_2 anon_12_37_3 anon_12_37_2->anon_12_37_3 anon_12_37_3->37 anon_16_29_1 anon_16_29_0->anon_16_29_1 Q545L anon_16_29_1->29 anon_18_25_0->25 L1366F anon_58_63_0->63 anon_58_72_1 anon_58_72_0->anon_58_72_1 anon_58_72_2 anon_58_72_1->anon_58_72_2 anon_58_72_2->72 anon_58_281_1 anon_58_281_0->anon_58_281_1 anon_58_281_2 anon_58_281_2->281 anon_58_281_2->anon_58_281_1 L995S anon_59_178_0->178 anon_59_180_0->180 anon_61_73_0->73 anon_80_443_1 anon_80_443_0->anon_80_443_1 anon_80_443_1->443 anon_82_124_1 anon_82_124_0->anon_82_124_1 anon_82_124_2 anon_82_124_1->anon_82_124_2 anon_82_124_3 anon_82_124_2->anon_82_124_3 anon_82_124_3->124 anon_86_121_0->121 anon_90_443_0->443 anon_92_444_1 
anon_92_444_0->anon_92_444_1 anon_92_444_2 anon_92_444_1->anon_92_444_2 anon_92_444_2->444 anon_94_170_0->170 anon_96_146_1 anon_96_146_0->anon_96_146_1 anon_96_146_1->146 A1125S anon_108_190_1 anon_108_190_0->anon_108_190_1 anon_108_190_2 anon_108_190_1->anon_108_190_2 anon_108_190_2->190 anon_109_445_0->445 anon_110_447_1 anon_110_447_0->anon_110_447_1 anon_110_447_2 anon_110_447_1->anon_110_447_2 anon_110_447_2->447 anon_118_124_1 anon_118_124_0->anon_118_124_1 anon_118_124_2 anon_118_124_1->anon_118_124_2 anon_118_124_2->124 anon_125_161_1 anon_125_161_0->anon_125_161_1 anon_125_161_1->161 anon_132_158_1 anon_132_158_0->anon_132_158_1 anon_132_158_2 anon_132_158_1->anon_132_158_2 anon_132_158_3 anon_132_158_2->anon_132_158_3 anon_132_158_3->158 anon_133_443_0->443 anon_134_143_1 anon_134_143_0->anon_134_143_1 anon_134_143_2 anon_134_143_1->anon_134_143_2 anon_134_143_3 anon_134_143_2->anon_134_143_3 anon_134_143_3->143 anon_153_161_0->161 anon_154_281_1 anon_154_281_0->anon_154_281_1 anon_154_281_1->281 anon_157_257_1 anon_157_257_0->anon_157_257_1 anon_157_257_1->257 anon_160_263_1 anon_160_263_0->anon_160_263_1 anon_160_263_2 anon_160_263_1->anon_160_263_2 anon_160_263_3 anon_160_263_2->anon_160_263_3 anon_160_263_3->263 anon_161_163_1 anon_161_163_0->anon_161_163_1 anon_161_163_1->163 anon_172_176_0->176 anon_177_457_1 anon_177_457_0->anon_177_457_1 anon_177_457_2 anon_177_457_1->anon_177_457_2 anon_177_457_3 anon_177_457_2->anon_177_457_3 anon_177_457_3->457 anon_185_197_1 anon_185_197_0->anon_185_197_1 anon_185_197_2 anon_185_197_1->anon_185_197_2 anon_185_197_3 anon_185_197_2->anon_185_197_3 anon_185_197_3->197 anon_186_265_1 anon_186_265_0->anon_186_265_1 anon_186_265_2 anon_186_265_1->anon_186_265_2 anon_186_265_2->265 anon_186_291_1 anon_186_291_0->anon_186_291_1 anon_186_291_1->291 anon_186_334_0->334 anon_186_356_1 anon_186_356_0->anon_186_356_1 anon_186_356_2 anon_186_356_1->anon_186_356_2 anon_186_356_2->356 anon_190_264_1 
anon_190_264_0->anon_190_264_1 anon_190_264_1->264 anon_191_195_0->191 D466H anon_191_242_1 anon_191_242_0->anon_191_242_1 anon_191_242_2 anon_191_242_1->anon_191_242_2 anon_191_242_2->242 anon_193_457_1 anon_193_457_0->anon_193_457_1 anon_193_457_1->457 anon_194_257_0->257 anon_198_309_1 anon_198_309_0->anon_198_309_1 anon_198_309_2 anon_198_309_1->anon_198_309_2 anon_198_309_3 anon_198_309_2->anon_198_309_3 anon_198_309_3->309 anon_202_255_1 anon_202_255_0->anon_202_255_1 anon_202_255_2 anon_202_255_1->anon_202_255_2 anon_202_255_3 anon_202_255_2->anon_202_255_3 anon_202_255_3->255 anon_204_273_1 anon_204_273_0->anon_204_273_1 anon_204_273_1->273 anon_205_451_0->451 anon_207_315_0->315 anon_208_261_0->261 anon_212_217_0->217 anon_214_453_1 anon_214_453_0->anon_214_453_1 anon_214_453_1->453 anon_215_457_1 anon_215_457_0->anon_215_457_1 anon_215_457_2 anon_215_457_1->anon_215_457_2 anon_215_457_2->457 anon_220_271_1 anon_220_271_0->anon_220_271_1 anon_220_271_2 anon_220_271_1->anon_220_271_2 anon_220_271_2->271 anon_221_249_0->249 anon_222_455_1 anon_222_455_0->anon_222_455_1 anon_222_455_1->455 anon_224_355_0->355 anon_224_371_1 anon_224_371_0->anon_224_371_1 anon_224_371_2 anon_224_371_1->anon_224_371_2 anon_224_371_2->371 anon_232_451_1 anon_232_451_0->anon_232_451_1 anon_232_451_1->451 anon_233_263_0->263 anon_234_346_0->346 anon_236_450_0->450 anon_244_296_1 anon_244_296_0->anon_244_296_1 anon_244_296_1->296 anon_245_380_0->380 anon_247_459_1 anon_247_459_0->anon_247_459_1 anon_247_459_2 anon_247_459_1->anon_247_459_2 anon_247_459_2->459 anon_249_303_0->303 anon_249_304_1 anon_249_304_0->anon_249_304_1 anon_249_304_1->304 anon_257_300_0->300 anon_262_430_1 anon_262_430_0->anon_262_430_1 anon_262_430_2 anon_262_430_1->anon_262_430_2 anon_262_430_2->430 anon_268_271_1 anon_268_271_0->anon_268_271_1 anon_268_271_2 anon_268_271_1->anon_268_271_2 anon_268_271_3 anon_268_271_2->anon_268_271_3 anon_268_271_3->271 anon_271_323_0->323 anon_271_363_1 
anon_271_363_0->anon_271_363_1 anon_271_363_2 anon_271_363_1->anon_271_363_2 anon_271_363_3 anon_271_363_2->anon_271_363_3 anon_271_363_3->363 anon_271_448_1 anon_271_448_0->anon_271_448_1 anon_271_448_1->448 anon_275_456_1 anon_275_456_0->anon_275_456_1 anon_275_456_2 anon_275_456_1->anon_275_456_2 anon_275_456_2->456 anon_276_324_0->324 anon_276_336_0->336 T679S anon_276_373_1 anon_276_373_0->anon_276_373_1 anon_276_373_2 anon_276_373_1->anon_276_373_2 anon_276_373_3 anon_276_373_2->anon_276_373_3 anon_276_373_3->373 anon_278_456_0->456 anon_281_298_0->298 anon_283_361_0->361 anon_288_449_0->449 anon_296_458_0->458 anon_318_351_1 anon_318_351_0->anon_318_351_1 anon_318_351_2 anon_318_351_1->anon_318_351_2 anon_318_351_3 anon_318_351_2->anon_318_351_3 anon_318_351_3->351 anon_329_453_0->453 anon_345_456_0->456 anon_347_448_1 anon_347_448_0->anon_347_448_1 anon_347_448_1->448 anon_349_354_1 anon_349_354_0->anon_349_354_1 anon_349_354_2 anon_349_354_1->anon_349_354_2 anon_349_354_2->354 anon_352_459_1 anon_352_459_0->anon_352_459_1 anon_352_459_2 anon_352_459_1->anon_352_459_2 anon_352_459_2->459 anon_353_362_0->362 anon_364_457_1 anon_364_457_0->anon_364_457_1 anon_364_457_2 anon_364_457_1->anon_364_457_2 anon_364_457_2->457 anon_374_452_1 anon_374_452_0->anon_374_452_1 anon_374_452_1->452 anon_446_447_0->447 anon_448_449_1 anon_448_449_0->anon_448_449_1 anon_448_449_1->449 anon_451_459_0->459 anon_454_455_1 anon_454_455_0->anon_454_455_1 anon_454_455_1->455

Matplotlib integration


In [73]:
# Select a single cluster to use as a small example graph for the
# matplotlib demo below.
cluster_idx = 33
cluster_span = cluster_spans_995F[cluster_idx]
dend_start, dend_stop, cluster_hap_indices = cluster_span

# Restrict the haplotypes (taken along axis 1) and their population labels
# to the members of the chosen cluster.
cluster_haps = h_vgsc_995F.take(cluster_hap_indices, axis=1)
cluster_hap_pops = hap_pops_995F[cluster_hap_indices]

# One colour per selected haplotype, looked up from its population label.
cluster_hap_colors = np.array(
    [pop_colors[pop] for pop in cluster_hap_pops]
)

graph = hapclust.graph_haplotype_network(
    cluster_haps,
    hap_colors=cluster_hap_colors,
    network_method='mjn',
    edge_weight=12,
    overlap=False,
    show_node_labels=2,
    fontsize='8',
    variant_labels=variant_labels_vgsc,
)
graph


Out[73]:
%3 0 3 3 0->3 1 1 11 1->11 18 1->18 22 1->22 23 1->23 2 2 31 2->31 anon_2_15_0 2->anon_2_15_0 3->1 I1868T 3->2 A1934V 4 4 3->4 P1874S 5 5 3->5 P1874L 6 6 3->6 K1603T 8 8 3->8 9 3->9 13 13 3->13 I1940T 14 14 3->14 T791M 17 17 3->17 N1345I 19 19 3->19 E1597G 21 21 3->21 V1853I 25 3->25 P55L 26 3->26 29 3->29 V1303A 38 3->38 I829L 42 3->42 43 3->43 45 3->45 46 3->46 47 47 3->47 48 3->48 50 3->50 anon_3_27_0 3->anon_3_27_0 anon_3_37_0 3->anon_3_37_0 32 4->32 34 4->34 35 4->35 36 4->36 10 5->10 49 5->49 7 12 12 7->12 8->10 P1874L 8->31 A1934V 39 8->39 anon_12_24_0 12->anon_12_24_0 16 13->16 M704V 28 13->28 14->12 A1746S anon_14_20_0 14->anon_14_20_0 15 30 19->30 33 19->33 20 24 27 27 37 44 39->44 anon_39_41_0 39->anon_39_41_0 40 41 40->41 anon_2_15_0->15 anon_3_27_1 anon_3_27_0->anon_3_27_1 anon_3_27_1->27 G54C anon_3_37_0->37 P940H anon_12_24_1 anon_12_24_0->anon_12_24_1 Q545L anon_12_24_1->24 anon_14_20_0->20 L1366F anon_39_41_0->41

In [74]:
fig = plt.figure(figsize=(12, 10), dpi=120)

# One entry per panel, in subplot order:
#   (title, keep_frame, keyword arguments for hapclust.plot_graphviz)
# The frame is kept (ticks removed only) on the last two panels to allow
# comparison between the plain rendering and ratio='fill'.
# Using ratio='fill' means graphviz will scale the graph to fill the
# available space.
panel_specs = [
    ('default options',
     False,
     dict()),
    ('higher res, default interpolation (bilinear)',
     False,
     dict(dpi=600)),
    ('higher res, lanczos interpolation',
     True,
     dict(dpi=600, interpolation='lanczos')),
    ('ratio=fill',
     True,
     dict(dpi=600, ratio='fill', interpolation='lanczos')),
]

for panel_no, (panel_title, keep_frame, render_kwargs) in enumerate(panel_specs, start=1):
    ax = fig.add_subplot(2, 2, panel_no)
    if keep_frame:
        ax.set_xticks([])
        ax.set_yticks([])
    else:
        ax.set_axis_off()
    ax.set_title(panel_title)
    hapclust.plot_graphviz(graph, ax, **render_kwargs)

fig.tight_layout()



In [ ]: