In [288]:
#Imports

# standard library
import sys
import os 
import time
import re
import glob
import pickle 
import operator 

# numpy, scipy, etc
import numpy as np
import mkl
from scipy import ndimage as ndi
from scipy.stats import norm
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd

# for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import cm
import seaborn as sns

# for networks
import networkx as nx

# required bastet, MIDAS, and pactolus specific modules:
def isneeded(x):
    if x not in sys.path:
        sys.path.append(x)

isneeded('/Users/curt/bastet/')
isneeded('/Users/curt/meta-iq/pactolus/pactolus/')
isneeded('/Users/curt/meta-iq/midas_lbl/')

# pactolus
from score_spectra import score_spectra_pairwise, sparse_uncertain_dot, calc_lambda, calc_dot_matrix


# timing and profiling
import timeit
%load_ext line_profiler


The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler

In [2]:
with open('Actino_C18_Pos.pickle', 'r+') as io:
    my_data = pickle.load(io)

In [3]:
my_data.keys()


Out[3]:
['rt',
 'scan',
 'collision_energy',
 'precursor_intensity',
 'file',
 'precursor_mz']

In [4]:
len(my_data['scan'])


Out[4]:
651072

In [5]:
# how many unique scans?

len(np.unique(my_data['precursor_mz']))


Out[5]:
62085

In [6]:
np.min(my_data['precursor_mz'])


Out[6]:
70.010002

In [7]:
np.max(my_data['precursor_mz'])


Out[7]:
1048.7751

In [11]:
sorted_unique_precursors = np.unique(my_data['precursor_mz'])

In [12]:
sorted_unique_precursors[0:10]


Out[12]:
array([ 70.01000214,  70.01332855,  70.01337433,  70.01338196,
        70.01339722,  70.01340485,  70.01343536,  70.01344299,
        70.01345062,  70.01346588], dtype=float32)

In [13]:
plt.plot(np.diff(sorted_unique_precursors), '.')


Out[13]:
[<matplotlib.lines.Line2D at 0x1e249a190>]

In [14]:
bin_edges = np.arange(70, 1050, 1)

binned_precursor_mzs, _ = np.histogram(sorted_unique_precursors, bins=bin_edges)

bin_width = np.diff(bin_edges).mean()

bin_centers = bin_edges[:-1] + bin_width/2

plt.bar(bin_centers, binned_precursor_mzs)


Out[14]:
<Container object of 979 artists>

In [17]:
# All precursor masses that round to the same value should be treated as identical
rounded_uniques, index_to_uniques, counts = np.unique(np.around(my_data['precursor_mz'], 
                                                                decimals = 4), 
                                                      return_inverse=True,
                                                      return_counts = True)

index_to_sort = index_to_uniques.argsort()

# TODO: implement a function which is like around() 
#       but puts out a tuple of possible "true" precursor mzs to allow for neutral losses

print len(index_to_uniques)

plt.plot(counts)


651072
Out[17]:
[<matplotlib.lines.Line2D at 0x136e7e290>]

In [18]:
len(counts)


Out[18]:
27437

In [19]:
index_to_uniques[index_to_sort][0:200]


Out[19]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2,
       2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6])

In [20]:
# group scans by their rounded mass value

my_grouped_data = {}

for field in my_data:
    my_grouped_data[field] = [my_data[field][el] for el in index_to_sort]
    
my_grouped_data['group'] = index_to_uniques[index_to_sort]

In [23]:
# example entries my my_grouped_data

my_grouped_data['precursor_mz'][0:10], my_grouped_data['group'][0:10]

my_grouped_data['precursor_mz'][2000:2010], my_grouped_data['group'][2000:2010]


Out[23]:
([array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32),
  array([ 72.08000183], dtype=float32)],
 array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20]))

In [82]:
# convert to list of Scan objects

class MS2_Scan(object):
    """Python class for holding a single tandem mass spectrum"""
    def __init__(self, params):
        """
        :param params: dict that must contain these key, value pairs:
                            precursor_mz: float
                            collision_energy: float
                            precursor_intensity: float
                            scan_data: ndarray of shape (n_peaks, 2) with columnz m/z and intensity
                            filename: string
                        option key, value pair:
                            group: int or string
        """
        
        # required parameters
        self.rt = params['rt']
        self.precursor_mz = params['precursor_mz']
        self.collision_energy = params['collision_energy']
        self.precursor_intensity = params['precursor_intensity']
        self.scan_data = params['scan_data']
        self.filename = params['filename']
        
        # optional parameters
        if 'group' in params.keys():
            self.group = params['group']
        else:
            self.group = None
        return

class MS2_ScanSet(list):
    """Python class for holding groups of MS2 scans."""
    def __init__(self, list_of_MS2_Scans):
        
        # record the scans
        self.scans = list_of_MS2_Scans
        
        # record the groups as a dictionary keyed on group names, with values as lists of indices
        self.groups = {}
        all_groups = np.unique([scan.group for scan in self.scans])
        for group_name in all_groups:
            self.groups[group_name] = np.where(scan.group == group_name for scan in self.scans)[0]
        return
    
    def calc_dot_matrix_of_groups(self, params=None, max_num_scans=500):
        self.dot_matrix_by_groups = {}
        if params is None:
            params = {
                      'mass_tol': 0.001,
                      'neutral_losses': [0],
                      'noise_cutoff': 1000,
                      'normalize': 2,
                      }
        for group_name in self.groups:
            raw_group_scans = [scan.scan_data for scan in self.scans if scan.group == group_name]
            if len(raw_group_scans) > max_num_scans:
                print 'Too many scans (%s) in group %s for calculation; skipping...' % (len(raw_group_scans), group_name)
                continue
            print 'Finding similarity matrix for group %s of %s scans.' % (group_name, len(raw_group_scans))
            self.dot_matrix_by_groups[group_name] = calc_dot_matrix(raw_group_scans, params)
        return
            
        


def convert_dict_of_lists_to_list_of_scans(my_dict):
    scan_list = []
    
    for idx, _ in enumerate(my_dict['file']):
        params = {'rt': my_dict['rt'][idx],
                  'precursor_mz': my_dict['precursor_mz'][idx],
                  'precursor_intensity': my_dict['precursor_intensity'][idx],
                  'collision_energy': my_dict['collision_energy'][idx],
                  'scan_data': my_dict['scan'][idx],
                  'filename': my_dict['file'][idx],
                  }
        
        if 'group' in my_dict.keys():
            params['group'] = my_dict['group'][idx]
        scan_list.append(MS2_Scan(params))
    return scan_list

In [151]:
# load the data from a pickle file if it is there, otherwise do the calculation and save it
try:
    with open('dumped_pickle_data.pkl', 'r+') as io:
        my_scan_set = pickle.load(io)
        print 'Found pickle file; loading data.'
except IOError:
    print 'Could not find pickle file, repeating calculation of group proximity matrices.'
    my_scan_list = convert_dict_of_lists_to_list_of_scans(my_grouped_data)
    my_scan_set = MS2_ScanSet(my_scan_list)

    # do dot matrixing
    my_scan_set.calc_dot_matrix_of_groups()
    
    # save results
    with open('dumped_pickle_data.pkl', 'w') as io:
        pickle.dump(my_scan_set, io)


Found pickle file; loading data.

In [152]:
len(my_scan_set.scans)


Out[152]:
651072

In [153]:
# Look at distribution of all MS2 peak intensities in all scans to decide noise cutoff

all_ints = np.array([my_scan_list[idx].scan_data[:, 1] for idx in xrange(len(my_scan_list))]).flatten()

plt.hist(all_ints, bins = np.logspace(0, 8))

plt.gca().set_xscale('log')



In [154]:
# Look at distribution of all parent peak intensities in all scans to decide noise cutoff

all_ints = np.array([my_scan_list[idx].precursor_intensity for idx in xrange(len(my_scan_list))])

plt.hist(all_ints, bins = np.logspace(0, 9))

plt.gca().set_xscale('log')



In [155]:
# do some clustering

plt.imshow(my_scan_set.dot_matrix_by_groups[0], 
           cmap=cm.Greens_r,
           interpolation='none',
           )


Out[155]:
<matplotlib.image.AxesImage at 0x25a0d67d0>

In [338]:
def make_distance_matrix_clustergram(prox_mat):
    # convert upper-triangular proximity matrix to a full-square distance matrix
    upper_trig = prox_mat.copy()
    lower_trig = upper_trig.T.copy()
    diag_idxs = np.diag_indices_from(upper_trig)
    lower_trig[diag_idxs] = 0
    proximity_mat = upper_trig + lower_trig

    # get rid of NaNs
    proximity_mat[np.isnan(proximity_mat)] = 0
    
    # ensure diagonal of distance_matrix is zero
    proximity_mat[np.diag_indices_from(proximity_mat)] = 1
    
    # ensure full proximity matrix is symmetrc
    assert np.allclose(proximity_mat, proximity_mat.T)
    
    # clustering require distances, not proximities
    distance_mat = 1 - proximity_mat
    
    # 
    D = ssd.squareform(distance_mat)
    Y = sch.linkage(D, method='complete')

    g = sns.clustermap(data=distance_mat,
                       row_linkage=Y,
                       col_linkage=Y,
                       figsize=(12, 10),
                       yticklabels=False,
                       )
    my_labels = [el.get_text() for el in g.ax_heatmap.get_xticklabels()]
    g.ax_heatmap.set_xticklabels(my_labels, size=8)
    g.ax_row_dendrogram.set_visible(False)
    return g

make_distance_matrix_clustergram(my_scan_set.dot_matrix_by_groups[0])


Out[338]:
<seaborn.matrix.ClusterGrid at 0x2f7e21c90>

In [268]:
my_labels = [el.get_text() for el in g.ax_heatmap.get_xticklabels()]
print my_labels


[u'100', u'101', u'106', u'50', u'15', u'47', u'66', u'20', u'61', u'36', u'57', u'69', u'3', u'67', u'51', u'52', u'28', u'2', u'71', u'89', u'59', u'76', u'105', u'74', u'77', u'22', u'64', u'4', u'42', u'54', u'6', u'78', u'48', u'11', u'85', u'98', u'37', u'1', u'34', u'16', u'87', u'104', u'21', u'29', u'62', u'31', u'45', u'103', u'84', u'97', u'73', u'102', u'99', u'96', u'70', u'35', u'40', u'68', u'24', u'81', u'30', u'80', u'46', u'88', u'83', u'95', u'38', u'92', u'94', u'9', u'44', u'10', u'60', u'86', u'13', u'53', u'93', u'58', u'5', u'79', u'91', u'90', u'82', u'75', u'72', u'0', u'19', u'65', u'63', u'56', u'55', u'49', u'43', u'41', u'39', u'33', u'32', u'27', u'26', u'25', u'23', u'18', u'17', u'14', u'12', u'7', u'8']

In [223]:
print g.dendrogram_row.reordered_ind


[100, 101, 106, 50, 15, 47, 66, 20, 61, 36, 57, 69, 3, 67, 51, 52, 28, 2, 71, 89, 59, 76, 105, 74, 77, 22, 64, 4, 42, 54, 6, 78, 48, 11, 85, 98, 37, 1, 34, 16, 87, 104, 21, 29, 62, 31, 45, 103, 84, 97, 73, 102, 99, 96, 70, 35, 40, 68, 24, 81, 30, 80, 46, 88, 83, 95, 38, 92, 94, 9, 44, 10, 60, 86, 13, 53, 93, 58, 5, 79, 91, 90, 82, 75, 72, 0, 19, 65, 63, 56, 55, 49, 43, 41, 39, 33, 32, 27, 26, 25, 23, 18, 17, 14, 12, 7, 8]

In [226]:
all_0_data = [scan for scan in my_scan_set.scans if scan.group==0]

In [235]:
def plot_spectrum(scan):
    plt.figure(figsize=(5, 5))
    plt.stem(scan[:, 0], scan[:, 1])
    plt.show()
    
plot_spectrum(all_0_data[100].scan_data[0])
plot_spectrum(all_0_data[101].scan_data[0])
plot_spectrum(all_0_data[102].scan_data[0])



In [285]:
# determine where to threshold our distance metric in order to make links

plt.hist(my_scan_set.dot_matrix_by_groups[0].flatten(), bins = np.arange(0, 1, 0.1))
plt.gca().set_yscale('log')



In [289]:
# draw network from heatmap

dot_cutoff = 0.5

proximity_matrix = my_scan_set.dot_matrix_by_groups[0].copy()

adjacency_matrix = proximity_matrix > dot_cutoff

group_0_network = nx.to_networkx_graph(adjacency_matrix)

In [325]:
from math import sqrt

plt.figure(figsize=(10, 10))


layout = nx.spring_layout(group_0_network)
nx.draw(group_0_network, pos=layout)



In [313]:
group_0_network.number_of_nodes


Out[313]:
<bound method Graph.number_of_nodes of <networkx.classes.graph.Graph object at 0x136ba8fd0>>

In [335]:
from collections import Counter

group_counter = Counter([scan.group for scan in my_scan_set.scans])

less_than_500 = group_counter.copy()
for group in group_counter:
    if group_counter[group] > 500:
        less_than_500.pop(group)
        
less_than_500.most_common()


Out[335]:
[(224, 498),
 (5795, 498),
 (783, 491),
 (5797, 489),
 (5794, 488),
 (1374, 486),
 (3463, 486),
 (308, 481),
 (5807, 473),
 (191, 471),
 (4291, 458),
 (5799, 456),
 (192, 455),
 (6, 452),
 (1359, 451),
 (1379, 450),
 (605, 448),
 (3158, 443),
 (117, 441),
 (2664, 441),
 (5802, 439),
 (5924, 439),
 (507, 435),
 (1334, 430),
 (309, 427),
 (4335, 426),
 (15821, 424),
 (5265, 421),
 (544, 420),
 (806, 419),
 (43, 412),
 (5189, 412),
 (2658, 410),
 (5801, 409),
 (3276, 405),
 (632, 404),
 (4284, 404),
 (11026, 401),
 (395, 397),
 (3356, 395),
 (4292, 391),
 (2784, 379),
 (1349, 376),
 (1331, 370),
 (1906, 370),
 (6004, 362),
 (2414, 359),
 (3354, 359),
 (397, 353),
 (294, 351),
 (1908, 351),
 (3358, 344),
 (6775, 344),
 (687, 342),
 (4315, 337),
 (3278, 336),
 (23802, 334),
 (537, 333),
 (16349, 333),
 (375, 332),
 (3355, 331),
 (62, 329),
 (458, 320),
 (1442, 320),
 (7331, 318),
 (572, 317),
 (161, 316),
 (3277, 316),
 (93, 312),
 (231, 312),
 (4430, 312),
 (940, 310),
 (332, 308),
 (468, 308),
 (1847, 306),
 (7369, 303),
 (2989, 302),
 (394, 299),
 (505, 298),
 (2261, 297),
 (149, 296),
 (3782, 294),
 (99, 293),
 (396, 293),
 (492, 293),
 (11258, 293),
 (498, 291),
 (1907, 290),
 (3275, 287),
 (310, 285),
 (5441, 285),
 (162, 284),
 (9912, 283),
 (177, 282),
 (4319, 282),
 (1333, 278),
 (1905, 274),
 (27377, 272),
 (611, 270),
 (1551, 270),
 (2014, 270),
 (2665, 270),
 (3236, 270),
 (509, 267),
 (16450, 267),
 (4317, 266),
 (1335, 265),
 (689, 263),
 (6213, 263),
 (2415, 260),
 (5383, 259),
 (26772, 258),
 (8785, 257),
 (7990, 255),
 (40, 253),
 (2062, 248),
 (2106, 247),
 (23796, 247),
 (27219, 246),
 (95, 245),
 (2408, 244),
 (105, 242),
 (982, 241),
 (15819, 240),
 (6245, 239),
 (189, 234),
 (8230, 233),
 (3366, 232),
 (1330, 230),
 (26309, 229),
 (5290, 227),
 (298, 226),
 (2049, 226),
 (94, 225),
 (178, 224),
 (104, 223),
 (5292, 223),
 (2039, 222),
 (5288, 222),
 (3279, 219),
 (4725, 219),
 (788, 218),
 (3274, 218),
 (1816, 216),
 (2427, 215),
 (3280, 215),
 (2050, 214),
 (3359, 213),
 (330, 210),
 (8156, 209),
 (8232, 209),
 (3294, 208),
 (3357, 208),
 (4471, 208),
 (306, 207),
 (4210, 207),
 (106, 206),
 (4887, 206),
 (15823, 206),
 (606, 205),
 (688, 205),
 (361, 204),
 (5294, 204),
 (6007, 204),
 (16893, 204),
 (206, 203),
 (628, 203),
 (686, 203),
 (7474, 203),
 (3360, 201),
 (7544, 201),
 (23795, 200),
 (6195, 199),
 (1909, 198),
 (2657, 198),
 (25038, 198),
 (8289, 196),
 (4314, 194),
 (8234, 193),
 (19706, 192),
 (2052, 191),
 (26001, 191),
 (11208, 190),
 (626, 188),
 (2016, 188),
 (27076, 188),
 (163, 186),
 (1034, 185),
 (15820, 185),
 (6176, 184),
 (1380, 183),
 (7672, 183),
 (15817, 183),
 (122, 178),
 (534, 178),
 (853, 178),
 (4293, 178),
 (5289, 178),
 (23803, 178),
 (144, 177),
 (859, 177),
 (4316, 177),
 (7407, 177),
 (8236, 177),
 (2048, 176),
 (837, 175),
 (1890, 174),
 (1955, 171),
 (7249, 171),
 (15825, 171),
 (15921, 171),
 (2015, 169),
 (1916, 168),
 (301, 167),
 (1881, 166),
 (324, 165),
 (145, 164),
 (265, 164),
 (1894, 164),
 (5396, 164),
 (5713, 164),
 (1910, 163),
 (8806, 163),
 (373, 161),
 (1420, 161),
 (300, 160),
 (1178, 160),
 (1904, 160),
 (4321, 160),
 (11201, 160),
 (15827, 159),
 (3505, 158),
 (5147, 158),
 (5803, 158),
 (10173, 158),
 (23809, 158),
 (249, 157),
 (5978, 157),
 (362, 156),
 (1892, 156),
 (2395, 156),
 (16875, 156),
 (211, 154),
 (1365, 154),
 (2766, 153),
 (3503, 152),
 (2393, 151),
 (3353, 151),
 (23798, 151),
 (621, 150),
 (8228, 150),
 (1722, 149),
 (6335, 149),
 (625, 148),
 (2763, 148),
 (4361, 148),
 (5857, 147),
 (244, 146),
 (23800, 146),
 (1891, 145),
 (1386, 144),
 (16877, 144),
 (2013, 142),
 (346, 140),
 (601, 140),
 (2762, 139),
 (2764, 139),
 (8231, 139),
 (15824, 139),
 (264, 138),
 (1893, 138),
 (2017, 138),
 (4318, 138),
 (5579, 138),
 (30, 137),
 (2051, 136),
 (3453, 136),
 (4164, 136),
 (4320, 136),
 (5291, 136),
 (6481, 135),
 (23780, 135),
 (616, 134),
 (2329, 134),
 (2569, 134),
 (792, 133),
 (5793, 133),
 (16873, 133),
 (87, 132),
 (204, 132),
 (1336, 132),
 (5293, 132),
 (4353, 131),
 (6292, 131),
 (7999, 131),
 (35, 130),
 (1076, 130),
 (5075, 130),
 (5542, 130),
 (7997, 130),
 (16874, 130),
 (25020, 130),
 (2765, 129),
 (3273, 128),
 (3504, 128),
 (6364, 128),
 (8001, 128),
 (11643, 128),
 (3506, 127),
 (6137, 127),
 (348, 126),
 (1387, 126),
 (8857, 126),
 (3228, 125),
 (276, 124),
 (319, 124),
 (2394, 124),
 (2682, 124),
 (3502, 124),
 (16879, 124),
 (1385, 123),
 (2416, 123),
 (4165, 123),
 (23805, 123),
 (3247, 122),
 (5920, 122),
 (146, 121),
 (2053, 121),
 (8235, 120),
 (15987, 120),
 (4846, 119),
 (6129, 119),
 (6541, 119),
 (5804, 118),
 (6024, 118),
 (6954, 118),
 (25641, 118),
 (1205, 117),
 (3507, 117),
 (78, 116),
 (347, 116),
 (789, 116),
 (2779, 116),
 (8727, 116),
 (711, 115),
 (2397, 115),
 (15829, 115),
 (2666, 114),
 (6127, 114),
 (1015, 113),
 (2757, 113),
 (8229, 113),
 (11109, 113),
 (11800, 113),
 (398, 112),
 (1010, 112),
 (4277, 112),
 (690, 111),
 (850, 111),
 (4163, 111),
 (5296, 111),
 (11007, 111),
 (16883, 111),
 (25642, 111),
 (212, 110),
 (3457, 110),
 (5295, 110),
 (20683, 110),
 (1230, 109),
 (19708, 109),
 (3454, 108),
 (0, 107),
 (66, 107),
 (193, 107),
 (627, 107),
 (4856, 107),
 (4885, 107),
 (113, 106),
 (510, 106),
 (629, 106),
 (3455, 106),
 (5320, 106),
 (6796, 106),
 (1398, 105),
 (4644, 105),
 (6677, 105),
 (15822, 105),
 (9701, 104),
 (25258, 104),
 (649, 103),
 (1671, 103),
 (2012, 103),
 (15220, 103),
 (16878, 103),
 (651, 102),
 (2778, 102),
 (4162, 102),
 (4740, 102),
 (6415, 102),
 (23793, 102),
 (1411, 101),
 (2777, 101),
 (2991, 101),
 (4642, 101),
 (8233, 101),
 (23807, 101),
 (137, 100),
 (1768, 100),
 (2776, 100),
 (4384, 99),
 (4583, 99),
 (4995, 99),
 (7280, 99),
 (2018, 98),
 (5526, 98),
 (8252, 98),
 (8844, 98),
 (535, 97),
 (1075, 97),
 (2088, 97),
 (2562, 97),
 (3251, 97),
 (4136, 97),
 (23370, 97),
 (219, 96),
 (791, 96),
 (1933, 96),
 (2942, 96),
 (4201, 96),
 (4283, 96),
 (9226, 96),
 (9264, 96),
 (15818, 96),
 (266, 95),
 (321, 95),
 (1388, 95),
 (1389, 95),
 (2796, 95),
 (4166, 95),
 (7769, 95),
 (10668, 95),
 (16881, 95),
 (1078, 94),
 (1373, 94),
 (5792, 94),
 (5915, 94),
 (6582, 94),
 (478, 93),
 (4203, 93),
 (5911, 93),
 (5930, 93),
 (8758, 93),
 (10022, 93),
 (23482, 93),
 (1077, 92),
 (1895, 92),
 (8438, 92),
 (160, 91),
 (320, 91),
 (685, 91),
 (7068, 91),
 (15155, 91),
 (16882, 91),
 (406, 90),
 (2775, 90),
 (7995, 90),
 (10417, 90),
 (15828, 90),
 (23752, 90),
 (88, 89),
 (1079, 89),
 (1399, 89),
 (3371, 89),
 (13677, 89),
 (23797, 89),
 (221, 88),
 (2333, 88),
 (2407, 88),
 (3281, 88),
 (3282, 88),
 (7931, 88),
 (11139, 88),
 (25635, 88),
 (25639, 88),
 (622, 87),
 (893, 87),
 (2301, 87),
 (2337, 87),
 (3268, 87),
 (3290, 87),
 (3369, 87),
 (5913, 87),
 (10432, 87),
 (10673, 87),
 (15826, 87),
 (25648, 87),
 (26315, 87),
 (167, 86),
 (8238, 86),
 (23811, 86),
 (89, 85),
 (151, 85),
 (652, 85),
 (1661, 85),
 (1992, 85),
 (2054, 85),
 (16885, 85),
 (504, 84),
 (1681, 84),
 (3199, 84),
 (6125, 84),
 (7037, 84),
 (13390, 84),
 (16871, 84),
 (92, 83),
 (153, 83),
 (322, 83),
 (1568, 83),
 (2767, 83),
 (823, 82),
 (3303, 82),
 (3373, 82),
 (8440, 82),
 (20626, 82),
 (220, 81),
 (762, 81),
 (1561, 81),
 (2396, 81),
 (2561, 81),
 (3635, 81),
 (5310, 81),
 (25644, 81),
 (568, 80),
 (639, 80),
 (716, 80),
 (1472, 80),
 (1819, 80),
 (3456, 80),
 (8478, 80),
 (8846, 80),
 (15841, 80),
 (107, 79),
 (1865, 79),
 (5909, 79),
 (26000, 79),
 (2768, 78),
 (4200, 78),
 (5928, 78),
 (6128, 78),
 (277, 77),
 (486, 77),
 (2877, 77),
 (3637, 77),
 (4223, 77),
 (650, 76),
 (3361, 76),
 (3634, 76),
 (4646, 76),
 (6058, 76),
 (6199, 76),
 (1642, 75),
 (2992, 75),
 (3227, 75),
 (3639, 75),
 (4776, 75),
 (15815, 75),
 (17160, 75),
 (23808, 75),
 (25654, 75),
 (26339, 75),
 (484, 74),
 (4205, 74),
 (5308, 74),
 (6131, 74),
 (9358, 74),
 (9862, 74),
 (23799, 74),
 (23, 73),
 (79, 73),
 (714, 73),
 (1199, 73),
 (1738, 73),
 (3508, 73),
 (7900, 73),
 (14343, 73),
 (23791, 73),
 (201, 72),
 (349, 72),
 (1356, 72),
 (2398, 72),
 (3321, 72),
 (5926, 72),
 (8003, 72),
 (9824, 72),
 (782, 71),
 (3272, 71),
 (4435, 71),
 (6709, 71),
 (19990, 71),
 (179, 70),
 (3458, 70),
 (3631, 70),
 (3872, 70),
 (4146, 70),
 (4202, 70),
 (4225, 70),
 (4643, 70),
 (5218, 70),
 (7902, 70),
 (8260, 70),
 (9009, 70),
 (10342, 70),
 (10464, 70),
 (14204, 70),
 (356, 69),
 (415, 69),
 (3389, 69),
 (4206, 69),
 (4282, 69),
 (11380, 69),
 (3636, 68),
 (7996, 68),
 (16876, 68),
 (101, 67),
 (5183, 67),
 (8224, 67),
 (14800, 67),
 (17227, 67),
 (1620, 66),
 (3060, 66),
 (3401, 66),
 (6130, 66),
 (8442, 66),
 (8838, 66),
 (9119, 66),
 (11038, 66),
 (344, 65),
 (483, 65),
 (1275, 65),
 (1367, 65),
 (1396, 65),
 (3374, 65),
 (4227, 65),
 (5932, 65),
 (7413, 65),
 (7664, 65),
 (8436, 65),
 (12484, 65),
 (23827, 65),
 (25637, 65),
 (26333, 65),
 (26337, 65),
 (381, 64),
 (953, 64),
 (1256, 64),
 (1474, 64),
 (1663, 64),
 (2579, 64),
 (2995, 64),
 (2996, 64),
 (3370, 64),
 (3541, 64),
 (4542, 64),
 (4838, 64),
 (7433, 64),
 (7898, 64),
 (11817, 64),
 (25646, 64),
 (26340, 64),
 (54, 63),
 (753, 63),
 (834, 63),
 (2563, 63),
 (3352, 63),
 (6046, 63),
 (8437, 63),
 (8848, 63),
 (11359, 63),
 (13211, 63),
 (26485, 63),
 (50, 62),
 (132, 62),
 (2030, 62),
 (2114, 62),
 (2335, 62),
 (5345, 62),
 (6755, 62),
 (7666, 62),
 (15831, 62),
 (16880, 62),
 (16886, 62),
 (360, 61),
 (984, 61),
 (2532, 61),
 (2781, 61),
 (4226, 61),
 (4836, 61),
 (5287, 61),
 (7507, 61),
 (8237, 61),
 (8843, 61),
 (19424, 61),
 (26028, 61),
 (479, 60),
 (562, 60),
 (983, 60),
 (1425, 60),
 (1471, 60),
 (2334, 60),
 (2417, 60),
 (2524, 60),
 (3252, 60),
 (3293, 60),
 (3501, 60),
 (4084, 60),
 (4161, 60),
 (4294, 60),
 (5235, 60),
 (7417, 60),
 (7998, 60),
 (14812, 60),
 (23810, 60),
 (27114, 60),
 (61, 59),
 (560, 59),
 (2336, 59),
 (2566, 59),
 (2597, 59),
 (3638, 59),
 (4648, 59),
 (5229, 59),
 (5244, 59),
 (5910, 59),
 (6798, 59),
 (7415, 59),
 (7522, 59),
 (8262, 59),
 (23806, 59),
 (164, 58),
 (407, 58),
 (472, 58),
 (595, 58),
 (751, 58),
 (752, 58),
 (768, 58),
 (811, 58),
 (1266, 58),
 (1473, 58),
 (2303, 58),
 (2565, 58),
 (2656, 58),
 (3005, 58),
 (4068, 58),
 (4148, 58),
 (4434, 58),
 (5784, 58),
 (13096, 58),
 (13538, 58),
 (14308, 58),
 (14935, 58),
 (23804, 58),
 (461, 57),
 (569, 57),
 (809, 57),
 (827, 57),
 (2994, 57),
 (3452, 57),
 (4066, 57),
 (4167, 57),
 (4565, 57),
 (5159, 57),
 (5283, 57),
 (9301, 57),
 (11378, 57),
 (19996, 57),
 (23794, 57),
 (83, 56),
 (822, 56),
 (1200, 56),
 (1644, 56),
 (3229, 56),
 (3372, 56),
 (4144, 56),
 (4204, 56),
 (4840, 56),
 (7200, 56),
 (8850, 56),
 (9864, 56),
 (12601, 56),
 (20415, 56),
 (26917, 56),
 (80, 55),
 (733, 55),
 (825, 55),
 (1141, 55),
 (2560, 55),
 (4313, 55),
 (5312, 55),
 (5931, 55),
 (7414, 55),
 (9299, 55),
 (11382, 55),
 (15767, 55),
 (16852, 55),
 (16994, 55),
 (660, 54),
 (1114, 54),
 (1381, 54),
 (2564, 54),
 (5914, 54),
 (6075, 54),
 (9600, 54),
 (13615, 54),
 (25652, 54),
 (27079, 54),
 (363, 53),
 (824, 53),
 (1366, 53),
 (2580, 53),
 (2780, 53),
 (3375, 53),
 (4327, 53),
 (5181, 53),
 (5929, 53),
 (6673, 53),
 (7667, 53),
 (8441, 53),
 (8444, 53),
 (16332, 53),
 (19988, 53),
 (19998, 53),
 (26104, 53),
 (24, 52),
 (311, 52),
 (1971, 52),
 (2188, 52),
 (2528, 52),
 (4437, 52),
 (4645, 52),
 (4863, 52),
 (5057, 52),
 (7496, 52),
 (7668, 52),
 (8847, 52),
 (11162, 52),
 (14429, 52),
 (25650, 52),
 (26335, 52),
 (176, 51),
 (417, 51),
 (588, 51),
 (717, 51),
 (1470, 51),
 (1521, 51),
 (1748, 51),
 (1813, 51),
 (2255, 51),
 (3378, 51),
 (4224, 51),
 (5246, 51),
 (5446, 51),
 (5912, 51),
 (8264, 51),
 (10257, 51),
 (12075, 51),
 (13209, 51),
 (23801, 51),
 (27237, 51),
 (810, 50),
 (2027, 50),
 (2139, 50),
 (2399, 50),
 (2881, 50),
 (3955, 50),
 (4067, 50),
 (4130, 50),
 (4143, 50),
 (4145, 50),
 (5230, 50),
 (6965, 50),
 (8698, 50),
 (9861, 50),
 (10478, 50),
 (11034, 50),
 (14798, 50),
 (19994, 50),
 (26022, 50),
 (26795, 50),
 (1139, 49),
 (1140, 49),
 (1142, 49),
 (1390, 49),
 (1820, 49),
 (1937, 49),
 (1969, 49),
 (1995, 49),
 (3254, 49),
 (4367, 49),
 (4641, 49),
 (4647, 49),
 (4842, 49),
 (5231, 49),
 (6669, 49),
 (14933, 49),
 (15830, 49),
 (16884, 49),
 (19793, 49),
 (19799, 49),
 (25640, 49),
 (1443, 48),
 (1662, 48),
 (2332, 48),
 (5112, 48),
 (5307, 48),
 (6013, 48),
 (6751, 48),
 (7494, 48),
 (8261, 48),
 (8842, 48),
 (9474, 48),
 (19992, 48),
 (25655, 48),
 (26342, 48),
 (202, 47),
 (563, 47),
 (671, 47),
 (715, 47),
 (1519, 47),
 (1520, 47),
 (1531, 47),
 (1665, 47),
 (4439, 47),
 (5185, 47),
 (6671, 47),
 (7476, 47),
 (7904, 47),
 (7933, 47),
 (8439, 47),
 (8644, 47),
 (8845, 47),
 (13358, 47),
 (13540, 47),
 (23036, 47),
 (26021, 47),
 (26332, 47),
 (408, 46),
 (435, 46),
 (1889, 46),
 (1932, 46),
 (2582, 46),
 (3923, 46),
 (4861, 46),
 (4864, 46),
 (7419, 46),
 (8913, 46),
 (9303, 46),
 (11338, 46),
 (13542, 46),
 (14934, 46),
 (16872, 46),
 (22457, 46),
 (25947, 46),
 (26006, 46),
 (130, 45),
 (213, 45),
 (225, 45),
 (436, 45),
 (691, 45),
 (985, 45),
 (2189, 45),
 (2644, 45),
 (2676, 45),
 (3459, 45),
 (4086, 45),
 (4160, 45),
 (4199, 45),
 (6126, 45),
 (9476, 45),
 (10216, 45),
 (11036, 45),
 (13515, 45),
 (14802, 45),
 (15285, 45),
 (15816, 45),
 (16859, 45),
 (19147, 45),
 (19151, 45),
 (19153, 45),
 (20000, 45),
 (26271, 45),
 (683, 44),
 (1190, 44),
 (1192, 44),
 (1855, 44),
 (2192, 44),
 (2338, 44),
 (2578, 44),
 (3253, 44),
 (4065, 44),
 (4147, 44),
 ...]

In [339]:
make_distance_matrix_clustergram(my_scan_set.dot_matrix_by_groups[224])


Out[339]:
<seaborn.matrix.ClusterGrid at 0x2945606d0>

In [ ]: