notebook.community

Edit and run



In [1]:

    
import re
import sys
import pandas as pd
import brainbox as bb
from matplotlib import pyplot as plt
import scipy.spatial.distance as dist
import scipy.cluster.hierarchy as clh



In [2]:

    
in_path = '/data1/abide/Out/Remote/some_failed/out'
out_path = '/data1/abide/Analysis/Remote/some_missing'
metric = 'stability_maps'



In [5]:

    
file_dict = bb.fileOps.grab_files(in_path, '.nii.gz', sub=metric)









    



I will be pulling files from /data1/abide/Out/Remote/some_failed/out/stability_maps



In [60]:

    
array_dict = bb.dataOps.read_files(file_dict)









    



I found 607 files to load.
 100.0 % done 0.00 seconds to go.
We are done

Compute the similarity of network metrics among the subjects for all network

This could be done in parallel if Marc gives me more memory



In [61]:

    
result_list = []
for n_id in range(10):
    sys.stdout.write('\rRunning network {}.'.format(n_id))
    sys.stdout.flush()
    
    eucl = dist.squareform(dist.pdist(array_dict[metric][..., n_id], 'euclidean'))
    Y = clh.linkage(eucl, method='ward')
    result_list.append((eucl, Y))









    



Running network 9.

Visualize the things



In [8]:

    
f = plt.figure(figsize=(18, 50))
order_net = None
for n_id in range(10):
    n_id += 1
    sys.stdout.write('\rRunning network {}.'.format(n_id))
    sys.stdout.flush()
    (eucl, Y) = result_list[n_id-1]
    ax = f.add_subplot(5,2,n_id)
    subdend = bb.visuOps.add_subplot_axes(ax, [0, 0.71, 1, 0.29])
    submat = bb.visuOps.add_subplot_axes(ax, [0, 0, 1, 0.7])
    if order_net:
        Y = result_list[order_net][1]
    
    Z1 = clh.dendrogram(Y, ax=subdend)
    if order_net:
        subdend.matshow(np.ones((100,20)), aspect='auto')

    idx = Z1['leaves']
    tmp = eucl[idx, :]
    D = tmp[:, idx]
    a = submat.matshow(-D, aspect='auto')
    submat.set_xticks([])
    submat.set_yticks([])
    subdend.set_xticks([])
    subdend.set_yticks([])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title('Network {}'.format(n_id))
f.suptitle('Stability Maps')









    



Running network 10.





    Out[8]:





<matplotlib.text.Text at 0x13b7af50>

Grab the phenotype data



In [62]:

    
pheno_path = '/home/surchs/Project/abide/pheno/pheno_full.csv'
pheno = pd.read_csv(pheno_path)



In [63]:

    
# Get the real sub ids from the file names
sub_ids = np.array([int64(re.search(r'(?<=\d{2})\d{5}', sub_id).group()) for sub_id in file_dict['sub_name']])



In [78]:

    
# Get the relevant section of the pheno data
this_pheno = pheno[pheno['SUB_ID'].isin(sub_ids)]

Find a specific number of clusters and investigate them



In [66]:

    
# Define the network
netw = 8
# Build the distance metric
eucl = dist.squareform(dist.pdist(array_dict[metric][..., netw], 'euclidean'))
Y = clh.linkage(eucl, method='ward')
# Make clusters out of the hierarchy
fl = clh.fcluster(Y,10,criterion='maxclust')



In [80]:

    
sub_mask = [x in this_pheno['SUB_ID'].values for x in sub_ids]



In [93]:

    
len(np.unique(sub_ids))









    Out[93]:





582



In [68]:

    
phens = this_pheno['SEX'].values
f = plt.figure()
for cl_id in range(1,10):
    vals = phens[fl==cl_id]
    ax = f.add_subplot(2,5,cl_id)
    ax.hist(vals)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-68-0c754cb151bb> in <module>()
      2 f = plt.figure()
      3 for cl_id in range(1,10):
----> 4     vals = phens[fl==cl_id]
      5     ax = f.add_subplot(2,5,cl_id)
      6     ax.hist(vals)

ValueError: too many boolean indices





    





<matplotlib.figure.Figure at 0x5812a10>



In [56]:

    
# Define the cluster
clust = 1
# Get the subjects belonging to this cluster
cl_subs = sub_ids[fl == clust]
cl_phen = pheno[pheno['SUB_ID'].isin(cl_subs)]



In [65]:

    
cl_phen[['AGE_AT_SCAN', 'FIQ']]









    Out[65]:






  
    
      
      AGE_AT_SCAN
      FIQ
    
  
  
    
      8   
       20.9000
       101
    
    
      16  
       17.5000
       125
    
    
      23  
       20.0000
       111
    
    
      26  
       21.1000
       103
    
    
      28  
       23.6000
       128
    
    
      38  
       33.0000
       103
    
    
      48  
       24.0000
       118
    
    
      52  
       28.0000
       129
    
    
      57  
       20.0000
       124
    
    
      64  
       30.0000
       126
    
    
      68  
       11.0000
       101
    
    
      71  
        9.7300
       122
    
    
      74  
        9.3000
       120
    
    
      77  
        8.0700
       115
    
    
      80  
       10.8200
       117
    
    
      96  
        8.4500
        88
    
    
      98  
       11.3300
        95
    
    
      101 
       11.0900
       125
    
    
      103 
       11.8500
       117
    
    
      111 
       11.7900
        98
    
    
      114 
       11.1700
       114
    
    
      115 
       12.4300
        98
    
    
      117 
       10.2900
       109
    
    
      121 
       24.0000
       106
    
    
      130 
       22.0000
       128
    
    
      131 
       19.0000
       109
    
    
      157 
       16.2000
       NaN
    
    
      158 
       12.4000
       NaN
    
    
      164 
       12.8000
       NaN
    
    
      165 
       15.2000
       NaN
    
    
      ...
      ...
      ...
    
    
      761 
       17.7900
        95
    
    
      764 
       13.4600
       107
    
    
      766 
       14.0300
       108
    
    
      767 
       11.2500
       118
    
    
      775 
       15.7400
       106
    
    
      779 
       13.8200
       106
    
    
      780 
        9.5000
       109
    
    
      785 
       13.0800
        92
    
    
      793 
       13.2800
        91
    
    
      798 
       11.6600
       119
    
    
      799 
       12.1500
       128
    
    
      807 
       13.6300
       118
    
    
      809 
       12.8200
       102
    
    
      960 
       14.9432
       138
    
    
      967 
       24.9884
       100
    
    
      969 
       27.3238
       116
    
    
      970 
       12.9391
       130
    
    
      971 
       10.4668
       144
    
    
      977 
       18.8008
       111
    
    
      982 
       18.1547
       111
    
    
      998 
       21.6482
       117
    
    
      1001
       20.1780
        91
    
    
      1003
       29.0897
       127
    
    
      1004
       17.1663
        88
    
    
      1006
       21.4346
        96
    
    
      1013
       19.6277
       110
    
    
      1021
       21.1307
        99
    
    
      1025
       32.9582
        90
    
    
      1027
       17.6454
        87
    
    
      1044
       18.4175
        80
    
  

163 rows × 2 columns

Some random testing



In [37]:

    
# Make a random template clustering
rand_clust = np.random.randint(1, 5, len(this_pheno))
cov_interest = ['DX_GROUP', 'AGE_AT_SCAN', 'SITE_ID', 'SEX', 'EYE_STATUS_AT_SCAN']



In [10]:

    
sub_1 = sub_ids[rand_clust==1]



In [58]:

    
a = this_pheno['SEX']



In [59]:

    
f = plt.figure()
for cl_id in range(1,10):
    vals = a.values[rand_clust==cl_id]
    ax = f.add_subplot(2,5,cl_id)
    ax.hist(vals)



In [51]:



In [52]:









    Out[52]:





array([1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2,
       1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2,
       2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1])



In [ ]:

	AGE_AT_SCAN	FIQ
8	20.9000	101
16	17.5000	125
23	20.0000	111
26	21.1000	103
28	23.6000	128
38	33.0000	103
48	24.0000	118
52	28.0000	129
57	20.0000	124
64	30.0000	126
68	11.0000	101
71	9.7300	122
74	9.3000	120
77	8.0700	115
80	10.8200	117
96	8.4500	88
98	11.3300	95
101	11.0900	125
103	11.8500	117
111	11.7900	98
114	11.1700	114
115	12.4300	98
117	10.2900	109
121	24.0000	106
130	22.0000	128
131	19.0000	109
157	16.2000	NaN
158	12.4000	NaN
164	12.8000	NaN
165	15.2000	NaN
...	...	...
761	17.7900	95
764	13.4600	107
766	14.0300	108
767	11.2500	118
775	15.7400	106
779	13.8200	106
780	9.5000	109
785	13.0800	92
793	13.2800	91
798	11.6600	119
799	12.1500	128
807	13.6300	118
809	12.8200	102
960	14.9432	138
967	24.9884	100
969	27.3238	116
970	12.9391	130
971	10.4668	144
977	18.8008	111
982	18.1547	111
998	21.6482	117
1001	20.1780	91
1003	29.0897	127
1004	17.1663	88
1006	21.4346	96
1013	19.6277	110
1021	21.1307	99
1025	32.9582	90
1027	17.6454	87
1044	18.4175	80