In [1]:
import itertools
import glob
import os
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import gridspec
import nibabel as nib
import numpy as np
import pandas as pd
import seaborn as sns
import hd_classifier
In [2]:
# Load the participant metadata table (one row per subject, including the
# clinical group label used for coloring the plots) and display it.
participants_df = pd.read_csv('data/participants.tsv', sep='\t')
participants_df
Out[2]:
We also set some colors for the groups.
In [3]:
subject_colors = {'HD': 'r', 'pre-HD': 'g', 'control': 'b'}
We extract all voxels for each subject and region directly from the nifti images and the masks generated from Freesurfer.
We use principal component analysis to transform each subject into the vector space spanned by the eigenvectors of the covariance matrix. The plots show the components along each axis in this subspace for each patient. The axes are ordered by decreasing eigenvalue of the covariance matrix.
We choose a few statistical quantities to describe each region.
In [4]:
def q_1(x):
    """First quartile (25th percentile) of x."""
    return np.percentile(x, q=25)

def q_3(x):
    """Third quartile (75th percentile) of x."""
    return np.percentile(x, q=75)

# Summary statistics computed over the 'value' column for every region:
# the five box-plot numbers (min, quartiles, median, max) plus the mean.
# The dict keys, not the function names, label the resulting columns, so
# replacing the original lambdas with defs (PEP 8 E731) changes nothing
# downstream.
features = {'value': {'min': np.min, 'max': np.max, 'mean': np.mean,
                      'q_1': q_1, 'median': np.median, 'q_3': q_3}}
Extract the voxel data, calculate statistical features for all regions and subjects, and put them into a pd.DataFrame.
In [5]:
path_to_tracer_data = './data/func'
# NOTE(review): os.listdir order is platform-dependent; presumably
# hd_classifier.make_subjects does not depend on it -- confirm.
items = os.listdir(path_to_tracer_data)
subjects = hd_classifier.make_subjects(items, path_to_tracer_data)
# Raw string: '\.' is an invalid escape sequence in a normal string literal
# (SyntaxWarning on Python >= 3.12). The regex itself is unchanged.
image_filter = r'^r.*\.lin_T1_orientOK_skullstripped_norm_sm6mm.nii'
# Voxel-level data and per-region summary statistics for every subject.
masked_region_df_pbr, masked_region_features_pbr = hd_classifier.extract_features(subjects, features, image_filter)
masked_region_features_pbr
Out[5]:
Calculate the principal component rotated basis
In [6]:
from sklearn.decomposition import PCA

# Whitening rescales each principal component to unit variance so the two
# plotted axes are directly comparable.
pca = PCA(whiten=True)
# fit_transform both fits the model and projects the data; the original
# separate pca.fit(...) call beforehand only duplicated the work.
# .values replaces the long-deprecated DataFrame.as_matrix().
rotated_subjects_pbr = pca.fit_transform(masked_region_features_pbr.values)
# Note that we flip the axis of the first feature, assigned randomly by the
# algorithm, so it increases from controls to patients.
pca_results = pd.DataFrame(dict(first_feature=-rotated_subjects_pbr[:, 0],
                                second_feature=rotated_subjects_pbr[:, 1],
                                subject_id=masked_region_features_pbr.index.values))
# Order subjects by their score on the first component; this ordering is
# reused by all later figures (subject labels 1..N).
pca_results.sort_values(by='first_feature', inplace=True)
pca_results.reset_index(drop=True, inplace=True)
pca_results = pca_results.merge(participants_df[['subject_id', 'group']], on='subject_id')
pca_results
Out[6]:
In [7]:
# Make a list with all the name features as region_feature to label the plots. E.g. thalamus_min
pca_features = [(item[1] + ' ' + item[0]).title()
for item in itertools.product(list(masked_region_features_pbr.columns.levels[1]),
list(masked_region_features_pbr.columns.levels[2]))]
# Get the coefficient of each feature for the first 3 principal axis
transformed_features = sorted(zip(pca_features, *pca.components_[0:4]), key=lambda t: t[0])
Let's order the data according to the score in the plot above.
In [8]:
masked_region_df_pbr['subject_id'] = pd.Categorical(masked_region_df_pbr['subject_id'], pca_results.subject_id)
Just a couple of features are enough to separate inflammation in patients from controls. Even a single feature classifies the subjects into at least two groups.
In [9]:
# Some values overlap in the final plot, so we add some ad-hoc jittering:
# one hand-tuned vertical offset per subject (14 subjects) so overlapping
# markers on the 1-D score axis remain distinguishable.
jitters = np.array([-.01, .02, -.02, .01, -.02, .02, -.03,
                    .03, -.01, .01, -.03, .03, -.01, .02])
# Attach the per-subject jitter offsets; the join is positional (both frames
# are indexed 0..13 in the same score order after reset_index above).
joined = pca_results.join(pd.DataFrame(dict(jitters=jitters)))
groups = joined.groupby('group')
sns.set_style("ticks")
fig = plt.figure(figsize=(14, 6))
# Two halves: PCA scatter/strip plots in outer_grid[0], eigenvector profiles
# in outer_grid[1].
# NOTE(review): the names are swapped relative to the layout -- `right_plot`
# occupies outer_grid[0] (the LEFT half) and `left_plot` outer_grid[1].
outer_grid = gridspec.GridSpec(1, 2, wspace=0.1, width_ratios=[4,4])
right_plot = gridspec.GridSpecFromSubplotSpec(2, 1,
    subplot_spec=outer_grid[0], hspace=0, height_ratios=[1,3])
marker_size = 350
# plot the first and second components in a scatter plot
ax = plt.Subplot(fig, right_plot[1])
for name, items in groups:
    ax.scatter(items.first_feature, items.second_feature, s=marker_size, alpha=0.4,
               c=subject_colors[name], label=name)
# Number each marker 1..N in increasing first-component order (pca_results
# is sorted by first_feature).
for idx in range(len(pca_results.index)):
    ax.text(pca_results.first_feature[idx], pca_results.second_feature[idx], str(idx+1),
            horizontalalignment='center', verticalalignment='center')
ax.set_xlabel('first PCA axis')
ax.set_ylabel('second PCA axis')
ax.legend(labelspacing=1.4)
fig.add_subplot(ax)
sns.set_style("whitegrid")
# plot the first component alone on a jittered strip on top
ax_top = plt.Subplot(fig, right_plot[0])
for name, items in groups:
    ax_top.scatter(items.first_feature, items.jitters, s=marker_size, alpha=0.4,
                   color=subject_colors[name], label=name)
ax_top.set_xticks([])
ax_top.set_yticks([0])
ax_top.set_yticklabels([])
ax_top.set_ylabel('')
ax_top.xaxis.set_label_coords(0.5, 0.88)
ax_top.set_xlabel('first PCA axis')
# Share the x-range with the scatter plot below so points line up vertically.
ax_top.set(xlim=ax.get_xlim())
for sp in ax_top.spines.values(): sp.set_visible(False)
fig.add_subplot(ax_top)
# plot the eigenvectors in the original feature space, one panel per component
number_of_components = 3
number_of_features = len(transformed_features)
left_plot = gridspec.GridSpecFromSubplotSpec(1, number_of_components,
    subplot_spec=outer_grid[1], wspace=0.18)
for i in range(number_of_components):
    ax = plt.Subplot(fig, left_plot[i])
    # The first component's sign is flipped to match the flipped score above.
    ax.plot([item[i+1] if i != 0 else -item[i+1] for item in transformed_features], list(range(number_of_features)))
    # Panel title: fraction of variance explained by this component.
    ax.set_title(round(pca.explained_variance_ratio_[i],2))
    ax.set(xlim=(-.4, .4))
    ax.set(ylim=(0, number_of_features-1))
    ax.set_yticks(list(range(number_of_features)))
    # Feature labels only on the rightmost panel.
    ax.set_yticklabels([item[0] for item in transformed_features] if i==2 else [])
    ax.yaxis.tick_right()
    ax.set_xticks([-0.4, -.2, 0, .2, 0.4])
    ax.set_xticklabels([-0.4, -0.2, 0, 0.2, 0.4])
    # Dashed separators every 6 features -- presumably one group of 6
    # statistics per region; TODO confirm against the features dict.
    # NOTE(review): this inner loop reuses the name `i`, shadowing the panel
    # index. Harmless here (the outer for reassigns it), but fragile.
    for i in range(0,4):
        ax.axhline(y=5.5+6*i, ls='dashed', c='black', alpha=0.4)
    fig.add_subplot(ax)
fig.savefig('results/figs/pca_analysis.pdf', format='pdf')
In [10]:
# Attach the group label and inflammation score to every voxel row, then
# order rows by increasing score so subjects appear in the same order as in
# the PCA figure.
merged = (masked_region_df_pbr
          .merge(pca_results[['group', 'subject_id', 'first_feature']], on='subject_id')
          .sort_values(by='first_feature')
          .reset_index(drop=True))
# The four regions of interest, one panel each in the figure below.
regions = ['pallidum', 'putamen', 'caudate', 'thalamus']
region_data = [merged[merged['region'] == region] for region in regions]
def plot_roi_histograms(data, ax, ax_hist):
    """Draw per-subject violin plots of voxel values for one region on `ax`,
    plus per-group kernel-density curves (drawn vertically) on `ax_hist`.

    Parameters
    ----------
    data : DataFrame with at least 'subject_id', 'value' and 'group' columns.
    ax : matplotlib Axes receiving the violin plots.
    ax_hist : matplotlib Axes receiving the marginal group densities.
    """
    # scale='count' makes violin widths proportional to voxel count.
    # NOTE(review): `bw=`, `scale=` and kdeplot's `vertical=True` are
    # deprecated/removed in recent seaborn releases -- pin the seaborn
    # version or migrate to bw_method/density_norm/filled orientation args.
    sns.violinplot(data=data, x="subject_id", y="value", bw=.2,
                   scale='count', cut=1, linewidth=1, ax=ax)
    groups = data.groupby('group')
    for name, group in groups:
        # pre-HD is dashed so it stays distinguishable from HD in grayscale.
        sns.kdeplot(group['value'], vertical=True, ax=ax_hist,
                    label=name, color=subject_colors[name],
                    ls=('--' if name=='pre-HD' else '-'))
fig = plt.figure(figsize=(14, 6))
# gridspec inside gridspec: a 2x2 grid of regions, each cell split into the
# violin panel (left, 9/11 of the width) and the marginal densities (right).
outer_grid = gridspec.GridSpec(2, 2, wspace=0.1, hspace=0.1)
for i in range(4):
    inner_grid = gridspec.GridSpecFromSubplotSpec(1, 2,
        subplot_spec=outer_grid[i], wspace=0.0, hspace=0.0, width_ratios=[9,2],)
    ax = plt.Subplot(fig, inner_grid[0])
    ax_hist = plt.Subplot(fig, inner_grid[1])
    plot_roi_histograms(region_data[i], ax, ax_hist)
    ax.set_title(regions[i])
    # Common y-range across all four regions so panels are comparable.
    ax.set(ylim=(masked_region_df_pbr['value'].min(), masked_region_df_pbr['value'].max()))
    ax_hist.set(ylim=(masked_region_df_pbr['value'].min(), masked_region_df_pbr['value'].max()))
    ax.set_yticks([0.6, 0.8, 1, 1.2, 1.4, 1.6])
    # Show x tick labels only on the lower plots, as patient numbers instead
    # of subject_id.
    # NOTE(review): len(merged) is the number of voxel ROWS, not subjects;
    # presumably only the first num-subjects labels are consumed by the
    # existing ticks -- confirm, and prefer the subject count explicitly.
    if i in [0,1]:
        ax.set_xticks([])
    else:
        ax.set_xticklabels(list(range(1, len(merged)+1)))
    ax_hist.set_xticks([])
    ax_hist.set_yticks([])
    ax.set_xlabel('')
    ax.set_ylabel('')
    fig.add_subplot(ax)
    fig.add_subplot(ax_hist)
all_axes = fig.get_axes()
# show only the outside spines
for ax in all_axes:
    for sp in ax.spines.values():
        sp.set_visible(False)
plt.savefig('results/figs/regions_of_interest_pbr.pdf', format='pdf')
We can assess the difference between the distributions above using a permutation test. We compare the distributions for the control and HD for each region using the mean and the median of each subject as input.
In [11]:
def bipartition_means_difference(seq, size_l, size_r):
    '''
    Calculate the difference between the averages of a random bipartition.

    The sequence is randomly permuted (numpy global RNG) and split into a
    left part of size_l elements and a right part of size_r elements; the
    return value is left.mean() - right.mean().

    Unlike the original in-place np.random.shuffle, np.random.permutation is
    used, so the caller's array is NOT mutated.

    Parameters
    ==========
    seq: numpy array like object
    size_l: int size of the left part
    size_r: int size of the right part

    Raises
    ======
    ValueError if size_l + size_r != len(seq)
    '''
    if len(seq) != size_l + size_r:
        raise ValueError('Not a bipartition: len {} != {} + {}'.format(len(seq), size_l, size_r))
    permuted = np.random.permutation(seq)
    left = permuted[:size_l]
    right = permuted[-size_r:]
    return (left.mean() - right.mean())
def permutatation_test(z, y, num_samples):
    '''
    Calculate the p-value for a permutation test with num_samples draws.

    The two groups are pooled and repeatedly re-split at random; the returned
    p-value is the fraction of random splits whose mean difference does not
    exceed the observed z.mean() - y.mean().

    Parameters
    ==========
    z: numpy array like object with one group of observations
    y: numpy array like object with another group of observations
    num_samples: int with the number of samples
    '''
    pooled = np.hstack([z, y])
    observed_delta = z.mean() - y.mean()
    exceeding = 0
    for _ in range(num_samples):
        estimate = bipartition_means_difference(pooled, z.size, y.size)
        if estimate > observed_delta:
            exceeding += 1
    return 1.0 - float(exceeding) / float(num_samples)
num_samples = 100000
# Run the permutation test per region, once on per-subject medians and once
# on per-subject means.
for stat in ['median', 'mean']:
    print('Permutations test p-values using the {0} as input'.format(stat))
    for region in regions:
        # The first three rows are taken as the controls and rows from index
        # 4 onwards as HD -- presumably matching the cohort ordering of
        # masked_region_features_pbr; verify.
        # NOTE(review): index 3 is skipped entirely ([:3] vs [4:]). Confirm
        # that subject is deliberately excluded rather than an off-by-one.
        controls = np.array(masked_region_features_pbr['value'][stat][region][:3])
        hd = np.array(masked_region_features_pbr['value'][stat][region][4:])
        print (region, permutatation_test(controls, hd, num_samples))
In [12]:
# Voxel counts per (subject, region) serve as a proxy for region volume.
region_volumes = masked_region_df_pbr.groupby(['subject_id', 'region']).agg('count').unstack()
# Flatten the MultiIndex columns produced by unstack down to the region name
# (level 1). Replaces the original join/strip/split string hack, which broke
# on level values containing spaces.
region_volumes.columns = region_volumes.columns.get_level_values(1)
intracraneal_volume_df = participants_df[['subject_id', 'intracraneal_volume']].set_index('subject_id')
merged = region_volumes.join(intracraneal_volume_df)
# Normalize each region volume by the subject's intracranial volume.
merged = merged.div(merged.intracraneal_volume, axis='index')
normalized_region_volumes = hd_classifier.normalize(merged)
# The normalization column itself is no longer needed (avoid inplace).
normalized_region_volumes = normalized_region_volumes.drop('intracraneal_volume', axis=1)
to_fit = normalized_region_volumes.join(pca_results[['first_feature', 'subject_id', 'group']].set_index('subject_id'))
to_fit
Out[12]:
In [13]:
# statsmodels provides the OLS fit and the confidence/prediction interval
# table used in the regression figure below.
from statsmodels.stats.outliers_influence import summary_table
import statsmodels.api as sm
sns.set_style("ticks")
def fit_region_volume_vs_score(to_fit, region):
    """Fit an ordinary-least-squares line of one region's normalized volume
    against the first-PCA-axis score.

    Parameters
    ----------
    to_fit : DataFrame with a 'first_feature' column and one column per region.
    region : name of the region column to use as the response.

    Returns
    -------
    (x, y, fitted) -- the predictor series, the response values, and the
    fitted statsmodels OLS results object.
    """
    predictor = to_fit.first_feature
    response = to_fit[region].values
    # Prepend the intercept column expected by sm.OLS.
    design = sm.add_constant(predictor)
    fitted = sm.OLS(response, design).fit()
    return (predictor, response, fitted)
# Fit and plot the caudate volume against the inflammation score.
# NOTE(review): `re` shadows the stdlib module name; harmless here but easy
# to trip over.
x, y, re = fit_region_volume_vs_score(to_fit, 'caudate')
print(re.summary())
# summary_table returns one row per observation; columns 2..7 hold the fitted
# value, its s.e., the 95% CI of the mean and the 95% prediction interval.
st, data, ss2 = summary_table(re, alpha=0.05)
fittedvalues = data[:,2]
predict_mean_se = data[:,3]
predict_mean_ci_low, predict_mean_ci_upp = data[:,4:6].T
predict_ci_low, predict_ci_upp = data[:,6:8].T
# Regression line (solid blue), prediction interval (red dashed) and CI of
# the mean (blue dashed).
# NOTE(review): x is plotted in row order, not sorted; the interval curves
# only render as clean lines if first_feature is monotone in row order --
# verify, since to_fit was joined on the subject_id index.
plt.plot(x, fittedvalues, 'b-', lw=1)
plt.plot(x, predict_ci_low, 'r--', lw=1.5)
plt.plot(x, predict_ci_upp, 'r--', lw=1.5)
plt.plot(x, predict_mean_ci_low, 'b--', lw=1)
plt.plot(x, predict_mean_ci_upp, 'b--', lw=1)
groups = to_fit.groupby('group')
for name, items in groups:
    plt.scatter(items.first_feature, items.caudate, s=marker_size, alpha=0.4,
                c=subject_colors[name], label=name)
# Label each point with a 1-based row number.
# NOTE(review): x carries the subject_id index, so x[idx] relies on pandas'
# positional-integer fallback, deprecated in recent versions -- prefer
# x.iloc[idx].
for idx in range(len(to_fit)):
    plt.text(x[idx], y[idx], str(idx+1),
             horizontalalignment='center', verticalalignment='center')
plt.xlabel('first PCA axis')
plt.ylabel('caudate volume')
plt.legend(labelspacing=1.4)
plt.savefig('results/figs/region_volumes_vs_inflammation_scores.pdf', format='pdf')
In [14]:
# Print the OLS summaries for the remaining three regions (caudate is shown
# in full above); only the fitted results object is needed here.
for region in ['pallidum', 'putamen', 'thalamus']:
    _, _, fit_result = fit_region_volume_vs_score(to_fit, region)
    print('Correlation between ' + region + ' volume and first PC')
    print(fit_result.summary())
    print('+' * 91)
Get the images in MNI space, instead of subject space, so we can use the atlas, and then apply all the masks in the atlas to each of the subject's MNI images. The result is a dictionary masked_imgs with the masked images for each nuclei and patient.
As the license of the Morel atlas does not allow redistribution, you need to get a licensed copy of the Morel atlas before using the code below to generate the plots yourself.
In [15]:
from functools import reduce
from nilearn.image import resample_to_img, new_img_like
from nilearn.masking import apply_mask
def merge_nuclei_masks(nuclei, nuclei_masks, imgs):
    """Union several nucleus masks and apply the joint mask to all images.

    Parameters
    ----------
    nuclei : iterable of keys into nuclei_masks selecting the masks to merge.
    nuclei_masks : dict mapping nucleus name -> mask image.
    imgs : list of images in a common space; imgs[0] is used as the
        resampling target for the masks.

    Returns
    -------
    The array produced by nilearn.masking.apply_mask for the joint mask.
    """
    masks_to_join = []
    for k in nuclei:
        # Resample the atlas mask onto the image grid; > 0.5 re-binarizes
        # the interpolated values.
        # NOTE(review): get_data() is deprecated in nibabel in favor of
        # get_fdata() -- pin the nibabel version or migrate.
        resampled = resample_to_img(nuclei_masks[k], imgs[0])
        masks_to_join.append(resampled.get_data() > 0.5)
    # Voxel-wise OR of all the individual nucleus masks.
    joint_mask_data_as_bool = reduce(np.logical_or, masks_to_join)
    joint_mask_as_bool = new_img_like(imgs[0], joint_mask_data_as_bool)
    masked_nuclei = apply_mask(imgs, joint_mask_as_bool)
    return masked_nuclei
def find_threshold(imgs, q):
    """Pool each image's top (1 - q) fraction of voxel values and return one
    value from the pooled sample, used as the guide-line threshold in the
    nuclei histogram plots.

    Parameters
    ----------
    imgs : sequence of 1-D arrays of voxel values (one per subject).
    q : float in (0, 1); per-image quantile cut (e.g. 0.90 keeps each
        image's top 10% of voxels).
    """
    from math import floor
    parts = []
    for img in imgs:
        # Keep only the values at or above each image's own q-quantile.
        threshold_index = floor(q * len(img))
        parts.append(np.partition(img, threshold_index)[threshold_index:])
    merged = np.concatenate(parts)
    # NOTE(review): this index looks wrong. len(merged)/len(parts) is the
    # average part size, so the expression is a large NEGATIVE number and
    # np.partition then selects close to the END of the pooled array
    # (negative indexing). Possibly floor((1 - q) * len(merged)) or a
    # percentile of `merged` was intended -- confirm before trusting the
    # absolute threshold values shown in the figure.
    merged_threshold = floor (1- len(merged) / len(parts))
    return np.partition(merged, merged_threshold)[merged_threshold]
def make_masked_nuclei_imgs(imgs, path_to_morel_atlas, excludes):
    """Load every left/right nucleus mask from the Morel atlas and apply each
    one to the subject images.

    Parameters
    ----------
    imgs : list of subject images in a common space; imgs[0] is the
        resampling target for the atlas masks.
    path_to_morel_atlas : directory containing 'left-vols-1mm' and
        'right-vols-1mm' subdirectories of .nii.gz mask volumes.
    excludes : iterable of nucleus-name prefixes (checked after stripping the
        left_/right_ prefix), e.g. ['global', 'MAX'], to skip.

    Returns
    -------
    (masked_img, nuclei_masks, nuclei_sizes): dicts keyed by nucleus name
    holding the masked voxel arrays, the loaded mask images, and the voxel
    counts of the resampled masks.

    Raises
    ------
    FileNotFoundError if path_to_morel_atlas is not a directory.
    """
    if not os.path.isdir(path_to_morel_atlas):
        # Include the offending path instead of raising a bare class.
        raise FileNotFoundError(path_to_morel_atlas)
    left_volumes = glob.glob(os.path.join(path_to_morel_atlas, 'left-vols-1mm/*.nii.gz'))
    right_volumes = glob.glob(os.path.join(path_to_morel_atlas, 'right-vols-1mm/*.nii.gz'))

    def parse_nuclei_name(vol):
        # '<side>-vols-1mm/NAME.nii.gz' -> '<side>_NAME'.
        # basename(dirname(...)) is portable, unlike splitting on '/'.
        side = os.path.basename(os.path.dirname(vol)).split('-')[0]
        return side + '_' + os.path.basename(vol).split('.')[0]

    nuclei_mask_dict = {parse_nuclei_name(vol): vol
                        for vol in left_volumes + right_volumes
                        if not ''.join(parse_nuclei_name(vol).split('_')[1:]).startswith(tuple(excludes))}
    nuclei_masks = {k: nib.load(v) for k, v in nuclei_mask_dict.items()}
    masked_img = {}
    nuclei_sizes = {}
    for k, v in nuclei_masks.items():
        # Resample onto the subject grid and re-binarize (> 0.5).
        resampled = resample_to_img(v, imgs[0])
        resampled_data_as_bool = resampled.get_data() > 0.5
        nuclei_sizes[k] = np.sum(resampled_data_as_bool)
        resampled_as_bool = new_img_like(resampled, resampled_data_as_bool)
        try:
            masked_img[k] = apply_mask(imgs, resampled_as_bool)
        except Exception as err:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; report which nucleus failed and why, then keep
            # processing the remaining nuclei (original best-effort behavior).
            print('Something is wrong for nucleus {} ({}), but I can keep going for the rest.'.format(k, err))
            continue
    return masked_img, nuclei_masks, nuclei_sizes
path_to_tracer_data = './data/func'
items = os.listdir(path_to_tracer_data)
# Rebuild the subject list (same call as in the feature-extraction cell) and
# this time select the MNI-space smoothed, normalized images so the atlas
# masks can be applied.
subjects = hd_classifier.make_subjects(items, path_to_tracer_data)
mni_image_filter = 'nl_MNI152_norm_sm6mm.nii'
# Order subjects by their row position in pca_results (inflammation score)
# so subject index i maps to label i+1 in the plots below; the assert checks
# the two orderings really agree.
subjects.sort(key=lambda s: pca_results[pca_results['subject_id']==s.subject_id].index.tolist()[0])
assert ([s.subject_id for s in subjects] == list(pca_results['subject_id']))
mni_images = map(lambda s: hd_classifier.find_masks(s.images, mni_image_filter), subjects)
imgs = [nib.load(image) for image in list(itertools.chain.from_iterable(mni_images))]
path_to_morel_atlas = './data/private/Atlas/Morel'
try:
    excluded_nuclei = [] # = ['global', 'MAX']
    masked_img, nuclei_masks, nuclei_sizes = make_masked_nuclei_imgs(imgs, path_to_morel_atlas, excluded_nuclei)
except FileNotFoundError:
    # NOTE(review): when the atlas is missing, masked_img / nuclei_masks stay
    # undefined and the following cells raise NameError.
    print("You need a copy of the Morel Atlas")
Plot the histogram of uptake for each relevant nucleus and subject. For the remaining nuclei in the Morel Atlas, the differences between controls and patients are not as marked as for those selected. The vertical lines are a guide to the eye and mark the value of SUVR corresponding to the 95th percentile of all voxels in the control group.
In [16]:
relevant_nuclei_group_names = ['left_VLpv', 'left_VApc', 'left_PuL']
# For each group name, collect every nucleus whose name contains it, so
# related sub-nuclei are merged into one joint mask.
relevant_nuclei_groups = { k: [it for it in list(nuclei_masks.keys()) if k in it] for k in relevant_nuclei_group_names }
masked_relevant_groups = {k: merge_nuclei_masks(v, nuclei_masks, imgs) for k, v in relevant_nuclei_groups.items()}
fig = plt.figure(figsize=(10, 7))
num_subjects = len(subjects)
# One gridspec column per relevant nuclei group; each column is split into
# one row per subject below.
outer_grid = gridspec.GridSpec(1, len(masked_relevant_groups), wspace=0.09, hspace=0)
min_x, max_x = 0.4, 2
# One fixed hls color per subject, shared across the three columns.
pallete = sns.color_palette("hls", num_subjects)
grey_shadow = '#857e7e'
for j, (k, v) in enumerate(masked_relevant_groups.items()):
    column_grid = gridspec.GridSpecFromSubplotSpec(num_subjects, 1,
        subplot_spec=outer_grid[j], wspace=0, hspace=0.0)
    for i in range(0, num_subjects):
        # Guide-line threshold from the first 6 subjects' voxels for this
        # nucleus.
        # NOTE(review): the hard-coded 6 presumably selects the controls --
        # confirm it matches the cohort. Also, the value is loop-invariant
        # in i and could be hoisted out of this inner loop.
        threshold = find_threshold(masked_img[k][:6], 0.90)
        ax = plt.Subplot(fig, column_grid[i])
        ax.set(xlim=(min_x, max_x))
        if i == 0: ax.set_title(k)
        if i == num_subjects - 1:
            # Only the bottom row keeps x tick labels.
            ax.set_xticks([min_x, threshold, max_x])
            ax.set_xticklabels([min_x, round(threshold, 2), max_x])
        else:
            ax.set_xticks([])
        ax.set_yticks([])
        ax.set_yticklabels([])
        ax.set_xlabel('')
        if j == 0:
            # Row label: subject number, same ordering as the earlier figures.
            ax.set_ylabel(str(i+1), rotation='horizontal')
        ax.axvline(x=threshold, color=grey_shadow, ls=':')
        sns.kdeplot(v[i], shade=True, color=pallete[i], # use the same colors as before to identify subjects
                    ax=ax)
        #if 'left_PuL' in k:
        #    sns.kdeplot(masked_img['right_PuL'][i], shade=True, color=grey_shadow, ax=ax)
        # Dashed grey reference curve: the subject's 'left_global' mask
        # distribution.
        sns.kdeplot(masked_img['left_global'][i], ls='--', ax=ax, color=grey_shadow)
        fig.add_subplot(ax)
plt.savefig('results/figs/nuclei.pdf', format='pdf')