In [41]:
%matplotlib inline
import matplotlib.pyplot as plt

from ping.research.data import get_all_data, map_colors, strip_prefix

# Load the 'desikan' dataset, then narrow it with three successive filter
# passes. The return value of filter() is not captured, so it presumably
# narrows `data` in place -- NOTE(review): confirm against PINGData.filter.
data = get_all_data('desikan')

# Key prefixes of the measure families kept by the first pass.
MEASURE_PREFIXES = ('MRI_cort_area', 'MRI_cort_thick', 'DTI_fiber_vol')


def _is_measure_or_gender(k, v):
    """Keep keys from the three measure families, or any key mentioning gender."""
    return k.startswith(MEASURE_PREFIXES) or 'gender' in k.lower()


def _is_bilateral_or_gender(k, v):
    """Keep keys ending in 'LH_PLUS_RH', plus the exact key 'Gender'."""
    return k == 'Gender' or k.endswith('LH_PLUS_RH')


def _is_not_fuzzy(k, v):
    """Drop every key containing the substring 'fuzzy'."""
    return 'fuzzy' not in k


data.filter(_is_measure_or_gender)
data.filter(_is_bilateral_or_gender)
data.filter(_is_not_fuzzy)


Computing derived data...
PCA data matrix size: (447, 1119)
Out[41]:
<ping.ping.data.default.PINGData at 0x1136c65d0>

In [42]:
# Number of entries remaining in the filtered data dictionary.
len(data.data_dict)


Out[42]:
92

In [43]:
import copy
import numpy as np

# Build the design matrix X (one row per subject, one column per feature),
# the label vector y and the feature-name array X_lbl.

# Work on a deep copy so the deletions below do not touch data.data_dict.
mydata = copy.deepcopy(data.data_dict)
del mydata['SubjID']

# Encode the label as +1 where Gender == 'F' and -1 otherwise.
# Assumes mydata['Gender'] is a numpy array so that `==` is elementwise
# -- TODO confirm.
y = mydata['Gender']
y = 2 * (y == 'F') - 1
del mydata['Gender']

# list(...) keeps this working when values()/keys() return dict views;
# both calls iterate the same unmodified dict, so columns and labels line up.
X = np.asarray(list(mydata.values()), dtype=float).T
X_lbl = np.asarray(list(mydata.keys()))

# Remove nan columns: drop features that are NaN for every subject, and drop
# their labels too so X and X_lbl stay aligned. (Previously the all-NaN test
# was computed but its result was discarded, so nothing was removed.)
all_nan_cols = np.isnan(X).all(axis=0)
X = X[:, ~all_nan_cols]
X_lbl = X_lbl[~all_nan_cols]

# Remove nan subjects: keep only rows without a single NaN.
good_idx = ~np.isnan(X).any(axis=1)
X = X[good_idx]
y = y[good_idx]
X.shape, np.unique(y)


Out[43]:
((1119, 90), array([-1,  1]))

In [44]:
# Select data with different means
#
# For every feature, compare the female-group and male-group means (each
# divided by the pooled mean), plot their distributions, and build a boolean
# mask `split_feat_idx` marking the features whose female mean deviates most
# from the pooled mean.
MEAN_DIFF_THRESHOLD = 0.025  # minimum |female mean / pooled mean - 1| to keep a feature
N_TOP_FEATURES = 25          # number of most-different features finally selected

female_data = X[y==1]    # rows labelled +1 ('F' in the legends below)
male_data = X[y==-1]     # rows labelled -1 ('M' in the legends below)
all_means = X.mean(axis=0)
female_norm_means = female_data.mean(axis=0) / all_means
male_norm_means = male_data.mean(axis=0) / all_means

plt.figure(figsize=(16,6))
plt.subplot(1,3,1)
plt.hist(female_norm_means, 25)
plt.subplot(1,3,2)
plt.hist(male_norm_means, 25)
plt.subplot(1,3,3)
plt.hist([female_norm_means, male_norm_means], 25)
plt.legend(['F', 'M'])

# Now choose only the well-split features
split_feat_idx = np.abs(female_norm_means - 1) > MEAN_DIFF_THRESHOLD
print(split_feat_idx.sum())

plt.figure(figsize=(16,6))
plt.subplot(1,3,1)
plt.hist(female_norm_means[split_feat_idx], 25)
plt.subplot(1,3,2)
plt.hist(male_norm_means[split_feat_idx], 25)
plt.subplot(1,3,3)
plt.hist([female_norm_means[split_feat_idx],
          male_norm_means[split_feat_idx]], 25)
plt.legend(['F', 'M'])

# select the 25 most different values.
# argsort runs on the thresholded subset, so its result indexes that subset;
# map it back through the subset's column numbers before writing into the
# full-length mask (using the subset positions directly flags wrong features).
candidate_cols = np.flatnonzero(split_feat_idx)
idx = np.argsort(np.abs(1 - female_norm_means[candidate_cols]))
top_cols = candidate_cols[idx[-N_TOP_FEATURES:]]
split_feat_idx = np.zeros(split_feat_idx.shape, dtype=bool)
split_feat_idx[top_cols] = True
print(split_feat_idx.sum())
# Applying the selection to X / X_lbl is left disabled, as in the original:
#X = X[:, split_feat_idx]
#X_lbl = np.asarray(X_lbl)[split_feat_idx]


55
25

In [45]:
# Look at the std distributions
#
# Per-feature standard deviation of each group, divided by the pooled
# per-feature standard deviation, shown as histograms.
female_data = X[y==1]    # rows labelled +1 ('F' in the legends below)
male_data = X[y==-1]     # rows labelled -1 ('M' in the legends below)
# Pooled std across all subjects. This was previously X.mean(axis=0), which
# did not match the variable name or the std-based ratios below.
all_stds = X.std(axis=0)
female_norm_stds = female_data.std(axis=0) / all_stds
male_norm_stds = male_data.std(axis=0) / all_stds

plt.figure(figsize=(16,6))
plt.subplot(1,3,1)
plt.hist(female_norm_stds, 25)
plt.subplot(1,3,2)
plt.hist(male_norm_stds, 25)
plt.subplot(1,3,3)
plt.hist([female_norm_stds, male_norm_stds], 25)
plt.legend(['F', 'M'])

# Now choose only the well-split features
# NOTE(review): a threshold of 0 keeps every feature with a non-zero female
# std, so this is effectively no selection -- confirm the intended cut-off.
# This also rebinds `split_feat_idx`.
split_feat_idx = np.abs(female_norm_stds) > 0
print(split_feat_idx.sum())

plt.figure(figsize=(16,6))
plt.subplot(1,3,1)
plt.hist(female_norm_stds[split_feat_idx], 25)
plt.subplot(1,3,2)
plt.hist(male_norm_stds[split_feat_idx], 25)
plt.subplot(1,3,3)
plt.hist([female_norm_stds[split_feat_idx],
          male_norm_stds[split_feat_idx]], 25)
plt.legend(['F', 'M'])


90
Out[45]:
<matplotlib.legend.Legend at 0x1086cce50>

In [46]:
from sklearn.cross_validation import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier fitted once on every feature.
# NOTE(review): the name `svc` and the old `kernel="rbf"` remnant suggest
# this used to be an SVC; the name is kept as-is.
svc = LogisticRegression()
svc.fit(X, y)

# Recursive feature elimination with cross-validation: one feature is dropped
# per step and each subset is scored over 2 stratified folds.
# The "accuracy" scoring is proportional to the number of correct
# classifications
folds = StratifiedKFold(y, 2)
rfecv = RFECV(estimator=svc, step=1, cv=folds,
              scoring='accuracy', verbose=10)
rfecv.fit(X, y)

print("Optimal number of features : %d" % rfecv.n_features_)

# Plot number of features VS. cross-validation scores
cv_scores = rfecv.grid_scores_
n_features_tried = range(1, len(cv_scores) + 1)
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (nb of correct classifications)")
plt.plot(n_features_tried, cv_scores)


Fitting estimator with 90 features.
Fitting estimator with 89 features.
Fitting estimator with 88 features.
Fitting estimator with 87 features.
Fitting estimator with 86 features.
Fitting estimator with 85 features.
Fitting estimator with 84 features.
Fitting estimator with 83 features.
Fitting estimator with 82 features.
Fitting estimator with 81 features.
Fitting estimator with 80 features.
Fitting estimator with 79 features.
Fitting estimator with 78 features.
Fitting estimator with 77 features.
Fitting estimator with 76 features.
Fitting estimator with 75 features.
Fitting estimator with 74 features.
Fitting estimator with 73 features.
Fitting estimator with 72 features.
Fitting estimator with 71 features.
Fitting estimator with 70 features.
Fitting estimator with 69 features.
Fitting estimator with 68 features.
Fitting estimator with 67 features.
Fitting estimator with 66 features.
Fitting estimator with 65 features.
Fitting estimator with 64 features.
Fitting estimator with 63 features.
Fitting estimator with 62 features.
Fitting estimator with 61 features.
Fitting estimator with 60 features.
Fitting estimator with 59 features.
Fitting estimator with 58 features.
Fitting estimator with 57 features.
Fitting estimator with 56 features.
Fitting estimator with 55 features.
Fitting estimator with 54 features.
Fitting estimator with 53 features.
Fitting estimator with 52 features.
Fitting estimator with 51 features.
Fitting estimator with 50 features.
Fitting estimator with 49 features.
Fitting estimator with 48 features.
Fitting estimator with 47 features.
Fitting estimator with 46 features.
Fitting estimator with 45 features.
Fitting estimator with 44 features.
Fitting estimator with 43 features.
Fitting estimator with 42 features.
Fitting estimator with 41 features.
Fitting estimator with 40 features.
Fitting estimator with 39 features.
Fitting estimator with 38 features.
Fitting estimator with 37 features.
Fitting estimator with 36 features.
Fitting estimator with 35 features.
Fitting estimator with 34 features.
Fitting estimator with 33 features.
Fitting estimator with 32 features.
Fitting estimator with 31 features.
Fitting estimator with 30 features.
Fitting estimator with 29 features.
Fitting estimator with 28 features.
Fitting estimator with 27 features.
Fitting estimator with 26 features.
Fitting estimator with 25 features.
Fitting estimator with 24 features.
Fitting estimator with 23 features.
Fitting estimator with 22 features.
Fitting estimator with 21 features.
Fitting estimator with 20 features.
Fitting estimator with 19 features.
Fitting estimator with 18 features.
Fitting estimator with 17 features.
Fitting estimator with 16 features.
Fitting estimator with 15 features.
Fitting estimator with 14 features.
Fitting estimator with 13 features.
Fitting estimator with 12 features.
Fitting estimator with 11 features.
Fitting estimator with 10 features.
Fitting estimator with 9 features.
Fitting estimator with 8 features.
Fitting estimator with 7 features.
Fitting estimator with 6 features.
Fitting estimator with 5 features.
Fitting estimator with 4 features.
Fitting estimator with 3 features.
Fitting estimator with 2 features.
Finished fold with 1 / 90 feature ranks, score=0.523214
Finished fold with 2 / 90 feature ranks, score=0.519643
Finished fold with 3 / 90 feature ranks, score=0.560714
Finished fold with 4 / 90 feature ranks, score=0.555357
Finished fold with 5 / 90 feature ranks, score=0.596429
Finished fold with 6 / 90 feature ranks, score=0.592857
Finished fold with 7 / 90 feature ranks, score=0.617857
Finished fold with 8 / 90 feature ranks, score=0.617857
Finished fold with 9 / 90 feature ranks, score=0.639286
Finished fold with 10 / 90 feature ranks, score=0.635714
Finished fold with 11 / 90 feature ranks, score=0.617857
Finished fold with 12 / 90 feature ranks, score=0.612500
Finished fold with 13 / 90 feature ranks, score=0.623214
Finished fold with 14 / 90 feature ranks, score=0.626786
Finished fold with 15 / 90 feature ranks, score=0.626786
Finished fold with 16 / 90 feature ranks, score=0.625000
Finished fold with 17 / 90 feature ranks, score=0.632143
Finished fold with 18 / 90 feature ranks, score=0.639286
Finished fold with 19 / 90 feature ranks, score=0.632143
Finished fold with 20 / 90 feature ranks, score=0.616071
Finished fold with 21 / 90 feature ranks, score=0.617857
Finished fold with 22 / 90 feature ranks, score=0.625000
Finished fold with 23 / 90 feature ranks, score=0.625000
Finished fold with 24 / 90 feature ranks, score=0.621429
Finished fold with 25 / 90 feature ranks, score=0.619643
Finished fold with 26 / 90 feature ranks, score=0.625000
Finished fold with 27 / 90 feature ranks, score=0.632143
Finished fold with 28 / 90 feature ranks, score=0.632143
Finished fold with 29 / 90 feature ranks, score=0.628571
Finished fold with 30 / 90 feature ranks, score=0.626786
Finished fold with 31 / 90 feature ranks, score=0.628571
Finished fold with 32 / 90 feature ranks, score=0.632143
Finished fold with 33 / 90 feature ranks, score=0.632143
Finished fold with 34 / 90 feature ranks, score=0.632143
Finished fold with 35 / 90 feature ranks, score=0.630357
Finished fold with 36 / 90 feature ranks, score=0.639286
Finished fold with 37 / 90 feature ranks, score=0.735714
Finished fold with 38 / 90 feature ranks, score=0.687500
Finished fold with 39 / 90 feature ranks, score=0.680357
Finished fold with 40 / 90 feature ranks, score=0.673214
Finished fold with 41 / 90 feature ranks, score=0.682143
Finished fold with 42 / 90 feature ranks, score=0.685714
Finished fold with 43 / 90 feature ranks, score=0.691071
Finished fold with 44 / 90 feature ranks, score=0.716071
Finished fold with 45 / 90 feature ranks, score=0.683929
Finished fold with 46 / 90 feature ranks, score=0.675000
Finished fold with 47 / 90 feature ranks, score=0.676786
Finished fold with 48 / 90 feature ranks, score=0.692857
Finished fold with 49 / 90 feature ranks, score=0.657143
Finished fold with 50 / 90 feature ranks, score=0.675000
Finished fold with 51 / 90 feature ranks, score=0.691071
Finished fold with 52 / 90 feature ranks, score=0.682143
Finished fold with 53 / 90 feature ranks, score=0.696429
Finished fold with 54 / 90 feature ranks, score=0.694643
Finished fold with 55 / 90 feature ranks, score=0.689286
Finished fold with 56 / 90 feature ranks, score=0.671429
Finished fold with 57 / 90 feature ranks, score=0.666071
Finished fold with 58 / 90 feature ranks, score=0.676786
Finished fold with 59 / 90 feature ranks, score=0.682143
Finished fold with 60 / 90 feature ranks, score=0.698214
Finished fold with 61 / 90 feature ranks, score=0.683929
Finished fold with 62 / 90 feature ranks, score=0.685714
Finished fold with 63 / 90 feature ranks, score=0.687500
Finished fold with 64 / 90 feature ranks, score=0.678571
Finished fold with 65 / 90 feature ranks, score=0.682143
Finished fold with 66 / 90 feature ranks, score=0.673214
Finished fold with 67 / 90 feature ranks, score=0.685714
Finished fold with 68 / 90 feature ranks, score=0.675000
Finished fold with 69 / 90 feature ranks, score=0.687500
Finished fold with 70 / 90 feature ranks, score=0.682143
Finished fold with 71 / 90 feature ranks, score=0.680357
Finished fold with 72 / 90 feature ranks, score=0.687500
Finished fold with 73 / 90 feature ranks, score=0.689286
Finished fold with 74 / 90 feature ranks, score=0.687500
Finished fold with 75 / 90 feature ranks, score=0.692857
Finished fold with 76 / 90 feature ranks, score=0.682143
Finished fold with 77 / 90 feature ranks, score=0.687500
Finished fold with 78 / 90 feature ranks, score=0.676786
Finished fold with 79 / 90 feature ranks, score=0.669643
Finished fold with 80 / 90 feature ranks, score=0.694643
Finished fold with 81 / 90 feature ranks, score=0.685714
Finished fold with 82 / 90 feature ranks, score=0.675000
Finished fold with 83 / 90 feature ranks, score=0.678571
Finished fold with 84 / 90 feature ranks, score=0.676786
Finished fold with 85 / 90 feature ranks, score=0.675000
Finished fold with 86 / 90 feature ranks, score=0.675000
Finished fold with 87 / 90 feature ranks, score=0.676786
Finished fold with 88 / 90 feature ranks, score=0.675000
Finished fold with 89 / 90 feature ranks, score=0.667857
Finished fold with 90 / 90 feature ranks, score=0.682143
Fitting estimator with 90 features.
Fitting estimator with 89 features.
Fitting estimator with 88 features.
Fitting estimator with 87 features.
Fitting estimator with 86 features.
Fitting estimator with 85 features.
Fitting estimator with 84 features.
Fitting estimator with 83 features.
Fitting estimator with 82 features.
Fitting estimator with 81 features.
Fitting estimator with 80 features.
Fitting estimator with 79 features.
Fitting estimator with 78 features.
Fitting estimator with 77 features.
Fitting estimator with 76 features.
Fitting estimator with 75 features.
Fitting estimator with 74 features.
Fitting estimator with 73 features.
Fitting estimator with 72 features.
Fitting estimator with 71 features.
Fitting estimator with 70 features.
Fitting estimator with 69 features.
Fitting estimator with 68 features.
Fitting estimator with 67 features.
Fitting estimator with 66 features.
Fitting estimator with 65 features.
Fitting estimator with 64 features.
Fitting estimator with 63 features.
Fitting estimator with 62 features.
Fitting estimator with 61 features.
Fitting estimator with 60 features.
Fitting estimator with 59 features.
Fitting estimator with 58 features.
Fitting estimator with 57 features.
Fitting estimator with 56 features.
Fitting estimator with 55 features.
Fitting estimator with 54 features.
Fitting estimator with 53 features.
Fitting estimator with 52 features.
Fitting estimator with 51 features.
Fitting estimator with 50 features.
Fitting estimator with 49 features.
Fitting estimator with 48 features.
Fitting estimator with 47 features.
Fitting estimator with 46 features.
Fitting estimator with 45 features.
Fitting estimator with 44 features.
Fitting estimator with 43 features.
Fitting estimator with 42 features.
Fitting estimator with 41 features.
Fitting estimator with 40 features.
Fitting estimator with 39 features.
Fitting estimator with 38 features.
Fitting estimator with 37 features.
Fitting estimator with 36 features.
Fitting estimator with 35 features.
Fitting estimator with 34 features.
Fitting estimator with 33 features.
Fitting estimator with 32 features.
Fitting estimator with 31 features.
Fitting estimator with 30 features.
Fitting estimator with 29 features.
Fitting estimator with 28 features.
Fitting estimator with 27 features.
Fitting estimator with 26 features.
Fitting estimator with 25 features.
Fitting estimator with 24 features.
Fitting estimator with 23 features.
Fitting estimator with 22 features.
Fitting estimator with 21 features.
Fitting estimator with 20 features.
Fitting estimator with 19 features.
Fitting estimator with 18 features.
Fitting estimator with 17 features.
Fitting estimator with 16 features.
Fitting estimator with 15 features.
Fitting estimator with 14 features.
Fitting estimator with 13 features.
Fitting estimator with 12 features.
Fitting estimator with 11 features.
Fitting estimator with 10 features.
Fitting estimator with 9 features.
Fitting estimator with 8 features.
Fitting estimator with 7 features.
Fitting estimator with 6 features.
Fitting estimator with 5 features.
Fitting estimator with 4 features.
Fitting estimator with 3 features.
Fitting estimator with 2 features.
Finished fold with 1 / 90 feature ranks, score=0.531306
Finished fold with 2 / 90 feature ranks, score=0.550984
Finished fold with 3 / 90 feature ranks, score=0.576029
Finished fold with 4 / 90 feature ranks, score=0.597496
Finished fold with 5 / 90 feature ranks, score=0.595707
Finished fold with 6 / 90 feature ranks, score=0.586762
Finished fold with 7 / 90 feature ranks, score=0.601073
Finished fold with 8 / 90 feature ranks, score=0.613596
Finished fold with 9 / 90 feature ranks, score=0.601073
Finished fold with 10 / 90 feature ranks, score=0.615385
Finished fold with 11 / 90 feature ranks, score=0.615385
Finished fold with 12 / 90 feature ranks, score=0.608229
Finished fold with 13 / 90 feature ranks, score=0.626118
Finished fold with 14 / 90 feature ranks, score=0.615385
Finished fold with 15 / 90 feature ranks, score=0.631485
Finished fold with 16 / 90 feature ranks, score=0.636852
Finished fold with 17 / 90 feature ranks, score=0.608229
Finished fold with 18 / 90 feature ranks, score=0.610018
Finished fold with 19 / 90 feature ranks, score=0.617174
Finished fold with 20 / 90 feature ranks, score=0.626118
Finished fold with 21 / 90 feature ranks, score=0.617174
Finished fold with 22 / 90 feature ranks, score=0.622540
Finished fold with 23 / 90 feature ranks, score=0.620751
Finished fold with 24 / 90 feature ranks, score=0.624329
Finished fold with 25 / 90 feature ranks, score=0.626118
Finished fold with 26 / 90 feature ranks, score=0.622540
Finished fold with 27 / 90 feature ranks, score=0.626118
Finished fold with 28 / 90 feature ranks, score=0.618962
Finished fold with 29 / 90 feature ranks, score=0.618962
Finished fold with 30 / 90 feature ranks, score=0.617174
Finished fold with 31 / 90 feature ranks, score=0.624329
Finished fold with 32 / 90 feature ranks, score=0.624329
Finished fold with 33 / 90 feature ranks, score=0.624329
Finished fold with 34 / 90 feature ranks, score=0.624329
Finished fold with 35 / 90 feature ranks, score=0.624329
Finished fold with 36 / 90 feature ranks, score=0.669052
Finished fold with 37 / 90 feature ranks, score=0.694097
Finished fold with 38 / 90 feature ranks, score=0.697674
Finished fold with 39 / 90 feature ranks, score=0.694097
Finished fold with 40 / 90 feature ranks, score=0.699463
Finished fold with 41 / 90 feature ranks, score=0.692308
Finished fold with 42 / 90 feature ranks, score=0.717352
Finished fold with 43 / 90 feature ranks, score=0.674419
Finished fold with 44 / 90 feature ranks, score=0.679785
Finished fold with 45 / 90 feature ranks, score=0.677996
Finished fold with 46 / 90 feature ranks, score=0.636852
Finished fold with 47 / 90 feature ranks, score=0.652952
Finished fold with 48 / 90 feature ranks, score=0.670841
Finished fold with 49 / 90 feature ranks, score=0.677996
Finished fold with 50 / 90 feature ranks, score=0.685152
Finished fold with 51 / 90 feature ranks, score=0.688730
Finished fold with 52 / 90 feature ranks, score=0.642218
Finished fold with 53 / 90 feature ranks, score=0.665474
Finished fold with 54 / 90 feature ranks, score=0.654741
Finished fold with 55 / 90 feature ranks, score=0.669052
Finished fold with 56 / 90 feature ranks, score=0.672630
Finished fold with 57 / 90 feature ranks, score=0.663685
Finished fold with 58 / 90 feature ranks, score=0.674419
Finished fold with 59 / 90 feature ranks, score=0.656530
Finished fold with 60 / 90 feature ranks, score=0.672630
Finished fold with 61 / 90 feature ranks, score=0.663685
Finished fold with 62 / 90 feature ranks, score=0.652952
Finished fold with 63 / 90 feature ranks, score=0.690519
Finished fold with 64 / 90 feature ranks, score=0.667263
Finished fold with 65 / 90 feature ranks, score=0.663685
Finished fold with 66 / 90 feature ranks, score=0.660107
Finished fold with 67 / 90 feature ranks, score=0.672630
Finished fold with 68 / 90 feature ranks, score=0.670841
Finished fold with 69 / 90 feature ranks, score=0.670841
Finished fold with 70 / 90 feature ranks, score=0.672630
Finished fold with 71 / 90 feature ranks, score=0.674419
Finished fold with 72 / 90 feature ranks, score=0.674419
Finished fold with 73 / 90 feature ranks, score=0.679785
Finished fold with 74 / 90 feature ranks, score=0.676208
Finished fold with 75 / 90 feature ranks, score=0.674419
Finished fold with 76 / 90 feature ranks, score=0.672630
Finished fold with 77 / 90 feature ranks, score=0.676208
Finished fold with 78 / 90 feature ranks, score=0.658318
Finished fold with 79 / 90 feature ranks, score=0.667263
Finished fold with 80 / 90 feature ranks, score=0.670841
Finished fold with 81 / 90 feature ranks, score=0.670841
Finished fold with 82 / 90 feature ranks, score=0.667263
Finished fold with 83 / 90 feature ranks, score=0.677996
Finished fold with 84 / 90 feature ranks, score=0.667263
Finished fold with 85 / 90 feature ranks, score=0.672630
Finished fold with 86 / 90 feature ranks, score=0.670841
Finished fold with 87 / 90 feature ranks, score=0.672630
Finished fold with 88 / 90 feature ranks, score=0.670841
Finished fold with 89 / 90 feature ranks, score=0.669052
Finished fold with 90 / 90 feature ranks, score=0.669052
Optimal number of features : 37
Out[46]:
[<matplotlib.lines.Line2D at 0x10b22ad10>]

In [49]:
# Dump the RFECV results: per-subset CV scores, feature ranking, selection
# mask and number of selected features, all on one space-separated line.
# Written as a print() call on a single joined string so it prints the same
# text under Python 2 and Python 3 (the bare `print a, b, ...` statement is a
# SyntaxError on Python 3).
print(' '.join(str(item) for item in (rfecv.grid_scores_, rfecv.ranking_,
                                      rfecv.support_, rfecv.n_features_)))
# Names of the features RFECV kept ...
print(X_lbl[rfecv.support_])
# ... and of the features it eliminated.
print(X_lbl[~rfecv.support_])


[ 0.52726009  0.53531338  0.56837145  0.57642634  0.5960676   0.58980961
  0.60946524  0.61572642  0.62017953  0.62554945  0.61662088  0.61036449
  0.62466618  0.62108516  0.62913525  0.63092576  0.62018592  0.6246518
  0.62465819  0.62109475  0.61751533  0.62377013  0.62287567  0.62287887
  0.62288046  0.62377013  0.62913046  0.62555265  0.62376693  0.62197962
  0.62645029  0.62823601  0.62823601  0.62823601  0.62734315  0.6541688
  0.71490544  0.69258721  0.68722687  0.68633881  0.68722527  0.70153335
  0.68274502  0.69792838  0.6809625   0.65592576  0.66486871  0.68184896
  0.66756964  0.68007603  0.68990065  0.66218055  0.68095132  0.67469173
  0.6791688   0.67202913  0.66487829  0.67560216  0.66933619  0.68542199
  0.67380686  0.66933299  0.68900939  0.6729172   0.672914    0.66666081
  0.67917199  0.67292039  0.67917039  0.67738628  0.67738787  0.6809593
  0.68453552  0.68185376  0.68363787  0.67738628  0.68185376  0.66755207
  0.66845291  0.68274182  0.67827754  0.67113148  0.67828393  0.67202434
  0.67381485  0.67292039  0.67470771  0.67292039  0.66845451  0.67559737] [ 1  1  1  1  9 36  1 34 24 41  1  1 40  1  5 27 11 18 52  1  1  1 20  1 28
  1 16 21  1 33 49  1 10  7  1  1  1 37 38  1 48 12  2  1 15 23 22 32  8 47
  1 35  1  1 42 14  1  1 43 45  1 13 17 50  1  1  1  1  1 26  1 51 25 46 31
  3  4  1 29 44 19  1 30  6 54 39  1 53  1  1] [ True  True  True  True False False  True False False False  True  True
 False  True False False False False False  True  True  True False  True
 False  True False False  True False False  True False False  True  True
  True False False  True False False False  True False False False False
 False False  True False  True  True False False  True  True False False
  True False False False  True  True  True  True  True False  True False
 False False False False False  True False False False  True False False
 False False  True False  True  True] 37
['MRI_cort_thick.ctx.precentral_LH_PLUS_RH'
 'MRI_cort_thick.ctx.parsorbitalis_LH_PLUS_RH'
 'MRI_cort_thick.ctx.parahippocampal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.temporalpole_LH_PLUS_RH'
 'MRI_cort_thick.ctx.posteriorcingulate_LH_PLUS_RH'
 'MRI_cort_thick.ctx.caudalmiddlefrontal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.superiortemporal_LH_PLUS_RH'
 'MRI_cort_area.ctx.caudalanteriorcingulate_LH_PLUS_RH'
 'MRI_cort_thick.ctx.mean_LH_PLUS_RH'
 'MRI_cort_thick.ctx.caudalanteriorcingulate_LH_PLUS_RH'
 'MRI_cort_thick.ctx.bankssts_LH_PLUS_RH'
 'MRI_cort_thick.ctx.isthmuscingulate_LH_PLUS_RH'
 'MRI_cort_thick.ctx.precuneus_LH_PLUS_RH'
 'MRI_cort_thick.ctx.rostralanteriorcingulate_LH_PLUS_RH'
 'MRI_cort_thick.ctx.middletemporal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.lingual_LH_PLUS_RH'
 'MRI_cort_thick.ctx.lateralorbitofrontal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.rostralmiddlefrontal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.superiorfrontal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.supramarginal_LH_PLUS_RH'
 'MRI_cort_area.ctx.total_LH_PLUS_RH'
 'MRI_cort_thick.ctx.postcentral_LH_PLUS_RH'
 'MRI_cort_thick.ctx.paracentral_LH_PLUS_RH'
 'MRI_cort_thick.ctx.superiorparietal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.pericalcarine_LH_PLUS_RH'
 'MRI_cort_thick.ctx.parstriangularis_LH_PLUS_RH'
 'MRI_cort_thick.ctx.inferiortemporal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.fusiform_LH_PLUS_RH'
 'MRI_cort_thick.ctx_TOTAL_LH_PLUS_RH'
 'MRI_cort_thick.ctx.entorhinal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.lateraloccipital_LH_PLUS_RH'
 'MRI_cort_thick.ctx.transversetemporal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.frontalpole_LH_PLUS_RH'
 'MRI_cort_area.ctx_TOTAL_LH_PLUS_RH'
 'MRI_cort_thick.ctx.inferiorparietal_LH_PLUS_RH'
 'MRI_cort_thick.ctx.parsopercularis_LH_PLUS_RH'
 'MRI_cort_thick.ctx.medialorbitofrontal_LH_PLUS_RH']
['DTI_fiber_vol_Unc_LH_PLUS_RH'
 'MRI_cort_area.ctx.parahippocampal_LH_PLUS_RH'
 'DTI_fiber_vol_SIFC_LH_PLUS_RH' 'DTI_fiber_vol_SCS_LH_PLUS_RH'
 'MRI_cort_area.ctx.lateraloccipital_LH_PLUS_RH'
 'MRI_cort_area.ctx.inferiorparietal_LH_PLUS_RH'
 'MRI_cort_area.ctx.parsorbitalis_LH_PLUS_RH'
 'MRI_cort_area.ctx.superiorfrontal_LH_PLUS_RH'
 'MRI_cort_area.ctx.rostralanteriorcingulate_LH_PLUS_RH'
 'MRI_cort_area.ctx.isthmuscingulate_LH_PLUS_RH'
 'MRI_cort_area.ctx.rostralmiddlefrontal_LH_PLUS_RH'
 'DTI_fiber_vol_ATR_LH_PLUS_RH'
 'MRI_cort_area.ctx.superiorparietal_LH_PLUS_RH'
 'MRI_cort_area.ctx.middletemporal_LH_PLUS_RH'
 'DTI_fiber_vol_Fx_LH_PLUS_RH' 'DTI_fiber_vol_CST_LH_PLUS_RH'
 'MRI_cort_area.ctx.paracentral_LH_PLUS_RH'
 'DTI_fiber_vol_TOTAL_LH_PLUS_RH'
 'MRI_cort_area.ctx.lateralorbitofrontal_LH_PLUS_RH'
 'MRI_cort_area.ctx.parstriangularis_LH_PLUS_RH'
 'MRI_cort_area.ctx.parsopercularis_LH_PLUS_RH'
 'DTI_fiber_vol_CgC_LH_PLUS_RH' 'DTI_fiber_vol_AllFib_LH_PLUS_RH'
 'MRI_cort_area.ctx.entorhinal_LH_PLUS_RH' 'DTI_fiber_vol_tSLF_LH_PLUS_RH'
 'DTI_fiber_vol_IFO_LH_PLUS_RH' 'DTI_fiber_vol_SLF_LH_PLUS_RH'
 'MRI_cort_area.ctx.supramarginal_LH_PLUS_RH'
 'DTI_fiber_vol_Fxcut_LH_PLUS_RH' 'MRI_cort_area.ctx.lingual_LH_PLUS_RH'
 'MRI_cort_area.ctx.postcentral_LH_PLUS_RH'
 'MRI_cort_area.ctx.posteriorcingulate_LH_PLUS_RH'
 'DTI_fiber_vol_CgH_LH_PLUS_RH' 'MRI_cort_area.ctx.precuneus_LH_PLUS_RH'
 'MRI_cort_area.ctx.caudalmiddlefrontal_LH_PLUS_RH'
 'DTI_fiber_vol_AllFibnoCC_LH_PLUS_RH'
 'MRI_cort_area.ctx.bankssts_LH_PLUS_RH'
 'MRI_cort_area.ctx.pericalcarine_LH_PLUS_RH'
 'DTI_fiber_vol_ILF_LH_PLUS_RH' 'DTI_fiber_vol_pSCS_LH_PLUS_RH'
 'DTI_fiber_vol_IFSFC_LH_PLUS_RH'
 'MRI_cort_area.ctx.inferiortemporal_LH_PLUS_RH'
 'MRI_cort_area.ctx.frontalpole_LH_PLUS_RH'
 'MRI_cort_area.ctx.temporalpole_LH_PLUS_RH'
 'MRI_cort_thick.ctx.cuneus_LH_PLUS_RH'
 'MRI_cort_area.ctx.superiortemporal_LH_PLUS_RH'
 'MRI_cort_area.ctx.cuneus_LH_PLUS_RH'
 'MRI_cort_area.ctx.medialorbitofrontal_LH_PLUS_RH'
 'MRI_cort_area.ctx.fusiform_LH_PLUS_RH'
 'MRI_cort_area.ctx.transversetemporal_LH_PLUS_RH'
 'DTI_fiber_vol_pSLF_LH_PLUS_RH' 'DTI_fiber_vol_fSCS_LH_PLUS_RH'
 'MRI_cort_area.ctx.precentral_LH_PLUS_RH']

In [27]:
# Show per-group histograms for a few randomly chosen features.
print(X.shape)

# Sample distinct feature columns (at most 5) from a locally seeded generator:
# - replace=False avoids plotting the same feature twice, which randint could;
# - the local RandomState makes the choice repeatable across runs without
#   touching numpy's global random state. Change the seed for another sample.
n_show = min(5, X.shape[1])
rng = np.random.RandomState(0)
feat_indices = rng.choice(X.shape[1], n_show, replace=False)
for idx in feat_indices:
    plt.figure(figsize=(16,6))

    plt.subplot(1,2,1)
    plt.hist([X[y==1, idx], X[y==-1, idx]], 25)
    plt.title(X_lbl[idx])
    plt.legend(['F', 'M'])

    # NOTE(review): this panel repeats the left one exactly; it was
    # presumably meant to show a different view -- confirm intent.
    plt.subplot(1,2,2)
    plt.hist([X[y==1, idx], X[y==-1, idx]], 25)
    plt.title(X_lbl[idx])
    plt.legend(['F', 'M'])


(1119, 126)

In [ ]: