Following an observation from a09_extended_exploratory.ipynb, the experimental conditions can be classified by comparing the same brain region across different subjects. Each region is called an ROI (region of interest) and is identified by a specific annotation number.
In [1]:
FEATURES_PATH = '../code/data/roi_features/features.csv' # use your own path
import numpy as np
import matplotlib
matplotlib.use('AGG') # force a non-interactive backend to work around a matplotlib error; delete this line if the plots below do not render
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import jgraph as ig
%matplotlib inline
# DATA description (column description)
# 0. class label [0=cocaine | 1=control | 2=fear]
# 1. brain number
# 2. roi number
# 3. roi position X
# 4. roi position Y
# 5. roi position Z
# 6. roi mean
# 7. roi std
# 8. Haralick feature - Energy
# 9. Haralick feature - Entropy
# 10. Haralick feature - Correlation
# 11. Haralick feature - Contrast
# 12. Haralick feature - Variance
# 13. Haralick feature - SumMean
# 14. Haralick feature - Inertia
# 15. Haralick feature - Cluster Shade
# 16. Haralick feature - Cluster tendency
# 17. Haralick feature - Homogeneity
# 18. Haralick feature - MaxProbability
# 19. Haralick feature - Inverse Variance
fields = ['label','nbrain','nroi','roix','roiy','roiz','mean','std','energy','entropy','correlation','contrast','variance',
          'summean','inertia','cluster shade','cluster tendency','homogeneity','maxProbability','inverse variance']
data = np.genfromtxt(FEATURES_PATH, delimiter=",", dtype=np.float32) # the feature data have been pre-processed and merged
brain_nums = np.unique(data[:,1])
roi_nums = np.unique(data[:,2])
# preview - print brain numbers
print brain_nums
# preview - print roi numbers
print roi_nums
print data.shape
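As a sanity check, the field names above can be mapped to column indices so that the numeric slices used below can be cross-checked by name. The following cell is an optional helper added here, not part of the original analysis.
In [ ]:
# Added sketch: build a name -> column-index lookup from `fields`.
col = dict((name, j) for j, name in enumerate(fields))
print col['mean'], col['energy'] # expect 6 and 8, matching the slices used below
# e.g. select all rows of the first brain by name rather than by a magic number
print data[data[:, col['nbrain']] == brain_nums[0]].shape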
In [2]:
ROI_NUM = roi_nums[9] # which region would you like to study?
vectorized = data[data[:,2]==ROI_NUM, 6:19] # columns 6-18: ROI mean, std, and the Haralick features through MaxProbability (column 19, inverse variance, falls outside this slice)
covar = np.cov(vectorized, rowvar=0) # rows of `vectorized` are samples, so rowvar=0 yields the feature-by-feature covariance named in the plot title
plt.figure(figsize=(7,7))
plt.imshow(covar)
plt.title('Covariance of Clarity ROI (%s) features'%(ROI_NUM))
plt.colorbar()
plt.show()
diag = covar.diagonal()*np.eye(covar.shape[0]) # on-diagonal entries only (variances)
hollow = covar-diag # off-diagonal entries only (cross-covariances)
d_det = np.linalg.det(diag)
h_det = np.linalg.det(hollow)
plt.figure(figsize=(11,8))
plt.subplot(121)
plt.imshow(diag)
plt.clim([0, np.max(covar)])
plt.title('Determinant of on-diagonal: ' + str(d_det))
plt.subplot(122)
plt.imshow(hollow)
plt.clim([0, np.max(covar)])
plt.title('Determinant of off-diagonal: ' + str(h_det))
plt.show()
print "Ratio of on- and off-diagonal determinants: " + str(d_det/h_det)
In [3]:
import sklearn.mixture
i = np.arange(1, 13, dtype='int') # candidate numbers of mixture components
print i
bic = np.array(())
for idx in i:
    print "Fitting and evaluating model with " + str(idx) + " clusters."
    # sklearn < 0.18 API; later versions use GaussianMixture(n_components=idx, max_iter=1000)
    gmm = sklearn.mixture.GMM(n_components=idx, n_iter=1000, covariance_type='diag')
    gmm.fit(vectorized)
    bic = np.append(bic, gmm.bic(vectorized))
plt.figure(figsize=(7,7))
plt.plot(i, 1.0/bic) # reciprocal of BIC, so higher is better
plt.title('Model selection via BIC')
plt.ylabel('1/BIC (higher is better)')
plt.xlabel('number of clusters')
plt.show()
print bic
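Since lower BIC indicates a better model (the plot above shows 1/BIC, where higher is better), the preferred number of clusters can be read off directly. The cell below is a small addition that does so and refits that model.
In [ ]:
# Added sketch: pick the number of components minimizing BIC and refit.
best_k = i[np.argmin(bic)]
print "Number of clusters minimizing BIC: " + str(best_k)
gmm_best = sklearn.mixture.GMM(n_components=best_k, n_iter=1000, covariance_type='diag')
gmm_best.fit(vectorized)
print gmm_best.weights_ # mixing weight of each cluster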
In [4]:
# sklearn < 0.18 API; in later versions these utilities live in sklearn.model_selection
from sklearn import cross_validation
from sklearn.cross_validation import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
%matplotlib inline
names = ["Nearest Neighbors", "Linear SVM", "Random Forest",
         "Linear Discriminant Analysis", "Quadratic Discriminant Analysis"]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()]
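One caveat worth noting: k-nearest neighbors and the linear SVM are sensitive to feature scale, and the Haralick features span very different ranges. The cell below is an added sketch (using sklearn.preprocessing.StandardScaler, which is not part of the original pipeline) showing how the feature matrix could be standardized first.
In [ ]:
# Added sketch: standardize features to zero mean / unit variance before
# feeding scale-sensitive classifiers. Not used by the cell below.
from sklearn.preprocessing import StandardScaler
X_scaled_demo = StandardScaler().fit_transform(data[data[:,2]==ROI_NUM, 8:19])
print X_scaled_demo.mean(axis=0) # approximately 0 per feature
print X_scaled_demo.std(axis=0)  # approximately 1 per feature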
In [5]:
accuracy = np.zeros((len(classifiers), 2), dtype=np.dtype('float64'))
X = data[data[:,2]==ROI_NUM, 8:19] # Haralick texture features only (columns 8-18)
y = data[data[:,2]==ROI_NUM, 0]    # class label: 0=cocaine, 1=control, 2=fear
for idx, cla in enumerate(classifiers):
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2, random_state=0)
    clf = cla.fit(X_train, y_train)
    loo = LeaveOneOut(len(X)) # leave-one-out: one fold per sample
    # cross_val_score clones and refits the estimator on every fold
    scores = cross_validation.cross_val_score(clf, X, y, cv=loo)
    accuracy[idx,] = [scores.mean(), scores.std()]
    print("Accuracy of %s: %0.2f (+/- %0.2f)" % (names[idx], scores.mean(), scores.std() * 2))
print accuracy
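Mean accuracy alone hides which conditions get confused with which. As a final added sketch (assuming sklearn >= 0.16, where cross_val_predict lives in sklearn.cross_validation), the leave-one-out predictions of the best-scoring classifier can be tabulated in a confusion matrix.
In [ ]:
# Added sketch: confusion matrix of leave-one-out predictions for the
# classifier with the highest mean accuracy above.
from sklearn.metrics import confusion_matrix
best = classifiers[np.argmax(accuracy[:, 0])]
y_pred = cross_validation.cross_val_predict(best, X, y, cv=LeaveOneOut(len(X)))
print confusion_matrix(y, y_pred) # rows: true class (0=cocaine, 1=control, 2=fear); columns: predicted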