{u'channels': {u'Grayscale': {u'channel_type': u'oldchannel', u'datatype': u'uint16', u'description': u'oldchannel', u'exceptions': 1, u'propagate': 2, u'readonly': 1, u'resolution': 0, u'windowrange': [0, 0]}}, u'dataset': {u'cube_dimension': {u'0': [128, 128, 16], u'1': [128, 128, 16], u'2': [128, 128, 16], u'3': [128, 128, 16], u'4': [128, 128, 16], u'5': [64, 64, 64]}, u'description': u'Fear200', u'imagesize': {u'0': [17817, 23068, 1295], u'1': [8909, 11534, 1295], u'2': [4455, 5767, 1295], u'3': [2228, 2884, 1295], u'4': [1114, 1442, 1295], u'5': [557, 721, 1295]}, u'name': u'Fear200', u'neariso_imagesize': {u'0': [17817, 23068, 1295], u'1': [8909, 11534, 1295], u'2': [4455, 5767, 1295], u'3': [2228, 2884, 1295], u'4': [1114, 1442, 647], u'5': [557, 721, 431]}, u'neariso_offset': {u'0': [0.0, 0.0, 0.0], u'1': [0.0, 0.0, 0.0], u'2': [0.0, 0.0, 0.0], u'3': [0.0, 0.0, 0.0], u'4': [0.0, 0.0, 0.0], u'5': [0.0, 0.0, 0.0]}, u'neariso_scaledown': {u'0': 1, u'1': 1, u'2': 1, u'3': 1, u'4': 2, u'5': 3}, u'neariso_voxelres': {u'0': [1.0, 1.0, 10.0], u'1': [2.0, 2.0, 10.0], u'2': [4.0, 4.0, 10.0], u'3': [8.0, 8.0, 10.0], u'4': [16.0, 16.0, 20.0], u'5': [32.0, 32.0, 30.0]}, u'offset': {u'0': [0, 0, 0], u'1': [0, 0, 0], u'2': [0, 0, 0], u'3': [0, 0, 0], u'4': [0, 0, 0], u'5': [0, 0, 0]}, u'resolutions': [0, 1, 2, 3, 4, 5], u'scaling': u'zslices', u'scalinglevels': 5, u'timerange': [0, 0], u'voxelres': {u'0': [1.0, 1.0, 10.0], u'1': [2.0, 2.0, 10.0], u'2': [4.0, 4.0, 10.0], u'3': [8.0, 8.0, 10.0], u'4': [16.0, 16.0, 10.0], u'5': [32.0, 32.0, 10.0]}}, u'metadata': {}, u'project': {u'description': u'Fear200', u'name': u'Fear200', u'version': u'0.0'}}
{u'channels': {u'Grayscale': {u'channel_type': u'oldchannel', u'datatype': u'uint16', u'description': u'oldchannel', u'exceptions': 1, u'propagate': 2, u'readonly': 0, u'resolution': 0, u'windowrange': [99, 196]}}, u'dataset': {u'cube_dimension': {u'0': [128, 128, 16], u'1': [128, 128, 16], u'2': [128, 128, 16], u'3': [128, 128, 16], u'4': [128, 128, 16], u'5': [64, 64, 64]}, u'description': u'Fear199', u'imagesize': {u'0': [17275, 22939, 1358], u'1': [8638, 11470, 1358], u'2': [4319, 5735, 1358], u'3': [2160, 2868, 1358], u'4': [1080, 1434, 1358], u'5': [540, 717, 1358]}, u'name': u'Fear199', u'neariso_imagesize': {u'0': [17275, 22939, 1358], u'1': [8638, 11470, 1358], u'2': [4319, 5735, 1358], u'3': [2160, 2868, 1358], u'4': [1080, 1434, 679], u'5': [540, 717, 452]}, u'neariso_offset': {u'0': [0.0, 0.0, 0.0], u'1': [0.0, 0.0, 0.0], u'2': [0.0, 0.0, 0.0], u'3': [0.0, 0.0, 0.0], u'4': [0.0, 0.0, 0.0], u'5': [0.0, 0.0, 0.0]}, u'neariso_scaledown': {u'0': 1, u'1': 1, u'2': 1, u'3': 1, u'4': 2, u'5': 3}, u'neariso_voxelres': {u'0': [1.0, 1.0, 10.0], u'1': [2.0, 2.0, 10.0], u'2': [4.0, 4.0, 10.0], u'3': [8.0, 8.0, 10.0], u'4': [16.0, 16.0, 20.0], u'5': [32.0, 32.0, 30.0]}, u'offset': {u'0': [0, 0, 0], u'1': [0, 0, 0], u'2': [0, 0, 0], u'3': [0, 0, 0], u'4': [0, 0, 0], u'5': [0, 0, 0]}, u'resolutions': [0, 1, 2, 3, 4, 5], u'scaling': u'zslices', u'scalinglevels': 5, u'timerange': [0, 0], u'voxelres': {u'0': [1.0, 1.0, 10.0], u'1': [2.0, 2.0, 10.0], u'2': [4.0, 4.0, 10.0], u'3': [8.0, 8.0, 10.0], u'4': [16.0, 16.0, 10.0], u'5': [32.0, 32.0, 10.0]}}, u'metadata': {}, u'project': {u'description': u'Fear199', u'name': u'Fear199', u'version': u'0.0'}}
Comparing the two metadata dumps above with the metadata of one of the controls (below) makes it clear that simple overlay analysis will not be particularly effective — we return to this point in a later section, where we use MicroGL for visualization. The most important observation (obvious in retrospect) is that the brains are different sizes. For a dataset this small, the easiest analysis would be simple overlay analysis — overlaying our brains on some kind of atlas — but because the brains differ in size, overlaying is very difficult. Additionally, because of the unusual sizes of the data volumes, standardized tools such as histogram normalization are hard to apply — errors like "improper shape" are common.
{u'channels': {u'Grayscale': {u'channel_type': u'oldchannel', u'datatype': u'uint16', u'description': u'oldchannel', u'exceptions': 1, u'propagate': 2, u'readonly': 1, u'resolution': 0, u'windowrange': [0, 0]}}, u'dataset': {u'cube_dimension': {u'0': [128, 128, 16], u'1': [128, 128, 16], u'2': [128, 128, 16], u'3': [128, 128, 16], u'4': [128, 128, 16], u'5': [64, 64, 64]}, u'description': u'Control258', u'imagesize': {u'0': [17817, 24223, 1375], u'1': [8909, 12112, 1375], u'2': [4455, 6056, 1375], u'3': [2228, 3028, 1375], u'4': [1114, 1514, 1375], u'5': [557, 757, 1375]}, u'name': u'Control258', u'neariso_imagesize': {u'0': [17817, 24223, 1375], u'1': [8909, 12112, 1375], u'2': [4455, 6056, 1375], u'3': [2228, 3028, 1375], u'4': [1114, 1514, 687], u'5': [557, 757, 458]}, u'neariso_offset': {u'0': [0.0, 0.0, 6.0], u'1': [0.0, 0.0, 6.0], u'2': [0.0, 0.0, 6.0], u'3': [0.0, 0.0, 6.0], u'4': [0.0, 0.0, 3.0], u'5': [0.0, 0.0, 2.0]}, u'neariso_scaledown': {u'0': 1, u'1': 1, u'2': 1, u'3': 1, u'4': 2, u'5': 3}, u'neariso_voxelres': {u'0': [1.0, 1.0, 10.0], u'1': [2.0, 2.0, 10.0], u'2': [4.0, 4.0, 10.0], u'3': [8.0, 8.0, 10.0], u'4': [16.0, 16.0, 20.0], u'5': [32.0, 32.0, 30.0]}, u'offset': {u'0': [0, 0, 6], u'1': [0, 0, 6], u'2': [0, 0, 6], u'3': [0, 0, 6], u'4': [0, 0, 6], u'5': [0, 0, 6]}, u'resolutions': [0, 1, 2, 3, 4, 5], u'scaling': u'zslices', u'scalinglevels': 5, u'timerange': [0, 0], u'voxelres': {u'0': [1.0, 1.0, 10.0], u'1': [2.0, 2.0, 10.0], u'2': [4.0, 4.0, 10.0], u'3': [8.0, 8.0, 10.0], u'4': [16.0, 16.0, 10.0], u'5': [32.0, 32.0, 10.0]}}, u'metadata': {}, u'project': {u'description': u'Control258', u'name': u'Control258', u'version': u'0.0'}}
3D Analysis: To improve our visualization models, I used several different software packages to visualize the data and to look for better methods of building a visualization model. This was mostly conducted using three different tools:
ITKSnap:
MicroGL: MicroGL is used to create the model. Because it generates a properly dimensioned and segmented brain image it is the template we are using for the brain encasing.
Volview:
Currently I am using these three tools in a pipeline to create visualizations similar to http://f1000research.com/articles/4-466/v1.
As a last-ditch effort — because the classification results were inconclusive due to the small sample size — I varied the number of histogram bins used in our classification attempts to see whether any additional information could be extracted.
In [1]:
import os
# NOTE(review): hardcoded absolute local path -- every other user must edit this line
PATH="/Users/albertlee/claritycontrol/code/scripts" # use your own path
os.chdir(PATH)
import clarity as cl # I wrote this module for easier operations on data
import matplotlib.pyplot as plt
import jgraph as ig
import clarity.resources as rs
import csv,gc # garbage memory collection :)
import matplotlib
#import matplotlib.pyplot as plt
import numpy as np
from skimage import data, img_as_float
from skimage import exposure
from numpy import genfromtxt
BINS=32 # histogram bins
RANGE=(10.0,300.0) # (low, high) intensity range used for every histogram below
matplotlib.rcParams['font.size'] = 8
# Sanity check: load one precomputed histogram CSV and print it
my_data = genfromtxt('../data/hist/Fear199.csv', delimiter=',')
print my_data
In [7]:
# Compute a BINS-bin intensity histogram for every subject volume and cache
# it to a per-subject CSV file.
for token in rs.TOKENS:
    c = cl.Clarity(token)  # project helper wrapping one subject's image volume
    # NOTE(review): filename prefix says "4bin" but bins=BINS (32) -- confirm naming is intentional
    fname = rs.HIST_DATA_PATH+"4bin"+token+".csv"
    # density=False -> raw voxel counts per bin over RANGE
    hist, bin_edges = c.loadImg().getHistogram(bins=BINS,range=RANGE,density=False)
    np.savetxt(fname,hist,delimiter=',')
    print fname,"saved."
    del c  # drop the (large) image volume before loading the next subject
    my_data = genfromtxt(fname, delimiter=',')  # read back as a sanity check
    print my_data
    gc.collect()  # kernel memory persists across iterations; force collection
In [8]:
import numpy as np
import clarity.resources as rs
features = np.empty(shape=(1,BINS))
for token in rs.TOKENS:
fname = rs.HIST_DATA_PATH+"4bin"+token+".csv"
data = np.loadtxt(fname,delimiter=',')
features = np.vstack([features,data])
features = features[1:,]
minc = np.min(features)
maxc = np.max(features)
features = (features-minc)/(maxc-minc)
print features
np.savetxt(rs.HIST_DATA_PATH+"4binfeatures.csv",features,delimiter=',')
In [9]:
# NOTE(review): sklearn.cross_validation is long deprecated (removed in 0.20);
# modern code uses sklearn.model_selection -- kept as-is for this environment.
from sklearn import cross_validation
from sklearn.cross_validation import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
%matplotlib inline
np.random.seed(12345678) # for reproducibility, set random seed
# Row order of the 12 subjects in the feature matrix:
# Cocaine = ["Cocaine174","Cocaine175","Cocaine178"]
# Control = ["Control181","Control182","Control189","Control239","Control258"]
# Fear = ["Fear187","Fear197","Fear199","Fear200"]
features = np.loadtxt(rs.HIST_DATA_PATH+"4binfeatures.csv",delimiter=',')
# Per-subject mean/std across the histogram bins ...
temp_mu = np.mean(features,axis=1)
temp_std = np.std(features,axis=1)
# ... averaged within each class: Cocaine rows 0:3, Control 3:8, Fear 8:12
mu = [np.mean(temp_mu[0:3]),np.mean(temp_mu[3:8]),np.mean(temp_mu[8:12])]
std = [np.mean(temp_std[0:3]),np.mean(temp_std[3:8]),np.mean(temp_std[8:12])]
print mu
print std
# NOTE(review): computed class stds are discarded; the simulation below uses unit variance
std=[1,1,1]
# define number of subjects per class
S = np.array((9, 21, 30, 39, 45, 63, 81, 96, 108, 210, 333))  # simulated total sample sizes (each divisible by 3)
names = ["Nearest Neighbors", "Linear SVM", "Random Forest",
         "Linear Discriminant Analysis", "Quadratic Discriminant Analysis"]
# NOTE(review): 5 names but only 4 classifiers -- QDA is imported and named
# above but never instantiated, so names[4] is unused.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    LinearDiscriminantAnalysis()]
In [10]:
# Simulate classification accuracy as a function of sample size: draw
# synthetic BINS-dimensional Gaussian features per class (class means from
# the real data, unit variance) and score each classifier with LOO CV.
accuracy = np.zeros((len(S), len(classifiers), 2), dtype=np.dtype('float64'))
for idx1, s in enumerate(S):
    # equal split across the three classes (every s in S is divisible by 3;
    # Python 2 integer division)
    s0=s/3
    s1=s/3
    s2=s/3
    x0 = np.random.normal(mu[0],std[0],(s0,BINS))
    x1 = np.random.normal(mu[1],std[1],(s1,BINS))
    x2 = np.random.normal(mu[2],std[2],(s2,BINS))
    X = x0
    X = np.vstack([X,x1])
    X = np.vstack([X,x2])
    # labels: 0 = Cocaine, 1 = Control, 2 = Fear, matching the stacking order
    y = np.append(np.append(np.zeros(s0), np.ones(s1)),np.ones(s2)*2)
    for idx2, cla in enumerate(classifiers):
        # NOTE(review): the 60/40 split and this fit are effectively unused --
        # cross_val_score below refits fresh clones on the LOO folds of all of X
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=0)
        clf = cla.fit(X_train, y_train)
        loo = LeaveOneOut(len(X))
        scores = cross_validation.cross_val_score(clf, X, y, cv=loo)
        accuracy[idx1, idx2,] = [scores.mean(), scores.std()]
        print("Accuracy of %s: %0.2f (+/- %0.2f)" % (names[idx2], scores.mean(), scores.std() * 2))
print accuracy
In [11]:
# Plot simulated accuracy vs. sample size for each classifier.
# Fix: dropped the deprecated hold=True kwarg -- holding is matplotlib's
# default behavior, and the kwarg was removed entirely in matplotlib 3.0,
# so the original lines would raise TypeError on modern versions.
plt.errorbar(S, accuracy[:,0,0], yerr = accuracy[:,0,1], label=names[0])
plt.errorbar(S, accuracy[:,1,0], yerr = accuracy[:,1,1], color='green', label=names[1])
plt.errorbar(S, accuracy[:,2,0], yerr = accuracy[:,2,1], color='red', label=names[2])
plt.errorbar(S, accuracy[:,3,0], yerr = accuracy[:,3,1], color='black', label=names[3])
# plt.errorbar(S, accuracy[:,4,0], yerr = accuracy[:,4,1], color='brown', label=names[4])
plt.xscale('log')  # S spans 9..333, so log spacing keeps the points readable
plt.xlabel('number of samples')
plt.ylabel('accuracy')
plt.title('Accuracy of classification under simulated data')
plt.axhline(1, color='red', linestyle='--')  # reference line at perfect accuracy
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # legend outside the axes
plt.show()
In [12]:
# True class labels for the 12 real subjects, in feature-row order:
# 0 = Cocaine (3 subjects), 1 = Control (5), 2 = Fear (4)
y=np.array([0,0,0,1,1,1,1,1,2,2,2,2])
features = np.loadtxt(rs.HIST_DATA_PATH+"4binfeatures.csv",delimiter=',')
In [13]:
# Leave-one-out cross-validation of each classifier on the real
# 12-subject feature matrix.
accuracy=np.zeros((len(classifiers),2))
for idx, cla in enumerate(classifiers):
    # NOTE(review): the 60/40 split and this fit are effectively unused --
    # cross_val_score below refits fresh clones on the LOO folds of all data
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, y, test_size=0.4, random_state=0)
    clf = cla.fit(X_train, y_train)
    loo = LeaveOneOut(len(features))
    scores = cross_validation.cross_val_score(clf, features, y, cv=loo)
    accuracy[idx,] = [scores.mean(), scores.std()]
    print("Accuracy of %s: %0.2f (+/- %0.2f)" % (names[idx], scores.mean(), scores.std() * 2))
Now for the simplest case:
In [ ]:
for token in rs.TOKENS:
c = cl.Clarity(token)
fname = rs.HIST_DATA_PATH+"1bin"+token+".csv"
hist, bin_edges = c.loadImg().getHistogram(bins=BINS,range=RANGE,density=False)
np.savetxt(fname,hist,delimiter=',')
print fname,"saved."
del c
my_data = genfromtxt(fname, delimiter=',')
print my_data
gc.collect()
In [ ]: