Warning, this will generate a lot of data. But not on your machine, so don't worry.
In [308]:
# Get some paths
import os
import subprocess
import numpy as np
import pandas as pd
import statsmodels.api as sm
from pymatbridge import Matlab
from matplotlib import pyplot as plt
import scipy.cluster.hierarchy as clh
from sklearn import linear_model, cross_validation
In [2]:
# Set some paths
wk_dir = '/data1/age'
dl_dir = os.path.join(wk_dir, 'data')
csv_path = os.path.join(dl_dir, 'aging_full_model.csv')
data_url = 'http://downloads.figshare.com/article/public/1241650'
In [3]:
# Set up the scene
if not os.path.exists(wk_dir):
print("I'll create {}".format(wk_dir))
os.makedirs(wk_dir)
if not os.path.exists(dl_dir):
print("I'll create {}".format(dl_dir))
os.makedirs(dl_dir)
In [181]:
# Get the data
get_proc = subprocess.Popen(['wget', '-nc', data_url, '-P', dl_dir],
stderr=subprocess.STDOUT)
get_proc.wait()
get_status = get_proc.poll()
if not status == 0:
print("Trying to run {} didn't work".format(sys_call))
else:
zip_proc = subprocess.Popen(['unzip',
'{}/1241650'.format(dl_dir),
'-d', dl_dir], stderr=subprocess.STDOUT)
zip_proc.wait()
ztatus = zip_proc.poll()
if not ztatus == 0:
print("Trying to run {} didn't work".format(zip_call))
In [85]:
pheno = pd.read_csv(os.path.join(dl_dir, 'aging_full_model.csv'))
age = pheno['age'].values
subs = pheno[' '].values
# Strip the annoying trailing whitespace
subs = [x.strip(' ') for x in subs]
In [5]:
# Bring out the Matlab Master to deal with all those .mat files
mlab = Matlab()
mlab.start()
Out[5]:
In [11]:
# Yes, yes, this is all notoriously slow because we have to weak up the matlab giant for each command
for sub in subs:
read_this = os.path.join(dl_dir, 'correlation_{}_roi.mat'.format(sub))
comm = "tmp = load('{}');".format(read_this)
out = mlab.run_code(comm)
out = mlab.run_code('R = tmp.mat_r;')
# Save the stuff out as csv
out = mlab.run_code("csvwrite('{}/out_{}.csv', R);".format(dl_dir, sub))
print('Done writing')
In [51]:
# Now lets read this in again and deal with sensible data
n_subs = len(subs)
for idx, sub in enumerate(subs):
df = pd.read_csv('{}/out_{}.csv'.format(dl_dir, sub), header=None)
ts = np.array(df.values)
# Do mad things
ts = (ts - np.median(ts))/1.4785*np.median(np.absolute(ts-np.median(ts)))
if idx == 0:
conn = np.zeros((ts.shape[0], n_subs))
conn[:, idx] = ts[:,0]
In [120]:
# Cross-validated clustering
scale = 10
cv = cross_validation.LeaveOneOut(n_subs)
for train, test in cv:
train_conn = conn[:, train].T
test_conn = conn[:, test].T
train_age = age[train]
test_age = age[test]
y = train_conn
x = sm.add_constant(train_age)
N = len(train)
K = 2
c = np.ones((K, 1));
# Fit OLS
linm = sm.OLS()
res = linm.fit(y, x)
beta = res.params
e = res.resid
std_e = [None, np.sqrt(np.sum(e**2,0 )/(N-K))]
d = np.sqrt(np.dot(np.dot(c.T,np.dot(x.T,x))**(-1),c))
ttest = np.dot(c.T,beta)/np.dot(std_e.T,d).T
# Turn the thing back into a matrix
N = np.round((1+np.sqrt(1+8*size(ttest)))/2.)
mat = np.zeros((N, N))
mat[np.tril(np.ones((N, N), dtype=bool), -1)] = ttest[0]
mat = mat.T
mat[np.tril(np.ones((N, N), dtype=bool), -1)] = ttest[0]
# Hierarchical Clustering
link = clh.linkage(mat, method='ward')
part = clh.fcluster(link, scale, criterion='maxclust')
# Make new connectomes
avg_conn = np.zeros((n_subs-1,scale*(scale-1)/2))
for idx, sub in enumerate(subs):
avg_conn[idx, :] =
In [292]:
mat = np.zeros((N, N))
mat[np.tril(np.ones((N, N), dtype=bool), -1)] = ttest[0]
In [303]:
N = np.round((1+np.sqrt(1+8*size(ttest)))/2.)
mat = np.zeros((N, N))
mat[np.tril(np.ones((N, N), dtype=bool), -1)] = ttest[0]
mat = mat.T
mat[np.tril(np.ones((N, N), dtype=bool), -1)] = ttest[0]
In [310]:
scale = 10
link = clh.linkage(mat, method='ward')
part = clh.fcluster(link, scale, criterion='maxclust')
In [318]:
avg_conn = np.zeros((n_subs-1,scale*(scale-1)/2))
In [319]:
avg_conn.shape
Out[319]:
In [316]:
K*(K-1)/2
Out[316]:
In [317]:
K
Out[317]:
In [239]:
d.shape
Out[239]:
In [195]:
results = model.fit()
In [197]:
y.shape
Out[197]:
In [171]:
beta
Out[171]:
In [181]:
x.shape
Out[181]:
In [182]:
a = range(1,8)
In [183]:
a
Out[183]:
In [184]:
a = sm.add_constant(a)
Out[184]:
In [186]:
x = sm.add_constant(train_age)
In [187]:
x.shape
Out[187]:
In [190]:
y.shape
Out[190]:
In [ ]: