In [166]:
#!/bin/python

# Initialize dataset
import sys, ply, h5py, pandas as pd, numpy as np
sys.path.insert(0,'/scr/litauen1/toro/dist_meta/neurovault/neuro2/neurosynth')
from neurosynth.base.dataset import Dataset
from neurosynth.analysis import decode
from sklearn.cluster import KMeans 

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import pylab as plt
import scipy.cluster.hierarchy as hier
import scipy.spatial.distance as dist
sns.set(context="paper", font="sans-serif", font_scale=2)

In [ ]:
# Load the Dataset and add topic-based features — point these paths to the
# right location in your local environment.

#dataset = Dataset('/scr/litauen1/toro/dist_meta/neurovault/neuro2/neurosynth-data/database.txt')
#dataset.add_features('/scr/litauen1/toro/dist_meta/neurovault/neuro2/neurosynth-data/features.txt')
#dataset.save('/scr/litauen1/toro/dist_meta/neurovault/neuro2/neurosynth-data/dataset.pkl', keep_mappables = True)

In [5]:
# Load the pre-pickled Dataset.
# NOTE(review): the traceback below shows this failing with a cPickle
# "_reconstruct: First argument must be a sub-type of ndarray" TypeError —
# the pickle was written under a different pandas/numpy version. If loading
# fails, regenerate dataset.pkl with the commented-out cell above.
pickled_dataset = '/scr/litauen1/toro/dist_meta/neurovault/neuro2/neurosynth-data/dataset.pkl'
dataset = Dataset.load(pickled_dataset)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-4bafe9d1412e> in <module>()
      1 pickled_dataset = '/scr/litauen1/toro/dist_meta/neurovault/neuro2/neurosynth-data/dataset.pkl'
----> 2 dataset = Dataset.load(pickled_dataset)

/scr/litauen1/toro/dist_meta/neurovault/neuro2/neurosynth/neurosynth/base/dataset.pyc in load(cls, filename)
    516         """ Load a pickled Dataset instance from file. """
    517         import cPickle
--> 518         dataset = cPickle.load(open(filename, 'rb'))
    519         if hasattr(dataset, 'feature_table'):
    520             dataset.feature_table._csr_to_sdf()

TypeError: ('_reconstruct: First argument must be a sub-type of ndarray', <built-in function _reconstruct>, (<class 'pandas.core.index.Index'>, (0,), 'b'))

In [246]:
# NOTE(review): everything in this cell is disabled by wrapping it in
# triple-quoted strings — the cell only evaluates to a string (see Out[246])
# and performs no work.
'''
# Print terms from each topic
features = pd.read_csv('v3-topics-50-keys.txt', sep=' |\t', index_col=0, header=None)
features = features.T[topics_to_keep]
features.columns = labels
print features.T
'''
# NOTE(review): the next triple-quoted string disables the decoding-results
# export code through the closing ''' below; the leading ", save=...)"
# fragment is a leftover from a decoder.decode(...) call that was cut in half
# when the code was quoted out. Restore that call before re-enabling.
''', save='decoding_results.txt')
                    
# Write out results
import csv
lola = np.asarray(list(csv.reader(open('decoding_results.txt', 'rb'), delimiter=',')))

inputAll = []
inputNorm = []
namesAll = []
names = lola[1:,0]
size = np.shape(lola)
for i in xrange(1,size[1]):
    input = lola[1:,i]
    ord = np.argsort(input)
    inputAll.append(input[ord][:])
    namesAll.append(names[ord][:])
    inputNorm.append(input)

# Save files for matlab
f = h5py.File('neurosynth_meta.mat','w')
f['inputAll'] = np.asarray(inputAll)
f['namesAll'] = namesAll
f['names'] = names
f['inputNorm'] = inputNorm
f.close()
'''
#df = pd.read_table('decoding_results.txt', sep=',', header=0, index_col=0)


Out[246]:
", save='decoding_results.txt')\n                    \n# Write out results\nimport csv\nlola = np.asarray(list(csv.reader(open('decoding_results.txt', 'rb'), delimiter=',')))\n\ninputAll = []\ninputNorm = []\nnamesAll = []\nnames = lola[1:,0]\nsize = np.shape(lola)\nfor i in xrange(1,size[1]):\n    input = lola[1:,i]\n    ord = np.argsort(input)\n    inputAll.append(input[ord][:])\n    namesAll.append(names[ord][:])\n    inputNorm.append(input)\n\n# Save files for matlab\nf = h5py.File('neurosynth_meta.mat','w')\nf['inputAll'] = np.asarray(inputAll)\nf['namesAll'] = namesAll\nf['names'] = names\nf['inputNorm'] = inputNorm\nf.close()\n"

In [292]:
def getOrder(d, thr):
    """Order the rows of ``d`` by their thresholded center of mass.

    For each row, values at or below ``thr`` get zero weight, and the
    weighted average of the 1-based column positions is computed with the
    remaining values as weights. Rows are then ordered by ascending center
    of mass, so rows loading on early columns come first.

    Parameters
    ----------
    d : pandas.DataFrame
        Rows are the items to order (e.g. topics); columns are ordered bins.
    thr : float
        Values <= thr are ignored (treated as zero weight).

    Returns
    -------
    numpy.ndarray
        Row positions of ``d`` sorted by ascending center of mass.

    Notes
    -----
    Fixes two defects in the previous version: the thresholding zeroed the
    caller's DataFrame in place (it wrote through a transposed view), and
    ``np.argsort`` was recomputed on every loop iteration. The input is now
    left untouched and the argsort runs once.
    """
    positions = np.arange(1, d.shape[1] + 1)
    centers = []
    for _, row in d.iterrows():
        weights = row.astype(np.float64)  # astype copies: caller's d is not modified
        weights[weights <= thr] = 0
        # Raises ZeroDivisionError if an entire row is at/below threshold,
        # same as the original behavior.
        centers.append(np.average(positions, weights=weights))
    return np.argsort(centers)

def colorTicks(heatmapOrderVar, figVar):
    """Color the y-tick labels of the figure's current axes by cluster id.

    Reads the module-level ``clusters`` array (k-means labels, one per row);
    ``heatmapOrderVar`` reorders it to match the heatmap row order so each
    tick label gets the color of its row's cluster.

    Returns the reordered cluster ids and the tick-label artists.
    """
    palette = ["green", "orange", "blue", "red"]
    cl = clusters[heatmapOrderVar]
    a = figVar.gca().get_yticklabels()
    for idx in range(len(a)):
        a[idx].set_color(palette[cl[idx]])
    return cl, a

def leafOrder(inputData):
    distanceMatrix = dist.pdist(inputData,  metric='euclidean')
    linkageMatrix = hier.linkage(distanceMatrix, method='average', metric='euclidean')
    heatmapOrderVar = hier.leaves_list(linkageMatrix)
    return heatmapOrderVar

def runKmeans(dataVar, numClust, random_state=None):
    """Cluster the rows of ``dataVar`` with k-means after per-row scaling.

    Each row is divided by its own maximum (via the double transpose below),
    then k-means with 100 restarts is fit on the scaled rows.

    Parameters
    ----------
    dataVar : pandas.DataFrame
        Rows are the samples to cluster (e.g. topics), columns are features.
    numClust : int
        Number of clusters.
    random_state : int or None, optional
        Seed for the k-means initialisation. Pass an int to make the labels
        reproducible across runs; the previous version was unseeded, so
        repeated executions could yield differently-numbered clusters.

    Returns
    -------
    numpy.ndarray
        Cluster label for each row of ``dataVar``.
    """
    kmeans = KMeans(n_clusters=numClust, n_init=100, max_iter=100,
                    random_state=random_state)
    # dataVar.T.max() is a per-row maximum of dataVar; the division aligns on
    # the transposed columns, scaling each row of dataVar to a max of 1.
    dg = dataVar.T / dataVar.T.max().astype(np.float64)
    kmeans.fit(dg.T)
    return kmeans.labels_
    
def plotZones(dataVar, axVar, figVar, axTitle):
    """Draw a thresholded heatmap of zone decoding results on ``axVar``.

    Reads the module-level ``thr``, ``vmin``, ``vmax`` and ``heatmapOrder``
    globals set by the distance analysis, so all panels share the same row
    order and color scale. Also depends on the global ``clusters`` (via
    colorTicks) and on seaborn/matplotlib state.

    Parameters
    ----------
    dataVar : pandas.DataFrame
        Decoding results; one column per DMN zone.
    axVar : matplotlib Axes to draw the heatmap on.
    figVar : matplotlib Figure whose current-axes tick labels are recolored.
    axTitle : str, x-axis label for the panel.
    """
    # Work on a copy so the caller's data is untouched.
    # (Removed dead `df = []` that was immediately overwritten.)
    df = dataVar.copy()
    df.columns = ('medial parietal', 'lateral parietal', 'ventrolateral pfc',
                  'medial temporal', 'ventromedial pfc', 'dorsolateral pfc', 'lateral temporal')
    # Zero out sub-threshold values (|value| <= thr) before plotting.
    df[np.logical_and(df <= thr, df >= (thr * -1))] = 0
    sns.heatmap(df.reindex(df.index[heatmapOrder]), linewidths=1, square=True, center=0,
                ax=axVar, vmin=vmin, vmax=vmax)
    sns.axlabel(axTitle, '')
    plt.setp(axVar.get_yticklabels(), visible=False)

    # Give each zone's x-tick label a fixed color.
    b = figVar.gca().get_xticklabels()
    colorlist = ["red", "blue", "green", "purple", "orange", "yellow", "brown"]
    for i in range(len(b)):
        b[i].set_color(colorlist[i])
    # Color y-tick labels by k-means cluster. Fixed: previously this passed
    # the global figure `f` instead of the `figVar` parameter.
    cl, a = colorTicks(heatmapOrder, figVar)

In [302]:
# Load the 50-topic feature set, drop junk topics and give the rest readable
# names. Check the topic-keys file to see what terms each topic loads on.
# These topics are identical to those at
# http://neurosynth.org/analyses/topics/v3-topics-50/
features = pd.read_csv('v3-topics-50.txt', sep='\t', index_col=0)

topics_to_keep = [4, 6, 14, 17, 18,
                  21, 23, 25,  # 27, 20,
                  29, 30, 31, 33, 35,
                  36, 37, 38, 41, 45,
                  49, 48]  # 43, 16, 22, 46, 44, 1,
labels = ['semantics', 'cued attention', 'working memory', 'reading', 'autobiographical memory',
          'number', 'inhibition', 'motor',  # 'reward', 'visual perception',
          'visual attention', 'multisensory perception', 'visuospatial', 'eye movements', 'action',
          'auditory perception', 'language', 'pain', 'long-term memory', 'emotion',
          'social cognition', 'control']
          # 'stress', 'autonomic', 'olfactory', 'learning', 'categorical', 'faces', 'motion'
features = features.iloc[:, topics_to_keep]
features.columns = labels
dataset.add_features(features, append=False)

# DISTANCE analysis: decode each of the 18 concentric 5 mm distance-bin ROIs.
decoder = decode.Decoder(dataset, method='roi')
data = decoder.decode(['/scr/litauen1/toro/dist_meta/distDMN_MNI_2mm_%s_%s.nii.gz'
                       % (i * 5, (i * 5) + 5) for i in range(0, 18)])
# (Removed dead `df = []` that was immediately overwritten by the copy.)
df = data.copy()
# Rename columns to their distance range in mm, e.g. '0-5', '5-10', ...
# (was a list comprehension used only for its append side effects).
df.columns = ['%s-%s' % (i * 5, (i * 5) + 5) for i in range(0, len(df.columns))]
clusters = runKmeans(df, 3)

# Threshold and color scale, shared as globals by getOrder / plotZones /
# colorTicks so all three panels agree.
thr = 3.1
vmin = -15
vmax = 15
heatmapOrder = getOrder(data, thr)
df[np.logical_and(df <= thr, df >= (thr * -1))] = 0  # zero out |z| <= thr

f, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(30, 10), sharey=True)
sns.heatmap(df.reindex(df.index[heatmapOrder]), linewidths=1, square=True, center=0, robust=True,
            ax=ax1, vmin=vmin, vmax=vmax)
sns.axlabel('Distance from DMN peaks (mm)', 'NeuroSynth topics terms')
cl, a = colorTicks(heatmapOrder, f)

# ZONES analysis:
data = decoder.decode(['/scr/litauen1/toro/dist_meta/zonesDMN_MNI_2mm_%s.nii.gz'
                       % i for i in range(1, 8)])
plotZones(data, ax2, f, "Zones")

# ZONES thr30 analysis:
data = decoder.decode(['/scr/litauen1/toro/dist_meta/zonesDMNthr30_MNI_2mm_%s.nii.gz'
                       % i for i in range(1, 8)])
plotZones(data, ax3, f, "Zones (Thresh<30mm)")

# Save figure
f.savefig('Fig_distance_zones_neurosynth.pdf')
f.savefig('Fig_distance_zones_neurosynth.png')


In [ ]: