In [25]:
%matplotlib inline
import matplotlib.pyplot as plt
In [26]:
import os.path as op
import re
import numpy as np
import pandas as pd
from scipy import stats
import nibabel as nb
from nibabel import load
import nipype.interfaces.fsl as fsl
import nitime
import nitime.fmri.io as io
from nitime.timeseries import TimeSeries
from nitime.analysis import SpectralAnalyzer, FilterAnalyzer, NormalizationAnalyzer
import moss
import seaborn as sns
sns.set(context='poster', style='whitegrid')
import lyman
from lyman import tools
%load_ext rpy2.ipython
%R require(lme4)
%R require(lmerTest)
Out[26]:
In [27]:
def determine_mask(mask, hemi, roi_type, contrast):
    """Build the mask file stem from its ROI spec (name, hemisphere, type, contrast)."""
    if contrast == 'anat':
        if hemi == 'bilat':
            mask_name = mask
        else:  # unilateral
            if roi_type == 'aseg':
                mask_name = mask
            else:  # label
                mask_name = hemi + '.' + mask
    else:  # functional contrast
        if hemi == 'bilat':
            mask_name = mask + '-' + contrast
        else:  # unilateral
            if roi_type == 'contrast-aseg':
                mask_name = mask + '-' + contrast
            else:  # label
                mask_name = hemi + '.' + mask + '-' + contrast
    return mask_name
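A quick sketch of the naming convention this function encodes, using hypothetical ROI specs (the real entries come from the rois csv loaded below):

# determine_mask('fusiform', 'lh', 'label', 'anat')       -> 'lh.fusiform'
# determine_mask('hippocampus', 'bilat', 'aseg', 'anat')  -> 'hippocampus'
# determine_mask('fusiform', 'lh', 'label', 'old-new')    -> 'lh.fusiform-old-new'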
In [331]:
experiment = 'objfam'
altmodel = 'submem_nuisance_comb'
subject_list = 'subids_subset_no23or19.txt'  # Stored in data_dir
mni_space = True
if mni_space:
    rois = 'rois_mni.csv'
    regspace = 'mni'
    smoothing = 'smoothed'
else:
    rois = 'rois.csv'  # Stored in analysis_dir for altmodel
    regspace = 'epi'
    smoothing = 'unsmoothed'
input_file = 'cope1.nii.gz'
In [332]:
project = lyman.gather_project_info()
exp = lyman.gather_experiment_info(experiment, altmodel)
exp_name = "-".join([experiment, altmodel])
subj_path = op.join(project['data_dir'], subject_list)
mask_path = op.join(project['analysis_dir'], exp_name, rois)
subjects = np.loadtxt(subj_path, str)
masks = np.array(pd.read_csv(mask_path, header=None))
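Each row of the ROI csv is expected to supply five columns, which the extraction loop below unpacks as (mask, hemi, roi_type, roi_id, contrast). A hypothetical row, just to show the layout (not an actual entry from rois_mni.csv):

# mask,     hemi, roi_type, roi_id, contrast
# fusiform, lh,   label,    1,      anat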
In [334]:
# Set up storage dataframe
df = pd.DataFrame(columns=('subid', 'roi', 'mask_vox', 'hemi', 'cond', 'value'))
for subid in subjects:
    # Condition names double as both the contrast directory and the display label here
    contrast_list = exp['condition_names']
    contrast_names = exp['condition_names']
    for contrast, label in zip(contrast_list, contrast_names):
        fmri_file = op.join(project['analysis_dir'], exp_name,
                            subid, 'ffx', regspace, smoothing,
                            contrast, input_file)
        # Read data using nibabel:
        fmri_data = load(fmri_file)
        func_arr = fmri_data.get_data()
        # Note: `contrast` below is the mask's contrast column from the ROI csv,
        # which shadows the condition-level `contrast` above
        for mask, hemi, roi_type, roi_id, contrast in masks:
            mask_name = determine_mask(mask, hemi, roi_type, contrast)
            if mni_space:
                mask_file = op.join(project['data_dir'], 'fsaverage',
                                    'masks', mask_name + '.nii.gz')
            else:
                mask_file = op.join(project['data_dir'], subid,
                                    'masks', mask_name + '.nii.gz')
            # Read in the mask as bool using nibabel:
            mask_data = load(mask_file)
            mask_arr = mask_data.get_data().astype(bool)
            num_voxels = mask_arr.sum()
            # Mask the data
            func_masked = func_arr[mask_arr]
            # print func_masked.shape
            # Save to dataframe
            row = pd.DataFrame([dict(subid=subid,
                                     roi=mask + '-' + contrast,
                                     mask_vox=num_voxels,
                                     hemi=hemi,
                                     cond=label,
                                     value=func_masked.mean()), ])
            df = df.append(row, ignore_index=True)
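Boolean indexing assumes the mask and the cope image share the same voxel grid, and an empty mask would silently produce a NaN mean. A minimal guard that could be dropped into the inner loop (a sketch, not part of the original pipeline):

# Sanity checks before averaging (sketch; relies on the loop variables above)
assert func_arr.shape == mask_arr.shape, 'mask and cope are not on the same grid'
if num_voxels == 0:
    print 'empty mask, mean will be NaN: %s' % mask_name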
In [335]:
# Mean mask size (in voxels) per ROI, averaged over subjects and conditions
means = pd.pivot_table(df, 'mask_vox', 'roi')
print means
In [336]:
sns.factorplot(x='cond', y='value', col='roi',
aspect=1, kind='bar', col_wrap=4,
dodge=0.01, ci=68, palette='husl',
units='subid', data=df)
Out[336]:
In [337]:
df_subset = df[(df.roi == 'topdownALEventral_inv_fslMNI_thresh-anat') & (df.cond != 'Test')]
df_subset = df_subset.reset_index()
df_subset['beta'] = df_subset.value
df_subset.head()
Out[337]:
In [338]:
df_subset = df_subset[df_subset.mask_vox > 10].reset_index()
In [339]:
tmp = pd.DataFrame(df_subset.cond.str.split('_').tolist(), columns=['Morph', 'Response'])
df_subset = df_subset.join(tmp)
df_subset.head()
Out[339]:
In [340]:
sns.factorplot(x='Morph', y='beta', #col='hemi',
units='subid', ci=68, kind='point',
dodge=0.1, hue_order=['old', 'new'],
hue='Response', data=df_subset,
palette=['dodgerblue', 'mediumseagreen'])
# plt.savefig('/Users/steph-backup/Dropbox/Experiments/ObjFam/submem/fusiform_bilat.png')
Out[340]:
In [305]:
sns.factorplot(x='Morph', y='beta', col='subid', col_wrap=5,
ci=68, kind='point',
dodge=0.1, hue_order=['old', 'new'],
hue='Response', data=df_subset,
palette=['dodgerblue', 'mediumseagreen'])
Out[305]:
In [341]:
%R -i df_subset
In [343]:
%%R
df_subset$Morphq = as.numeric(df_subset$Morph)
print(str(df_subset))
contrasts(df_subset$Response) = cbind(OldvNew=c(-1, 1))
print(contrasts(df_subset$Response))
#contrasts(df_subset$hemi) = cbind(LvR=c(1, -1))
#print(contrasts(df_subset$hemi))
#res1 = lmer(beta~(scale(Morphq)*Response) + hemi + (1|subid), data=df_subset)
#print(summary(res1))
res1 = lmer(beta~(scale(Morphq)*Response) + (1 + Response + scale(Morphq)|subid), data=df_subset)
print(summary(res1))
In [282]:
%%R
## Summarizes data.
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
## data: a data frame.
## measurevar: the name of a column that contains the variable to be summarized
## groupvars: a vector containing names of columns that contain grouping variables
## na.rm: a boolean that indicates whether to ignore NA's
## conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    require(plyr)

    # New version of length which can handle NA's: if na.rm==T, don't count them
    length2 <- function(x, na.rm=FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # This does the summary. For each group's data frame, return a vector with
    # N, mean, and sd
    datac <- ddply(data, groupvars, .drop=.drop,
                   .fun = function(xx, col) {
                       c(N    = length2(xx[[col]], na.rm=na.rm),
                         mean = mean(xx[[col]], na.rm=na.rm),
                         sd   = sd(xx[[col]], na.rm=na.rm)
                       )
                   },
                   measurevar
    )

    # Rename the "mean" column
    datac <- rename(datac, c("mean" = measurevar))

    datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

    # Confidence interval multiplier for standard error
    # Calculate t-statistic for confidence interval:
    # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
    ciMult <- qt(conf.interval/2 + .5, datac$N-1)
    datac$ci <- datac$se * ciMult

    return(datac)
}
## Norms the data within specified groups in a data frame; it normalizes each
## subject (identified by idvar) so that they have the same mean, within each group
## specified by betweenvars.
## data: a data frame.
## idvar: the name of a column that identifies each subject (or matched subjects)
## measurevar: the name of a column that contains the variable to be summarized
## betweenvars: a vector containing names of columns that are between-subjects variables
## na.rm: a boolean that indicates whether to ignore NA's
normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                           na.rm=FALSE, .drop=TRUE) {
    require(plyr)

    # Measure var on left, idvar + between vars on right of formula.
    data.subjMean <- ddply(data, c(idvar, betweenvars), .drop=.drop,
                           .fun = function(xx, col, na.rm) {
                               c(subjMean = mean(xx[,col], na.rm=na.rm))
                           },
                           measurevar,
                           na.rm
    )

    # Put the subject means with original data
    data <- merge(data, data.subjMean)

    # Get the normalized data in a new column
    measureNormedVar <- paste(measurevar, "_norm", sep="")
    data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
        mean(data[,measurevar], na.rm=na.rm)

    # Remove this subject mean column
    data$subjMean <- NULL

    return(data)
}
## Summarizes data, handling within-subjects variables by removing inter-subject variability.
## It will still work if there are no within-S variables.
## Gives count, un-normed mean, normed mean (with same between-group mean),
## standard deviation, standard error of the mean, and confidence interval.
## If there are within-subject variables, calculate adjusted values using method from Morey (2008).
## data: a data frame.
## measurevar: the name of a column that contains the variable to be summarized
## betweenvars: a vector containing names of columns that are between-subjects variables
## withinvars: a vector containing names of columns that are within-subjects variables
## idvar: the name of a column that identifies each subject (or matched subjects)
## na.rm: a boolean that indicates whether to ignore NA's
## conf.interval: the percent range of the confidence interval (default is 95%)
summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                            idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) {

    # Ensure that the betweenvars and withinvars are factors
    factorvars <- vapply(data[, c(betweenvars, withinvars), drop=FALSE],
                         FUN=is.factor, FUN.VALUE=logical(1))
    if (!all(factorvars)) {
        nonfactorvars <- names(factorvars)[!factorvars]
        message("Automatically converting the following non-factors to factors: ",
                paste(nonfactorvars, collapse = ", "))
        data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
    }

    # Get the means from the un-normed data
    datac <- summarySE(data, measurevar, groupvars=c(betweenvars, withinvars),
                       na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

    # Drop all the unused columns (these will be calculated with normed data)
    datac$sd <- NULL
    datac$se <- NULL
    datac$ci <- NULL

    # Norm each subject's data
    ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)

    # This is the name of the new column
    measurevar_n <- paste(measurevar, "_norm", sep="")

    # Collapse the normed data - now we can treat between and within vars the same
    ndatac <- summarySE(ndata, measurevar_n, groupvars=c(betweenvars, withinvars),
                        na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

    # Apply correction from Morey (2008) to the standard error and confidence interval
    # Get the product of the number of conditions of within-S variables
    nWithinGroups <- prod(vapply(ndatac[,withinvars, drop=FALSE], FUN=nlevels,
                                 FUN.VALUE=numeric(1)))
    correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )

    # Apply the correction factor
    ndatac$sd <- ndatac$sd * correctionFactor
    ndatac$se <- ndatac$se * correctionFactor
    ndatac$ci <- ndatac$ci * correctionFactor

    # Combine the un-normed means with the normed results
    merge(datac, ndatac)
}
In [344]:
%%R
library(ggplot2)
# change background to white, increase font size
theme_set(theme_bw(base_size = 22))
# set up study parameters
y = 'beta'
x = 'Morph'
z = 'Response'
color_palette = c('mediumseagreen', 'dodgerblue')
pos_dodge = position_dodge(.13)
xlabel = 'Morph'
ylabel = 'beta'
legend_title= 'Response'
# Summarize the within-subjects variables (Morey-corrected error bars)
data_summary <- summarySEwithin(df_subset,
                                measurevar=y,
                                withinvars=c(z,x),
                                idvar='subid')
# Plot group means with within-subject error bars
ggplot(data_summary,
       aes(x = Morph, y = beta,
           colour = Response, group = Response)) +
    geom_errorbar(aes(ymin = beta-se, ymax = beta+se),
                  width=0, position=pos_dodge,
                  size = 2) +
    geom_line(position=pos_dodge, size=2) +
    geom_point(position=pos_dodge, size=8) +
    xlab(xlabel) +
    ylab(ylabel) +
    scale_color_manual(values = color_palette,
                       name = legend_title)
ggsave(file="~/Dropbox/Experiments/ObjFam/betas_topdownALEventral_inv_fslMNI_thresh-anat.png", width=7, height=4)
In [148]:
df['condition'] = df['cond']
cond_names = {'1_old': 'specificRecog', '2_old': 'falseRecog', '3_old': 'falseRecog',
              '1_new': 'miss', '2_new': 'cr', '3_new': 'cr'}
for condition in cond_names.keys():
    df.loc[df.cond.isin([condition]), 'condition'] = cond_names[condition]
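An equivalent one-line recoding with pandas map (a sketch; unmapped levels such as 'Test' are kept as-is via fillna):

# Same result as the loop above, assuming cond_names covers all old/new levels
df['condition'] = df['cond'].map(cond_names).fillna(df['cond'])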
In [22]:
df.head()
Out[22]:
In [23]:
sns.factorplot(x='condition', y='value', col='roi',
aspect=1.2, kind='bar', col_wrap=3,
dodge=0.01, ci=68, palette='husl',
units='subid', data=df)
Out[23]:
In [24]:
%R -i df
In [27]:
%%R
roi_name = 'Left-Hippocampus-anat'
cond_list = c('falseRecog', 'specificRecog')
data = df[df$condition %in% cond_list, ]
data = data[data$roi == roi_name, ]
# Factor categorical variables
data$roi = factor(data$roi)
data$condition = factor(data$condition)
print(str(data))
contrasts(data$condition) = c(-1,1)
print(contrasts(data$condition))
# Mixed-effects model
rs1 = lmer(value~condition + (1|subid), data = data)
rs2 = lmer(value~condition + (1 + condition|subid), data = data)
print(anova(rs1, rs2))
In [28]:
%%R
print(summary(rs1))
In [ ]: