In [1]:
library(lme4)
library(dplyr)
library(reshape2)
library(tidyr)
library(ggplot2)
library(grid)
library(gridExtra)
library(boot)
library(repr)
In [2]:
## ICC
#'
#' For each measure, we calculate the ICC using bootstrap to estimate confidence intervals.
#' We do this with our own code so we can ensure non-negative values amongst other things.
#'
icc <- function(data, indices) {
d <- data[indices,] %>%
melt(id.vars=c('Participant','Site',
'Series','Measure'),
measure.vars=c('session_1','session_2'),
variable.name="Session")
fit <- lmer(value ~ 1 + Site + (1|Participant), d)
s_e = sigma(fit)
s_a2 = VarCorr(fit)[[1]][1]
return(s_a2 / (s_e^2 + s_a2))
}
In [4]:
# read the data
corr_anat_qa<-read.csv("2016_05_CORR_qap_anatomical_spatial.csv")
# reduce to the columns that we are interested in
id.vars=c('Participant','Site','Session','Series')
measure.vars=c('CNR','Cortical.Contrast','EFC','FBER','FWHM','Qi1','SNR')
corr_anat_qa<-corr_anat_qa[c(id.vars,measure.vars)]
# Filter 1
# Use only the first scan from sessions 1 & 2
corr_anat_qa <- corr_anat_qa %>%
filter(Series == 'anat_1') %>%
filter(Session %in% c('session_1','session_2'))
corr_anat_qa$Series <- droplevels(corr_anat_qa$Series)
corr_anat_qa$Session <- droplevels(corr_anat_qa$Session)
# Filter 2
# remove participants that do not have two sessions of data
pts_keep <- (corr_anat_qa %>%
group_by(Participant) %>%
summarise(N=n()) %>%
filter(N>1))$Participant
corr_anat_qa <- corr_anat_qa %>%
filter(Participant %in% pts_keep)
# corr_anat_qa$Participant <- droplevels(corr_anat_qa$Participant)
# Filter 3
# remove Sites that have fewer than 5 pts
sites_keep <- (corr_anat_qa %>%
group_by(Site,Session) %>%
summarise(N=n()) %>%
filter(N>5))$Site
corr_anat_qa <- corr_anat_qa %>%
filter(Site %in% sites_keep)
corr_anat_qa$Site <- droplevels(corr_anat_qa$Site)
corr_anat_qa <- corr_anat_qa %>%
melt(id.vars=id.vars,
measure.vars=measure.vars,
variable.name="Measure")
corr_anat_qa$Participant <- factor(corr_anat_qa$Participant)
# one last thing, remove 25296, they are an outlier on motion
corr_anat_qa <- corr_anat_qa %>% filter(Participant != 25296)
table(corr_anat_qa$Site, corr_anat_qa$Session)
n_boot=1000
icc_dc<-data_frame()
for (measure in unique(corr_anat_qa$Measure)){
sdf<-corr_anat_qa %>%
filter(Measure == measure) %>%
spread(Session, value)
system.time(results <- boot(data=sdf, statistic=icc, R=n_boot, parallel="multicore", ncpus=20, strata=sdf$Site))
icc_dc<-rbind(icc_dc,data_frame(Measure=rep(measure,n_boot),Iteration=seq(1,n_boot),Value=as.vector(results$t)))
}
write.table(icc_dc, file = "corr_anat_spat.csv")
In [5]:
# read the data
corr_func_qa<-read.csv("2016_05_CoRR_qap_functional_spatial.csv")
# reduce to the columns that we are interested in
id.vars=c('Participant','Site','Session','Series')
measure.vars=c('EFC','FBER','FWHM','Ghost_y','SNR')
corr_func_qa<-corr_func_qa[c(id.vars,measure.vars)]
# read in the temporal measures
tdf<-read.csv("2016_05_CORR_qap_functional_temporal.csv")
tmeasure.vars=c('Fraction.of.Outliers..Mean.','GCOR','Quality..Mean.',
'RMSD..Mean.','Std..DVARS..Mean.')
tdf<-tdf[c(id.vars,tmeasure.vars)]
# combine the datasets
corr_func_qa<-merge(corr_func_qa,tdf,by=id.vars)
measure.vars<-c(measure.vars,tmeasure.vars)
# Filter 1
# Use only the first scan from sessions 1 & 2
corr_func_qa <- corr_func_qa %>%
filter(Series == 'rest_1') %>%
filter(Session %in% c('session_1','session_2'))
corr_func_qa$Series <- droplevels(corr_func_qa$Series)
corr_func_qa$Session <- droplevels(corr_func_qa$Session)
# Filter 2
# reduce to same participants in anatomical analysis
corr_func_qa <- corr_func_qa %>%
filter(Participant %in% corr_anat_qa$Participant)
corr_func_qa$Site <- droplevels(corr_func_qa$Site)
# mult table to make a seperate row for each obs of each measure
corr_func_qa <- corr_func_qa %>%
melt(id.vars=id.vars,
measure.vars=measure.vars,
variable.name="Measure")
# make sure that participant is a factor
corr_func_qa$Participant <- factor(corr_func_qa$Participant)
table(corr_func_qa$Site, corr_func_qa$Session)
n_boot=1000
icc_dc<-data_frame()
for (measure in unique(corr_func_qa$Measure)){
print(sprintf("Calculating ICC for %s", measure))
sdf<-corr_func_qa %>%
filter(Measure == measure) %>%
spread(Session, value)
results <- boot(data=sdf, statistic=icc, R=n_boot,
parallel="multicore", ncpus=20, strata=sdf$Site)
icc_dc<-rbind(icc_dc,
data_frame(Measure=rep(measure,n_boot),
Iteration=seq(1,n_boot),
Value=as.vector(results$t)))
}
write.table(icc_dc, file = "corr_func_icc_meanfd.csv")
In [86]:
# read the sMRI values
icc_df_smri<-read.table("corr_anat_spat.csv")
icc_df_smri$Datasource<-"Structural MRI"
# read the fMRI values
icc_df_fmri<-read.table("corr_func_icc_meanfd.csv")
icc_df_fmri$Datasource<-"Functional MRI"
# combine into s single dataframe
icc_df<-rbind(icc_df_smri,icc_df_fmri)
head(icc_df)
In [87]:
df<-icc_df
levels(df$Measure)<-list(CNR='CNR',
CC='Cortical.Contrast',
EFC='EFC',
FBER='FBER',
FWHM='FWHM',
FAV='Qi1',
SNR='SNR',
GSR='Ghost_y',
MFO='Fraction.of.Outliers..Mean.',
GCOR='GCOR',
MQI='Quality..Mean.',
MRMSD='RMSD..Mean.',
MDVRS='Std..DVARS..Mean.')
In [88]:
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
options(repr.plot.width=180.0/25.4, repr.plot.height=3)
nrow=1
ncol=3
strat_var='Measure'
cols=c("#78a22f","#f58025","#ffd457","#d75920")
icc_interp=data_frame(x1=c(0,0,0,0),
x2=c(+Inf,+Inf,+Inf,+Inf),
y1=c(0.0,0.4,0.6,0.75),
y2=c(0.4,0.6,0.75,1.2),
Interp=factor(c("Poor","Fair","Good","Excellent")))
mean_sdlow<-function(x) (mean(x)-sd(x))
mean_sdhigh<-function(x) (mean(x)+sd(x))
vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)
pval_thresh=c(0.05,0.01,0.001,0.0001)
pval_indicators=c("*","**","***","****")
datasource.vars = unique(df$Datasource)
p=list()
for (i in seq(1,length( datasource.vars)))
{
# get only the rows corresponding to this measure
sdf<- df %>% filter(Datasource == datasource.vars[i])
sdf_ranges <- sdf %>%
group_by_(as.formula(sprintf('%s ~ .',strat_var))) %>%
summarise(y=mean(Value),y.max=mean_sdhigh(Value),y.min=mean_sdlow(Value)) %>%
ungroup
val_range=quantile(sdf$value, probs = c(0.02,.98))
val_range["2%"]=min(val_range["2%"],min(sdf_ranges$y.min))
val_range["98%"]=max(val_range["98%"],max(sdf_ranges$y.max))
x_string=sprintf("as.factor(%s)",strat_var)
p[[i]] <- ggplot() +
geom_blank(data=sdf, aes_string(x=x_string, y='Value')) +
geom_rect(data=icc_interp, mapping=aes(xmin=x1, xmax=x2,
ymin=y1, ymax=y2,
fill=Interp), alpha=0.25) +
scale_fill_manual(values = cols,
guide=guide_legend(title = "ICC Values",
direction = "horizontal",
title.position = "top",
title.hjust = 0.5,
title.theme = element_text(family = "ArialMT",
face = "plain",
size = 10,
angle = 0,
vjust = 0,
hjust = 0),
label.theme = element_text(family = "ArialMT",
face = "plain",
size = 8,
angle = 0,
hjust = 1))) +
geom_boxplot(data=sdf, aes_string(x=x_string, y='Value'),
outlier.shape = NA, width=.5, fill='white') +
theme_bw() +
ggtitle(datasource.vars[[i]])+
xlab("")+
ylab("ICC(2,1)")+
theme(plot.title = element_text(family = "ArialMT",
face = "plain",
size = 10,
vjust = 0,
hjust = 0.5),
axis.text.x = element_text(family = "ArialMT",
face = "plain",
size = 8,
angle = 45,
hjust = 1),
axis.text.y = element_text(family = "ArialMT",
face = "plain",
size = 8,
angle = 90,
hjust = 0.5),
axis.title.y = element_text(family = "ArialMT",
face = "plain",
size = 10,
angle = 90,
vjust = 0.9),
plot.margin = unit(c(.25, .25, 0.25, .25),
"lines")) +
coord_cartesian(ylim=c(0.2,1.0))
# get the location of this plot in the grid
if(i==1) y=1
if(i==2) y=3
x=1
}
# dev.off()
In [92]:
options(repr.plot.width=180.0/25.4, repr.plot.height=3.5)
# extract the legend
lay <- rbind(c(1,1,1,1,1,1,1,NA,2,2,2,2,2,2,2,2,2,2),
c(3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3))
g <- ggplotGrob(p[[1]] + theme(legend.position="bottom"))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
lheight <- sum(legend$height)
grid.arrange(
p[[1]] + theme(legend.position="none"),
p[[2]] + theme(legend.position="none"),
legend,
layout_matrix=lay,
heights=unit(c(3.0,0.5), c("in","in")))
In [93]:
pdf("icc_plot.pdf", width = 180.0/25.4, height = 3.5, family="ArialMT")
grid.arrange(
p[[1]] + theme(legend.position="none"),
p[[2]] + theme(legend.position="none"),
legend,
layout_matrix=lay,
heights=unit(c(3.0,0.5), c("in","in")))
dev.off()
In [ ]: