In [1]:
workDir = '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_taxaIncorp/'
In [2]:
library(dplyr)
library(tidyr)
library(ggplot2)
In [3]:
# classifying true positives, neg, ...
clsfy = function(guess,known){
if(is.na(guess) | is.na(known)){
return(NA)
}
if(guess == TRUE){
if(guess == known){
return('True positive')
} else {
return('False positive')
}
} else
if(guess == FALSE){
if(guess == known){
return('True negative')
} else {
return('False negative')
}
} else {
stop('Error: true or false needed')
}
}
In [4]:
# files on simulation accuracy
files = list.files(path=workDir, pattern='*-cMtx_byClass.txt', full.names=TRUE)
files
In [5]:
# combining files
df_byClass = list()
for (f in files){
ff = strsplit(f, '/') %>% unlist
fff = ff[length(ff)]
df_byClass[[fff]] = read.delim(f, sep='\t')
}
df_byClass = do.call(rbind, df_byClass)
df_byClass$file = gsub('\\.[0-9]+$', '', rownames(df_byClass))
df_byClass$method = gsub('-.+', '', df_byClass$file)
rownames(df_byClass) = 1:nrow(df_byClass)
df_byClass %>% dim %>% print
df_byClass %>% head(n=3)
In [6]:
df_byClass$method %>% table
In [7]:
# renaming methods
rename = data.frame(method = c('DESeq2', 'DESeq2_multi', 'heavyM1', 'qSIP'),
method_new = c('HR-SIP', 'MW-HR-SIP', 'Heavy-SIP', 'q-SIP'))
df_byClass = inner_join(df_byClass, rename, c('method'='method')) %>%
select(-method) %>%
rename('method' = method_new)
df_byClass %>% dim %>% print
df_byClass %>% head(n=3)
In [8]:
# summarize by SIPSim rep & library rep
df_byClass.s = df_byClass %>%
group_by(method, percIncorp, percTaxa, variables) %>%
summarize(mean_value = mean(values, na.rm=TRUE),
sd_value = sd(values, na.rm=TRUE))
# plotting
options(repr.plot.width=8, repr.plot.height=5)
p = ggplot(df_byClass.s, aes(variables, mean_value, color=method,
ymin=mean_value-sd_value,
ymax=mean_value+sd_value)) +
geom_pointrange(alpha=0.8, size=0.2) +
labs(y='Value') +
facet_grid(percTaxa ~ percIncorp) +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle=65, hjust=1)
)
plot(p)
In [9]:
# summarize by SIPSim rep & library rep
vars = c('Balanced Accuracy', 'Sensitivity', 'Specificity')
df_byClass.s.f = df_byClass.s %>%
filter(variables %in% vars) %>%
ungroup() %>%
mutate(percTaxa = percTaxa %>% as.character,
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
# plotting
options(repr.plot.width=9, repr.plot.height=5)
p.pnt = ggplot(df_byClass.s.f, aes(percIncorp, mean_value,
color=percTaxa,
group=percTaxa,
ymin=mean_value-sd_value,
ymax=mean_value+sd_value)) +
geom_point(alpha=0.8) +
geom_linerange(alpha=0.8, size=0.5) +
geom_line() +
scale_color_discrete('% incorp-\norators') +
labs(x='atom % excess 13C') +
facet_grid(variables ~ method) +
theme_bw() +
theme(
axis.title.y = element_blank()
)
plot(p.pnt)
In [10]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_acc.pdf')
ggsave(outF, p.pnt, width=10, height=6)
cat('File written:', outF, '\n')
In [11]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_acc.jpeg')
ggsave(outF, p.pnt, width=10, height=6)
cat('File written:', outF, '\n')
In [12]:
# combining files
df_byClass = list()
for (f in files){
ff = strsplit(f, '/') %>% unlist
fff = ff[length(ff)]
df_byClass[[fff]] = read.delim(f, sep='\t')
}
df_byClass = do.call(rbind, df_byClass)
df_byClass$file = gsub('\\.[0-9]+$', '', rownames(df_byClass))
df_byClass$method = gsub('-.+', '', df_byClass$file)
rownames(df_byClass) = 1:nrow(df_byClass)
df_byClass %>% dim %>% print
df_byClass %>% head(n=3)
In [13]:
# renaming methods
rename = data.frame(method = c('heavyM1', 'heavyM2', 'heavyM3', 'heavyM4'),
method_new = c('Heavy-SIP Method 1', 'Heavy-SIP Method 2',
'Heavy-SIP Method 3', 'Heavy-SIP Method 4'))
df_byClass = inner_join(df_byClass, rename, c('method'='method')) %>%
select(-method) %>%
rename('method' = method_new)
df_byClass %>% dim %>% print
df_byClass %>% head(n=3)
In [14]:
# summarizing by method
df_byClass.s = df_byClass %>%
group_by(method, percIncorp, percTaxa, variables) %>%
summarize(mean_value = mean(values, na.rm=TRUE),
sd_value = sd(values, na.rm=TRUE)) %>%
ungroup()
# summarize by SIPSim rep & library rep
vars = c('Balanced Accuracy', 'Sensitivity', 'Specificity')
df_byClass.s.f = df_byClass.s %>%
filter(variables %in% vars) %>%
ungroup() %>%
mutate(percTaxa = percTaxa %>% as.character,
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
# plotting
options(repr.plot.width=9, repr.plot.height=5)
p.pnt.H = ggplot(df_byClass.s.f, aes(percIncorp, mean_value,
color=percTaxa,
group=percTaxa,
ymin=mean_value-sd_value,
ymax=mean_value+sd_value)) +
geom_point(alpha=0.8) +
geom_linerange(alpha=0.8, size=0.5) +
geom_line() +
scale_color_discrete('% incorp-\norators') +
labs(x='atom % excess 13C') +
facet_grid(variables ~ method) +
theme_bw() +
theme(
axis.title.y = element_blank()
)
plot(p.pnt.H)
In [15]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_acc-heavy.pdf')
ggsave(outF, p.pnt.H, width=10, height=6)
cat('File written:', outF, '\n')
In [16]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_acc-heavy.jpeg')
ggsave(outF, p.pnt.H, width=10, height=6)
cat('File written:', outF, '\n')
In [17]:
BDshift_files = list.files(path=workDir, pattern='BD-shift_stats.txt', full.names=TRUE, recursive=TRUE)
BDshift_files %>% length %>% print
In [18]:
df_shift = list()
for(F in BDshift_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_shift[[F]] = tmp
}
df_shift = do.call(rbind, df_shift)
rownames(df_shift) = 1:nrow(df_shift)
df_shift = df_shift %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon, percIncorp, percTaxa, rep) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE))
# status
df_shift %>% nrow %>% print
df_shift %>% head(n=3)
In [19]:
comm_files = list.files(path=workDir, pattern='comm.txt', full.names=TRUE, recursive=TRUE)
comm_files %>% length %>% print
In [20]:
df_comm = list()
for(F in comm_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_comm[[F]] = tmp
}
df_comm = do.call(rbind, df_comm)
rownames(df_comm) = 1:nrow(df_comm)
# mean abund for libraries
df_comm = df_comm %>%
rename('taxon' = taxon_name) %>%
group_by(taxon, percIncorp, percTaxa, rep) %>%
summarize(mean_rel_abund_perc = mean(rel_abund_perc),
mean_rank_abund = mean(rank)) %>%
ungroup()
# status
df_comm %>% nrow %>% print
df_comm %>% head(n=3)
In [21]:
MW_files = list.files(path=workDir, pattern='_MW_DESeq2_incorp.txt', full.names=TRUE, recursive=TRUE)
MW_files %>% length %>% print
In [22]:
df_MW = list()
#for(F in MW_files){
for(F in MW_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_MW[[F]] = tmp
}
df_MW = do.call(rbind, df_MW)
rownames(df_MW) = 1:nrow(df_MW)
# status
df_MW %>% nrow %>% print
df_MW %>% head(n=3)
In [23]:
join_vars = c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'percTaxa'='percTaxa',
'rep'='rep')
# joining tables
df_MW %>% nrow %>% print
df.j = df_MW %>%
left_join(df_shift, join_vars)
df.j %>% nrow %>% print
df.j = df.j %>%
left_join(df_comm, join_vars)
# status
df.j %>% nrow %>% print
df.j %>% head(n=3)
In [24]:
# calling true_pos + false_neg
df.j = df.j %>%
mutate(incorp_cls = mapply(clsfy, incorp, true_incorporator))
# status
df.j %>% nrow %>% print
df.j$incorp_cls %>% table %>% print
In [25]:
# function for calculating sensitivity
calc_sensitivity = function(incorp_cls){
tp = sum(incorp_cls == 'True positive')
fn = sum(incorp_cls == 'False negative')
x = tp / (tp + fn)
ifelse(is.na(x), 0, x)
}
# grouping by abundance and calculating sensitivity
df.j.s = df.j %>%
mutate(n_group = ntile(log10(mean_rel_abund_perc), 10)) %>%
group_by(n_group, percIncorp, percTaxa, rep) %>%
summarize(min_abund = min(mean_rel_abund_perc, na.rm=TRUE),
mean_abund = mean(mean_rel_abund_perc, na.rm=TRUE),
max_abund = max(mean_rel_abund_perc, na.rm=TRUE),
sensitivity = calc_sensitivity(incorp_cls)) %>%
group_by(n_group, percIncorp, percTaxa) %>%
summarize(mean_abund = mean(mean_abund),
mean_sensitivity = mean(sensitivity),
sd_sensitivity = sd(sensitivity)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% as.character,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% as.character,
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
# status
df.j.s %>% head(n=3)
In [26]:
# plotting
options(repr.plot.width=8, repr.plot.height=4)
p_sens_abund = ggplot(df.j.s, aes(mean_abund, mean_sensitivity,
color=percTaxa,
ymin=mean_sensitivity-sd_sensitivity,
ymax=mean_sensitivity+sd_sensitivity)) +
geom_line(alpha=0.7) +
geom_linerange(alpha=0.7) +
geom_point(alpha=0.7) +
scale_x_log10(breaks=c(1e-3, 1e-2, 1e-1, 1e0)) +
scale_color_discrete('% incorp-\norators') +
labs(x='Mean % abundance',
y='Sensitivity') +
facet_wrap(~ percIncorp) +
theme_bw()
p_sens_abund
In [27]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_sens-abund.pdf')
ggsave(outF, p_sens_abund, width=10, height=5)
cat('File written:', outF, '\n')
In [28]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_sens-abund.jpeg')
ggsave(outF, p_sens_abund, width=10, height=5)
cat('File written:', outF, '\n')
In [29]:
BDshift_files = list.files(path=workDir, pattern='BD-shift_stats.txt', full.names=TRUE, recursive=TRUE)
BDshift_files %>% length %>% print
In [30]:
df_shift = list()
for(F in BDshift_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_shift[[F]] = tmp
}
df_shift = do.call(rbind, df_shift)
rownames(df_shift) = 1:nrow(df_shift)
df_shift = df_shift %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon, percIncorp, percTaxa, rep) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median)
# status
df_shift %>% nrow %>% print
df_shift %>% head(n=3)
In [31]:
incorp_files = list.files(path=workDir, pattern='OTU_abs1e9_PCR_sub_filt_MW_DESeq2_incorp.txt', full.names=TRUE, recursive=TRUE)
incorp_files %>% length %>% print
In [32]:
df_incorp = list()
for(F in incorp_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_incorp[[F]] = tmp
}
df_incorp = do.call(rbind, df_incorp)
rownames(df_incorp) = 1:nrow(df_incorp)
df_incorp %>% head(n=3) %>% print
In [33]:
# just incorporators
df_incorp = df_incorp %>%
filter(incorp == TRUE) %>%
dplyr::distinct(taxon, incorp, percIncorp, percTaxa, rep) %>%
rename('HWHRSIP_incorp' = incorp)
df_incorp %>% nrow %>% print
df_incorp %>% head(n=3) %>% print
In [141]:
atomX_files = list.files(path=workDir, pattern='*_qSIP_atom.txt', full.names=TRUE, recursive=TRUE)
atomX_files %>% length %>% print
In [142]:
df_atomX = list()
for(F in atomX_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_atomX[[F]] = tmp
}
df_atomX = do.call(rbind, df_atomX)
rownames(df_atomX) = 1:nrow(df_atomX)
df_atomX %>% head(n=3) %>% print
In [143]:
# table join
df_atomX %>% nrow %>% print
df.j = left_join(df_atomX, df_shift, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'percTaxa'='percTaxa',
'rep'='rep')) %>%
filter(!is.na(BD_diff)) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE),
true_atom_fraction_excess = median_true_BD_shift / 0.036,
atom_fraction_excess = ifelse(is.na(atom_CI_low), 0, atom_fraction_excess))
df.j %>% nrow %>% print
df.j %>% head(n=3) %>% print
In [144]:
df.j$true_incorporator %>% summary
In [145]:
df.j = left_join(df.j, df_incorp, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'percTaxa'='percTaxa',
'rep'='rep')) %>%
mutate(HWHRSIP_incorp = ifelse(is.na(HWHRSIP_incorp), FALSE, TRUE))
df.j %>% nrow %>% print
df.j %>% head(n=3) %>% print
In [146]:
# difference between true and estimated
## q-SIP incorporators (true vs false)
df.j.dis.qSIP = df.j %>%
filter(atom_CI_low > 0,
true_incorporator == TRUE) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, percTaxa, true_incorporator) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
# plotting
options(repr.plot.width=6, repr.plot.height=2.5)
p_qSIP_true = ggplot(df.j.dis.qSIP, aes(percIncorp, mean_delta_excess,
color=percTaxa, group=percTaxa,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
geom_line() +
#facet_grid(true_incorporator ~ .) +
scale_color_discrete('% incorp-\norators') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
plot(p_qSIP_true)
In [147]:
# difference between true and estimated
## q-SIP incorporators (true vs false)
df.j.dis.qSIP.TF = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
true_incorporator = ifelse(true_incorporator == TRUE, 'True incorp.', 'False incorp.')) %>%
group_by(percIncorp, percTaxa, true_incorporator) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
## q-SIP on all taxa
df.j.dis.qSIP.A = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
true_incorporator = 'All incorp.') %>%
group_by(percIncorp, percTaxa, true_incorporator) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
## combining
levs = c('All incorp.', 'True incorp.', 'False incorp.')
df.j.dis.qSIP = rbind(df.j.dis.qSIP.TF, df.j.dis.qSIP.A) %>%
mutate(true_incorporator = factor(true_incorporator, levels=levs))
# plotting
options(repr.plot.width=6, repr.plot.height=4)
p_qSIP_TF = ggplot(df.j.dis.qSIP, aes(percIncorp, mean_delta_excess,
color=percTaxa, group=percTaxa,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
geom_line() +
facet_grid(true_incorporator ~ .) +
scale_color_discrete('% incorp-\norators') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess (truth - estimate)') +
theme_bw()
plot(p_qSIP_TF)
In [148]:
# # difference between true and estimated
# ## q-SIP incorporators
# df.j.dis.qSIP = df.j %>%
# filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
# filter(HWHRSIP_incorp == TRUE) %>% # just MW-HR-SIP incorporators
# mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
# group_by(percIncorp, percTaxa) %>%
# summarize(mean_delta_excess = mean(delta_excess),
# sd_delta_excess = sd(delta_excess)) %>%
# ungroup() %>%
# mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
# percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
# # plotting
# options(repr.plot.width=6, repr.plot.height=2.5)
# p_qSIP = ggplot(df.j.dis.qSIP, aes(percIncorp, mean_delta_excess,
# color=percTaxa, group=percTaxa,
# ymin=mean_delta_excess-sd_delta_excess,
# ymax=mean_delta_excess+sd_delta_excess)) +
# geom_linerange(alpha=0.4, size=1) +
# geom_point() +
# geom_line() +
# #facet_grid(true_incorporator ~ .) +
# scale_color_discrete('% incorp-\norators') +
# labs(x='13C atom % excess (truth)',
# y='13C atom % excess\n(truth - estimate)') +
# theme_bw()
# plot(p_qSIP)
In [149]:
# difference between true and estimated
## q-SIP incorporators
tmp1 = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, percTaxa) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
incorp_called = 'No filter')
## MW-HR-SIP incorporators
tmp2 = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
filter(HWHRSIP_incorp == TRUE) %>% # just MW-HR-SIP incorporators
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, percTaxa) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
incorp_called = 'MW-HR-SIP filter')
# combining tables
df.j.dis.qSIP = rbind(tmp1, tmp2) %>%
mutate(incorp_called = factor(incorp_called, levels=c('No filter', 'MW-HR-SIP filter')))
# plotting
options(repr.plot.width=6, repr.plot.height=4)
p_qSIP_filter = ggplot(df.j.dis.qSIP, aes(percIncorp, mean_delta_excess,
color=percTaxa, group=percTaxa,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
facet_grid(incorp_called ~ .) +
scale_color_discrete('% incorp-\norators') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
plot(p_qSIP_filter)
In [150]:
df.j.f = df.j %>%
filter(percTaxa == '10',
atom_CI_low > 0) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
atom_perc_excess = atom_fraction_excess * 100,
true_atom_perc_excess = true_atom_fraction_excess * 100)
df.j.f %>% head
In [151]:
# for vline
tmp = df.j.f %>%
distinct(percIncorp) %>%
mutate(true_atom_perc_excess = percIncorp %>% as.character %>% as.numeric)
options(repr.plot.width=10, repr.plot.height=2.5)
p.qSIP.dens = ggplot(df.j.f, aes(atom_perc_excess, fill=true_incorporator)) +
geom_density(alpha=0.5) +
geom_vline(data=tmp, aes(xintercept=true_atom_perc_excess), linetype='dashed', alpha=0.4) +
scale_x_continuous(limits=c(-20, 120)) +
scale_fill_discrete('True incorp-\norator?') +
labs(x='atom % excess 13C (estimate)', y='Probability density') +
facet_grid(. ~ percIncorp) +
theme_bw()
plot(p.qSIP.dens)
In [152]:
df.j.f = df.j %>%
filter(percTaxa == '10',
atom_CI_low > 0,
true_incorporator == TRUE) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
atom_perc_excess = atom_fraction_excess * 100,
true_atom_perc_excess = true_atom_fraction_excess * 100)
df.j.f %>% head
In [153]:
# for vline
tmp = df.j.f %>%
distinct(percIncorp) %>%
mutate(true_atom_perc_excess = percIncorp %>% as.character %>% as.numeric)
options(repr.plot.width=10, repr.plot.height=2.5)
p.qSIP.dens.true = ggplot(df.j.f, aes(atom_perc_excess)) +
geom_density(alpha=0.5, fill='grey70') +
geom_vline(data=tmp, aes(xintercept=true_atom_perc_excess), linetype='dashed', alpha=0.4) +
scale_x_continuous(limits=c(-20, 120)) +
labs(x='atom % excess 13C (estimate)', y='Probability density') +
facet_grid(. ~ percIncorp) +
theme_bw()
plot(p.qSIP.dens.true)
In [154]:
dBD_files = list.files(path=workDir, pattern='*_dBD.txt', full.names=TRUE, recursive=TRUE)
dBD_files %>% length %>% print
In [155]:
df_dBD = list()
for(F in dBD_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_dBD[[F]] = tmp
}
df_dBD = do.call(rbind, df_dBD)
rownames(df_dBD) = 1:nrow(df_dBD)
df_dBD %>% head(n=3) %>% print
In [156]:
df.j = inner_join(df_dBD, df_shift, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'percTaxa'='percTaxa',
'rep'='rep')) %>%
filter(!is.na(delta_BD)) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE),
true_atom_fraction_excess = median_true_BD_shift / 0.036,
atom_fraction_excess = delta_BD / 0.036)
df.j %>% head(n=3)
In [157]:
df.j = left_join(df.j, df_incorp, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'percTaxa'='percTaxa',
'rep'='rep')) %>%
mutate(HWHRSIP_incorp = ifelse(is.na(HWHRSIP_incorp), FALSE, TRUE))
df.j %>% nrow %>% print
df.j %>% head(n=3)
In [182]:
# difference between true and estimated
df.j.dis.dBD = df.j %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
filter(true_incorporator == TRUE) %>%
group_by(percIncorp, percTaxa, true_incorporator) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
# plotting
options(repr.plot.width=6, repr.plot.height=2.5)
p_dBD_true = ggplot(df.j.dis.dBD, aes(percIncorp, mean_delta_excess,
color=percTaxa, group=percTaxa,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
scale_color_discrete('% incorp-\norators') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
plot(p_dBD_true)
In [183]:
# difference between true and estimated
tmp1 = df.j %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
true_incorporator = ifelse(true_incorporator == TRUE, 'True incorp.', 'False incorp.')) %>%
#filter(HWHRSIP_incorp == TRUE) %>%
group_by(percIncorp, percTaxa, true_incorporator) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
tmp2 = df.j %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
true_incorporator = 'All incorp.') %>%
#filter(HWHRSIP_incorp == TRUE) %>%
group_by(percIncorp, percTaxa, true_incorporator) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
levs = c('All incorp.', 'True incorp.', 'False incorp.')
df.j.dis.dBD = rbind(tmp1, tmp2) %>%
mutate(true_incorporator = factor(true_incorporator, levels=levs))
# plotting
options(repr.plot.width=6, repr.plot.height=4)
p_dBD = ggplot(df.j.dis.dBD, aes(percIncorp, mean_delta_excess,
color=percTaxa, group=percTaxa,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
facet_grid(true_incorporator ~ .) +
scale_color_discrete('% incorp-\norators') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
plot(p_dBD)
In [184]:
# difference between true and estimated
tmp1 = df.j %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, percTaxa) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
incorp_called = 'No filter')
tmp2 = df.j %>%
filter(HWHRSIP_incorp == TRUE) %>% # just MW-HR-SIP incorporators
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, percTaxa) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
incorp_called = 'MW-HR-SIP filter')
# combining tables
df.j.dis.dBD = rbind(tmp1, tmp2) %>%
mutate(incorp_called = factor(incorp_called, levels=c('No filter', 'MW-HR-SIP filter')))
# plotting
options(repr.plot.width=6, repr.plot.height=4)
p_dBD_filter = ggplot(df.j.dis.dBD, aes(percIncorp, mean_delta_excess,
color=percTaxa, group=percTaxa,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
facet_grid(incorp_called ~ .) +
scale_color_discrete('% incorp-\norators') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
plot(p_dBD_filter)
In [185]:
df.j.f = df.j %>%
filter(percTaxa == '10') %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
atom_perc_excess = atom_fraction_excess * 100,
true_atom_perc_excess = true_atom_fraction_excess * 100)
df.j.f %>% head
In [186]:
# for vline
tmp = df.j.f %>%
distinct(percIncorp) %>%
mutate(true_atom_perc_excess = percIncorp %>% as.character %>% as.numeric)
options(repr.plot.width=10, repr.plot.height=2.5)
p.deltaBD.dens = ggplot(df.j.f, aes(atom_perc_excess, fill=true_incorporator)) +
geom_density(alpha=0.5) +
geom_vline(data=tmp, aes(xintercept=true_atom_perc_excess), linetype='dashed', alpha=0.4) +
scale_x_continuous(limits=c(-20, 120)) +
scale_fill_discrete('True incorp-\norator?') +
labs(x='atom % excess 13C (estimate)', y='Probability density') +
facet_grid(. ~ percIncorp) +
theme_bw()
plot(p.deltaBD.dens)
In [187]:
df.j.f = df.j %>%
filter(percTaxa == '10',
true_incorporator == TRUE) %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric),
atom_perc_excess = atom_fraction_excess * 100,
true_atom_perc_excess = true_atom_fraction_excess * 100)
df.j.f %>% head
In [188]:
# for vline
tmp = df.j.f %>%
distinct(percIncorp) %>%
mutate(true_atom_perc_excess = percIncorp %>% as.character %>% as.numeric)
options(repr.plot.width=10, repr.plot.height=2.5)
p.deltaBD.dens.true = ggplot(df.j.f, aes(atom_perc_excess)) +
geom_density(alpha=0.5, fill='grey70') +
geom_vline(data=tmp, aes(xintercept=true_atom_perc_excess), linetype='dashed', alpha=0.4) +
scale_x_continuous(limits=c(-20, 120)) +
labs(x='atom % excess 13C (estimate)', y='Probability density') +
facet_grid(. ~ percIncorp) +
theme_bw()
plot(p.deltaBD.dens.true)
In [195]:
options(repr.plot.width=8, repr.plot.height=6)
cowplot::ggdraw() +
cowplot::draw_plot(p_dBD_true, 0.5, 0.6, 0.5, 0.4) +
cowplot::draw_plot(p_qSIP_true, 0, 0.6, 0.5, 0.4) +
cowplot::draw_plot(p.deltaBD.dens.true, 0, 0.3, 1, 0.3) +
cowplot::draw_plot(p.qSIP.dens.true, 0, 0, 1, 0.3)
In [175]:
df.jj = rbind(df.j.dis.qSIP %>% mutate(method='qSIP'),
df.j.dis.dBD %>% mutate(method='Delta BD')) %>%
mutate(method = gsub('qSIP', 'q-SIP', method))
options(repr.plot.width=8, repr.plot.height=4)
p.comb = ggplot(df.jj, aes(percIncorp, mean_delta_excess,
color=percTaxa, group=percTaxa,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.5, size=1) +
geom_point() +
scale_color_discrete('% incorp-\norators') +
labs(x='atom % excess 13C (truth)',
y='atom % excess 13C\n(truth - estimate)') +
facet_grid(incorp_called ~ method) +
theme_bw()
p.comb
In [172]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_BDshift-MWHRSIP.pdf')
ggsave(outF, p.comb, width=8, height=4)
cat('File written:', outF, '\n')
In [173]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_BDshift-MWHRSIP.jpeg')
ggsave(outF, p.comb, width=8, height=4)
cat('File written:', outF, '\n')
In [ ]:
BDshift_files = list.files(path=workDir, pattern='BD-shift_stats.txt', full.names=TRUE, recursive=TRUE)
BDshift_files %>% length %>% print
In [ ]:
df_shift = list()
for(F in BDshift_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_shift[[F]] = tmp
}
df_shift = do.call(rbind, df_shift)
rownames(df_shift) = 1:nrow(df_shift)
df_shift = df_shift %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon, percIncorp, percTaxa, rep) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median)
# status
df_shift %>% nrow %>% print
df_shift %>% head(n=3)
In [ ]:
atomX_files = list.files(path=workDir, pattern='*_qSIP_atom.txt', full.names=TRUE, recursive=TRUE)
atomX_files %>% length %>% print
In [ ]:
df_atomX = list()
for(F in atomX_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_atomX[[F]] = tmp
}
df_atomX = do.call(rbind, df_atomX)
rownames(df_atomX) = 1:nrow(df_atomX)
df_atomX %>% head(n=3) %>% print
In [ ]:
comm_files = list.files(path=workDir, pattern='comm.txt', full.names=TRUE, recursive=TRUE)
comm_files %>% length %>% print
In [ ]:
df_comm = list()
for(F in comm_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$percTaxa = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_comm[[F]] = tmp
}
df_comm = do.call(rbind, df_comm)
rownames(df_comm) = 1:nrow(df_comm)
# mean abund for libraries
df_comm = df_comm %>%
rename('taxon' = taxon_name) %>%
mutate(rel_abund_perc = ifelse(is.na(rel_abund_perc), 0, rel_abund_perc)) %>%
group_by(taxon, percIncorp, percTaxa, rep) %>%
summarize(any_zero = any(rel_abund_perc == 0),
mean_rel_abund_perc = mean(rel_abund_perc),
mean_rank_abund = mean(rank)) %>%
ungroup()
# status
df_comm %>% nrow %>% print
df_comm %>% head(n=3)
In [ ]:
df_comm$any_zero %>% table
In [ ]:
# table join
join_vars = c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'percTaxa'='percTaxa',
'rep'='rep')
df_atomX %>% nrow %>% print
# joining q-SIP + true-BD
df.j = left_join(df_atomX, df_shift, join_vars) %>%
filter(atom_CI_low > 0) %>%
filter(!is.na(BD_diff)) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE),
true_atom_fraction_excess = median_true_BD_shift / 0.036,
atom_fraction_excess = ifelse(is.na(atom_CI_low), 0, atom_fraction_excess)) %>%
#filter(true_incorporator == TRUE) %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100)
df.j %>% nrow %>% print
# joining with abundances
df.j = df.j %>%
inner_join(df_comm, join_vars) %>%
mutate(percIncorp = percIncorp %>% as.character,
percTaxa = percTaxa %>% as.character,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
percTaxa = percTaxa %>% reorder(percTaxa %>% as.numeric))
# status
df.j %>% nrow %>% print
df.j %>% head(n=3) %>% print
In [ ]:
options(repr.plot.width=8, repr.plot.height=6)
p.qSIP_abund = ggplot(df.j, aes(mean_rel_abund_perc, delta_excess,
color=true_incorporator, group=true_incorporator)) +
#geom_point(shape='O', alpha=0.15, size=0.5) +
geom_smooth(size=0.3) +
scale_x_log10() +
scale_color_discrete('True incorp-\norator') +
labs(x='% abundance',
y='atom % excess 13C\n(truth - estimate)') +
facet_grid(percIncorp ~ percTaxa) +
theme_bw()
p.qSIP_abund
In [ ]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_qSIP-abund.pdf')
ggsave(outF, p.qSIP_abund, width=9, height=6)
cat('File written:', outF, '\n')
In [ ]:
outF = file.path(workDir, 'atomIncorp_taxaIncorp_qSIP-abund.jpeg')
ggsave(outF, p.qSIP_abund, width=9, height=6)
cat('File written:', outF, '\n')
In [ ]: