In [1]:
workDir = '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_sampDepth/'
In [2]:
library(dplyr)
library(tidyr)
library(ggplot2)
In [3]:
# classifying true positives, neg, ...
clsfy = function(guess,known){
if(is.na(guess) | is.na(known)){
return(NA)
}
if(guess == TRUE){
if(guess == known){
return('True positive')
} else {
return('False positive')
}
} else
if(guess == FALSE){
if(guess == known){
return('True negative')
} else {
return('False negative')
}
} else {
stop('Error: true or false needed')
}
}
In [4]:
# files on simulation accuracy
files = list.files(path=workDir, pattern='*-cMtx_byClass.txt', full.names=TRUE)
files
In [5]:
# combining files
df_byClass = list()
for (f in files){
ff = strsplit(f, '/') %>% unlist
fff = ff[length(ff)]
df_byClass[[fff]] = read.delim(f, sep='\t')
}
df_byClass = do.call(rbind, df_byClass)
df_byClass$file = gsub('\\.[0-9]+$', '', rownames(df_byClass))
df_byClass$method = gsub('-.+', '', df_byClass$file)
rownames(df_byClass) = 1:nrow(df_byClass)
df_byClass %>% head(n=3)
In [6]:
# renaming methods
rename = data.frame(method = c('DESeq2', 'DESeq2_multi', 'heavy', 'qSIP'),
method_new = c('HR-SIP', 'MW-HR-SIP', 'Heavy-SIP', 'q-SIP'))
df_byClass = inner_join(df_byClass, rename, c('method'='method')) %>%
select(-method) %>%
rename('method' = method_new)
df_byClass %>% head(n=3)
In [7]:
# summarize by SIPSim rep & library rep
df_byClass.s = df_byClass %>%
group_by(method, percIncorp, subsample_size, variables) %>%
summarize(mean_value = mean(values, na.rm=TRUE),
sd_value = sd(values, na.rm=TRUE))
# plotting
options(repr.plot.width=8, repr.plot.height=5)
p = ggplot(df_byClass.s, aes(variables, mean_value, color=method,
ymin=mean_value-sd_value,
ymax=mean_value+sd_value)) +
geom_pointrange(alpha=0.8, size=0.2) +
labs(y='Value') +
facet_grid(subsample_size ~ percIncorp) +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle=65, hjust=1)
)
plot(p)
In [8]:
# summarize by SIPSim rep & library rep
vars = c('Balanced Accuracy', 'Sensitivity', 'Specificity')
df_byClass.s.f = df_byClass.s %>%
filter(variables %in% vars) %>%
ungroup() %>%
mutate(subsample_size = subsample_size %>% as.character,
subsample_size = subsample_size %>% reorder(subsample_size %>% as.numeric))
# plotting
options(repr.plot.width=9, repr.plot.height=5)
p.pnt = ggplot(df_byClass.s.f, aes(percIncorp, mean_value,
color=subsample_size,
group=subsample_size,
ymin=mean_value-sd_value,
ymax=mean_value+sd_value)) +
geom_point(alpha=0.8) +
geom_linerange(alpha=0.8, size=0.5) +
geom_line() +
scale_color_discrete('Sequencing\ndepth') +
labs(x='atom % excess 13C') +
facet_grid(variables ~ method) +
theme_bw() +
theme(
axis.title.y = element_blank()
)
plot(p.pnt)
In [9]:
outF = file.path(workDir, 'atomIncorp_sampDepth_acc.pdf')
ggsave(outF, p.pnt, width=10, height=6)
cat('File written:', outF, '\n')
In [10]:
outF = file.path(workDir, 'atomIncorp_sampDepth_acc.jpeg')
ggsave(outF, p.pnt, width=10, height=6)
cat('File written:', outF, '\n')
In [48]:
BDshift_files = list.files(path=workDir, pattern='BD-shift_stats.txt', full.names=TRUE, recursive=TRUE)
BDshift_files %>% length %>% print
In [49]:
df_shift = list()
for(F in BDshift_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_shift[[F]] = tmp
}
df_shift = do.call(rbind, df_shift)
rownames(df_shift) = 1:nrow(df_shift)
df_shift = df_shift %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon, percIncorp, subsample_size, rep) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE))
# status
df_shift %>% nrow %>% print
df_shift %>% head(n=3)
In [50]:
comm_files = list.files(path=workDir, pattern='comm.txt', full.names=TRUE, recursive=TRUE)
comm_files %>% length %>% print
In [51]:
df_comm = list()
for(F in comm_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_comm[[F]] = tmp
}
df_comm = do.call(rbind, df_comm)
rownames(df_comm) = 1:nrow(df_comm)
# mean abund for libraries
df_comm = df_comm %>%
rename('taxon' = taxon_name) %>%
group_by(taxon, percIncorp, subsample_size, rep) %>%
summarize(mean_rel_abund_perc = mean(rel_abund_perc),
mean_rank_abund = mean(rank)) %>%
ungroup()
# status
df_comm %>% nrow %>% print
df_comm %>% head(n=3)
In [52]:
MW_files = list.files(path=workDir, pattern='_MW_DESeq2_incorp.txt', full.names=TRUE, recursive=TRUE)
MW_files %>% length %>% print
In [53]:
df_MW = list()
#for(F in MW_files){
for(F in MW_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_MW[[F]] = tmp
}
df_MW = do.call(rbind, df_MW)
rownames(df_MW) = 1:nrow(df_MW)
# status
df_MW %>% nrow %>% print
df_MW %>% head(n=3)
In [54]:
join_vars = c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'subsample_size'='subsample_size',
'rep'='rep')
# joining tables
df_MW %>% nrow %>% print
df.j = df_MW %>%
left_join(df_shift, join_vars)
df.j %>% nrow %>% print
df.j = df.j %>%
left_join(df_comm, join_vars)
# status
df.j %>% nrow %>% print
df.j %>% head(n=3)
In [58]:
# calling true_pos + false_neg
df.j = df.j %>%
mutate(incorp_cls = mapply(clsfy, incorp, true_incorporator))
# status
df.j %>% nrow %>% print
df.j$incorp_cls %>% table %>% print
In [66]:
# function for calculating sensitivity
calc_sensitivity = function(incorp_cls){
incorp_cls = incorp_cls[!is.na(incorp_cls)]
tp = sum(incorp_cls == 'True positive')
fn = sum(incorp_cls == 'False negative')
x = tp / (tp + fn)
ifelse(is.na(x), 0, x)
}
# grouping by abundance and calculating sensitivity
df.j.s = df.j %>%
mutate(n_group = ntile(log10(mean_rel_abund_perc), 10)) %>%
group_by(n_group, percIncorp, subsample_size, rep) %>%
summarize(min_abund = min(mean_rel_abund_perc, na.rm=TRUE),
mean_abund = mean(mean_rel_abund_perc, na.rm=TRUE),
max_abund = max(mean_rel_abund_perc, na.rm=TRUE),
sensitivity = calc_sensitivity(incorp_cls)) %>%
group_by(n_group, percIncorp, subsample_size) %>%
summarize(mean_abund = mean(mean_abund),
mean_sensitivity = mean(sensitivity),
sd_sensitivity = sd(sensitivity)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% as.character,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric)) %>%
mutate(percIncorp_txt = gsub('$', '% atom 13C excess', percIncorp),
percIncorp_txt = percIncorp_txt %>% reorder(percIncorp %>% as.numeric))
# status
df.j.s %>% head(n=3)
In [67]:
# plotting
options(repr.plot.width=8, repr.plot.height=4)
p_sens_abund = ggplot(df.j.s, aes(mean_abund, mean_sensitivity,
color=subsample_size, group=subsample_size,
ymin=mean_sensitivity-sd_sensitivity,
ymax=mean_sensitivity+sd_sensitivity)) +
geom_line(alpha=0.7) +
geom_linerange(alpha=0.7) +
geom_point(alpha=0.7) +
scale_x_log10(breaks=c(1e-3, 1e-2, 1e-1, 1e0)) +
scale_color_discrete('Sequencing\ndepth') +
labs(x='Mean % abundance',
y='Sensitivity') +
facet_wrap(~ percIncorp_txt) +
theme_bw() +
theme(
axis.text.x = element_text(angle=45, hjust=1)
)
p_sens_abund
In [68]:
outF = file.path(workDir, 'atomIncorp_sampDepth_MW-HR-SIP_sens-abund.pdf')
ggsave(outF, p_sens_abund, width=10, height=5)
cat('File written:', outF, '\n')
In [70]:
outF = file.path(workDir, 'atomIncorp_sampDepth_MW-HR-SIP_sens-abund.jpeg')
ggsave(outF, p_sens_abund, width=10, height=5)
cat('File written:', outF, '\n')
In [71]:
BDshift_files = list.files(path=workDir, pattern='BD-shift_stats.txt', full.names=TRUE, recursive=TRUE)
BDshift_files %>% length %>% print
In [ ]:
df_shift = list()
for(F in BDshift_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_shift[[F]] = tmp
}
df_shift = do.call(rbind, df_shift)
rownames(df_shift) = 1:nrow(df_shift)
df_shift = df_shift %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon, percIncorp, subsample_size, rep) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE))
# status
df_shift %>% nrow %>% print
df_shift %>% head(n=3)
In [ ]:
comm_files = list.files(path=workDir, pattern='comm.txt', full.names=TRUE, recursive=TRUE)
comm_files %>% length %>% print
In [ ]:
df_comm = list()
for(F in comm_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_comm[[F]] = tmp
}
df_comm = do.call(rbind, df_comm)
rownames(df_comm) = 1:nrow(df_comm)
# mean abund for libraries
df_comm = df_comm %>%
rename('taxon' = taxon_name) %>%
group_by(taxon, percIncorp, subsample_size, rep) %>%
summarize(mean_rel_abund_perc = mean(rel_abund_perc),
mean_rank_abund = mean(rank)) %>%
ungroup()
# status
df_comm %>% nrow %>% print
df_comm %>% head(n=3)
In [ ]:
qSIP_files = list.files(path=workDir, pattern='_qSIP_atom_incorp.txt', full.names=TRUE, recursive=TRUE)
qSIP_files %>% length %>% print
In [ ]:
df_qSIP = list()
for(F in qSIP_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_qSIP[[F]] = tmp
}
df_qSIP = do.call(rbind, df_qSIP)
rownames(df_qSIP) = 1:nrow(df_qSIP)
# status
df_qSIP %>% nrow %>% print
df_qSIP %>% head(n=3)
In [ ]:
join_vars = c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'subsample_size'='subsample_size',
'rep'='rep')
# joining tables
df_qSIP %>% nrow %>% print
df.j = df_qSIP %>%
left_join(df_shift, join_vars)
df.j %>% nrow %>% print
df.j = df.j %>%
left_join(df_comm, join_vars)
# status
df.j %>% nrow %>% print
df.j %>% head(n=3)
In [ ]:
# calling true_pos + false_neg
df.j = df.j %>%
mutate(incorp_cls = mapply(clsfy, incorp, true_incorporator),
incorp_cls = ifelse(is.na(incorp_cls), 'NA', incorp_cls))
# status
df.j %>% nrow %>% print
df.j$incorp_cls %>% table %>% print
In [ ]:
# function for calculating specificity
calc_specificity = function(incorp_cls){
incorp_cls = incorp_cls[!is.na(incorp_cls)]
tn = sum(incorp_cls == 'True negative')
fp = sum(incorp_cls == 'False positive')
#print(c(tn, fp))
x = tn / (tn + fp)
x = ifelse(is.na(x), 0, x)
return(x)
}
# grouping by abundance and calculating specificity
df.j.s = df.j %>%
mutate(n_group = ntile(log10(mean_rel_abund_perc), 10)) %>%
group_by(n_group, percIncorp, subsample_size, rep) %>%
summarize(min_abund = min(mean_rel_abund_perc, na.rm=TRUE),
mean_abund = mean(mean_rel_abund_perc, na.rm=TRUE),
max_abund = max(mean_rel_abund_perc, na.rm=TRUE),
specificity = calc_specificity(incorp_cls)) %>%
group_by(n_group, percIncorp, subsample_size) %>%
summarize(mean_abund = mean(mean_abund),
mean_specificity = mean(specificity),
sd_specificity = sd(specificity)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% as.character,
percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric))
# status
df.j.s %>% head(n=3)
In [ ]:
# plotting
options(repr.plot.width=8, repr.plot.height=4)
p_sens_abund = ggplot(df.j.s, aes(mean_abund, mean_specificity,
color=subsample_size, group=subsample_size,
ymin=mean_specificity-sd_specificity,
ymax=mean_specificity+sd_specificity)) +
geom_line(alpha=0.7) +
geom_linerange(alpha=0.7) +
geom_point(alpha=0.7) +
scale_x_log10(breaks=c(1e-3, 1e-2, 1e-1, 1e0)) +
scale_color_discrete('Sequencing\ndepth') +
labs(x='Mean % abundance',
y='Specificity') +
facet_wrap(~ percIncorp) +
theme_bw() +
theme(
axis.text.x = element_text(angle=45, hjust=1)
)
plot(p_sens_abund)
In [ ]:
outF = file.path(workDir, 'atomIncorp_sampDepth_qSIP_spec-abund.pdf')
ggsave(outF, p_sens_abund, width=10, height=5)
cat('File written:', outF, '\n')
In [ ]:
outF = file.path(workDir, 'atomIncorp_sampDepth_qSIP_spec-abund.jpeg')
ggsave(outF, p_sens_abund, width=10, height=5)
cat('File written:', outF, '\n')
In [23]:
BDshift_files = list.files(path=workDir, pattern='BD-shift_stats.txt', full.names=TRUE, recursive=TRUE)
BDshift_files %>% length %>% print
In [24]:
df_shift = list()
for(F in BDshift_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_shift[[F]] = tmp
}
df_shift = do.call(rbind, df_shift)
rownames(df_shift) = 1:nrow(df_shift)
df_shift = df_shift %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon, percIncorp, subsample_size, rep) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median)
# status
df_shift %>% nrow %>% print
df_shift %>% head(n=3)
In [25]:
incorp_files = list.files(path=workDir, pattern='OTU_abs1e9_PCR_sub_filt_MW_DESeq2_incorp.txt', full.names=TRUE, recursive=TRUE)
incorp_files %>% length %>% print
In [26]:
df_incorp = list()
for(F in incorp_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_incorp[[F]] = tmp
}
df_incorp = do.call(rbind, df_incorp)
rownames(df_incorp) = 1:nrow(df_incorp)
df_incorp %>% head(n=3) %>% print
In [27]:
# just incorporators
df_incorp = df_incorp %>%
filter(incorp == TRUE) %>%
dplyr::distinct(taxon, incorp, percIncorp, subsample_size, rep) %>%
rename('HWHRSIP_incorp' = incorp)
df_incorp %>% nrow %>% print
df_incorp %>% head(n=3) %>% print
In [28]:
atomX_files = list.files(path=workDir, pattern='*_qSIP_atom.txt', full.names=TRUE, recursive=TRUE)
atomX_files %>% length %>% print
In [29]:
df_atomX = list()
for(F in atomX_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_atomX[[F]] = tmp
}
df_atomX = do.call(rbind, df_atomX)
rownames(df_atomX) = 1:nrow(df_atomX)
df_atomX %>% head(n=3) %>% print
In [30]:
# table join
df_atomX %>% nrow %>% print
df.j = left_join(df_atomX, df_shift, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'subsample_size'='subsample_size',
'rep'='rep')) %>%
filter(!is.na(BD_diff)) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE),
true_atom_fraction_excess = median_true_BD_shift / 0.036,
atom_fraction_excess = ifelse(is.na(atom_CI_low), 0, atom_fraction_excess))
df.j %>% nrow %>% print
df.j %>% head(n=3) %>% print
In [31]:
df.j$true_incorporator %>% summary
In [32]:
df.j = left_join(df.j, df_incorp, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'subsample_size'='subsample_size',
'rep'='rep')) %>%
mutate(HWHRSIP_incorp = ifelse(is.na(HWHRSIP_incorp), FALSE, TRUE))
df.j %>% nrow %>% print
df.j %>% head(n=3) %>% print
In [33]:
# difference between true and estimated
## q-SIP incorporators
df.j.dis.qSIP = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, subsample_size) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
subsample_size = subsample_size %>% reorder(subsample_size %>% as.numeric))
# plotting
options(repr.plot.width=6, repr.plot.height=4)
p_qSIP = ggplot(df.j.dis.qSIP, aes(percIncorp, mean_delta_excess,
color=subsample_size, group=subsample_size,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
geom_line() +
#facet_grid(true_incorporator ~ .) +
scale_color_discrete('Sequencing\ndepth') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
p_qSIP
In [34]:
# difference between true and estimated
## q-SIP incorporators
df.j.dis.qSIP = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
filter(HWHRSIP_incorp == TRUE) %>% # just MW-HR-SIP incorporators
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, subsample_size) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
subsample_size = subsample_size %>% reorder(subsample_size %>% as.numeric))
# plotting
options(repr.plot.width=6, repr.plot.height=2.5)
p_qSIP = ggplot(df.j.dis.qSIP, aes(percIncorp, mean_delta_excess,
color=subsample_size, group=subsample_size,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
geom_line() +
#facet_grid(true_incorporator ~ .) +
scale_color_discrete('Sequencing\ndepth') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
p_qSIP
In [35]:
# difference between true and estimated
## q-SIP incorporators
tmp1 = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, subsample_size) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
subsample_size = subsample_size %>% reorder(subsample_size %>% as.numeric),
incorp_called = 'No filter')
## MW-HR-SIP incorporators
tmp2 = df.j %>%
filter(atom_CI_low > 0) %>% # just incorporators identified by q-SIP
filter(HWHRSIP_incorp == TRUE) %>% # just MW-HR-SIP incorporators
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, subsample_size) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
subsample_size = subsample_size %>% reorder(subsample_size %>% as.numeric),
incorp_called = 'MW-HR-SIP filter')
# combining tables
df.j.dis.qSIP = rbind(tmp1, tmp2) %>%
mutate(incorp_called = factor(incorp_called, levels=c('No filter', 'MW-HR-SIP filter')))
# plotting
options(repr.plot.width=6, repr.plot.height=4)
p_qSIP = ggplot(df.j.dis.qSIP, aes(percIncorp, mean_delta_excess,
color=subsample_size, group=subsample_size,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
facet_grid(incorp_called ~ .) +
scale_color_discrete('Sequencing\ndepth') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
p_qSIP
In [36]:
dBD_files = list.files(path=workDir, pattern='*_dBD.txt', full.names=TRUE, recursive=TRUE)
dBD_files %>% length %>% print
In [37]:
df_dBD = list()
for(F in dBD_files){
tmp = read.delim(F, sep='\t')
FF = strsplit(F, '/') %>% unlist
FFl = length(FF)
tmp$percIncorp = FF[FFl-3]
tmp$subsample_size = FF[FFl-2]
tmp$rep = FF[FFl-1]
tmp$file = FF[FFl]
df_dBD[[F]] = tmp
}
df_dBD = do.call(rbind, df_dBD)
rownames(df_dBD) = 1:nrow(df_dBD)
df_dBD %>% head(n=3) %>% print
In [38]:
df.j = inner_join(df_dBD, df_shift, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'subsample_size'='subsample_size',
'rep'='rep')) %>%
filter(!is.na(delta_BD)) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.002, TRUE, FALSE),
true_atom_fraction_excess = median_true_BD_shift / 0.036,
atom_fraction_excess = delta_BD / 0.036)
df.j %>% head(n=3)
In [39]:
df.j = left_join(df.j, df_incorp, c('taxon' = 'taxon',
'percIncorp'='percIncorp',
'subsample_size'='subsample_size',
'rep'='rep')) %>%
mutate(HWHRSIP_incorp = ifelse(is.na(HWHRSIP_incorp), FALSE, TRUE))
df.j %>% nrow %>% print
df.j %>% head(n=3)
In [40]:
# difference between true and estimated
tmp1 = df.j %>%
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, subsample_size) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
subsample_size = subsample_size %>% reorder(subsample_size %>% as.numeric),
incorp_called = 'No filter')
tmp2 = df.j %>%
filter(HWHRSIP_incorp == TRUE) %>% # just MW-HR-SIP incorporators
mutate(delta_excess = atom_fraction_excess * 100 - true_atom_fraction_excess * 100) %>%
group_by(percIncorp, subsample_size) %>%
summarize(mean_delta_excess = mean(delta_excess),
sd_delta_excess = sd(delta_excess)) %>%
ungroup() %>%
mutate(percIncorp = percIncorp %>% reorder(percIncorp %>% as.numeric),
subsample_size = subsample_size %>% reorder(subsample_size %>% as.numeric),
incorp_called = 'MW-HR-SIP filter')
# combining tables
df.j.dis.dBD = rbind(tmp1, tmp2) %>%
mutate(incorp_called = factor(incorp_called, levels=c('No filter', 'MW-HR-SIP filter')))
# plotting
options(repr.plot.width=8, repr.plot.height=4)
p_dBD = ggplot(df.j.dis.dBD, aes(percIncorp, mean_delta_excess,
color=subsample_size, group=subsample_size,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.4, size=1) +
geom_point() +
facet_grid(incorp_called ~ .) +
scale_color_discrete('Sequencing\ndepth') +
labs(x='13C atom % excess (truth)',
y='13C atom % excess\n(truth - estimate)') +
theme_bw()
p_dBD
In [41]:
df.jj = rbind(df.j.dis.qSIP %>% mutate(method='qSIP'),
df.j.dis.dBD %>% mutate(method='Delta BD')) %>%
mutate(method = gsub('qSIP', 'q-SIP', method))
p.comb = ggplot(df.jj, aes(percIncorp, mean_delta_excess,
color=subsample_size, group=subsample_size,
ymin=mean_delta_excess-sd_delta_excess,
ymax=mean_delta_excess+sd_delta_excess)) +
geom_line() +
geom_linerange(alpha=0.5, size=1) +
geom_point() +
scale_color_discrete('Sequencing\ndepth') +
labs(x='atom % excess 13C (truth)',
y='atom % excess 13C\n(truth - estimate)') +
facet_grid(incorp_called ~ method) +
theme_bw()
p.comb
In [42]:
outF = file.path(workDir, 'atomIncorp_sampDepth_BDshift.pdf')
ggsave(outF, p.comb, width=8, height=4)
cat('File written:', outF, '\n')
In [43]:
outF = file.path(workDir, 'atomIncorp_sampDepth_BDshift.jpeg')
ggsave(outF, p.comb, width=8, height=4)
cat('File written:', outF, '\n')
In [ ]: