In [2]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation_rep3/'
genomeDir = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/'
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
figureDir = '/home/nick/notebook/SIPSim/figures/bac_genome_n1147/'
bandwidth = 0.8
DBL_scaling = 0.5
subsample_dist = 'lognormal'
subsample_mean = 9.432
subsample_scale = 0.5
subsample_min = 10000
subsample_max = 30000
In [3]:
import glob
from os.path import abspath
import nestly
from IPython.display import Image
import os
%load_ext rpy2.ipython
%load_ext pushnote
In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [5]:
if not os.path.isdir(workDir):
os.makedirs(workDir)
if not os.path.isdir(figureDir):
os.makedirs(figureDir)
%cd $workDir
In [6]:
# Determining min/max BD that
## min G+C cutoff
min_GC = 13.5
## max G+C cutoff
max_GC = 80
## max G+C shift
max_13C_shift_in_BD = 0.036
min_range_BD = min_GC/100.0 * 0.098 + 1.66
max_range_BD = max_GC/100.0 * 0.098 + 1.66
max_range_BD = max_range_BD + max_13C_shift_in_BD
print 'Min BD: {}'.format(min_range_BD)
print 'Max BD: {}'.format(max_range_BD)
In [6]:
# estimated coverage
mean_frag_size = 9000.0
mean_amp_len = 300.0
n_frags = 10000
coverage = round(n_frags * mean_amp_len / mean_frag_size, 1)
msg = 'Average coverage from simulating {} fragments: {}X'
print msg.format(n_frags, coverage)
In [7]:
!SIPSim fragments \
$genomeDir/genome_index.txt \
--fp $genomeDir \
--fr ../../515F-806R.fna \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--nf 10000 \
--np 24 \
2> ampFrags.log \
> ampFrags.pkl
In [8]:
!printf "Number of taxa with >=1 amplicon: "
!grep "Number of amplicons: " ampFrags.log | \
perl -ne "s/^.+ +//; print unless /^0$/" | wc -l
In [9]:
!grep "Number of amplicons: " ampFrags.log | \
perl -pe 's/.+ +//' | hist
In [10]:
!SIPSim fragment_KDE \
ampFrags.pkl \
> ampFrags_kde.pkl
In [11]:
!SIPSim KDE_info \
-s ampFrags_kde.pkl \
> ampFrags_kde_info.txt
In [12]:
%%R
# loading
df = read.delim('ampFrags_kde_info.txt', sep='\t')
df.kde1 = df %>%
filter(KDE_ID == 1)
df.kde1 %>% head(n=3)
BD_GC50 = 0.098 * 0.5 + 1.66
In [13]:
%%R -w 500 -h 250
# plotting
p.amp = ggplot(df.kde1, aes(median)) +
geom_histogram(binwidth=0.001) +
geom_vline(xintercept=BD_GC50, linetype='dashed', color='red', alpha=0.7) +
labs(x='Median buoyant density') +
theme_bw() +
theme(
text = element_text(size=16)
)
p.amp
In [14]:
!SIPSim incorpConfigExample \
--percTaxa 10 \
--percIncorpUnif 100 \
--n_reps 3 \
> PT10_PI100.config
# checking output
!cat PT10_PI100.config
In [15]:
!SIPSim KDE_selectTaxa \
-f 0.1 \
ampFrags_kde.pkl \
> incorporators.txt
In [16]:
!SIPSim communities \
--config PT10_PI100.config \
$genomeDir/genome_index.txt \
> comm.txt
In [17]:
%%R -w 750 -h 300
tbl = read.delim('comm.txt', sep='\t') %>%
mutate(library = library %>% as.character %>% as.numeric,
condition = ifelse(library %% 2 == 0, 'Control', 'Treatment'))
ggplot(tbl, aes(rank, rel_abund_perc, color=condition, group=library)) +
geom_line() +
scale_y_log10() +
scale_color_discrete('Community') +
labs(x='Rank', y='Relative abundance (%)') +
theme_bw() +
theme(
text=element_text(size=16)
)
In [18]:
!SIPSim gradient_fractions \
--BD_min $min_range_BD \
--BD_max $max_range_BD \
comm.txt \
> fracs.txt
In [19]:
%%R -w 600 -h 500
tbl = read.delim('fracs.txt', sep='\t')
ggplot(tbl, aes(fraction, fraction_size)) +
geom_bar(stat='identity') +
facet_grid(library ~ .) +
labs(y='fraction size') +
theme_bw() +
theme(
text=element_text(size=16)
)
In [20]:
%%R -w 450 -h 250
tbl$library = as.character(tbl$library)
ggplot(tbl, aes(library, fraction_size)) +
geom_boxplot() +
labs(y='fraction size') +
theme_bw() +
theme(
text=element_text(size=16)
)
In [95]:
!SIPSim diffusion \
--bw $bandwidth \
--np 20 \
ampFrags_kde.pkl \
> ampFrags_kde_dif.pkl \
2> ampFrags_kde_dif.log
In [96]:
!SIPSim DBL \
--comm comm.txt \
--commx $DBL_scaling \
--np 20 \
-o ampFrags_kde_dif_DBL.pkl \
ampFrags_kde_dif.pkl \
2> ampFrags_kde_dif_DBL.log
# checking output
!tail -n 5 ampFrags_kde_dif_DBL.log
In [97]:
# none
!SIPSim KDE_info \
-s ampFrags_kde.pkl \
> ampFrags_kde_info.txt
# diffusion
!SIPSim KDE_info \
-s ampFrags_kde_dif.pkl \
> ampFrags_kde_dif_info.txt
# diffusion + DBL
!SIPSim KDE_info \
-s ampFrags_kde_dif_DBL.pkl \
> ampFrags_kde_dif_DBL_info.txt
In [98]:
%%R
inFile = 'ampFrags_kde_info.txt'
df.raw = read.delim(inFile, sep='\t') %>%
filter(KDE_ID == 1)
df.raw$stage = 'raw'
inFile = 'ampFrags_kde_dif_info.txt'
df.dif = read.delim(inFile, sep='\t')
df.dif$stage = 'diffusion'
inFile = 'ampFrags_kde_dif_DBL_info.txt'
df.DBL = read.delim(inFile, sep='\t')
df.DBL$stage = 'diffusion +\nDBL'
df = rbind(df.raw, df.dif, df.DBL)
df.dif = ''
df.DBL = ''
df %>% head(n=3)
In [99]:
%%R -w 350 -h 300
df$stage = factor(df$stage, levels=c('raw', 'diffusion', 'diffusion +\nDBL'))
ggplot(df, aes(stage)) +
geom_boxplot(aes(y=min), color='red') +
geom_boxplot(aes(y=median), color='darkgreen') +
geom_boxplot(aes(y=max), color='blue') +
labs(y = 'Buoyant density (g ml^-1)') +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.x = element_blank()
)
In [126]:
!SIPSim isotope_incorp \
--comm comm.txt \
--shift ampFrags_BD-shift.txt \
--taxa incorporators.txt \
--np 20 \
-o ampFrags_kde_dif_DBL_incorp.pkl \
ampFrags_kde_dif_DBL.pkl \
PT10_PI100.config \
2> ampFrags_kde_dif_DBL_incorp.log
# checking log
!tail -n 5 ampFrags_kde_dif_DBL_incorp.log
In [128]:
%%R
inFile = 'ampFrags_BD-shift.txt'
df = read.delim(inFile, sep='\t') %>%
mutate(library = library %>% as.character)
In [129]:
%%R -h 275 -w 375
inFile = 'ampFrags_BD-shift.txt'
df = read.delim(inFile, sep='\t') %>%
mutate(library = library %>% as.character %>% as.numeric)
df.s = df %>%
mutate(incorporator = ifelse(min > 0.001, TRUE, FALSE),
incorporator = ifelse(is.na(incorporator), 'NA', incorporator),
condition = ifelse(library %% 2 == 0, 'control', 'treatment')) %>%
group_by(library, incorporator, condition) %>%
summarize(n_incorps = n())
# plotting
ggplot(df.s, aes(library %>% as.character, n_incorps, fill=incorporator)) +
geom_bar(stat='identity') +
labs(x='Community', y = 'Count', title='Number of incorporators\n(according to BD shift)') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [130]:
!SIPSim OTU_table \
--abs 1e9 \
--np 20 \
ampFrags_kde_dif_DBL_incorp.pkl \
comm.txt \
fracs.txt \
> OTU_n2_abs1e9.txt \
2> OTU_n2_abs1e9.log
# checking log
!tail -n 5 OTU_n2_abs1e9.log
In [131]:
%%R
## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp50 = 0.5 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66
In [132]:
%%R -w 700 -h 450
# plotting absolute abundances
# loading file
df = read.delim('OTU_n2_abs1e9.txt', sep='\t')
df.s = df %>%
group_by(library, BD_mid) %>%
summarize(total_count = sum(count))
## plot
p = ggplot(df.s, aes(BD_mid, total_count)) +
#geom_point() +
geom_area(stat='identity', alpha=0.3, position='dodge') +
geom_vline(xintercept=c(BD.GCp50), linetype='dashed', alpha=0.5) +
labs(x='Buoyant density', y='Total abundance') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
p
In [133]:
%%R -w 700 -h 450
# plotting number of taxa at each BD
df.nt = df %>%
filter(count > 0) %>%
group_by(library, BD_mid) %>%
summarize(n_taxa = n())
## plot
p = ggplot(df.nt, aes(BD_mid, n_taxa)) +
#geom_point() +
geom_area(stat='identity', alpha=0.3, position='dodge') +
#geom_histogram(stat='identity') +
geom_vline(xintercept=c(BD.GCp50), linetype='dashed', alpha=0.5) +
labs(x='Buoyant density', y='Number of taxa') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'none'
)
p
In [134]:
%%R -w 700 -h 450
# plotting relative abundances
## plot
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
geom_vline(xintercept=c(BD.GCp50), linetype='dashed', alpha=0.5) +
labs(x='Buoyant density', y='Absolute abundance') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'none'
)
p + geom_area(stat='identity', position='dodge', alpha=0.5)
In [135]:
%%R -w 700 -h 450
p +
geom_area(stat='identity', position='fill') +
labs(x='Buoyant density', y='Relative abundance')
In [136]:
!SIPSim OTU_PCR \
OTU_n2_abs1e9.txt \
--debug \
> OTU_n2_abs1e9_PCR.txt
In [137]:
%%R -w 800 -h 300
# loading file
F = 'OTU_n2_abs1e9_PCR.txt'
df.SIM = read.delim(F, sep='\t') %>%
mutate(molarity_increase = final_molarity / init_molarity * 100,
library = library %>% as.character)
p1 = ggplot(df.SIM, aes(init_molarity, final_molarity, color=library)) +
geom_point(shape='O', alpha=0.5) +
labs(x='Initial molarity', y='Final molarity') +
theme_bw() +
theme(
text = element_text(size=16)
)
p2 = ggplot(df.SIM, aes(init_molarity, molarity_increase, color=library)) +
geom_point(shape='O', alpha=0.5) +
scale_y_log10() +
labs(x='Initial molarity', y='% increase in molarity') +
theme_bw() +
theme(
text = element_text(size=16)
)
grid.arrange(p1, p2, ncol=2)
In [138]:
# PCR w/out --debug (no extra output)
!SIPSim OTU_PCR \
OTU_n2_abs1e9.txt \
> OTU_n2_abs1e9_PCR.txt
In [139]:
!SIPSim OTU_subsample \
--dist $subsample_dist \
--dist_params mean:$subsample_mean,sigma:$subsample_scale \
--min_size $subsample_min \
--max_size $subsample_max \
OTU_n2_abs1e9_PCR.txt \
> OTU_n2_abs1e9_PCR_subNorm.txt
In [140]:
%%R -w 350 -h 250
df = read.csv('OTU_n2_abs1e9_PCR_subNorm.txt', sep='\t')
df.s = df %>%
group_by(library, fraction) %>%
summarize(total_count = sum(count)) %>%
ungroup() %>%
mutate(library = as.character(library))
ggplot(df.s, aes(library, total_count)) +
geom_boxplot() +
labs(y='Number of sequences\nper fraction') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [141]:
%%R
# loading file
df.abs = read.delim('OTU_n2_abs1e9.txt', sep='\t')
df.sub = read.delim('OTU_n2_abs1e9_PCR_subNorm.txt', sep='\t')
#lib.reval = c('1' = 'control',
# '2' = 'treatment',
# '3' = 'control',
# '4' = 'treatment',
# '5' = 'control',
# '6' = 'treatment')
#df.abs = mutate(df.abs, library = plyr::revalue(as.character(library), lib.reval))
#df.sub = mutate(df.sub, library = plyr::revalue(as.character(library), lib.reval))
In [142]:
%%R -w 700 -h 1000
# plotting absolute abundances
## plot
p = ggplot(df.abs, aes(BD_mid, count, fill=taxon)) +
geom_vline(xintercept=c(BD.GCp50), linetype='dashed', alpha=0.5) +
labs(x='Buoyant density') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank(),
legend.position = 'none',
plot.margin=unit(c(1,1,0.1,1), "cm")
)
p1 = p + geom_area(stat='identity', position='dodge', alpha=0.5) +
labs(y='Total community\n(absolute abundance)')
# plotting absolute abundances of subsampled
## plot
p = ggplot(df.sub, aes(BD_mid, count, fill=taxon)) +
geom_vline(xintercept=c(BD.GCp50), linetype='dashed', alpha=0.5) +
labs(x='Buoyant density') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'none'
)
p2 = p + geom_area(stat='identity', position='dodge', alpha=0.5) +
labs(y='Subsampled community\n(absolute abundance)') +
theme(
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank(),
plot.margin=unit(c(0.1,1,0.1,1), "cm")
)
# plotting relative abundances of subsampled
p3 = p + geom_area(stat='identity', position='fill') +
geom_vline(xintercept=c(BD.GCp50), linetype='dashed', alpha=0.5) +
labs(y='Subsampled community\n(relative abundance)') +
theme(
axis.title.y = element_text(vjust=1),
plot.margin=unit(c(0.1,1,1,1.35), "cm")
)
# combining plots
grid.arrange(p1, p2, p3, ncol=1)
In [143]:
!SIPSim OTU_wideLong -w \
OTU_n2_abs1e9_PCR_subNorm.txt \
> OTU_n2_abs1e9_PCR_subNorm_w.txt
In [144]:
!SIPSim OTU_sampleData \
OTU_n2_abs1e9_PCR_subNorm.txt \
> OTU_n2_abs1e9_PCR_subNorm_meta.txt
In [145]:
# making phyloseq object from OTU table
!SIPSimR phyloseq_make \
OTU_n2_abs1e9_PCR_subNorm_w.txt \
-s OTU_n2_abs1e9_PCR_subNorm_meta.txt \
> OTU_n2_abs1e9_PCR_subNorm.physeq
## making ordination
!SIPSimR phyloseq_ordination \
OTU_n2_abs1e9_PCR_subNorm.physeq \
OTU_n2_abs1e9_PCR_subNorm_bray-NMDS.pdf
## filtering phyloseq object to just taxa/samples of interest (eg., BD-min/max)
!SIPSimR phyloseq_edit \
--BD_min 1.71 \
--BD_max 1.75 \
OTU_n2_abs1e9_PCR_subNorm.physeq \
> OTU_n2_abs1e9_PCR_subNorm_filt.physeq
## making ordination
!SIPSimR phyloseq_ordination \
OTU_n2_abs1e9_PCR_subNorm_filt.physeq \
OTU_n2_abs1e9_PCR_subNorm_filt_bray-NMDS.pdf
# making png figures
!convert OTU_n2_abs1e9_PCR_subNorm_bray-NMDS.pdf OTU_n2_abs1e9_PCR_subNorm_bray-NMDS.png
!convert OTU_n2_abs1e9_PCR_subNorm_filt_bray-NMDS.pdf OTU_n2_abs1e9_PCR_subNorm_filt_bray-NMDS.png
In [146]:
Image(filename='OTU_n2_abs1e9_PCR_subNorm_bray-NMDS.png')
Out[146]:
In [147]:
Image(filename='OTU_n2_abs1e9_PCR_subNorm_filt_bray-NMDS.png')
Out[147]:
In [81]:
## DESeq2
!SIPSimR phyloseq_DESeq2 \
--log2 0.25 \
--hypo greater \
--cont 1,3,5 \
--treat 2,4,6 \
--occur_all 0.25 \
OTU_n2_abs1e9_PCR_subNorm_filt.physeq \
> OTU_n2_abs1e9_PCR_subNorm_DS2.txt
## Confusion matrix
!SIPSimR DESeq2_confuseMtx \
--libs 2,4,6 \
--padj 0.1 \
ampFrags_BD-shift.txt \
OTU_n2_abs1e9_PCR_subNorm_DS2.txt
In [82]:
%%R -w 500 -h 350
byClass = read.delim('DESeq2-cMtx_byClass.txt', sep='\t')
byClass %>% filter(variables=='Balanced Accuracy') %>% print
ggplot(byClass, aes(variables, values)) +
geom_bar(stat='identity') +
labs(y='Value') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.x = element_blank(),
axis.text.x = element_text(angle=45, hjust=1)
)
In [69]:
%%R -w 550 -h 350
df_cMtx = read.delim('DESeq2-cMtx_data.txt', sep='\t') %>%
gather(clsfy, clsfy_value, incorp.pred, incorp.known) %>%
filter(! is.na(clsfy_value))
ggplot(df_cMtx, aes(log2FoldChange, padj)) +
geom_point(size=3, shape='O') +
facet_grid(clsfy ~ clsfy_value) +
labs(x='log2 fold change', y='Adjusted P-value') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [70]:
%%R -w 300 -h 300
# checking correspondence of padj & padj.BH
# ggplot(df_cMtx, aes(padj, padj.BH)) +
# geom_point(shape='O', size=2) +
# theme_bw() +
# theme(
# text = element_text(size=16)
# )
In [71]:
%%R
clsfy = function(guess,known){
if(is.na(guess) | is.na(known)){
return(NA)
}
if(guess == TRUE){
if(guess == known){
return('True positive')
} else {
return('False positive')
}
} else
if(guess == FALSE){
if(guess == known){
return('True negative')
} else {
return('False negative')
}
} else {
stop('Error: true or false needed')
}
}
In [72]:
%%R
df = read.delim('DESeq2-cMtx_data.txt', sep='\t')
df = df %>%
filter(! is.na(log2FoldChange), library %in% c(2,4,6)) %>%
mutate(taxon = reorder(taxon, -log2FoldChange),
cls = mapply(clsfy, incorp.pred, incorp.known))
df %>% head(n=3)
In [73]:
%%R -w 800 -h 350
df.TN = df %>% filter(cls == 'True negative')
df.TP = df %>% filter(cls == 'True positive')
df.FP = df %>% filter(cls == 'False negative')
ggplot(df, aes(taxon, log2FoldChange, color=cls,
ymin=log2FoldChange - lfcSE, ymax=log2FoldChange + lfcSE)) +
geom_pointrange(size=0.4, alpha=0.5) +
geom_pointrange(data=df.TP, size=0.4, alpha=0.3) +
geom_pointrange(data=df.FP, size=0.4, alpha=0.3) +
labs(x = 'Taxon', y = 'Log2 fold change') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
legend.title=element_blank(),
axis.text.x = element_blank(),
legend.position = 'bottom'
)
In [10]:
!SIPSim qSIP \
OTU_n2_abs1e9.txt \
OTU_n2_abs1e9_PCR_subNorm.txt \
> OTU_n2_abs1e9_PCR_subNorm_qSIP.txt
In [13]:
# making an experimental design file for qSIP
import itertools
x = range(1,7)
y = ['control', 'treatment']
expDesignFile = os.path.join(workDir, 'qSIP_exp_design.txt')
with open(expDesignFile, 'wb') as outFH:
for i,z in itertools.izip(x,itertools.cycle(y)):
line = '\t'.join([str(i),z])
outFH.write(line + '\n')
!head $expDesignFile
In [ ]:
!SIPSim qSIP_atomExcess \
--np 10 \
OTU_n2_abs1e9_PCR_subNorm_qSIP.txt \
qSIP_exp_design.txt \
> OTU_n2_abs1e9_PCR_subNorm_qSIP_atom.txt
In [20]:
%%R
df_qSIP = read.delim('OTU_n2_abs1e9_PCR_subNorm_qSIP_atom.txt', sep='\t')
df_shift = read.delim('ampFrags_BD-shift.txt', sep='\t') %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median)
df_qSIP %>% head(n=3) %>% print
print('------------------------')
df_shift %>% head(n=3) %>% print
In [35]:
%%R
df.j = inner_join(df_qSIP, df_shift, c('taxon' = 'taxon')) %>%
filter(!is.na(BD_diff)) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.03, TRUE, FALSE),
true_atom_fraction_excess = median_true_BD_shift / 0.036,
atom_fraction_excess = ifelse(is.na(atom_CI_low), 0, atom_fraction_excess))
df.j %>% head(n=3)
In [36]:
%%R -w 650 -h 300
ggplot(df.j, aes(BD_diff, fill=true_incorporator)) +
geom_histogram(binwidth=0.005, alpha=0.7, position='identity') +
scale_color_discrete('Incorporator?') +
labs(x='qSIP: BD shift (g ml^-1)') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [37]:
%%R -w 800 -h 300
df.j$taxon = reorder(df.j$taxon, -df.j$atom_fraction_excess)
ggplot(df.j, aes(taxon, true_atom_fraction_excess,
ymin=atom_CI_low, ymax=atom_CI_high)) +
geom_linerange(alpha=0.75) +
geom_point(color='red', size=0.25) +
geom_point(aes(y=atom_fraction_excess), color='green', size=0.2) +
labs(y='13C atom fraction excess') +
theme_bw() +
theme(
text = element_text(size=16),
axis.text.x = element_blank()
)
In [52]:
%%R -w 500 -h 250
# true incorporator error
ggplot(df.j, aes(atom_fraction_excess - true_atom_fraction_excess,
fill=true_incorporator)) +
geom_histogram(binwidth=0.05, alpha=0.7, position='identity') +
scale_fill_discrete('Incorporator?') +
labs(x='distance from true value') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [57]:
%%R
zip.res = pscl::zeroinfl(true_atom_fraction_excess ~ atom_fraction_excess, data=df.j)
zip.res %>% summary
In [38]:
%%R
lm.res = lm(true_atom_fraction_excess ~ atom_fraction_excess, data=df.j)
lm.res %>% summary
In [105]:
!SIPSimR qSIP_confuseMtx \
--libs 2,4,6 \
ampFrags_BD-shift.txt \
OTU_n2_abs1e9_PCR_subNorm_qSIP_atom.txt
In [106]:
%%R -h 250
df = read.delim('qSIP-cMtx_byClass.txt', sep='\t') %>%
filter(library == 2)
ggplot(df, aes(variables, values)) +
geom_bar(stat='identity') +
theme_bw() +
theme(
text = element_text(size=16),
axis.text.x = element_text(angle=45, hjust=1),
axis.title.x = element_blank()
)
In [107]:
%%R
df
In [120]:
!SIPSim deltaBD \
OTU_n2_abs1e9_PCR_subNorm.txt \
qSIP_exp_design.txt \
> OTU_n2_abs1e9_PCR_subNorm_dBD.txt
In [25]:
%%R
df_dBD = read.delim('OTU_n2_abs1e9_PCR_subNorm_dBD.txt', sep='\t')
df_shift = read.delim('ampFrags_BD-shift.txt', sep='\t') %>%
filter(library %in% c(2,4,6)) %>%
group_by(taxon) %>%
summarize(median = median(median)) %>%
ungroup() %>%
rename('median_true_BD_shift' = median)
df_dBD %>% head(n=3) %>% print
print('------------------------')
df_shift %>% head(n=3) %>% print
In [26]:
%%R
df.j = inner_join(df_dBD, df_shift, c('taxon' = 'taxon')) %>%
mutate(true_incorporator = ifelse(median_true_BD_shift > 0.03, TRUE, FALSE))
df.j %>% head(n=3)
In [27]:
%%R -w 650 -h 300
ggplot(df.j, aes(delta_BD, fill=true_incorporator)) +
geom_histogram(binwidth=0.005, alpha=0.7, position='identity') +
scale_color_discrete('Incorporator?') +
labs(x='deltaBD: BD shift (g ml^-1)') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [28]:
%%R -w 800 -h 300
df.j$taxon = reorder(df.j$taxon, -df.j$delta_BD)
ggplot(df.j, aes(taxon, median_true_BD_shift)) +
geom_point(color='red', size=0.25) +
geom_point(aes(y=delta_BD), color='green', size=0.2) +
labs(y='BD shift') +
theme_bw() +
theme(
text = element_text(size=16),
axis.text.x = element_blank()
)
In [31]:
%%R
lm.res = lm(median_true_BD_shift ~ delta_BD, data=df.j)
lm.res %>% summary
In [ ]: