In [2]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome4/'
genomeDir = '/home/nick/notebook/SIPSim/t/genome_data/'
In [1]:
import os,sys
import numpy as np
import pandas as pd
In [12]:
%matplotlib inline
%load_ext rpy2.ipython
In [13]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [7]:
genomeSymlinkDir = os.path.join(workDir, 'genome_data')
if not os.path.isdir(genomeSymlinkDir):
os.mkdir(genomeSymlinkDir)
In [11]:
!cd $workDir; \
cut -f 1 genome_list.txt | \
xargs -n 1 -I % ln -s -f $genomeDir/%.fasta $genomeSymlinkDir/%.fna
!cd $workDir; \
ls -thlc $genomeSymlinkDir
In [32]:
!cd $workDir; \
SIPSim indexGenomes genome_list.txt \
--fp ./genome_data/ --np 4 > genome_data/index_log.txt
In [53]:
!cd $workDir; \
SIPSim fragments \
genome_list.txt \
--fp genome_data \
--fr ../515Fm-927Rm.fna \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--nf 10000 \
--np 4 \
2> ampFrag_skewN90-25-n5-nS.log \
> ampFrag_skewN90-25-n5-nS.pkl
!cd $workDir; \
tail ampFrag_skewN90-25-n5-nS.log
In [54]:
!cd $workDir; \
SIPSim fragment_kde \
ampFrag_skewN90-25-n5-nS.pkl \
> ampFrag_skewN90-25-n5-nS_kde.pkl
In [55]:
!cd $workDir; \
SIPSim diffusion \
ampFrag_skewN90-25-n5-nS_kde.pkl \
--np 4 \
> ampFrag_skewN90-25-n5-nS_dif_kde.pkl
In [59]:
!cd $workDir; \
SIPSim gradientComms \
genome_list.txt \
--abund_dist uniform \
--abund_dist_p low:45,high:55 \
--n_comm 2 \
> comm_n2.txt
!cd $workDir; \
head comm_n2.txt
In [63]:
# making config file
config = """
[1]
# baseline: no incorp
[[intraPopDist 1]]
distribution = uniform
[[[start]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[[[end]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[2]
# split inter-pop distribution (~30% of taxa have ~100% incorp)
[[intraPopDist 1]]
distribution = normal
[[[mu]]]
# these taxa get full incorp
[[[[interPopDist 1]]]]
weight = 0.2
distribution = uniform
start = 100
end = 100
# these taxa in the community get no incorp
[[[[interPopDist 2]]]]
weight = 0.8
distribution = uniform
start = 0
end = 0
[[[sigma]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
"""
outfile = os.path.join(workDir, 'incorp.config')
outf = open(outfile, 'wb')
outf.write(config)
outf.close()
In [64]:
!cd $workDir; \
SIPSim isoIncorp \
ampFrag_skewN90-25-n5-nS_dif_kde.pkl \
incorp.config \
--comm comm_n2.txt \
--np 4 \
> incorp_n2.pkl
In [65]:
!cd $workDir; \
SIPSim BD_shift \
ampFrag_skewN90-25-n5-nS_dif_kde.pkl \
incorp_n2.pkl \
--np 4
In [67]:
!cd $workDir; \
SIPSim fractions \
comm_n2.txt \
> fracs_n2.txt
!cd $workDir; \
head fracs_n2.txt
In [68]:
!cd $workDir; \
SIPSim OTU_table \
incorp_n2.pkl \
comm_n2.txt \
fracs_n2.txt \
--abs 1e9 \
--np 4 \
> OTU_n2_abs1e9.txt
In [77]:
!cd $workDir;\
SIPSim OTU_subsample \
--dist_params low:50000,high:50000 \
OTU_n2_abs1e9.txt \
> OTU_n2_abs1e9_sub50k.txt
In [80]:
%%R -i workDir
setwd(workDir)
tbl.sub = read.delim('OTU_n2_abs1e9_sub50k.txt')
tbl.sum = tbl.sub %>%
group_by(library, fraction) %>%
summarize(fraction_sum = sum(count))
## samples with no subsamples
tbl.sum %>% filter(fraction_sum < 10000) %>% print
## filtering
tbl.sub = tbl.sub %>%
filter(!grepl('inf', fraction, ignore.case=T)) %>%
separate(fraction, into = c('BD_min','BD_max'), sep='-', convert=TRUE) %>%
filter(BD_min != 1.795) %>%
mutate(
BD_min = as.numeric(BD_min),
BD_max = as.numeric(BD_max),
BD_mean = (BD_min + BD_max) / 2
) %>% as.data.frame
In [81]:
%%R
# ordering taxa by total abundances
tax_order = tbl.sub %>%
group_by(taxon) %>%
summarize(taxon_total_abund = sum(count)) %>%
as.data.frame
tax_order = unique(tax_order[order(tax_order$taxon_total_abund, decreasing=T),'taxon'])
tbl.sub$taxon = factor(tbl.sub$taxon, levels=sort(unique(tbl.sub$taxon), decreasing=T))
tbl.sub$taxon_group = factor(tbl.sub$taxon, levels=tax_order)
In [82]:
%%R -w 900
## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66
## plotting
p = ggplot(tbl.sub, aes(BD_mean, count, fill=taxon, group=taxon)) +
geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
labs(x='Buoyant density') +
facet_grid(library ~ .) +
theme(
text = element_text(size=16),
legend.position = 'none'
)
p1 = p + geom_area(stat='identity', alpha=0.5, position='dodge')
p2 = p + geom_area(stat='identity', alpha=0.8, position='fill')
grid.arrange(p1, p2, ncol=1)
In [98]:
%%R
# functions for relabeling facet labels
label_change <- function(value){
if (value == 1){
return(expression(paste(""^12, "C control")))
} else
if (value == 2){
return(expression(paste(""^13, "C treatment")))
} else {
return(value)
}
}
# facet labeling function
label_facet <- function(variable, value) {
sapply(as.character(value), label_change)
}
In [160]:
%%R -w 600 -h 600
## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66
# edit table
tbl.sub.sum = tbl.sub %>%
group_by(BD_mean) %>%
summarize(total = sum(count)) %>%
filter(total > c(0))
tbl.sub2 = tbl.sub %>%
filter(BD_mean %in% tbl.sub.sum$BD_mean) %>%
filter(BD_mean > 1.68, BD_mean < 1.74) %>%
mutate(taxon = as.character(as.numeric(taxon))) %>%
mutate(BD_mean_char = as.character(BD_mean)) #%>%
#arrange(desc(BD_mean))
tbl.sub2$BD_mean_char = reorder(tbl.sub2$BD_mean_char, rev(tbl.sub2$BD_mean))
## plotting
p = ggplot(tbl.sub2, aes(BD_mean_char, count, fill=taxon, group=taxon)) +
geom_bar(stat='identity', alpha=0.8, position='fill') +
coord_flip() +
labs(x='CsCl gradient fraction', y='Relative abundance') +
facet_grid(. ~ library, scales='free_x', labeller=label_facet) +
theme_bw() +
theme(
text = element_text(size=24),
axis.title.x = element_blank(),
axis.text.x = element_text(vjust=0.5, angle=90),
axis.text.y = element_blank(),
panel.background = element_blank(),
rect = element_rect(fill = "transparent",colour = NA)
)
p
In [161]:
%%R
setwd('/home/nick/notebook/fullCyc/figures/HR-SIP_examples/')
ggsave('taxon4_inc100.pdf', width=6, height=6)
In [ ]: