Making histograms of example taxa used for 16S region GC conservation figure
"Clostridium ljungdahlii DSM 13528",
"Escherichia coli APEC O78",
"Streptomyces griseus subsp griseus\nNBRC 13350",
"Lactobacillus gasseri ATCC 33323"
Plotting GC content for amplicons & shotgun reads
@ system76-server:~/notebook/deltaGC_notes/data/amplicon/140411_allBac/amp454-L215-N1k/140425_example-taxa
made a file of all 4 taxa
taxa_list.txt
$ head -n 1 ../skewnorm_m10k_r4k-11.5k.txt > skewnorm_m10k_r4k-11.5k_exp.txt; grep -f taxa_list.txt ../skewnorm_m10k_r4k-11.5k.txt >> skewnorm_m10k_r4k-11.5k_exp.txt
$ head -n 1 ../skewnorm_m15k_r4k-20k.txt > skewnorm_m15k_r4k-20k_exp.txt; grep -f taxa_list.txt ../skewnorm_m15k_r4k-20k.txt >> skewnorm_m15k_r4k-20k_exp.txt
@ system76-server:~/notebook/deltaGC_notes/data/mg-reads/140411_allBac/mg454-L215-N10k/140425_example-taxa
$ cp ~/notebook/deltaGC_notes/data/amplicon/140411_allBac/amp454-L215-N1k/140425_example-taxa/taxa_list.txt .
$ head -n 1 ../skewnorm_m10k_r4k-11.5k.txt > skewnorm_m10k_r4k-11.5k_exp.txt; grep -f taxa_list.txt ../skewnorm_m10k_r4k-11.5k.txt >> skewnorm_m10k_r4k-11.5k_exp.txt
$ head -n 1 ../skewnorm_m15k_r4k-20k.txt > skewnorm_m15k_r4k-20k_exp.txt; grep -f taxa_list.txt ../skewnorm_m15k_r4k-20k.txt >> skewnorm_m15k_r4k-20k_exp.txt
In [1]:
%load_ext rmagic
%load_ext rpy2.ipython
In [2]:
%%R
# loading tables
require(ggplot2)
require(data.table)
require(gridExtra)
In [3]:
%%R
base.dir = "~/notebook/deltaGC_notes/data/amplicon/140411_allBac/amp454-L215-N1k/140425_example-taxa";
files = c(
'skewnorm_m15k_r4k-20k_exp',
'skewnorm_m10k_r4k-11.5k_exp'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
tbl.amp = do.call(rbind, tbl.l)
In [4]:
%%R
base.dir = "~/notebook/deltaGC_notes/data/mg-reads/140411_allBac/mg454-L215-N10k/140425_example-taxa/";
files = list(
'skewnorm_m15k_r4k-20k_exp',
'skewnorm_m10k_r4k-11.5k_exp'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
tbl.mg = do.call(rbind, tbl.l)
In [5]:
%%R
# merging tables
tbl.amp[, dataset:= 'amplicon']
tbl.mg[, dataset:= 'shotgun']
tbl = rbind(tbl.amp, tbl.mg)
# editting values for plotting
tbl[, file:= gsub(".+m15k.+", "12.5 kb", file)]
tbl[, file:= gsub(".+m10k.+", "6 kb", file)]
## editing names
examples = c(
"Clostridium_ljungdahlii_DSM_13528",
"Escherichia_coli_APEC_O78",
"Streptomyces_griseus_subsp_griseus_NBRC_13350",
"Lactobacillus_gasseri_ATCC_33323"
)
new.names = c(
"C. ljungdahlii",
"E. coli",
"S. griseus",
"L. gasseri"
)
for(i in 1:length(examples)){
tbl$genome = gsub(examples[i], new.names[i], tbl$genome)
}
# ordering of taxa
tbl$genome = factor(tbl$genome, levels=new.names)
# renaming 'dataset' to 'Fragments'
tbl[, Fragments := dataset]
tbl[, dataset := NULL]
In [6]:
%%R
# loading in genome GC table
tbl.genGC = fread("~/notebook/deltaGC_notes/data/genome_data/prok-bac-genomes_Ngaps_GC/prok-bac-genomes_Ngaps_GC.txt",
header=F)
## editing table
colnames(tbl.genGC) = c('genome', 'genome_GC')
tbl.genGC[,genome:=gsub(".+\\/|\\.fasta", "", genome)]
## selecting taxa of interest
examples = c(
"Clostridium_ljungdahlii_DSM_13528",
"Escherichia_coli_APEC_O78",
"Streptomyces_griseus_subsp_griseus_NBRC_13350",
"Lactobacillus_gasseri_ATCC_33323"
)
tbl.genGC = subset(tbl.genGC, genome %in% examples)
## changing names
new.names = c(
"C. ljungdahlii",
"E. coli",
"S. griseus",
"L. gasseri"
)
for(i in 1:length(examples)){
tbl.genGC$genome = gsub(examples[i], new.names[i], tbl.genGC$genome)
}
## calculating BD for each
tbl.genGC[, BD:= 0.098 * genome_GC + 1.66]
In [21]:
%%R -w 7 -h 6 -u in
## plotting with mean GC
tbl1 = subset( tbl , file='12.5 kb' )
# plotting
p = ggplot(tbl1, aes(fragment_buoyant_density, fill=Fragments)) +
geom_density(alpha=0.5, size=0.4) +
scale_fill_manual(values=c('blue', 'orange')) +
facet_grid(genome ~ .) +
geom_vline(data=tbl.genGC, aes(xintercept=BD), linetype='dashed') +
labs(x=expression(paste("Fragment buoyant density (g ", ml^-1, ")")),
y="Kernel density") +
theme(
text = element_text(size=18),
axis.title.y = element_text(vjust=1),
axis.title.x = element_text(vjust=0),
axis.text.x = element_text(color='black'),
axis.text.y = element_text(color='black'),
strip.text = element_text(face='italic'),
strip.background = element_rect(size=4)
)
ggsave("~/notebook/deltaGC_notes/data/amp-mg/figures/BD-dist_example-bac.tiff",
width=7, height=6, units='in', dpi=300)
print(p)
xxx=1