In [3]:
%load_ext rmagic
%load_ext rpy2.ipython
In [4]:
%%R
require(ggplot2)
require(reshape)
require(data.table)
require(gridExtra)
In [5]:
%%R
# Amplicon fragment tables from the 140411_allBac directory: one
# tab-separated file per fragment-size distribution, read from base.dir.
base.dir = "~/notebook/deltaGC_notes/data/amplicon/140411_allBac/amp454-L215-N1k"
files = c('skewnorm_m15k_r4k-20k', 'skewnorm_m10k_r4k-11.5k')

tbl.l = list()
for(i in files){
    # input path is <base.dir>/<i>.txt
    in.file = file.path(base.dir, paste0(i, ".txt"))
    tbl.l[[i]] = fread(in.file, header=TRUE, sep="\t")
    # tag every row with the base name of the file it was read from
    tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}

## stacking the per-file tables into one and labelling the data type
tbl.amp = do.call(rbind, tbl.l)
tbl.amp$data = rep('amplicon\nfragments', nrow(tbl.amp))
tbl.l = list()   # release the per-file tables
In [6]:
%%R
# loading shotgun read data (140411_allBac directory): one tab-separated
# file per fragment-size distribution, read from base.dir.
base.dir = "~/notebook/deltaGC_notes/data/mg-reads/140411_allBac/mg454-L215-N10k"
# character vector (not a list), matching the amplicon-loading cells
files = c(
    'skewnorm_m15k_r4k-20k',
    'skewnorm_m10k_r4k-11.5k'
)

tbl.l = list()
for(i in files){
    # input path is <base.dir>/<i>.txt
    in.file = file.path(base.dir, paste0(i, ".txt"))
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    tbl.l[[i]] = fread(in.file, header=TRUE, sep="\t")
    # tag every row with the base name of the file it was read from
    tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}

## merging lists
tbl.mg = do.call(rbind, tbl.l)
tbl.mg$data = rep('shotgun\nfragments', nrow(tbl.mg))
tbl.l = list()   # release the per-file tables
In [7]:
%%R
## merging amp & mg tables (bacterial data set)
tbl.bac = rbind(tbl.amp, tbl.mg)
# Clearing memory: remove the inputs outright instead of rebinding them to
# empty vectors, so an accidental re-run of this cell fails loudly rather
# than silently binding two empty placeholders.
rm(tbl.amp, tbl.mg)
In [8]:
%%R
# editing $file names: replace the raw file base names with the labels
# shown in the figures.
# fixed=TRUE makes gsub match the names literally; as regular expressions
# the '.' in '11.5k' would match any character.
tbl.bac[, file:=gsub("skewnorm_m15k_r4k-20k", "Fragment size\ndistribution: 12.5 kb", file, fixed=TRUE)]
tbl.bac[, file:=gsub("skewnorm_m10k_r4k-11.5k", "Fragment size\ndistribution: 6 kb", file, fixed=TRUE)]
In [9]:
%%R
# Amplicon fragment tables from the 140416_allArc directory: one
# tab-separated file per fragment-size distribution, read from base.dir.
base.dir = "~/notebook/deltaGC_notes/data/amplicon/140416_allArc/amp454-L215-N1k"
files = c('skewnorm_m15k_r4k-20k', 'skewnorm_m10k_r4k-11.5k')

tbl.l = list()
for(i in files){
    # input path is <base.dir>/<i>.txt
    in.file = file.path(base.dir, paste0(i, ".txt"))
    tbl.l[[i]] = fread(in.file, header=TRUE, sep="\t")
    # tag every row with the base name of the file it was read from
    tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}

## stacking the per-file tables into one and labelling the data type
tbl.amp = do.call(rbind, tbl.l)
tbl.amp$data = rep('amplicon\nfragments', nrow(tbl.amp))
tbl.l = list()   # release the per-file tables
In [10]:
%%R
# loading shotgun read data (140417_allArc directory): one tab-separated
# file per fragment-size distribution, read from base.dir.
base.dir = "~/notebook/deltaGC_notes/data/mg-reads/140417_allArc/mg454-L215-N10k"
# character vector (not a list), matching the amplicon-loading cells
files = c(
    'skewnorm_m15k_r4k-20k',
    'skewnorm_m10k_r4k-11.5k'
)

tbl.l = list()
for(i in files){
    # input path is <base.dir>/<i>.txt
    in.file = file.path(base.dir, paste0(i, ".txt"))
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    tbl.l[[i]] = fread(in.file, header=TRUE, sep="\t")
    # tag every row with the base name of the file it was read from
    tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}

## merging lists
tbl.mg = do.call(rbind, tbl.l)
tbl.mg$data = rep('shotgun\nfragments', nrow(tbl.mg))
tbl.l = list()   # release the per-file tables
In [11]:
%%R
## merging amp & mg tables (archaeal data set)
tbl.arc = rbind(tbl.amp, tbl.mg)
# Clearing memory: remove the inputs outright instead of rebinding them to
# empty vectors, so an accidental re-run of this cell fails loudly rather
# than silently binding two empty placeholders.
rm(tbl.amp, tbl.mg)
In [12]:
%%R
# editing $file names: replace the raw file base names with the labels
# shown in the figures.
# fixed=TRUE makes gsub match the names literally; as regular expressions
# the '.' in '11.5k' would match any character.
tbl.arc[, file:=gsub("skewnorm_m15k_r4k-20k", "Fragment size\ndistribution: 12.5 kb", file, fixed=TRUE)]
tbl.arc[, file:=gsub("skewnorm_m10k_r4k-11.5k", "Fragment size\ndistribution: 6 kb", file, fixed=TRUE)]
In [13]:
%%R
# tagging each table with its domain (columns are added by reference)
# invisible() keeps the cell from echoing the tables; it replaces the
# throwaway `xxx=1` assignment that presumably served the same purpose.
invisible(tbl.bac[, domain:= 'Bacteria'])
invisible(tbl.arc[, domain:= 'Archaea'])
# NOTE: building the combined table is currently disabled; the two tables
# are kept separate.
#tbl = rbind(tbl.bac, tbl.arc)
#tbl$domain = factor(tbl$domain, levels=c('Bacteria', 'Archaea'))
In [15]:
%%R -h 5 -w 9 -u in
# summarizing fragment length distribution
# plot of fragment lengths
# Histogram of fragment lengths, faceted with `data` as rows and `file`
# as columns.
#
# Args:
#   x:        table with the columns `fragment_length`, `data` and `file`
#   binwidth: histogram bin width, in the units of `fragment_length`
#             (default 100, the value that used to be hard-coded)
# Returns:
#   the ggplot object; nothing is printed or saved here
make_frag_len_hist = function(x, binwidth=100){
    p = ggplot(x, aes(fragment_length)) +
        geom_histogram(binwidth=binwidth) +
        labs(x='Fragment length (bp)', y='Number of fragments') +
        # scales='free': axis ranges are not shared across all panels
        facet_grid(data ~ file, scales='free') +
        theme(
            text = element_text(size=16),
            axis.text.x = element_text(angle=45, color='black'),
            axis.text.y = element_text(color='black')
        )
    return(p)
}
# plotting bacterial fragments
p.bac = make_frag_len_hist(tbl.bac)
# Arguments are named in full: the previous call relied on partial matching
# of `file` to `filename` and on the plot falling into the next free slot.
# The output path is written relative to the home directory, like the input
# paths in the loading cells, instead of a user-specific absolute path.
ggsave(filename=path.expand("~/notebook/deltaGC_notes/data/amp-mg/figures/frag-len-dist_bac.tiff"),
       plot=p.bac, dpi=300, height=5, width=9, units='in')
print(p.bac)
In [16]:
%%R -h 5 -w 9 -u in
# plotting archaeal fragments
p.arc = make_frag_len_hist(tbl.arc)
# Arguments are named in full: the previous call relied on partial matching
# of `file` to `filename` and on the plot falling into the next free slot.
# The output path is written relative to the home directory, like the input
# paths in the loading cells, instead of a user-specific absolute path.
ggsave(filename=path.expand("~/notebook/deltaGC_notes/data/amp-mg/figures/frag-len-dist_arc.tiff"),
       plot=p.arc, dpi=300, height=5, width=9, units='in')
print(p.arc)
In [ ]: