In [1]:
%load_ext rmagic
%load_ext rpy2.ipython
In [2]:
%%R
require(ggplot2)
require(reshape)
require(data.table)
require(gridExtra)
In [3]:
%%R
# loading amplicon data
base.dir = "~/notebook/deltaGC_notes/data/amplicon/140411_allBac/amp454-L215-N1k";
files = c(
'skewnorm_m15k_r4k-20k',
'skewnorm_m10k_r4k-11.5k'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
## merging lists
tbl.amp = do.call(rbind, tbl.l)
tbl.amp$data = rep('amplicon\nfragments', nrow(tbl.amp))
tbl.l = list()
In [4]:
%%R
# loading shotgun read data
base.dir = "~/notebook/deltaGC_notes/data/mg-reads/140411_allBac/mg454-L215-N10k";
files = list(
'skewnorm_m15k_r4k-20k',
'skewnorm_m10k_r4k-11.5k'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
## merging lists
tbl.mg = do.call(rbind, tbl.l)
tbl.mg$data = rep('shotgun\nfragments', nrow(tbl.mg))
tbl.l = list()
In [5]:
%%R
## merging amp & mg tables
tbl.bac = rbind(tbl.amp, tbl.mg)
tbl.amp = vector() # clearing memory
tbl.mg = vector()
In [6]:
%%R
# editing $file names
tbl.bac[, file:=gsub("skewnorm_m15k_r4k-20k", "Fragment size\ndistribution: 12.5kb", file)]
tbl.bac[, file:=gsub("skewnorm_m10k_r4k-11.5k", "Fragment size\ndistribution: 6kb", file)]
In [7]:
%%R
base.dir = "~/notebook/deltaGC_notes/data/amplicon/140416_allArc/amp454-L215-N1k";
files = c(
'skewnorm_m15k_r4k-20k',
'skewnorm_m10k_r4k-11.5k'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
## merging lists
tbl.amp = do.call(rbind, tbl.l)
tbl.amp$data = rep('amplicon\nfragments', nrow(tbl.amp))
tbl.l = list()
In [8]:
%%R
base.dir = "~/notebook/deltaGC_notes/data/mg-reads/140417_allArc/mg454-L215-N10k";
files = list(
'skewnorm_m15k_r4k-20k',
'skewnorm_m10k_r4k-11.5k'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
## merging lists
tbl.mg = do.call(rbind, tbl.l)
tbl.mg$data = rep('shotgun\nfragments', nrow(tbl.mg))
tbl.l = list()
In [9]:
%%R
## merging amp & mg tables
tbl.arc = rbind(tbl.amp, tbl.mg)
tbl.amp = vector()
tbl.mg = vector()
In [10]:
%%R
# editing $file names
tbl.arc[, file:=gsub("skewnorm_m15k_r4k-20k", "Fragment size\ndistribution: 12.5kb", file)]
tbl.arc[, file:=gsub("skewnorm_m10k_r4k-11.5k", "Fragment size\ndistribution: 6kb", file)]
In [11]:
%%R
# merging tables
tbl.bac[, domain:= 'Bacteria']
tbl.arc[, domain:= 'Archaea']
tbl = rbind(tbl.bac, tbl.arc)
tbl$domain = factor(tbl$domain, levels=c('Bacteria', 'Archaea'))
In [12]:
%%R
# t-based CI
t.ci.range = function(x){
# 0.95 CI
# x = vector
x = as.vector(x)
m = mean(x)
s = sd(x)
n = length(x)
error <- qt(0.975,df=n-1)*s/sqrt(n)
ci.range = error * 2
return(ci.range)
}
tbl[, t.ci.range := t.ci.range(fragment_buoyant_density), by='file,data,genome']
tbl[,frag_variance := var(fragment_buoyant_density), by='file,data,genome']
tbl[,frag_stdev := sd(fragment_buoyant_density), by='file,data,genome']
print(summary(unique(tbl$t.ci.range)))
print(summary(unique(tbl$frag_variance)))
print(summary(unique(tbl$frag_stdev)))
In [16]:
%%R -w 7 -h 6 -u in
# plotting range of values
tbl[, range := (max(fragment_buoyant_density) - min(fragment_buoyant_density)), by='file,data,genome']
# dereplicating by file,data,genome,range
tbl.s = tbl[, sum(1), by='domain,file,data,genome,range']
# plotting
y.axis.t = expression(atop('Intra-genomic fragment buoyant', paste('density range (g ', ml^-1, ')')))
p = ggplot( tbl.s, aes(data, range) ) +
geom_boxplot() +
labs(x='fragment simulation', y=y.axis.t) +
facet_grid(domain ~ file) +
theme(
text = element_text(size=18),
axis.title.x = element_blank(),
axis.text.x = element_text(color='black'),
axis.text.y = element_text(color='black')
)
ggsave("/home/nick/notebook/deltaGC_notes/data/amp-mg/figures/BD_min-max-range_bac-arc_boxplot.tiff",
width=7, height=6, units='in', dpi=300)
print(p)
In [50]:
%%R
# min-max ranges
tbl.min = tbl.s[, min(range), by='domain,file,data']
print(tbl.min)
tbl.max = tbl.s[, max(range), by='domain,file,data']
print(tbl.max)
In [90]:
%%R
tmp = t.test(range ~ file, data=subset(tbl.s, data=='amplicon\nfragments' & domain=='Bacteria'), alternative='greater')
print(tmp)
print(names(tmp))
In [93]:
%%R
require(effsize)
# t-test on just amplicon fragments
print('#---amplicon-bacteria---#')
print(
t.test(range ~ file, data=subset(tbl.s, data=='amplicon\nfragments' & domain=='Bacteria'), alternative='greater')
)
print(
cohen.d(range ~ file, data=subset(tbl.s, data=='amplicon\nfragments' & domain=='Bacteria') )
)
print('#---amplicon-archaea---#')
print(
t.test(range ~ file, data=subset(tbl.s, data=='amplicon\nfragments' & domain=='Archaea'), alternative='greater')
)
print(
cohen.d(range ~ file, data=subset(tbl.s, data=='amplicon\nfragments' & domain=='Archaea') )
)
# t-test on just shotgun fragments
print('#---shotgun-bacteria---#')
print(
t.test(range ~ file, data=subset(tbl.s, data=='shotgun\nfragments' & domain=='Bacteria'), alternative='less')
)
print(
cohen.d(range ~ file, data=subset(tbl.s, data=='shotgun\nfragments' & domain=='Bacteria') )
)
print('#---shotgun-archaea---#')
print(
t.test(range ~ file, data=subset(tbl.s, data=='shotgun\nfragments' & domain=='Archaea'), alternative='less')
)
print(
cohen.d(range ~ file, data=subset(tbl.s, data=='shotgun\nfragments' & domain=='Archaea') )
)
In [ ]: