In [55]:
%load_ext rmagic
In [56]:
%%R
require(ggplot2)
require(data.table)
require(gridExtra)
In [57]:
%%R
base.dir = "~/notebook/deltaGC_notes/data/amplicon/140416_allArc/amp454-L215-N1k";
files = c(
'skewnorm_m15k_r4k-20k',
'skewnorm_m10k_r4k-11.5k'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
tbl.bin.l = list()
for(i in files){
in.file = paste(c(i, "bin.txt"), collapse="_")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.bin.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.bin.l[[i]]$file = rep(i, nrow(tbl.bin.l[[i]]))
}
# loading in genome GC table
tbl.genGC = fread("~/notebook/deltaGC_notes/data/genome_data/prok-arc-genomes_Ngaps_GC/prok-arc-genomes_Ngaps_GC.txt",
header=F)
## editing table
colnames(tbl.genGC) = c('genome', 'genome_GC')
tbl.genGC[,genome:=gsub(".+\\/|\\.fasta", "", genome)]
## selecting just genomes in other tables
tbl.genGC = subset(tbl.genGC, tbl.genGC$genome %in% unique(tbl.bin.l[[i]]$genome) )
## adding rank
tbl.genGC[,genome_GC_rank:=rank(genome_GC, ties.method='first')]
## merging tables
for(i in names(tbl.bin.l)){
tbl.bin.l[[i]] = merge(tbl.bin.l[[i]], tbl.genGC, by='genome')
}
## merging lists
tbl.amp = do.call(rbind, tbl.l)
tbl.amp$data = rep('amplicon\nfragments', nrow(tbl.amp))
#tbl.l = list()
tbl.bin.amp = do.call(rbind, tbl.bin.l)
tbl.bin.amp$data = rep('amplicon\nfragments', nrow(tbl.bin.amp))
#tbl.bin.l = list()
#print(tbl.l)
print(head(tbl.genGC[order(tbl.genGC$genome_GC)], n=20))
In [59]:
%%R
# editing $file names
tbl.bin.amp[, file:=gsub("skewnorm_m15k_r4k-20k", "Fragment size\ndistribution: 12.5kb", file)]
tbl.bin.amp[, file:=gsub("skewnorm_m10k_r4k-11.5k", "Fragment size\ndistribution: 6kb", file)]
In [73]:
%%R
# parsing out columns & rows needed for heatmap plotting
tbl.bin.p = subset(tbl.bin.amp, file=="Fragment size\ndistribution: 12.5kb")
tbl.bin.p = tbl.bin.p[, c('data', 'genome', 'genome_GC', 'bin_min', 'frag_count', 'file'), with=F]
tbl.bin.p[, file := "Fragment size distribution: 12.5kb"]
## editing data column
tbl.bin.p[data == 'amplicon\nfragments', data:='amplicon-fragments']
tbl.bin.p[data == 'shotgun\nfragments', data:='shotgun-fragments']
# combining genome & genome_GC
tbl.bin.p[, genome_GC_name := paste(round(genome_GC*100, digits=1), genome, sep='-')]
print(tbl.bin.p)
In [82]:
%%R -h 700 -w 1100
# making heatmaps of buoyant density distributions
make_density_heatmap <- function(tbl, x.axis=TRUE){
## heatmap colors: black only for lowest value
cols = c(
rep("#000000", 1),
rep("darkblue", 99),
rep("#1F1FFF", 100),
rep("#3F3FFF", 100),
rep("#5F5FFF", 100),
rep("#7F7FFF", 100),
rep("#9F9FFF", 100),
rep("#BFBFFF", 100),
rep("#DFDFFF", 100),
rep("#FFFFFF", 100)
)
# plot title
plot.title = unique(tbl$file)
## heatmap plotting
p.heat = ggplot(tbl, aes(genome_GC_name, bin_min, fill=frag_count)) +
geom_tile() +
scale_alpha_continuous( guide=FALSE ) +
scale_fill_gradientn( colours=cols, name='Count' ) +
#scale_x_continuous( expand = c(0,0) ) +
scale_y_reverse( expand = c(0,0), limits=c(1.77,1.66) ) +
labs( x='Genome (ordered by genome G+C content)', y='Fragment buoyant density') +
theme(
text = element_text(size=18),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
#axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
legend.justification=c(0,0),
legend.position=c(0,0),
legend.text = element_text(size=14),
plot.margin = unit(c(0,0,0,1), "lines") # top, right, bottom, left
)
# removing x axis title if needed
if(x.axis==FALSE){ # no x-axis; top plot
p.heat = p.heat + theme( axis.title.x = element_blank() )
p.heat = p.heat + theme( plot.margin = unit(c(1,0,0,1), "lines") ) # top, right, bottom, left
p.heat = p.heat + labs( title = plot.title )
p.heat = p.heat + theme( title = element_text( vjust=1, hjust=0.75, size=16) )
} else {
p.heat = p.heat + theme( plot.margin = unit(c(0,0,1,1), "lines") ) # top, right, bottom, left
}
return(p.heat)
}
### amplicon
p.heat.amp.12.5 = make_density_heatmap( tbl.bin.p , x.axis=FALSE )
print(p.heat.amp.12.5)
In [45]:
%%R
# parsing out columns & rows needed for heatmap plotting
tbl.bin.p = subset(tbl.bin.amp, file=="Fragment size\ndistribution: 12.5kb")
tbl.bin.p = tbl.bin.p[, c('data', 'genome', 'genome_GC', 'genome_GC_rank', 'bin_min', 'frag_count', 'file'), with=F]
tbl.bin.p[, file := "Fragment size distribution: 12.5kb"]
## editing data column
tbl.bin.p[data == 'amplicon\nfragments', data:='amplicon-fragments']
tbl.bin.p[data == 'shotgun\nfragments', data:='shotgun-fragments']
# BD of each genome
tbl.bin.p[, genome_BD := genome_GC * 0.098 + 1.66]
In [46]:
%%R -h 700 -w 1100
# making heatmaps of buoyant density distributions
make_density_heatmap <- function(tbl, x.axis=TRUE){
## heatmap colors: black only for lowest value
cols = c(
rep("#000000", 1),
rep("darkblue", 99),
rep("#1F1FFF", 100),
rep("#3F3FFF", 100),
rep("#5F5FFF", 100),
rep("#7F7FFF", 100),
rep("#9F9FFF", 100),
rep("#BFBFFF", 100),
rep("#DFDFFF", 100),
rep("#FFFFFF", 100)
)
# plot title
plot.title = unique(tbl$file)
## heatmap plotting
p.heat = ggplot(tbl, aes(genome_GC_rank, bin_min, fill=frag_count)) +
geom_tile() +
geom_point(aes(genome_GC_rank, genome_BD, color='red')) +
scale_alpha_continuous( guide=FALSE ) +
scale_fill_gradientn( colours=cols, name='Count' ) +
#scale_x_continuous( expand = c(0,0) ) +
scale_y_reverse( expand = c(0,0), limits=c(1.77,1.66) ) +
labs( x='Genome (ordered by genome G+C content)', y='Fragment buoyant density') +
theme(
text = element_text(size=18),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
#axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
legend.justification=c(0,0),
legend.position=c(0,0),
legend.text = element_text(size=14),
plot.margin = unit(c(0,0,0,1), "lines") # top, right, bottom, left
)
# removing x axis title if needed
if(x.axis==FALSE){ # no x-axis; top plot
p.heat = p.heat + theme( axis.title.x = element_blank() )
p.heat = p.heat + theme( plot.margin = unit(c(1,0,0,1), "lines") ) # top, right, bottom, left
p.heat = p.heat + labs( title = plot.title )
p.heat = p.heat + theme( title = element_text( vjust=1, hjust=0.75, size=16) )
} else {
p.heat = p.heat + theme( plot.margin = unit(c(0,0,1,1), "lines") ) # top, right, bottom, left
}
return(p.heat)
}
### amplicon
p.heat.amp.12.5 = make_density_heatmap( tbl.bin.p[tbl.bin.p$data=='amplicon-fragments', ], x.axis=FALSE )
print(p.heat.amp.12.5)
In [46]:
In [47]:
%%R
base.dir = "~/notebook/deltaGC_notes/data/mg-reads/140417_allArc/mg454-L215-N10k";
files = list(
'skewnorm_m15k_r4k-20k',
'skewnorm_m10k_r4k-11.5k'
)
tbl.l = list()
for(i in files){
in.file = paste(c(i, "txt"), collapse=".")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.l[[i]]$file = rep(i, nrow(tbl.l[[i]]))
}
tbl.bin.l = list()
for(i in files){
in.file = paste(c(i, "bin.txt"), collapse="_")
in.file = paste(c(base.dir, in.file), collapse="/")
tbl.bin.l[[i]] = fread(in.file, header=T, sep="\t")
tbl.bin.l[[i]]$file = rep(i, nrow(tbl.bin.l[[i]]))
}
# loading in genome GC table
tbl.genGC = fread("~/notebook/deltaGC_notes/data/genome_data/prok-arc-genomes_Ngaps_GC/prok-arc-genomes_Ngaps_GC.txt",
header=F)
## editing table
colnames(tbl.genGC) = c('genome', 'genome_GC')
tbl.genGC[,genome:=gsub(".+\\/|\\.fasta", "", genome)]
## selecting just genomes in other tables
tbl.genGC = subset(tbl.genGC, tbl.genGC$genome %in% unique(tbl.bin.l[[i]]$genome) )
## adding rank
tbl.genGC[,genome_GC_rank:=rank(genome_GC, ties.method='first')]
## merging tables
for(i in names(tbl.bin.l)){
tbl.bin.l[[i]] = merge(tbl.bin.l[[i]], tbl.genGC, by='genome')
}
## merging lists
tbl.mg = do.call(rbind, tbl.l)
tbl.mg$data = rep('shotgun\nfragments', nrow(tbl.mg))
tbl.l = list()
tbl.bin.mg = do.call(rbind, tbl.bin.l)
tbl.bin.mg$data = rep('shotgun\nfragments', nrow(tbl.bin.mg))
tbl.bin.l = list()
print(tbl.bin.mg)
In [48]:
%%R
# editing $file names
tbl.bin.mg[, file:=gsub("skewnorm_m15k_r4k-20k", "Fragment size\ndistribution: 12.5kb", file)]
tbl.bin.mg[, file:=gsub("skewnorm_m10k_r4k-11.5k", "Fragment size\ndistribution: 6kb", file)]
In [53]:
%%R
# parsing out columns & rows needed for heatmap plotting
tbl.bin.p = subset(tbl.bin.mg, file=="Fragment size\ndistribution: 12.5kb")
tbl.bin.p = tbl.bin.p[, c('data', 'genome', 'genome_GC', 'genome_GC_rank', 'bin_min', 'frag_count', 'file'), with=F]
tbl.bin.p[, file := "Fragment size distribution: 12.5kb"]
## editing data column
tbl.bin.p[data == 'amplicon\nfragments', data:='amplicon-fragments']
tbl.bin.p[data == 'shotgun\nfragments', data:='shotgun-fragments']
# BD of each genome
tbl.bin.p[, genome_BD := genome_GC * 0.098 + 1.66]
In [54]:
%%R -h 700 -w 1100
# making heatmaps of buoyant density distributions
make_density_heatmap <- function(tbl, x.axis=TRUE){
## heatmap colors: black only for lowest value
cols = c(
rep("#000000", 1),
rep("darkblue", 99),
rep("#1F1FFF", 100),
rep("#3F3FFF", 100),
rep("#5F5FFF", 100),
rep("#7F7FFF", 100),
rep("#9F9FFF", 100),
rep("#BFBFFF", 100),
rep("#DFDFFF", 100),
rep("#FFFFFF", 100)
)
# plot title
plot.title = unique(tbl$file)
## heatmap plotting
p.heat = ggplot(tbl, aes(genome_GC_rank, bin_min, fill=frag_count)) +
geom_tile() +
geom_point(aes(genome_GC_rank, genome_BD, color='red')) +
scale_alpha_continuous( guide=FALSE ) +
scale_fill_gradientn( colours=cols, name='Count' ) +
#scale_x_continuous( expand = c(0,0) ) +
scale_y_reverse( expand = c(0,0), limits=c(1.77,1.66) ) +
labs( x='Genome (ordered by genome G+C content)', y='Fragment buoyant density') +
theme(
text = element_text(size=18),
axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
#axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
legend.justification=c(0,0),
legend.position=c(0,0),
legend.text = element_text(size=14),
plot.margin = unit(c(0,0,0,1), "lines") # top, right, bottom, left
)
# removing x axis title if needed
if(x.axis==FALSE){ # no x-axis; top plot
p.heat = p.heat + theme( axis.title.x = element_blank() )
p.heat = p.heat + theme( plot.margin = unit(c(1,0,0,1), "lines") ) # top, right, bottom, left
p.heat = p.heat + labs( title = plot.title )
p.heat = p.heat + theme( title = element_text( vjust=1, hjust=0.75, size=16) )
} else {
p.heat = p.heat + theme( plot.margin = unit(c(0,0,1,1), "lines") ) # top, right, bottom, left
}
return(p.heat)
}
### amplicon
p.heat.mg.12.5 = make_density_heatmap( tbl.bin.p[tbl.bin.p$data=='shotgun-fragments', ], x.axis=FALSE )
print(p.heat.mg.12.5)
Notes:
In [ ]: