In [ ]:
library(tidyr)
library(dplyr, warn.conflicts=F, quietly=T)
library(ggplot2)
In [ ]:
# Load the statistics table. The file has no header row, so the six column
# names are supplied here.
results <- read.delim(
  "2017-01-08_stats.tab",
  header = FALSE,
  col.names = c("seed", "metric", "sketchsize", "cov", "var", "rho")
)

# Inspect column types and per-column summaries of what was read.
str(results)
summary(results)
In [ ]:
# Keep the unfiltered table under its own name, then restrict the working
# copy to rows whose sketch size is 1e6 or 1e8.
results.all <- results
results <- results.all %>%
  filter(sketchsize %in% c(1e6, 1e8))
In [ ]:
# Row counts per sketch size remaining in the working table.
table(results[["sketchsize"]])
In [ ]:
# Scatter of the (cov, var) combinations present in the data, with both
# axes on a log10 scale.
ggplot(data = results, mapping = aes(x = cov, y = var)) +
  geom_point() +
  theme_bw() +
  scale_x_log10() +
  scale_y_log10()
In [ ]:
# Subset to the rows with cov equal to 16, keep the columns needed for
# plotting, and add factor versions of `var` (as `var.f`) and `seed`.
dat <- results %>%
  filter(cov == 16) %>%
  select(rho, metric, var, seed) %>%
  mutate(
    var.f = as.factor(var),
    seed = as.factor(seed)
  )
str(dat)
In [ ]:
# Box plots of rho for each level of var.f, one box per metric (fill colour).
ggplot(data = dat, mapping = aes(x = var.f, y = rho)) +
  geom_boxplot(mapping = aes(fill = metric))
In [ ]:
# Subset to the rows where var is 0.01 and keep the columns used below.
# `var` is a double parsed from text, so it is matched with a tolerance
# (dplyr::near) instead of `==`; exact comparison would silently drop any
# row whose stored value differs from 0.01 only in its last bits.
dat <- results %>%
  filter(near(var, 0.01)) %>%
  select(rho, metric, cov, seed)

# Mean and standard deviation of rho within each cov x metric group.
# ungroup() removes the grouping that summarise() leaves on the result so
# later operations on dat.summ are not grouped by accident.
dat.summ <- dat %>%
  group_by(cov, metric) %>%
  summarise(rho_av = mean(rho), rho_err = sd(rho)) %>%
  ungroup()

summary(dat)
summary(dat.summ)
In [ ]:
# Mean rho against cov (log10 x axis), one line type per metric, with a
# translucent band of +/- one rho_err around each mean.
ggplot(data = dat.summ, mapping = aes(x = cov, y = rho_av)) +
  geom_line(mapping = aes(linetype = metric)) +
  geom_ribbon(
    mapping = aes(
      fill = metric,
      ymin = rho_av - rho_err,
      ymax = rho_av + rho_err
    ),
    alpha = 0.2
  ) +
  theme_bw() +
  scale_x_log10()
In [ ]:
# Show the structure of `dat`, then draw rho against cov (log10 x axis)
# with colour taken from the seed and line type from the metric.
str(dat)
ggplot(
  data = dat,
  mapping = aes(
    x = cov,
    y = rho,
    colour = as.factor(seed),
    linetype = metric
  )
) +
  geom_line() +
  scale_x_log10()
In [ ]:
# Mean and standard deviation of rho for each var x metric combination.
# summarise() returns only the grouping columns and the new summaries, so
# no explicit column selection is needed beforehand.
summ <- results %>%
  group_by(var, metric) %>%
  summarise(
    rho_av = mean(rho),
    rho_sd = sd(rho)
  )
str(summ)
In [ ]:
# Mean rho against var (log10 x axis), one line type per metric, with a
# translucent band of +/- one rho_sd. Axis labels use plotmath so that pi
# and rho are rendered as Greek letters.
p <- ggplot(
  data = summ,
  mapping = aes(
    x = var,
    y = rho_av,
    ymin = rho_av - rho_sd,
    ymax = rho_av + rho_sd,
    group = metric
  )
) +
  geom_line(mapping = aes(linetype = metric)) +
  geom_ribbon(mapping = aes(fill = metric), alpha = 0.2) +
  labs(
    x = expression(paste("Mean pairwise variation (", pi, ")")),
    y = expression(paste("Spearman's ", rho, " +- SD"))
  ) +
  scale_x_log10() +
  theme_bw()
print(p)
In [ ]: