In [ ]:

    
# Attach the packages used by the cells below.
library(tidyr)
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
library(dplyr, warn.conflicts = FALSE, quietly = TRUE)
library(ggplot2)



In [ ]:

    
# Read the tab-delimited results table. It is read with header = FALSE (the
# first line is treated as data), so the six column names are supplied here.
results <- read.delim(
  "2017-01-06_fixed.tab",
  header = FALSE,  # FALSE rather than F: F is a reassignable variable
  col.names = c("seed", "metric", "kwipsize", "cov", "var", "rho")
)

# Print the column types and per-column summaries as a quick sanity check.
str(results)
summary(results)



In [ ]:

    
# Count how many rows carry each distinct value of `var`.
table(results[["var"]])

All data



In [ ]:

    
# Keep the rows where `cov` is 30 and the four columns used for plotting.
# `seed` is converted to a factor in place and `var.f` is added as a factor
# copy of `var` (the numeric `var` column is kept alongside it).
dat <- results %>%
  filter(cov == 30) %>%
  select(rho, metric, var, seed) %>%
  mutate(
    seed = as.factor(seed),
    var.f = as.factor(var)
  )

str(dat)



In [ ]:

    
# Box plot of rho for each level of var.f, with the boxes filled by metric.
ggplot(dat, aes(x = var.f, y = rho)) +
  geom_boxplot(aes(fill = metric))



In [ ]:

    
# Mean and standard deviation of rho for every (cov, metric) pair, restricted
# to the rows where `var` is 0.01.
dat <- results %>%
  # near() compares with a small tolerance; an exact `==` on doubles can
  # silently drop rows whose value differs only by representation error.
  filter(near(var, 0.01)) %>%
  select(rho, metric, cov, seed) %>%
  group_by(cov, metric) %>%
  summarise(rho_av = mean(rho), rho_err = sd(rho)) %>%
  # summarise() removes only the last grouping level; drop the remaining one
  # so later steps do not operate per-`cov` by accident.
  ungroup()

summary(dat)
# Row counts per value of `cov` in the full (unfiltered) table.
table(results$cov)



In [ ]:

    
# Mean rho against cov on a log10 x axis: one line per metric, with a
# translucent ribbon from rho_av - rho_err to rho_av + rho_err.
ggplot(
  dat,
  aes(x = cov, y = rho_av, ymin = rho_av - rho_err, ymax = rho_av + rho_err)
) +
  geom_line(aes(linetype = metric)) +
  geom_ribbon(aes(fill = metric), alpha = 0.2) +
  scale_x_log10()



In [ ]:

    
# rho against var on a log10 x axis, coloured by seed with the line type set
# by metric.
#
# This plot needs the per-row `var`, `rho` and `seed` columns. The summarised
# `dat` built earlier in this file only has cov, metric, rho_av and rho_err,
# so plotting it here fails; the input is rebuilt from `results` instead.
# NOTE(review): the cov == 30 subset is assumed to be the intended input, as
# it is the only per-seed subset prepared elsewhere in this file -- confirm.
dat_by_seed <- results %>%
  filter(cov == 30) %>%
  mutate(seed = as.factor(seed))  # factor so colour is a discrete scale

ggplot(dat_by_seed, aes(x = var, y = rho, colour = seed, linetype = metric)) +
  geom_line() +
  scale_x_log10()

$\pi$ vs performance



In [ ]:

    
# Mean and standard deviation of rho for every (var, metric) pair, computed
# over all rows of `results`.
summ <- results %>%
  select(metric, rho, var) %>%
  group_by(var, metric) %>%
  summarise(rho_av = mean(rho), rho_sd = sd(rho)) %>%
  # summarise() removes only the last grouping level; drop the remaining
  # `var` grouping so `summ` is a plain, ungrouped table.
  ungroup()

str(summ)



In [ ]:

    
# Mean rho per metric against var on a log10 x axis, with a translucent
# ribbon spanning one standard deviation either side of the mean.
p <- ggplot(summ, aes(x = var, y = rho_av, group = metric)) +
  geom_line(aes(linetype = metric)) +
  geom_ribbon(
    aes(fill = metric, ymin = rho_av - rho_sd, ymax = rho_av + rho_sd),
    alpha = 0.2
  ) +
  scale_x_log10() +
  labs(
    x = expression(paste('Mean pairwise variation (', pi, ')')),
    y = expression(paste("Spearman's ", rho, " +- SD"))
  ) +
  theme_bw()

print(p)



In [ ]: