In [ ]:
using DataFrames

In [ ]:
# Collect one timing record per benchmark measurement into a single TSV file.
basedir = "2017-01-29_data/"
# Every entry of `basedir` except "stats" is treated as a seed.
seeds = filter(entry -> entry != "stats", readdir(basedir))
open("2017-01-29_timing.tsv", "w") do tsv
    # Header row; the fields of each record below follow this column order.
    println(tsv, "tool\tseed\tmetric\tsize\tcov\tvar\twalltime")
    for seed in seeds
        for tool in ["mash", "kwip"]
            benchdir = "$basedir/$seed/bench/$tool/"
            for measurement in readdir(benchdir)
                # `extracttime` is defined outside this cell; its five return
                # values fill the metric, size, cov, var and walltime columns.
                record = extracttime(string(benchdir, measurement), tool)
                metric, sizeval, covval, varval, walltime = record
                fields = [tool, seed, metric, sizeval, covval, varval, string(walltime)]
                println(tsv, join(fields, "\t"))
            end
        end
    end
end

In [ ]:
# Load the per-measurement timings from the TSV file.
timing = readtable("2017-01-29_timing.tsv")

# One summary row per (tool, metric, size, cov, var) group: mean and standard
# deviation of the `walltime` column within that group.
timing_summ = by(timing, [:tool, :metric, :size, :cov, :var]) do grp
    walltimes = grp[:walltime]
    DataFrame(time_mean=mean(walltimes), time_sd=std(walltimes))
end

In [ ]:
# Save the grouped timing summary (`timing_summ`) to its own file.
writetable("2017-01-29_timing_summary.tsv", timing_summ)