In [ ]:
using DataFrames
In [ ]:
# Root of the benchmark data; every entry except "stats" is treated as a seed.
basedir = "2017-01-29_data/"
seeds = filter(readdir(basedir)) do entry
    entry != "stats"
end

# Flatten all per-seed, per-tool measurements into one tab-separated file.
open("2017-01-29_timing.tsv", "w") do io
    println(io, "tool\tseed\tmetric\tsize\tcov\tvar\twalltime")
    for seed in seeds
        for tool in ["mash", "kwip"]
            for measurement in readdir("$basedir/$seed/bench/$tool/")
                path = "$basedir/$seed/bench/$tool/$measurement"
                # `extracttime` is defined elsewhere; its five return values are
                # written, in order, under the metric/size/cov/var/walltime columns.
                metric, sz, cov, vr, walltime = extracttime(path, tool)
                row = [tool, seed, metric, sz, cov, vr, "$walltime"]
                println(io, join(row, "\t"))
            end
        end
    end
end
In [ ]:
# Load the per-measurement timings written to "2017-01-29_timing.tsv".
timing = readtable("2017-01-29_timing.tsv")

# One row per (tool, metric, size, cov, var) group, holding the mean and the
# standard deviation of that group's walltime column.
timing_summ = by(timing, [:tool, :metric, :size, :cov, :var]) do group
    walltimes = group[:walltime]
    DataFrame(time_mean=mean(walltimes), time_sd=std(walltimes))
end
In [ ]:
# Save the per-group timing summary (`timing_summ`) to disk.
writetable("2017-01-29_timing_summary.tsv", timing_summ)