In [6]:
using Gadfly; using DataFrames; using GLM;
In [3]:
#=
Tests were run on 2 nodes:
  1. a local 2.6 GHz quad-core i7 with hyper-threading, running Windows and the registry;
  2. a remote 2 GHz dual-core i5 with hyper-threading, running Linux.
Notably, the Linux machine burned through more computations than the local Windows machine.
=#
# Read the raw benchmark results from CSV, parsing each of the six columns as Int64.
data = readtable(
    "results-03-01-3506.csv";
    eltypes = fill(Int64, 6)
);
In [4]:
# Collapse the raw results to one row per parameter combination: for each group,
# take the median of `time_taken` and divide it by 1_000_000.
# NOTE(review): the `ms_time` name suggests `time_taken` is in nanoseconds — confirm.
data_means = combine(
    map(groupby(data, [:n, :split, :line_consumers, :line_searchers, :search_segments])) do grp
        median(grp[:time_taken]) / 1_000_000
    end
)

# `combine` names the computed column `x1`; give it a meaningful name.
rename!(data_means, :x1, :ms_time)

# Order the rows by the aggregated time (ascending).
sort!(data_means; cols = [:ms_time])
Out[4]:
In [9]:
# Median time against `split`, restricted to rows with 2 line searchers;
# one colour per value of `n`. Plain (non-scientific) axis labels.
plot(
    data_means[data_means[:line_searchers] .== 2, :],
    Geom.point,
    Geom.line,
    Scale.x_continuous(format = :plain),
    Scale.y_continuous(format = :plain);
    x = :split,
    y = :ms_time,
    color = :n
)
Out[9]:
In [100]:
# Median time against the number of line searchers, restricted to rows with
# `split == 8000`; one colour per value of `n`. Plain (non-scientific) axis labels.
plot(
    data_means[data_means[:split] .== 8000, :],
    Geom.point,
    Geom.line,
    Scale.x_continuous(format = :plain),
    Scale.y_continuous(format = :plain);
    x = :line_searchers,
    y = :ms_time,
    color = :n
)
Out[100]:
In [10]:
# Median time against the number of line consumers, restricted to rows with
# `n == 160000`; one colour per value of `split`. Plain (non-scientific) axis labels.
plot(
    data_means[data_means[:n] .== 160000, :],
    Geom.point,
    Geom.line,
    Scale.x_continuous(format = :plain),
    Scale.y_continuous(format = :plain);
    x = :line_consumers,
    y = :ms_time,
    color = :split
)
Out[10]:
In [112]:
using GLM
In [118]:
# Ordinary least squares expressed as a GLM (Normal family, identity link):
# regress the raw `time_taken` on `n`, `split` and `line_searchers`.
# `glm(...)` forwards to `fit(GeneralizedLinearModel, ...)`, which is called directly here.
OLS = fit(
    GeneralizedLinearModel,
    @formula(time_taken ~ n + split + line_searchers),
    data,
    Normal(),
    IdentityLink()
);

# Fitted values for the observations the model was trained on.
predict(OLS)
Out[118]: