notebook.community

Edit and run



In [6]:

    
using Gadfly; using DataFrames; using GLM;



In [3]:

    
#=
Benchmark setup: the tests were run on 2 nodes:
  1. a local quad-core i7 at 2.6 GHz with hyperthreading, running Windows and the registry
  2. a remote dual-core i5 at 2 GHz with hyperthreading, running Linux

Notably, the Linux machine burned through more computations than the local Windows machine.
=#
# Load the raw benchmark results; `eltypes` forces all six columns to be parsed as Int64.
# NOTE(review): the CSV presumably holds one row per run with the columns used below
# (n, split, line_consumers, line_searchers, search_segments, time_taken) — confirm.
data = readtable("results-03-01-3506.csv", eltypes=[Int64,Int64,Int64,Int64,Int64,Int64]);



In [4]:

    
# Collapse the repeated runs of every benchmark configuration into a single row holding
# the median of `time_taken`, scaled down by 1e6 and stored as `ms_time`.
# Note: despite its name, `data_means` holds medians, not means.
# NOTE(review): the 1e6 divisor implies `time_taken` is recorded in nanoseconds — confirm.
data_means = groupby(
    data,
    [:n, :split, :line_consumers, :line_searchers, :search_segments]
);
data_means = combine(map(data_means) do runs
    median(runs[:time_taken]) / 1_000_000
end)
# `combine` names the computed column `x1`; give it a meaningful name.
rename!(data_means, :x1, :ms_time)
# Fastest configurations first.
sort!(data_means, cols=[:ms_time])









    Out[4]:




n split line_consumers line_searchers search_segments ms_time
1 10000 8000 1 3 4 72.5024515
2 10000 8000 2 2 4 84.9465735
3 10000 2000 2 2 4 120.755968
4 10000 2000 1 3 4 159.355899
5 10000 500 2 2 4 165.8729415
6 10000 500 1 3 4 186.5051135
7 40000 8000 2 2 4 366.142048
8 40000 2000 2 2 4 421.4802675
9 40000 8000 1 3 4 441.293911
10 40000 2000 1 3 4 476.553408
11 40000 500 2 2 4 477.0026415
12 40000 500 1 3 4 536.3740945
13 160000 8000 2 2 4 1594.253562
14 160000 2000 2 2 4 1610.5564335
15 160000 8000 1 3 4 1635.9254915
16 160000 2000 1 3 4 1667.8609385
17 160000 500 2 2 4 1779.6316445
18 160000 500 1 3 4 2001.7535105



In [9]:

    
# Median run time against `split`, restricted to the configurations that used
# 2 line searchers; points and connecting lines are colored by `n`.
# `format=:plain` keeps the axis tick labels out of scientific notation.
plot(
    data_means[data_means[:line_searchers] .== 2, :],
    x=:split, y=:ms_time, color=:n,
    Geom.point, Geom.line,
    Scale.x_continuous(format=:plain), Scale.y_continuous(format=:plain)
)









    Out[9]:



In [100]:

    
# Median run time against the number of line searchers, restricted to the
# configurations with `split == 8000`; points and connecting lines are colored by `n`.
# `format=:plain` keeps the axis tick labels out of scientific notation.
plot(
    data_means[data_means[:split] .== 8000, :],
    x=:line_searchers, y=:ms_time, color=:n,
    Geom.point, Geom.line,
    Scale.x_continuous(format=:plain), Scale.y_continuous(format=:plain)
)









    Out[100]:



In [10]:

    
# Median run time against the number of line consumers, restricted to the largest
# problem size shown in the results (`n == 160000`); colored by `split`.
# `format=:plain` keeps the axis tick labels out of scientific notation.
plot(
    data_means[data_means[:n] .== 160000, :],
    x=:line_consumers, y=:ms_time, color=:split,
    Geom.point, Geom.line,
    Scale.x_continuous(format=:plain), Scale.y_continuous(format=:plain)
)









    Out[10]:



In [112]:

    
using GLM



In [118]:

    
# Ordinary least squares written as a GLM (Normal family, identity link), fitted on the
# raw per-run rows in `data` rather than on the aggregated `data_means`.
OLS = glm(
    @formula(time_taken ~ n + split + line_searchers),
    data,
    Normal(),
    IdentityLink()
);
# Predictions for the rows the model was fitted on. They are in the raw `time_taken`
# units of the response, not in the milliseconds used by `data_means`.
predict(OLS)









    Out[118]:





360-element Array{Float64,1}:
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 ⋮        
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9

	n	split	line_consumers	line_searchers	search_segments	ms_time
1	10000	8000	1	3	4	72.5024515
2	10000	8000	2	2	4	84.9465735
3	10000	2000	2	2	4	120.755968
4	10000	2000	1	3	4	159.355899
5	10000	500	2	2	4	165.8729415
6	10000	500	1	3	4	186.5051135
7	40000	8000	2	2	4	366.142048
8	40000	2000	2	2	4	421.4802675
9	40000	8000	1	3	4	441.293911
10	40000	2000	1	3	4	476.553408
11	40000	500	2	2	4	477.0026415
12	40000	500	1	3	4	536.3740945
13	160000	8000	2	2	4	1594.253562
14	160000	2000	2	2	4	1610.5564335
15	160000	8000	1	3	4	1635.9254915
16	160000	2000	1	3	4	1667.8609385
17	160000	500	2	2	4	1779.6316445
18	160000	500	1	3	4	2001.7535105