In [6]:
using Gadfly; using DataFrames; using GLM;

In [3]:
#=
tests done on 2 nodes; 
1 local i7 2.6 GHz quadcore with hyperthreading running windows and the registry
1 remote 2Ghz i5 dualcore with hyperthreading running linux

Notable is the linux machine burning more computations than the local windows machine. 
=#
data = readtable("results-03-01-3506.csv", eltypes=[Int64,Int64,Int64,Int64,Int64,Int64]);

In [4]:
data_means = groupby(data, [:n,:split,:line_consumers,:line_searchers, :search_segments]); 
data_means = combine(map(d -> median(d[:time_taken]) / 1000000, data_means))
rename!(data_means, :x1, :ms_time )
sort!(data_means, cols=[:ms_time])


Out[4]:
nsplitline_consumersline_searcherssearch_segmentsms_time
110000800013472.5024515
210000800022484.9465735
3100002000224120.755968
4100002000134159.355899
510000500224165.8729415
610000500134186.5051135
7400008000224366.142048
8400002000224421.4802675
9400008000134441.293911
10400002000134476.553408
1140000500224477.0026415
1240000500134536.3740945
1316000080002241594.253562
1416000020002241610.5564335
1516000080001341635.9254915
1616000020001341667.8609385
171600005002241779.6316445
181600005001342001.7535105

In [9]:
plot(data_means[(data_means[:line_searchers].==2), :], x=:split, y=:ms_time, color=:n, Geom.point, Geom.line, Scale.x_continuous(format=:plain),Scale.y_continuous(format=:plain))


Out[9]:
split -10000 -8000 -6000 -4000 -2000 0 2000 4000 6000 8000 10000 12000 14000 16000 18000 -8000 -7500 -7000 -6500 -6000 -5500 -5000 -4500 -4000 -3500 -3000 -2500 -2000 -1500 -1000 -500 0 500 1000 1500 2000 2500 3000 3500 4000 4500 5000 5500 6000 6500 7000 7500 8000 8500 9000 9500 10000 10500 11000 11500 12000 12500 13000 13500 14000 14500 15000 15500 16000 -10000 0 10000 20000 -8000 -7500 -7000 -6500 -6000 -5500 -5000 -4500 -4000 -3500 -3000 -2500 -2000 -1500 -1000 -500 0 500 1000 1500 2000 2500 3000 3500 4000 4500 5000 5500 6000 6500 7000 7500 8000 8500 9000 9500 10000 10500 11000 11500 12000 12500 13000 13500 14000 14500 15000 15500 16000 1.5×10⁵ 1.0×10⁵ 2.0×10⁵ 5.0×10⁴ 1.0×10⁰ n -2500 -2000 -1500 -1000 -500 0 500 1000 1500 2000 2500 3000 3500 4000 4500 -2000 -1900 -1800 -1700 -1600 -1500 -1400 -1300 -1200 -1100 -1000 -900 -800 -700 -600 -500 -400 -300 -200 -100 0 100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 -2000 0 2000 4000 -2000 -1800 -1600 -1400 -1200 -1000 -800 -600 -400 -200 0 200 400 600 800 1000 1200 1400 1600 1800 2000 2200 2400 2600 2800 3000 3200 3400 3600 3800 4000 ms_time

In [100]:
plot(data_means[(data_means[:split].==8000), :], x=:line_searchers, y=:ms_time, color=:n, Geom.point, Geom.line, Scale.x_continuous(format=:plain),Scale.y_continuous(format=:plain))


Out[100]:
line_searchers 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95 1.00 1.05 1.10 1.15 1.20 1.25 1.30 1.35 1.40 1.45 1.50 1.55 1.60 1.65 1.70 1.75 1.80 1.85 1.90 1.95 2.00 2.05 2.10 2.15 2.20 2.25 2.30 2.35 2.40 2.45 2.50 2.55 2.60 2.65 2.70 2.75 2.80 2.85 2.90 2.95 3.00 3.05 3.10 3.15 3.20 3.25 3.30 3.35 3.40 3.45 3.50 3.55 3.60 3.65 3.70 3.75 3.80 3.85 3.90 3.95 4.00 0 1 2 3 4 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 1.5×10⁵ 1.0×10⁵ 2.0×10⁵ 5.0×10⁴ 1.0×10⁰ n -2500 -2000 -1500 -1000 -500 0 500 1000 1500 2000 2500 3000 3500 4000 4500 -2000 -1900 -1800 -1700 -1600 -1500 -1400 -1300 -1200 -1100 -1000 -900 -800 -700 -600 -500 -400 -300 -200 -100 0 100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 -2000 0 2000 4000 -2000 -1800 -1600 -1400 -1200 -1000 -800 -600 -400 -200 0 200 400 600 800 1000 1200 1400 1600 1800 2000 2200 2400 2600 2800 3000 3200 3400 3600 3800 4000 ms_time

In [10]:
plot(data_means[(data_means[:n].==160000), :], x=:line_consumers, y=:ms_time, color=:split, Geom.point, Geom.line, Scale.x_continuous(format=:plain),Scale.y_continuous(format=:plain))


Out[10]:
line_consumers -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95 1.00 1.05 1.10 1.15 1.20 1.25 1.30 1.35 1.40 1.45 1.50 1.55 1.60 1.65 1.70 1.75 1.80 1.85 1.90 1.95 2.00 2.05 2.10 2.15 2.20 2.25 2.30 2.35 2.40 2.45 2.50 2.55 2.60 2.65 2.70 2.75 2.80 2.85 2.90 2.95 3.00 0 1 2 3 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.0 6000 2000 8000 4000 1 split 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 900 920 940 960 980 1000 1020 1040 1060 1080 1100 1120 1140 1160 1180 1200 1220 1240 1260 1280 1300 1320 1340 1360 1380 1400 1420 1440 1460 1480 1500 1520 1540 1560 1580 1600 1620 1640 1660 1680 1700 1720 1740 1760 1780 1800 1820 1840 1860 1880 1900 1920 1940 1960 1980 2000 2020 2040 2060 2080 2100 2120 2140 2160 2180 2200 2220 2240 2260 2280 2300 2320 2340 2360 2380 2400 2420 2440 2460 2480 2500 2520 2540 2560 2580 2600 2620 2640 2660 2680 2700 0 1000 2000 3000 900 950 1000 1050 1100 1150 1200 1250 1300 1350 1400 1450 1500 1550 1600 1650 1700 1750 1800 1850 1900 1950 2000 2050 2100 2150 2200 2250 2300 2350 2400 2450 2500 2550 2600 2650 2700 ms_time

In [112]:
using GLM

In [118]:
OLS = glm(@formula(time_taken ~ n + split + line_searchers), data, Normal(), IdentityLink());
predict(OLS)


Out[118]:
360-element Array{Float64,1}:
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 1.69777e8
 ⋮        
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9
 1.66667e9