In [314]:
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


In [21]:
### DEFAULT PARAMETERS
d_n=1000000;
d_k=20000;
d_m=100;

d_skew=1.0;
d_ratio=2.0;

d_cores=3;
d_inner=10000;
d_outer=10000;

### READ ALL RESULTS

experiments=read.csv("results/experiments.csv", sep = "\t");
small_experiments=enrich_s_exp(read.csv("results/small_experiments.csv", sep = "\t"));
all_experiments=bind_rows(experiments,small_experiments);

#k = k_factor * ratio * m^2
experiments$k_factor=experiments$k/(experiments$max_weight_ratio*experiments$m*experiments$m);
experiments$n_ratio=experiments$n/experiments$k;

write.table(experiments,"all_experiments.csv");

In [2]:
enrich_s_exp = function(s_exp) {
    epsilons = s_exp[,c(9:length(s_exp[1,]))];
    #s_exp = s_exp[,-c(9:length(s_exp[1,]))];
    avg_var_eps = as.data.frame(t(as.data.frame(lapply(as.data.frame(t(epsilons)),
           FUN=function(epss) c(mean(epss),var(epss),min(epss),max(epss))))));
    colnames(avg_var_eps)=c("epsilon_mean","epsilon_var","epsilon_min","epsilon_max");
    rownames(avg_var_eps)=c(1:length(s_exp[,1]));

    s_exp$epsilon_mean = avg_var_eps$epsilon_mean;
    s_exp$epsilon_var  = avg_var_eps$epsilon_var;
    s_exp$epsilon_min  = avg_var_eps$epsilon_min;
    s_exp$epsilon_max  = avg_var_eps$epsilon_max;

    s_exp$k_factor=s_exp$k/(s_exp$max_weight_ratio*s_exp$m*s_exp$m);
    s_exp$n_ratio =s_exp$n/s_exp$k;   
    return(s_exp);
}

In [28]:
small_n=small_experiments[small_experiments$n_ratio==1,][-c(1,length(small_experiments[1,]))];
big_n=small_experiments[small_experiments$n_ratio==2,][-c(1,length(small_experiments[1,]))];

small_n=small_n[order(small_n$k,small_n$skew,small_n$max_weight_ratio),];
big_n=big_n[order(big_n$k,big_n$skew,big_n$max_weight_ratio),];

tmp_df = data.frame((small_n$epsilon_mean-big_n$epsilon_mean))
colnames(tmp_df)=c("rel_diff")
tmp_df$epsilon_sq_var_small_n = sqrt(small_n$epsilon_var)
tmp_df$epsilon_sq_var_big_n   = sqrt(  big_n$epsilon_var)

tmp_df


Warning message:
In small_n$epsilon_mean - big_n$epsilon_mean: longer object length is not a multiple of shorter object length
Error in `$<-.data.frame`(`*tmp*`, "epsilon_sq_var_big_n", value = c(0.00078955894330738, : replacement has 63 rows, data has 168
Out[28]:
rel_diffepsilon_sq_var_small_n
10.0004355270.0007837276
2-5.9517e-050.0008215743
3-8.7036e-050.0005658365
4-1.3872e-050.0003920585
55.4938e-050.0002904446
60.0001559030.0003442497
73.15789e-050.0002277456
86.87666e-050.0001437744
90.000176030.0002199071
100.0012961440.0002667974
110.0013263110.0001423311
120.0013819740.0002019424
130.00055594310.0001066634
14-0.0022585138.983957e-05
150.0037798697.199421e-05
160.0040035097.70948e-05
170.0040700022.846604e-05
180.0040517411.684935e-05
190.0024254569.514622e-05
200.0024397220.0001188301
210.0014356648.627807e-05
22-0.0015467610.001969152
230.0072612960.005563032
24-0.00020795640.003612142
250.0024871250.0008848219
260.0019612280.001047045
270.0022364370.0008547022
280.0024999490.001250519
290.0011999730.0004394735
300.002303360.0006285522
310.0024656670.0005030337
320.0024570570.0002426786
330.0023853650.0001257477
340.0016231629.909828e-05
350.00081065539.962727e-05
36-0.0020865338.536074e-05
370.0029500182.740472e-05
380.0030032512.806771e-05
390.002464691.951362e-05
400.004281630.002056429
410.0013966130.00174099
420.0016245370.002304981
430.0045235940.003223666
440.002200860.002125127
450.0083928650.006723516
460.001865989.196336e-05
470.0014032936.731469e-05
480.00087867983.610574e-05
490.0018997130.0008972503
500.0021174840.001308667
510.0013355840.001148564
520.0023741760.001185513
530.0013223990.0009463993
540.0018617380.001587019
550.0029691330.001303905
560.0016175650.001161685
570.0020782430.000997274
580.0014583090.0002296142
590.0016614560.0006047754
600.0014362270.0001615137
610.0020260190.0005702309
620.0016501270.000128736
630.0016706780.0005232551
64-0.038095319.366904e-05
65-0.038211756.70366e-05
66-0.038367953.106305e-05
67-0.018188313.339247e-05
68-0.01823188.778653e-06
69-0.018196651.018214e-05
70-0.007004810.0009469889
71-0.0043970030.00205228
72-0.0033572960.00345951
73-0.0054812490.000751844
74-0.0054507120.0009390398
75-0.0046655620.001031201
76-0.0066841020.0007915892
77-0.0091483440.0009566349
78-0.0018615450.001362855
79-0.00258540.0006376236
80-0.0022498640.001058698
81-0.0017463020.001295686
82-0.0032999520.0001917912
83-0.0030190870.0003316656
84-0.0034872920.001122223
85-0.0075528930.0001108378
86-0.0022127970.0003594412
87-0.005042430.0008850829
88-0.0014929610.0001474347
89-0.0016151580.0003120714
90-0.0016305270.0004329445
91-0.0011772650.0007011244
92-0.0021188410.0007937227
93-0.0011298230.0009991803
94-0.0013910160.0004215932
95-0.00065898470.0008927552
96-0.0010656190.0006318275
97-0.0011814850.001021122
98-0.0019945520.001006786
99-0.0044676320.001424644
1000.0015777050.001241954
1010.0012277010.001557084
1020.00081335970.001778753
103-0.00068656570.000310019
104-0.0022099570.0005382635
105-0.00038494120.0009529565
1062.250525e-050.0002375631
107-0.00085138380.0006017708
108-6.866431e-050.0007462828
109-0.00021934580.0002960054
110-0.00081929530.0003286162
111-0.0012873440.0005452056
1120.00027479460.0004754265
1130.00091571430.0009043447
1140.0007054040.001018249
1150.00029313460.000414779
1160.00014194930.0004305181
117-0.00032623540.0006163001
118-3.646722e-050.0003144758
119-0.00072806650.0004481704
1200.00011738720.0005909805
1210.00011766990.0005170931
1220.00024578130.0006995569
123-0.0002522190.0003729746
1240.00052340720.001042279
1250.00079796820.0007186963
1260.00086449790.001041573
127-0.039491890.0005332115
128-0.037107990.002507107
129-0.03794280.001504717
130-0.019559630.0004031903
131-0.019351310.0006898967
132-0.018876010.001220619
133-0.0075748660.0003613908
134-0.0070290310.0005973307
135-0.0064645740.001079446
136-0.006378690.0002991002
137-0.0062970270.0004796874
138-0.0051801640.001161801
139-0.0072352750.000313417
140-0.0096300790.0005485056
141-0.0032411210.0008327803
142-0.003767140.0002907439
143-0.0034575540.0002963719
144-0.0033802780.0006600821
145-0.0036257750.0004733586
146-0.0030909910.0008220416
147-0.0048263690.0003319376
148-0.0073661130.001010116
149-0.0032084270.0001212852
150-0.0063090830.0004503631
151-0.0021276710.0003400564
152-0.0022226070.0003505918
153-0.0022934470.0003465113
1540.041382680.0002457217
1550.040843330.0001960676
1560.041994869.696129e-05
1570.020425750.0002059698
1580.020411760.0001559697
1590.020339437.491041e-05
1600.013411740.002058616
1610.01218520.002510329
1620.0073945790.001774763
1630.0062893350.002147631
1640.0070814840.001856094
1650.0052597980.0008150136
1660.004751530.0022644
1670.0030216610.003916896
1680.0054697250.003491087

In [29]:
k_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different k with same n, skew, ratio
    vary_k = experiments[
            experiments$n == d_n & experiments$skew == d_skew & experiments$max_weight_ratio == d_ratio,
            c("k","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_k[,"k"]/heuristic_k,vary_k$epsilon0.9,log="xy",xlab=expression("k/k_estimate"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: size of U vs relative error",sub=paste("n =",d_n,", m =",d_m,", skew =",d_skew,", w_ratio =",d_ratio,", k_estimate =",heuristic_k));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_k[,"k"]/heuristic_k,vary_k$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_k[,"k"]/heuristic_k,vary_k$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)

}

k_factor_given_ratio_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different k with same n, skew, ratio
    vary_k = experiments[
            experiments$n == d_n & experiments$skew == d_skew & experiments$max_weight_ratio == d_ratio,
            c("k_factor","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_k[,"k_factor"],vary_k$epsilon0.9,log="xy",xlab=expression("k_factor"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: size of U vs relative error",sub=paste("n =",d_n,", m =",d_m,", skew =",d_skew,", w_ratio =",d_ratio));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_k[,"k_factor"],vary_k$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_k[,"k_factor"],vary_k$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}

k_factor_given_k_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different k with same n, skew, ratio
    vary_k = experiments[
            experiments$n == d_n & experiments$skew == d_skew & experiments$k == d_k,
            c("k_factor","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_k[,"k_factor"],vary_k$epsilon0.9,log="xy",xlab=expression("k_factor"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: size of U vs relative error",sub=paste("n =",d_n,", k =",d_k,", m =",d_m,", skew =",d_skew));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_k[,"k_factor"],vary_k$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_k[,"k_factor"],vary_k$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}

n_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different n with same k, skew, ratio
    vary_n = experiments[
            experiments$k == d_k & experiments$skew == d_skew & experiments$max_weight_ratio == d_ratio,
            c("n","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    max_y = max(vary_n$epsilon0.9,vary_n$epsilon0.95,vary_n$epsilon0.99);
    
    plot(vary_n[,"n"]/d_k,vary_n$epsilon0.9,log="x",xlab=expression("n/k"),ylab="epsilon",ylim=c(0,max_y),col=cols[1],pch=pchs[1],
        main="AWS: size of R vs relative error",sub=paste("k =",d_k,", m =",d_m,", skew =",d_skew,", w_ratio =",d_ratio));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_n[,"n"]/d_k,vary_n$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_n[,"n"]/d_k,vary_n$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(20.0,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}


ratio_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different ratio with same n, k, skew
    vary_ratio = experiments[
            experiments$n == d_n & experiments$k == d_k & experiments$skew == d_skew,
            c("max_weight_ratio","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_ratio[,"max_weight_ratio"],vary_ratio$epsilon0.9,log="x",ylim=c(0.001,0.04),xlab=expression("Maximum weight ratio"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: Maximum weight ratio vs relative error",sub=paste("n =",d_n,", k =",d_k,", m =",d_m,", w_skew =",d_skew));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_ratio[,"max_weight_ratio"],vary_ratio$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_ratio[,"max_weight_ratio"],vary_ratio$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(20.0,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
    #NOTE: graph would be better if k is adjusted according to skew (to k_heuristic)
}




skew_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different skew with same n, k, ratio (DATA MISSING FOR NOW)
    vary_skew = experiments[
            experiments$n == d_n & experiments$k == d_k & experiments$max_weight_ratio == d_ratio,
            c("skew","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_skew[,"skew"],vary_skew$epsilon0.9,log="xy",xlab=expression("skew"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: skew vs relative error",sub=paste("n =",d_n,", k =",d_k,", m =",d_m,", w_ratio =",d_ratio));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_skew[,"skew"],vary_skew$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_skew[,"skew"],vary_skew$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(20.0,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}

In [30]:
###Identify common values of k
##table(sort(experiments$k))
##
##for(k in c(400,1600,5000,10000,20000,40000)) {
##    k_factor_given_k_vs_epsilon_plot(d_n = 1e6, d_k=k);
##}

skew_vs_epsilon_plot(d_ratio=1.1);
skew_vs_epsilon_plot(d_ratio=4.0);
skew_vs_epsilon_plot(d_ratio=16.0);

for(k in c(10000,20000,40000,80000)) {#did not plot k=400,1600,5000, for these are quite unrealistic k values
    n_vs_epsilon_plot(d_k=k);
}

for(n in c(80000,1e6,1600000,2400000,4800000)) {
    k_vs_epsilon_plot(d_n = n);#equiv to k_factor_given_ratio_vs_epsilon_plot(d_n = 1e6, d_ratio=ratio);
}

#Notice that both small and big ratio are good.
##Explanation: - for small ratio we have near uniform data
##             - for big ratio k_estimate gets near n, improving the result

for(ratio in c(1.1,2,4,8,16,32)) {
    k_vs_epsilon_plot(d_ratio=ratio);#equiv to k_factor_given_ratio_vs_epsilon_plot(d_n = 1e6, d_ratio=ratio);
}
###dev.print(pdf, 'exp2_3_vary_k.pdf')


Warning message:
In min(x): no non-missing arguments to min; returning InfWarning message:
In max(x): no non-missing arguments to max; returning -InfWarning message:
In min(x): no non-missing arguments to min; returning InfWarning message:
In max(x): no non-missing arguments to max; returning -Inf
Error in plot.window(...): need finite 'xlim' values

In [31]:
#My first plots

k_vs_epsilon_plot();

###dev.print(pdf, 'exp2_3_vary_k.pdf')

n_vs_epsilon_plot();
###dev.print(pdf, 'exp2_3_vary_n.pdf')

ratio_vs_epsilon_plot();
###dev.print(pdf, 'exp2_3_vary_ratio.pdf')



In [32]:
##plot(apply(experiments,1, FUN=function(r) r['m']^2*r['max_weight_ratio']/r['k']),experiments$epsilon0.95)

#ALL points
plot(experiments$k_factor,experiments$epsilon0.95,log="xy",col=1);

#Slices (for different weight ratio)
for(ratio in c(1.1, 2.0, 4.0, 16.0, 32.0)) {
    selected_experiments=experiments[experiments$max_weight_ratio == ratio,]
    plot(selected_experiments$k_factor,selected_experiments$epsilon0.95,log="xy",col=1,xlab="k-factor",ylab="epsilon at sigma=0.95",main=paste("ratio = ",ratio));
}
#points(selected_experiments$k_factor,selected_experiments$epsilon0.95,log="xy",col=2);
abline(v=1,lty=3);
abline(h=0.01,lty=3);
#[experiments$epsilon0.95 < 0.01,]
#experiments[experiments$epsilon0.95 >= 0.01,]

#scatterplot3d(log(experiments$k_factor),log(experiments$epsilon0.95), experiments$max_weight_ratio,log="xy");



In [33]:
high_ratio_low_kfactor=experiments[experiments$max_weight_ratio==32&experiments$k_factor==0.5,c("skew","epsilon0.9","epsilon0.95","epsilon0.99")]
plot(high_ratio_low_kfactor$skew,high_ratio_low_kfactor$epsilon0.9,pch=1,col="blue",ylim=c(0,max(high_ratio_low_kfactor$epsilon0.99)))
points(high_ratio_low_kfactor$skew,high_ratio_low_kfactor$epsilon0.95,pch=3,col="purple")
points(high_ratio_low_kfactor$skew,high_ratio_low_kfactor$epsilon0.99,pch=5,col="red")


Warning message:
In min(x): no non-missing arguments to min; returning InfWarning message:
In max(x): no non-missing arguments to max; returning -InfWarning message:
In max(high_ratio_low_kfactor$epsilon0.99): no non-missing arguments to max; returning -Inf
Error in plot.window(...): need finite 'xlim' values

In [121]:
plot(1:3,1:3,main=expression(italic(w)-relation))



In [372]:
df[1,]


Out[372]:
skewk_factormax_weight_ratio
110.52
Out[372]:
skewn_ratiok_factormax_weight_ratiom
111280.52100

In [22]:
make_plot = function(plot_var_id, value_rank) {
    #Note that the relevant quantities are: n,k,skew,ratio. Select 3 out of 4, use final one as axis vs epsilon
    plot_vars = c("skew","n_ratio","k_factor","max_weight_ratio","m");
    nice_plot_vars = c("skew","n-ratio","k-factor","w-ratio","m");
    plot_var = plot_vars[plot_var_id];

    const_vars = setdiff(plot_vars,plot_var);
    nice_const_vars = nice_plot_vars[-plot_var_id];
    nice_plot_var = nice_plot_vars[plot_var_id];
    
    df=experiments[,const_vars];
    #identify common combination of values
    common_value_sets=sort(table(unlist(lapply(as.list(as.data.frame(t(df))),toString))),decreasing=TRUE);
    common_value_set = as.numeric(strsplit(names(common_value_sets)[value_rank],",")[[1]])
    
    #select data based on this common combination
    plot_data=experiments[abs(experiments[,const_vars[1]]-common_value_set[1])<1e-6 & 
                          abs(experiments[,const_vars[2]]-common_value_set[2])<1e-6 & 
                          abs(experiments[,const_vars[3]]-common_value_set[3])<1e-6 & 
                          abs(experiments[,const_vars[4]]-common_value_set[4])<1e-6,]

    var_x =plot_data[,plot_var]
    var_y1=plot_data$epsilon0.9
    var_y2=plot_data$epsilon0.95
    var_y3=plot_data$epsilon0.99
    all_ys=c(var_y1,var_y2,var_y3);
    
    
    typical_size = 10*sqrt(var(all_ys));
    
    y_min = min(all_ys);
    y_max = max(all_ys);
    used_size = y_max-y_min;
    if(used_size < typical_size && y_min - (typical_size - used_size)*0.5 > 0) {
        y_min = y_min - (typical_size - used_size)*0.5;
        y_max = y_max + (typical_size - used_size)*0.5;
    }
    y_min = y_min# - 0.002;
    y_max = y_max# - 0.002; 
    
    plot_width = 3.1;
    plot_height = 3.5;
    options(repr.plot.las=1,repr.plot.width=plot_width, repr.plot.height=plot_height);
    par(las=1);
    plot(  var_x,var_y1, pch=1, col="blue", log="x",
         ylim=c(y_min,y_max),
         ylab="",xlab=nice_plot_var,
         main=bquote(paste(.(nice_plot_var)," vs ", epsilon)),
         #xaxt='n',##Remove for n_ratio
         yaxt='n',
            )
    
    
    points(var_x,var_y2,pch=3,col="purple")
    points(var_x,var_y3,pch=5,col="red")
    #points(1:64,0.04*8/(1:64),pch=5,col="black",type="l")
    

    mtext(paste(paste(sapply(1:4,function(i) paste(nice_const_vars[i],"=",common_value_set[i])),collapse=", ")),cex=0.7)
    mtext(expression(epsilon), side=2, line=3.5)
    #axis(side = 1, at=var_x)##Remove for n_ratio
    axis(side = 2,las=1)
        
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    #legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=c("blue","purple","red"),pch=c(1,3,5))
    legend_x = min(var_x)*0.1+max(var_x)*0.11;
    y_height_ratio = 1.0;
    legend_y = (1-y_height_ratio)*y_min+y_height_ratio*y_max;
    #legend(legend_x,legend_y,c(expression(sigma == 0.9),expression(sigma == 0.95),expression(sigma == 0.99)),col=c("blue","purple","red"),pch=c(1,3,5))
    #legend("topright",bty = "n",y.intersp=1.5,c(expression(sigma == 0.9),expression(sigma == 0.95),expression(sigma == 0.99)),col=c("blue","purple","red"),pch=c(1,3,5))
    legend(legend_x,legend_y,bty = "n",y.intersp=2.2,c(expression(sigma == 0.9),expression(sigma == 0.95),expression(sigma == 0.99)),col=c("blue","purple","red"),pch=c(1,3,5))
    dev.print(pdf, paste("AWS_plot_",plot_var,"_rank_",value_rank,".pdf",sep=""),width=plot_width,height=plot_height);
}
                         

                             
for(plot_var_id in c(5)) {
    for(value_rank in c(1,2,3)) {
        plot_vars = c("skew","n_ratio","k_factor","max_weight_ratio","m");
        plot_var = plot_vars[plot_var_id];
        print(paste("AWS_plot_",plot_var,"_rank_",value_rank,".pdf",sep=""));
        make_plot(plot_var_id,value_rank);
    }
}


[1] "AWS_plot_m_rank_1.pdf"
[1] "AWS_plot_m_rank_2.pdf"
[1] "AWS_plot_m_rank_3.pdf"

In [291]:
s_make_plot = function(plot_var_id, value_rank) {
    #Note that the relevant quantities are: n,k,skew,ratio. Select 3 out of 4, use final one as axis vs epsilon
    plot_vars = c("skew","n_ratio","k_factor","max_weight_ratio");
    plot_var = plot_vars[plot_var_id];

    const_vars = setdiff(plot_vars,plot_var);

    df = small_experiments[const_vars];

    #identify common combination of values
    common_value_sets=sort(table(unlist(lapply(as.list(as.data.frame(t(df))),toString))),decreasing=TRUE);
    common_value_set = as.numeric(strsplit(names(common_value_sets)[value_rank],",")[[1]])
    
    #select data based on this common combination
    plot_data=small_experiments[abs(small_experiments[,const_vars[1]]-common_value_set[1])<1e-6 & 
                          abs(small_experiments[,const_vars[2]]-common_value_set[2])<1e-6 & 
                          abs(small_experiments[,const_vars[3]]-common_value_set[3])<1e-6,]

    var_x =plot_data[,plot_var]
    var_y1=plot_data$epsilon_min;#plot_data$epsilon_mean-sqrt(plot_data$epsilon_var)
    var_y2=plot_data$epsilon_mean;
    var_y3=plot_data$epsilon_max;
    all_ys=c(var_y1,var_y2,var_y3);
    plot(  var_x,var_y1, pch=1, col="blue", log="xy",
         ylim=c(max(min(all_ys),1e-5),max(all_ys)),
         ylab="epsilon",xlab=plot_var,
         main=paste(plot_var," vs Epsilon"), 
         sub=paste(sapply(1:3,function(i) paste(const_vars[i],"=",common_value_set[i])),collapse=", ")
            )
    points(var_x,var_y2,pch=3,col="purple")
    points(var_x,var_y3,pch=5,col="red")

    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=c("blue","purple","red"),pch=c(1,3,5))
}
                         
for(plot_var_id in c(1,3,4)) {
    for(value_rank in 1:8) {
        s_make_plot(plot_var_id,value_rank);
    }
}



In [37]:
ordered_small_experiments=small_experiments[order(small_experiments$k,small_experiments$skew,small_experiments$max_weight_ratio,small_experiments$n),];

In [38]:
small_experiments[37,]$epsilon_mean
sqrt(small_experiments[37,]$epsilon_var)


Out[38]:
0.0052049598
Out[38]:
0.00088482191687415

In [39]:
small_experiments[55,]$epsilon_mean
sqrt(small_experiments[55,]$epsilon_var)


Out[39]:
0.0046768465
Out[39]:
0.00104704535789072

In [40]:
plot(unlist(small_experiments[37,c(9:18)]),col="red")
abline(h=small_experiments[37,]$epsilon_mean,lty=2,col="red")
abline(h=small_experiments[37,]$epsilon_mean-sqrt(small_experiments[37,]$epsilon_var),lty=3,col="red")
abline(h=small_experiments[37,]$epsilon_mean+sqrt(small_experiments[37,]$epsilon_var),lty=3,col="red")

points(unlist(small_experiments[55,c(9:18)]),col="blue")
abline(h=small_experiments[55,]$epsilon_mean,lty=2,col="blue")
abline(h=small_experiments[55,]$epsilon_mean-sqrt(small_experiments[55,]$epsilon_var),lty=3,col="blue")
abline(h=small_experiments[55,]$epsilon_mean+sqrt(small_experiments[55,]$epsilon_var),lty=3,col="blue")



In [387]:
plot_width = 3.1;
plot_height = 3.5;
options(repr.plot.width=plot_width, repr.plot.height=plot_height);

xs=1:1000/1000;
 #w[i] is in [1,max_weight_ratio]
plot(1, type="n",xaxt='n',yaxt='n', xlim=c(0,1),ylim=c(0,1), xlab="x   (the value)", ylab="F(x)   (the frequency)",main="CDFs of datasets")
axis(side = 1, at=c(0,0.5,1.0))
axis(side = 2, at=c(0,0.5,1.0))

skews=c(1,4,16);
ratios=c(1.1,2,16);
cols=c("red","purple","blue");
flatcols=c(1:9);
flatdescr=c(1:9);
flatltys=c(1:9);

                             

for(ratioi in 1:3) {
for(skewi in 1:3) {
    skew = skews[skewi];
    ratio = ratios[ratioi];
    j = 10-((ratioi-1)*3+skewi);
    col = cols[skewi];
    
    flatcols[j]=col;
    flatdescr[j]=paste("w-ratio=",ratio,", skew=",skew,sep="");
    flatltys[j]=ratioi;
    
    points(((xs)^skew*(ratio-1)+1)/ratio,xs,type="l",col=col,lty=ratioi)
}}
dev.print(pdf, "CDFs.pdf",width=plot_width,height=plot_height);

plot.new()
legend("topleft",bty="n",legend = flatdescr,col = flatcols, lty = flatltys, y.intersp=2.5,cex=0.9)
dev.print(pdf, "CDFs_legend.pdf",width=plot_width,height=plot_height);


Out[387]:
pdf: 2
Out[387]:
pdf: 2

In [360]:
tmpdat=all_experiments[all_experiments$skew == 16 & all_experiments$k_factor == 16 & abs(all_experiments$max_weight_ratio-4)<1e-3, ];



In [350]:
df = all_experiments[,c("skew","k_factor","max_weight_ratio")]
common_value_sets=sort(table(unlist(lapply(as.list(as.data.frame(t(df))),toString))),decreasing=TRUE);
all_experiments[all_experiments$skew == 1 & all_experiments$k_factor == 16 & abs(all_experiments$max_weight_ratio-4)<1e-3, ]$m
all_experiments[all_experiments$skew == 1 & all_experiments$k_factor == 4 & abs(all_experiments$max_weight_ratio-16)<1e-3, ]$m
all_experiments[all_experiments$skew == 16 & all_experiments$k_factor == 16 & abs(all_experiments$max_weight_ratio-4)<1e-3, ]$m


Out[350]:
  1. 100
  2. 100
  3. 10
  4. 10
Out[350]:
  1. 100
  2. 100
  3. 10
  4. 10
Out[350]:
  1. 100
  2. 100
  3. 10
  4. 10

In [5]:
common_value_sets


Error in eval(expr, envir, enclos): object 'common_value_sets' not found

In [ ]: