notebook.community

Edit and run



In [314]:

    
library(dplyr)









    



Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [21]:

    
### DEFAULT PARAMETERS
d_n=1000000;
d_k=20000;
d_m=100;

d_skew=1.0;
d_ratio=2.0;

d_cores=3;
d_inner=10000;
d_outer=10000;

### READ ALL RESULTS

experiments=read.csv("results/experiments.csv", sep = "\t");
small_experiments=enrich_s_exp(read.csv("results/small_experiments.csv", sep = "\t"));
all_experiments=bind_rows(experiments,small_experiments);

#k = k_factor * ratio * m^2
experiments$k_factor=experiments$k/(experiments$max_weight_ratio*experiments$m*experiments$m);
experiments$n_ratio=experiments$n/experiments$k;

write.table(experiments,"all_experiments.csv");



In [2]:

    
enrich_s_exp = function(s_exp) {
    epsilons = s_exp[,c(9:length(s_exp[1,]))];
    #s_exp = s_exp[,-c(9:length(s_exp[1,]))];
    avg_var_eps = as.data.frame(t(as.data.frame(lapply(as.data.frame(t(epsilons)),
           FUN=function(epss) c(mean(epss),var(epss),min(epss),max(epss))))));
    colnames(avg_var_eps)=c("epsilon_mean","epsilon_var","epsilon_min","epsilon_max");
    rownames(avg_var_eps)=c(1:length(s_exp[,1]));

    s_exp$epsilon_mean = avg_var_eps$epsilon_mean;
    s_exp$epsilon_var  = avg_var_eps$epsilon_var;
    s_exp$epsilon_min  = avg_var_eps$epsilon_min;
    s_exp$epsilon_max  = avg_var_eps$epsilon_max;

    s_exp$k_factor=s_exp$k/(s_exp$max_weight_ratio*s_exp$m*s_exp$m);
    s_exp$n_ratio =s_exp$n/s_exp$k;   
    return(s_exp);
}



In [28]:

    
small_n=small_experiments[small_experiments$n_ratio==1,][-c(1,length(small_experiments[1,]))];
big_n=small_experiments[small_experiments$n_ratio==2,][-c(1,length(small_experiments[1,]))];

small_n=small_n[order(small_n$k,small_n$skew,small_n$max_weight_ratio),];
big_n=big_n[order(big_n$k,big_n$skew,big_n$max_weight_ratio),];

tmp_df = data.frame((small_n$epsilon_mean-big_n$epsilon_mean))
colnames(tmp_df)=c("rel_diff")
tmp_df$epsilon_sq_var_small_n = sqrt(small_n$epsilon_var)
tmp_df$epsilon_sq_var_big_n   = sqrt(  big_n$epsilon_var)

tmp_df









    



Warning message:
In small_n$epsilon_mean - big_n$epsilon_mean: longer object length is not a multiple of shorter object length





    



Error in `$<-.data.frame`(`*tmp*`, "epsilon_sq_var_big_n", value = c(0.00078955894330738, : replacement has 63 rows, data has 168






    Out[28]:





rel_diff epsilon_sq_var_small_n

	1 0.000435527 0.0007837276
	2 -5.9517e-05 0.0008215743
	3 -8.7036e-05 0.0005658365
	4 -1.3872e-05 0.0003920585
	5 5.4938e-05 0.0002904446
	6 0.000155903 0.0003442497
	7 3.15789e-05 0.0002277456
	8 6.87666e-05 0.0001437744
	9 0.00017603 0.0002199071
	10 0.001296144 0.0002667974
	11 0.001326311 0.0001423311
	12 0.001381974 0.0002019424
	13 0.0005559431 0.0001066634
	14 -0.002258513 8.983957e-05
	15 0.003779869 7.199421e-05
	16 0.004003509 7.70948e-05
	17 0.004070002 2.846604e-05
	18 0.004051741 1.684935e-05
	19 0.002425456 9.514622e-05
	20 0.002439722 0.0001188301
	21 0.001435664 8.627807e-05
	22 -0.001546761 0.001969152
	23 0.007261296 0.005563032
	24 -0.0002079564 0.003612142
	25 0.002487125 0.0008848219
	26 0.001961228 0.001047045
	27 0.002236437 0.0008547022
	28 0.002499949 0.001250519
	29 0.001199973 0.0004394735
	30 0.00230336 0.0006285522
	31 0.002465667 0.0005030337
	32 0.002457057 0.0002426786
	33 0.002385365 0.0001257477
	34 0.001623162 9.909828e-05
	35 0.0008106553 9.962727e-05
	36 -0.002086533 8.536074e-05
	37 0.002950018 2.740472e-05
	38 0.003003251 2.806771e-05
	39 0.00246469 1.951362e-05
	40 0.00428163 0.002056429
	41 0.001396613 0.00174099
	42 0.001624537 0.002304981
	43 0.004523594 0.003223666
	44 0.00220086 0.002125127
	45 0.008392865 0.006723516
	46 0.00186598 9.196336e-05
	47 0.001403293 6.731469e-05
	48 0.0008786798 3.610574e-05
	49 0.001899713 0.0008972503
	50 0.002117484 0.001308667
	51 0.001335584 0.001148564
	52 0.002374176 0.001185513
	53 0.001322399 0.0009463993
	54 0.001861738 0.001587019
	55 0.002969133 0.001303905
	56 0.001617565 0.001161685
	57 0.002078243 0.000997274
	58 0.001458309 0.0002296142
	59 0.001661456 0.0006047754
	60 0.001436227 0.0001615137
	61 0.002026019 0.0005702309
	62 0.001650127 0.000128736
	63 0.001670678 0.0005232551
	64 -0.03809531 9.366904e-05
	65 -0.03821175 6.70366e-05
	66 -0.03836795 3.106305e-05
	67 -0.01818831 3.339247e-05
	68 -0.0182318 8.778653e-06
	69 -0.01819665 1.018214e-05
	70 -0.00700481 0.0009469889
	71 -0.004397003 0.00205228
	72 -0.003357296 0.00345951
	73 -0.005481249 0.000751844
	74 -0.005450712 0.0009390398
	75 -0.004665562 0.001031201
	76 -0.006684102 0.0007915892
	77 -0.009148344 0.0009566349
	78 -0.001861545 0.001362855
	79 -0.0025854 0.0006376236
	80 -0.002249864 0.001058698
	81 -0.001746302 0.001295686
	82 -0.003299952 0.0001917912
	83 -0.003019087 0.0003316656
	84 -0.003487292 0.001122223
	85 -0.007552893 0.0001108378
	86 -0.002212797 0.0003594412
	87 -0.00504243 0.0008850829
	88 -0.001492961 0.0001474347
	89 -0.001615158 0.0003120714
	90 -0.001630527 0.0004329445
	91 -0.001177265 0.0007011244
	92 -0.002118841 0.0007937227
	93 -0.001129823 0.0009991803
	94 -0.001391016 0.0004215932
	95 -0.0006589847 0.0008927552
	96 -0.001065619 0.0006318275
	97 -0.001181485 0.001021122
	98 -0.001994552 0.001006786
	99 -0.004467632 0.001424644
	100 0.001577705 0.001241954
	101 0.001227701 0.001557084
	102 0.0008133597 0.001778753
	103 -0.0006865657 0.000310019
	104 -0.002209957 0.0005382635
	105 -0.0003849412 0.0009529565
	106 2.250525e-05 0.0002375631
	107 -0.0008513838 0.0006017708
	108 -6.866431e-05 0.0007462828
	109 -0.0002193458 0.0002960054
	110 -0.0008192953 0.0003286162
	111 -0.001287344 0.0005452056
	112 0.0002747946 0.0004754265
	113 0.0009157143 0.0009043447
	114 0.000705404 0.001018249
	115 0.0002931346 0.000414779
	116 0.0001419493 0.0004305181
	117 -0.0003262354 0.0006163001
	118 -3.646722e-05 0.0003144758
	119 -0.0007280665 0.0004481704
	120 0.0001173872 0.0005909805
	121 0.0001176699 0.0005170931
	122 0.0002457813 0.0006995569
	123 -0.000252219 0.0003729746
	124 0.0005234072 0.001042279
	125 0.0007979682 0.0007186963
	126 0.0008644979 0.001041573
	127 -0.03949189 0.0005332115
	128 -0.03710799 0.002507107
	129 -0.0379428 0.001504717
	130 -0.01955963 0.0004031903
	131 -0.01935131 0.0006898967
	132 -0.01887601 0.001220619
	133 -0.007574866 0.0003613908
	134 -0.007029031 0.0005973307
	135 -0.006464574 0.001079446
	136 -0.00637869 0.0002991002
	137 -0.006297027 0.0004796874
	138 -0.005180164 0.001161801
	139 -0.007235275 0.000313417
	140 -0.009630079 0.0005485056
	141 -0.003241121 0.0008327803
	142 -0.00376714 0.0002907439
	143 -0.003457554 0.0002963719
	144 -0.003380278 0.0006600821
	145 -0.003625775 0.0004733586
	146 -0.003090991 0.0008220416
	147 -0.004826369 0.0003319376
	148 -0.007366113 0.001010116
	149 -0.003208427 0.0001212852
	150 -0.006309083 0.0004503631
	151 -0.002127671 0.0003400564
	152 -0.002222607 0.0003505918
	153 -0.002293447 0.0003465113
	154 0.04138268 0.0002457217
	155 0.04084333 0.0001960676
	156 0.04199486 9.696129e-05
	157 0.02042575 0.0002059698
	158 0.02041176 0.0001559697
	159 0.02033943 7.491041e-05
	160 0.01341174 0.002058616
	161 0.0121852 0.002510329
	162 0.007394579 0.001774763
	163 0.006289335 0.002147631
	164 0.007081484 0.001856094
	165 0.005259798 0.0008150136
	166 0.00475153 0.0022644
	167 0.003021661 0.003916896
	168 0.005469725 0.003491087



In [29]:

    
k_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different k with same n, skew, ratio
    vary_k = experiments[
            experiments$n == d_n & experiments$skew == d_skew & experiments$max_weight_ratio == d_ratio,
            c("k","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_k[,"k"]/heuristic_k,vary_k$epsilon0.9,log="xy",xlab=expression("k/k_estimate"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: size of U vs relative error",sub=paste("n =",d_n,", m =",d_m,", skew =",d_skew,", w_ratio =",d_ratio,", k_estimate =",heuristic_k));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_k[,"k"]/heuristic_k,vary_k$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_k[,"k"]/heuristic_k,vary_k$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)

}

k_factor_given_ratio_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different k with same n, skew, ratio
    vary_k = experiments[
            experiments$n == d_n & experiments$skew == d_skew & experiments$max_weight_ratio == d_ratio,
            c("k_factor","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_k[,"k_factor"],vary_k$epsilon0.9,log="xy",xlab=expression("k_factor"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: size of U vs relative error",sub=paste("n =",d_n,", m =",d_m,", skew =",d_skew,", w_ratio =",d_ratio));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_k[,"k_factor"],vary_k$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_k[,"k_factor"],vary_k$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}

k_factor_given_k_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different k with same n, skew, ratio
    vary_k = experiments[
            experiments$n == d_n & experiments$skew == d_skew & experiments$k == d_k,
            c("k_factor","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_k[,"k_factor"],vary_k$epsilon0.9,log="xy",xlab=expression("k_factor"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: size of U vs relative error",sub=paste("n =",d_n,", k =",d_k,", m =",d_m,", skew =",d_skew));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_k[,"k_factor"],vary_k$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_k[,"k_factor"],vary_k$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}

n_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different n with same k, skew, ratio
    vary_n = experiments[
            experiments$k == d_k & experiments$skew == d_skew & experiments$max_weight_ratio == d_ratio,
            c("n","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    max_y = max(vary_n$epsilon0.9,vary_n$epsilon0.95,vary_n$epsilon0.99);
    
    plot(vary_n[,"n"]/d_k,vary_n$epsilon0.9,log="x",xlab=expression("n/k"),ylab="epsilon",ylim=c(0,max_y),col=cols[1],pch=pchs[1],
        main="AWS: size of R vs relative error",sub=paste("k =",d_k,", m =",d_m,", skew =",d_skew,", w_ratio =",d_ratio));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_n[,"n"]/d_k,vary_n$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_n[,"n"]/d_k,vary_n$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(20.0,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}


ratio_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different ratio with same n, k, skew
    vary_ratio = experiments[
            experiments$n == d_n & experiments$k == d_k & experiments$skew == d_skew,
            c("max_weight_ratio","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_ratio[,"max_weight_ratio"],vary_ratio$epsilon0.9,log="x",ylim=c(0.001,0.04),xlab=expression("Maximum weight ratio"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: Maximum weight ratio vs relative error",sub=paste("n =",d_n,", k =",d_k,", m =",d_m,", w_skew =",d_skew));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_ratio[,"max_weight_ratio"],vary_ratio$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_ratio[,"max_weight_ratio"],vary_ratio$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(20.0,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
    #NOTE: graph would be better if k is adjusted according to skew (to k_heuristic)
}




skew_vs_epsilon_plot = function(
        d_n=1000000,d_k=20000,d_m=100,d_skew=1.0,d_ratio=2.0,d_cores=3,d_inner=10000,d_outer=10000
    ) {
    #different skew with same n, k, ratio (DATA MISSING FOR NOW)
    vary_skew = experiments[
            experiments$n == d_n & experiments$k == d_k & experiments$max_weight_ratio == d_ratio,
            c("skew","epsilon0.9","epsilon0.95","epsilon0.99")
        ];

    heuristic_k = d_m*d_m*d_ratio;

    cols = c("blue","purple","red");
    pchs = c(1,3,5);

    plot(vary_skew[,"skew"],vary_skew$epsilon0.9,log="xy",xlab=expression("skew"),ylab="epsilon",col=cols[1],pch=pchs[1],
        main="AWS: skew vs relative error",sub=paste("n =",d_n,", k =",d_k,", m =",d_m,", w_ratio =",d_ratio));
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    points(vary_skew[,"skew"],vary_skew$epsilon0.95,col=cols[2],pch=pchs[2])
    points(vary_skew[,"skew"],vary_skew$epsilon0.99,col=cols[3],pch=pchs[3])

    legend(20.0,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=cols,pch=pchs)
}



In [30]:

    
###Identify common values of k
##table(sort(experiments$k))
##
##for(k in c(400,1600,5000,10000,20000,40000)) {
##    k_factor_given_k_vs_epsilon_plot(d_n = 1e6, d_k=k);
##}

skew_vs_epsilon_plot(d_ratio=1.1);
skew_vs_epsilon_plot(d_ratio=4.0);
skew_vs_epsilon_plot(d_ratio=16.0);

for(k in c(10000,20000,40000,80000)) {#did not plot k=400,1600,5000, for these are quite unrealistic k values
    n_vs_epsilon_plot(d_k=k);
}

for(n in c(80000,1e6,1600000,2400000,4800000)) {
    k_vs_epsilon_plot(d_n = n);#equiv to k_factor_given_ratio_vs_epsilon_plot(d_n = 1e6, d_ratio=ratio);
}

#Notice that both small and big ratio are good.
##Explanation: - for small ratio we have near uniform data
##             - for big ratio k_estimate gets near n, improving the result

for(ratio in c(1.1,2,4,8,16,32)) {
    k_vs_epsilon_plot(d_ratio=ratio);#equiv to k_factor_given_ratio_vs_epsilon_plot(d_n = 1e6, d_ratio=ratio);
}
###dev.print(pdf, 'exp2_3_vary_k.pdf')









    













    













    













    













    













    













    













    



Warning message:
In min(x): no non-missing arguments to min; returning InfWarning message:
In max(x): no non-missing arguments to max; returning -InfWarning message:
In min(x): no non-missing arguments to min; returning InfWarning message:
In max(x): no non-missing arguments to max; returning -Inf





    













    



Error in plot.window(...): need finite 'xlim' values



In [31]:

    
#My first plots

k_vs_epsilon_plot();

###dev.print(pdf, 'exp2_3_vary_k.pdf')

n_vs_epsilon_plot();
###dev.print(pdf, 'exp2_3_vary_n.pdf')

ratio_vs_epsilon_plot();
###dev.print(pdf, 'exp2_3_vary_ratio.pdf')



In [32]:

    
##plot(apply(experiments,1, FUN=function(r) r['m']^2*r['max_weight_ratio']/r['k']),experiments$epsilon0.95)

#ALL points
plot(experiments$k_factor,experiments$epsilon0.95,log="xy",col=1);

#Slices (for different weight ratio)
for(ratio in c(1.1, 2.0, 4.0, 16.0, 32.0)) {
    selected_experiments=experiments[experiments$max_weight_ratio == ratio,]
    plot(selected_experiments$k_factor,selected_experiments$epsilon0.95,log="xy",col=1,xlab="k-factor",ylab="epsilon at sigma=0.95",main=paste("ratio = ",ratio));
}
#points(selected_experiments$k_factor,selected_experiments$epsilon0.95,log="xy",col=2);
abline(v=1,lty=3);
abline(h=0.01,lty=3);
#[experiments$epsilon0.95 < 0.01,]
#experiments[experiments$epsilon0.95 >= 0.01,]

#scatterplot3d(log(experiments$k_factor),log(experiments$epsilon0.95), experiments$max_weight_ratio,log="xy");



In [33]:

    
high_ratio_low_kfactor=experiments[experiments$max_weight_ratio==32&experiments$k_factor==0.5,c("skew","epsilon0.9","epsilon0.95","epsilon0.99")]
plot(high_ratio_low_kfactor$skew,high_ratio_low_kfactor$epsilon0.9,pch=1,col="blue",ylim=c(0,max(high_ratio_low_kfactor$epsilon0.99)))
points(high_ratio_low_kfactor$skew,high_ratio_low_kfactor$epsilon0.95,pch=3,col="purple")
points(high_ratio_low_kfactor$skew,high_ratio_low_kfactor$epsilon0.99,pch=5,col="red")









    



Warning message:
In min(x): no non-missing arguments to min; returning InfWarning message:
In max(x): no non-missing arguments to max; returning -InfWarning message:
In max(high_ratio_low_kfactor$epsilon0.99): no non-missing arguments to max; returning -Inf





    



Error in plot.window(...): need finite 'xlim' values



In [121]:

    
plot(1:3,1:3,main=expression(italic(w)-relation))



In [372]:

    
df[1,]









    Out[372]:





skew k_factor max_weight_ratio

	1 1 0.5 2









    Out[372]:





skew n_ratio k_factor max_weight_ratio m

	1 1 128 0.5 2 100



In [22]:

    
make_plot = function(plot_var_id, value_rank) {
    #Note that the relevant quantities are: n,k,skew,ratio. Select 3 out of 4, use final one as axis vs epsilon
    plot_vars = c("skew","n_ratio","k_factor","max_weight_ratio","m");
    nice_plot_vars = c("skew","n-ratio","k-factor","w-ratio","m");
    plot_var = plot_vars[plot_var_id];

    const_vars = setdiff(plot_vars,plot_var);
    nice_const_vars = nice_plot_vars[-plot_var_id];
    nice_plot_var = nice_plot_vars[plot_var_id];
    
    df=experiments[,const_vars];
    #identify common combination of values
    common_value_sets=sort(table(unlist(lapply(as.list(as.data.frame(t(df))),toString))),decreasing=TRUE);
    common_value_set = as.numeric(strsplit(names(common_value_sets)[value_rank],",")[[1]])
    
    #select data based on this common combination
    plot_data=experiments[abs(experiments[,const_vars[1]]-common_value_set[1])<1e-6 & 
                          abs(experiments[,const_vars[2]]-common_value_set[2])<1e-6 & 
                          abs(experiments[,const_vars[3]]-common_value_set[3])<1e-6 & 
                          abs(experiments[,const_vars[4]]-common_value_set[4])<1e-6,]

    var_x =plot_data[,plot_var]
    var_y1=plot_data$epsilon0.9
    var_y2=plot_data$epsilon0.95
    var_y3=plot_data$epsilon0.99
    all_ys=c(var_y1,var_y2,var_y3);
    
    
    typical_size = 10*sqrt(var(all_ys));
    
    y_min = min(all_ys);
    y_max = max(all_ys);
    used_size = y_max-y_min;
    if(used_size < typical_size && y_min - (typical_size - used_size)*0.5 > 0) {
        y_min = y_min - (typical_size - used_size)*0.5;
        y_max = y_max + (typical_size - used_size)*0.5;
    }
    y_min = y_min# - 0.002;
    y_max = y_max# - 0.002; 
    
    plot_width = 3.1;
    plot_height = 3.5;
    options(repr.plot.las=1,repr.plot.width=plot_width, repr.plot.height=plot_height);
    par(las=1);
    plot(  var_x,var_y1, pch=1, col="blue", log="x",
         ylim=c(y_min,y_max),
         ylab="",xlab=nice_plot_var,
         main=bquote(paste(.(nice_plot_var)," vs ", epsilon)),
         #xaxt='n',##Remove for n_ratio
         yaxt='n',
            )
    
    
    points(var_x,var_y2,pch=3,col="purple")
    points(var_x,var_y3,pch=5,col="red")
    #points(1:64,0.04*8/(1:64),pch=5,col="black",type="l")
    

    mtext(paste(paste(sapply(1:4,function(i) paste(nice_const_vars[i],"=",common_value_set[i])),collapse=", ")),cex=0.7)
    mtext(expression(epsilon), side=2, line=3.5)
    #axis(side = 1, at=var_x)##Remove for n_ratio
    axis(side = 2,las=1)
        
    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    #legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=c("blue","purple","red"),pch=c(1,3,5))
    legend_x = min(var_x)*0.1+max(var_x)*0.11;
    y_height_ratio = 1.0;
    legend_y = (1-y_height_ratio)*y_min+y_height_ratio*y_max;
    #legend(legend_x,legend_y,c(expression(sigma == 0.9),expression(sigma == 0.95),expression(sigma == 0.99)),col=c("blue","purple","red"),pch=c(1,3,5))
    #legend("topright",bty = "n",y.intersp=1.5,c(expression(sigma == 0.9),expression(sigma == 0.95),expression(sigma == 0.99)),col=c("blue","purple","red"),pch=c(1,3,5))
    legend(legend_x,legend_y,bty = "n",y.intersp=2.2,c(expression(sigma == 0.9),expression(sigma == 0.95),expression(sigma == 0.99)),col=c("blue","purple","red"),pch=c(1,3,5))
    dev.print(pdf, paste("AWS_plot_",plot_var,"_rank_",value_rank,".pdf",sep=""),width=plot_width,height=plot_height);
}
                         

                             
for(plot_var_id in c(5)) {
    for(value_rank in c(1,2,3)) {
        plot_vars = c("skew","n_ratio","k_factor","max_weight_ratio","m");
        plot_var = plot_vars[plot_var_id];
        print(paste("AWS_plot_",plot_var,"_rank_",value_rank,".pdf",sep=""));
        make_plot(plot_var_id,value_rank);
    }
}









    



[1] "AWS_plot_m_rank_1.pdf"
[1] "AWS_plot_m_rank_2.pdf"






    













    



[1] "AWS_plot_m_rank_3.pdf"



In [291]:

    
s_make_plot = function(plot_var_id, value_rank) {
    #Note that the relevant quantities are: n,k,skew,ratio. Select 3 out of 4, use final one as axis vs epsilon
    plot_vars = c("skew","n_ratio","k_factor","max_weight_ratio");
    plot_var = plot_vars[plot_var_id];

    const_vars = setdiff(plot_vars,plot_var);

    df = small_experiments[const_vars];

    #identify common combination of values
    common_value_sets=sort(table(unlist(lapply(as.list(as.data.frame(t(df))),toString))),decreasing=TRUE);
    common_value_set = as.numeric(strsplit(names(common_value_sets)[value_rank],",")[[1]])
    
    #select data based on this common combination
    plot_data=small_experiments[abs(small_experiments[,const_vars[1]]-common_value_set[1])<1e-6 & 
                          abs(small_experiments[,const_vars[2]]-common_value_set[2])<1e-6 & 
                          abs(small_experiments[,const_vars[3]]-common_value_set[3])<1e-6,]

    var_x =plot_data[,plot_var]
    var_y1=plot_data$epsilon_min;#plot_data$epsilon_mean-sqrt(plot_data$epsilon_var)
    var_y2=plot_data$epsilon_mean;
    var_y3=plot_data$epsilon_max;
    all_ys=c(var_y1,var_y2,var_y3);
    plot(  var_x,var_y1, pch=1, col="blue", log="xy",
         ylim=c(max(min(all_ys),1e-5),max(all_ys)),
         ylab="epsilon",xlab=plot_var,
         main=paste(plot_var," vs Epsilon"), 
         sub=paste(sapply(1:3,function(i) paste(const_vars[i],"=",common_value_set[i])),collapse=", ")
            )
    points(var_x,var_y2,pch=3,col="purple")
    points(var_x,var_y3,pch=5,col="red")

    abline(v=1,lty=3)
    abline(h=0.01,lty=3)
    legend(0.01,0.05,c("sigma = 0.9","sigma = 0.95","sigma = 0.99"),col=c("blue","purple","red"),pch=c(1,3,5))
}
                         
for(plot_var_id in c(1,3,4)) {
    for(value_rank in 1:8) {
        s_make_plot(plot_var_id,value_rank);
    }
}



In [37]:

    
ordered_small_experiments=small_experiments[order(small_experiments$k,small_experiments$skew,small_experiments$max_weight_ratio,small_experiments$n),];



In [38]:

    
small_experiments[37,]$epsilon_mean
sqrt(small_experiments[37,]$epsilon_var)









    Out[38]:




0.0052049598






    Out[38]:




0.00088482191687415



In [39]:

    
small_experiments[55,]$epsilon_mean
sqrt(small_experiments[55,]$epsilon_var)









    Out[39]:




0.0046768465






    Out[39]:




0.00104704535789072



In [40]:

    
plot(unlist(small_experiments[37,c(9:18)]),col="red")
abline(h=small_experiments[37,]$epsilon_mean,lty=2,col="red")
abline(h=small_experiments[37,]$epsilon_mean-sqrt(small_experiments[37,]$epsilon_var),lty=3,col="red")
abline(h=small_experiments[37,]$epsilon_mean+sqrt(small_experiments[37,]$epsilon_var),lty=3,col="red")

points(unlist(small_experiments[55,c(9:18)]),col="blue")
abline(h=small_experiments[55,]$epsilon_mean,lty=2,col="blue")
abline(h=small_experiments[55,]$epsilon_mean-sqrt(small_experiments[55,]$epsilon_var),lty=3,col="blue")
abline(h=small_experiments[55,]$epsilon_mean+sqrt(small_experiments[55,]$epsilon_var),lty=3,col="blue")



In [387]:

    
plot_width = 3.1;
plot_height = 3.5;
options(repr.plot.width=plot_width, repr.plot.height=plot_height);

xs=1:1000/1000;
 #w[i] is in [1,max_weight_ratio]
plot(1, type="n",xaxt='n',yaxt='n', xlim=c(0,1),ylim=c(0,1), xlab="x   (the value)", ylab="F(x)   (the frequency)",main="CDFs of datasets")
axis(side = 1, at=c(0,0.5,1.0))
axis(side = 2, at=c(0,0.5,1.0))

skews=c(1,4,16);
ratios=c(1.1,2,16);
cols=c("red","purple","blue");
flatcols=c(1:9);
flatdescr=c(1:9);
flatltys=c(1:9);

                             

for(ratioi in 1:3) {
for(skewi in 1:3) {
    skew = skews[skewi];
    ratio = ratios[ratioi];
    j = 10-((ratioi-1)*3+skewi);
    col = cols[skewi];
    
    flatcols[j]=col;
    flatdescr[j]=paste("w-ratio=",ratio,", skew=",skew,sep="");
    flatltys[j]=ratioi;
    
    points(((xs)^skew*(ratio-1)+1)/ratio,xs,type="l",col=col,lty=ratioi)
}}
dev.print(pdf, "CDFs.pdf",width=plot_width,height=plot_height);

plot.new()
legend("topleft",bty="n",legend = flatdescr,col = flatcols, lty = flatltys, y.intersp=2.5,cex=0.9)
dev.print(pdf, "CDFs_legend.pdf",width=plot_width,height=plot_height);









    Out[387]:




pdf: 2






    













    Out[387]:




pdf: 2



In [360]:

    
tmpdat=all_experiments[all_experiments$skew == 16 & all_experiments$k_factor == 16 & abs(all_experiments$max_weight_ratio-4)<1e-3, ];



In [350]:

    
df = all_experiments[,c("skew","k_factor","max_weight_ratio")]
common_value_sets=sort(table(unlist(lapply(as.list(as.data.frame(t(df))),toString))),decreasing=TRUE);
all_experiments[all_experiments$skew == 1 & all_experiments$k_factor == 16 & abs(all_experiments$max_weight_ratio-4)<1e-3, ]$m
all_experiments[all_experiments$skew == 1 & all_experiments$k_factor == 4 & abs(all_experiments$max_weight_ratio-16)<1e-3, ]$m
all_experiments[all_experiments$skew == 16 & all_experiments$k_factor == 16 & abs(all_experiments$max_weight_ratio-4)<1e-3, ]$m



In [5]:

    
common_value_sets









    



Error in eval(expr, envir, enclos): object 'common_value_sets' not found



In [ ]:

	rel_diff	epsilon_sq_var_small_n
1	0.000435527	0.0007837276
2	-5.9517e-05	0.0008215743
3	-8.7036e-05	0.0005658365
4	-1.3872e-05	0.0003920585
5	5.4938e-05	0.0002904446
6	0.000155903	0.0003442497
7	3.15789e-05	0.0002277456
8	6.87666e-05	0.0001437744
9	0.00017603	0.0002199071
10	0.001296144	0.0002667974
11	0.001326311	0.0001423311
12	0.001381974	0.0002019424
13	0.0005559431	0.0001066634
14	-0.002258513	8.983957e-05
15	0.003779869	7.199421e-05
16	0.004003509	7.70948e-05
17	0.004070002	2.846604e-05
18	0.004051741	1.684935e-05
19	0.002425456	9.514622e-05
20	0.002439722	0.0001188301
21	0.001435664	8.627807e-05
22	-0.001546761	0.001969152
23	0.007261296	0.005563032
24	-0.0002079564	0.003612142
25	0.002487125	0.0008848219
26	0.001961228	0.001047045
27	0.002236437	0.0008547022
28	0.002499949	0.001250519
29	0.001199973	0.0004394735
30	0.00230336	0.0006285522
31	0.002465667	0.0005030337
32	0.002457057	0.0002426786
33	0.002385365	0.0001257477
34	0.001623162	9.909828e-05
35	0.0008106553	9.962727e-05
36	-0.002086533	8.536074e-05
37	0.002950018	2.740472e-05
38	0.003003251	2.806771e-05
39	0.00246469	1.951362e-05
40	0.00428163	0.002056429
41	0.001396613	0.00174099
42	0.001624537	0.002304981
43	0.004523594	0.003223666
44	0.00220086	0.002125127
45	0.008392865	0.006723516
46	0.00186598	9.196336e-05
47	0.001403293	6.731469e-05
48	0.0008786798	3.610574e-05
49	0.001899713	0.0008972503
50	0.002117484	0.001308667
51	0.001335584	0.001148564
52	0.002374176	0.001185513
53	0.001322399	0.0009463993
54	0.001861738	0.001587019
55	0.002969133	0.001303905
56	0.001617565	0.001161685
57	0.002078243	0.000997274
58	0.001458309	0.0002296142
59	0.001661456	0.0006047754
60	0.001436227	0.0001615137
61	0.002026019	0.0005702309
62	0.001650127	0.000128736
63	0.001670678	0.0005232551
64	-0.03809531	9.366904e-05
65	-0.03821175	6.70366e-05
66	-0.03836795	3.106305e-05
67	-0.01818831	3.339247e-05
68	-0.0182318	8.778653e-06
69	-0.01819665	1.018214e-05
70	-0.00700481	0.0009469889
71	-0.004397003	0.00205228
72	-0.003357296	0.00345951
73	-0.005481249	0.000751844
74	-0.005450712	0.0009390398
75	-0.004665562	0.001031201
76	-0.006684102	0.0007915892
77	-0.009148344	0.0009566349
78	-0.001861545	0.001362855
79	-0.0025854	0.0006376236
80	-0.002249864	0.001058698
81	-0.001746302	0.001295686
82	-0.003299952	0.0001917912
83	-0.003019087	0.0003316656
84	-0.003487292	0.001122223
85	-0.007552893	0.0001108378
86	-0.002212797	0.0003594412
87	-0.00504243	0.0008850829
88	-0.001492961	0.0001474347
89	-0.001615158	0.0003120714
90	-0.001630527	0.0004329445
91	-0.001177265	0.0007011244
92	-0.002118841	0.0007937227
93	-0.001129823	0.0009991803
94	-0.001391016	0.0004215932
95	-0.0006589847	0.0008927552
96	-0.001065619	0.0006318275
97	-0.001181485	0.001021122
98	-0.001994552	0.001006786
99	-0.004467632	0.001424644
100	0.001577705	0.001241954
101	0.001227701	0.001557084
102	0.0008133597	0.001778753
103	-0.0006865657	0.000310019
104	-0.002209957	0.0005382635
105	-0.0003849412	0.0009529565
106	2.250525e-05	0.0002375631
107	-0.0008513838	0.0006017708
108	-6.866431e-05	0.0007462828
109	-0.0002193458	0.0002960054
110	-0.0008192953	0.0003286162
111	-0.001287344	0.0005452056
112	0.0002747946	0.0004754265
113	0.0009157143	0.0009043447
114	0.000705404	0.001018249
115	0.0002931346	0.000414779
116	0.0001419493	0.0004305181
117	-0.0003262354	0.0006163001
118	-3.646722e-05	0.0003144758
119	-0.0007280665	0.0004481704
120	0.0001173872	0.0005909805
121	0.0001176699	0.0005170931
122	0.0002457813	0.0006995569
123	-0.000252219	0.0003729746
124	0.0005234072	0.001042279
125	0.0007979682	0.0007186963
126	0.0008644979	0.001041573
127	-0.03949189	0.0005332115
128	-0.03710799	0.002507107
129	-0.0379428	0.001504717
130	-0.01955963	0.0004031903
131	-0.01935131	0.0006898967
132	-0.01887601	0.001220619
133	-0.007574866	0.0003613908
134	-0.007029031	0.0005973307
135	-0.006464574	0.001079446
136	-0.00637869	0.0002991002
137	-0.006297027	0.0004796874
138	-0.005180164	0.001161801
139	-0.007235275	0.000313417
140	-0.009630079	0.0005485056
141	-0.003241121	0.0008327803
142	-0.00376714	0.0002907439
143	-0.003457554	0.0002963719
144	-0.003380278	0.0006600821
145	-0.003625775	0.0004733586
146	-0.003090991	0.0008220416
147	-0.004826369	0.0003319376
148	-0.007366113	0.001010116
149	-0.003208427	0.0001212852
150	-0.006309083	0.0004503631
151	-0.002127671	0.0003400564
152	-0.002222607	0.0003505918
153	-0.002293447	0.0003465113
154	0.04138268	0.0002457217
155	0.04084333	0.0001960676
156	0.04199486	9.696129e-05
157	0.02042575	0.0002059698
158	0.02041176	0.0001559697
159	0.02033943	7.491041e-05
160	0.01341174	0.002058616
161	0.0121852	0.002510329
162	0.007394579	0.001774763
163	0.006289335	0.002147631
164	0.007081484	0.001856094
165	0.005259798	0.0008150136
166	0.00475153	0.0022644
167	0.003021661	0.003916896
168	0.005469725	0.003491087