In [1]:
using Distributions, Gadfly, Compose, DataFrames
include("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Plots/plot_simplex.jl")
include("/home/benavoli/Data/Work_Julia/Julia/Plots/plot_data1.jl")
include("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Tests/Bsigntest.jl")
ClassID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierID.dat", ',')
ClassNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierNames.dat", ',')
DatasetID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetID.dat", ',');
DatasetNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetNames.dat", ',');
Percent_correct = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/Percent_correct.dat", ',');
cl1a=1 #nbc
cl2a=2#aode
println("Comparison of ", ClassNames[cl1a,1], " vs. ", ClassNames[cl2a,1])
println()
#Compute mean accuracy
indi=find(x->x==cl1a,ClassID)
indj=find(x->x==cl2a,ClassID)
accNbc=Float64[]
accAode=Float64[]
for d=1:Int32(maximum(DatasetID))
indd=find(x->x==d,DatasetID)
indid=intersect(indi,indd)
indjd=intersect(indj,indd)
push!(accNbc,mean(Percent_correct[indid])/100)
push!(accAode,mean(Percent_correct[indjd])/100)
end
In [14]:
using HypothesisTests
p=plot_data1(cl1a,cl2a,"all datasets",accNbc-accAode,-0.12,0.08)
display(p)
p1=pvalue(SignedRankTest(accNbc,accAode))
p2=pvalue(SignTest(accNbc,accAode))
@printf "p-value SignedRank=%0.8f " p1
@printf "p-value Sign=%0.8f\n" p2
In [3]:
#Bayesian Sign Test
datacla=Bsigntest(accNbc,accAode,0)
#Some cPlot code
df1 = DataFrame(x=datacla[2,:][:]); p1=plot(df1, x=:x, Geom.histogram,Coord.Cartesian(xmin=0,xmax=1),Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc-aode)"))
set_default_plot_size(15cm, 8cm); display(p1)
prob=length(find(z->z> 0.5,datacla[2,:][:]))/length(datacla[2,:][:])
println("Probability that P(nbc-aode)>0.5 is $prob")
The cassock does not necessarily make the priest!
Can we say anything about the probability that nbc is practically equivalent to aode?
Let us ask the posterior
$$ P(-\infty,-r), ~~~ P[-r,r],~~~ P(r,\infty) $$This is equal to
$$ \begin{array}{rclcl} P(-\infty,-r)&=&w_0 P_0(-\infty,r)&+& \sum_{i=1}^n w_i I_{(-\infty,-r)}(x_i)\\ P[-r,r]&=&w_0 P_0[-r,r]&+& \sum_{i=1}^n w_i I_{[-r,r]}(x_i)\\ P(r,\infty)&=&w_0 P_0(r,\infty)&+& \sum_{i=1}^n w_i I_{(r,\infty)}(x_i) \end{array} $$
In [4]:
#Bayesian Sign Test with rope
rope=0.01
datacla=Bsigntest(accNbc,accAode,rope)
#Plot
dfa = DataFrame(x=datacla[1,:][:])
pl0=plot(dfa, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc \U300A aode)"))
dfb = DataFrame(x=datacla[2,:][:])
pc0=plot(dfb, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc=aode)"))
dfc = DataFrame(x=datacla[3,:][:])
pr0=plot(dfc, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc \U300B aode)"))
set_default_plot_size(40cm, 12cm)
display(hstack(pl0,pc0,pr0))
In [5]:
ClassID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierID.dat", ',')
ClassNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierNames.dat", ',')
DatasetID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetID.dat", ',');
DatasetNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetNames.dat", ',');
Percent_correct = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/Percent_correct.dat", ',');
cl1b=4 #nbc
cl2b=5 #aode
println("Comparison of ", ClassNames[cl1b,1], " vs. ", ClassNames[cl2b,1])
println()
#Compute mean accuracy
indi=find(x->x==cl1b,ClassID)
indj=find(x->x==cl2b,ClassID)
accJ48=Float64[]
accJ48gr=Float64[]
for d=1:Int32(maximum(DatasetID))
indd=find(x->x==d,DatasetID)
indid=intersect(indi,indd)
indjd=intersect(indj,indd)
push!(accJ48,mean(Percent_correct[indid])/100)
push!(accJ48gr,mean(Percent_correct[indjd])/100)
end
In [6]:
ptrianglea=plot_simplex(datacla, ClassNames[cl1a],ClassNames[cl2a])
set_default_plot_size(12.5cm, 10cm)
display(ptrianglea)
Query: for classifiers being practically equivalent means $$ \mathcal{P}\big(P(-\infty,-r)>P(r,\infty)+P[-r,r]\big) $$
In [7]:
using HypothesisTests
p1=pvalue(SignedRankTest(accJ48[20:50],accJ48gr[20:50]))
p2=pvalue(SignTest(accJ48[20:50],accJ48gr[20:50]))
p=plot_data1(cl1b,cl2b,"all datasets",accJ48[20:50]-accJ48gr[20:50],-0.013,0.011)
display(p)
@printf "p-value SignedRank=%0.4f " p1
@printf "p-value Sign=%0.4f\n" p2
In [8]:
#Bayesian Sign Test with rope
rope=0.01
dataclb=Bsigntest(accJ48[20:50],accJ48gr[20:50],rope)
#Plot
dfa = DataFrame(x=dataclb[1,:][:])
pl=plot(dfa, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(J48 \U300A J48gr)"))
dfb = DataFrame(x=dataclb[2,:][:])
pc=plot(dfb, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(J48=J48gr)"))
dfc = DataFrame(x=dataclb[3,:][:])
pr=plot(dfc, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(J48 \U300B J48gr)"))
set_default_plot_size(40cm, 12cm)
display(hstack(pl,pc,pr))
In [9]:
ptriangleb=plot_simplex(dataclb, ClassNames[cl1b],ClassNames[cl2b])
set_default_plot_size(12.5cm, 10cm)
display(ptriangleb)
In [10]:
using HypothesisTests
p1=pvalue(SignedRankTest(accJ48,accJ48gr))
p2=pvalue(SignTest(accJ48,accJ48gr))
p=plot_data1(cl1b,cl2b,"all datasets",accJ48-accJ48gr,-0.013,0.011)
display(p)
@printf "p-value SignedRank=%0.4f " p1
@printf "p-value Sign=%0.4f\n" p2
In [11]:
#Bayesian Sign Test with rope
rope=0.01
dataclball=Bsigntest(accJ48,accJ48gr,rope)
#Plot
ptriangleball=plot_simplex(dataclball, ClassNames[cl1b],ClassNames[cl2b])
set_default_plot_size(12.5cm, 10cm)
display(ptriangleball)
Similar to Sign test but it uses the magnitude of the difference. Let $Z,Z' \sim P$:
Known $P$: just check whether $P(Z>-Z')>0.5$
Unknown $P$: use observations from $P$ to estimate $P$ and query the posterior $P(Z>-Z')$
Our estimate of $P$ is not $$ \NO{P=w_0 P_0+ \sum_{i=1}^n w_i \delta_{x_i}}, $$ Alessio Benavoli, Giorgio Corani, Francesca Mangili, and Marco Zaffalon. A Bayesian non- parametric procedure for comparing algorithms. In Proceedings of the 31th International Conference on Machine Learning (ICML 2015), pages 1–9, 2015a</sub>
Bayesian Wilcoxon signed-rank:
$$ \begin{array}{rcl} \theta_{l}&=&\sum_{i=0}^n \sum_{j=0}^n w_iw_j I_{(-\infty,-2r)}(z_j+z_i),\\ \theta_{e}&=&\sum_{i=0}^n \sum_{j=0}^n w_iw_j I_{[-2r,2r]}(z_j+z_i),\\ \theta_{r}&=&\sum_{i=0}^n \sum_{j=0}^n w_iw_j I_{(2r,\infty)}(z_j+z_i), \end{array} $$No need of the symmetry assumptions!
Alessio Benavoli, Giorgio Corani, Francesca Mangili, and Marco Zaffalon. A Bayesian non- parametric procedure for comparing algorithms. In Proceedings of the 31th International Conference on Machine Learning (ICML 2015), pages 1–9, 2015a</sub>
In [12]:
include("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Tests/heaviside.jl")
include("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Tests/Bsignranktestv1.jl")
#Bayesian Sign-Rank Test without rope
rope=0.01
datasra=Bsignranktest(accNbc,accAode,rope)
datasrb=Bsignranktest(accJ48,accJ48gr,rope)
ptriangleSa=plot_simplex(datasra, ClassNames[cl1a],ClassNames[cl2a])
ptriangleSb=plot_simplex(datasrb, ClassNames[cl1b],ClassNames[cl2b])
set_default_plot_size(25cm, 10cm)
display(hstack(ptriangleSa,ptriangleSb))
Two Methods for comparing two classifiers on multiple datasets:
The hierarchical model is powerful
Issue:
All tests we have presented are available in R, Julia and Python https://github.com/BayesianTestsML/tutorial/
A. Benavoli, G. Corani, J. Demsar, and M. Zaffalon. Time for a change: a tutorial for comparing multiple classifiers through Bayesian analysis. ArXiv e-prints 1606.04316, 1-40, 2016.
posterior probability measure: $\alpha_n=\frac{s}{s+n} \alpha+ \frac{1}{s+n} \sum\limits_{i=1}^n \delta_{x_i}$
In [13]:
using Gadfly
using Compose
myplot=plot(x=rand(1)*0.0,y=rand(1)*0.0,Guide.annotation(compose(context(),line([(2, 0), (2, 1/7)]),stroke(colorant"blue"),linewidth(0.4))),Guide.annotation(compose(context(),line([(-1, 0), (-1, 1/7)]),line([(-0.5, 0), (-0.5, 1/7)]),line([(0.25, 0), (0.25, 1/7)]),line([(0.75, 0), (0.75, 1/7)]),line([(1.25, 0), (1.25, 1/7)]),line([(1.75, 0), (1.75, 1/7)]),stroke(colorant"orange"),linewidth(0.4))),Stat.xticks(ticks=[-2.0,-1.5,-1.0,-0.5,0.0, 0.5, 1.0,1.5,2]),Stat.yticks(ticks=[0.0, 0.5, 1.0]),xintercept=[0.0], Geom.line,Guide.ylabel("Probability"),Theme(major_label_font_size=15pt,minor_label_font_size=13pt))
draw(PNG("plots/realpost1b.png", 6inch, 3inch), myplot)