In [1]:
using Distributions, Gadfly, Compose, DataFrames
include("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Plots/plot_simplex.jl")
include("/home/benavoli/Data/Work_Julia/Julia/Plots/plot_data1.jl")
include("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Tests/Bsigntest.jl")
ClassID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierID.dat", ',')
ClassNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierNames.dat", ',')
DatasetID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetID.dat", ',');
DatasetNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetNames.dat", ',');
Percent_correct = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/Percent_correct.dat", ',');
cl1a=1 #nbc
cl2a=2#aode
println("Comparison of ", ClassNames[cl1a,1], " vs. ", ClassNames[cl2a,1])
println()
#Compute mean accuracy
indi=find(x->x==cl1a,ClassID)
indj=find(x->x==cl2a,ClassID)
accNbc=Float64[]
accAode=Float64[]
for d=1:Int32(maximum(DatasetID))
indd=find(x->x==d,DatasetID)
indid=intersect(indi,indd)
indjd=intersect(indj,indd)
push!(accNbc,mean(Percent_correct[indid])/100)
push!(accAode,mean(Percent_correct[indjd])/100)
end
In [14]:
using HypothesisTests
p=plot_data1(cl1a,cl2a,"all datasets",accNbc-accAode,-0.12,0.08)
display(p)
p1=pvalue(SignedRankTest(accNbc,accAode))
p2=pvalue(SignTest(accNbc,accAode))
@printf "p-value SignedRank=%0.8f " p1
@printf "p-value Sign=%0.8f\n" p2
In [3]:
#Bayesian Sign Test
datacla=Bsigntest(accNbc,accAode,0)
#Some cPlot code
df1 = DataFrame(x=datacla[2,:][:]); p1=plot(df1, x=:x, Geom.histogram,Coord.Cartesian(xmin=0,xmax=1),Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc-aode)"))
set_default_plot_size(15cm, 8cm); display(p1)
prob=length(find(z->z> 0.5,datacla[2,:][:]))/length(datacla[2,:][:])
println("Probability that P(nbc-aode)>0.5 is $prob")
The cassock does not necessarily make the priest!
Can we say anything about the probability that nbc is practically equivalent to aode?
Let us ask the posterior
$$ P(-\infty,-r), ~~~ P[-r,r],~~~ P(r,\infty) $$This is equal to
$$ \begin{array}{rclcl} P(-\infty,-r)&=&w_0 P_0(-\infty,r)&+& \sum_{i=1}^n w_i I_{(-\infty,-r)}(x_i)\\ P[-r,r]&=&w_0 P_0[-r,r]&+& \sum_{i=1}^n w_i I_{[-r,r]}(x_i)\\ P(r,\infty)&=&w_0 P_0(r,\infty)&+& \sum_{i=1}^n w_i I_{(r,\infty)}(x_i) \end{array} $$
In [4]:
#Bayesian Sign Test with rope
rope=0.01
datacla=Bsigntest(accNbc,accAode,rope)
#Plot
dfa = DataFrame(x=datacla[1,:][:])
pl0=plot(dfa, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc \U300A aode)"))
dfb = DataFrame(x=datacla[2,:][:])
pc0=plot(dfb, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc=aode)"))
dfc = DataFrame(x=datacla[3,:][:])
pr0=plot(dfc, x=:x, Geom.histogram,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt),Guide.xlabel("P(nbc \U300B aode)"))
set_default_plot_size(40cm, 12cm)
display(hstack(pl0,pc0,pr0))
In [5]:
ClassID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierID.dat", ',')
ClassNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/ClassifierNames.dat", ',')
DatasetID = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetID.dat", ',');
DatasetNames = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/DatasetNames.dat", ',');
Percent_correct = readdlm("/home/benavoli/Data/Work_Julia/Tutorial/Julia/Data/Percent_correct.dat", ',');
cl1b=4 #nbc
cl2b=5 #aode
println("Comparison of ", ClassNames[cl1b,1], " vs. ", ClassNames[cl2b,1])
println()
#Compute mean accuracy
indi=find(x->x==cl1b,ClassID)
indj=find(x->x==cl2b,ClassID)
accJ48=Float64[]
accJ48gr=Float64[]
for d=1:Int32(maximum(DatasetID))
indd=find(x->x==d,DatasetID)
indid=intersect(indi,indd)
indjd=intersect(indj,indd)
push!(accJ48,mean(Percent_correct[indid])/100)
push!(accJ48gr,mean(Percent_correct[indjd])/100)
end
In [6]:
ptrianglea=plot_simplex(datacla, ClassNames[cl1a],ClassNames[cl2a])
set_default_plot_size(12.5cm, 10cm)
display(ptrianglea)