Bayesian Analysis of Squash dataset



In [9]:

    
using DataFrames
using Distributions
import PyPlot

using Gadfly

# Project-local helpers. Including these files defines the functions
# ttest_correlated, Bttest_correlated, plot_posterior_ttest, plot_data and
# get_accuracies in Main (re-running this cell redefines them, which is what
# produces the "Method definition ... overwritten" warnings).
include("Tests/ttest_correlated.jl")
include("Tests/Bttest_correlated.jl")
include("Plots/plot_posterior_ttest.jl")
include("Plots/plot_data.jl")
include("Data/get_accuracies.jl")

# Comma-delimited lookup tables and results, read relative to the working
# directory. The trailing `;` only suppresses notebook echo of the value.
ClassID = readdlm("Data/ClassifierID.dat", ',')
ClassNames = readdlm("Data/ClassifierNames.dat", ',')
DatasetID = readdlm("Data/DatasetID.dat", ',');
DatasetNames = readdlm("Data/DatasetNames.dat", ',');
Percent_correct = readdlm("Data/Percent_correct.dat", ',');
# Passed as the second argument of Bttest_correlated below.
# NOTE(review): presumably the correlation of the correlated t-test
# (1 / number of cross-validation folds) -- confirm against Bttest_correlated.jl.
rho=1/10

# Compare classifier cl1 against classifier cl2 on a single dataset.
# The indices select rows of ClassNames / DatasetNames; with these values the
# cell prints "nbc vs. aode" and "squash-unstored".
cl1=1 # row of ClassNames: nbc
cl2=2 # row of ClassNames: aode
dataset=46 # row of DatasetNames: squash-unstored
println("Comparison of ", ClassNames[cl1,1], " vs. ", ClassNames[cl2,1])
println("in dataset ",DatasetNames[dataset,1])
println()



# Load the accuracies of the two classifiers on the chosen dataset.
# acci and accj are reused by the later cells of this notebook.
acci,accj=get_accuracies(cl1,cl2,dataset,ClassID,DatasetID,Percent_correct)

# Perform the Bayesian correlated t-test on the accuracy differences acci-accj.
# The interval [-rope, rope] and hdi_prob are passed as the last three arguments.
# NOTE(review): the third argument (0) is presumably the reference value the
# differences are compared against -- confirm against Bttest_correlated.jl.
rope=0.01
hdi_prob=0.95
# Returns seven values; later cells label p_r as P(nbc > aode), p_rope as
# P(nbc ~ aode) and p_l as P(nbc < aode). `hdi` is not used in this cell.
mur,sigmar,dofr,p_r,p_l,p_rope,hdi =Bttest_correlated(acci-accj,rho,0,-rope,rope,hdi_prob)
# mur and sigmar are indexed with [1], i.e. they are indexable containers here.
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")  
println()


# Plot the posterior.
# NOTE(review): the last two arguments (-0.26, 0.2) are presumably the x-axis
# limits of the plot -- confirm against plot_posterior_ttest.jl.
p1=plot_posterior_ttest(cl1,cl2,dataset,mur,sigmar,dofr,-0.26,0.2)
display(p1)









    



Comparison of nbc vs. aode
in dataset squash-unstored







    



WARNING: Method definition ttest_correlated(Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Tests/ttest_correlated.jl:11 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Tests/ttest_correlated.jl:11.
WARNING: Method definition Bttest_correlated(Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Tests/Bttest_correlated.jl:15 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Tests/Bttest_correlated.jl:15.
WARNING: Method definition plot_posterior_ttest(Any, Any, Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_posterior_ttest.jl:3 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_posterior_ttest.jl:3.
WARNING: Method definition plot_data(Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_data.jl:5 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_data.jl:5.
WARNING: Method definition get_accuracies(Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Data/get_accuracies.jl:3 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Data/get_accuracies.jl:3.






    













    



Parameters of the posterior mean=-0.055999993896000376, dev.std=0.05423222985646852 and dof=99.0

Compute the area under the posterior density (left and right)



In [7]:

    
# Pie chart of the two posterior probabilities p_r and p_l, saved as a PDF.
#
# This cell does not compute anything itself: it reads the globals p_r and p_l
# left behind by the most recent Bttest_correlated call.
# NOTE(review): if that call used a non-zero rope (the first cell uses
# rope=0.01), p_r and p_l exclude the rope mass and need not sum to 1 --
# confirm which rope was intended for this "left and right" chart.
labels = ["P(nbc\$>\$aode)", "P(nbc\$<\$aode)"]
colors = ["lightskyblue","lightblue"]

# Slice sizes in percent, in the same order as `labels`.
sizes = [p_r*100; p_l*100]

fig = PyPlot.figure("pyplot_piechart",figsize=(4,4))
p = PyPlot.pie(sizes,
               labels=labels,
               shadow=false,
               startangle=40,
               colors=colors,
               autopct="%1.1f%%")

# The do-block form closes (and therefore flushes) the file even if rendering
# throws; a bare open() left the handle open and the PDF possibly incomplete.
open("Plots/piechartbay.pdf","w") do io
    writemime(io, "application/pdf", fig)
end

Compute the area under the posterior density (left, rope and right)



In [8]:

    
# Pie chart of the three posterior probabilities (right of the rope, inside
# the rope, left of the rope), saved as a PDF.

# Perform the Bayesian correlated t-test with the interval [-rope, rope].
# Recomputed here so the chart does not depend on which cell ran last.
rope=0.01
hdi_prob=0.95
mur,sigmar,dofr,p_r,p_l,p_rope,hdi =Bttest_correlated(acci-accj,rho,0,-rope,rope,hdi_prob)

labels = ["P(nbc\$\\gg\$aode)","P(nbc\$\\approx\$aode)", "P(nbc\$\\ll\$aode)"]
colors = ["lightskyblue","sandybrown","lightblue"]

# Slice sizes in percent, in the same order as `labels`.
sizes = [p_r*100; p_rope*100; p_l*100]

fig = PyPlot.figure("pyplot_piechart",figsize=(4,4))
p = PyPlot.pie(sizes,
               labels=labels,
               shadow=false,
               startangle=40,
               colors=colors,
               autopct="%1.1f%%")

# The do-block form closes (and therefore flushes) the file even if rendering
# throws; a bare open() left the handle open and the PDF possibly incomplete.
open("Plots/piechartbay1.pdf","w") do io
    writemime(io, "application/pdf", fig)
end

Compute the HDIs (highest density intervals) for several probability levels



In [2]:

    
# Run the Bayesian correlated t-test once per probability level in Prb and
# plot the resulting interval for each level as an error bar around its
# midpoint. Requires acci, accj and rho from the first cell.
Prb=[0.50, 0.60, 0.70, 0.80, 0.90, 0.95, 0.99]
ys=zeros(length(Prb))
ymins=zeros(length(Prb))
ymaxs=zeros(length(Prb))
rope=0.0  # zero-width rope: rope is passed as both interval bounds below
for (i, prob) in enumerate(Prb)
    # Only the 7th return value (the interval) is needed. Taking it by index,
    # instead of destructuring into mur, sigmar, dofr, p_r, p_l, p_rope, keeps
    # the loop from overwriting those notebook-wide results with rope=0 values.
    res = Bttest_correlated(acci-accj,rho,0,rope,rope,prob)
    val = res[7]
    ys[i]=(val[1]+val[2])/2
    # NOTE(review): val[1] is stored as the upper bound and val[2] as the lower
    # one -- confirm the order in which Bttest_correlated returns the bounds.
    ymaxs[i]=val[1]
    ymins[i]=val[2]
end

df = DataFrame(
    Probability=Prb,
    HDI=ys,
    ymin=ymins,
    ymax=ymaxs
)

p1=plot(df,x=:Probability, y=:HDI, ymin=:ymin, ymax=:ymax,
     Geom.point, Geom.errorbar,Theme(major_label_font_size=13pt,minor_label_font_size=12pt,key_label_font_size=11pt))
display(p1)
Gadfly.draw(PDF("Plots/bars.pdf", 6inch, 3inch), p1)



In [ ]: