Notebook 3: Bayesian analysis of the Squash dataset


Bayesian Analysis of Squash dataset


In [9]:
using DataFrames
using Distributions
import PyPlot

using Gadfly

# Project-local helpers: the frequentist and Bayesian correlated t-tests, the
# plotting routines and the accuracy loader used below.
include("Tests/ttest_correlated.jl")
include("Tests/Bttest_correlated.jl")
include("Plots/plot_posterior_ttest.jl")
include("Plots/plot_data.jl")
include("Data/get_accuracies.jl")

# Comma-separated tables: classifier/dataset identifiers, their display names
# and the measured accuracies.
ClassID         = readdlm("Data/ClassifierID.dat", ',');
ClassNames      = readdlm("Data/ClassifierNames.dat", ',');
DatasetID       = readdlm("Data/DatasetID.dat", ',');
DatasetNames    = readdlm("Data/DatasetNames.dat", ',');
Percent_correct = readdlm("Data/Percent_correct.dat", ',');

# Correlation handed to the correlated t-test.
# NOTE(review): presumably 1/(number of cross-validation folds) — confirm.
rho = 1 / 10

# Compare classifier 1 (nbc) against classifier 2 (aode) on dataset 46.
cl1, cl2, dataset = 1, 2, 46
println("Comparison of ", ClassNames[cl1, 1], " vs. ", ClassNames[cl2, 1])
println("in dataset ", DatasetNames[dataset, 1])
println()

# Accuracies of the two classifiers on the selected dataset.
acci, accj = get_accuracies(cl1, cl2, dataset, ClassID, DatasetID, Percent_correct)

# Bayesian correlated t-test on the accuracy differences, with the rope set to
# [-rope, rope] and the requested HDI probability.
rope, hdi_prob = 0.01, 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")
println()

# Plot the posterior.
# NOTE(review): the last two arguments look like the x-axis limits — confirm
# against plot_posterior_ttest.
p1 = plot_posterior_ttest(cl1, cl2, dataset, mur, sigmar, dofr, -0.26, 0.2)
display(p1)


Comparison of nbc vs. aode
in dataset squash-unstored

WARNING: Method definition ttest_correlated(Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Tests/ttest_correlated.jl:11 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Tests/ttest_correlated.jl:11.
WARNING: Method definition Bttest_correlated(Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Tests/Bttest_correlated.jl:15 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Tests/Bttest_correlated.jl:15.
WARNING: Method definition plot_posterior_ttest(Any, Any, Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_posterior_ttest.jl:3 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_posterior_ttest.jl:3.
WARNING: Method definition plot_data(Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_data.jl:5 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Plots/plot_data.jl:5.
WARNING: Method definition get_accuracies(Any, Any, Any, Any, Any, Any) in module Main at /home/benavoli/Data/Github/tutorial/Julia/Data/get_accuracies.jl:3 overwritten at /home/benavoli/Data/Github/tutorial/Julia/Data/get_accuracies.jl:3.
DeltaAcc -1.1 -1.0 -0.9 -0.8 -0.7 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 -0.72 -0.70 -0.68 -0.66 -0.64 -0.62 -0.60 -0.58 -0.56 -0.54 -0.52 -0.50 -0.48 -0.46 -0.44 -0.42 -0.40 -0.38 -0.36 -0.34 -0.32 -0.30 -0.28 -0.26 -0.24 -0.22 -0.20 -0.18 -0.16 -0.14 -0.12 -0.10 -0.08 -0.06 -0.04 -0.02 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14 0.16 0.18 0.20 0.22 0.24 0.26 0.28 0.30 0.32 0.34 0.36 0.38 0.40 0.42 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58 0.60 0.62 0.64 0.66 -1.0 -0.5 0.0 0.5 1.0 -0.75 -0.70 -0.65 -0.60 -0.55 -0.50 -0.45 -0.40 -0.35 -0.30 -0.25 -0.20 -0.15 -0.10 -0.05 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 pdf rope legend -10 -8 -6 -4 -2 0 2 4 6 8 10 12 14 16 18 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 12.5 13.0 13.5 14.0 14.5 15.0 15.5 16.0 -10 0 10 20 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 12.5 13.0 13.5 14.0 14.5 15.0 15.5 16.0 pdf
Parameters of the posterior mean=-0.055999993896000376, dev.std=0.05423222985646852 and dof=99.0

Compute the area under the density (left and right)


In [7]:
# Two-slice pie chart of p_r and p_l, labelled P(nbc>aode) and P(nbc<aode).
#
# p_r and p_l are recomputed here with both rope bounds set to 0.0 (the same
# call shape the HDI cell uses) so the chart no longer depends on whichever
# cell last assigned the globals p_r and p_l.
# NOTE(review): assumes that with a zero-width rope p_r and p_l together cover
# the whole posterior — confirm against Bttest_correlated.
_, _, _, p_r, p_l, _, _ = Bttest_correlated(acci - accj, rho, 0, 0.0, 0.0, hdi_prob)

labels = ["P(nbc\$>\$aode)", "P(nbc\$<\$aode)"]
colors = ["lightskyblue", "lightblue"]

# Slice sizes as percentages.
sizes = [p_r*100; p_l*100]

fig = PyPlot.figure("pyplot_piechart", figsize=(4,4))
p = PyPlot.pie(sizes,
        labels=labels,
        shadow=false,
        startangle=40,
        colors=colors,
        autopct="%1.1f%%")

#PyPlot.axis("equal")

# Save the figure as PDF. The do-block form closes the file even if writing
# fails; the original left the handle open.
# NOTE(review): on Julia >= 0.5 `writemime` is deprecated in favour of
# `show(io, MIME("application/pdf"), fig)`.
open("Plots/piechartbay.pdf", "w") do io
    writemime(io, "application/pdf", fig)
end


Compute the area under the density (left, rope and right)


In [8]:
# Three-slice pie chart of p_r, p_rope and p_l from the Bayesian correlated
# t-test, with the rope set to [-rope, rope].
rope = 0.01
hdi_prob = 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)

labels = ["P(nbc\$\\gg\$aode)", "P(nbc\$\\approx\$aode)", "P(nbc\$\\ll\$aode)"]
colors = ["lightskyblue", "sandybrown", "lightblue"]

# Slice sizes as percentages, in the same order as `labels`.
sizes = [p_r*100; p_rope*100; p_l*100]

# NOTE(review): this reuses the figure label "pyplot_piechart" from the
# two-slice chart; if that figure is still open the new pie is presumably drawn
# into it — confirm, or clear the figure first.
fig = PyPlot.figure("pyplot_piechart", figsize=(4,4))
p = PyPlot.pie(sizes,
        labels=labels,
        shadow=false,
        startangle=40,
        colors=colors,
        autopct="%1.1f%%")

#PyPlot.axis("equal")

# Save the figure as PDF. The do-block form closes the file even if writing
# fails; the original left the handle open.
# NOTE(review): on Julia >= 0.5 `writemime` is deprecated in favour of
# `show(io, MIME("application/pdf"), fig)`.
open("Plots/piechartbay1.pdf", "w") do io
    writemime(io, "application/pdf", fig)
end


Compute HDIs (highest density intervals)


In [2]:
# Run the Bayesian correlated t-test once per probability level and plot the
# resulting HDI at each level as an error bar around its midpoint.
Prb = [0.50, 0.60, 0.70, 0.80, 0.90, 0.95, 0.99]
ys = zeros(length(Prb))
ymins = zeros(length(Prb))
ymaxs = zeros(length(Prb))

# Both rope bounds are passed as 0.0 here.
rope = 0.0
for (i, p) in enumerate(Prb)
    # Only the last return value (the interval) is used; the other outputs are
    # discarded so the loop does not overwrite mur, sigmar, dofr, p_r, p_l and
    # p_rope set by other cells.
    _, _, _, _, _, _, val = Bttest_correlated(acci - accj, rho, 0, rope, rope, p)
    ys[i] = (val[1] + val[2]) / 2
    # Order the two bounds explicitly so ymin <= ymax whichever way the
    # interval is returned (the original stored val[1] as ymax and val[2] as ymin).
    ymins[i], ymaxs[i] = minmax(val[1], val[2])
end

df = DataFrame(
    Probability=Prb,
    HDI=ys,
    ymin=ymins,
    ymax=ymaxs
)

p1 = plot(df, x=:Probability, y=:HDI, ymin=:ymin, ymax=:ymax,
     Geom.point, Geom.errorbar,
     Theme(major_label_font_size=13pt, minor_label_font_size=12pt, key_label_font_size=11pt))
display(p1)
Gadfly.draw(PDF("Plots/bars.pdf", 6inch, 3inch), p1)


Probability -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14 0.16 0.18 0.20 0.22 0.24 0.26 0.28 0.30 0.32 0.34 0.36 0.38 0.40 0.42 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58 0.60 0.62 0.64 0.66 0.68 0.70 0.72 0.74 0.76 0.78 0.80 0.82 0.84 0.86 0.88 0.90 0.92 0.94 0.96 0.98 1.00 1.02 1.04 1.06 1.08 1.10 1.12 1.14 1.16 1.18 1.20 1.22 1.24 1.26 1.28 1.30 1.32 1.34 1.36 1.38 1.40 1.42 1.44 1.46 1.48 1.50 0.0 0.5 1.0 1.5 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95 1.00 1.05 1.10 1.15 1.20 1.25 1.30 1.35 1.40 1.45 1.50 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 -0.50 -0.49 -0.48 -0.47 -0.46 -0.45 -0.44 -0.43 -0.42 -0.41 -0.40 -0.39 -0.38 -0.37 -0.36 -0.35 -0.34 -0.33 -0.32 -0.31 -0.30 -0.29 -0.28 -0.27 -0.26 -0.25 -0.24 -0.23 -0.22 -0.21 -0.20 -0.19 -0.18 -0.17 -0.16 -0.15 -0.14 -0.13 -0.12 -0.11 -0.10 -0.09 -0.08 -0.07 -0.06 -0.05 -0.04 -0.03 -0.02 -0.01 0.00 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.10 0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19 0.20 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.30 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.40 -0.5 0.0 0.5 -0.50 -0.48 -0.46 -0.44 -0.42 -0.40 -0.38 -0.36 -0.34 -0.32 -0.30 -0.28 -0.26 -0.24 -0.22 -0.20 -0.18 -0.16 -0.14 -0.12 -0.10 -0.08 -0.06 -0.04 -0.02 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14 0.16 0.18 0.20 0.22 0.24 0.26 0.28 0.30 0.32 0.34 0.36 0.38 0.40 HDI

In [ ]: