In [13]:
using Colors
using DataFrames
using Distributions
using Gadfly
In [14]:
"""
    auc(h)

Return the area under a histogram supplied as an `(edges, counts)` tuple.

Unpacks the tuple and forwards to the two-argument `auc(edges, counts)` method.
"""
function auc{R <: Real, N <: Integer}(h::Tuple{AbstractVector{R}, AbstractVector{N}})
    edges, counts = h
    return auc(edges, counts)
end
"""
    auc(edges, counts)

Return the total area of a histogram: the sum over bins of bin width times count.

# Arguments
- `edges`: bin boundaries; bin `i` spans `edges[i]` to `edges[i + 1]`.
- `counts`: one count per bin; its length must equal the number of consecutive
  edge pairs.

# Throws
- `DimensionMismatch` if the number of counts differs from the number of bin widths.
"""
function auc{R <: Real, N <: Integer}(edges::AbstractVector{R}, counts::AbstractVector{N})
    widths = edges[2 : end] - edges[1 : end - 1]
    # `.*` broadcasts, so a length-1 operand would silently be stretched over the
    # other one and yield a wrong area; reject mismatched lengths explicitly.
    if length(widths) != length(counts)
        throw(DimensionMismatch(
            "got $(length(counts)) counts for $(length(widths)) bin widths"))
    end
    return sum(widths .* counts)
end;
In [15]:
# Normal distribution with mean 0 and standard deviation 1; sampled from below.
d = Distributions.Normal(0.0, 1.0)
Out[15]:
In [37]:
# Seed the global RNG so the sample (and therefore the histogram) is reproducible.
srand(1)
n = 10000
bins = 100
x = rand(d, n)

# Bin the sample and compute the histogram's total area for normalisation.
edges, counts = hist(x, bins)
xauc = auc(edges, counts)

# One row per bin: left edge, right edge and raw count.
xvec = collect(edges)
xdf = DataFrame(xmin = xvec[1 : (end - 1)], xmax = xvec[2 : end], count = counts)

# Scale counts by the total area so the bars integrate to one.
xdf[:density] = xdf[:count] / xauc;
# Midpoint of each bin.
xdf[:x] = (xdf[:xmin] .+ xdf[:xmax]) ./ 2;
In [17]:
# Evaluation grid: integer bounds enclosing the histogram edges, 100 steps per unit.
lower = floor(Int64, first(xvec))
upper = ceil(Int64, last(xvec))
xₛ = linspace(lower, upper, 100 * (upper - lower) + 1)
# NOTE(review): the curve is a Student t density with 12 degrees of freedom, whereas
# the sample in `x` is drawn from `d` — confirm the mismatch is intentional.
xpdf = DataFrame(x = xₛ, density = pdf(TDist(12), xₛ));
In [50]:
# Overlay the reference density curve on the normalised histogram of the sample.
Gadfly.plot(
    layer(
        xpdf,
        x = :x,
        y = :density,
        Geom.line,
        Theme(default_color = colorant"orange") ),
    layer(
        xdf,
        # Geom.bar centres each bar on its `x` value, so use the bin midpoints;
        # anchoring on the left edges would shift the histogram half a bin left
        # relative to the density curve.
        x = :x,
        y = :density,
        Geom.bar,
        Theme(default_color = colorant"gainsboro") ) )
Out[50]:
In [38]:
# Bar chart of the normalised histogram on its own, positioned at the bin midpoints.
Gadfly.plot(xdf, Geom.bar, x = :x, y = :density)
Out[38]:
In [34]:
# Show the bin midpoints as a 23-by-3 matrix; the shape is hard-coded, so the
# column must hold exactly 69 entries for this to succeed.
reshape(xdf[:x], (23, 3))
Out[34]:
In [36]:
# Inspect the container type of the computed density column.
typeof(xdf[:density])
Out[36]: