In [64]:
using RDatasets
using Gadfly
hw = dataset("car", "Davis")
#hw = sub(hw, :(Height .>= 100)) # broken
hw = hw[hw[:Height] .>= 100, :]
set_default_plot_size(22cm, 12cm)
plot(hw, x=:Height, y=:Weight, color=:Sex, Guide.title("Height & Weight by Gender"))
Out[64]:
In [67]:
#males = sub(hw, :(Sex .== "M"))
males = hw[hw[:Sex] .== "M", :]
plot(males, x=:Height, y=:Weight, Guide.title("Heights & Weights of Males"))
Out[67]:
In [68]:
#females = subset(hw, :(Sex .== "F"))
females = hw[hw[:Sex] .== "F", :]
plot(females, x=:Height, y=:Weight, Guide.title("Heights & Weights of Females"))
Out[68]:
In [69]:
using JuMP
using GLPKMathProgInterface
function LAD(data)
m = Model(solver=GLPKSolverLP())
@defVar(m, x[1:3])
@defVar(m, residuals[1:nrow(data)] >= 0)
for (i, (height, weight)) in enumerate(zip(data[:Height], data[:Weight]))
fitted = dot([1, height, height^2], x)
# Linear way of representing residual >= |observed - fitted|
@addConstraint(m, residuals[i] >= fitted - weight)
@addConstraint(m, residuals[i] >= weight - fitted)
end
# Minimize the total sum of these residuals.
@setObjective(m, Min, sum(residuals))
solve(m)
coefficients = getValue(x)[:]
return coefficients, height -> dot([1, height, height^2], coefficients)
end
ladCoefficients, ladPredictor = LAD(males)
println("Coefficients = $ladCoefficients")
println(ladPredictor(175))
In [70]:
ladFitted = DataFrame(
Height = males[:Height],
Weight = males[:Weight],
FittedWeight = map(ladPredictor, males[:Height])
)
plot(
ladFitted,
layer(x=:Height, y=:FittedWeight, Geom.line),
layer(x=:Height, y=:Weight, Geom.point)
)
Out[70]:
In [71]:
using JuMP
using GLPKMathProgInterface
function EpsilonLAD(data, epsilon)
m = Model(solver=GLPKSolverLP())
@defVar(m, x[1:3])
@defVar(m, residuals[1:nrow(data)] >= 0)
for (i, (height, weight)) in enumerate(zip(data[:Height], data[:Weight]))
fitted = dot([1, height, height^2], x)
# Linear way of representing residual >= |observed - fitted| - epsilon
@addConstraint(m, residuals[i] >= fitted - weight - epsilon)
@addConstraint(m, residuals[i] >= weight - fitted - epsilon)
end
# Minimize the total sum of these residuals.
@setObjective(m, Min, sum(residuals))
solve(m)
coefficients = getValue(x)[:]
return coefficients, height -> dot([1, height, height^2], coefficients)
end
epsLadCoefficients, epsLadPredictor = EpsilonLAD(males, 1.5)
println("Coefficients = $epsLadCoefficients")
println(epsLadPredictor(175))
In [72]:
epsLadFitted = DataFrame(
Height = males[:Height],
Weight = males[:Weight],
LADWeight = map(ladPredictor, males[:Height]),
EpsLADWeight = map(epsLadPredictor, males[:Height])
)
plot(
epsLadFitted,
layer(x=:Height, y=:LADWeight, Theme(default_color=color("green")), Geom.line),
layer(x=:Height, y=:EpsLADWeight, Theme(default_color=color("violet")), Geom.line),
layer(x=:Height, y=:Weight, Geom.point)
)
Out[72]:
In [73]:
plot([abs, x -> max(abs(x) - 1, 0)], -5, 5)
Out[73]:
In [74]:
plot([x -> x^2, x -> max(abs(x) - 1, 0)^2], -5, 5)
Out[74]:
In [54]: