In [ ]:
include("ex10.4.jl");
using Distributions;
using Gadfly;
using Random;
set_default_plot_size(9inch, 9inch/golden);
(b). Redo the computations for the example of Figure 10.2. Plot the training error as well as test error, and discuss its behaviour.
(c). Investigate the number of iterations needed to make the test error finally start to rise.
In [4]:
# Generate N samples with p standard-Gaussian features for part (b).
# The label is +1 when the squared radius sum_j x_j^2 exceeds the
# chi-square(p) median (so the classes split 50/50 in expectation),
# and -1 otherwise — the setup of ESL Figure 10.2 (there with p = 10).
#
# Returns (X, Y): X is an N×p matrix, Y a length-N vector of ±1 labels.
function gen_data_b(N, p)
    X = randn(N, p)
    # `dims=2` keyword replaces the pre-0.7 positional form `sum(X.^2, 2)`,
    # which errors on modern Julia.
    r2 = vec(sum(abs2, X, dims=2))
    idx = r2 .> median(Chisq(p))
    # Map Bool {false, true} -> {-1, +1} integer class labels.
    Y = 2 .* idx .- 1
    return (X, Y)
end
# Part (b): reproduce the Figure 10.2 experiment.  The example of Figure 10.2
# uses TEN standard-Gaussian features (the median(Chisq(p)) threshold in
# gen_data_b adapts automatically); the original cell passed p = 5, which
# does not match the figure's setup.  Sample sizes are kept small here for
# speed — the book uses 2000 train / 10000 test; confirm if exact
# reproduction is required.
X_train, Y_train = gen_data_b(200, 10)
X_test, Y_test = gen_data_b(1000, 10)
# Run M iterations of AdaBoost (make_adaboost_plotdata comes from ex10.4.jl)
M = 200
test_errors_b, train_errors_b = make_adaboost_plotdata(X_train, Y_train, X_test, Y_test, M);
# Plot test error (red) and training error (default color) vs. iterations
plot(layer(x=1:M, y=test_errors_b, Theme(default_color=color("red")), Geom.line),
layer(x=1:M, y=train_errors_b, Geom.line),
Guide.XLabel("Iterations"), Guide.YLabel("Errors"))
Out[4]:
(d). Change the setup of this example as follows: define two classes, with the features in Class 1 being $X_1$, $X_2$, ..., $X_{10}$, standard independent Gaussian variates. In Class 2, the features $X_1$, $X_2$, ... $X_{10}$ are also standard independent Gaussian, but conditioned on the event $\sum_j X_j^2 > 12$. Now the classes have significant overlap in feature space. Repeat the AdaBoost experiments as in Figure 10.2 and discuss the results.
In [2]:
# Generate training data, using rejection sampling and a 50/50 split
# between the classes
# Generate N samples with p features for part (d), split 50/50 between:
#   Class 1 (label +1): standard independent Gaussian features.
#   Class 2 (label -1): standard Gaussians conditioned on sum_j x_j^2 > threshold,
#                       drawn by rejection sampling.
# `threshold` defaults to 12 as specified in the exercise; it is exposed as a
# parameter so other conditioning radii can be explored.
#
# Returns (X, Y): X is an N×p matrix, Y a length-N vector of ±1.0 labels,
# with rows shuffled so class membership is not ordered.
function gen_data_d(N, p, threshold=12)
    n_1 = N ÷ 2    # integer division replaces the long-removed `int(N/2)`
    n_2 = N - n_1
    X_1 = randn(n_1, p)
    X_2 = zeros(n_2, p)    # filled row-by-row below; no need for random init
    # Rejection sampling: redraw each Class-2 row until its squared radius
    # exceeds the threshold.
    for i in 1:n_2
        while true
            x = randn(1, p)
            if sum(abs2, x) > threshold
                X_2[i, :] = x
                break
            end
        end
    end
    X = [X_1; X_2]
    Y = [ones(n_1); -ones(n_2)]
    # Shuffle jointly so X rows and Y labels stay aligned.
    perm = randperm(N)
    return (X[perm, :], Y[perm])
end
# Part (d): the exercise specifies TEN features (X_1, ..., X_10) with Class 2
# conditioned on sum_j X_j^2 > 12.  The original cell passed p = 5, which
# contradicts that setup (and makes the conditioning event far rarer), so
# p = 10 is used here.
trainX, trainY = gen_data_d(200, 10);
testX, testY = gen_data_d(1000, 10);
# Repeat the AdaBoost experiment (make_adaboost_plotdata comes from ex10.4.jl)
M = 200
test_errors_d, train_errors_d = make_adaboost_plotdata(trainX, trainY, testX, testY, M);
# Plot test error (red) and training error (default color) vs. iterations
plot(layer(x=1:M, y=test_errors_d, Theme(default_color=color("red")), Geom.line),
layer(x=1:M, y=train_errors_d, Geom.line),
Guide.XLabel("Iterations"), Guide.YLabel("Errors"))
Out[2]: