In [239]:
using Gadfly
using StatsBase: sample
using Optim: optimize

In [ ]:
data = readdlm("./stanford_dl_ex/ex1/housing.data")
data = data'

In [191]:
data = vcat(ones(eltype(data), 1, 506), data)


Out[191]:
15x506 Array{Float64,2}:
   1.0        1.0        1.0      …    1.0        1.0        1.0    
   0.00632    0.02731    0.02729       0.06076    0.10959    0.04741
  18.0        0.0        0.0           0.0        0.0        0.0    
   2.31       7.07       7.07         11.93      11.93      11.93   
   0.0        0.0        0.0           0.0        0.0        0.0    
   0.538      0.469      0.469    …    0.573      0.573      0.573  
   6.575      6.421      7.185         6.976      6.794      6.03   
  65.2       78.9       61.1          91.0       89.3       80.8    
   4.09       4.9671     4.9671        2.1675     2.3889     2.505  
   1.0        2.0        2.0           1.0        1.0        1.0    
 296.0      242.0      242.0      …  273.0      273.0      273.0    
  15.3       17.8       17.8          21.0       21.0       21.0    
 396.9      396.9      392.83        396.9      393.45     396.9    
   4.98       9.14       4.03          5.64       6.48       7.88   
  24.0       21.6       34.7          23.9       22.0       11.9    

In [ ]:
mask = sample(1:506, 506, replace=false)

In [ ]:
# training data
train_X = data[1:end-1, mask[1:400]]
train_y = data[end, mask[1:400]]

# test data
test_X = data[1:end-1, mask[401:end]]
test_y = data[end, mask[401:end]]

In [ ]:
# number of training instances
m = size(train_X, 2)

# number of features
n = size(train_X, 1)

In [ ]:
theta = rand(n)

In [287]:
function f(theta::Vector)
    # fast implementation based on: http://stackoverflow.com/questions/21778374/what-is-the-recommended-way-to-iterate-a-matrix-over-rows
    obj = 0
    for j = 1:m
        y = 0
        for i = 1:n
            y += theta[i] * train_X[i, j]
        end
        obj += (y - train_y[j])^2
    end
    obj / 2
end


Out[287]:
f (generic function with 3 methods)

In [ ]:
f(theta)

In [289]:
function f_vec(theta::Vector)
    obj = 0
    for i = 1:size(train_X, 2)
        obj += (theta' * train_X[:, i] - train_y[i])[1]^2
    end
    obj / 2
end


Out[289]:
f_vec (generic function with 2 methods)

In [ ]:
f_vec(theta)

In [290]:
function g!(theta::Vector, storage::Vector)
    for k = 1:length(storage)
        storage[k] = 0
    end
    for j = 1:size(train_X, 2)
        y = 0
        for i = 1:size(train_X, 1)
            y += theta[i] * train_X[i, j] 
        end
        for k = 1:length(storage)
            storage[k] += (y - train_y[j]) * theta[k]
        end
    end
end


Out[290]:
g! (generic function with 3 methods)

In [296]:
optimize(f_vec, g!, theta, method=:gradient_descent, iterations=200)


Out[296]:
Results of Optimization Algorithm
 * Algorithm: Gradient Descent
 * Starting Point: [0.3309818315785851,0.009122037584050835,0.7235023426533551,0.2822699186792468,0.9548523757234924,0.22927950327577573,0.8721992636944087,0.8083549548301967,0.20493855978811815,0.9665200977732129,0.9124165991038633,0.4538740238604806,0.9218717420720561,0.7050704678204374]
 * Minimum: [-0.31336090566174524,-0.008636395373079423,-0.6849842731878779,-0.2672423345309657,-0.9040176126976947,-0.21707304130110078,-0.825764815806497,-0.7653194724726373,-0.19402796943242684,-0.9150641645010704,-0.8638410467195423,-0.42971052064980764,-0.872792813496418,-0.6675336809208039]
 * Value of Function at Minimum: 124161659.905248
 * Iterations: 1
 * Convergence: true
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-08: true
   * |g(x)| < 1.0e-08: false
   * Exceeded Maximum Number of Iterations: false
 * Objective Function Calls: 73
 * Gradient Call: 73

In [238]:
actual_prices = train_y
predicted_prices = θ' * train_X
train_rms = sqrt(mean((predicted_prices - actual_prices).^2))
println("Train RMS: $train_rms")

actual_prices = test_y
predicted_prices = θ' * test_X
test_rms = sqrt(mean((predicted_prices - actual_prices).^2))
println("Test RMS: $test_rms")


Train RMS: 442.0222001622523
Test RMS: 452.628204048933

In [271]:
Gadfly.plot(
    layer(x=1:length(test_y), y=sort(test_y'[:, 1]), Geom.point),
    layer(x=1:length(test_y), y=sort(predicted_prices'[:, 1]), Geom.point),
)


Out[271]:
x -200 -150 -100 -50 0 50 100 150 200 250 300 350 -150 -145 -140 -135 -130 -125 -120 -115 -110 -105 -100 -95 -90 -85 -80 -75 -70 -65 -60 -55 -50 -45 -40 -35 -30 -25 -20 -15 -10 -5 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100 105 110 115 120 125 130 135 140 145 150 155 160 165 170 175 180 185 190 195 200 205 210 215 220 225 230 235 240 245 250 255 260 265 270 275 280 285 290 295 300 -200 0 200 400 -150 -140 -130 -120 -110 -100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 -1000 -800 -600 -400 -200 0 200 400 600 800 1000 1200 1400 1600 1800 -800 -750 -700 -650 -600 -550 -500 -450 -400 -350 -300 -250 -200 -150 -100 -50 0 50 100 150 200 250 300 350 400 450 500 550 600 650 700 750 800 850 900 950 1000 1050 1100 1150 1200 1250 1300 1350 1400 1450 1500 1550 1600 -1000 0 1000 2000 -800 -750 -700 -650 -600 -550 -500 -450 -400 -350 -300 -250 -200 -150 -100 -50 0 50 100 150 200 250 300 350 400 450 500 550 600 650 700 750 800 850 900 950 1000 1050 1100 1150 1200 1250 1300 1350 1400 1450 1500 1550 1600 y