In [1]:
## Numeric derivative: returns a closure that approximates func'(x) by a
## central difference with step epsilon; applied to an Array{Float64, 1} it
## returns an Array{Float64, 1}.
function derivative(func::Function, epsilon = 1e-4)
    function(x)
        (func(x + epsilon) - func(x - epsilon))/(2*epsilon)
    end
end
Out[1]:
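A quick sanity check of the closure returned by derivative (not part of the original notebook; d_sin is a name made up here):

d_sin = derivative(sin)
d_sin(0.0)   # central difference; should be very close to cos(0.0) = 1.0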
In [2]:
type NeuralLayer
    weight::Union(Matrix, Array{None, 1})  # weights leaving this layer; let conversion do the work
    _nodes_value::Array{Float64, 1}        # node values of this layer (hidden layers carry a leading bias node)
    function NeuralLayer(w, nv)
        new(w, nv)
    end
end
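As a small illustration (not from the original notebook, with made-up numbers): a layer mapping 2 inputs to 3 outputs stores a 3x(2+1) weight matrix, and its _nodes_value keeps a leading 1.0 for the bias node.

layer = NeuralLayer(rand(3, 3), [1.0, 0.5, -0.5])
layer.weight * layer._nodes_value   # 3-element vector handed to the next layer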
In [3]:
type SimpleNeuralNetwork
    structure::Array{Int, 1}
    act_fun::Function
    act_diff::Function
    layers::Array{NeuralLayer, 1}
    out_fun::Function
    out_diff::Function
    _deltas::Array{Vector{FloatingPoint}, 1} # Array for storing the back-propagated deltas
    # Constructor for the SimpleNeuralNetwork type
    function SimpleNeuralNetwork(struct; act_fun = tanh, act_diff = None, out_fun = x -> x, out_diff = None, lambda = 0.1)
        # Fall back to the numeric derivative when no analytic derivative is supplied.
        if act_diff == None
            act_diff = derivative(act_fun)
        end
        if out_diff == None
            out_diff = derivative(out_fun)
        end
        layers = NeuralLayer[]
        for ind = 1:(length(struct)-1)
            dim_in = struct[ind]
            dim_out = struct[ind+1]
            # Draw the weights uniformly from [-b, b].
            b = sqrt(6) / sqrt(dim_in + dim_out)
            w = 2b*rand(dim_out, dim_in + 1) - b
            nodes_value = push!([1.0], rand(dim_in)...)  # leading 1.0 is the bias node
            temp_layer = NeuralLayer(w, nodes_value)
            push!(layers, temp_layer)
        end
        # Append the output layer, which has no outgoing weights.
        w = []
        nodes_value = rand(struct[end])
        output_layer = NeuralLayer(w, nodes_value)
        push!(layers, output_layer)
        _deltas = Vector{Float64}[[1.] for i = 1:length(struct)]
        nn = new(struct, act_fun, act_diff, layers, out_fun, out_diff, _deltas)
    end
end
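The constructor draws each weight matrix uniformly from [-b, b] with b = sqrt(6)/sqrt(dim_in + dim_out), a Glorot/Xavier-style initialization. A quick check of that claim (not from the original notebook):

dim_in, dim_out = 3, 4
b = sqrt(6) / sqrt(dim_in + dim_out)
w = 2b*rand(dim_out, dim_in + 1) - b
all(abs(w) .<= b)   # every entry lies in [-b, b]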
In [4]:
function tanh_diff(x::Vector{Float64})
    1 - tanh(x).^2
end
function tanh_diff(x::Vector{Float32})
    1 - tanh(x).^2
end
Out[4]:
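tanh_diff is the analytic derivative 1 - tanh(x)^2; it should agree with the generic numeric derivative defined above up to the O(epsilon^2) truncation error. A check not in the original notebook:

maximum(abs(tanh_diff([0.3, -0.7]) - derivative(tanh)([0.3, -0.7])))   # expect something on the order of 1e-8 or smaller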
In [5]:
import Base.repr
function repr(nn::SimpleNeuralNetwork)
    struct = join([string(i) for i in nn.structure], "x")
    msg = join(["It is a ", struct, " SimpleNeuralNetwork.\n"], "")
    msg = join([msg, "Activation Function: ", string(nn.act_fun), '\n'], "")
    msg = join([msg, "Output Function: ", string(nn.out_fun), '\n'], "")
    msg
end
Out[5]:
In [6]:
import Base.show
function show(nn::SimpleNeuralNetwork)
    print(repr(nn))
    println()
end
Out[6]:
In [7]:
function predict(nn::SimpleNeuralNetwork, data::Array{Float64, 2})
    predict_results = Array{Float64, 1}[]
    for data_id = 1:size(data)[1]
        v = data[data_id, :][:]  # pull out one observation as a vector
        forward_prob!(nn, v)
        push!(predict_results, nn.out_fun(nn.layers[end]._nodes_value))
    end
    return predict_results
end
Out[7]:
In [8]:
function forward_prob!(nn::SimpleNeuralNetwork, x)
    # forward_prob! updates _nodes_value for all layers.
    nn.layers[1]._nodes_value = [1, x]  # prepend the bias node to the input
    n_layers = length(nn.structure)
    for layer_id = 1:(n_layers-2)
        current_layer = nn.layers[layer_id]
        next_layer = nn.layers[layer_id + 1]
        temp = current_layer.weight * current_layer._nodes_value
        next_layer._nodes_value = [1., nn.act_fun(temp)]
    end
    # Compute the node values of the last layer without passing them through the activation function.
    current_layer = nn.layers[end - 1]
    output_layer = nn.layers[end]
    temp = current_layer.weight * current_layer._nodes_value
    output_layer._nodes_value = temp[:]
    return
end
Out[8]:
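In words, one forward step takes a layer's node values (with the leading bias node), multiplies by that layer's weights, applies the activation, and prepends a fresh bias node. A standalone sketch of a single step with made-up numbers (not from the notebook):

a_prev = [1.0, 0.3, -0.2]   # bias node + two node values
W      = rand(4, 3)         # dim_out x (dim_in + 1), as built by the constructor
s      = W * a_prev         # pre-activation signal
a_next = [1., tanh(s)]      # activated values with a new bias node, as in forward_prob!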
In [9]:
function back_prob!(nn::SimpleNeuralNetwork, x, y)
    # back_prob! updates nn._deltas.
    forward_prob!(nn, x)
    nn._deltas[end] = -2.*(y - nn.out_fun(nn.layers[end]._nodes_value)).*nn.out_diff(nn.layers[end]._nodes_value)
    n_layers = length(nn.structure)
    for layer_id = n_layers-1:-1:1
        delta_next = nn._deltas[layer_id + 1]
        w_this = nn.layers[layer_id].weight[:, 2:end]  # drop the bias column
        nodes_value_this = nn.layers[layer_id]._nodes_value
        dd = nn.act_diff(nodes_value_this[2:end])
        temp = transpose(w_this) * delta_next
        nn._deltas[layer_id] = temp[:] .* dd
    end
    return
end
Out[9]:
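For reference, the recursion back_prob! implements: the output delta is -2*(y - prediction) scaled elementwise by out_diff at the output nodes, and each earlier delta is the transposed weight matrix (bias column removed) times the next delta, scaled elementwise by act_diff at this layer's node values. A one-step sketch with made-up numbers (not from the notebook):

delta_next = [0.2, -0.1]              # delta of layer l+1
W_l        = rand(2, 4)               # weights leaving layer l; column 1 is the bias column
a_l        = [1.0, 0.5, -0.3, 0.8]    # node values of layer l (leading bias node)
delta_l    = (transpose(W_l[:, 2:end]) * delta_next) .* tanh_diff(a_l[2:end])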
In [10]:
function sqr_cost(Y_hat::Array, Y::Array)
    if size(Y_hat)[1] != size(Y)[1]
        error("The numbers of predictions and observations do not agree.")
    end
    n_obs = size(Y)[1]
    err = 0.0
    for ind = 1:n_obs
        err += dot(Y_hat[ind] - Y[ind], Y_hat[ind] - Y[ind])
    end
    return err/n_obs
end
Out[10]:
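sqr_cost is the mean squared error: the squared Euclidean distance between each prediction and observation, averaged over observations. A hypothetical two-observation example (not from the notebook):

y_hat = Vector{Float64}[[1.0], [2.0]]
y     = Vector{Float64}[[1.5], [2.0]]
sqr_cost(y_hat, y)   # ((1.0 - 1.5)^2 + (2.0 - 2.0)^2) / 2 = 0.125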
In [11]:
function train!(nn::SimpleNeuralNetwork, X::Matrix, Y::Array; epos = 10000, cost_fun = sqr_cost, tol = 0.001, learning_rate = 0.1, lambda = 1.0)
    y_predict = predict(nn, X)
    n_obs = size(X)[1]
    errors = Float64[]
    for iter = 1:epos
        # Stochastic gradient descent: pick one observation at random.
        ind = rand(1:n_obs)
        x = X[ind, :][:]
        y = Y[ind]
        back_prob!(nn, x, y)
        for layer_id in 1:(length(nn.layers)-1)
            next_id = layer_id + 1
            nl = nn.layers[layer_id]
            gradient = nn._deltas[next_id]*nl._nodes_value'
            nl.weight -= learning_rate .* gradient
        end
        y_predict = predict(nn, X)
        err = cost_fun(y_predict, Y)
        push!(errors, err)
        # Every 1000 iterations, stop early if the error has stopped moving.
        if iter % 1000 == 0
            var_e = var(errors[(end-999):end])
            if var_e < tol
                println("Terminating training process due to no significant improvement.")
                println("At iteration No. ", iter)
                break
            end
        end
    end
    return errors
end
Out[11]:
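train! is plain stochastic gradient descent: after back_prob! fills in the deltas, the gradient for the weights leaving layer l is the outer product of the next layer's delta with this layer's node values, and the weights take a step of size learning_rate against it. A shape check with made-up numbers (not from the notebook):

delta_next = [0.2, -0.1]
a_l        = [1.0, 0.5, -0.3]
grad       = delta_next * a_l'    # 2x3, same shape as the layer's weight matrix
# weight update inside train!:  nl.weight -= learning_rate .* gradient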
In [12]:
x = [float32(1), float32(2)]
Out[12]:
In [13]:
tanh_diff(x)
Out[13]:
In [14]:
nn = SimpleNeuralNetwork([3, 4, 3, 2]; act_fun = tanh, out_fun = tanh);
In [15]:
names(nn)
Out[15]:
In [16]:
show(nn)
In [17]:
nn.layers[3].weight * [1., 3.0, 2.0, 1.1]
Out[17]:
In [18]:
print(repr(nn))
In [19]:
nn.act_diff([3., 2., 1.])
Out[19]:
In [20]:
nn.layers[4]
Out[20]:
In [21]:
forward_prob!(nn, [2., 3., 4.])
In [22]:
predict(nn, [2. 3. 4.; 1. 2. 3.])
Out[22]:
In [23]:
nn.layers[4]._nodes_value
Out[23]:
In [24]:
nn.act_diff([3., 2., 1.])
Out[24]:
In [25]:
tanh_diff([3.0, 2.0, 1.0])
Out[25]:
In [26]:
length(nn._deltas)
Out[26]:
In [27]:
back_prob!(nn, [1., 3., 2.], [1., 1.])
In [28]:
using Gadfly
In [29]:
f(x) = x
d_f = derivative(f)
Out[29]:
In [30]:
x = [v for v in 10:0.01:30];  # grid of points at which to evaluate the numeric derivative
y = d_f(x);
In [31]:
plot(x = x, y = y, Geom.line)
Out[31]:
In [104]:
@time plot([nn.out_fun, nn.out_diff], -2, 2)
Out[104]:
In [33]:
plot([nn.act_fun, nn.act_diff], -2, 2) # No xlim and ylim yet....
Out[33]:
In [34]:
data = readdlm("hw4_nnet_train.dat", ' ');
X = data[:, 1:end-1];
Y = data[:, end];
In [35]:
println("X: ", size(X))
println("Y: ", size(Y))
In [44]:
nn_ntu = SimpleNeuralNetwork([2, 6, 1]; act_fun = tanh, out_fun = tanh);
In [45]:
sqr_cost(predict(nn_ntu, X), Y)
Out[45]:
In [46]:
@time errors = train!(nn_ntu, X, Y; epos = 50000);
errors[end-10:end]
Out[46]:
In [47]:
sqr_cost(predict(nn_ntu, X), Y)
Out[47]:
In [51]:
var(errors[end - 2000:end - 1000])
Out[51]:
In [48]:
num_sample = 1000
plot(x=1:num_sample, y = errors[1:num_sample], Geom.line)
Out[48]:
In [52]:
Y
Out[52]:
In [67]:
y_hat = Float64[ y[1] for y in map(round, predict(nn_ntu, X))]
Out[67]:
In [68]:
using DataFrames
using RDatasets
In [69]:
wine_data = readtable("winequality-red.csv", separator = ';');
In [70]:
temp = DataFrames.columns(wine_data);
In [71]:
names(wine_data)
Out[71]:
In [72]:
wine_data[[:pH, :quality]][1:10,:]
Out[72]:
In [73]:
DataFrames.index(wine_data)
Out[73]:
In [74]:
X = array(wine_data[:, 1:end-1]);
Y = Float64[y for y in array(wine_data[:, end])];
In [75]:
X[1:3, :]
Out[75]:
In [76]:
Y[1:3]
Out[76]:
In [77]:
size(X)
Out[77]:
In [78]:
plot(wine_data, x = "quality", Geom.histogram)
Out[78]:
In [79]:
Y[rand(1:1599)]
Out[79]:
In [85]:
nn_w = SimpleNeuralNetwork([11, 12, 6, 1]; act_fun = tanh);
In [91]:
sqr_cost(predict(nn_w, X), Y)
Out[91]:
In [87]:
errors = train!(nn_w, X, Y; epos = 600);
In [88]:
plot(x=1:length(errors), y = errors, Geom.line)
Out[88]:
In [101]:
Y_hat = Float64[ round(y[1]) for y in predict(nn_w, X)];
In [103]:
sum(Y .== Y_hat)
Out[103]:
In [33]:
import PyPlot
x = linspace(0, 3);
y = nn.out_diff(x);
PyPlot.ylim((-0.5, 2))
PyPlot.plot(x, y)
Out[33]:
In [66]:
?readdlm
In [30]:
var([1, 2, 3])
Out[30]: