notebook.community

Edit and run



In [1]:

    
## Numeric derivative which will return Array{Float64, 1}
function derivative(func::Function, epsilon = 1e-4)
    function(x)
        (func(x + epsilon) - func(x - epsilon))/(2*epsilon)
    end
end









    Out[1]:





derivative (generic function with 2 methods)



In [2]:

    
type NeuralLayer
    weight::Union(Matrix, Array{None, 1}) # Let conversion do the work....
    _nodes_value::Array{Float64, 1}
    
    function NeuralLayer(w, nv)
        new(w, nv)
    end
end



In [3]:

    
type SimpleNeuralNetwork
    structure::Array{Int, 1}
    act_fun::Function
    act_diff::Function
    layers::Array{NeuralLayer, 1}
    out_fun::Function
    out_diff::Function
    _deltas::Array{Vector{FloatingPoint}, 1} # Array for storing dels
    
    # Constructor for NeuralNetwork type
    function SimpleNeuralNetwork(struct; act_fun = tanh, act_diff = None, out_fun = x-> x, out_diff = None, lambda = 0.1)
        if act_diff == None
            act_diff = derivative(act_fun)
        end
        if out_diff == None
            out_diff = derivative(out_fun)
        end
        layers = NeuralLayer[]
        for ind = 1:(length(struct)-1)
            dim_in = struct[ind]
            dim_out = struct[ind+1]
            b = sqrt(6) / sqrt(dim_in + dim_out)
            w = 2b*rand(dim_out, dim_in + 1) - b
            nodes_value = push!([1.0], rand(dim_in)...)
            temp_layer = NeuralLayer(w, nodes_value)
            push!(layers, temp_layer)
        end
        # append the output layer.
        w = []
        nodes_value = rand(struct[end])
        output_layer = NeuralLayer(w, nodes_value)
        push!(layers, output_layer)
        _deltas = Vector{Float64}[[1.] for i = 1:length(struct)]
        nn = new(struct, act_fun, act_diff, layers, out_fun, out_diff, _deltas)
    end    
end



In [4]:

    
function tanh_diff(x::Vector{Float64})
    1 - tanh(x).^2
end
function tanh_diff(x::Vector{Float32})
    1 - tanh(x).^2
end









    Out[4]:





tanh_diff (generic function with 2 methods)



In [5]:

    
import Base.repr

function repr(nn::SimpleNeuralNetwork)
    struct = join([string(i) for i in nn.structure], "x")
    msg = join(["It is a ", struct, " SimpleNeuralNetwork.\n"] , "")
    msg = join([msg, "Activate Function: ", string(nn.act_fun), '\n'], "")
    msg = join([msg, "Output Function: ", string(nn.out_fun), '\n'], "")
    msg
end









    Out[5]:





repr (generic function with 3 methods)



In [6]:

    
import Base.show

function show(nn::SimpleNeuralNetwork)
    print(repr(nn))
    println()
end









    Out[6]:





show (generic function with 91 methods)



In [7]:

    
function predict(nn::SimpleNeuralNetwork, data::Array{Float64, 2})
    predict_results = Array{Float64, 1}[]
    for data_id = 1:size(data)[1]
        v = data[data_id, :][:]
        #println(v)
        forward_prob!(nn, v)
        push!(predict_results, nn.out_fun(nn.layers[end]._nodes_value))
    end
    return predict_results
end









    Out[7]:





predict (generic function with 1 method)



In [8]:

    
function forward_prob!(nn::SimpleNeuralNetwork, x)
    # forward_prob! will update nodes_value for all layers.
    nn.layers[1]._nodes_value = [1, x]
    n_layers = length(nn.structure)
    for layer_id = 1:(n_layers-2)
        current_layer = nn.layers[layer_id]
        next_layer = nn.layers[layer_id + 1]
        temp = current_layer.weight * current_layer._nodes_value
        next_layer._nodes_value = [1., nn.act_fun(temp)]
    end
    # Compute the node values of the last layer without pass through activation function.
    current_layer = nn.layers[end - 1]
    output_layer = nn.layers[end]
    temp = current_layer.weight * current_layer._nodes_value
    output_layer._nodes_value = temp[:]
    return
end









    Out[8]:





forward_prob! (generic function with 1 method)



In [9]:

    
function back_prob!(nn::SimpleNeuralNetwork, x, y)
    # back_prob! will update nn._detas.
    forward_prob!(nn, x)
    nn._deltas[end] = -2.*(y - nn.out_fun(nn.layers[end]._nodes_value)).*nn.out_diff(nn.layers[end]._nodes_value)
    
    n_layers = length(nn.structure)
    for layer_id = n_layers-1:-1:1
        delta_next = nn._deltas[layer_id + 1]
        w_this = nn.layers[layer_id].weight[:, 2:end]
        nodes_value_this = nn.layers[layer_id]._nodes_value
        dd = nn.act_diff(nodes_value_this[2:end])
        temp = transpose(w_this) * delta_next
        nn._deltas[layer_id] = temp[:] .* dd
    end
    return
end









    Out[9]:





back_prob! (generic function with 1 method)



In [10]:

    
function sqr_cost(Y_hat::Array, Y::Array)
    if size(Y_hat)[1] != size(Y)[1]
        error("The number of predictions and observations doesn't agree with each other")
    end
    n_obs = convert(Float64, size(Y)[1])
    err = 0.0
    for ind = 1:n_obs
        err += dot(Y_hat[ind] - Y[ind], Y_hat[ind] - Y[ind])
    end
    return err/n_obs
end









    Out[10]:





sqr_cost (generic function with 1 method)



In [11]:

    
function train!(nn::SimpleNeuralNetwork, X::Matrix, Y::Array; epos = 10000, cost_fun = sqr_cost, tol = 0.001, learning_rate = 0.1, lambda = 1.0)
    y_predict = predict(nn, X)
    #return y_predict
    n_obs = size(X)[1]
    #return n_obs
    errors = Float64[]
    for iter = 1:epos
        ind = rand(1:n_obs)
        x = X[ind, :][:]
        y = Y[ind]
        back_prob!(nn, x, y)
        #return nn
        for layer_id in 1:(length(nn.layers)-1)
            next_id = layer_id + 1
            nl = nn.layers[layer_id]
            gradient = nn._deltas[next_id]*nl._nodes_value'
            nl.weight -= learning_rate .* gradient
        end
        
        y_predict = predict(nn, X)
        err = cost_fun(y_predict, Y)
        push!(errors, err)
        if iter % 1000 == 0
            var_e = var(errors[(end-999):end])
            if var_e < tol
                println("Terminating training process due to no significant improvement.")
                println("At iteration No. ", iter)
                break
            end
        end
    end
    return errors
end









    Out[11]:





train! (generic function with 1 method)

Testing Area



In [12]:

    
x = [float32(1), float32(2)]









    Out[12]:





2-element Array{Float32,1}:
 1.0
 2.0



In [13]:

    
tanh_diff(x)









    Out[13]:





2-element Array{Float32,1}:
 0.419974 
 0.0706508



In [14]:

    
nn = SimpleNeuralNetwork([3, 4, 3, 2]; act_fun = tanh, out_fun = tanh);



In [15]:

    
names(nn)









    Out[15]:





7-element Array{Symbol,1}:
 :structure
 :act_fun  
 :act_diff 
 :layers   
 :out_fun  
 :out_diff 
 :_deltas



In [16]:

    
show(nn)









    



It is a 3x4x3x2 SimpleNeuralNetwork.
Activate Function: tanh
Output Function: tanh



In [17]:

    
nn.layers[3].weight * [1., 3.0, 2.0, 1.1]









    Out[17]:





2-element Array{Float64,1}:
 -0.800941
  3.16321



In [18]:

    
print(repr(nn))









    



It is a 3x4x3x2 SimpleNeuralNetwork.
Activate Function: tanh
Output Function: tanh



In [19]:

    
nn.act_diff([3., 2., 1.])









    Out[19]:





3-element Array{Float64,1}:
 0.00986604
 0.0706508 
 0.419974



In [20]:

    
nn.layers[4]









    Out[20]:





NeuralLayer(None[],[0.965125,0.0963642])



In [21]:

    
forward_prob!(nn, [2., 3., 4.])



In [22]:

    
predict(nn, [2. 3. 4.; 1. 2. 3.])









    Out[22]:





2-element Array{Array{Float64,1},1}:
 [-0.902345,-0.669873]
 [-0.895396,-0.586614]



In [23]:

    
nn.layers[4]._nodes_value









    Out[23]:





2-element Array{Float64,1}:
 -1.4485  
 -0.672487



In [24]:

    
nn.act_diff([3., 2., 1.])









    Out[24]:





3-element Array{Float64,1}:
 0.00986604
 0.0706508 
 0.419974



In [25]:

    
tanh_diff([3.0, 2.0, 1.0])









    Out[25]:





3-element Array{Float64,1}:
 0.00986604
 0.0706508 
 0.419974



In [26]:

    
length(nn._deltas)









    Out[26]:





4



In [27]:

    
back_prob!(nn, [1., 3., 2.], [1., 1.])



In [28]:

    
using Gadfly



In [29]:

    
f(x) = x
d_f = derivative(f)









    Out[29]:





(anonymous function)



In [30]:

    
x = [f for f in 10:0.01:30];
y = d_f(x);



In [31]:

    
plot(x = x, y = y, Geom.line)









    Out[31]:



In [104]:

    
@time plot([nn.out_fun, nn.out_diff], -2, 2)









    



elapsed time: 0.000152265 seconds (7552 bytes allocated)






    Out[104]:



In [33]:

    
plot([nn.act_fun, nn.act_diff], -2, 2) # No xlim and ylim yet....









    Out[33]:



In [ ]:



In [34]:

    
data = readdlm("hw4_nnet_train.dat", ' ');
X = data[:, 1:end-1];
Y = data[:, end];



In [35]:

    
println("X: ", size(X))
println("Y: ", size(Y))









    



X: (25,2)
Y: (25,)



In [44]:

    
nn_ntu = SimpleNeuralNetwork([2, 6, 1]; act_fun = tanh, out_fun = tanh);



In [45]:

    
sqr_cost(predict(nn_ntu, X), Y)









    Out[45]:





1.6968214893813824



In [46]:

    
@time errors = train!(nn_ntu, X, Y; epos = 50000);
errors[end-10:end]









    



Terminating training process due to no significant improvement.
At iteration No. 15000
elapsed time: 14.937570843 seconds (2651925028 bytes allocated, 22.53% gc time)






    Out[46]:





11-element Array{Float64,1}:
 0.00266592
 0.00266489
 0.00266489
 0.00266392
 0.00226709
 0.00226709
 0.00226727
 0.00231913
 0.00232005
 0.00231991
 0.00231953



In [47]:

    
sqr_cost(predict(nn_ntu, X), Y)









    Out[47]:





0.0023195348805118447



In [51]:

    
var(errors[end - 2000:end - 1000])









    Out[51]:





0.01060648738010122



In [48]:

    
num_sample = 1000
plot(x=1:num_sample, y = errors[1:num_sample], Geom.line)









    Out[48]:



In [52]:

    
Y









    Out[52]:





25-element Array{Float64,1}:
  1.0
  1.0
  1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0



In [67]:

    
y_hat = Float64[ y[1] for y in map(round, predict(nn_ntu, X))]









    Out[67]:





25-element Array{Float64,1}:
  1.0
  1.0
  1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0



In [ ]:



In [68]:

    
using DataFrames
using RDatasets



In [69]:

    
wine_data = readtable("winequality-red.csv", separator = ';');



In [70]:

    
temp = DataFrames.columns(wine_data);



In [71]:

    
names(wine_data)









    Out[71]:





12-element Array{Symbol,1}:
 :fixed_acidity       
 :volatile_acidity    
 :citric_acid         
 :residual_sugar      
 :chlorides           
 :free_sulfur_dioxide 
 :total_sulfur_dioxide
 :density             
 :pH                  
 :sulphates           
 :alcohol             
 :quality



In [72]:

    
wine_data[[:pH, :quality]][1:10,:]









    Out[72]:




pH quality
1 3.51 5
2 3.2 5
3 3.26 5
4 3.16 6
5 3.51 5
6 3.51 5
7 3.3 5
8 3.39 7
9 3.36 7
10 3.35 5



In [73]:

    
DataFrames.index(wine_data)









    Out[73]:





Index([:residual_sugar=>4,:quality=>12,:chlorides=>5,:fixed_acidity=>1,:total_sulfur_dioxide=>7,:volatile_acidity=>2,:citric_acid=>3,:pH=>9,:alcohol=>11,:sulphates=>10,:density=>8,:free_sulfur_dioxide=>6],[:fixed_acidity,:volatile_acidity,:citric_acid,:residual_sugar,:chlorides,:free_sulfur_dioxide,:total_sulfur_dioxide,:density,:pH,:sulphates,:alcohol,:quality])



In [74]:

    
X = array(wine_data[:, 1:end-1]);
Y = Float64[y for y in array(wine_data[:, end])];



In [75]:

    
X[1:3, :]









    Out[75]:





3x11 Array{Float64,2}:
 7.4  0.7   0.0   1.9  0.076  11.0  34.0  0.9978  3.51  0.56  9.4
 7.8  0.88  0.0   2.6  0.098  25.0  67.0  0.9968  3.2   0.68  9.8
 7.8  0.76  0.04  2.3  0.092  15.0  54.0  0.997   3.26  0.65  9.8



In [76]:

    
Y[1:3]









    Out[76]:





3-element Array{Float64,1}:
 5.0
 5.0
 5.0



In [77]:

    
size(X)









    Out[77]:





(1599,11)



In [78]:

    
plot(wine_data, x = "quality", Geom.histogram)









    Out[78]:



In [79]:

    
Y[rand(1:1599)]









    Out[79]:





6.0



In [85]:

    
nn_w = SimpleNeuralNetwork([11, 12, 6, 1]; act_fun = tanh);



In [91]:

    
sqr_cost(predict(nn_w, X), Y)









    Out[91]:





0.8870209968110678



In [87]:

    
errors = train!(nn_w, X, Y; epos = 600);



In [88]:

    
plot(x=1:length(errors), y = errors, Geom.line)









    Out[88]:



In [101]:

    
Y_hat = Float64[ round(y[1]) for y in predict(nn_w, X)];



In [103]:

    
sum(Y .== Y_hat)









    Out[103]:





639



In [ ]:



In [33]:

    
import PyPlot
x = linspace(0, 3);
y = nn.out_diff(x);
PyPlot.ylim((-0.5, 2))
PyPlot.plot(x, y)









    



INFO: Loading help data...






    












    Out[33]:





1-element Array{Any,1}:
 PyObject <matplotlib.lines.Line2D object at 0x110b46110>



In [66]:

    
?readdlm









    



Base.readdlm(source, delim::Char, T::Type, eol::Char; header=false, skipstart=0, use_mmap, ignore_invalid_chars=false, quotes=true, dims, comments=true, comment_char='#')

   Read a matrix from the source where each line (separated by
   "eol") gives one row, with elements separated by the given
   delimeter. The source can be a text file, stream or byte array.
   Memory mapped files can be used by passing the byte array
   representation of the mapped segment as source.

   If "T" is a numeric type, the result is an array of that type,
   with any non-numeric elements as "NaN" for floating-point types,
   or zero. Other useful values of "T" include "ASCIIString",
   "String", and "Any".

   If "header" is "true", the first row of data will be read as
   header and the tuple "(data_cells, header_cells)" is returned
   instead of only "data_cells".

   Specifying "skipstart" will ignore the corresponding number of
   initial lines from the input.

   If "use_mmap" is "true", the file specified by "source" is
   memory mapped for potential speedups. Default is "true" except on
   Windows. On Windows, you may want to specify "true" if the file
   is large, and is only read once and not written to.

   If "ignore_invalid_chars" is "true", bytes in "source" with
   invalid character encoding will be ignored. Otherwise an error is
   thrown indicating the offending character position.

   If "quotes" is "true", column enclosed within double-quote (``)
   characters are allowed to contain new lines and column delimiters.
   Double-quote characters within a quoted field must be escaped with
   another double-quote.

   Specifying "dims" as a tuple of the expected rows and columns
   (including header, if any) may speed up reading of large files.

   If "comments" is "true", lines beginning with "comment_char"
   and text following "comment_char" in any line are ignored.

Base.readdlm(source, delim::Char, eol::Char; options...)

   If all data is numeric, the result will be a numeric array. If some
   elements cannot be parsed as numbers, a cell array of numbers and
   strings is returned.

Base.readdlm(source, delim::Char, T::Type; options...)

   The end of line delimiter is taken as "\n".

Base.readdlm(source, delim::Char; options...)

   The end of line delimiter is taken as "\n". If all data is
   numeric, the result will be a numeric array. If some elements
   cannot be parsed as numbers, a cell array of numbers and strings is
   returned.

Base.readdlm(source, T::Type; options...)

   The columns are assumed to be separated by one or more whitespaces.
   The end of line delimiter is taken as "\n".

Base.readdlm(source; options...)

   The columns are assumed to be separated by one or more whitespaces.
   The end of line delimiter is taken as "\n". If all data is
   numeric, the result will be a numeric array. If some elements
   cannot be parsed as numbers, a cell array of numbers and strings is
   returned.



In [30]:

    
var([1, 2, 3])









    Out[30]:





1.0



In [ ]: