notebook.community

Edit and run



In [1]:

    
import Base.+, Base.*

# This list isn't the tape itself, it's a list of "registered"
# adjoint jacobian function implementations. It is populated once,
# during module initialisation.
adj_jacobians = Array(Function,0)

# A class to represent the "tape" data structure
type Tape
    
    # The tape storage itself
    tape::Array{Int}
    # A list of locations of variables within the tape, so that
    # they can be reset to zero when needed
    var_pos::Array{Int}
    # The list of ultimate adjoint Jacobian outputs, shown in the
    # diagram as being at the start of the tape, are here held in
    # a separate list. Negative variable indices are used to
    # reference these outputs and distinguish them from the
    # variables in the tape.
    outputs::Array{Int}
    # The current position in the tape - only has meaning during
    # evaluation. Of course, holding this information (not to
    # mention the variables themselves) within the tape is pretty
    # dubious, from a state management / thread-safety point of
    # view.
    eval_pos::Int
    
    Tape() = new(Array(Int,0), Array(Int,0), Array(Int,0), 1);
end



In [2]:

    
# A class method for registering an adjoint jacobian implementation
# function. Returns a value that can be used as a tag in the tape.
function register_adjoint_jacobian(adj_jacobians::Array{Function}, adjoint_jacobian::Function)
    push!(adj_jacobians, adjoint_jacobian)
    return length(adj_jacobians)
end

# Grow the tape by appending a fixed value (will be a "black"
# input, a function tag or a variable reference)
function append_constant(tape::Tape, contents)
    push!(tape.tape, contents)
    tape.eval_pos = tape.eval_pos + 1
end

# Grow the tape by allocating a variable slot at the end
function append_variable(tape::Tape)
    push!(tape.tape, 0)
    pos = length(tape.tape)
    push!(tape.var_pos, pos)
    return pos
end

# Add an overall adjoint Jacobian output to the outputs list. This
# happens when an "initial" ADReverseDouble is created - i.e. one
# that doesn't arise from a previous AD-enabled operation.
function add_output(tape::Tape)
    push!(tape.outputs, 0)
    return -length(tape.outputs)
end

# Add a quantity to the variable referenced by the value in the
# current slot, which may be a variable in the tape (+ve ref) or
# an entry in self.outputs (-ve ref)
function add_to_referenced_variable(tape::Tape, value)
    ref = tape.tape[tape.eval_pos]
    if ref >=1
        tape.tape[ref] += value
    else
        tape.outputs[-ref] += value
    end
end

# Directly write to a variable at a particular position. This is
# needed when setting an overall adjoint Jaobian input, via an
# ADReverseDouble for a tape evaluation.
function write_to_variable_at_index(tape::Tape, idx::Int, contents)
    if idx >=1
        tape.tape[idx] = contents
    else
        tape.outputs[-idx] = contents
    end
end

# Directly read a variable at a particular index. This is needed
# when reading the adjoint Jacobian output from an
# ADReverseDouble.
function read_variable_at_index(tape::Tape, idx)
    if idx >=1
        return tape.tape[idx]
    else
        return tape.outputs[-idx]
    end
end
 
# Read the value at the current location in the tape (during
# evaluation)
function read_current_value(tape::Tape)
    return tape.tape[tape.eval_pos]
end
 
# Step back one position in the tape (during evaluation)
function step_back(tape::Tape)
    tape.eval_pos -= 1
end
 
# Reset all variables and outputs to zero. Needed the tape has to
# be evaluated for multiple adjoint Jacobian inputs.
function reset_variables(tape::Tape)
    for pos in tape.var_pos
        tape.tape[pos] = 0
    end
    for idx in 1:length(tape.outputs)
        tape.outputs[idx] = 0
    end
end
 
# Evaluate the tape.
function evaluate(tape::Tape)
    tape.eval_pos = length(tape.tape)
    while tape.eval_pos >= 1  
            # Look up the adjoint_jacobian according to the tag at the
            # current position in the tape, and call it
        adj_jacobians[tape.tape[tape.eval_pos]](tape)
        step_back(tape)
    end
end









    Out[2]:





evaluate (generic function with 1 method)



In [3]:

    
tape = Tape()









    Out[3]:





Tape(Int64[],Int64[],Int64[],1)



In [4]:

    
# A reverse-mode AD enabled floating point type
type ADReverseFloat
    value::Float64
    tape::Tape
    adj_jac_output_ref::Int
    
    function ADReverseFloat(value::Float64, tape::Tape, ref::Int)
        return new(value, tape, ref)
    end
end

ADReverseFloat(value::Float64, tape::Tape) = ADReverseFloat(value, tape, add_output(tape))









    Out[4]:





ADReverseFloat



In [5]:

    
# Used to set an input to the overall adjoint Jacobian. This
# should only be called on ADReverseFloats that represent ultimate
# outputs of the calculation.
function set_initial_adj_jac_input(adFloat::ADReverseFloat, value)
    write_to_variable_at_index(adFloat.tape, adFloat.adj_jac_output_ref, value)
end

# Used to get the result of the adjoint Jacobian evaluation. Would
# typically be called on ADReverseFloats representing the overall
# inputs to the calculation, but could be called on intermediate
# values in order to obtain their sensitivities as well.
function get_adj_jac_output(adFloat::ADReverseFloat)
    return read_variable_at_index(adFloat.tape, adFloat.adj_jac_output_ref)
end

# Implementation of the adjoint Jacobian for the + operator. The
# formula can be checked by working out the adjoint Jacobian
# matrix by hand.
function adFloat_adj_jac_add(tape::Tape)
    # Fetch the adjoint Jacobian input
    step_back(tape)
    adj_jac_input = read_current_value(tape)
    # Simply add it to both of the adjoint Jacobian output
    # variables
    step_back(tape)
    add_to_referenced_variable(tape, adj_jac_input)
    step_back(tape)
    add_to_referenced_variable(tape, adj_jac_input)
end
# Statically register the adjoint Jacobian function for the add
# operator with the Tape class
ADD_TAG = register_adjoint_jacobian(adj_jacobians, adFloat_adj_jac_add::Function)

# Implementation of the + operator.
function adFloat_add(adFloatSelf::ADReverseFloat, adFloatOther::ADReverseFloat)
    # Write the references to where to store the adjoint Jacobian
    # outputs
    append_constant(adFloatSelf.tape, adFloatSelf.adj_jac_output_ref)
    append_constant(adFloatSelf.tape, adFloatOther.adj_jac_output_ref)
    # Allocate a variable to store the ajoint Jacobian input
    ref = append_variable(adFloatSelf.tape)
    # Since this function is linear, its adjoint Jacobian doesn't
    # depend on the values of self and other (it's constant), so
    # we don't need to store them in the tape. We just proceed to
    # storing the function tag.
    append_constant(adFloatSelf.tape, ADD_TAG)
    # The returned ADReverseFloat references the variable allocated above
    return ADReverseFloat(adFloatSelf.value + adFloatOther.value, adFloatSelf.tape, ref)
    #return create_result(adFloatSelf.value + adFloatOther.value, adFloatSelf.tape, ref)
end

+(x::ADReverseFloat, y::ADReverseFloat) = adFloat_add(x, y)

# Implementation of the * operator.
# Implementation of the adjoint Jacobian for the * operatorq.  The
# formula can be checked by working out the adjoint Jacobian
# matrix by hand.
function adFloat_adj_jac_mul(tape::Tape)
    # Fetch the original function inputs
    step_back(tape)        
    other_value = read_current_value(tape)
    step_back(tape)
    self_value = read_current_value(tape)
    # Fetch the adjoint Jacobian input
    step_back(tape)
    adj_jac_input = read_current_value(tape)
    # Add the adjoint Jacobian outputs to the appropriate
    # variables
    step_back(tape)
    add_to_referenced_variable(tape, adj_jac_input * self_value)
    step_back(tape)
    add_to_referenced_variable(tape, adj_jac_input * other_value)
end

# Statically register the adjoint Jacobian function for the add
# operator with the Tape class
MUL_TAG = register_adjoint_jacobian(adj_jacobians, adFloat_adj_jac_mul::Function)

function adFloat_mul(adFloatSelf::ADReverseFloat, adFloatOther::ADReverseFloat)
    # Write the references to where to store the adjoint Jacobian
    # outputs
    append_constant(adFloatSelf.tape, adFloatSelf.adj_jac_output_ref)
    append_constant(adFloatSelf.tape, adFloatOther.adj_jac_output_ref)
    # Allocate a variable to store the ajoint Jacobian input
    ref = append_variable(adFloatSelf.tape)
    # Write the values of self and other, as they will be needed
    append_constant(adFloatSelf.tape, adFloatSelf.value)
    append_constant(adFloatSelf.tape, adFloatOther.value)
    # Store the function tag
    append_constant(adFloatSelf.tape, MUL_TAG)
    # The returned ADReverseFloat references the variable allocated above
    return ADReverseFloat(adFloatSelf.value * adFloatOther.value, adFloatSelf.tape, ref)
    #return create_result(adFloatSelf.value * adFloatOther.value, adFloatSelf.tape, ref)
end

*(x::ADReverseFloat, y::ADReverseFloat) = adFloat_mul(x, y)









    Out[5]:





* (generic function with 152 methods)



In [6]:

    
# Now let's implement a function of three variables: x^2 + xy + xz
function func1(x,y,z)
    return x * x + x * y + x * z
end

# Invoke the function with "ordinary" floats just gives us the result
println(func1(3.0,4.0,5.0)) # prints 36.0
 
# To get derivatives, we first create a tape
tape = Tape()
 
# Create some initial ADReverseFloats, linked to the tape
x = ADReverseFloat(3.0, tape)
y = ADReverseFloat(4.0, tape)
z = ADReverseFloat(5.0, tape)









    



36.0






    Out[6]:





ADReverseFloat(5.0,Tape(Int64[],Int64[],[0,0,0],1),-3)



In [7]:

    
# Evaluate the function
result = func1(x,y,z)









    Out[7]:





ADReverseFloat(36.0,Tape([-1,-1,0,3,3,2,-1,-2,0,3,4,2,-1,-3,0,3,5,2,3,9,0,1,21,15,0,1],[3,9,15,21,25],[0,0,0],22),25)



In [8]:

    
# Reset the tape variables. These will actually already be zero as
# this is the first time we're using the tape, but just to illustrate.
reset_variables(result.tape)



In [9]:

    
# Set the unit vector in output space, this identifying which value we
# want the derivatives _of_. There's only one output here, of course,
# so we just set its adjoint Jacobian input to 1.
set_initial_adj_jac_input(result, 1.0)









    Out[9]:





1.0



In [10]:

    
tape









    Out[10]:





Tape([-1,-1,0,3,3,2,-1,-2,0,3,4,2,-1,-3,0,3,5,2,3,9,0,1,21,15,1,1],[3,9,15,21,25],[0,0,0],22)



In [11]:

    
result









    Out[11]:





ADReverseFloat(36.0,Tape([-1,-1,0,3,3,2,-1,-2,0,3,4,2,-1,-3,0,3,5,2,3,9,0,1,21,15,1,1],[3,9,15,21,25],[0,0,0],22),25)



In [12]:

    
# Evaluate the tape
evaluate(tape)



In [13]:

    
# Read off the derivatives
println(get_adj_jac_output(x)) # prints 15.0, which is the derivative w.r.t x
println(get_adj_jac_output(y)) # prints 3.0, which is the derivative w.r.t y
println(get_adj_jac_output(z)) # prints 3.0, which is the derivative w.r.t z



In [ ]: