In [1]:
import Base.+, Base.*
# This list isn't the tape itself, it's a list of "registered"
# adjoint jacobian function implementations. It is populated once,
# during module initialisation.
adj_jacobians = Array(Function,0)
# A class to represent the "tape" data structure
type Tape
# The tape storage itself
tape::Array{Int}
# A list of locations of variables within the tape, so that
# they can be reset to zero when needed
var_pos::Array{Int}
# The list of ultimate adjoint Jacobian outputs, shown in the
# diagram as being at the start of the tape, are here held in
# a separate list. Negative variable indices are used to
# reference these outputs and distinguish them from the
# variables in the tape.
outputs::Array{Int}
# The current position in the tape - only has meaning during
# evaluation. Of course, holding this information (not to
# mention the variables themselves) within the tape is pretty
# dubious, from a state management / thread-safety point of
# view.
eval_pos::Int
Tape() = new(Array(Int,0), Array(Int,0), Array(Int,0), 1);
end
In [2]:
# A class method for registering an adjoint jacobian implementation
# function. Returns a value that can be used as a tag in the tape.
function register_adjoint_jacobian(adj_jacobians::Array{Function}, adjoint_jacobian::Function)
push!(adj_jacobians, adjoint_jacobian)
return length(adj_jacobians)
end
# Grow the tape by appending a fixed value (will be a "black"
# input, a function tag or a variable reference)
function append_constant(tape::Tape, contents)
push!(tape.tape, contents)
tape.eval_pos = tape.eval_pos + 1
end
# Grow the tape by allocating a variable slot at the end
function append_variable(tape::Tape)
push!(tape.tape, 0)
pos = length(tape.tape)
push!(tape.var_pos, pos)
return pos
end
# Add an overall adjoint Jacobian output to the outputs list. This
# happens when an "initial" ADReverseDouble is created - i.e. one
# that doesn't arise from a previous AD-enabled operation.
function add_output(tape::Tape)
push!(tape.outputs, 0)
return -length(tape.outputs)
end
# Add a quantity to the variable referenced by the value in the
# current slot, which may be a variable in the tape (+ve ref) or
# an entry in self.outputs (-ve ref)
function add_to_referenced_variable(tape::Tape, value)
ref = tape.tape[tape.eval_pos]
if ref >=1
tape.tape[ref] += value
else
tape.outputs[-ref] += value
end
end
# Directly write to a variable at a particular position. This is
# needed when setting an overall adjoint Jaobian input, via an
# ADReverseDouble for a tape evaluation.
function write_to_variable_at_index(tape::Tape, idx::Int, contents)
if idx >=1
tape.tape[idx] = contents
else
tape.outputs[-idx] = contents
end
end
# Directly read a variable at a particular index. This is needed
# when reading the adjoint Jacobian output from an
# ADReverseDouble.
function read_variable_at_index(tape::Tape, idx)
if idx >=1
return tape.tape[idx]
else
return tape.outputs[-idx]
end
end
# Read the value at the current location in the tape (during
# evaluation)
function read_current_value(tape::Tape)
return tape.tape[tape.eval_pos]
end
# Step back one position in the tape (during evaluation)
function step_back(tape::Tape)
tape.eval_pos -= 1
end
# Reset all variables and outputs to zero. Needed the tape has to
# be evaluated for multiple adjoint Jacobian inputs.
function reset_variables(tape::Tape)
for pos in tape.var_pos
tape.tape[pos] = 0
end
for idx in 1:length(tape.outputs)
tape.outputs[idx] = 0
end
end
# Evaluate the tape.
function evaluate(tape::Tape)
tape.eval_pos = length(tape.tape)
while tape.eval_pos >= 1
# Look up the adjoint_jacobian according to the tag at the
# current position in the tape, and call it
adj_jacobians[tape.tape[tape.eval_pos]](tape)
step_back(tape)
end
end
Out[2]:
In [3]:
tape = Tape()
Out[3]:
In [4]:
# A reverse-mode AD enabled floating point type
type ADReverseFloat
value::Float64
tape::Tape
adj_jac_output_ref::Int
function ADReverseFloat(value::Float64, tape::Tape, ref::Int)
return new(value, tape, ref)
end
end
ADReverseFloat(value::Float64, tape::Tape) = ADReverseFloat(value, tape, add_output(tape))
Out[4]:
In [5]:
# Used to set an input to the overall adjoint Jacobian. This
# should only be called on ADReverseFloats that represent ultimate
# outputs of the calculation.
function set_initial_adj_jac_input(adFloat::ADReverseFloat, value)
write_to_variable_at_index(adFloat.tape, adFloat.adj_jac_output_ref, value)
end
# Used to get the result of the adjoint Jacobian evaluation. Would
# typically be called on ADReverseFloats representing the overall
# inputs to the calculation, but could be called on intermediate
# values in order to obtain their sensitivities as well.
function get_adj_jac_output(adFloat::ADReverseFloat)
return read_variable_at_index(adFloat.tape, adFloat.adj_jac_output_ref)
end
# Implementation of the adjoint Jacobian for the + operator. The
# formula can be checked by working out the adjoint Jacobian
# matrix by hand.
function adFloat_adj_jac_add(tape::Tape)
# Fetch the adjoint Jacobian input
step_back(tape)
adj_jac_input = read_current_value(tape)
# Simply add it to both of the adjoint Jacobian output
# variables
step_back(tape)
add_to_referenced_variable(tape, adj_jac_input)
step_back(tape)
add_to_referenced_variable(tape, adj_jac_input)
end
# Statically register the adjoint Jacobian function for the add
# operator with the Tape class
ADD_TAG = register_adjoint_jacobian(adj_jacobians, adFloat_adj_jac_add::Function)
# Implementation of the + operator.
function adFloat_add(adFloatSelf::ADReverseFloat, adFloatOther::ADReverseFloat)
# Write the references to where to store the adjoint Jacobian
# outputs
append_constant(adFloatSelf.tape, adFloatSelf.adj_jac_output_ref)
append_constant(adFloatSelf.tape, adFloatOther.adj_jac_output_ref)
# Allocate a variable to store the ajoint Jacobian input
ref = append_variable(adFloatSelf.tape)
# Since this function is linear, its adjoint Jacobian doesn't
# depend on the values of self and other (it's constant), so
# we don't need to store them in the tape. We just proceed to
# storing the function tag.
append_constant(adFloatSelf.tape, ADD_TAG)
# The returned ADReverseFloat references the variable allocated above
return ADReverseFloat(adFloatSelf.value + adFloatOther.value, adFloatSelf.tape, ref)
#return create_result(adFloatSelf.value + adFloatOther.value, adFloatSelf.tape, ref)
end
+(x::ADReverseFloat, y::ADReverseFloat) = adFloat_add(x, y)
# Implementation of the * operator.
# Implementation of the adjoint Jacobian for the * operatorq. The
# formula can be checked by working out the adjoint Jacobian
# matrix by hand.
function adFloat_adj_jac_mul(tape::Tape)
# Fetch the original function inputs
step_back(tape)
other_value = read_current_value(tape)
step_back(tape)
self_value = read_current_value(tape)
# Fetch the adjoint Jacobian input
step_back(tape)
adj_jac_input = read_current_value(tape)
# Add the adjoint Jacobian outputs to the appropriate
# variables
step_back(tape)
add_to_referenced_variable(tape, adj_jac_input * self_value)
step_back(tape)
add_to_referenced_variable(tape, adj_jac_input * other_value)
end
# Statically register the adjoint Jacobian function for the add
# operator with the Tape class
MUL_TAG = register_adjoint_jacobian(adj_jacobians, adFloat_adj_jac_mul::Function)
function adFloat_mul(adFloatSelf::ADReverseFloat, adFloatOther::ADReverseFloat)
# Write the references to where to store the adjoint Jacobian
# outputs
append_constant(adFloatSelf.tape, adFloatSelf.adj_jac_output_ref)
append_constant(adFloatSelf.tape, adFloatOther.adj_jac_output_ref)
# Allocate a variable to store the ajoint Jacobian input
ref = append_variable(adFloatSelf.tape)
# Write the values of self and other, as they will be needed
append_constant(adFloatSelf.tape, adFloatSelf.value)
append_constant(adFloatSelf.tape, adFloatOther.value)
# Store the function tag
append_constant(adFloatSelf.tape, MUL_TAG)
# The returned ADReverseFloat references the variable allocated above
return ADReverseFloat(adFloatSelf.value * adFloatOther.value, adFloatSelf.tape, ref)
#return create_result(adFloatSelf.value * adFloatOther.value, adFloatSelf.tape, ref)
end
*(x::ADReverseFloat, y::ADReverseFloat) = adFloat_mul(x, y)
Out[5]:
In [6]:
# Now let's implement a function of three variables: x^2 + xy + xz
function func1(x,y,z)
return x * x + x * y + x * z
end
# Invoke the function with "ordinary" floats just gives us the result
println(func1(3.0,4.0,5.0)) # prints 36.0
# To get derivatives, we first create a tape
tape = Tape()
# Create some initial ADReverseFloats, linked to the tape
x = ADReverseFloat(3.0, tape)
y = ADReverseFloat(4.0, tape)
z = ADReverseFloat(5.0, tape)
Out[6]:
In [7]:
# Evaluate the function
result = func1(x,y,z)
Out[7]:
In [8]:
# Reset the tape variables. These will actually already be zero as
# this is the first time we're using the tape, but just to illustrate.
reset_variables(result.tape)
In [9]:
# Set the unit vector in output space, this identifying which value we
# want the derivatives _of_. There's only one output here, of course,
# so we just set its adjoint Jacobian input to 1.
set_initial_adj_jac_input(result, 1.0)
Out[9]:
In [10]:
tape
Out[10]:
In [11]:
result
Out[11]:
In [12]:
# Evaluate the tape
evaluate(tape)
In [13]:
# Read off the derivatives
println(get_adj_jac_output(x)) # prints 15.0, which is the derivative w.r.t x
println(get_adj_jac_output(y)) # prints 3.0, which is the derivative w.r.t y
println(get_adj_jac_output(z)) # prints 3.0, which is the derivative w.r.t z
In [ ]: