Documentation can be found here: online documentation.
In [ ]:
require 'BOAST'
Defining and declaring simple variables: their name can be anything that evaluates to a string. Note that by default BOAST prints to the standard output and generates FORTRAN.
In [ ]:
# Create one integer and one real BOAST variable, then emit their declarations.
a = BOAST::Int("a")
b = BOAST::Real("b")
BOAST::decl(a, b)
Defining a procedure construct, opening and closing it:
In [ ]:
# Build a procedure taking a and b, then emit its opening and closing parts.
p = BOAST::Procedure("test_proc", [a, b])
BOAST::opn(p)
BOAST::close(p)
nil
Changing the language used by BOAST:
In [ ]:
# Select C as the output language and emit the same procedure shell again.
BOAST.lang = BOAST::C
BOAST::opn(p)
BOAST::close(p)
nil
BOAST procedure parameters should be input, output or input-output parameters.
In [ ]:
# Redefine a as an input and b as an output parameter, give the procedure a
# body (b = a + 2), switch back to FORTRAN and print the whole procedure.
a = BOAST::Real("a", dir: :in)
b = BOAST::Real("b", dir: :out)
p = BOAST::Procedure("test_proc", [a, b]) do
  BOAST::pr(b === a + 2)
end
BOAST.lang = BOAST::FORTRAN
BOAST::pr(p)
nil
Writing BOAST:: all the time is tedious so let's import BOAST's namespace inside the global namespace:
In [ ]:
include BOAST
In [ ]:
# Procedure "vector_increment": for i from 1 to n, b[i] = a[i] + 2.
n = Int("n", dir: :in)
a = Real("a", dir: :in, dim: [Dim(n)])
b = Real("b", dir: :out, dim: [Dim(n)])
p = Procedure("vector_increment", [n, a, b]) do
  i = Int("i")
  decl i
  # Braces are required here: a do...end block would attach to `pr`, not to `For`.
  pr For(i, 1, n) { pr b[i] === a[i] + 2 }
end
Creating a computing kernel from a procedure is straightforward if you have only one procedure:
In [ ]:
# Create a computing kernel from procedure p.
k = p.ckernel
nil
In [ ]:
# Build the kernel created from p.
k.build
nil
If one wants to know what BOAST did it needs to be put in verbose mode. Notice the three compilation phases:
In [ ]:
# Turn verbose mode on and build again so that BOAST shows what it does.
set_verbose(true)
k.build
nil
In order to call kernels we need memory areas for the input and output parameters. For this we use the NArray library (C arrays wrapped in Ruby).
In [ ]:
# Two 1024-element NArray float arrays: `input` comes from #random, `output`
# is the array handed to the kernel as its output argument in the next cells.
input = NArray.float(1024).random
output = NArray.float(1024)
nil
Running and checking result:
In [ ]:
# Run the kernel, then fail if any element of output differs from input + 2
# by more than 1e-15.
k.run(input.length, input, output)
raise "Error !" if (output - input - 2).abs.max > 1e-15
Taking a performance measurement:
In [ ]:
# Keep the value returned by k.run and print its :duration entry
# (labelled as seconds by the " s" suffix).
stats = k.run(input.length, input, output)
puts " #{ stats[:duration]} s"
In [ ]:
# Turn verbose output off. set_array_start(0) goes together with the
# 0 .. n - 1 loop bounds used in vector_add (presumably it makes BOAST arrays
# zero-based - confirm against the BOAST documentation).
set_verbose(false)
set_array_start(0)
# Builds a kernel for the procedure "vector_add", which stores a[i] + b[i]
# into c[i] for arrays dimensioned by n.
#
# When the current language is CL or CUDA, the index is taken from
# get_global_id(0) and a single assignment is emitted; for any other language
# a For loop from 0 to n - 1 wraps the assignment.
#
# @return the value of Procedure#ckernel for the generated procedure
def vector_add
  length = Int("n", dir: :in)
  lhs = Real("a", dir: :in, dim: [Dim(length)])
  rhs = Real("b", dir: :in, dim: [Dim(length)])
  result = Real("c", dir: :out, dim: [Dim(length)])
  index = Int("i")
  procedure = Procedure("vector_add", [length, lhs, rhs, result]) do
    decl index
    assignment = result[index] === lhs[index] + rhs[index]
    if [CL, CUDA].include?(get_lang)
      pr index === get_global_id(0)
      pr assignment
    else
      # Braces on purpose: do...end would bind the block to `pr` instead of `For`.
      pr For(index, 0, length - 1) { pr assignment }
    end
  end
  procedure.ckernel
end
In [ ]:
# Problem size and host-side arrays for the vector_add runs.
n = 1024*1024
a = NArray.float( n ).random!
b = NArray.float( n ).random!
c = NArray.float( n )
# Largest accepted absolute residue; note that 10e-15 is 1e-14.
epsilon = 10e-15
# Reference result computed on the Ruby side.
c_ref = a + b
nil
In [ ]:
# For each language: generate the vector_add kernel, print it, run it on
# a, b, c and compare every element of c with the reference c_ref.
[FORTRAN, C, CL].each do |language|
  push_env(lang: language) do
    puts "#{get_lang_name}:"
    k = vector_add
    puts k.print
    # Overwrite c with random values so a stale result cannot pass the check.
    c.random!
    k.run(n, a, b, c, global_work_size: [n, 1, 1], local_work_size: [32, 1, 1])
    (c_ref - c).abs.each do |residue|
      raise "Warning: residue too big: #{residue}" if residue > epsilon
    end
  end
end
puts "Success !"
In [ ]:
# Turn verbose output off. set_array_start(0) goes together with the
# 0 .. n - 1 loop bounds used in vector_add_openmp (presumably it makes BOAST
# arrays zero-based - confirm against the BOAST documentation).
set_verbose(false)
set_array_start(0)
# Builds a kernel for the procedure "vector_add" whose loop is wrapped in an
# OpenMP parallel region (a, b, c shared; i private) and created with
# :openmp => true.
#
# @raise [RuntimeError] when the current language is CL or CUDA
# @return the value of Procedure#ckernel for the generated procedure
def vector_add_openmp
  raise "Invalid language #{get_lang_name}" if [CL, CUDA].include?(lang)

  length = Int("n", dir: :in)
  lhs = Real("a", dir: :in, dim: [Dim(length)])
  rhs = Real("b", dir: :in, dim: [Dim(length)])
  result = Real("c", dir: :out, dim: [Dim(length)])
  index = Int("i")
  procedure = Procedure("vector_add", [length, lhs, rhs, result]) do
    decl index
    # Braces on purpose: do...end would bind these blocks to `pr`.
    pr OpenMP::Parallel(shared: [lhs, rhs, result], private: [index]) {
      pr For(index, 0, length - 1, openmp: true) {
        pr result[index] === lhs[index] + rhs[index]
      }
    }
  end
  procedure.ckernel
end
In [ ]:
# Problem size and host-side arrays for the vector_add_openmp runs.
n = 1024*1024
a = NArray.float( n ).random!
b = NArray.float( n ).random!
c = NArray.float( n )
# Largest accepted absolute residue; note that 10e-15 is 1e-14.
epsilon = 10e-15
# Reference result computed on the Ruby side.
c_ref = a + b
nil
In [ ]:
# For each language: generate the OpenMP kernel, print it, build it with
# :openmp => true, run it and compare every element of c with c_ref.
[FORTRAN, C].each do |language|
  push_env(lang: language) do
    puts "#{get_lang_name}:"
    k = vector_add_openmp
    puts k.print
    # Overwrite c with random values so a stale result cannot pass the check.
    c.random!
    k.build(openmp: true)
    k.run(n, a, b, c)
    (c_ref - c).abs.each do |residue|
      raise "Warning: residue too big: #{residue}" if residue > epsilon
    end
  end
end
puts "Success !"
In [ ]:
# Turn verbose output off. set_array_start(0) goes together with the
# 0 .. nvec - 1 loop bounds used in vector_add_simd (presumably it makes BOAST
# arrays zero-based - confirm against the BOAST documentation).
set_verbose(false)
set_array_start(0)
# Builds a kernel for the procedure "vector_add" operating on arrays of nvec
# elements, each declared with the given :vector_length. The kernel is created
# with "immintrin.h" as an include.
#
# @param vector_length value passed as :vector_length for a, b and c (default 4)
# @raise [RuntimeError] when the current language is CL or CUDA
# @return the value of Procedure#ckernel for the generated procedure
def vector_add_simd(vector_length = 4)
  raise "Invalid language #{get_lang_name}" if [CL, CUDA].include?(lang)

  count = Int("nvec", dir: :in)
  lhs = Real("a", dir: :in, vector_length: vector_length, dim: [Dim(count)])
  rhs = Real("b", dir: :in, vector_length: vector_length, dim: [Dim(count)])
  result = Real("c", dir: :out, vector_length: vector_length, dim: [Dim(count)])
  index = Int("i")
  procedure = Procedure("vector_add", [count, lhs, rhs, result]) do
    decl index
    # Braces on purpose: do...end would bind the block to `pr` instead of `For`.
    pr For(index, 0, count - 1) { pr result[index] === lhs[index] + rhs[index] }
  end
  procedure.ckernel(includes: "immintrin.h")
end
You need aligned arrays for vectorization. ANArray is an NArray that takes alignment into account. The first argument is the alignment.
In [ ]:
# For vector lengths 2 and 4: allocate aligned arrays of shape
# (vector_length, nvec), then for each language generate the SIMD kernel,
# print it, run it and compare every element of c with c_ref.
[2, 4].each do |vector_length|
  nvec = 1024 * 1024 / vector_length
  # Alignment passed to ANArray: vector length times the default real size.
  alignment = vector_length * get_default_real_size
  a = ANArray.float(alignment, vector_length, nvec).random!
  b = ANArray.float(alignment, vector_length, nvec).random!
  c = ANArray.float(alignment, vector_length, nvec)
  epsilon = 10e-15
  c_ref = a + b
  [FORTRAN, C].each do |language|
    push_env(lang: language) do
      puts "#{get_lang_name}:"
      k = vector_add_simd(vector_length)
      puts k.print
      # Overwrite c with random values so a stale result cannot pass the check.
      c.random!
      k.run(nvec, a, b, c)
      (c_ref - c).abs.each do |residue|
        raise "Warning: residue too big: #{residue}" if residue > epsilon
      end
    end
  end
end
puts "Success !"
In [ ]: