GEMM


In [112]:
include("../src/JuBLAS.jl")


WARNING: replacing module JuBLAS
Out[112]:
JuBLAS

In [113]:
# Real-valued operands: two random 5x5 inputs, then a zero-filled 5x5 matrix.
A = randn(5, 5); B = randn(5, 5)
C = fill(0.0, 5, 5)


Out[113]:
5x5 Array{Float64,2}:
 0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0

In [114]:
# Real (Float64) GEMM checks.
# Each println shows the largest absolute elementwise difference between the
# expression built from the built-in operators and the value returned by JuBLAS.
# `maximum(abs, X)` replaces `maxabs(X)`: it is the same reduction, and unlike
# `maxabs` it is still available in later Julia releases.

# gemm with alpha = 1.0, one line per combination of the 'N'/'T' flags.
println(maximum(abs, A*B - JuBLAS.gemm('N', 'N', 1.0, A, B)))

println(maximum(abs, A'*B - JuBLAS.gemm('T', 'N', 1.0, A, B)))

println(maximum(abs, A'*B' - JuBLAS.gemm('T', 'T', 1.0, A, B)))

println(maximum(abs, A*B' - JuBLAS.gemm('N', 'T', 1.0, A, B)))


# gemm! is compared against alpha*op(A)*op(B) + beta*C. A large constant C makes
# the beta*C term clearly visible in the result.
C = 1000*ones(5,5)

# C0 is a fresh copy before every call -- presumably gemm! overwrites its last
# argument, so C itself has to stay intact for the reference expression.
C0 = copy(C)
println(maximum(abs, 2*A*B + C - JuBLAS.gemm!('N', 'N', 2.0, A, B, 1.0, C0)))


# Same checks with an integer beta (10) for every flag combination.
C0 = copy(C)
println(maximum(abs, 2*A*B + 10*C - JuBLAS.gemm!('N', 'N', 2.0, A, B, 10, C0)))

C0 = copy(C)
println(maximum(abs, 2*A'*B + 10*C - JuBLAS.gemm!('T', 'N', 2.0, A, B, 10, C0)))

C0 = copy(C)
println(maximum(abs, 2*A*B' + 10*C - JuBLAS.gemm!('N', 'T', 2.0, A, B, 10, C0)))

C0 = copy(C)
println(maximum(abs, 2*A'*B' + 10*C - JuBLAS.gemm!('T', 'T', 2.0, A, B, 10, C0)))


0.0
0.0
0.0
0.0
2.2737367544323206e-13
1.8189894035458565e-12
0.0
1.8189894035458565e-12
0.0

In [115]:
# Build an n-by-m complex matrix from two independent randn(n, m) draws:
# the first draw is the real part, the second is the imaginary part.
function randncmplx(n, m)
    realpart = randn(n, m)
    imagpart = randn(n, m)
    return realpart + im*imagpart
end


Out[115]:
randncmplx (generic function with 1 method)

In [116]:
# Complex GEMM checks on square 5x5 operands (randncmplx comes from an earlier cell).
# Each println shows the largest absolute elementwise difference between the
# expression built from the built-in operators and the value returned by JuBLAS.
# `maximum(abs, X)` replaces `maxabs(X)` and the postfix `'` replaces
# `ctranspose(...)`: both spellings denote the same operations and, unlike the
# originals, are still available in later Julia releases.
A = randncmplx(5,5)
B = randncmplx(5,5)


# gemm with alpha = 1: flag 'C' is checked against the conjugate transpose (')
# and flag 'T' against the plain transpose.
println(maximum(abs, A*B - JuBLAS.gemm('N', 'N', 1, A, B)))
println(maximum(abs, A'*B - JuBLAS.gemm('C', 'N', 1, A, B)))
println(maximum(abs, transpose(A)*B - JuBLAS.gemm('T', 'N', 1, A, B)))
println(maximum(abs, A*B' - JuBLAS.gemm('N', 'C', 1, A, B)))
println(maximum(abs, A*transpose(B) - JuBLAS.gemm('N', 'T', 1, A, B)))
println(maximum(abs, transpose(A)*transpose(B) - JuBLAS.gemm('T', 'T', 1, A, B)))
println(maximum(abs, A'*B' - JuBLAS.gemm('C', 'C', 1, A, B)))


# gemm! is compared against alpha*op(A)*op(B) + beta*C with a constant complex C.
# C0 is a fresh copy before every call -- presumably gemm! overwrites its last
# argument, so C itself has to stay intact for the reference expression.
C = 1000*ones(Complex{Float64}, 5, 5)
C0 = copy(C)
println(maximum(abs, 2*A*B + 2*C - JuBLAS.gemm!('N', 'N', 2, A, B, 2, C0)))

C0 = copy(C)
println(maximum(abs, 2*A'*B + 2*C - JuBLAS.gemm!('C', 'N', 2, A, B, 2, C0)))

# beta = 0 here, so the reference value has no contribution from C.
C0 = copy(C)
println(maximum(abs, 2*transpose(A)*B + 0*C - JuBLAS.gemm!('T', 'N', 2, A, B, 0, C0)))

C0 = copy(C)
println(maximum(abs, 2*A*B' + 2*C - JuBLAS.gemm!('N', 'C', 2, A, B, 2, C0)))

C0 = copy(C)
println(maximum(abs, 2*A*transpose(B) + 2*C - JuBLAS.gemm!('N', 'T', 2, A, B, 2, C0)))

C0 = copy(C)
println(maximum(abs, 2*transpose(A)*transpose(B) + 2*C - JuBLAS.gemm!('T', 'T', 2, A, B, 2, C0)))

C0 = copy(C)
println(maximum(abs, 2*A'*B' + 2*C - JuBLAS.gemm!('C', 'C', 2, A, B, 2, C0)))


1.2560739669470201e-15
1.9860273225978185e-15
1.7763568394002505e-15
1.2560739669470201e-15
8.881784197001252e-16
9.930136612989092e-16
9.930136612989092e-16
2.273806142312601e-13
2.2737757853755275e-13
3.552713678800501e-15
2.273806142312601e-13
4.54749302446187e-13
1.7763568394002505e-15
1.7763568394002505e-15

In [125]:
# Complex gemm! checks on rectangular operands (randncmplx comes from an earlier cell).
# Each println shows the largest absolute elementwise difference between
# alpha*op(A)*op(B) + beta*C built from the built-in operators and the value
# returned by JuBLAS.gemm!. `maximum(abs, X)` replaces `maxabs(X)`: it is the same
# reduction, and unlike `maxabs` it is still available in later Julia releases.
# C0 is a fresh copy before every call -- presumably gemm! overwrites its last
# argument, so C itself has to stay intact for the reference expression.

# A is 3x5 and B is 5x3: A*B is 3x3, while the doubly transposed products are 5x5.
A = randncmplx(3,5)
B = randncmplx(5,3)

C = 1000*ones(Complex{Float64}, 3, 3)
C0 = copy(C)
println(maximum(abs, 2*A*B + 2*C - JuBLAS.gemm!('N', 'N', 2, A, B, 2, C0)))

C = 1000*ones(Complex{Float64}, 5, 5)
C0 = copy(C)
println(maximum(abs, 2*A'*B' + 2*C - JuBLAS.gemm!('C', 'C', 2, A, B, 2, C0)))

C = 1000*ones(Complex{Float64}, 5, 5)
C0 = copy(C)
println(maximum(abs, 2*transpose(A)*transpose(B) + 2*C - JuBLAS.gemm!('T', 'T', 2, A, B, 2, C0)))


# A is 3x5 and B is 3x6: only the first operand is transposed, giving a 5x6 result.
A = randncmplx(3,5)
B = randncmplx(3,6)
C = 1000*ones(Complex{Float64}, 5, 6)
C0 = copy(C)
println(maximum(abs, 2*A'*B + 2*C - JuBLAS.gemm!('C', 'N', 2, A, B, 2, C0)))

C = 1000*ones(Complex{Float64}, 5, 6)
C0 = copy(C)
println(maximum(abs, 2*transpose(A)*B + 2*C - JuBLAS.gemm!('T', 'N', 2, A, B, 2, C0)))

# A is 3x5 and B is 6x5: only the second operand is transposed, giving a 3x6 result.
A = randncmplx(3,5)
B = randncmplx(6,5)
C = 1000*ones(Complex{Float64}, 3, 6)
C0 = copy(C)
println(maximum(abs, 2*A*B' + 2*C - JuBLAS.gemm!('N', 'C', 2, A, B, 2, C0)))
C0 = copy(C)
println(maximum(abs, 2*A*transpose(B) + 2*C - JuBLAS.gemm!('N', 'T', 2, A, B, 2, C0)))


2.273806142312601e-13
2.2737410912368746e-13
2.2737410912368746e-13
8.881784197001252e-16
8.881784197001252e-16
2.273892874185396e-13
4.547508203201813e-13

In [ ]: