In [ ]:
#%matplotlib inline

In [ ]:
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.algorithm
import pyopencl.clrandom
import matplotlib.pyplot as plt
norm = lambda x: np.max(np.abs(x))

In [ ]:
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

In [ ]:
with open("tensor.cl") as f:
    code = f.read()

prg = cl.Program(ctx, code).build()

matvec_single = prg.matvec_single
matvec_single.set_scalar_arg_dtypes([None, None, None, np.int32])

matvec_tile = prg.matvec_tile
matvec_tile.set_scalar_arg_dtypes([None, None, None, 
                                   np.int32, np.int32])

matmat_single = prg.matmat_single
matmat_single.set_scalar_arg_dtypes([None, None, None, 
                                     np.int32, np.int32])

matmat_tile = prg.matmat_tile
matmat_tile.set_scalar_arg_dtypes([None, None, None, 
                                   np.int32, np.int32, np.int32])

tensor_IIA_single = prg.tensor_IIA_single
tensor_IIA_single.set_scalar_arg_dtypes([None, None, None, np.int32])

tensor_IAI_single = prg.tensor_IAI_single
tensor_IAI_single.set_scalar_arg_dtypes([None, None, None, np.int32])

tensor_AII_single = prg.tensor_AII_single
tensor_AII_single.set_scalar_arg_dtypes([None, None, None, np.int32])

tensor_tile = prg.tensor_tile
tensor_tile.set_scalar_arg_dtypes([None, None, None, 
                                   np.int32, np.int32, np.int32])

Matrix Matrix Products


In [ ]:
tile_size = 16

n = 16*10**2
k = n

# x_d = cl.array.arange(queue, n, dtype=np.double)/n
# A_d = cl.array.arange(queue, n*n, dtype=np.double).reshape((n,n))/(n*n)
# M_d = cl.array.arange(queue, n*k, dtype=np.double).reshape((n,k))/(n*k)

x_d = cl.clrandom.rand(queue, n, dtype=np.double)/n
A_d = cl.clrandom.rand(queue, n*n, dtype=np.double).reshape((n,n))/(n*n)
M_d = cl.clrandom.rand(queue, n*k, dtype=np.double).reshape((n,k))/(n*k)

x = x_d.get()
A = A_d.get()
M = M_d.get()
b = A.dot(x)
B = A.dot(M)

Matvec Single


In [ ]:
b_d = cl.array.Array(queue, n, dtype=np.double)
evt = matvec_single(queue, (4,), None, 
                    A_d.data, x_d.data, b_d.data, n)

norm(b_d.get()-b)

In [ ]:
# %timeit matvec_single(queue, (4,), None,\
#                       A_d.data, x_d.data, b_d.data, n).wait()

Matvec Tile


In [ ]:
b_d = cl.array.Array(queue, n, dtype=np.double)
b_d[:] = 0.0
evt = matvec_tile(queue, (4,), None, 
                  A_d.data, x_d.data, b_d.data, 
                  n, tile_size)

norm(b_d.get()-b)

In [ ]:
# %timeit matvec_tile(queue, (4,), None,\
#                     A_d.data, x_d.data, b_d.data,\
#                     n, tile_size).wait()

Matmat Single


In [ ]:
B_d = cl.array.Array(queue, n*k, dtype=np.double)
evt = matmat_single(queue, (4,), None, 
                    A_d.data, M_d.data, B_d.data, n, k)

norm(B_d.get()-B.ravel())

In [ ]:
# %timeit matmat_single(queue, (4,), None,\
#                       A_d.data, M_d.data, B_d.data, n, k).wait()

Matmat Single


In [ ]:
B_d = cl.array.Array(queue, n*k, dtype=np.double)
evt = matmat_tile(queue, (4,), None, 
                  A_d.data, M_d.data, B_d.data, 
                  n, k, tile_size)

norm(B_d.get()-B.ravel())

In [ ]:
# %timeit matmat_tile(queue, (4,), None,\
#                     A_d.data, M_d.data, B_d.data,\
#                     n, k, tile_size).wait()

Tensor products


In [ ]:
tile_size = 16

n = 16*16

A_d = cl.array.arange(queue, n*n, 
                      dtype=np.double).reshape((n,n))/(n*n)+1
M_d = cl.array.arange(queue, n*n*n, 
                      dtype=np.double).reshape((n,n,n))/(n**3)
B_d = cl.array.arange(queue, n*n*n, 
                      dtype=np.double).reshape((n,n,n))/(n**3)

# A_d = cl.clrandom.rand(queue, n*n, 
#                        dtype=np.double).reshape((n,n))/(n*n)+1
# M_d = cl.clrandom.rand(queue, n*n*n, 
#                        dtype=np.double).reshape((n,n,n))/(n**3)
# B_d = cl.clrandom.rand(queue, n*n*n, 
#                        dtype=np.double).reshape((n,n,n))/(n**3)

A = A_d.get()
M = M_d.get()

IIA = np.einsum('kj,...ij', A, M)
IAI = np.einsum('ij,...jk', A, M)
AII = np.einsum('ij,jkl', A, M)

$I\otimes I \otimes A$


In [ ]:
B_d[:] = 0.0
evt = tensor_IIA_single(queue, (4,), None, 
                        A_d.data, M_d.data, B_d.data, n)

norm(B_d.get()-IIA)

In [ ]:
B_d[:] = 0.0
evt = tensor_tile(queue, (4,), None, 
                  A_d.data, M_d.data, B_d.data, 
                  n, tile_size, 0)

norm(B_d.get()-IIA)

$I\otimes A \otimes I$


In [ ]:
B_d[:] = 0.0
evt = tensor_IAI_single(queue, (4,), None, 
                        A_d.data, M_d.data, B_d.data, n)

norm(B_d.get()-IAI)

In [ ]:
B_d[:] = 0.0
evt = tensor_tile(queue, (4,), None, 
                  A_d.data, M_d.data, B_d.data, 
                  n, tile_size, 1)

norm(B_d.get()-IAI)

$A\otimes I \otimes I$


In [ ]:
B_d[:] = 0.0
evt = tensor_AII_single(queue, (4,), None, 
                        A_d.data, M_d.data, B_d.data, n)

norm(B_d.get()-AII)

In [ ]:
B_d[:] = 0.0
evt = tensor_tile(queue, (4,), None, 
                  A_d.data, M_d.data, B_d.data, 
                  n, tile_size, 2)

norm(B_d.get()-AII)

In [ ]:
# B_d[:] = 0.0
# %timeit tensor_AII_single(queue, (4,), None,\
#                         A_d.data, M_d.data, B_d.data, n).wait()

In [ ]:
B_d[:] = 0.0
%timeit tensor_tile(queue, (1,), None,\
                  A_d.data, M_d.data, B_d.data,\
                  n, tile_size, 1).wait()

In [ ]:
%timeit IIA = np.einsum('kj,...ij', A, M)
%timeit IAI = np.einsum('ij,...jk', A, M)
%timeit AII = np.einsum('ij,jkl', A, M)

In [ ]: