In [ ]:
#%matplotlib inline
In [ ]:
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.algorithm
import pyopencl.clrandom
import matplotlib.pyplot as plt
norm = lambda x: np.max(np.abs(x))
In [ ]:
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
In [ ]:
with open("tensor.cl") as f:
code = f.read()
prg = cl.Program(ctx, code).build()
matvec_single = prg.matvec_single
matvec_single.set_scalar_arg_dtypes([None, None, None, np.int32])
matvec_tile = prg.matvec_tile
matvec_tile.set_scalar_arg_dtypes([None, None, None,
np.int32, np.int32])
matmat_single = prg.matmat_single
matmat_single.set_scalar_arg_dtypes([None, None, None,
np.int32, np.int32])
matmat_tile = prg.matmat_tile
matmat_tile.set_scalar_arg_dtypes([None, None, None,
np.int32, np.int32, np.int32])
tensor_IIA_single = prg.tensor_IIA_single
tensor_IIA_single.set_scalar_arg_dtypes([None, None, None, np.int32])
tensor_IAI_single = prg.tensor_IAI_single
tensor_IAI_single.set_scalar_arg_dtypes([None, None, None, np.int32])
tensor_AII_single = prg.tensor_AII_single
tensor_AII_single.set_scalar_arg_dtypes([None, None, None, np.int32])
tensor_tile = prg.tensor_tile
tensor_tile.set_scalar_arg_dtypes([None, None, None,
np.int32, np.int32, np.int32])
In [ ]:
tile_size = 16
n = 16*10**2
k = n
# x_d = cl.array.arange(queue, n, dtype=np.double)/n
# A_d = cl.array.arange(queue, n*n, dtype=np.double).reshape((n,n))/(n*n)
# M_d = cl.array.arange(queue, n*k, dtype=np.double).reshape((n,k))/(n*k)
x_d = cl.clrandom.rand(queue, n, dtype=np.double)/n
A_d = cl.clrandom.rand(queue, n*n, dtype=np.double).reshape((n,n))/(n*n)
M_d = cl.clrandom.rand(queue, n*k, dtype=np.double).reshape((n,k))/(n*k)
x = x_d.get()
A = A_d.get()
M = M_d.get()
b = A.dot(x)
B = A.dot(M)
In [ ]:
b_d = cl.array.Array(queue, n, dtype=np.double)
evt = matvec_single(queue, (4,), None,
A_d.data, x_d.data, b_d.data, n)
norm(b_d.get()-b)
In [ ]:
# %timeit matvec_single(queue, (4,), None,\
# A_d.data, x_d.data, b_d.data, n).wait()
In [ ]:
b_d = cl.array.Array(queue, n, dtype=np.double)
b_d[:] = 0.0
evt = matvec_tile(queue, (4,), None,
A_d.data, x_d.data, b_d.data,
n, tile_size)
norm(b_d.get()-b)
In [ ]:
# %timeit matvec_tile(queue, (4,), None,\
# A_d.data, x_d.data, b_d.data,\
# n, tile_size).wait()
In [ ]:
B_d = cl.array.Array(queue, n*k, dtype=np.double)
evt = matmat_single(queue, (4,), None,
A_d.data, M_d.data, B_d.data, n, k)
norm(B_d.get()-B.ravel())
In [ ]:
# %timeit matmat_single(queue, (4,), None,\
# A_d.data, M_d.data, B_d.data, n, k).wait()
In [ ]:
B_d = cl.array.Array(queue, n*k, dtype=np.double)
evt = matmat_tile(queue, (4,), None,
A_d.data, M_d.data, B_d.data,
n, k, tile_size)
norm(B_d.get()-B.ravel())
In [ ]:
# %timeit matmat_tile(queue, (4,), None,\
# A_d.data, M_d.data, B_d.data,\
# n, k, tile_size).wait()
In [ ]:
tile_size = 16
n = 16*16
A_d = cl.array.arange(queue, n*n,
dtype=np.double).reshape((n,n))/(n*n)+1
M_d = cl.array.arange(queue, n*n*n,
dtype=np.double).reshape((n,n,n))/(n**3)
B_d = cl.array.arange(queue, n*n*n,
dtype=np.double).reshape((n,n,n))/(n**3)
# A_d = cl.clrandom.rand(queue, n*n,
# dtype=np.double).reshape((n,n))/(n*n)+1
# M_d = cl.clrandom.rand(queue, n*n*n,
# dtype=np.double).reshape((n,n,n))/(n**3)
# B_d = cl.clrandom.rand(queue, n*n*n,
# dtype=np.double).reshape((n,n,n))/(n**3)
A = A_d.get()
M = M_d.get()
IIA = np.einsum('kj,...ij', A, M)
IAI = np.einsum('ij,...jk', A, M)
AII = np.einsum('ij,jkl', A, M)
In [ ]:
B_d[:] = 0.0
evt = tensor_IIA_single(queue, (4,), None,
A_d.data, M_d.data, B_d.data, n)
norm(B_d.get()-IIA)
In [ ]:
B_d[:] = 0.0
evt = tensor_tile(queue, (4,), None,
A_d.data, M_d.data, B_d.data,
n, tile_size, 0)
norm(B_d.get()-IIA)
In [ ]:
B_d[:] = 0.0
evt = tensor_IAI_single(queue, (4,), None,
A_d.data, M_d.data, B_d.data, n)
norm(B_d.get()-IAI)
In [ ]:
B_d[:] = 0.0
evt = tensor_tile(queue, (4,), None,
A_d.data, M_d.data, B_d.data,
n, tile_size, 1)
norm(B_d.get()-IAI)
In [ ]:
B_d[:] = 0.0
evt = tensor_AII_single(queue, (4,), None,
A_d.data, M_d.data, B_d.data, n)
norm(B_d.get()-AII)
In [ ]:
B_d[:] = 0.0
evt = tensor_tile(queue, (4,), None,
A_d.data, M_d.data, B_d.data,
n, tile_size, 2)
norm(B_d.get()-AII)
In [ ]:
# B_d[:] = 0.0
# %timeit tensor_AII_single(queue, (4,), None,\
# A_d.data, M_d.data, B_d.data, n).wait()
In [ ]:
B_d[:] = 0.0
%timeit tensor_tile(queue, (1,), None,\
A_d.data, M_d.data, B_d.data,\
n, tile_size, 1).wait()
In [ ]:
%timeit IIA = np.einsum('kj,...ij', A, M)
%timeit IAI = np.einsum('ij,...jk', A, M)
%timeit AII = np.einsum('ij,jkl', A, M)
In [ ]: