In [ ]:
%matplotlib inline
In [ ]:
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as plt
In [ ]:
from sempy.tensormesh import HexCubePoisson
from sempy.maps import LinearIsopMap
from sempy.topology import CubicTopology
from sempy.poisson import PoissonProblem
In [ ]:
import pyopencl as cl
import pyopencl.array
import loopy as lp
In [ ]:
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
In [ ]:
mnorm = lambda x: np.max(np.abs(x))
def kron3(A, B, C):
return sps.kron(A, sps.kron(B, C))
In [ ]:
N = 8
NN = N
Ex = 17
Ey = Ex
Ez = Ex
periodic = True
In [ ]:
lmap = LinearIsopMap()
topo = CubicTopology(N, (Ex, Ey, Ez),
periodic=periodic)
topo.build()
etn = topo.elem_to_vertex
Q, etd = topo.Q, topo.elem_to_dof
R = topo.R
boundary_dofs = topo.boundary_dofs
vertex_ref = topo.get_vertex_ref()
vertex_phys = vertex_ref.copy()
vertex_phys[:,0] *= 1
vertex_phys[:,1] *= 1
vertex_phys[:,2] *= 1
shift = 0.5*0
chi, eta, zeta = vertex_ref.T+shift
sx = sy = sz = 0.1
vp = vertex_phys
sin3 = np.sin(np.pi*chi)*np.sin(np.pi*eta)*np.sin(np.pi*zeta)
vp[:,0] = chi +sx*sin3
vp[:,1] = eta +sy*sin3
vp[:,2] = zeta+sz*sin3
In [ ]:
poisson = PoissonProblem(topo, lmap)
poisson.build(vertex_phys)
In [ ]:
p = poisson
x0 = np.random.rand(p.n_dofs)
x = Q.dot(x0)
x = x.reshape((p.n_elem, -1))
x_d = cl.array.to_device(queue, x)
G11_d = cl.array.to_device(queue, p.G11)
G12_d = cl.array.to_device(queue, p.G12)
G13_d = cl.array.to_device(queue, p.G13)
G21_d = cl.array.to_device(queue, p.G21)
G22_d = cl.array.to_device(queue, p.G22)
G23_d = cl.array.to_device(queue, p.G23)
G31_d = cl.array.to_device(queue, p.G31)
G32_d = cl.array.to_device(queue, p.G32)
G33_d = cl.array.to_device(queue, p.G33)
D_d = cl.array.to_device(queue, p.D.copy())
In [ ]:
knl = lp.make_kernel(
["{ [i,j,k]: 0<=i,j,k<n }",
"{ [ii,jj,kk]: 0<=ii,jj,kk<n }",
"{ [a1,a2,a3]: 0<=a1,a2,a3<n }",
"{ [b1,b2,b3]: 0<=b1,b2,b3<n }",
"{ [m,mm]: 0<=m,mm<N }",
"{ [ielem]: 0<=ielem<n_elem}"],
"""
# Compute initial tensor products
D1[i*n*n+j*n+k]=sum(a1, D[k,a1]*A[ielem,i*n*n+j*n+a1])
D2[i*n*n+j*n+k]=sum(a2, D[j,a2]*A[ielem,i*n*n+a2*n+k])
D3[i*n*n+j*n+k]=sum(a3, D[i,a3]*A[ielem,a3*n*n+j*n+k])
# Multiply by diagonal factors
tmp1[m] = G11[ielem,m]*D1[m]+G12[ielem,m]*D2[m]+G13[ielem,m]*D3[m] {id=t1}
tmp2[m] = G21[ielem,m]*D1[m]+G22[ielem,m]*D2[m]+G23[ielem,m]*D3[m] {id=t2}
tmp3[m] = G31[ielem,m]*D1[m]+G32[ielem,m]*D2[m]+G33[ielem,m]*D3[m] {id=t3}
# Apply Transposed tensor products
DT1[ii*n*n+jj*n+kk]=sum(b1, D[b1,kk]*tmp1[ii*n*n+jj*n+b1]) {dep=t1}
DT2[ii*n*n+jj*n+kk]=sum(b2, D[b2,jj]*tmp2[ii*n*n+b2*n+kk]) {dep=t2}
DT3[ii*n*n+jj*n+kk]=sum(b3, D[b3,ii]*tmp3[b3*n*n+jj*n+kk]) {dep=t3}
# Sum into out
out[ielem,mm] = DT1[mm]+DT2[mm]+DT3[mm]
""",
[lp.GlobalArg("A", np.double, shape="n_elem,N"),
lp.GlobalArg("D", np.double, "n,n"),
lp.GlobalArg("G11", np.double, "n_elem,N"),
lp.GlobalArg("G12", np.double, "n_elem,N"),
lp.GlobalArg("G13", np.double, "n_elem,N"),
lp.GlobalArg("G21", np.double, "n_elem,N"),
lp.GlobalArg("G22", np.double, "n_elem,N"),
lp.GlobalArg("G23", np.double, "n_elem,N"),
lp.GlobalArg("G31", np.double, "n_elem,N"),
lp.GlobalArg("G32", np.double, "n_elem,N"),
lp.GlobalArg("G33", np.double, "n_elem,N"),
lp.GlobalArg("D1", np.double, "N"),
lp.GlobalArg("D2", np.double, "N"),
lp.GlobalArg("D3", np.double, "N"),
lp.GlobalArg("DT1", np.double, "N"),
lp.GlobalArg("DT2", np.double, "N"),
lp.GlobalArg("DT3", np.double, "N"),
lp.GlobalArg("tmp1", np.double, "N"),
lp.GlobalArg("tmp2", np.double, "N"),
lp.GlobalArg("tmp3", np.double, "N"),
lp.GlobalArg("out", np.double, "n_elem,N"),
lp.ValueArg("n", np.int, ),
lp.ValueArg("N", np.int),
lp.ValueArg("n_elem", np.int)])
knl = lp.prioritize_loops(knl, "ielem,k,j,i")
#knl = lp.set_temporary_scope(knl, "tmp1,tmp2,tmp3", "local")
#print lp.generate_code_v2(knl).device_code()
In [ ]:
n = NN+1
N = n**3
b_str = """
# Compute initial tensor products
D1[i*n*n+j*n+k]=sum(a1, D[k,a1]*A[ielem,i*n*n+j*n+a1])
D2[i*n*n+j*n+k]=sum(a2, D[j,a2]*A[ielem,i*n*n+a2*n+k])
D3[i*n*n+j*n+k]=sum(a3, D[i,a3]*A[ielem,a3*n*n+j*n+k])
# Multiply by diagonal factors
tmp1[m] = G11[ielem,m]*D1[m]+G12[ielem,m]*D2[m]+G13[ielem,m]*D3[m] {id=t1}
tmp2[m] = G21[ielem,m]*D1[m]+G22[ielem,m]*D2[m]+G23[ielem,m]*D3[m] {id=t2}
tmp3[m] = G31[ielem,m]*D1[m]+G32[ielem,m]*D2[m]+G33[ielem,m]*D3[m] {id=t3}
# Apply Transposed tensor products
DT1[ii*n*n+jj*n+kk]=sum(b1, D[b1,kk]*tmp1[ii*n*n+jj*n+b1]) {dep=t1}
DT2[ii*n*n+jj*n+kk]=sum(b2, D[b2,jj]*tmp2[ii*n*n+b2*n+kk]) {dep=t2}
DT3[ii*n*n+jj*n+kk]=sum(b3, D[b3,ii]*tmp3[b3*n*n+jj*n+kk]) {dep=t3}
# Sum into out
out[ielem,mm] = DT1[mm]+DT2[mm]+DT3[mm]
"""
b_str = b_str.replace("n", str(n))
knl = lp.make_kernel(
["{ [i,j,k]: 0<=i,j,k<%i }"%n,
"{ [ii,jj,kk]: 0<=ii,jj,kk<%i }"%n,
"{ [a1,a2,a3]: 0<=a1,a2,a3<%i }"%n,
"{ [b1,b2,b3]: 0<=b1,b2,b3<%i }"%n,
"{ [m,mm]: 0<=m,mm<%i }"%N,
"{ [ielem]: 0<=ielem<n_elem}"],
b_str
,
[lp.GlobalArg("A", np.double, shape="n_elem,%i"%N),
lp.GlobalArg("D", np.double, "%i,%i"%(n,n)),
lp.GlobalArg("G11", np.double, "n_elem,%i"%N),
lp.GlobalArg("G12", np.double, "n_elem,%i"%N),
lp.GlobalArg("G13", np.double, "n_elem,%i"%N),
lp.GlobalArg("G21", np.double, "n_elem,%i"%N),
lp.GlobalArg("G22", np.double, "n_elem,%i"%N),
lp.GlobalArg("G23", np.double, "n_elem,%i"%N),
lp.GlobalArg("G31", np.double, "n_elem,%i"%N),
lp.GlobalArg("G32", np.double, "n_elem,%i"%N),
lp.GlobalArg("G33", np.double, "n_elem,%i"%N),
lp.TemporaryVariable("D1", np.double, "%i"%N),
lp.TemporaryVariable("D2", np.double, "%i"%N),
lp.TemporaryVariable("D3", np.double, "%i"%N),
lp.TemporaryVariable("DT1", np.double, "%i"%N),
lp.TemporaryVariable("DT2", np.double, "%i"%N),
lp.TemporaryVariable("DT3", np.double, "%i"%N),
lp.TemporaryVariable("tmp1", np.double, "%i"%N),
lp.TemporaryVariable("tmp2", np.double, "%i"%N),
lp.TemporaryVariable("tmp3", np.double, "%i"%N),
lp.GlobalArg("out", np.double, "n_elem,%i"%N),
lp.ValueArg("n_elem", np.int)])
knl = lp.prioritize_loops(knl, "ielem,k,j,i")
knl = lp.tag_inames(knl, dict(ielem='g.0'))
print lp.generate_code_v2(knl).device_code()
In [ ]:
evt, res = knl(queue, A=x_d, D=D_d,
G11=G11_d, G12=G12_d, G13=G13_d,
G21=G21_d, G22=G22_d, G23=G23_d,
G31=G31_d, G32=G32_d, G33=G33_d)
evt.wait()
out = res[-1].get().ravel()
out = Q.T.dot(out)
In [ ]:
mnorm(p.apply_A(x0.ravel())-out)
In [ ]:
%timeit knl(queue, A=x_d, D=D_d, \
G11=G11_d, G12=G12_d, G13=G13_d,\
G21=G21_d, G22=G22_d, G23=G23_d,\
G31=G31_d, G32=G32_d, G33=G33_d)[0].wait()
In [ ]:
%timeit p.apply_A(x0.ravel())
In [ ]: