In [1]:
from sympy import *
from sympy.abc import *
from sympy.galgebra.ga import *
import numpy as np
from numpy import linalg as LA
from __future__ import print_function
init_printing()
We consder differential operation on a vector $u$ at a given point in 3D with a 1D stencil size $k$ (number of points in the stencil) for every order, the subindex $i$ represent the dimension number $1$ for z, $2$ for $x$ and 3 for $y$,
First order : $ \frac{d u}{dx_i} $
Second order : $ \frac{d^2 u}{dx_i^2} $
Second order cross derivative $ \frac{d^2 u}{dx_i dx_j} $
In [2]:
# Arithmetic operations
k = symbols('k')
s = symbols('s')
# 1D stencil
# multiplication addition
AI_dxi = k + 1 + k - 1
AI_dxxi = k + 1 + k - 1
AI_dxxij = 2*k + 2*k-1
# square stencil (all uses the same stencil mask)
# multiplication addition
AI_dxis = k**2 + k**2 - 1
AI_dxxis = k**2 + k**2 - 1
AI_dxxijs = k**2 + k**2 - 1
In [3]:
# I/O operations
# load
IO_dxi = k
IO_dxxi = k
IO_dxxij = 2*k
IO_square = k**2
In [4]:
# Operational intensity in single precision
print(AI_dxi/(4*IO_dxi))
print(AI_dxxi/(4*IO_dxxi))
print(AI_dxxij/(4*IO_dxxij))
print(AI_dxis/(4*IO_square))
print(AI_dxxis/(4*IO_square))
print(AI_dxxijs/(4*IO_square))
OI_dxi = lambdify(k,AI_dxi/(4*IO_dxi))
OI_dxxi = lambdify(k,AI_dxxi/(4*IO_dxxi))
OI_dxxij = lambdify(k,AI_dxxij/(4*IO_dxxij))
OI_dxis = lambdify(k,AI_dxis/(4*IO_dxxij))
OI_dxxis = lambdify(k,AI_dxxis/(4*IO_dxxij))
OI_dxxijs = lambdify(k,AI_dxxijs/(4*IO_dxxij))
We now consider geophysical wave equations to obtain the theoretical expression of the operational intensity. We write directly the expression of a single time step as a function of differential operators. An operation on a wavefield is counted only once as we consider the minimum of arithmetic operations required.
$ u(x,y,z,t+dt) = dt^2 v^2(x,y,z) ( 2 u(x,y,z,t) + u(x,y,z,t-dt) + \nabla^2 u(x,y,z,t) +q ) $
$ p(x,y,z,t+dt) = dt^2 v^2(x,y,z) \left( 2 p(x,y,z,t) + p(x,y,z,t-dt) +(1+2\epsilon)(\frac{d^2 p(x,t)}{dx^2}+\frac{d^2 p(x,t)}{dyx^2}) + \sqrt{(1+2\delta)} \frac{d^2 r(x,t)}{dz^2} + q \right) $ $ r(x,y,z,t+dt) = dt^2 v^2(x,y,z) \left( 2 r(x,y,z,t) + r(x,y,z,t-dt) +\sqrt{(1+2\delta)}(\frac{d^2 p(x,t)}{dx^2}+ \frac{d^2 p(x,t)}{dy^2}) + \frac{d^2 r(x,t)}{dz^2} + q \right) $
$ p(x,y,z,t+dt) = dt^2 v^2(x,y,z) \left( 2 p(x,y,z,t) + p(x,y,z,t-dt) + (1+2\epsilon) (G_{\bar{x}\bar{x}} + G_{\bar{y}\bar{y}}) p(x,y,z,t) + \sqrt{(1+2\delta)} G_{\bar{z}\bar{z}} r(x,y,z,t) + q \right) $ $ r(x,y,z,t+dt) = dt^2 v^2(x,y,z) \left( 2 r(x,y,z,t) + r(x,y,z,t-dt) + \sqrt{(1+2\delta)}(G_{\bar{x}\bar{x}} + G_{\bar{y}\bar{y}}) p(x,y,z,t) + G_{\bar{z}\bar{z}} r(x,y,z) +q \right) $
where $ \begin{cases} G_{\bar{x}\bar{x}} & = cos(\phi)^2 cos(\theta)^2 \frac{d^2}{dx^2} +sin(\phi)^2 cos(\theta)^2 \frac{d^2}{dy^2}+ sin(\theta)^2 \frac{d^2}{dz^2} + sin(2\phi) cos(\theta)^2 \frac{d^2}{dx dy} - sin(\phi) sin(2\theta) \frac{d^2}{dy dz} -cos(\phi) sin(2\theta) \frac{d^2}{dx dz} \\ G_{\bar{y}\bar{y}} & = sin(\phi)^2 \frac{d^2}{dx^2} +cos(\phi)^2 \frac{d^2}{dy^2} - sin(2\phi)^2 \frac{d^2}{dx dy}\\ G_{\bar{z}\bar{z}} & = cos(\phi)^2 sin(\theta)^2 \frac{d^2}{dx^2} +sin(\phi)^2 sin(\theta)^2 \frac{d^2}{dy^2}+ cos(\theta)^2 \frac{d^2}{dz^2} + sin(2\phi) sin(\theta)^2 \frac{d^2}{dx dy} + sin(\phi) sin(2\theta) \frac{d^2}{dy dz} +cos(\phi) sin(2\theta) \frac{d^2}{dx dz} \\ \end{cases} $
In [5]:
# Arithmetic
# dxi dxxi dxxij multiplications additions duplicates
AI_acou = 0*AI_dxi + 3*AI_dxxi + 0*AI_dxxij + 3 + 5 - 2 * 2
AI_vti = 2 * ( 0*AI_dxi + 3*AI_dxxi + 0*AI_dxxij + 5 + 5 - 2 )
AI_tti = 2 * ( 0*AI_dxi + 3*AI_dxxi + 3*AI_dxxij + 44 + 17 - 8 )
AI_acoums = 0*AI_dxi + 3*s*AI_dxxi + 0*AI_dxxij + 3*s + 5*s - 2 * 2 *s
AI_vtims = 2 * ( 0*AI_dxi + 3*s*AI_dxxi + 0*AI_dxxij + 5*s + 5*s - 2*s )
AI_ttims = 2 * ( 0*AI_dxi + 3*s*AI_dxxi + 3*s*AI_dxxij + 44*s + 17*s - 8*s )
AI_acous = 0*AI_dxis + 3*AI_dxxis + 0*AI_dxxijs + 3 + 5 - 2 * 2
AI_vtis = 2 * ( 0*AI_dxis + 3*AI_dxxis + 0*AI_dxxijs + 5 + 5 - 2 * 2 )
AI_ttis = 2 * ( 0*AI_dxis + 3*AI_dxxis + 3*AI_dxxijs + 44 + 17 - 3*k**2 )
In [6]:
# I/O operations , the full domain of size N is in cache for best case scenario
#
IO_acou = 4 * N
IO_vti = 9 * N
IO_tti = 15 * N
IO_acoums = 3 * s * N + N
IO_vtims = 6 * s * N + 3 * N
IO_ttims = 6 * s * N + 9 * N
IO_acous = 4 * N
IO_vtis = 9 * N
IO_ttis = 15 * N
In [7]:
print(simplify(N*AI_acou/(4*IO_acou)))
print(simplify(N*AI_vti/(4*IO_vti)))
print(simplify(N*AI_tti/(4*IO_tti)))
print(simplify(N*AI_acoums/(4*IO_acoums)))
print(simplify(N*AI_vtims/(4*IO_vtims)))
print(simplify(N*AI_ttims/(4*IO_ttims)))
print(simplify(N*AI_acous/(4*IO_acous)))
print(simplify(N*AI_vtis/(4*IO_vtis)))
print(simplify(N*AI_ttis/(4*IO_ttis)))
OI_acou = lambdify(k,N*AI_acou/(4*IO_acou))
OI_vti = lambdify(k,N*AI_vti/(4*IO_vti))
OI_tti = lambdify(k,N*AI_tti/(4*IO_tti))
OI_acoums = lambdify((k,s),N*AI_acoums/(4*IO_acoums))
OI_vtims = lambdify((k,s),N*AI_vtims/(4*IO_vtims))
OI_ttims = lambdify((k,s),N*AI_ttims/(4*IO_ttims))
OI_acous = lambdify(k,N*AI_acous/(4*IO_acous))
OI_vtis = lambdify(k,N*AI_vtis/(4*IO_vtis))
OI_ttis = lambdify(k,N*AI_ttis/(4*IO_ttis))
In [8]:
print(limit(OI_acou(k),k,51))
print(limit(OI_vti(k),k,51))
print(limit(OI_tti(k),k,51))
print(limit(OI_acoums(3,s),s,128))
print(limit(OI_vtims(3,s),s,128))
print(limit(OI_ttims(3,s),s,128))
print(limit(OI_acous(k),k,51))
print(limit(OI_vtis(k),k,51))
print(limit(OI_ttis(k),k,51))
In [9]:
kk=[3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
ss=[2,4,8,16,32,64]
OI_wave=np.zeros((15,6))
OI_wavems=np.zeros((15,6,3))
OI=np.zeros((15,3))
for i in range(0,15):
OI_wave[i,0]=OI_acou(kk[i])
OI_wave[i,1]=OI_vti(kk[i])
OI_wave[i,2]=OI_tti(kk[i])
OI_wave[i,3]=OI_acous(kk[i])
OI_wave[i,4]=OI_vtis(kk[i])
OI_wave[i,5]=OI_ttis(kk[i])
OI[i,0]=OI_dxi(kk[i])
OI[i,1]=OI_dxxi(kk[i])
OI[i,2]=OI_dxxij(kk[i])
for j in range(0,6):
OI_wavems[i,j,0]=OI_acoums(kk[i],ss[j])
OI_wavems[i,j,1]=OI_vtims(kk[i],ss[j])
OI_wavems[i,j,2]=OI_ttims(kk[i],ss[j])
In [10]:
import matplotlib.pyplot as plt
In [11]:
fig = plt.figure()
plt.hold("off")
acou = plt.plot(OI_wave[:,0],label='acou') # this is how you'd plot a single line...
vti = plt.plot(OI_wave[:,1],label='vti') # this is how you'd plot a single line...
tti = plt.plot(OI_wave[:,2],label='tti') # this is how you'd plot a single line...
fig = plt.figure()
plt.hold("off")
acou = plt.plot(OI_wave[:,3],label='acous') # this is how you'd plot a single line...
vti = plt.plot(OI_wave[:,4],label='vtis') # this is how you'd plot a single line...
tti = plt.plot(OI_wave[:,5],label='ttis') # this is how you'd plot a single line...
fig = plt.figure()
plt.hold("off")
acou = plt.plot(OI_wavems[:,2,0],label='acous') # this is how you'd plot a single line...
vti = plt.plot(OI_wavems[:,2,1],label='vtis') # this is how you'd plot a single line...
tti = plt.plot(OI_wavems[:,2,2],label='ttis') # this is how you'd plot a single line...
fig = plt.figure()
plt.hold("off")
acou = plt.plot(OI_wavems[:,5,0],label='acous') # this is how you'd plot a single line...
vti = plt.plot(OI_wavems[:,5,1],label='vtis') # this is how you'd plot a single line...
tti = plt.plot(OI_wavems[:,5,2],label='ttis') # this is how you'd plot a single line...
plt.show()
In [ ]: