In [1]:
import numpy as np
import scipy.fftpack as fftpack
import pylab as plt
import matplotlib as matplotlib
import ctypes
In [2]:
import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
In [3]:
import pyfftw # A pythonic wrapper of FFTW
In [4]:
%matplotlib inline
In [5]:
def SetGrid():
##########################################################################################
#
# Generating grids
#
##########################################################################################
# get coordinate and momentum step sizes
dX = 2. * X_amplitude / X_gridDIM
dP = 2. * P_amplitude / P_gridDIM
# coordinate grid
X = np.linspace(-X_amplitude, X_amplitude - dX, X_gridDIM)
X = X[np.newaxis, :]
# Lambda grid (variable conjugate to the coordinate)
Lambda = np.fft.fftfreq(X_gridDIM, dX / (2 * np.pi))
# take only first half, as required by the real fft
Lambda = Lambda[:(1 + X_gridDIM // 2)]
#
Lambda = Lambda[np.newaxis, :]
# momentum grid
P = np.linspace(-P_amplitude, P_amplitude - dP, P_gridDIM)
P = P[:, np.newaxis]
# Theta grid (variable conjugate to the momentum)
Theta = np.fft.fftfreq(P_gridDIM, dP / (2 * np.pi))
# take only first half, as required by the real fft
Theta = Theta[:(1 + P_gridDIM // 2)]
#
Theta = Theta[:, np.newaxis]
return X,P,Theta,Lambda
In [6]:
def SetExponentials(X,P,Theta,Lambda):
##############################################################################
# Exponentials
#############################################################################
expV = np.exp(
- dt * 0.5j * ( Potential(X - 0.5*Theta) - Potential( X + 0.5*Theta))
)
expK = np.exp(
- dt * 1j * ( KineticEnergy(P + 0.5*Lambda) - KineticEnergy( P - 0.5*Lambda))
)
return expK, expV
In [7]:
def MemoryAllocation(X,P,Theta,Lambda):
#############################################################################
# Memory allocation
#############################################################################
W = pyfftw.n_byte_align_empty(
(P.size, X.size) , 8, dtype='float64', order='C')
B = pyfftw.n_byte_align_empty(
(Theta.size, X.size) , 8, dtype='complex128', order='C')
Z = pyfftw.n_byte_align_empty(
(P.size, Lambda.size) , 8, dtype='complex128', order='C')
return W,B,Z
In [8]:
def Set_FFTW_Plans(W,B,Z):
#######################################################################
# FFTW plans
#######################################################################
fftw_kwargs = dict(flags=('FFTW_ESTIMATE',))
fftw_kwargs['flags'] = fftw_kwargs['flags'] + ('FFTW_DESTROY_INPUT',)
# plan the FFT for the p x -> theta x transform
p2theta_transform = pyfftw.FFTW(
W, B,
axes=(0,),
direction='FFTW_FORWARD',
**fftw_kwargs
)
theta2p_transform = pyfftw.FFTW(
B, W,
axes=(0,),
direction='FFTW_BACKWARD',
**fftw_kwargs
)
x2lambda_transform = pyfftw.FFTW(
W, Z,
axes=(1,),
direction='FFTW_FORWARD',
**fftw_kwargs
)
lambda2x_transform = pyfftw.FFTW(
Z, W,
axes=(1,),
direction='FFTW_BACKWARD',
**fftw_kwargs
)
return p2theta_transform, theta2p_transform, x2lambda_transform, lambda2x_transform
In [9]:
def Propagation_Step( expK, expV, W,B,Z):
"""
Perform a single step propagation. The final Wigner function is not normalized.
The input and output is given in: W
"""
P_To_Theta_FFTW();
B *= expV
Theta_To_P_FFTW();
X_To_Lambda_FFTW()
Z *= expK
Lambda_To_X_FFTW()
P_To_Theta_FFTW();
B *= expV
Theta_To_P_FFTW();
#return self.wignerfunction
In [10]:
def Run( expK,expV,W,B,Z, time_steps=10, skipFrames = 1 ):
"""
The input and output is given in: W
"""
for t_index in xrange(time_steps):
if t_index%skipFrames ==0 :
print ' Step ', t_index
Propagation_Step(expK,expV,W,B,Z)
In [11]:
def PlotWigner(W, (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) ):
globalColor_min = -0.3
globalColor_max = 0.3
zero_position = abs( globalColor_min ) / (abs(globalColor_max) + abs(globalColor_min))
wigner_cdict = {'red' : ((0., 0., 0.),
(zero_position, 1., 1.),
(1., 1., 1.)),
'green' : ((0., 0., 0.),
(zero_position, 1., 1.),
(1., 0., 0.)),
'blue' : ((0., 1., 1.),
(zero_position, 1., 1.),
(1., 0., 0.)) }
wigner_cmap = matplotlib.colors.LinearSegmentedColormap('wigner_colormap', wigner_cdict, 256)
fig, ax = plt.subplots(figsize=(20, 7))
dX = 2*X_amplitude/float(X_gridDIM)
dP = 2*P_amplitude/float(P_gridDIM)
ax.imshow(W, origin='lower',
cmap = wigner_cmap ,
vmin= globalColor_min, vmax=globalColor_max,
extent=[-X_amplitude, X_amplitude-dX, -P_amplitude, P_amplitude -dP ])
ax.grid('on')
In [12]:
def fftshift_Axes0( W ):
return fftpack.fftshift( W, axes=0)
def fftshift_Axes1( W ):
return fftpack.fftshift( W, axes=1)
In [13]:
dt = 0.05
X_amplitude = 12
P_amplitude = 12
X_gridDIM = 512
P_gridDIM = 512
X,P,Theta,Lambda = SetGrid()
dX = 2*X_amplitude/float(X_gridDIM)
dP = 2*P_amplitude/float(P_gridDIM)
In [14]:
Potential_String = ' 0.5*pow(X,2) + 0.01*pow(X,4) '
def Potential(X):
pow = np.power
atan = np.arctan
return eval ( Potential_String, np.__dict__, locals() )
In [15]:
KineticEnergy_String = ' pow(P,2)/(2.)'
def KineticEnergy(P):
pow = np.power
atan = np.arctan
return eval ( KineticEnergy_String, np.__dict__, locals() )
In [16]:
expK , expV = SetExponentials(X,P,Theta,Lambda)
In [17]:
W,B,Z = MemoryAllocation(X,P,Theta,Lambda)
In [18]:
P_To_Theta_FFTW, Theta_To_P_FFTW, X_To_Lambda_FFTW, Lambda_To_X_FFTW = Set_FFTW_Plans(W,B,Z)
In [19]:
def InitialState(W,X,P,dX,dP):
mass = 1.
omega = 1.
W[:] = np.exp( -( (P-3)**2/mass + mass*omega**2*(X)**2) )
norm = np.sum(W)*dX*dP
W[:] /= norm
In [20]:
InitialState(W,X,P,dX,dP)
In [21]:
PlotWigner(W, (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )
In [22]:
Run( expK,expV,W,B,Z, time_steps=500, skipFrames = 100 )
In [23]:
np.min(W)
Out[23]:
In [24]:
np.sum(W)*dX*dP
Out[24]:
In [25]:
PlotWigner(W, (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )
In [26]:
#P_To_Theta_FFTW();
plt.imshow( fftshift_Axes0( np.abs(B) ) , cmap = "cool" )
Out[26]:
In [27]:
CUFFT_FORWARD = -1
CUFFT_INVERSE = 1
#CUFFT_Z2D = 0x6c
#CUFFT_D2Z = 0x6a
In [28]:
CUFFT_R2C = 0x2a # Real to complex (interleaved)
CUFFT_C2R = 0x2c # Complex (interleaved) to real
CUFFT_C2C = 0x29 # Complex to complex (interleaved)
CUFFT_D2Z = 0x6a # Double to double-complex (interleaved)
CUFFT_Z2D = 0x6c # Double-complex (interleaved) to double
CUFFT_Z2Z = 0x69 # Double-complex to double-complex (interleaved)
In [29]:
_libcufft = ctypes.cdll.LoadLibrary('libcufft.so')
In [30]:
_libcufft.cufftPlanMany.argtypes = [ctypes.c_void_p,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_int,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_int,
ctypes.c_int,
ctypes.c_int,
ctypes.c_int]
In [31]:
def cufftPlanMany(rank, n,
inembed, istride, idist,
onembed, ostride, odist, fft_type, batch):
"""Create batched FFT plan configuration."""
plan = ctypes.c_uint()
status = _libcufft.cufftPlanMany(ctypes.byref(plan), rank, n,
inembed, istride, idist,
onembed, ostride, odist,
fft_type, batch)
return plan
In [32]:
# General CUFFT error:
class cufftError(Exception):
"""CUFFT error"""
pass
# Exceptions corresponding to different CUFFT errors:
class cufftInvalidPlan(cufftError):
"""CUFFT was passed an invalid plan handle."""
pass
class cufftAllocFailed(cufftError):
"""CUFFT failed to allocate GPU memory."""
pass
class cufftInvalidType(cufftError):
"""The user requested an unsupported type."""
pass
class cufftInvalidValue(cufftError):
"""The user specified a bad memory pointer."""
pass
class cufftInternalError(cufftError):
"""Internal driver error."""
pass
class cufftExecFailed(cufftError):
"""CUFFT failed to execute an FFT on the GPU."""
pass
class cufftSetupFailed(cufftError):
"""The CUFFT library failed to initialize."""
pass
class cufftInvalidSize(cufftError):
"""The user specified an unsupported FFT size."""
pass
class cufftUnalignedData(cufftError):
"""Input or output does not satisfy texture alignment requirements."""
pass
cufftExceptions = {
0x1: cufftInvalidPlan,
0x2: cufftAllocFailed,
0x3: cufftInvalidType,
0x4: cufftInvalidValue,
0x5: cufftInternalError,
0x6: cufftExecFailed,
0x7: cufftSetupFailed,
0x8: cufftInvalidSize,
0x9: cufftUnalignedData
}
def cufftCheckStatus(status):
"""Raise an exception if the specified CUBLAS status is an error."""
if status != 0:
try:
raise cufftExceptions[status]
except KeyError:
raise cufftError
In [33]:
################### D2Z
_libcufft.cufftExecD2Z.restype = int
_libcufft.cufftExecD2Z.argtypes = [ctypes.c_uint,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_int]
def cu_fft_D2Z(x_input_gpu, y_output_gpu, plan ):
_libcufft.cufftExecD2Z(plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_FORWARD)
def cu_ifft_D2Z(x_input_gpu, y_output_gpu, plan ):
_libcufft.cufftExecD2Z(plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_INVERSE )
#################### Z2D
_libcufft.cufftExecZ2D.restype = int
_libcufft.cufftExecZ2D.argtypes = [ctypes.c_uint,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_int]
def cu_fft_Z2D(x_input_gpu, y_output_gpu, plan ):
status = _libcufft.cufftExecZ2D(
plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_FORWARD)
cufftCheckStatus(status)
def cu_ifft_Z2D(x_input_gpu, y_output_gpu, plan ):
status = _libcufft.cufftExecZ2D(
plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_INVERSE )
cufftCheckStatus(status)
In [34]:
def cuFFT_P_To_Theta( W_GPU, B_GPU, Z_GPU ):
cu_fft_D2Z( W_GPU , B_GPU , plan_D2Z_Axes0 )
def cuFFT_Theta_To_P( W_GPU, B_GPU, Z_GPU ):
cu_ifft_Z2D( B_GPU , W_GPU , plan_Z2D_Axes0 )
W_GPU /= float( W_GPU.shape[0] )
def cuFFT_X_To_Lambda( W_GPU, B_GPU, Z_GPU ):
cu_fft_D2Z( W_GPU , Z_GPU , plan_D2Z_Axes1 )
def cuFFT_Lambda_To_X( W_GPU, B_GPU, Z_GPU ):
cu_ifft_Z2D( Z_GPU , W_GPU , plan_Z2D_Axes1 )
W_GPU /= float( W_GPU.shape[1] )
In [35]:
class Plan_2D_Axis0:
"""
"""
def __init__(self, shape, cuFFT_type):
self.batch = shape[1]
self.fft_type = cuFFT_type
if len(shape) == 2:
n = np.array([ shape[0] ])
stride = shape[1] # distance jump between two elements in the same series
idist = 1 # distance jump between two consecutive batches
inembed = np.array( [shape[0], stride] )
onembed = np.array( [shape[0], stride] )
rank = 1
self.handle = cufftPlanMany(rank, n.ctypes.data,
inembed.ctypes.data, stride , idist, onembed.ctypes.data, stride, idist,
self.fft_type, self.batch)
else:
raise ValueError('invalid transform dimension')
def __del__(self):
try:
cufft.cufftDestroy(self.handle)
except:
pass
In [36]:
class Plan_2D_Axis1:
"""
"""
def __init__(self, shape, cuFFT_type):
self.batch = shape[0]
self.fft_type = cuFFT_type
if len(shape) == 2:
n = np.array([ shape[1] ])
stride = 1 # distance jump between two elements in the same series
idist = shape[1] # distance jump between two consecutive batches
inembed = np.array( shape )
onembed = np.array( [shape[0] , shape[1]/2 + 1] )
rank = 1
self.handle = cufftPlanMany(rank, n.ctypes.data,
None, 1, 0,
None, 1, 0,
self.fft_type, self.batch)
else:
raise ValueError('invalid transform dimension')
def __del__(self):
try:
cufft.cufftDestroy(self.handle)
except:
pass
In [37]:
plan_D2Z_Axes0 = Plan_2D_Axis0( (P.size, X.size) , CUFFT_D2Z )
plan_Z2D_Axes0 = Plan_2D_Axis0( (P.size, X.size) , CUFFT_Z2D )
plan_D2Z_Axes1 = Plan_2D_Axis1( (P.size, X.size) , CUFFT_D2Z )
plan_Z2D_Axes1 = Plan_2D_Axis1( (P.size, X.size) , CUFFT_Z2D )
In [38]:
def GPU_Propagation_Step( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU ):
"""
Perform a single step propagation. The final Wigner function is not normalized.
The input and output is given in: W
"""
cuFFT_P_To_Theta(W_GPU,B_GPU,Z_GPU);
B_GPU *= expV_GPU
cuFFT_Theta_To_P(W_GPU,B_GPU,Z_GPU);
cuFFT_X_To_Lambda(W_GPU,B_GPU,Z_GPU);
Z_GPU *= expK_GPU
cuFFT_Lambda_To_X(W_GPU,B_GPU,Z_GPU);
cuFFT_P_To_Theta(W_GPU,B_GPU,Z_GPU);
B_GPU *= expV_GPU
cuFFT_Theta_To_P(W_GPU,B_GPU,Z_GPU);
In [39]:
def GPU_Run( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU, time_steps=10, skipFrames = 1 ):
"""
The input and output is given in: W
"""
for t_index in xrange(time_steps):
if t_index%skipFrames ==0 :
print ' Step ', t_index
GPU_Propagation_Step( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU )
In [40]:
InitialState(W,X,P,dX,dP)
expK_GPU = gpuarray.to_gpu( np.ascontiguousarray( expK , dtype = np.complex128) )
expV_GPU = gpuarray.to_gpu( np.ascontiguousarray( expV , dtype = np.complex128) )
In [41]:
W_GPU = gpuarray.to_gpu( np.ascontiguousarray( W , dtype = np.float64) )
In [42]:
B_GPU = gpuarray.to_gpu( np.ascontiguousarray( B , dtype = np.complex128) )
In [43]:
Z_GPU = gpuarray.to_gpu( np.ascontiguousarray( Z , dtype = np.complex128) )
In [44]:
PlotWigner( W_GPU.get() , (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )
In [45]:
#######################33 RUN #########################
In [46]:
#GPU_Propagation_Step( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU)
In [47]:
GPU_Run( expK_GPU, expV_GPU, W_GPU, B_GPU, Z_GPU, time_steps=500 , skipFrames = 100 )
In [48]:
PlotWigner( W_GPU.get() , (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )