FFTW Implementation

This implementation of the spectral split operator propagator avoids the explicit use of fftshift in order to maximize performance. An important side effect is that the current B(X, Theta) function is not compatible with the usual formulas in terms of the wavefunctions. Therefore, this propagator relies on the input of valid Wigner funntions in the phase space.



In [1]:

    
import numpy as np
import scipy.fftpack as fftpack
import pylab as plt
import matplotlib as matplotlib
import ctypes



In [2]:

    
import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule



In [3]:

    
import pyfftw # A pythonic wrapper of FFTW









    



/usr/lib/python2.7/dist-packages/pkg_resources.py:1031: UserWarning: /home/rcabrera/.python-eggs is writable by group/others and vulnerable to attack when used with get_resource_filename. Consider a more secure location (set with .set_extraction_path or the PYTHON_EGG_CACHE environment variable).
  warnings.warn(msg, UserWarning)



In [4]:

    
%matplotlib inline



In [5]:

    
def SetGrid():
    ##########################################################################################
    #
    # Generating grids
    #
    ##########################################################################################
    
    # get coordinate and momentum step sizes
    dX = 2. * X_amplitude / X_gridDIM
    dP = 2. * P_amplitude / P_gridDIM
    
    # coordinate grid
    X = np.linspace(-X_amplitude, X_amplitude - dX, X_gridDIM)
    X = X[np.newaxis, :]
    
    # Lambda grid (variable conjugate to the coordinate)
    Lambda = np.fft.fftfreq(X_gridDIM, dX / (2 * np.pi))
    
    # take only first half, as required by the real fft
    Lambda = Lambda[:(1 + X_gridDIM // 2)]
    #
    Lambda = Lambda[np.newaxis, :]
    
    # momentum grid
    P = np.linspace(-P_amplitude, P_amplitude - dP, P_gridDIM)
    P = P[:, np.newaxis]
    
    # Theta grid (variable conjugate to the momentum)
    Theta = np.fft.fftfreq(P_gridDIM, dP / (2 * np.pi))
    
    # take only first half, as required by the real fft
    Theta = Theta[:(1 + P_gridDIM // 2)]
    #
    Theta = Theta[:, np.newaxis]
        
    return X,P,Theta,Lambda



In [6]:

    
def SetExponentials(X,P,Theta,Lambda):
    ##############################################################################
    
    #        Exponentials
    
    #############################################################################
    
    expV = np.exp(
             - dt * 0.5j * ( Potential(X - 0.5*Theta) - Potential( X + 0.5*Theta))
                ) 
    
    
    expK = np.exp(
             - dt * 1j * ( KineticEnergy(P + 0.5*Lambda) - KineticEnergy( P - 0.5*Lambda))
                ) 
    
    return expK, expV



In [7]:

    
def MemoryAllocation(X,P,Theta,Lambda):

    #############################################################################
    
    #    Memory allocation
    
    #############################################################################
    
    W = pyfftw.n_byte_align_empty(
             (P.size, X.size) , 8, dtype='float64', order='C')
    
    B = pyfftw.n_byte_align_empty(
             (Theta.size, X.size) , 8, dtype='complex128', order='C')
    
    Z = pyfftw.n_byte_align_empty(
             (P.size, Lambda.size) , 8, dtype='complex128', order='C')
    
    return W,B,Z



In [8]:

    
def Set_FFTW_Plans(W,B,Z):
    #######################################################################
    
    #                FFTW plans
    
    #######################################################################
    
    fftw_kwargs = dict(flags=('FFTW_ESTIMATE',))
    
    fftw_kwargs['flags'] = fftw_kwargs['flags'] + ('FFTW_DESTROY_INPUT',)
    
    # plan the FFT for the  p x -> theta x transform
    
    p2theta_transform = pyfftw.FFTW(
                W, B,
                axes=(0,),
                direction='FFTW_FORWARD',
                **fftw_kwargs
                )
    
    theta2p_transform = pyfftw.FFTW(
                B, W,
                axes=(0,),
                direction='FFTW_BACKWARD',
                **fftw_kwargs
                )
    
    x2lambda_transform = pyfftw.FFTW(
                W, Z,
                axes=(1,),
                direction='FFTW_FORWARD',
                **fftw_kwargs
                )
    
    lambda2x_transform = pyfftw.FFTW(
                Z, W,
                axes=(1,),
                direction='FFTW_BACKWARD',
                **fftw_kwargs
            )
    
    return p2theta_transform, theta2p_transform,  x2lambda_transform,  lambda2x_transform



In [9]:

    
def Propagation_Step( expK, expV, W,B,Z):
        """
        Perform a single step propagation. The final Wigner function is not normalized.
        The input and output is given in: W
        """

        P_To_Theta_FFTW();
        B *= expV
        Theta_To_P_FFTW();

        X_To_Lambda_FFTW()
        Z *= expK
        Lambda_To_X_FFTW()

        P_To_Theta_FFTW();
        B *= expV
        Theta_To_P_FFTW();

        #return self.wignerfunction



In [10]:

    
def Run( expK,expV,W,B,Z, time_steps=10, skipFrames = 1  ):
    """
     
    The input and output is given in: W
    
    """

    for t_index in xrange(time_steps):  
        if t_index%skipFrames ==0 :
            print ' Step ', t_index
            
        Propagation_Step(expK,expV,W,B,Z)



In [11]:

    
def PlotWigner(W, (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) ):
    
    globalColor_min = -0.3
    globalColor_max =  0.3
    
    zero_position =  abs( globalColor_min ) / (abs(globalColor_max) + abs(globalColor_min)) 
    wigner_cdict = {'red' 	: 	((0., 0., 0.),
							(zero_position, 1., 1.), 
							(1., 1., 1.)),
					'green' :	((0., 0., 0.),
							(zero_position, 1., 1.),
							(1., 0., 0.)),
					'blue'	:	((0., 1., 1.),
							(zero_position, 1., 1.),
							(1., 0., 0.)) }
    wigner_cmap = matplotlib.colors.LinearSegmentedColormap('wigner_colormap', wigner_cdict, 256)
    
    fig, ax = plt.subplots(figsize=(20, 7))
    
    dX = 2*X_amplitude/float(X_gridDIM)
    dP = 2*P_amplitude/float(P_gridDIM)
    
    ax.imshow(W, origin='lower', 
              cmap = wigner_cmap ,
              vmin= globalColor_min, vmax=globalColor_max,
              extent=[-X_amplitude, X_amplitude-dX, -P_amplitude, P_amplitude -dP ])
    
    ax.grid('on')



In [12]:

    
def fftshift_Axes0( W ):
    return fftpack.fftshift( W, axes=0)

def fftshift_Axes1( W ):
    return fftpack.fftshift( W, axes=1)

CPU Evaluation



In [13]:

    
dt = 0.05

X_amplitude = 12
P_amplitude = 12

X_gridDIM = 512
P_gridDIM = 512

X,P,Theta,Lambda = SetGrid()
dX = 2*X_amplitude/float(X_gridDIM)
dP = 2*P_amplitude/float(P_gridDIM)



In [14]:

    
Potential_String = ' 0.5*pow(X,2) + 0.01*pow(X,4) '

def Potential(X):
    pow = np.power
    atan = np.arctan
    return eval ( Potential_String, np.__dict__, locals() )



In [15]:

    
KineticEnergy_String = ' pow(P,2)/(2.)'

def KineticEnergy(P):
    pow = np.power
    atan = np.arctan
    return eval ( KineticEnergy_String, np.__dict__, locals() )



In [16]:

    
expK , expV = SetExponentials(X,P,Theta,Lambda)



In [17]:

    
W,B,Z = MemoryAllocation(X,P,Theta,Lambda)



In [18]:

    
P_To_Theta_FFTW, Theta_To_P_FFTW,  X_To_Lambda_FFTW,  Lambda_To_X_FFTW = Set_FFTW_Plans(W,B,Z)



In [19]:

    
def InitialState(W,X,P,dX,dP):
    mass  = 1.
    omega = 1.
    
    W[:] = np.exp( -( (P-3)**2/mass + mass*omega**2*(X)**2)  )
    
    norm = np.sum(W)*dX*dP
    
    W[:] /= norm



In [20]:

    
InitialState(W,X,P,dX,dP)



In [21]:

    
PlotWigner(W, (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )



In [22]:

    
Run( expK,expV,W,B,Z, time_steps=500, skipFrames = 100  )









    



 Step  0
 Step  100
 Step  200
 Step  300
 Step  400



In [23]:

    
np.min(W)









    Out[23]:





-0.16857913679981978



In [24]:

    
np.sum(W)*dX*dP









    Out[24]:





1.0000000000000249



In [25]:

    
PlotWigner(W, (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )



In [26]:

    
#P_To_Theta_FFTW();
plt.imshow( fftshift_Axes0( np.abs(B)  ) , cmap = "cool" )









    Out[26]:





<matplotlib.image.AxesImage at 0x7f0acb49ff90>

GPU Evaluation



In [27]:

    
CUFFT_FORWARD = -1
CUFFT_INVERSE = 1

#CUFFT_Z2D = 0x6c
#CUFFT_D2Z = 0x6a



In [28]:

    
CUFFT_R2C = 0x2a # Real to complex (interleaved) 
CUFFT_C2R = 0x2c # Complex (interleaved) to real 
CUFFT_C2C = 0x29 # Complex to complex (interleaved) 
CUFFT_D2Z = 0x6a # Double to double-complex (interleaved) 
CUFFT_Z2D = 0x6c # Double-complex (interleaved) to double 
CUFFT_Z2Z = 0x69 # Double-complex to double-complex (interleaved)



In [29]:

    
_libcufft = ctypes.cdll.LoadLibrary('libcufft.so')

Plan



In [30]:

    
_libcufft.cufftPlanMany.argtypes = [ctypes.c_void_p,
                                    ctypes.c_int,
                                    ctypes.c_void_p,
                                    ctypes.c_void_p,
                                    ctypes.c_int,
                                    ctypes.c_int,
                                    ctypes.c_void_p,
                                    ctypes.c_int,
                                    ctypes.c_int,
                                    ctypes.c_int,
                                    ctypes.c_int]



In [31]:

    
def cufftPlanMany(rank, n, 
                  inembed, istride, idist, 
                  onembed, ostride, odist, fft_type, batch):
    """Create batched FFT plan configuration."""

    plan = ctypes.c_uint()
    status = _libcufft.cufftPlanMany(ctypes.byref(plan), rank, n,
                                     inembed, istride, idist, 
                                     onembed, ostride, odist, 
                                     fft_type, batch)
    return plan

Execution



In [32]:

    
# General CUFFT error:
class cufftError(Exception):
    """CUFFT error"""
    pass
# Exceptions corresponding to different CUFFT errors:
class cufftInvalidPlan(cufftError):
    """CUFFT was passed an invalid plan handle."""
    pass

class cufftAllocFailed(cufftError):
    """CUFFT failed to allocate GPU memory."""
    pass

class cufftInvalidType(cufftError):
    """The user requested an unsupported type."""
    pass

class cufftInvalidValue(cufftError):
    """The user specified a bad memory pointer."""
    pass

class cufftInternalError(cufftError):
    """Internal driver error."""
    pass

class cufftExecFailed(cufftError):
    """CUFFT failed to execute an FFT on the GPU."""
    pass

class cufftSetupFailed(cufftError):
    """The CUFFT library failed to initialize."""
    pass

class cufftInvalidSize(cufftError):
    """The user specified an unsupported FFT size."""
    pass

class cufftUnalignedData(cufftError):
    """Input or output does not satisfy texture alignment requirements."""
    pass

cufftExceptions = {
    0x1: cufftInvalidPlan,
    0x2: cufftAllocFailed,
    0x3: cufftInvalidType,
    0x4: cufftInvalidValue,
    0x5: cufftInternalError,
    0x6: cufftExecFailed,
    0x7: cufftSetupFailed,
    0x8: cufftInvalidSize,
    0x9: cufftUnalignedData
    }

def cufftCheckStatus(status):
	"""Raise an exception if the specified CUBLAS status is an error."""
	
	if status != 0:
		try:
			raise cufftExceptions[status]
		except KeyError:
			raise cufftError



In [33]:

    
###################     D2Z

_libcufft.cufftExecD2Z.restype = int
_libcufft.cufftExecD2Z.argtypes = [ctypes.c_uint,
                                   ctypes.c_void_p,
                                   ctypes.c_void_p,
                                   ctypes.c_int]

def cu_fft_D2Z(x_input_gpu, y_output_gpu, plan ):
    _libcufft.cufftExecD2Z(plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_FORWARD)

        
def cu_ifft_D2Z(x_input_gpu, y_output_gpu, plan ):
    _libcufft.cufftExecD2Z(plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_INVERSE )
    
####################     Z2D 

_libcufft.cufftExecZ2D.restype = int
_libcufft.cufftExecZ2D.argtypes = [ctypes.c_uint,
                                   ctypes.c_void_p,
                                   ctypes.c_void_p,
                                   ctypes.c_int]

def cu_fft_Z2D(x_input_gpu, y_output_gpu, plan ):
    status = _libcufft.cufftExecZ2D(
                                    plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_FORWARD)
    cufftCheckStatus(status)

    
def cu_ifft_Z2D(x_input_gpu, y_output_gpu, plan ):
    status = _libcufft.cufftExecZ2D(
                                    plan.handle, int(x_input_gpu.gpudata), int(y_output_gpu.gpudata), CUFFT_INVERSE  )    
    cufftCheckStatus(status)



In [34]:

    
def cuFFT_P_To_Theta( W_GPU, B_GPU, Z_GPU ):
    cu_fft_D2Z(  W_GPU , B_GPU , plan_D2Z_Axes0 )
    
def cuFFT_Theta_To_P( W_GPU, B_GPU, Z_GPU ):
    cu_ifft_Z2D( B_GPU , W_GPU , plan_Z2D_Axes0 )
    W_GPU /= float( W_GPU.shape[0] )
    
def cuFFT_X_To_Lambda( W_GPU, B_GPU, Z_GPU ):
    cu_fft_D2Z(  W_GPU , Z_GPU , plan_D2Z_Axes1 ) 
    
def cuFFT_Lambda_To_X( W_GPU, B_GPU, Z_GPU ):
    cu_ifft_Z2D(  Z_GPU , W_GPU , plan_Z2D_Axes1 ) 
    W_GPU /= float( W_GPU.shape[1] )



In [35]:

    
class Plan_2D_Axis0:
	"""

	"""
	def __init__(self, shape, cuFFT_type):  
		self.batch = shape[1]
		self.fft_type = cuFFT_type

		if len(shape) == 2:
			n = np.array([ shape[0] ])
			stride = shape[1]           # distance jump between two elements in the same series
			idist  = 1                  # distance jump between two consecutive batches

			inembed = np.array( [shape[0],    stride] ) 
			onembed = np.array( [shape[0],    stride] ) 

			rank = 1
			self.handle = cufftPlanMany(rank, n.ctypes.data,
                                              inembed.ctypes.data, stride , idist,  onembed.ctypes.data, stride, idist,
                                              self.fft_type, self.batch)
		else:
			raise ValueError('invalid transform dimension')

	def __del__(self):
        
		try:
			cufft.cufftDestroy(self.handle)
		except:
			pass



In [36]:

    
class Plan_2D_Axis1:
	"""
	
	"""
	def __init__(self, shape, cuFFT_type):  
		self.batch = shape[0]
		self.fft_type = cuFFT_type

		if len(shape) == 2:
			n = np.array([ shape[1] ])
			stride = 1                  # distance jump between two elements in the same series
			idist  = shape[1]	    # distance jump between two consecutive batches

			inembed = np.array( shape ) 
			onembed = np.array( [shape[0] , shape[1]/2 + 1] ) 

			rank = 1
			self.handle = cufftPlanMany(rank, n.ctypes.data,
                                              None, 1, 0,
                                              None, 1, 0,
                                              self.fft_type, self.batch)
		else:
			raise ValueError('invalid transform dimension')

	def __del__(self):
		try:
			cufft.cufftDestroy(self.handle)
		except:
			pass



In [37]:

    
plan_D2Z_Axes0 = Plan_2D_Axis0(  (P.size, X.size) , CUFFT_D2Z )

plan_Z2D_Axes0 = Plan_2D_Axis0(  (P.size, X.size) , CUFFT_Z2D )

plan_D2Z_Axes1 = Plan_2D_Axis1(  (P.size, X.size) , CUFFT_D2Z )

plan_Z2D_Axes1 = Plan_2D_Axis1(  (P.size, X.size) , CUFFT_Z2D )



In [38]:

    
def GPU_Propagation_Step( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU ):
        """
        Perform a single step propagation. The final Wigner function is not normalized.
        The input and output is given in: W
        """

        cuFFT_P_To_Theta(W_GPU,B_GPU,Z_GPU);
        B_GPU *= expV_GPU
        cuFFT_Theta_To_P(W_GPU,B_GPU,Z_GPU);

        cuFFT_X_To_Lambda(W_GPU,B_GPU,Z_GPU);
        Z_GPU *= expK_GPU
        cuFFT_Lambda_To_X(W_GPU,B_GPU,Z_GPU);

        cuFFT_P_To_Theta(W_GPU,B_GPU,Z_GPU);
        B_GPU *= expV_GPU
        cuFFT_Theta_To_P(W_GPU,B_GPU,Z_GPU);



In [39]:

    
def GPU_Run( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU, time_steps=10, skipFrames = 1  ):
    """
     
    The input and output is given in: W
    
    """

    for t_index in xrange(time_steps):  
        if t_index%skipFrames ==0 :
            print ' Step ', t_index
            
        GPU_Propagation_Step( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU )

Run



In [40]:

    
InitialState(W,X,P,dX,dP)

expK_GPU = gpuarray.to_gpu( np.ascontiguousarray( expK , dtype = np.complex128) )
expV_GPU = gpuarray.to_gpu( np.ascontiguousarray( expV , dtype = np.complex128) )



In [41]:

    
W_GPU = gpuarray.to_gpu( np.ascontiguousarray( W , dtype = np.float64) )



In [42]:

    
B_GPU = gpuarray.to_gpu( np.ascontiguousarray( B , dtype = np.complex128) )



In [43]:

    
Z_GPU = gpuarray.to_gpu( np.ascontiguousarray( Z , dtype = np.complex128) )



In [44]:

    
PlotWigner( W_GPU.get() , (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )



In [45]:

    
#######################33   RUN  #########################



In [46]:

    
#GPU_Propagation_Step( expK_GPU, expV_GPU, W_GPU,B_GPU,Z_GPU)



In [47]:

    
GPU_Run( expK_GPU, expV_GPU, W_GPU, B_GPU, Z_GPU, time_steps=500 , skipFrames = 100  )









    



 Step  0
 Step  100
 Step  200
 Step  300
 Step  400



In [48]:

    
PlotWigner( W_GPU.get() , (X_amplitude, P_amplitude), (X_gridDIM, P_gridDIM) )