4D Fast Accurate Fourier Transform

Complex to Complex



In [1]:

    
import numpy as np
import ctypes
from ctypes import *

import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import math

import time



In [2]:

    
%matplotlib inline

Loading FFT routines



In [3]:

    
gridDIM = 64

size = gridDIM**4

axes0 = 0
axes1 = 1
axes2 = 2
axes3 = 3

segment_axes0 = 0
segment_axes1 = 0
segment_axes2 = 0
segment_axes3 = 0

normFactor = 10.0

DIR_BASE = "/home/rcabrera/Documents/source/Python/FFT-dev/FAFT128_C2C/"

# FAFT
_faft128_4D = ctypes.cdll.LoadLibrary( DIR_BASE+'FAFT128_4D_C2C.so' )
_faft128_4D.FAFT128_4D_C2C.restype = int
_faft128_4D.FAFT128_4D_C2C.argtypes = [ctypes.c_void_p, 
                                         ctypes.c_float, ctypes.c_float, ctypes.c_int, 
                                         ctypes.c_int, ctypes.c_float]

cuda_faft = _faft128_4D.FAFT128_4D_C2C

Initializing Data

Gaussian



In [4]:

    
def Gaussian(x,mu,sigma):
    return np.exp( - (x-mu)**2/sigma**2/2. , dtype=np.float32 )/(sigma*np.sqrt( 2*np.pi  ))

def fftGaussian(p,mu,sigma):
    return np.exp(-1j*mu*p , dtype=np.float32 )*np.exp( - p**2*sigma**2/2. , dtype=np.float32 )



In [5]:

    
# Gaussian parameters
x_mu = 1.5
x_sigma = 1.

p_x_mu = 1.5
p_x_sigma = 1.

y_mu = 1.
y_sigma = 1.

p_y_mu = 1.
p_y_sigma = 1.

# Grid parameters
x_amplitude = 12
p_x_amplitude = 12.
y_amplitude = 10.
p_y_amplitude = 10.

theta_x_amplitude = 10.
lambda_x_amplitude = 10.
theta_y_amplitude  = 5.           # With the traditional method p amplitude is fixed to: 2 * np.pi /( 2*x_amplitude ) 
lambda_y_amplitude = 5.

# Phase space 
dx    = 2*x_amplitude  /float(gridDIM)  
dp_x  = 2*p_x_amplitude/float(gridDIM)  
dy    = 2*y_amplitude  /float(gridDIM) 
dp_y  = 2*p_y_amplitude/float(gridDIM)

# Conjugate Phase space (Fourier transform)
dtheta_x  = 2*theta_x_amplitude /float(gridDIM) 
dlambda_x = 2*lambda_x_amplitude/float(gridDIM)
dtheta_y  = 2*theta_y_amplitude /float(gridDIM) 
dlambda_y = 2*lambda_y_amplitude/float(gridDIM)

# delta parameters
delta_x  =     dx*dtheta_x/(2*np.pi)
delta_p_x = dp_x*dlambda_x/(2*np.pi)
delta_y  =     dy*dtheta_y/(2*np.pi)
delta_p_y = dp_y*dlambda_y/(2*np.pi)

#
#                       Phase space range
#
x_range   = np.linspace( -x_amplitude,   x_amplitude  -dx,    gridDIM  ) 
p_x_range = np.linspace( -p_x_amplitude, p_x_amplitude-dp_x,  gridDIM  ) 
y_range   = np.linspace( -y_amplitude,   y_amplitude  -dy,    gridDIM  ) 
p_y_range = np.linspace( -p_y_amplitude, p_y_amplitude-dp_y,  gridDIM  ) 

# Conjugate  phase space
theta_x_range  = np.linspace( -theta_x_amplitude,  theta_x_amplitude -dtheta_x,    gridDIM ) 
lambda_x_range = np.linspace( -lambda_x_amplitude, lambda_x_amplitude-dlambda_x,   gridDIM ) 
theta_y_range  = np.linspace( -theta_y_amplitude,  theta_y_amplitude -dtheta_y,    gridDIM ) 
lambda_y_range = np.linspace( -lambda_y_amplitude, lambda_y_amplitude-dlambda_y,   gridDIM ) 

# Grid 
x   =   x_range[ np.newaxis, np.newaxis, np.newaxis, : ] 
p_x = p_x_range[ np.newaxis, np.newaxis, :, np.newaxis ] 
y   =   y_range[ np.newaxis, :, np.newaxis, np.newaxis ] 
p_y = p_y_range[ :, np.newaxis, np.newaxis, np.newaxis ]



In [6]:

    
def norm_GPU(W):
    return gpuarray.sum(W).real.get()*dx*dy*dp_x*dp_y



In [7]:

    
f = Gaussian(x,x_mu,x_sigma)*Gaussian(y,y_mu,y_sigma)*Gaussian(p_x,p_x_mu, p_x_sigma)*Gaussian(p_y,p_y_mu,p_y_sigma)

#f = f + 1j*np.zeros_like(f)
f = f + 0j

print ' Amplitude x = ',  x_amplitude
print ' Amplitude p_x = ',p_x_amplitude
print '        '

print 'x_mu = ', x_mu
print 'y_mu = ', y_mu
print 'p_x_mu = ', p_x_mu
print 'p_y_mu = ', p_y_mu

print '   '

print 'n     = ', x.size
print 'dx    = ', dx
print 'dp_x    = ', dp_x
print '           standard fft dp = ',2 * np.pi /( 2*x_amplitude ) , '     '
print '    '
print 'delta_x = ', delta_x


print '    '

print 'The Gaussian extends to the numerical error in single precision:'  
print '    min = ', np.min(f.real)









    



 Amplitude x =  12
 Amplitude p_x =  12.0
        
mu_x =  1.5
mu_y =  1.0
p_x_mu =  1.5
p_y_mu =  1.0
   
n     =  64
dx    =  0.375
dp_x    =  0.375
           standard fft dp =  0.261799387799      
    
delta_x =  0.0186509698936
    
The Gaussian extends to the numerical error in single precision:
    min =  0.0



In [8]:

    
plt.imshow( f[ gridDIM/2, gridDIM/2 , :, :].real, 
           extent=[-x_amplitude , x_amplitude-dx, -p_x_amplitude , p_x_amplitude-dp_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 12.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$p_x$',**axis_font)

plt.colorbar()

print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), '  ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()









    



max-min =  0.0093185    0.0



In [9]:

    
plt.imshow( f[::,::,gridDIM/2, gridDIM/2 ].real, 
           extent=[-y_amplitude , y_amplitude-dx, -p_y_amplitude , p_y_amplitude-dp_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$y$',**axis_font)
plt.ylabel('$p_y$',**axis_font)

plt.colorbar()

print 'max-min = ', f[::,::,gridDIM/2, gridDIM/2 ].real.max(), '  ', f[::,::,gridDIM/2, gridDIM/2 ].real.min()









    



max-min =  0.00265939    0.0



In [10]:

    
plt.imshow( f[ gridDIM/2 , :: , gridDIM/2 , :: ].real, 
           extent=[-x_amplitude , x_amplitude-dx, -y_amplitude , y_amplitude-dy] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()









    Out[10]:





<matplotlib.colorbar.Colorbar at 0x7f9f58918cd0>

$W$ TRANSFORM



In [11]:

    
print '         GPU memory Total (before)      ', pycuda.driver.mem_get_info()[1]/float(2**30) , 'GB'
print '         GPU memory Free  (before)      ', pycuda.driver.mem_get_info()[0]/float(2**30) , 'GB'

F_gpu = gpuarray.zeros( ( gridDIM, gridDIM, gridDIM, gridDIM ), dtype=np.complex64 )

F_gpu[:,:,:,:] = f[:,:,:,:]

print '         GPU memory Total (after)      ', pycuda.driver.mem_get_info()[1]/float(2**30) , 'GB'
print '         GPU memory Free  (after)      ', pycuda.driver.mem_get_info()[0]/float(2**30) , 'GB'









    



         GPU memory Total (before)       5.1767578125 GB
         GPU memory Free  (before)       4.58404541016 GB
         GPU memory Total (after)       5.1767578125 GB
         GPU memory Free  (after)       4.45806884766 GB



In [12]:

    
F_gpu.shape









    Out[12]:





(64, 64, 64, 64)



In [13]:

    
print 'deltas = ', delta_p_x, delta_x, delta_p_y, delta_y









    



deltas =  0.0186509698936 0.0186509698936 0.00777123745566 0.00777123745566

Forward Transform



In [14]:

    
# Executing FFT
t_init = time.time() 

cuda_faft( int(F_gpu.gpudata), dp_x, delta_p_x, segment_axes0, axes0, normFactor )
cuda_faft( int(F_gpu.gpudata), dx, delta_x, segment_axes1, axes1, normFactor )
cuda_faft( int(F_gpu.gpudata), dp_y, delta_p_y, segment_axes2, axes2, normFactor )
cuda_faft( int(F_gpu.gpudata), dy, delta_y, segment_axes3, axes3, normFactor )

t_end = time.time() 

print 'computation time = ', t_end - t_init









    



computation time =  0.41027712822



In [18]:

    
# Arbitrary normalization 
F_gpu /= size

Real Parts



In [19]:

    
plt.imshow( F_gpu.get()[ gridDIM/2, gridDIM/2 , :, :].real, 
           extent=[-theta_x_amplitude , theta_x_amplitude  - dtheta_x,
                   -lambda_x_amplitude, lambda_x_amplitude - dlambda_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 12.1, '$Re W$' , **axis_font)
plt.xlabel('$\\theta_x$',**axis_font)
plt.ylabel('$\\lambda_x$',**axis_font)

plt.colorbar()

print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), '  ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()









    



max-min =  0.0093185    0.0



In [20]:

    
plt.imshow( F_gpu.get()[ :,:,gridDIM/2, gridDIM/2 ].real, 
           extent=[-theta_y_amplitude , theta_y_amplitude  - dtheta_y,
                   -lambda_y_amplitude, lambda_y_amplitude - dlambda_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 8.1, '$Re W$' , **axis_font)
plt.xlabel('$\\theta_y$',**axis_font)
plt.ylabel('$\\lambda_y$',**axis_font)

plt.colorbar()

print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), '  ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()









    



max-min =  0.0093185    0.0

Imaginary Parts



In [21]:

    
plt.imshow( F_gpu.get()[ gridDIM/2, gridDIM/2 , :, :].imag, 
           extent=[-theta_x_amplitude , theta_x_amplitude  - dtheta_x,
                   -lambda_x_amplitude, lambda_x_amplitude - dlambda_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 12.1, '$Im\,W$' , **axis_font)
plt.xlabel('$\\theta_x$',**axis_font)
plt.ylabel('$\\lambda_x$',**axis_font)

plt.colorbar()

print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), '  ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()









    



max-min =  0.0093185    0.0



In [22]:

    
plt.imshow( F_gpu.get()[ :,:,gridDIM/2, gridDIM/2 ].imag, 
           extent=[-theta_y_amplitude , theta_y_amplitude  - dtheta_y,
                   -lambda_y_amplitude, lambda_y_amplitude - dlambda_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 8.1, '$Im\,W$' , **axis_font)
plt.xlabel('$\\theta_y$',**axis_font)
plt.ylabel('$\\lambda_y$',**axis_font)

plt.colorbar()

print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), '  ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()









    



max-min =  0.0093185    0.0

Inverse



In [23]:

    
# Executing iFFT
t_init = time.time() 

cuda_faft( int(F_gpu.gpudata), dp_x, -delta_p_x, segment_axes0, axes0, normFactor )
cuda_faft( int(F_gpu.gpudata), dx, -delta_x, segment_axes1, axes1, normFactor )
cuda_faft( int(F_gpu.gpudata), dp_y, -delta_p_y, segment_axes2, axes2, normFactor )
cuda_faft( int(F_gpu.gpudata), dy, -delta_y, segment_axes3, axes3, normFactor )

t_end = time.time() 

print 'computation time = ', t_end - t_init









    



computation time =  0.445533037186



In [27]:

    
norm = norm_GPU(F_gpu)
F_gpu /= norm



In [28]:

    
plt.imshow( F_gpu.get()[ gridDIM/2, gridDIM/2 , :, :].real, 
           extent=[-x_amplitude , x_amplitude-dx, -p_x_amplitude , p_x_amplitude-dp_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 12.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$p_x$',**axis_font)

plt.colorbar()

print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), '  ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()









    



max-min =  0.0093185    0.0



In [29]:

    
plt.imshow( F_gpu.get()[::,::,gridDIM/2, gridDIM/2 ].real, 
           extent=[-y_amplitude , y_amplitude-dx, -p_y_amplitude , p_y_amplitude-dp_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$y$',**axis_font)
plt.ylabel('$p_y$',**axis_font)

plt.colorbar()

print 'max-min = ', f[::,::,gridDIM/2, gridDIM/2 ].real.max(), '  ', f[::,::,gridDIM/2, gridDIM/2 ].real.min()









    



max-min =  0.00265939    0.0



In [30]:

    
plt.imshow( F_gpu.get()[ gridDIM/2 , :: , gridDIM/2 , :: ].real, 
           extent=[-x_amplitude , x_amplitude-dx, -y_amplitude , y_amplitude-dy] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()









    Out[30]:





<matplotlib.colorbar.Colorbar at 0x7f9f58c0ae50>



In [46]:

    
# Comparing with the original 
plt.plot(
( F_gpu.get() - f  ).flatten().real )









    Out[46]:





[<matplotlib.lines.Line2D at 0x7f9f58c1f0d0>]



In [ ]: