4D Fast Accurate Fourier Transform - (64, 64, 64, 64)

Complex to Complex


In [32]:
import numpy as np
import ctypes
from ctypes import *

import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import math

import time

In [33]:
%matplotlib inline

Loading FFT routines


In [34]:
DIR_BASE = "/home/robert/Documents/FAFT/for interview/FAFT_64_64_64_64_Z2Z/"

#DIR_BASE = "/home/renan/Documents/source/python/FAFT/FAFT_64_64_64_64_Z2Z/"

# FAFT 64-points
_faft64_4D = ctypes.cdll.LoadLibrary( DIR_BASE+'FAFT_64_64_64_64_Z2Z.so' )
_faft64_4D.FAFT64_4D_Z2Z.restype = int
_faft64_4D.FAFT64_4D_Z2Z.argtypes = [ctypes.c_void_p, ctypes.c_double, ctypes.c_double, 
                                       ctypes.c_int, ctypes.c_int, ctypes.c_double ]

_faft64_4D.IFAFT64_4D_Z2Z.argtypes = [ctypes.c_void_p, ctypes.c_double, ctypes.c_double, 
                                       ctypes.c_int, ctypes.c_int, ctypes.c_double ]

faft64  = _faft64_4D.FAFT64_4D_Z2Z
ifaft64 = _faft64_4D.IFAFT64_4D_Z2Z

Initializing Grid


In [35]:
def Gaussian(x,mu,sigma):
    return np.exp( - (x-mu)**2/sigma**2/2. , dtype=np.float64 )/(sigma*np.sqrt( 2*np.pi  ))

def fftGaussian(p,mu,sigma):
    return np.exp(-1j*mu*p , dtype=np.float64 )*np.exp( - p**2*sigma**2/2. , dtype=np.float64 )

The 4D space is defined as: $(x, p_x, y, p_y)$ and is known as the PHASE SPACE

The Fourier transformed 4D space is defined as: $(\lambda_x, \theta_x, \lambda_y, \theta_y)$ and is known as the AMBIGUITY SPACE


In [36]:
gridDIM_p_y = 64     #axis 0
gridDIM_y   = 64     #axis 1    
gridDIM_p_x = 64     #axis 2
gridDIM_x   = 64     #axis 3

size = gridDIM_y*gridDIM_p_y*gridDIM_x*gridDIM_p_x

axes0 = 0
axes1 = 1
axes2 = 2
axes3 = 3

#m = size

segment_axes0 = 0
segment_axes1 = 0
segment_axes2 = 0
segment_axes3 = 0

normFactor = 1

In [37]:
# Phase space window 
p_y_amplitude = 9.    #axis 0
y_amplitude   = 9.    #axis 1
p_x_amplitude = 9.    #axis 2
x_amplitude   = 9.     #axis 3

# Ambiguity space window 
theta_y_amplitude  = 9.   #axis 0             
lambda_y_amplitude = 9.   #axis 1
theta_x_amplitude  = 9.   #axis 2
lambda_x_amplitude = 9.   #axis 3

# Phase space step size 
dp_y  = 2*p_y_amplitude/float(gridDIM_p_y)           #axis 0
dy    = 2*y_amplitude  /float(gridDIM_y)             #axis 1
dp_x  = 2*p_x_amplitude/float(gridDIM_p_x)           #axis 2
dx    = 2*x_amplitude  /float(gridDIM_x)             #axis 3  

# Ambiguity space step size
dtheta_y  = 2*theta_y_amplitude /float(gridDIM_y)     #axis 0
dlambda_y = 2*lambda_y_amplitude/float(gridDIM_p_y)   #axis 1
dtheta_x  = 2*theta_x_amplitude /float(gridDIM_x)     #axis 2
dlambda_x = 2*lambda_x_amplitude/float(gridDIM_p_x)   #axis 3

# delta parameters
delta_p_y =   dp_y*dtheta_y/(2*np.pi)     #axis 0
delta_y  =      dy*dlambda_y/(2*np.pi)    #axis 1
delta_p_x =   dp_x*dtheta_x/(2*np.pi)     #axis 2
delta_x  =      dx*dlambda_x/(2*np.pi)    #axis 3

# Phase space range
p_y_range = np.linspace( -p_y_amplitude, p_y_amplitude-dp_y,  gridDIM_p_y)    #axis 0
y_range   = np.linspace( -y_amplitude,   y_amplitude  -dy,    gridDIM_y  )    #axis 1
p_x_range = np.linspace( -p_x_amplitude, p_x_amplitude-dp_x,  gridDIM_p_x)    #axis 2
x_range   = np.linspace( -x_amplitude,   x_amplitude  -dx,    gridDIM_x  )    #axis 3

# Ambiguity space range
theta_y_range  = np.linspace( -theta_y_amplitude,  theta_y_amplitude -dtheta_y,    gridDIM_y   ) #axis0 
lambda_y_range = np.linspace( -lambda_y_amplitude, lambda_y_amplitude-dlambda_y,   gridDIM_p_y ) #axis1
theta_x_range  = np.linspace( -theta_x_amplitude,  theta_x_amplitude -dtheta_x,    gridDIM_x   ) #axis 2
lambda_x_range = np.linspace( -lambda_x_amplitude, lambda_x_amplitude-dlambda_x,   gridDIM_p_x ) #axis 3

# Grid 
y   =   y_range[ np.newaxis, :, np.newaxis, np.newaxis ]   #axis 1
p_y = p_y_range[ :, np.newaxis, np.newaxis, np.newaxis ]   #axis 0
p_x = p_x_range[ np.newaxis, np.newaxis, :, np.newaxis ]   #axis 2
x   =   x_range[ np.newaxis, np.newaxis, np.newaxis, : ]   #axis 3

In [38]:
def norm_GPU(W):
    return gpuarray.sum(W).real.get()*dx*dy*dp_x*dp_y

Initializing the state


In [39]:
# Gaussian parameters in the Phase space -------------------------

y_mu = 1.;     y_sigma = 1.        #axis 1
p_y_mu = 1.;   p_y_sigma = 1.      #axis 0
p_x_mu = 1.;   p_x_sigma = 1.      #axis 2
x_mu = 1.;     x_sigma = 1.        #axis 3


#-----------------------------------------------------------------

f = Gaussian(x,x_mu,x_sigma)*Gaussian(y,y_mu,y_sigma)*Gaussian(p_x,p_x_mu, p_x_sigma)*Gaussian(p_y,p_y_mu,p_y_sigma)

#f = f + 1j*np.zeros_like(f)
f = f + 0j

print ' Amplitude x = ',  x_amplitude
print ' Amplitude p_x = ',p_x_amplitude
print '        '

print 'mu_x = ', x_mu
print 'mu_y = ', y_mu
print 'p_x_mu = ', p_x_mu
print 'p_y_mu = ', p_y_mu

print '   '

print 'n     = ', x.size
print 'dx    = ', dx
print 'dp_x    = ', dp_x
print '           standard fft dp = ',2 * np.pi /( 2*x_amplitude ) , '     '
print '    '
print 'delta_x = ', delta_x


print '    '

print 'The Gaussian extends to the numerical error in single precision:'  
print '    min = ', np.min(f.real)

print '  '
print 'Normalization = ', np.sum( f ).real*dx*dy*dp_x*dp_y
print ' '
print 'Axis = [ x , px , y , py ]   ->   [ lambda_x , theta_x , lambda_y , theta_y ]  '


 Amplitude x =  9.0
 Amplitude p_x =  9.0
        
mu_x =  1.0
mu_y =  1.0
p_x_mu =  1.0
p_y_mu =  1.0
   
n     =  64
dx    =  0.28125
dp_x    =  0.28125
           standard fft dp =  0.349065850399      
    
delta_x =  0.0125894046782
    
The Gaussian extends to the numerical error in single precision:
    min =  3.50545085319e-89
  
Normalization =  1.0
 
Axis = [ x , px , y , py ]   ->   [ lambda_x , theta_x , lambda_y , theta_y ]  

In [40]:
# Phase space y
plt.imshow( f[gridDIM_x/2, gridDIM_p_x/2, :, : ].real, 
           extent=[-y_amplitude , y_amplitude-dy, -p_y_amplitude , p_y_amplitude-dp_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 7.1, '$W$' , **axis_font)
plt.xlabel('$y$',**axis_font)
plt.ylabel('$p_y$',**axis_font)

plt.colorbar()

print 'max-min = ', f[gridDIM_x/2, gridDIM_p_x/2, :, : ].real.max(), '  ', f[gridDIM_x/2, gridDIM_p_x/2, :, : ].real.min()


max-min =  0.00917402522843    3.46655097701e-46

In [41]:
# Phase space x 
plt.imshow( f[:, :, gridDIM_y/2, gridDIM_p_y/2].real, 
           extent=[-x_amplitude , x_amplitude-dx, -p_x_amplitude , p_x_amplitude-dp_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 14.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$p_x$',**axis_font)

plt.colorbar()

print 'max-min = ', f[:, :, gridDIM_y/2, gridDIM_p_y/2].real.max(), '  ', f[:, :, gridDIM_y/2, gridDIM_p_y/2].real.min()


max-min =  0.00917402522843    3.46655097701e-46

In [42]:
# Configuration space  
plt.imshow( f[ :, gridDIM_p_x/2 , : , gridDIM_p_y/2].real, 
           extent=[-y_amplitude , y_amplitude-dy, -x_amplitude , x_amplitude-dx] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 7.1, '$W$' , **axis_font)
plt.xlabel('$y$',**axis_font)
plt.ylabel('$x$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()


Out[42]:
<matplotlib.colorbar.Colorbar at 0x7f825b728290>

In [43]:
# Momentum space
plt.imshow( f[gridDIM_x/2 , :, gridDIM_y/2, :  ].real, 
           extent=[-p_y_amplitude , p_y_amplitude-dp_y, -p_x_amplitude , p_x_amplitude-dp_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 7.1, '$W$' , **axis_font)
plt.xlabel('$p_y$',**axis_font)
plt.ylabel('$p_x$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()


Out[43]:
<matplotlib.colorbar.Colorbar at 0x7f825b5faa90>

$W$ TRANSFORM


In [44]:
# Allocate GPU memory and copy data to GPU

print '         GPU memory Total              ', pycuda.driver.mem_get_info()[1]/float(2**30) , 'GB'
print '         GPU memory Free  (Before)     ', pycuda.driver.mem_get_info()[0]/float(2**30) , 'GB'

#F_gpu = gpuarray.zeros( ( gridDIM_p_y, gridDIM_y, gridDIM_p_x, gridDIM_x ), dtype=np.complex128 )
F_gpu = gpuarray.zeros( ( gridDIM_x, gridDIM_p_x, gridDIM_y, gridDIM_p_y ), dtype=np.complex128 )

F_gpu[:,:,:,:] = f[:,:,:,:]

print '         GPU memory Free  (After)      ', pycuda.driver.mem_get_info()[0]/float(2**30) , 'GB'

#F_gpu[:,:] = np.ascontiguousarray( f, dtype = np.complex64 )[:,:]


         GPU memory Total               1.94927978516 GB
         GPU memory Free  (Before)      0.976135253906 GB
         GPU memory Free  (After)       0.976135253906 GB

In [45]:
print F_gpu.shape
print f.shape
#print f
#print f[np.newaxis,:,:].shape


(64, 64, 64, 64)
(64, 64, 64, 64)

In [46]:
(5.1767578125 - 4.62921142578)*10


Out[46]:
5.475463867199997

Forward Transform


In [47]:
# Executing FFT

t_init = time.time() 

faft64( int(F_gpu.gpudata), dp_y, delta_p_y, segment_axes0, axes0, normFactor )
faft64( int(F_gpu.gpudata), dy,   delta_y,   segment_axes1, axes1, normFactor )
faft64( int(F_gpu.gpudata), dp_x, delta_p_x, segment_axes2, axes2, normFactor )
faft64( int(F_gpu.gpudata), dx,   delta_x,   segment_axes3, axes3, normFactor )

t_end = time.time() 

print 'computation time = ', t_end - t_init


computation time =  7.54605388641

In [48]:
print ' deltas =' , delta_x, delta_p_x, delta_y, delta_p_y


 deltas = 0.0125894046782 0.0125894046782 0.0125894046782 0.0125894046782

In [49]:
# Arbitrary Renormalization 
F_gpu /= size

In [50]:
# Ambiguity space y

plt.imshow( F_gpu.get()[gridDIM_x/2, gridDIM_p_x/2, :, : ].real, 
           extent=[lambda_y_amplitude-dlambda_y, -lambda_y_amplitude, theta_y_amplitude-dtheta_y, -theta_y_amplitude] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 9.1, '$W$' , **axis_font)
plt.xlabel('$\\theta_y$',**axis_font)
plt.ylabel('$\\lambda_y$',**axis_font)

plt.colorbar()

print 'max-min = ', F_gpu.get()[gridDIM_x/2, gridDIM_p_x/2, :, : ].real.max(), '  ', F_gpu.get()[gridDIM_x/2, gridDIM_p_x/2, :, :].real.min()


max-min =  16.0    -2.83495209922

In [51]:
# Ambiguity space x

plt.imshow( F_gpu.get()[ :, :, gridDIM_y/2, gridDIM_p_y/2].real, 
           extent=[-lambda_x_amplitude , lambda_x_amplitude-dlambda_x, -theta_x_amplitude, theta_x_amplitude-dtheta_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 12.1, '$W$' , **axis_font)
plt.xlabel('$\\theta_x$',**axis_font)
plt.ylabel('$\\lambda_x$',**axis_font)

plt.colorbar()

print 'max-min = ', F_gpu.get()[ :, :, gridDIM_y/2, gridDIM_p_y/2 ].real.max(), '  ', F_gpu.get()[ :, :, gridDIM_y/2, gridDIM_p_y/2 ].real.min()


max-min =  16.0    -2.83495209922

In [52]:
# Conjugate configuration space

plt.imshow( F_gpu.get()[ gridDIM_p_y/2 , :: , gridDIM_p_x/2 , :: ].real, 
           extent=[-theta_x_amplitude , theta_x_amplitude-dtheta_x, -theta_y_amplitude , theta_y_amplitude-dtheta_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 9.1, '$W$' , **axis_font)
plt.xlabel('$\\theta_x$',**axis_font)
plt.ylabel('$\\theta_y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()


Out[52]:
<matplotlib.colorbar.Colorbar at 0x7f826009afd0>

In [53]:
# Conjugate momentum space

plt.imshow( F_gpu.get()[ :, gridDIM_y/2 , :, gridDIM_x/2  ].real, 
           extent=[-lambda_x_amplitude , lambda_x_amplitude-dlambda_x, -lambda_y_amplitude , lambda_y_amplitude-dlambda_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 9.2, '$W$' , **axis_font)
plt.xlabel('$\\lambda_x$',**axis_font)
plt.ylabel('$\\lambda_y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()


Out[53]:
<matplotlib.colorbar.Colorbar at 0x7f825b964c10>

Inverse Transform

# Executing FFT t_init = time.time() faft64( int(F_gpu.gpudata), dp_y, -delta_p_y, segment_axes0, axes0, normFactor ) faft64( int(F_gpu.gpudata), dy, -delta_y, segment_axes1, axes1, normFactor ) faft64( int(F_gpu.gpudata), dp_x, -delta_p_x, segment_axes2, axes2, normFactor ) faft64( int(F_gpu.gpudata), dx, -delta_x, segment_axes3, axes3, normFactor ) t_end = time.time() print 'computation time = ', t_end - t_init

In [54]:
# Executing iFFT
# Calling the kernels with -delta_y, -delta_x, -delta_z, -delta_u

t_init = time.time() 

ifaft64( int(F_gpu.gpudata), dp_y, delta_p_y, segment_axes0, axes0, normFactor )
ifaft64( int(F_gpu.gpudata), dy,   delta_y,   segment_axes1, axes1, normFactor )
ifaft64( int(F_gpu.gpudata), dp_x, delta_p_x, segment_axes2, axes2, normFactor )
ifaft64( int(F_gpu.gpudata), dx,   delta_x,   segment_axes3, axes3, normFactor )

t_end = time.time()

#print 'computation time = ', t_end - t_init

In [55]:
norm = norm_GPU(F_gpu)
F_gpu /= norm

In [56]:
# Phase space y
#
# The AMPLITUDE IN Py IS TOO BIG. THERE MUST BE AN ERROR 
#
plt.imshow( F_gpu.get()[::,::,gridDIM_p_x/2, gridDIM_x/2 ].real, 
           extent=[-y_amplitude , y_amplitude-dx, -p_y_amplitude , p_y_amplitude-dp_y] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 7.1, '$W$' , **axis_font)
plt.xlabel('$y$',**axis_font)
plt.ylabel('$p_y$',**axis_font)

plt.colorbar()


Out[56]:
<matplotlib.colorbar.Colorbar at 0x7f824b4d8090>

In [57]:
# Phase space x

plt.imshow( F_gpu.get()[ gridDIM_p_y/2, gridDIM_y/2 , :, :].real, 
           extent=[-x_amplitude , x_amplitude-dx, -p_x_amplitude , p_x_amplitude-dp_x] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 14.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$p_x$',**axis_font)

plt.colorbar()

print 'max-min = ', F_gpu.get()[ gridDIM_p_y/2, gridDIM_y/2 , :, :].real.max(), '  ', F_gpu.get()[ gridDIM_p_y/2, gridDIM_y/2 , :, :].real.min()


max-min =  0.00917402522843    -1.46959071066e-17

In [58]:
# Momentum space
plt.imshow( F_gpu.get()[ :, gridDIM_y/2 , :, gridDIM_x/2  ].real, 
           extent=[-p_x_amplitude , p_x_amplitude-dx, -p_y_amplitude , p_y_amplitude-dy] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 7.1, '$W$' , **axis_font)
plt.xlabel('$p_x$',**axis_font)
plt.ylabel('$p_y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()


Out[58]:
<matplotlib.colorbar.Colorbar at 0x7f825b0e18d0>

In [59]:
# Configuration space
plt.imshow( F_gpu.get()[ gridDIM_p_y/2, : , gridDIM_p_x/2, :  ].real, 
           extent=[-x_amplitude , x_amplitude-dx, -y_amplitude , y_amplitude-dy] ,
           origin='lower', interpolation='none')

axis_font = {'size':'24'}
plt.text( 0., 7.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()


Out[59]:
<matplotlib.colorbar.Colorbar at 0x7f825b2bb890>

In [60]:
plt.semilogy(
F_gpu.get()[ gridDIM_p_y/2, gridDIM_y/2 , :, gridDIM_x/2  ].real )

plt.semilogy(
f[ gridDIM_p_y/2, gridDIM_y/2 , :, gridDIM_x/2  ].real )

plt.ylim(1e-20,1)


Out[60]:
(1e-20, 1)

In [ ]: