In [1]:
import numpy as np
import ctypes
from ctypes import *
import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import math
import time
In [2]:
%matplotlib inline
In [3]:
gridDIM = 64
size = gridDIM**4
axes0 = 0
axes1 = 1
axes2 = 2
axes3 = 3
segment_axes0 = 0
segment_axes1 = 0
segment_axes2 = 0
segment_axes3 = 0
normFactor = 10.0
DIR_BASE = "/home/rcabrera/Documents/source/Python/FFT-dev/FAFT128_C2C/"
# FAFT
_faft128_4D = ctypes.cdll.LoadLibrary( DIR_BASE+'FAFT128_4D_C2C.so' )
_faft128_4D.FAFT128_4D_C2C.restype = int
_faft128_4D.FAFT128_4D_C2C.argtypes = [ctypes.c_void_p,
ctypes.c_float, ctypes.c_float, ctypes.c_int,
ctypes.c_int, ctypes.c_float]
cuda_faft = _faft128_4D.FAFT128_4D_C2C
In [4]:
def Gaussian(x,mu,sigma):
return np.exp( - (x-mu)**2/sigma**2/2. , dtype=np.float32 )/(sigma*np.sqrt( 2*np.pi ))
def fftGaussian(p,mu,sigma):
return np.exp(-1j*mu*p , dtype=np.float32 )*np.exp( - p**2*sigma**2/2. , dtype=np.float32 )
In [5]:
# Gaussian parameters
x_mu = 1.5
x_sigma = 1.
p_x_mu = 1.5
p_x_sigma = 1.
y_mu = 1.
y_sigma = 1.
p_y_mu = 1.
p_y_sigma = 1.
# Grid parameters
x_amplitude = 12
p_x_amplitude = 12.
y_amplitude = 10.
p_y_amplitude = 10.
theta_x_amplitude = 10.
lambda_x_amplitude = 10.
theta_y_amplitude = 5. # With the traditional method p amplitude is fixed to: 2 * np.pi /( 2*x_amplitude )
lambda_y_amplitude = 5.
# Phase space
dx = 2*x_amplitude /float(gridDIM)
dp_x = 2*p_x_amplitude/float(gridDIM)
dy = 2*y_amplitude /float(gridDIM)
dp_y = 2*p_y_amplitude/float(gridDIM)
# Conjugate Phase space (Fourier transform)
dtheta_x = 2*theta_x_amplitude /float(gridDIM)
dlambda_x = 2*lambda_x_amplitude/float(gridDIM)
dtheta_y = 2*theta_y_amplitude /float(gridDIM)
dlambda_y = 2*lambda_y_amplitude/float(gridDIM)
# delta parameters
delta_x = dx*dtheta_x/(2*np.pi)
delta_p_x = dp_x*dlambda_x/(2*np.pi)
delta_y = dy*dtheta_y/(2*np.pi)
delta_p_y = dp_y*dlambda_y/(2*np.pi)
#
# Phase space range
#
x_range = np.linspace( -x_amplitude, x_amplitude -dx, gridDIM )
p_x_range = np.linspace( -p_x_amplitude, p_x_amplitude-dp_x, gridDIM )
y_range = np.linspace( -y_amplitude, y_amplitude -dy, gridDIM )
p_y_range = np.linspace( -p_y_amplitude, p_y_amplitude-dp_y, gridDIM )
# Conjugate phase space
theta_x_range = np.linspace( -theta_x_amplitude, theta_x_amplitude -dtheta_x, gridDIM )
lambda_x_range = np.linspace( -lambda_x_amplitude, lambda_x_amplitude-dlambda_x, gridDIM )
theta_y_range = np.linspace( -theta_y_amplitude, theta_y_amplitude -dtheta_y, gridDIM )
lambda_y_range = np.linspace( -lambda_y_amplitude, lambda_y_amplitude-dlambda_y, gridDIM )
# Grid
x = x_range[ np.newaxis, np.newaxis, np.newaxis, : ]
p_x = p_x_range[ np.newaxis, np.newaxis, :, np.newaxis ]
y = y_range[ np.newaxis, :, np.newaxis, np.newaxis ]
p_y = p_y_range[ :, np.newaxis, np.newaxis, np.newaxis ]
In [6]:
def norm_GPU(W):
return gpuarray.sum(W).real.get()*dx*dy*dp_x*dp_y
In [7]:
f = Gaussian(x,x_mu,x_sigma)*Gaussian(y,y_mu,y_sigma)*Gaussian(p_x,p_x_mu, p_x_sigma)*Gaussian(p_y,p_y_mu,p_y_sigma)
#f = f + 1j*np.zeros_like(f)
f = f + 0j
print ' Amplitude x = ', x_amplitude
print ' Amplitude p_x = ',p_x_amplitude
print ' '
print 'x_mu = ', x_mu
print 'y_mu = ', y_mu
print 'p_x_mu = ', p_x_mu
print 'p_y_mu = ', p_y_mu
print ' '
print 'n = ', x.size
print 'dx = ', dx
print 'dp_x = ', dp_x
print ' standard fft dp = ',2 * np.pi /( 2*x_amplitude ) , ' '
print ' '
print 'delta_x = ', delta_x
print ' '
print 'The Gaussian extends to the numerical error in single precision:'
print ' min = ', np.min(f.real)
In [8]:
plt.imshow( f[ gridDIM/2, gridDIM/2 , :, :].real,
extent=[-x_amplitude , x_amplitude-dx, -p_x_amplitude , p_x_amplitude-dp_x] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 12.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$p_x$',**axis_font)
plt.colorbar()
print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), ' ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()
In [9]:
plt.imshow( f[::,::,gridDIM/2, gridDIM/2 ].real,
extent=[-y_amplitude , y_amplitude-dx, -p_y_amplitude , p_y_amplitude-dp_y] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$y$',**axis_font)
plt.ylabel('$p_y$',**axis_font)
plt.colorbar()
print 'max-min = ', f[::,::,gridDIM/2, gridDIM/2 ].real.max(), ' ', f[::,::,gridDIM/2, gridDIM/2 ].real.min()
In [10]:
plt.imshow( f[ gridDIM/2 , :: , gridDIM/2 , :: ].real,
extent=[-x_amplitude , x_amplitude-dx, -y_amplitude , y_amplitude-dy] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()
Out[10]:
In [11]:
print ' GPU memory Total (before) ', pycuda.driver.mem_get_info()[1]/float(2**30) , 'GB'
print ' GPU memory Free (before) ', pycuda.driver.mem_get_info()[0]/float(2**30) , 'GB'
F_gpu = gpuarray.zeros( ( gridDIM, gridDIM, gridDIM, gridDIM ), dtype=np.complex64 )
F_gpu[:,:,:,:] = f[:,:,:,:]
print ' GPU memory Total (after) ', pycuda.driver.mem_get_info()[1]/float(2**30) , 'GB'
print ' GPU memory Free (after) ', pycuda.driver.mem_get_info()[0]/float(2**30) , 'GB'
In [12]:
F_gpu.shape
Out[12]:
In [13]:
print 'deltas = ', delta_p_x, delta_x, delta_p_y, delta_y
In [14]:
# Executing FFT
t_init = time.time()
cuda_faft( int(F_gpu.gpudata), dp_x, delta_p_x, segment_axes0, axes0, normFactor )
cuda_faft( int(F_gpu.gpudata), dx, delta_x, segment_axes1, axes1, normFactor )
cuda_faft( int(F_gpu.gpudata), dp_y, delta_p_y, segment_axes2, axes2, normFactor )
cuda_faft( int(F_gpu.gpudata), dy, delta_y, segment_axes3, axes3, normFactor )
t_end = time.time()
print 'computation time = ', t_end - t_init
In [18]:
# Arbitrary normalization
F_gpu /= size
In [19]:
plt.imshow( F_gpu.get()[ gridDIM/2, gridDIM/2 , :, :].real,
extent=[-theta_x_amplitude , theta_x_amplitude - dtheta_x,
-lambda_x_amplitude, lambda_x_amplitude - dlambda_x] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 12.1, '$Re W$' , **axis_font)
plt.xlabel('$\\theta_x$',**axis_font)
plt.ylabel('$\\lambda_x$',**axis_font)
plt.colorbar()
print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), ' ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()
In [20]:
plt.imshow( F_gpu.get()[ :,:,gridDIM/2, gridDIM/2 ].real,
extent=[-theta_y_amplitude , theta_y_amplitude - dtheta_y,
-lambda_y_amplitude, lambda_y_amplitude - dlambda_y] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 8.1, '$Re W$' , **axis_font)
plt.xlabel('$\\theta_y$',**axis_font)
plt.ylabel('$\\lambda_y$',**axis_font)
plt.colorbar()
print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), ' ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()
In [21]:
plt.imshow( F_gpu.get()[ gridDIM/2, gridDIM/2 , :, :].imag,
extent=[-theta_x_amplitude , theta_x_amplitude - dtheta_x,
-lambda_x_amplitude, lambda_x_amplitude - dlambda_x] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 12.1, '$Im\,W$' , **axis_font)
plt.xlabel('$\\theta_x$',**axis_font)
plt.ylabel('$\\lambda_x$',**axis_font)
plt.colorbar()
print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), ' ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()
In [22]:
plt.imshow( F_gpu.get()[ :,:,gridDIM/2, gridDIM/2 ].imag,
extent=[-theta_y_amplitude , theta_y_amplitude - dtheta_y,
-lambda_y_amplitude, lambda_y_amplitude - dlambda_y] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 8.1, '$Im\,W$' , **axis_font)
plt.xlabel('$\\theta_y$',**axis_font)
plt.ylabel('$\\lambda_y$',**axis_font)
plt.colorbar()
print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), ' ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()
In [23]:
# Executing iFFT
t_init = time.time()
cuda_faft( int(F_gpu.gpudata), dp_x, -delta_p_x, segment_axes0, axes0, normFactor )
cuda_faft( int(F_gpu.gpudata), dx, -delta_x, segment_axes1, axes1, normFactor )
cuda_faft( int(F_gpu.gpudata), dp_y, -delta_p_y, segment_axes2, axes2, normFactor )
cuda_faft( int(F_gpu.gpudata), dy, -delta_y, segment_axes3, axes3, normFactor )
t_end = time.time()
print 'computation time = ', t_end - t_init
In [27]:
norm = norm_GPU(F_gpu)
F_gpu /= norm
In [28]:
plt.imshow( F_gpu.get()[ gridDIM/2, gridDIM/2 , :, :].real,
extent=[-x_amplitude , x_amplitude-dx, -p_x_amplitude , p_x_amplitude-dp_x] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 12.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$p_x$',**axis_font)
plt.colorbar()
print 'max-min = ', f[ gridDIM/2, gridDIM/2 , :, :].real.max(), ' ', f[ gridDIM/2, gridDIM/2 , :, :].real.min()
In [29]:
plt.imshow( F_gpu.get()[::,::,gridDIM/2, gridDIM/2 ].real,
extent=[-y_amplitude , y_amplitude-dx, -p_y_amplitude , p_y_amplitude-dp_y] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$y$',**axis_font)
plt.ylabel('$p_y$',**axis_font)
plt.colorbar()
print 'max-min = ', f[::,::,gridDIM/2, gridDIM/2 ].real.max(), ' ', f[::,::,gridDIM/2, gridDIM/2 ].real.min()
In [30]:
plt.imshow( F_gpu.get()[ gridDIM/2 , :: , gridDIM/2 , :: ].real,
extent=[-x_amplitude , x_amplitude-dx, -y_amplitude , y_amplitude-dy] ,
origin='lower', interpolation='none')
axis_font = {'size':'24'}
plt.text( 0., 10.1, '$W$' , **axis_font)
plt.xlabel('$x$',**axis_font)
plt.ylabel('$y$',**axis_font)
plt.axes().set_aspect(1)
plt.colorbar()
Out[30]:
In [46]:
# Comparing with the original
plt.plot(
( F_gpu.get() - f ).flatten().real )
Out[46]:
In [ ]: