In [1]:
import skcuda.fft as cu_fft
In [2]:
import skcuda.cusolver as solver
In [ ]:
In [3]:
# %load https://raw.github.com/lebedov/scikits.cuda/master/demos/fft_demo.py
#!/usr/bin/env python
"""
Demonstrates how to use the PyCUDA interface to CUFFT to compute 1D FFTs.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.fft as cu_fft

print('Testing fft/ifft..')

N = 4096 * 16

# Real-valued input and a CPU round-trip reference for validation.
x = np.asarray(np.random.rand(N), np.float32)
xf = np.fft.fft(x)
y = np.real(np.fft.ifft(xf))

x_gpu = gpuarray.to_gpu(x)
# A real-to-complex transform of N samples produces N//2 + 1 complex
# coefficients.  Integer division (//) is required: plain '/' yields a
# float under Python 3 and breaks gpuarray.empty.
xf_gpu = gpuarray.empty(N // 2 + 1, np.complex64)
plan_forward = cu_fft.Plan(x_gpu.shape, np.float32, np.complex64)
cu_fft.fft(x_gpu, xf_gpu, plan_forward)

y_gpu = gpuarray.empty_like(x_gpu)
plan_inverse = cu_fft.Plan(x_gpu.shape, np.complex64, np.float32)
# The final True asks ifft to scale the result by 1/N so the round trip
# reproduces the original input.
cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)

print('Success status: ', np.allclose(y, y_gpu.get(), atol=1e-6))

print('Testing in-place fft..')
x = np.asarray(np.random.rand(N) + 1j * np.random.rand(N), np.complex64)
x_gpu = gpuarray.to_gpu(x)

# Complex-to-complex plan; input and output buffers are the same array,
# so the transform runs in place.
plan = cu_fft.Plan(x_gpu.shape, np.complex64, np.complex64)
cu_fft.fft(x_gpu, x_gpu, plan)
cu_fft.ifft(x_gpu, x_gpu, plan, True)

print('Success status: ', np.allclose(x, x_gpu.get(), atol=1e-6))
In [4]:
import skcuda
In [5]:
# Display the installed scikit-cuda version (shown as the cell's Out value).
skcuda.__version__
Out[5]:
In [6]:
# NOTE(review): exploratory access of a `cusolver` attribute on the
# skcuda.cusolver module — confirm this attribute exists; if it does not,
# this cell raises AttributeError on a fresh run.
skcuda.cusolver.cusolver
In [ ]:
In [7]:
%%time
# Time one in-place forward FFT on the GPU, then fetch a single element.
# NOTE(review): GPU kernel launches are typically asynchronous — the
# .get() device-to-host copy likely forces completion, so the wall time
# includes the transfer; confirm before quoting this as pure FFT time.
cu_fft.fft(x_gpu, x_gpu, plan)
x_gpu.get()[1234]
In [8]:
%%time
# CPU baseline: NumPy FFT of the same array, for comparison with the GPU
# timing in the previous cell.
xfft = np.fft.fft(x)
In [9]:
# Array length expressed in Mi-elements (units of 2**20).
# NOTE(review): under Python 2 '/' is integer division here; under
# Python 3 this yields a float — use // if an integer is intended.
x.shape[0]/1024**2
Out[9]:
In [10]:
# Spot-check a single element of the GPU result after the timed transform.
x_gpu.get()[1234]
Out[10]:
In [11]:
# %load https://github.com/lebedov/scikits.cuda/raw/master/demos/dot_demo.py
#!/usr/bin/env python
"""
Demonstrates multiplication of two matrices on the GPU.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np

import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()

import string  # NOTE(review): unused in this cell; kept in case a later cell relies on it.

# Double precision is only supported by devices with compute
# capability >= 1.3:
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])

for t in demo_types:
    print('Testing matrix multiplication for type ' + str(np.dtype(t)))
    # Complex dtypes get complex random data; real dtypes get real data.
    if np.iscomplexobj(t()):
        a = np.asarray(np.random.rand(10, 5) + 1j * np.random.rand(10, 5), t)
        b = np.asarray(np.random.rand(5, 5) + 1j * np.random.rand(5, 5), t)
        c = np.asarray(np.random.rand(5, 5) + 1j * np.random.rand(5, 5), t)
    else:
        a = np.asarray(np.random.rand(10, 5), t)
        b = np.asarray(np.random.rand(5, 5), t)
        c = np.asarray(np.random.rand(5, 5), t)

    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = gpuarray.to_gpu(c)

    # Chain two GPU matrix products: d = (a . b) . c.
    temp_gpu = culinalg.dot(a_gpu, b_gpu)
    d_gpu = culinalg.dot(temp_gpu, c_gpu)
    # Release the intermediate's device memory explicitly rather than
    # waiting for garbage collection to reclaim GPU storage.
    temp_gpu.gpudata.free()
    del temp_gpu
    print('Success status: ', np.allclose(np.dot(np.dot(a, b), c), d_gpu.get()))

    print('Testing vector multiplication for type ' + str(np.dtype(t)))
    if np.iscomplexobj(t()):
        d = np.asarray(np.random.rand(5) + 1j * np.random.rand(5), t)
        e = np.asarray(np.random.rand(5) + 1j * np.random.rand(5), t)
    else:
        d = np.asarray(np.random.rand(5), t)
        e = np.asarray(np.random.rand(5), t)

    d_gpu = gpuarray.to_gpu(d)
    e_gpu = gpuarray.to_gpu(e)

    # Dot product of two vectors returns a host scalar.
    temp = culinalg.dot(d_gpu, e_gpu)
    print('Success status: ', np.allclose(np.dot(d, e), temp))
In [ ]:
In [47]:
In [ ]: