Web: https://www.meetup.com/Tel-Aviv-Deep-Learning-Bootcamp/events/241762893/
Notebooks: On GitHub
Shlomo Kashani
In [1]:
# Ignore numpy warnings
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
%matplotlib inline
# Some defaults:
plt.rcParams['figure.figsize'] = (12, 6) # Default plot size
In [2]:
import pycuda
from pycuda import compiler
import pycuda.driver as drv
In [3]:
# Enumerate the CUDA devices visible to the driver API.
drv.init()  # must run before any other driver-API call
print("%d device(s) found." % drv.Device.count())
for ordinal in range(drv.Device.count()):
    dev = drv.Device(ordinal)
    # Parenthesized print works under both Python 2 and Python 3,
    # unlike the bare `print x` statement used originally.
    print("Device #%d: %s" % (ordinal, dev.name()))
drv  # bare expression: show the module repr as the cell's Out[] value
Out[3]:
In [6]:
import pycuda.autoinit  # creates and manages a context on the default device
import numpy
from pycuda.compiler import SourceModule

# Element-wise vector-add kernel: one thread per output element, indexed
# by threadIdx.x only -- so it is correct only for a single block with
# at least as many x-threads as elements.
srcGPU = """
#include <stdio.h>
__global__ void addGPU(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] + b[i];
  //dest[i] = threadIdx.x + threadIdx.y + blockDim.x;
  //dest[i] = blockDim.x;
  //printf("I am %d.%d\\n", threadIdx.x, threadIdx.y);
}
"""

# JIT-compile the CUDA C source (invokes nvcc under the hood).
srcGPUModule = SourceModule(srcGPU)
print(srcGPUModule)
In [12]:
ARR_SIZE = 16

# Test vectors must be float32: the kernel signature takes float*.
# The original drew numpy.random.randn values and immediately overwrote
# them with constants via ones_like -- dead work; build the constant
# arrays directly instead.
a = numpy.full(ARR_SIZE, 3, dtype=numpy.float32)
print(a)
b = numpy.full(ARR_SIZE, 2, dtype=numpy.float32)
print(b)
# Output buffer, same shape/dtype as the inputs, zero-initialized.
dest = numpy.zeros_like(a)
In [10]:
# Fetch the compiled kernel as a callable.
addGPUFunc = srcGPUModule.get_function("addGPU")
print(addGPUFunc)

# One block of ARR_SIZE x-threads, one thread per element.  The original
# block=(ARR_SIZE, 32, 1) launched 32 redundant y-threads per element that
# all raced to write the same dest[i] (same value, but 32x wasted work).
addGPUFunc(drv.Out(dest), drv.In(a), drv.In(b),
           block=(ARR_SIZE, 1, 1))
print(dest)
In [14]:
import timeit

rounds = 3

# The kernel indexes by threadIdx.x only, so the parallel work must live
# in the BLOCK dimension.  The original launch (grid=(ARR_SIZE,1,1),
# block=(1,1,1)) made every block compute dest[0] and left the remaining
# elements untouched.
print('pycuda', timeit.timeit(
    lambda: addGPUFunc(drv.Out(dest), drv.In(a), drv.In(b),
                       grid=(1, 1, 1),
                       block=(ARR_SIZE, 1, 1)),
    number=rounds))

# CPU baseline: use the same operation as the kernel (addition; the
# original timed a*b against an adding kernel).  Note this comparison is
# dominated by host<->device transfer for such a tiny array.
print('npy', timeit.timeit(lambda: a + b, number=rounds))
In [ ]: