In [1]:
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.linalg as culinalg
In [2]:
# Initialize scikit-cuda's linalg module before any culinalg routine is used below.
culinalg.init()
In [3]:
# Fix NumPy's global RNG seed so the random matrices generated below are reproducible.
np.random.seed(0)
In [4]:
# Element type used for every matrix created below (double precision).
datatype = np.float64
In [5]:
# Matrix dimensions for the small correctness check:
# a is (m, k), b is (k, n) and c is (n, n).
m, k, n = 10, 5, 5
In [6]:
# Draw the three host-side operands in order (a, then b, then c) so the
# random stream is consumed exactly as three separate assignments would.
a, b, c = (
    np.asarray(np.random.rand(rows, cols), datatype)
    for rows, cols in ((m, k), (k, n), (n, n))
)
In [7]:
# Show the (m, k) host matrix a.
print(a)
In [8]:
# Show the (k, n) host matrix b.
print(b)
In [9]:
# Show the (n, n) host matrix c.
print(c)
In [10]:
# Copy the three host matrices to the device, in the order a, b, c.
a_gpu, b_gpu, c_gpu = map(gpuarray.to_gpu, (a, b, c))
In [11]:
# First product on the device: ab = a . b (intermediate result, released further below).
ab_gpu = culinalg.dot(a_gpu, b_gpu)
In [12]:
# Second product on the device: abc = (a . b) . c, the value later compared against NumPy.
abc_gpu = culinalg.dot(ab_gpu, c_gpu)
In [13]:
# Explicitly release the device buffer behind the intermediate product;
# ab_gpu must not be used after this call.
ab_gpu.gpudata.free()
In [14]:
# Drop the Python name too, so the freed intermediate cannot be reused by mistake.
del ab_gpu
In [15]:
# CPU reference value: (a . b) . c, evaluated left to right with NumPy.
a.dot(b).dot(c)
Out[15]:
In [16]:
# Copy the GPU result back to the host as a NumPy array and display it.
abc_gpu.get()
Out[16]:
In [17]:
# Check that the GPU product matches the CPU reference within np.allclose's default tolerances.
np.allclose(a.dot(b).dot(c), abc_gpu.get())
Out[17]:
In [ ]:
import time
In [18]:
# Re-seed NumPy's global RNG so the timing experiment starts from a known state.
np.random.seed(0)
In [19]:
# Element type for the timing experiment (double precision).
datatype = np.float64
In [20]:
# Square 500 x 500 operands for the timing experiment.
m = k = n = 500
In [21]:
# Host-side operands for the timing run, drawn in the order a, b, c.
# copy=False mirrors np.asarray: no extra copy when the dtype already matches.
a = np.random.rand(m, k).astype(datatype, copy=False)
b = np.random.rand(k, n).astype(datatype, copy=False)
c = np.random.rand(n, n).astype(datatype, copy=False)
En la GPU:
In [23]:
# Phase 1: time the host -> device copy of the three operands (a, b, c in order).
inicio_tiempo = time.time()
a_gpu, b_gpu, c_gpu = (gpuarray.to_gpu(host_matrix) for host_matrix in (a, b, c))
fin_load = time.time()
In [24]:
# Report the elapsed wall-clock time (seconds) of the host -> device upload phase.
print('tiempo para cargar datos en la GPU:', fin_load - inicio_tiempo)
In [25]:
# Phase 2: time the two chained GPU products, ab = a . b and abc = ab . c.
# GPU work can be queued asynchronously: the call may return to Python before
# the device has finished. Without a synchronization point the stop timestamp
# would capture launch overhead only, and the real compute time would show up
# in the later device -> host copy instead.
pycuda.autoinit.context.synchronize()  # drain pending work before starting the clock
gpu_multmat_inicio = time.time()
ab_gpu = culinalg.dot(a_gpu, b_gpu)
abc_gpu = culinalg.dot(ab_gpu, c_gpu)
pycuda.autoinit.context.synchronize()  # wait until both products have completed
gpu_multmat_fin = time.time()
In [26]:
# Release the intermediate product's device buffer, then remove the name so
# the freed array cannot be touched again.
ab_gpu.gpudata.free()
del ab_gpu
In [27]:
# Phase 3: time the device -> host copy of the final product.
# NOTE(review): if earlier GPU work is still running when get() is called, the
# copy presumably has to wait for it, so this interval may also include compute
# time unless the device was synchronized beforehand — confirm.
load_datos_inicio = time.time()
abc_gpu_result = abc_gpu.get()
load_datos_fin = time.time()
In [28]:
# Report the elapsed time (seconds) of the device -> host download phase.
print('tiempo para realizar cargar resultados del device al host:',
load_datos_fin-load_datos_inicio, 'sec')
In [29]:
# Report the elapsed time (seconds) measured around the two GPU products.
print('tiempo para realizar multiplicación de 3 matrices en la GPU:',
gpu_multmat_fin-gpu_multmat_inicio, 'sec')
In [30]:
# Report the GPU total as the sum of the three measured phases (upload,
# compute, download). Subtracting the first timestamp from the last one would
# also count the time that passes between cell executions (printing, freeing
# memory, or a human pausing between cells), which is not part of the workload.
print('Total (incluyendo load host-device, device-host):',
      (fin_load - inicio_tiempo)
      + (gpu_multmat_fin - gpu_multmat_inicio)
      + (load_datos_fin - load_datos_inicio),
      'sec')
En la CPU:
In [31]:
# CPU baseline: time (a . b) . c evaluated with NumPy; the product itself is discarded.
cpu_multmat_inicio = time.time()
a.dot(b).dot(c)
cpu_multmat_fin = time.time()
In [32]:
# Report the elapsed time (seconds) of the NumPy (CPU) triple product.
print('tiempo para realizar multiplicación de 3 matrices en la CPU:',
cpu_multmat_fin-cpu_multmat_inicio, 'sec')
In [ ]:
In [ ]: