In [1]:
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.linalg as culinalg


/home/ubuntu/.virtualenvs/pycuda_ve/lib/python3.6/site-packages/skcuda/cublas.py:283: UserWarning: creating CUBLAS context to get version number
  warnings.warn('creating CUBLAS context to get version number')

In [2]:
culinalg.init()

In [3]:
np.random.seed(0)

In [4]:
datatype = np.float64

In [5]:
m=10
k=5
n=5

In [6]:
a = np.asarray(np.random.rand(m, k), datatype)
b = np.asarray(np.random.rand(k, n), datatype)
c = np.asarray(np.random.rand(n, n), datatype)

In [7]:
print(a)


[[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548 ]
 [0.64589411 0.43758721 0.891773   0.96366276 0.38344152]
 [0.79172504 0.52889492 0.56804456 0.92559664 0.07103606]
 [0.0871293  0.0202184  0.83261985 0.77815675 0.87001215]
 [0.97861834 0.79915856 0.46147936 0.78052918 0.11827443]
 [0.63992102 0.14335329 0.94466892 0.52184832 0.41466194]
 [0.26455561 0.77423369 0.45615033 0.56843395 0.0187898 ]
 [0.6176355  0.61209572 0.616934   0.94374808 0.6818203 ]
 [0.3595079  0.43703195 0.6976312  0.06022547 0.66676672]
 [0.67063787 0.21038256 0.1289263  0.31542835 0.36371077]]

In [8]:
print(b)


[[0.57019677 0.43860151 0.98837384 0.10204481 0.20887676]
 [0.16130952 0.65310833 0.2532916  0.46631077 0.24442559]
 [0.15896958 0.11037514 0.65632959 0.13818295 0.19658236]
 [0.36872517 0.82099323 0.09710128 0.83794491 0.09609841]
 [0.97645947 0.4686512  0.97676109 0.60484552 0.73926358]]

In [9]:
print(c)


[[0.03918779 0.28280696 0.12019656 0.2961402  0.11872772]
 [0.31798318 0.41426299 0.0641475  0.69247212 0.56660145]
 [0.26538949 0.52324805 0.09394051 0.5759465  0.9292962 ]
 [0.31856895 0.66741038 0.13179786 0.7163272  0.28940609]
 [0.18319136 0.58651293 0.02010755 0.82894003 0.00469548]]

In [10]:
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)
c_gpu = gpuarray.to_gpu(c)

In [11]:
ab_gpu = culinalg.dot(a_gpu, b_gpu)

In [12]:
abc_gpu = culinalg.dot(ab_gpu, c_gpu)

In [13]:
ab_gpu.gpudata.free()

In [14]:
del(ab_gpu)

In [15]:
np.dot(np.dot(a, b), c)


Out[15]:
array([[1.43651736, 2.98517151, 0.54877081, 3.72456244, 2.76044292],
       [1.65242367, 3.41390842, 0.63670503, 4.24456895, 3.17738347],
       [1.40795162, 2.83146124, 0.53254437, 3.54515654, 2.7037258 ],
       [1.42858651, 3.09067669, 0.57310134, 3.80521787, 2.667534  ],
       [1.54880894, 3.12835541, 0.58454438, 3.92405367, 3.01490293],
       [1.29082878, 2.74180016, 0.51061817, 3.37417168, 2.65848819],
       [0.98070932, 1.93560849, 0.35270893, 2.44985382, 1.76396947],
       [1.85948905, 3.88602922, 0.72228449, 4.83965422, 3.49428157],
       [1.1432638 , 2.49964804, 0.45311794, 3.07940753, 2.32746611],
       [0.9363571 , 1.98926336, 0.37529049, 2.46520452, 1.90239479]])

In [16]:
abc_gpu.get()


Out[16]:
array([[1.43651736, 2.98517151, 0.54877081, 3.72456244, 2.76044292],
       [1.65242367, 3.41390842, 0.63670503, 4.24456895, 3.17738347],
       [1.40795162, 2.83146124, 0.53254437, 3.54515654, 2.7037258 ],
       [1.42858651, 3.09067669, 0.57310134, 3.80521787, 2.667534  ],
       [1.54880894, 3.12835541, 0.58454438, 3.92405367, 3.01490293],
       [1.29082878, 2.74180016, 0.51061817, 3.37417168, 2.65848819],
       [0.98070932, 1.93560849, 0.35270893, 2.44985382, 1.76396947],
       [1.85948905, 3.88602922, 0.72228449, 4.83965422, 3.49428157],
       [1.1432638 , 2.49964804, 0.45311794, 3.07940753, 2.32746611],
       [0.9363571 , 1.98926336, 0.37529049, 2.46520452, 1.90239479]])

In [17]:
np.allclose(np.dot(np.dot(a, b), c), abc_gpu.get())


Out[17]:
True

Midiendo tiempos de cómputo:


In [ ]:
import time

In [18]:
np.random.seed(0)

In [19]:
datatype = np.float64

In [20]:
m=500
k=500
n=500

In [21]:
a = np.asarray(np.random.rand(m, k), datatype)
b = np.asarray(np.random.rand(k, n), datatype)
c = np.asarray(np.random.rand(n, n), datatype)

En la GPU:


In [23]:
inicio_tiempo = time.time()
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)
c_gpu = gpuarray.to_gpu(c)
fin_load = time.time()

In [24]:
print('tiempo para cargar datos en la GPU:', fin_load - inicio_tiempo)


tiempo para cargar datos en la GPU: 0.002521991729736328

In [25]:
gpu_multmat_inicio = time.time()
ab_gpu = culinalg.dot(a_gpu, b_gpu)
abc_gpu = culinalg.dot(ab_gpu, c_gpu)
gpu_multmat_fin = time.time()

In [26]:
ab_gpu.gpudata.free()
del(ab_gpu)

In [27]:
load_datos_inicio = time.time()
abc_gpu_result = abc_gpu.get()
load_datos_fin = time.time()

In [28]:
print('tiempo para realizar cargar resultados del device al host:',
      load_datos_fin-load_datos_inicio, 'sec')


tiempo para realizar cargar resultados del device al host: 0.0016934871673583984 sec

In [29]:
print('tiempo para realizar multiplicación de 3 matrices en la GPU:',
      gpu_multmat_fin-gpu_multmat_inicio, 'sec')


tiempo para realizar multiplicación de 3 matrices en la GPU: 0.0013403892517089844 sec

In [30]:
print('Total (incluyendo load host-device, device-host):', load_datos_fin - inicio_tiempo, 'sec')


Total (incluyendo load host-device, device-host): 0.05366945266723633 sec

En la CPU:


In [31]:
cpu_multmat_inicio=time.time()
np.dot(np.dot(a, b), c)
cpu_multmat_fin = time.time()

In [32]:
print('tiempo para realizar multiplicación de 3 matrices en la CPU:',
      cpu_multmat_fin-cpu_multmat_inicio, 'sec')


tiempo para realizar multiplicación de 3 matrices en la CPU: 0.01398921012878418 sec

In [ ]:


In [ ]: