Notebook showing how Blaze can compute a matrix-matrix multiplication out of core, on persistent storage — i.e. the matrices may be larger than the available memory.


In [ ]:
import shutil, os, os.path
from time import time
import blaze as blz
from blaze.algo.linalg import dot

In [ ]:
# Array dimensions: a.shape = (DIM1, DIM2); b.shape = (DIM2, DIM3);
# the product out = a . b has shape (DIM1, DIM3).
DIM1 = 1000
DIM2 = 1000
DIM3 = 2000

In [ ]:
# Remove data directories left over from a previous run.
# ignore_errors=True makes the call a no-op when the directory is absent,
# avoiding the exists()/rmtree() check-then-act race of the original.
for d in ('a_dir', 'b_dir', 'out_dir'):
    shutil.rmtree(d, ignore_errors=True)

In [ ]:
# Create array 'a' in directory 'a_dir'
# (a persistent blaze array of ones with dshape '(DIM1, DIM2), float64';
# %time is an IPython magic that reports how long the statement took)
%time a = blz.ones(blz.dshape('%d, %d, float64' % (DIM1, DIM2)), params=blz.params(storage='a_dir'))

In [ ]:
# Create array 'b' in directory 'b_dir'
# (same as 'a' above but with shape (DIM2, DIM3) so the product a . b is defined)
%time b = blz.ones(blz.dshape('%d, %d, float64' % (DIM2, DIM3)), params=blz.params(storage='b_dir'))

In [ ]:
# Do the dot product and put the result in 'out' (directory 'out_dir')
# outname names the persistent storage directory for the result array;
# presumably dot() works on-disk so operands need not fit in RAM -- that is
# the point of this notebook (see the description at the top of the file).
%time out = dot(a, b, outname='out_dir')

In [ ]:
# Show the output
print "out:", `out`

In [ ]:
# Function that shows sizes on disk
def dsize(d):
    """Return the total size in bytes of the entries directly inside directory *d*."""
    # Generator expression instead of a throwaway list; PEP 8 prefers
    # 'def' over binding a lambda to a name.
    return sum(os.path.getsize(os.path.join(d, f)) for f in os.listdir(d))

In [ ]:
# Bytes actually stored on disk for each operand and for the result
[dsize(d) for d in ('a_dir/data', 'b_dir/data', 'out_dir/data')]

In [ ]:
# Function that shows equivalent sizes for NumPy
def nsize(arr):
    """Return the in-memory size in bytes of array *arr* (element count * bytes per element)."""
    # PEP 8 prefers 'def' over binding a lambda to a name.
    return arr.size * arr.itemsize

In [ ]:
# In-memory (uncompressed NumPy) sizes of the operands and the result;
# slicing with [:] materializes each blaze array as a NumPy array.
[nsize(arr) for arr in (a[:], b[:], out[:])]

In [ ]:
# Compression ratios (in-memory size / on-disk size):
# float() forces true division -- under Python 2, int / int truncates,
# which would floor the ratios to whole numbers.
[float(nsize(arr)) / dsize(d) for d, arr in (('a_dir/data', a[:]), ('b_dir/data', b[:]), ('out_dir/data', out[:]))]

In [ ]: