Notebook showing how Blaze can compute a matrix-matrix multiplication on persistent storage (i.e. matrix sizes can exceed memory size)
In [ ]:
import shutil, os, os.path
from time import time
import blaze as blz
from blaze.algo.linalg import dot
In [ ]:
# Shapes of the operands and the result:
#   a.shape = (DIM1, DIM2); b.shape = (DIM2, DIM3); out.shape = (DIM1, DIM3)
DIM1 = 1000
DIM2 = 1000
DIM3 = 2000
In [ ]:
# Remove pre-existing data directories so the notebook can be re-run cleanly.
# (Indentation of the loop body was lost in the export; restored here.)
for d in ('a_dir', 'b_dir', 'out_dir'):
    if os.path.exists(d):
        shutil.rmtree(d)
In [ ]:
# Create array 'a' in directory 'a_dir'
# (%time is an IPython magic; blz.ones builds a (DIM1, DIM2) float64 array of
# ones, persisted on disk under 'a_dir' via the storage param — NOTE(review):
# exact on-disk layout depends on the Blaze version; confirm against blz docs)
%time a = blz.ones(blz.dshape('%d, %d, float64' % (DIM1, DIM2)), params=blz.params(storage='a_dir'))
In [ ]:
# Create array 'b' in directory 'b_dir'
# (%time is an IPython magic; blz.ones builds a (DIM2, DIM3) float64 array of
# ones, persisted on disk under 'b_dir' via the storage param)
%time b = blz.ones(blz.dshape('%d, %d, float64' % (DIM2, DIM3)), params=blz.params(storage='b_dir'))
In [ ]:
# Do the dot product and put the result in 'out' (directory 'out_dir')
# (per the notebook header, this is an out-of-core matmul: operands live on
# persistent storage and may exceed available memory)
%time out = dot(a, b, outname='out_dir')
In [ ]:
# Show the output
print "out:", `out`
In [ ]:
# Helper that reports how much space a dataset occupies on disk.
# (PEP 8 discourages binding a lambda to a name; a def gives a real
# __name__ and a docstring.)
def dsize(d):
    """Return the total size in bytes of the files directly inside directory *d*."""
    return sum(os.path.getsize(os.path.join(d, f)) for f in os.listdir(d))
In [ ]:
# On-disk sizes (bytes) for the two operands and the result
[dsize(path) for path in ('a_dir/data', 'b_dir/data', 'out_dir/data')]
In [ ]:
# Helper that reports the equivalent uncompressed (NumPy) size of an array.
# (PEP 8 discourages binding a lambda to a name; a def gives a real
# __name__ and a docstring.)
def nsize(arr):
    """Return arr.size * arr.itemsize — the in-memory byte count of *arr*."""
    return arr.size * arr.itemsize
In [ ]:
# Equivalent in-memory (NumPy) sizes for the operands and the result
[nsize(arr) for arr in (a[:], b[:], out[:])]
In [ ]:
# Compression ratios (uncompressed NumPy size / compressed on-disk size).
# float() is needed because in Python 2 dividing two ints truncates, which
# would silently round every ratio down to a whole number.
[float(nsize(arr)) / dsize(data)
 for data, arr in (('a_dir/data', a[:]), ('b_dir/data', b[:]), ('out_dir/data', out[:]))]
In [ ]: