In [176]:
import math
import numpy
def blockmul (A, BLOCK_SIZE):
"""
Square a matrix in blocks and count the number of operations.
"""
N = A.shape[0]
N_padded = N
if N%BLOCK_SIZE > 0:
N_padded += BLOCK_SIZE-N%BLOCK_SIZE
A_padded = numpy.matrix(numpy.zeros((N_padded, N_padded), dtype=A.dtype))
A_padded[0:N, 0:N] = A
number_operations = 0
A2 = numpy.matrix(numpy.zeros((N_padded, N_padded), dtype=A.dtype))
for i in range(0, N_padded, BLOCK_SIZE):
for j in range(0, N_padded, BLOCK_SIZE):
for k in range(0, N_padded, BLOCK_SIZE):
norm_A = numpy.linalg.norm(A_padded[i:i+BLOCK_SIZE, k:k+BLOCK_SIZE], 'fro')
norm_B = numpy.linalg.norm(A_padded[k:k+BLOCK_SIZE, j:j+BLOCK_SIZE], 'fro')
if norm_A*norm_B > 0:
number_operations += BLOCK_SIZE**3
A2[i:i+BLOCK_SIZE, j:j+BLOCK_SIZE] += (A_padded[i:i+BLOCK_SIZE, k:k+BLOCK_SIZE]
*A_padded[k:k+BLOCK_SIZE, j:j+BLOCK_SIZE])
return ( A2[0:N, 0:N], number_operations )
def nonzeros (A):
"""
Count the number of nonzeros in the matrix.
"""
return numpy.sum( [ 1 for i in range(N**2) if A.reshape((N**2))[0, i] != 0 ] )
N = 129
BLOCK_SIZE = 4
BANDWIDTH = 4
ITERATIONS = 5
A = numpy.matrix(numpy.zeros((N, N), dtype=float))
for i in range(N):
for j in range(N):
if abs(i-j) <= BANDWIDTH:
A[i, j] = 1/N**(1/2)
print("N = %d, BLOCK_SIZE = %d, BANDWIDTH = %d" % (N, BLOCK_SIZE, BANDWIDTH))
print("number non-zeros = %d -> %1.2f%% fill-in" % (nonzeros(A), nonzeros(A)/N**2))
for i in range(ITERATIONS):
A2, number_operations = blockmul(A, BLOCK_SIZE)
if abs(numpy.sum(A**2-A2)) > 1e-10:
print(A**2-A2)
raise Exception("incorrect matrix product")
A = A**2
print("%d: number non-zeros = %d -> %1.2f%% fill-in" % (i+1, nonzeros(A), nonzeros(A)/N**2))
print("%d: N^3 = %d" % (i+1, N**3))
print("%d: SpAMM = %d" % (i+1, number_operations))
print("%d: SpAMM/N^3 = %f" % (i+1, number_operations/N**3))
print("%d: norm = %f" % (i+1, numpy.linalg.norm(A, 'fro')))
In [ ]: