In [176]:
import math
import numpy

def blockmul (A, BLOCK_SIZE):
    """
    Square a matrix in blocks and count the number of operations.
    """
    
    N = A.shape[0]
    N_padded = N
    if N%BLOCK_SIZE > 0:
        N_padded += BLOCK_SIZE-N%BLOCK_SIZE

    A_padded = numpy.matrix(numpy.zeros((N_padded, N_padded), dtype=A.dtype))
    A_padded[0:N, 0:N] = A

    number_operations = 0
    A2 = numpy.matrix(numpy.zeros((N_padded, N_padded), dtype=A.dtype))
    for i in range(0, N_padded, BLOCK_SIZE):
        for j in range(0, N_padded, BLOCK_SIZE):
            for k in range(0, N_padded, BLOCK_SIZE):
                norm_A = numpy.linalg.norm(A_padded[i:i+BLOCK_SIZE, k:k+BLOCK_SIZE], 'fro')
                norm_B = numpy.linalg.norm(A_padded[k:k+BLOCK_SIZE, j:j+BLOCK_SIZE], 'fro')
                if norm_A*norm_B > 0:
                    number_operations += BLOCK_SIZE**3
                    A2[i:i+BLOCK_SIZE, j:j+BLOCK_SIZE] += (A_padded[i:i+BLOCK_SIZE, k:k+BLOCK_SIZE]
                                                           *A_padded[k:k+BLOCK_SIZE, j:j+BLOCK_SIZE])
                
    return ( A2[0:N, 0:N], number_operations )

def nonzeros (A):
    """
    Count the number of nonzeros in the matrix.
    """
    
    return numpy.sum( [ 1 for i in range(N**2) if A.reshape((N**2))[0, i] != 0 ] )
    
N = 129
BLOCK_SIZE = 4
BANDWIDTH = 4
ITERATIONS = 5

A = numpy.matrix(numpy.zeros((N, N), dtype=float))

for i in range(N):
    for j in range(N):
        if abs(i-j) <= BANDWIDTH:
            A[i, j] = 1/N**(1/2)

print("N = %d, BLOCK_SIZE = %d, BANDWIDTH = %d" % (N, BLOCK_SIZE, BANDWIDTH))
print("number non-zeros = %d -> %1.2f%% fill-in" % (nonzeros(A), nonzeros(A)/N**2))

for i in range(ITERATIONS):
    A2, number_operations = blockmul(A, BLOCK_SIZE)
    if abs(numpy.sum(A**2-A2)) > 1e-10:
        print(A**2-A2)
        raise Exception("incorrect matrix product")

    A = A**2

    print("%d: number non-zeros = %d -> %1.2f%% fill-in" % (i+1, nonzeros(A), nonzeros(A)/N**2))
    print("%d: N^3 = %d" % (i+1, N**3))
    print("%d: SpAMM = %d" % (i+1, number_operations))
    print("%d: SpAMM/N^3 = %f" % (i+1, number_operations/N**3))
    print("%d: norm = %f" % (i+1, numpy.linalg.norm(A, 'fro')))


N = 129, BLOCK_SIZE = 4, BANDWIDTH = 4
number non-zeros = 1141 -> 0.07% fill-in
1: number non-zeros = 2121 -> 0.13% fill-in
1: N^3 = 2146689
1: SpAMM = 18368
1: SpAMM/N^3 = 0.008556
1: norm = 1.917426
2: number non-zeros = 3985 -> 0.24% fill-in
2: N^3 = 2146689
2: SpAMM = 49600
2: SpAMM/N^3 = 0.023105
2: norm = 1.009810
3: number non-zeros = 7329 -> 0.44% fill-in
3: N^3 = 2146689
3: SpAMM = 151872
3: SpAMM/N^3 = 0.070747
3: norm = 0.331219
4: number non-zeros = 12481 -> 0.75% fill-in
4: N^3 = 2146689
4: SpAMM = 479808
4: SpAMM/N^3 = 0.223511
4: norm = 0.042413
5: number non-zeros = 16641 -> 1.00% fill-in
5: N^3 = 2146689
5: SpAMM = 1342528
5: SpAMM/N^3 = 0.625395
5: norm = 0.000834

In [ ]: