Problem: We want to build a new sparse matrix by replacing or appending a column/row. How can we do so efficiently? We then want to use these matrices for fast matrix vector multiplication potentially requiring a cast to another sparse matrix type. What is the best overall compromise?
In [1]:
import numpy as np
import scipy.sparse
In [2]:
sparsity = 0.001
N, M = 10000, 200
rowvec = np.ones(N)
colvec = np.ones(M)
Aorig = np.random.random((N, M)) < sparsity
In [3]:
%timeit scipy.sparse.csr_matrix(Aorig)
In [4]:
%timeit scipy.sparse.lil_matrix(Aorig)
In [5]:
Alil = scipy.sparse.lil_matrix(Aorig)
%timeit scipy.sparse.csr_matrix(Alil)
In [6]:
Adok = scipy.sparse.dok_matrix(Aorig)
%timeit scipy.sparse.csr_matrix(Adok)
In [7]:
index = 3
newrow = np.random.random(N) < sparsity
In [8]:
Arow = Aorig[:, :]
%timeit Arow[:, index] = newrow
In [9]:
def replacerow(A, rowindex, row):
# see http://stackoverflow.com/questions/12129948/scipy-sparse-set-row-to-zeros
# zero extant values
A.data[A.indptr[rowindex]:A.indptr[rowindex+1]] = 0.0
indices = np.nonzero(newrow)
A[indices, rowindex] = newrow[indices]
In [10]:
Acsrrow = scipy.sparse.csr_matrix(Aorig)
replacerow(Acsrrow, index, newrow)
print(np.all(Acsrrow == Arow))
%timeit replacerow(Acsrrow, index, newrow)
In [11]:
Acsrrow = scipy.sparse.csr_matrix(Aorig)
%timeit Acsrrow[:, index] = newrow[:, None]
print(np.all(Acsrrow == Arow))
print(Acsrrow.nnz, np.count_nonzero(Arow))
In [12]:
%timeit Acsrrow.eliminate_zeros()
Acsrrow.nnz, np.count_nonzero(Arow)
Out[12]:
In [13]:
Alilrow = scipy.sparse.lil_matrix(Aorig)
%timeit Alilrow[:, index] = newrow[:, None]
print(np.all(Alilrow == Arow))
print(Alilrow.nnz, np.count_nonzero(Arow))
In [14]:
%timeit np.concatenate((Aorig, newrow[:, None]), axis=1)
Aprow = np.concatenate((Aorig, newrow[:, None]), axis=1)
In [15]:
Acsrprow = scipy.sparse.csr_matrix(Aorig)
print(np.all(Aprow==scipy.sparse.hstack((Acsrprow, scipy.sparse.csr_matrix(newrow[:, None])))))
%timeit scipy.sparse.hstack((Acsrprow, scipy.sparse.csr_matrix(newrow[:, None])))
In [16]:
def append(A, x):
A._shape = (A.shape[0]+1, A.shape[1])
A.indptr = np.hstack((A.indptr, A.indptr[-1]))
A[-1, :] = x
return A
In [17]:
newcol = np.random.random(M) < sparsity
np.all((append(scipy.sparse.csr_matrix(Aorig), newcol) == scipy.sparse.vstack((scipy.sparse.csr_matrix(Aorig), scipy.sparse.csr_matrix(newcol[None, :])))).todense())
Out[17]:
In [18]:
Acsr = scipy.sparse.csr_matrix(Aorig)
%timeit append(Acsr, newcol)
In [19]:
Acsr = scipy.sparse.csr_matrix(Aorig)
%timeit scipy.sparse.vstack((Acsr, scipy.sparse.csr_matrix(newcol[None, :])))
In [20]:
def spmatreplacecol(A, colindex, newcol):
row, col = A.nonzero()
A.data[col == colindex] = 0.0
indices = np.nonzero(newcol)
A[indices, colindex] = newcol[indices]
return A
In [36]:
colindex = 3
newcol = np.random.random(N) < sparsity
In [32]:
Acsr.shape
Out[32]:
In [45]:
Acsr = scipy.sparse.csr_matrix(Aorig)
%time Acsr[:, colindex] = newcol[:, None]
In [44]:
Acsr2 = scipy.sparse.csr_matrix(Aorig)
%time spmatreplacecol(Acsr2, colindex, newcol)
Out[44]:
In [47]:
np.all(Acsr2.todense() == Acsr)
Out[47]:
In [ ]: