In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
def lu_decomp(A, show=False):
N,_ = A.shape
U = np.copy(A)
L = np.identity(N)
if show:
print('Initial matrices')
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through columns
for j in range(N-1):
#iterating through rows
for i in range(j+1,N):
L[i,j] = U[i,j]/U[j,j]
U[i] -= L[i,j]*U[j]
if show:
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('----------------------------------------')
return L,U
"""
Solves a linear system A x = b, where A is a
triangular (upper or lower) matrix
"""
def solve_triangular(A, b, upper=True):
n = b.shape[0]
x = np.zeros_like(b)
if upper==True:
#perform back-substitution
x[-1] = (1./A[-1,-1]) * b[-1]
for i in range(n-2, -1, -1):
x[i] = (1./A[i,i]) * (b[i] - np.sum(A[i,i+1:] * x[i+1:]))
else:
#perform forward-substitution
x[0] = (1./A[0,0]) * b[0]
for i in range(1,n):
x[i] = (1./A[i,i]) * (b[i] - np.sum(A[i,:i] * x[:i]))
return x
def solve_lu(A, b, show=False):
L,U = lu_decomp(A, show)
# L.c = b with c = U.x
c = solve_triangular(L, b, upper=False)
x = solve_triangular(U, c)
return x
In [3]:
#permutation between rows i and j on matrix A
def row_perm(A, i, j):
tmp = np.copy(A[i])
A[i] = A[j]
A[j] = tmp
def palu_decomp(A, show=False):
N,_ = A.shape
P = np.identity(N)
L = np.zeros((N,N))
U = np.copy(A)
if show:
print('Initial matrices')
print('P = '); print(np.array_str(P, precision=2, suppress_small=True))
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through columns
for j in range(N-1):
#determine the new pivot
p_index = np.argmax(np.abs(U[j:,j]))
if p_index != 0:
row_perm(P, j, j+p_index)
row_perm(U, j, j+p_index)
row_perm(L, j, j+p_index)
if show:
print('A permutation has been made')
print('P = '); print(np.array_str(P, precision=2, suppress_small=True))
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through rows
for i in range(j+1,N):
L[i,j] = U[i,j]/U[j,j]
U[i] -= L[i,j]*U[j]
if show:
print('P = '); print(np.array_str(P, precision=2, suppress_small=True))
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('----------------------------------------')
np.fill_diagonal(L,1)
return P,L,U
In [4]:
def palu_decomp2(A, show=False):
N,_ = A.shape
P2 = np.arange(N)
A2 =np.copy(A)
L = np.zeros((N,N))
U = np.copy(A)
if show:
print('Initial matrices')
print('P = '); print(np.array_str(P, precision=2, suppress_small=True))
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('A2 = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through columns
for j in range(N-1):
#determine the new pivot
p_index = np.argmax(np.abs(U[j:,j]))
if p_index != 0:
row_perm(P, j, j+p_index)
row_perm(U, j, j+p_index)
row_perm(L, j, j+p_index)
P2[j]=j+p_index
P2[j+p_index]=j
row_perm(A2, j, j+p_index)
if show:
print('A permutation has been made')
print('P = '); print(np.array_str(P, precision=2, suppress_small=True))
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('A2 = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P2 = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through rows
for i in range(j+1,N):
L[i,j] = U[i,j]/U[j,j]
U[i] -= L[i,j]*U[j]
A2[i,j] = A2[i,j]/A2[j,j]
A2[i,j+1:] -= A2[i,j]*A2[j,j+1:]
if show:
print(j,i)
print('P = '); print(np.array_str(P, precision=2, suppress_small=True))
print('L = '); print(np.array_str(L, precision=2, suppress_small=True))
print('U = '); print(np.array_str(U, precision=2, suppress_small=True))
print('A2 = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P2 = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
np.fill_diagonal(L,1)
return P,L,U,A2,P2
In [5]:
# All prints could be removed
def palu_decomp2(A, show=False):
N,_ = A.shape
P2 = np.arange(N)
A2 =np.copy(A)
if show:
print('Initial matrices')
print('A2 = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P2 = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through columns
for j in range(N-1):
#Determine the new pivot.
p_index = np.argmax(np.abs(A2[j:,j])) # THIS NEEDS TO BE IMPLEMENTED IN C
if p_index != 0:
# Permuting P "matrix" stored as a vector
P2[j]=j+p_index
P2[j+p_index]=j
row_perm(A2, j, j+p_index) # THIS NEEDS TO BE IMPLEMENTED IN C
if show:
print('A permutation has been made')
print('A2 = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P2 = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through rows
for i in range(j+1,N):
A2[i,j] = A2[i,j]/A2[j,j]
A2[i,j+1:] -= A2[i,j]*A2[j,j+1:]
if show:
print(j,i)
print('A2 = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P2 = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
return A2,P2
In [6]:
# All prints could be removed
def palu_decomp3(A, show=False):
N,_ = A.shape
P = np.arange(N)
if show:
print('Initial matrices')
print('A = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through columns
for j in range(N-1):
#Determine the new pivot.
p_index = np.argmax(np.abs(A[j:,j])) # THIS NEEDS TO BE IMPLEMENTED IN C
if p_index != 0:
# Permuting P "matrix" stored as a vector
P[j]=j+p_index
P[j+p_index]=j
row_perm(A, j, j+p_index) # THIS NEEDS TO BE IMPLEMENTED IN C
if show:
print('A permutation has been made')
print('A = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
#iterating through rows
for i in range(j+1,N):
A[i,j] = A[i,j]/A[j,j]
A[i,j+1:] -= A[i,j]*A[j,j+1:]
if show:
print(j,i)
print('A = '); print(np.array_str(A2, precision=2, suppress_small=True))
print('P = '); print(np.array_str(P2, precision=2, suppress_small=True))
print('----------------------------------------')
return A,P
In [7]:
"""
Solves a linear system A x = b, where A is a
triangular (upper or lower) matrix
"""
def solve_triangular(A, b, upper=True):
n = b.shape[0]
x = np.zeros_like(b)
if upper==True:
#perform back-substitution
x[-1] = (1./A[-1,-1]) * b[-1]
for i in range(n-2, -1, -1):
x[i] = (1./A[i,i]) * (b[i] - np.sum(A[i,i+1:] * x[i+1:]))
else:
#perform forward-substitution
x[0] = (1./A[0,0]) * b[0]
for i in range(1,n):
x[i] = (1./A[i,i]) * (b[i] - np.sum(A[i,:i] * x[:i]))
return x
def solve_lu(A, b, show=False):
L,U = lu_decomp(A, show)
# L.c = b with c = U.x
c = solve_triangular(L, b, upper=False)
x = solve_triangular(U, c)
return x
def solve_palu(A, b, show=False):
P,L,U = palu_decomp(A, show)
#A.x = b -> P.A.x = P.b = b'
b = np.dot(P,b)
# L.c = b' with c = U.x
c = solve_triangular(L, b, upper=False)
x = solve_triangular(U, c)
return x
In [8]:
"""
Solves a linear system A x = L U x = b,
where A has L and U stored in itself.
Recall L has 1 in the diagonal, so they
where not stored.
IDEA: Reduced memory usage.
"""
def solve_ALU_reduced(A, b):
N = b.shape[0]
# We store here a temporal solution
c = np.zeros_like(b)
# Solving Lc=b, recall L[i,i]=1
# Performing Forward-Substitution
# Original code: c[0] = b[0]/A[0,0], we removed A[0,0] since we know it is 1 and it was not stored in A
c[0] = b[0]
for i in range(1,N):
tmp=0
for j in range(i):
tmp += A[i,j] * c[j]
# Original code: c[i] = (b[i] - tmp)/A[i,i], we removed A[i,i] since we know it is 1 and it was not stored in A
c[i] = b[i] - tmp
# The final solution will be stored in b
# Performing Back-Substitution
b[N-1] = c[N-1]/A[N-1,N-1]
for i in range(N-2, -1, -1):
tmp=0
for j in range(i+1,N):
tmp += A[i,j] * b[j]
b[i] = (c[i] - tmp)/A[i,i]
return b
def solve_palu2(A, b, show=False):
A,P = palu_decomp3(A, show)
#A.x = b -> P.A.x = P.b = b'
b = b[P]
# L.c = b' with c = U.x
x=solve_ALU_reduced(A, b)
return x
In [9]:
N=100
np.random.seed(0)
A = np.random.random((N,N))
b = np.ones(N)
In [10]:
# Classic solution
x1=solve_palu(A, b, show=False)
#print(x1)
# Improved (less memory usage) solver
x2=solve_palu2(A, b, show=False)
#print(x2)
# Comparison
print(np.linalg.norm(x1-x2))
In [11]:
# The procedures to be implemented in CUDA are:
# solve_palu2: wrapper
# palu_decomp2: it computed PALU decomposition and it stores LU in A and it also returns P as a vector.
# solve_ALU_reduced: it does forward and backward decomposition considering A has LU stored in it,
# the RHS is assumed to be permuted previously.