In [34]:
# QR eigenvalue iteration: factor A = QR, then set A = R*Q, repeating until
# (approximate) convergence; accumulate the Q's to obtain the eigenvectors
X = rand(5,5)
A = X + X'
evals_A = eigvals(A)
evecs_A = eigvecs(A)
X = eye(A) # identity matrix of the same size as A, to accumulate eigenvectors
for n = 1:100
    Q,R = qr(A)
    A = R*Q
    X = X*Q
end
# print computed eigenvalues and eigenvectors
A,X,evecs_A,evals_A
Out[34]:
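A quick convergence check (my addition, a sketch): for a symmetric input the iterates should approach a diagonal matrix, and the accumulated X should remain orthogonal.
In [ ]:
# sketch: off-diagonal entries of the iterated A should be tiny, and
# X'X should be the identity
norm(A - diagm(diag(A))), norm(X'*X - eye(5))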
In [46]:
#sort(diag(A),rev=true) - sort(evals_A,rev=true)
i_eigIt = sortperm(diag(A), rev=true)  # order of calculated eigenvalues
i_eigBI = sortperm(evals_A, rev=true)  # order of eigenvalues from built-in function
computed = diag(A)
computed[i_eigIt] - evals_A[i_eigBI]
Out[46]:
In [48]:
## compare difference in iterated eigenvectors vs. built-in function
In [49]:
X[:,i_eigIt]-evecs_A[:,i_eigBI]
Out[49]:
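Eigenvectors are only determined up to a sign, so entries of this difference can be large even when both methods are correct. A sign-normalized comparison (my addition, a sketch):
In [ ]:
# sketch: flip each iterated column to match the sign of the built-in one
Xs = X[:,i_eigIt]
Vs = evecs_A[:,i_eigBI]
for j = 1:size(Xs,2)
    if dot(Xs[:,j], Vs[:,j]) < 0
        Xs[:,j] = -Xs[:,j]
    end
end
norm(Xs - Vs)   # should now be small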
In [1]:
# note: the QR iteration preserves eigenvalues, so if X still held the original
# random matrix these two sorted lists would agree; X has since been overwritten
eigvals(X + X'), sort(eigvals(A))
In [5]:
# iterates over rows (outer loop)
# iterates over columns (inner loop)
function matadd1!(C, A, B)
    m,n = size(A)
    (m,n) == size(B) || error("matrix sizes must match")
    for i = 1:m
        for j = 1:n
            @inbounds C[i,j] = A[i,j] + B[i,j]
        end
    end
    return C
end
matadd1(A,B) = matadd1!(similar(A, promote_type(eltype(A),eltype(B))), A,B)
# same as matadd1 except the loop order is reversed
# iterates over columns (outer loop)
# iterates over rows (inner loop)
function matadd2!(C, A, B)
    m,n = size(A)
    (m,n) == size(B) || error("matrix sizes must match")
    for j = 1:n
        for i = 1:m
            @inbounds C[i,j] = A[i,j] + B[i,j]
        end
    end
    return C
end
matadd2(A,B) = matadd2!(similar(A, promote_type(eltype(A),eltype(B))), A,B)
Out[5]:
In [6]:
# correctness check:
A = rand(5,7)
B = rand(5,7)
norm(matadd1(A,B) - (A+B)), norm(matadd2(A,B) - (A+B))
Out[6]:
In [7]:
N = round(Int, linspace(10, 1000, 200))
t0 = Float64[]
t1 = Float64[]
t2 = Float64[]
for n in N
    A = rand(n,n)
    B = rand(n,n)
    C = similar(A)
    push!(t0, @elapsed A+B) # note: does not preallocate output
    push!(t1, @elapsed matadd1!(C,A,B))
    push!(t2, @elapsed matadd2!(C,A,B))
end
In [8]:
using PyPlot
plot(N, N.^2 ./ t0 * 1e-9, "k.-")
plot(N, N.^2 ./ t1 * 1e-9, "bo-")
plot(N, N.^2 ./ t2 * 1e-9, "rs-")
xlabel(L"n")
ylabel(L"gflops ($n^2/t$)")
legend(["A+B","matmul1!","matmul2!"])
Out[8]:
In [15]:
function matadd1_allocate(A, B)
    m,n = size(A)
    (m,n) == size(B) || error("matrix sizes must match")
    C = similar(A, promote_type(eltype(A),eltype(B))) # allocate C inside the function
    for i = 1:m
        for j = 1:n
            @inbounds C[i,j] = A[i,j] + B[i,j]
        end
    end
    return C
end
# same as matadd1_allocate except the loop order is reversed
function matadd2_allocate(A, B)
    m,n = size(A)
    (m,n) == size(B) || error("matrix sizes must match")
    C = similar(A, promote_type(eltype(A),eltype(B))) # allocate C inside the function
    for j = 1:n
        for i = 1:m
            @inbounds C[i,j] = A[i,j] + B[i,j]
        end
    end
    return C
end
Out[15]:
In [20]:
### Benchmark for the versions that allocate their own output
N = round(Int, linspace(10, 1000, 200))
t0 = Float64[]
t1 = Float64[]
t2 = Float64[]
for n in N
    A = rand(n,n)
    B = rand(n,n)
    push!(t0, @elapsed A+B) # note: does not preallocate output
    push!(t1, @elapsed matadd1_allocate(A,B))
    push!(t2, @elapsed matadd2_allocate(A,B))
end
plot(N, N.^2 ./ t0 * 1e-9, "k.-")
plot(N, N.^2 ./ t1 * 1e-9, "bo-")
plot(N, N.^2 ./ t2 * 1e-9, "rs-")
xlabel(L"n")
ylabel(L"gflops ($n^2/t$)")
legend(["A+B","matmul1_allocate","matmul2_allocate"])
Out[20]:
The code for the Julia operation 'A+B' uses the following loop:
for i = 1:length(A)
    @inbounds F[i] = ($f)(A[i], B[i])
end
where $f is addition (':+'). The linear indices 1:length(A) travel down the first column and then start at the beginning of the next column. This is column-major order, which matches the loop ordering of matadd2. The flop rates of A+B are closer to those of matadd2, so this is consistent with the prediction.
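A tiny illustration (my addition, a sketch) of how Julia's linear indexing follows column-major storage:
In [ ]:
# linear indices walk down each column in turn: for [1 2; 3 4] the order
# of elements under a single index is 1, 3, 2, 4
M = [1 2; 3 4]
[M[i] for i in 1:length(M)]   # => [1, 3, 2, 4]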
In [ ]:
Suppose you wanted to compute C = A+3B+4A.^2 (noting that .^ is element-wise squaring, not the matrix square A^2) in Julia. Based on your results above, how and why might you make this faster (if you needed to)? (This would be harder to improve in Matlab or Python, where loops written in the language are slow.)
In [9]:
function complex_add1(A, B)
    m,n = size(A)
    (m,n) == size(B) || error("matrix sizes must match")
    C = similar(A, promote_type(eltype(A),eltype(B))) # allocate C
    for i = 1:m
        for j = 1:n
            @inbounds C[i,j] = A[i,j] + 3*B[i,j] + 4*(A[i,j])^2 # A+3B+4A.^2, row-major loop order
        end
    end
    return C
end
function complex_add2(A, B)
    m,n = size(A)
    (m,n) == size(B) || error("matrix sizes must match")
    C = similar(A, promote_type(eltype(A),eltype(B))) # allocate C
    for j = 1:n
        for i = 1:m
            @inbounds C[i,j] = A[i,j] + 3*B[i,j] + 4*(A[i,j])^2 # A+3B+4A.^2, column-major loop order
        end
    end
    return C
end
Out[9]:
In [10]:
### Benchmark: fused single-pass loops vs. the vectorized expression
N = round(Int, linspace(10, 1000, 200))
t0 = Float64[]
t1 = Float64[]
t2 = Float64[]
for n in N
    A = rand(n,n)
    B = rand(n,n)
    push!(t0, @elapsed (A+3B+4A.^2)) # note: allocates temporaries and the output
    push!(t1, @elapsed complex_add1(A,B))
    push!(t2, @elapsed complex_add2(A,B))
end
plot(N, N.^2 ./ t0 * 1e-9, "k.-")
plot(N, N.^2 ./ t1 * 1e-9, "bo-")
plot(N, N.^2 ./ t2 * 1e-9, "rs-")
xlabel(L"n")
ylabel(L"gflops ($n^2/t$)")
legend(["A+3B+4A.^2","complex_add1","complex_add2"])
Out[10]:
- The naive implementation is slowest (for small matrices) because it makes several passes over the data, allocating temporary arrays for the intermediate results (3B, A.^2, and so on).
- In 'complex_add1' each entry of A and B is touched only once, but because the loops run in row-major order it can be slower than the naive implementation for larger matrices.
- 'complex_add2' is the fastest because it handles each entry only once, in column-major order.
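One way to see the temporaries (my addition, a sketch): @time reports how much memory each form allocates.
In [ ]:
# the vectorized expression allocates several temporary n-by-n arrays,
# while the fused loop allocates only the output array
A = rand(1000,1000); B = rand(1000,1000)
@time A + 3B + 4A.^2;
@time complex_add2(A, B);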
In [20]:
A = rand(5,5)
T,Q = schur(A)
A - Q*T*Q' # check the factorization
evals = eigvals(A)
evecs = eigvecs(A)
#T, evals
(A - evals[1]*eye(5)) \ zeros(5) # note: a zero right-hand side only yields the trivial solution
Out[20]:
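To actually extract the eigenvector for evals[1], one option (my addition, a sketch) is the right singular vector of the shifted matrix belonging to its numerically zero singular value, since backslash with a zero right-hand side returns only the zero vector:
In [ ]:
# sketch: the eigenvector spans the (numerical) null space of A - λI;
# take the right singular vector for the smallest singular value
U_, S_, V_ = svd(A - evals[1]*eye(5))
v = V_[:, end]
norm(A*v - evals[1]*v)   # should be small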
In [32]:
# gaussian_elim: LU factorization without pivoting
# (A may be rectangular, e.g. an augmented matrix)
function gaussian_elim(A)
    m,n = size(A)
    U = copy(A)
    L = eye(m)
    for k = 1:(m-1)
        for j = (k+1):m
            L[j,k] = U[j,k]/U[k,k]
            U[j,k:n] = U[j,k:n] - L[j,k]*U[k,k:n]
        end
    end
    return L,U
end
Out[32]:
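A quick correctness check (my addition, a sketch): L*U should reproduce the input matrix.
In [ ]:
# sketch: verify the factorization on a random matrix (no pivoting, so this
# can be inaccurate if a near-zero pivot is encountered)
Atest = rand(4,4)
Lt, Ut = gaussian_elim(Atest)
norm(Lt*Ut - Atest)   # should be ~ machine epsilon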
In [41]:
m,n = size(A)
mtrx = [A zeros(5)] # augmented matrix [A | 0]
L,U = gaussian_elim(mtrx)
U # back substitution on U is performed next
Out[41]:
In [69]:
function back_subs(R,b)
    m = length(b)
    x = zeros(b)
    for j = m:-1:1 # back substitution: work upward from the last row
        sumVal = 0
        for k = (j+1):m
            sumVal += x[k]*R[j,k]
        end
        x[j] = (b[j] - sumVal)/R[j,j]
    end
    return x
end
Out[69]:
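A quick check (my addition, a sketch): compare back_subs against the built-in triangular solve.
In [ ]:
# sketch: agreement with \ for a well-conditioned upper-triangular system
Rtest = triu(rand(5,5)) + 5*eye(5)
btest = rand(5)
norm(back_subs(Rtest, btest) - Rtest\btest)   # should be ~ machine epsilon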
In [75]:
R = U
b = zeros(5)
x = back_subs(R, b) # with b = 0 back substitution returns only the trivial zero vector
In [80]:
A = [2 -3 1; 1 -2 1; 1 -3 2]
eigvals(A)
eigvecs(A)
Out[80]:
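Note (my addition): this matrix has eigenvalues 0, 1, 1, and since A - I has rank 1 the repeated eigenvalue still has a two-dimensional eigenspace, so eigvecs returns three independent columns. A quick check:
In [ ]:
# sketch: A - I should have rank 1, i.e. a 2-dimensional null space for λ = 1
rank(A - eye(3))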
In [82]:
A = [1 -1; 1 1]
eigvals(A)
Out[82]:
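Note (my addition): this matrix is sqrt(2) times a 45-degree rotation, so its eigenvalues are the complex pair 1 ± i; a real matrix need not have real eigenvalues unless it is symmetric.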