In [1]:
using PairwiseListMatrices
using Benchmarks
using Base.Test
using Gadfly


WARNING: UUID.jl is deprecated, please use Base.Random.uuid1(), Base.Random.uuid4(), and Base.Random.UUID instead.

In [2]:
# Similar to pairwise of Distances.jl
function using_full(vecs)
  n = length(vecs)
  mat = Array(Float64, n, n)
  @inbounds for i in 1:n
    vec_i = vecs[i]
    for j in i:n
      mat[i,j] = cor(vec_i, vecs[j])
    end
    for j = 1 : (i-1)
      @inbounds mat[i,j] = mat[j,i]  # leveraging the symmetry
    end
  end
  mat
end

function using_symmetric(vecs)
  n = length(vecs)
  mat = Array(Float64, n, n)
  @inbounds for i in 1:n
    vec_i = vecs[i]
    for j in i:n
      mat[i,j] = cor(vec_i, vecs[j])
    end
  end
  Symmetric(mat)
end

function using_sparse_symmetric(vecs)
  n = length(vecs)
  mat = spzeros(Float64, n, n)
  @inbounds for i in 1:n
    vec_i = vecs[i]
    for j in i:n
      mat[i,j] = cor(vec_i, vecs[j])
    end
  end
  Symmetric(mat)
end

# Creates a list and returns it as a PairwiseListMatrix
function using_pairwiselist(vecs)
  n = length(vecs)
  list = Array(Float64, div(n*(n+1),2))
  k = 1
  @inbounds for i in 1:n
    vec_i = vecs[i]
    for j in i:n
      list[k] = cor(vec_i, vecs[j])
      k += 1
    end
  end
  PairwiseListMatrix(list, true)
end

# Creates and fill a PairwiseListMatrix
function using_pairwiselistmatrix(vecs)
  n = length(vecs)
  list = PairwiseListMatrix(Float64, n, true)
  @inbounds for i in 1:n
    vec_i = vecs[i]
    for j in i:n
      list[i,j] = cor(vec_i, vecs[j])
    end
  end
  list
end


Out[2]:
using_pairwiselistmatrix (generic function with 1 method)

In [3]:
const testset = [ rand(3) for n in 1:500 ];

# Test for the same result
@test all(using_sparse_symmetric(testset) .== using_full(testset))
@test all(using_full(testset) .== using_symmetric(testset))
@test all(using_symmetric(testset) .== using_pairwiselist(testset))
@test all(using_pairwiselist(testset) .== using_pairwiselistmatrix(testset))

@benchmark using_symmetric(testset)


Out[3]:
================ Benchmark Results ========================
     Time per evaluation: 31.36 ms [30.85 ms, 31.87 ms]
Proportion of time in GC: 7.20% [5.75%, 8.65%]
        Memory allocated: 28.66 mb
   Number of allocations: 501003 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 3.25 s

In [4]:
@benchmark using_pairwiselist(testset)


Out[4]:
================ Benchmark Results ========================
     Time per evaluation: 30.77 ms [30.04 ms, 31.50 ms]
Proportion of time in GC: 7.96% [6.37%, 9.56%]
        Memory allocated: 29.62 mb
   Number of allocations: 625752 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 3.13 s

In [5]:
@benchmark using_pairwiselistmatrix(testset)


Out[5]:
================ Benchmark Results ========================
     Time per evaluation: 23.32 ms [22.89 ms, 23.74 ms]
Proportion of time in GC: 9.13% [7.57%, 10.70%]
        Memory allocated: 25.80 mb
   Number of allocations: 375763 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 2.37 s

In [6]:
const SAMPLES = collect(5:50:2000)
const TIME = zeros(Float64, length(SAMPLES)*5)
const NAMES = vcat([ ["full", "symmetric", "sparse", "list", "pairwiselistmatrix"] for i in 1:length(SAMPLES) ]...)
const XS = vcat([ [x, x, x, x, x] for x in SAMPLES ]...)

k = 0
for sample in SAMPLES
    test = [ rand(3) for n in 1:sample ]
    k += 1
    TIME[k] = @elapsed using_full(test)
    k += 1
    TIME[k] = @elapsed using_symmetric(test)
    k += 1
    TIME[k] = sample < 400 ? @elapsed( using_sparse_symmetric(test) ) : NaN
    k += 1
    TIME[k] = @elapsed using_pairwiselist(test)
    k += 1
    TIME[k] = @elapsed using_pairwiselistmatrix(test)
end

In [7]:
plot(x=XS, y=TIME, color=NAMES, Geom.point, Geom.smooth, Scale.y_sqrt)


Out[7]:
x -2500 -2000 -1500 -1000 -500 0 500 1000 1500 2000 2500 3000 3500 4000 4500 -2000 -1900 -1800 -1700 -1600 -1500 -1400 -1300 -1200 -1100 -1000 -900 -800 -700 -600 -500 -400 -300 -200 -100 0 100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 -2000 0 2000 4000 -2000 -1800 -1600 -1400 -1200 -1000 -800 -600 -400 -200 0 200 400 600 800 1000 1200 1400 1600 1800 2000 2200 2400 2600 2800 3000 3200 3400 3600 3800 4000 full symmetric sparse list pairwiselistmatrix Color -2.52 -2.02 -1.52 -1.02 -0.52 0.02 0.52 1.02 1.52 2.02 2.52 3.02 -2.002 -1.952 -1.902 -1.852 -1.802 -1.752 -1.702 -1.652 -1.602 -1.552 -1.502 -1.452 -1.402 -1.352 -1.302 -1.252 -1.202 -1.152 -1.102 -1.052 -1.002 -0.952 -0.902 -0.852 -0.802 -0.752 -0.702 -0.652 -0.602 -0.552 -0.502 -0.452 -0.402 -0.352 -0.302 -0.252 -0.202 -0.152 -0.102 -0.052 0.002 0.052 0.102 0.152 0.202 0.252 0.302 0.352 0.402 0.452 0.502 0.552 0.602 0.652 0.702 0.752 0.802 0.852 0.902 0.952 1.002 1.052 1.102 1.152 1.202 1.252 1.302 1.352 1.402 1.452 1.502 1.552 1.602 1.652 1.702 1.752 1.802 1.852 1.902 1.952 2.002 2.052 2.102 2.152 2.202 2.252 2.302 2.352 2.402 2.452 2.502 -22 02 22 42 -2.02 -1.92 -1.82 -1.72 -1.62 -1.52 -1.42 -1.32 -1.22 -1.12 -1.02 -0.92 -0.82 -0.72 -0.62 -0.52 -0.42 -0.32 -0.22 -0.12 0.02 0.12 0.22 0.32 0.42 0.52 0.62 0.72 0.82 0.92 1.02 1.12 1.22 1.32 1.42 1.52 1.62 1.72 1.82 1.92 2.02 2.12 2.22 2.32 2.42 2.52 y