For the various SEPP (self-exciting point process) algorithms, we end up manipulating an upper-triangular matrix: entry $p_{j,i}$ (for $j<i$) is the probability that event $i$ was triggered by event $j$, and $p_{i,i}$ is the probability that event $i$ is a background event.
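
Concretely, with back giving the background intensity and trig the triggering kernel (as defined in the first cell below), each column is normalised to sum to one:

$$p_{i,i} = \frac{\text{back}(t_i)}{Z_i}, \qquad p_{j,i} = \frac{\text{trig}(t_i - t_j)}{Z_i} \quad (j<i), \qquad Z_i = \text{back}(t_i) + \sum_{j<i} \text{trig}(t_i - t_j).$$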


In [1]:
import numpy as np

def back(t):
    # Constant background intensity: rate 1.5 at all times
    t = np.asarray(t)
    return np.zeros_like(t) + 1.5

def trig(dt):
    # Exponentially decaying triggering kernel
    return 0.7 * np.exp(-0.2 * np.asanyarray(dt))

def rand(n):
    # n sorted uniform random times; the code below assumes strictly
    # increasing times, so fail loudly on the (unlikely) duplicate.
    t = np.random.random(n)
    t.sort()
    if np.any(np.diff(t) == 0):
        raise AssertionError("duplicate times")
    return t

In [2]:
def slow(times):
    # Reference implementation: build the matrix entry by entry,
    # then normalise each column to sum to one.
    p = np.zeros((len(times), len(times)))
    for i, t in enumerate(times):
        p[i,i] = back(t)
        for j, s in enumerate(times[:i]):
            p[j,i] = trig(t-s)
    for i in range(p.shape[0]):
        p[:,i] /= np.sum(p[:,i])
    return p

np.testing.assert_allclose(slow([1]), [[1]])
b = 1.5
a = trig(1)
np.testing.assert_allclose(slow([1,2]), [[1, a/(a+b)], [0,b/(a+b)]])

In [3]:
def f1(times):
    # Vectorise the inner loop: fill each column's off-diagonal part with one slice
    times = np.asarray(times)
    p = np.zeros((len(times), len(times)))
    for i, t in enumerate(times):
        p[i,i] = back(t)
        p[:i, i] = trig(t - times[:i])
    return p / np.sum(p, axis=0)[None,:]

for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f1(times))

In [4]:
def f2(times):
    # As f1, but start from an uninitialised array and zero the
    # strict lower triangle explicitly
    times = np.asarray(times)
    d = times.shape[0]
    p = np.empty((d,d))
    for i, t in enumerate(times):
        p[i,i] = back(t)
        p[:i, i] = trig(t - times[:i])
        p[i+1:, i] = 0
    return p / np.sum(p, axis=0)[None,:]

for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f2(times))

In [5]:
def f3(times):
    # More space efficient: store only the upper triangle, packed
    # column by column into a flat array of length d(d+1)/2
    times = np.asarray(times)
    d = times.shape[0]
    p = np.empty(d*(d+1)//2)
    offset = 0
    for i, t in enumerate(times):
        p[offset+i] = back(t)
        p[offset : offset+i] = trig(t - times[:i])
        norm = np.sum(p[offset : offset+i+1])
        p[offset : offset+i+1] /= norm
        offset += i + 1
    return p

def to_compressed(p):
    # Pack the upper triangle of a dense matrix the same way, for testing
    d = p.shape[0]
    pp = np.empty(d*(d+1)//2)
    offset = 0
    for i in range(d):
        pp[offset : offset + i + 1] = p[:i+1,i]
        offset += i + 1
    return pp

for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(to_compressed(slow(times)), f3(times))
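
To read an individual entry back out of the packed form: column $i$ occupies the slice $[i(i+1)/2, (i+1)(i+2)/2)$, so $p_{j,i}$ (for $j \leq i$) sits at flat index $i(i+1)/2 + j$. A quick check of this (illustrative only, not used later):

In [ ]:
times = rand(6)
p, pp = slow(times), f3(times)
for i in range(len(times)):
    for j in range(i + 1):
        # p[j,i] lives at the start of packed column i, offset by j
        assert abs(p[j, i] - pp[i * (i + 1) // 2 + j]) < 1e-7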

In [6]:
def f4(times):
    # Store the normalised diagonal and the packed strict upper
    # triangle separately, normalising column by column as we go
    times = np.asarray(times)
    d = times.shape[0]
    diag = back(times)
    offdiag = np.empty(d * (d-1) // 2)
    offset = 0
    for i, t in enumerate(times):
        offdiag[offset : offset+i] = trig(t - times[:i])
        norm = diag[i] + np.sum(offdiag[offset : offset+i])
        diag[i] /= norm
        offdiag[offset : offset+i] /= norm
        offset += i
    return diag, offdiag

def split(p):
    # Split a dense matrix into (diagonal, packed strict upper triangle), for testing
    d = p.shape[0]
    diag = np.diag(p).copy()
    offdiag = np.empty(d * (d-1) // 2)
    offset = 0
    for i in range(1, d):
        offdiag[offset : offset+i] = p[:i,i]
        offset += i
    return diag, offdiag

for n in range(1, 10):
    for _ in range(10):
        times = rand(n)
        np.testing.assert_allclose(split(slow(times))[0], f4(times)[0])
        np.testing.assert_allclose(split(slow(times))[1], f4(times)[1])

In [7]:
def f5(times):
    # Dense matrix as in f1, but fill the whole diagonal with one vectorised call
    times = np.asarray(times)
    d = times.shape[0]
    p = np.zeros((d,d))
    p[np.diag_indices(d)] = back(times)
    for i, t in enumerate(times):
        p[:i, i] = trig(t - times[:i])
    return p / np.sum(p, axis=0)[None,:]

for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f5(times))

In [8]:
def f6(times):
    # Fully vectorised: form all pairwise differences, mask out the
    # non-causal (diagonal and lower-triangle) entries, then apply trig
    times = np.asarray(times)
    d = times.shape[0]
    dt = times[None,:] - times[:,None]
    dt = np.ma.array(dt, mask=dt<=0)
    p = trig(dt)
    p[np.diag_indices(d)] = back(times)
    p /= np.sum(p, axis=0)[None,:]
    return p

for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f6(times))

In [9]:
times = rand(10)

%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))


1000 loops, best of 3: 239 µs per loop
1000 loops, best of 3: 280 µs per loop
1000 loops, best of 3: 360 µs per loop
1000 loops, best of 3: 253 µs per loop
10000 loops, best of 3: 133 µs per loop
1000 loops, best of 3: 532 µs per loop

In [10]:
times = rand(100)

%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))


100 loops, best of 3: 2.29 ms per loop
100 loops, best of 3: 2.47 ms per loop
100 loops, best of 3: 4.04 ms per loop
100 loops, best of 3: 2.48 ms per loop
1000 loops, best of 3: 1.06 ms per loop
1000 loops, best of 3: 1.31 ms per loop

In [11]:
times = rand(1000)

%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))


10 loops, best of 3: 43.6 ms per loop
10 loops, best of 3: 48.2 ms per loop
10 loops, best of 3: 52.7 ms per loop
10 loops, best of 3: 39.2 ms per loop
10 loops, best of 3: 30.1 ms per loop
10 loops, best of 3: 82.4 ms per loop

In [12]:
times = rand(10000)

%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))


1 loop, best of 3: 2.29 s per loop
1 loop, best of 3: 2.55 s per loop
1 loop, best of 3: 1.99 s per loop
1 loop, best of 3: 1.85 s per loop
1 loop, best of 3: 2.14 s per loop
1 loop, best of 3: 8.05 s per loop

Conclusions:

  • For small to medium data sets, f5 wins: construct the matrix as a dense numpy array, vectorising each column fill.
  • For larger data sets, saving space starts to win, and f4 is quickest; the sketch below shows how to recover the dense matrix from its compact output.
  • The fully vectorised f6 scales worst: it evaluates trig on all n² pairwise differences and pays masked-array overhead on top.
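
If the dense matrix is needed downstream, f4's compact output converts back cheaply. A minimal sketch (join is my own helper, simply split run in reverse):

In [ ]:
def join(diag, offdiag):
    # Rebuild a dense matrix from the compact representation
    # returned by f4 (and by split above)
    d = diag.shape[0]
    p = np.zeros((d, d))
    p[np.diag_indices(d)] = diag
    offset = 0
    for i in range(1, d):
        p[:i, i] = offdiag[offset : offset+i]
        offset += i
    return p

times = rand(5)
np.testing.assert_allclose(join(*f4(times)), f5(times))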

Second order computations

For parametric fitting, we may only need $\sum_i p_{i,i}$, $\sum_{i<j} p_{i,j}$ and $\sum_{i<j} p_{i,j} (t_j-t_i)$, rather than the full matrix.
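
To see why (a sketch, ignoring edge effects): with a constant background rate $\mu$ and an exponential trigger $\theta\omega e^{-\omega\,\delta t}$, as hard-coded in back and trig above, the EM updates over a window $[0,T]$ containing $n$ events are $\hat\mu = \frac{1}{T}\sum_i p_{i,i}$, $\hat\theta = \frac{1}{n}\sum_{i<j} p_{i,j}$ and $\hat\omega = \sum_{i<j} p_{i,j} \big/ \sum_{i<j} p_{i,j}(t_j - t_i)$, so these three sums are all we need.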


In [23]:
def sslow(times):
    # Reference implementation: form the full matrix, then
    # accumulate the three sums entry by entry
    p = slow(times)
    diag_sum = 0
    for i in range(len(times)):
        diag_sum += p[i,i]
    off_diag_sum = 0
    weighted_sum = 0
    for i in range(len(times)):
        for j in range(i):
            off_diag_sum += p[j,i]
            weighted_sum += p[j,i] * (times[i] - times[j])
    return diag_sum, off_diag_sum, weighted_sum

assert sslow([1]) == (1,0,0)
assert np.all(np.abs(np.asarray(sslow([1,2])) - np.asarray([1.72355007, 0.27644993, 0.27644993])) < 1e-8)

In [38]:
def s0(times):
    # Build the full matrix with f5, then read off the three sums
    p = f5(times)
    diag_sum = np.sum(p[np.diag_indices(p.shape[0])])
    off_diag_sum, weighted_sum = 0, 0
    for i, t in enumerate(times):
        off_diag_sum += np.sum(p[:i, i])
        weighted_sum += np.sum(p[:i, i] * (t - times[:i]))
    return diag_sum, off_diag_sum, weighted_sum

for n in range(1, 100):
    times = rand(n)
    exp = np.asarray(sslow(times))
    got = np.asarray(s0(times))
    assert np.all(np.abs(exp - got) < 1e-10)

In [39]:
def s1(times):
    # Inline f5, so the off-diagonal sum falls out of the whole-matrix sum
    times = np.asarray(times)
    d = times.shape[0]
    p = np.zeros((d,d))
    p[np.diag_indices(d)] = back(times)
    for i, t in enumerate(times):
        p[:i, i] = trig(t - times[:i])
    p /= np.sum(p, axis=0)[None,:]
    diag_sum = np.sum(p[np.diag_indices(d)])
    off_diag_sum = np.sum(p) - diag_sum
    weighted_sum = 0
    for i, t in enumerate(times):
        weighted_sum += np.sum(p[:i, i] * (t - times[:i]))
    return diag_sum, off_diag_sum, weighted_sum

for n in range(1, 100):
    times = rand(n)
    exp = np.asarray(sslow(times))
    got = np.asarray(s1(times))
    assert np.all(np.abs(exp - got) < 1e-10)

In [40]:
def s2(times):
    # Optimised s1: never materialise the matrix; accumulate the
    # sums column by column
    times = np.asarray(times)
    d = times.shape[0]
    diag = back(times)
    off_diag_sum, weighted_sum = 0, 0
    for i, t in enumerate(times):
        tt = t - times[:i]
        off_diag = trig(tt)
        odt = np.sum(off_diag)
        total = odt + diag[i]
        diag[i] /= total
        off_diag_sum += odt / total
        weighted_sum += np.sum(off_diag * tt) / total
    return np.sum(diag), off_diag_sum, weighted_sum

for n in range(1, 100):
    times = rand(n)
    exp = np.asarray(sslow(times))
    got = np.asarray(s2(times))
    assert np.all(np.abs(exp - got) < 1e-10)

In [41]:
times = rand(10)

%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))


1000 loops, best of 3: 345 µs per loop
1000 loops, best of 3: 282 µs per loop
1000 loops, best of 3: 283 µs per loop

In [42]:
times = rand(100)

%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))


100 loops, best of 3: 3.06 ms per loop
100 loops, best of 3: 2.38 ms per loop
100 loops, best of 3: 2.63 ms per loop

In [43]:
times = rand(1000)

%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))


10 loops, best of 3: 55.1 ms per loop
10 loops, best of 3: 45.9 ms per loop
10 loops, best of 3: 41.9 ms per loop

In [44]:
times = rand(10000)

%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))


1 loop, best of 3: 2.69 s per loop
1 loop, best of 3: 2.52 s per loop
1 loop, best of 3: 1.74 s per loop

Conclusion: hand-tuned code (s2) is quicker than the naive approach of forming the full matrix and summing (s0), but not by much: roughly a 35% saving at $n=10000$.

