Compute MSD in Python quickly with FFT

The implementation is entirely copied from this SO answer : http://stackoverflow.com/questions/34222272/computing-mean-square-displacement-using-python-and-fft#34222273

Thanks to thomasfermi for the implementation. The original algorithm comes from :

nMoldyn - Interfacing spectroscopic experiments, molecular dynamics simulations and models for time correlation functions
V. Calandrini, E. Pellegrini, P. Calligari, K. Hinsen and G.R. Kneller
Publié en ligne: 9 juin 2011
DOI: 10.1051/sfn/201112010

This notebook compares the speed of calculation of MSD with FFT and without with trackpy.


In [2]:
%load_ext snakeviz
%load_ext autoreload
%autoreload 2
%matplotlib inline

import timeit

import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')

import numpy as np
import pandas as pd

import trackpy as tp

In [45]:
def autocorr_fft(x):
    N = len(x)
    F = np.fft.fft(x, n=2 * N)  # 2*N because of zero-padding
    PSD = F * F.conjugate()
    res = np.fft.ifft(PSD)
    res= (res[:N]).real  # now we have the autocorrelation in convention B
    n = N * np.ones(N) - np.arange(0, N) #divide res(m) by (N-m)
    return res / n  # this is the autocorrelation in convention A

def msd_fft(traj, mpp, fps, max_lagtime=100, detail=False, pos_columns=['x', 'y']):
    """Implementation comes from here : 
    http://stackoverflow.com/questions/34222272/computing-mean-square-displacement-using-python-and-fft#34222273
    """
    
    r = traj[pos_columns].values
    r *= mpp

    t = traj['frame']

    max_lagtime = min(max_lagtime, len(t))  # checking to be safe
    lagtimes = 1 + np.arange(max_lagtime - 1)    

    N = len(r)

    D = np.square(r).sum(axis=1) 
    D = np.append(D, 0)
    S2 = sum([autocorr_fft(r[:, i]) for i in range(len(pos_columns))])

    Q = 2 * D.sum()
    S1 = np.zeros(max_lagtime)

    for m in range(max_lagtime):
        Q = Q - D[m - 1] - D[N - m]
        S1[m] = Q / (N - m)

    msd = S1 - 2 * S2[:max_lagtime]
    msd = msd[1:]

    lagt = lagtimes / fps

    results = pd.DataFrame(np.array([msd, lagt]).T, columns=['msd', 'lagt'])
    results.index = 1 + np.arange(max_lagtime - 1)
    results.index.name = 'lagt'
    
    return results

def get(N, pos_columns):
    traj = np.cumsum(np.random.choice([-1., 0., 1.], size=(N, len(pos_columns))), axis=0)
    traj = pd.DataFrame(traj, columns=pos_columns)
    traj['frame'] = np.arange(N)
    return traj

In [46]:
# Generate a random trajectory

pos_columns = ['x', 'y', 'z']
mpp = 0.3
fps = 1
max_lagtime = 1000

traj = get(N=10000, pos_columns=pos_columns)

In [47]:
# Get MSD with trackpy
msd1 = tp.msd(traj, mpp=mpp, fps=fps, max_lagtime=max_lagtime, pos_columns=pos_columns)
msd1[['msd', 'lagt']].head()


Out[47]:
msd lagt
lagt
1 0.179613 1
2 0.359829 2
3 0.539037 3
4 0.721459 4
5 0.903548 5

In [48]:
# Get MSD with msd_fft
msd2 = msd_fft(traj, mpp=mpp, fps=fps, max_lagtime=max_lagtime, pos_columns=pos_columns)
msd2[['msd', 'lagt']].head()


Out[48]:
msd lagt
lagt
1 0.179613 1
2 0.359829 2
3 0.539037 3
4 0.721459 4
5 0.903548 5

In [49]:
# Compate MSD with plots
fig, ax = plt.subplots(figsize=(14, 10))

msd1.plot(x='lagt', y='msd', ax=ax)
msd2.plot(x='lagt', y='msd', ax=ax)


Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f49cc62a358>

In [50]:
t1 = %timeit -o tp.msd(traj, mpp=mpp, fps=fps, max_lagtime=max_lagtime, pos_columns=pos_columns)
t1.best


1 loops, best of 3: 1.55 s per loop
Out[50]:
1.554775274998974

In [51]:
t2 = %timeit -o msd_fft(traj, mpp=mpp, fps=fps, max_lagtime=max_lagtime, pos_columns=pos_columns)
t2.best


100 loops, best of 3: 5.4 ms per loop
Out[51]:
0.005402266660094028