notebook.community

Edit and run



In [1]:

    
#%matplotlib inline

# Import all the programs we want to use. If this gives an error, then you need to add these to your python path.

import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np 
import pandas as pd
from pandas import DataFrame, Series  # for convenience

import pims
import trackpy as tp
#import holopy as hp
import os
#import av
import scipy
import scipy.optimize as sco
import seaborn
from matplotlib.backends.backend_pdf import PdfPages

#%matplotlib notebook

from __future__ import division  # this makes mathematical division work better
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
scaling =  0.08431 #um/pixel
mpp = scaling
fps = 11.959
moviename = 'tracer+janus_no_H2O2_3(green)2016-06-14'
os.chdir('C:\\Users\\Viva\\Desktop\\EPJ folder\\analysis')
tmcontrolfilename = 'filtered_data_with_drift_subtracted_tracer+janus_no_H2O2_3(green)2016-06-14_pickled.pkl'
tm = pd.read_pickle(tmcontrolfilename)
tracks = tm['particle'].astype(int).unique()

print size(tracks)
tm.head()









    



455






    Out[2]:







  
    
      
      x
      y
      relative_to_frame
      x_raw
      y_raw
      mass
      size
      ecc
      signal
      raw_mass
      ep
      frame
      particle
    
    
      frame
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      875.160878
      192.028652
      0
      874.697693
      191.754771
      3968.798645
      4.464910
      0.027739
      37.269016
      11861.0
      0.079888
      1
      4.0
    
    
      1
      835.597617
      244.699832
      0
      835.206839
      244.480260
      1849.738631
      4.366219
      0.019074
      18.986103
      9252.0
      0.169469
      1
      5.0
    
    
      1
      481.856246
      283.222511
      0
      481.518730
      283.488932
      2529.370788
      4.271017
      0.022451
      25.666398
      8377.0
      0.271613
      1
      9.0
    
    
      1
      567.957162
      295.440481
      0
      567.636351
      295.588590
      2328.258737
      4.227951
      0.009058
      26.369587
      9071.0
      0.183764
      1
      10.0
    
    
      1
      954.420411
      303.649093
      0
      954.110514
      303.266207
      2240.711709
      4.543183
      0.031326
      20.744075
      10623.0
      0.106635
      1
      11.0



In [3]:

    
plt.axis('equal')
ax = tp.plot_traj(tm, mpp = scaling, legend=False)



In [4]:

    
imsd = tp.imsd(tm, scaling, fps, max_lagtime=1000)



In [5]:

    
fig, ax = plt.subplots()
ax.plot(imsd.index, imsd, 'k-', alpha=0.1)  # black lines, semitransparent
ax.set(ylabel=r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]',
       xlabel='lag time $\Delta{}t$ [s]')
ax.set_xscale('log')
ax.set_yscale('log')
fig.set_size_inches(3,3)
plt.title(moviename + '\nMSD')









    Out[5]:





Text(0.5,1,u'tracer+janus_no_H2O2_3(green)2016-06-14\nMSD')



In [6]:

    
emsd = tp.emsd(tm, scaling, fps,max_lagtime=60)



In [7]:

    
# I wonder whether these datapoints are normally distributed.

first = True
for i in np.logspace(0,log(len(imsd)-1)/log(10),num=6, dtype=int, base=10):
    #print i
    fig = figure(figsize=[5,.7])
       
    # plot a histogram of the MSD values for a single lag time
    imsd.iloc[i].hist(bins=30, grid = False, histtype='stepfilled', color='.35'
                               ,label=r'$\Delta$t = {:.3} s'.format(imsd.index[i]))
    
    try:
        # plot a vertical line showing the ensemble mean square displacement for a single lag time
        axvline(x=emsd.iloc[i], color='b', linewidth=1.5)
           #,label='mean {:.2}'.format(emsd_zero_perox.iloc[i]))
    except IndexError:
        pass
    
    if first:
        plt.title(moviename + '\ndistribution of MSD values', loc='left') 
        first = False
    plt.ylabel('Unweighted\noccurences')
    plt.legend(frameon=True, fontsize='small', markerscale= 1)

plt.xlabel(r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]')









    Out[7]:





Text(0.5,0,u'$\\langle \\Delta r^2 \\rangle$ [$\\mu$m$^2$]')



In [8]:

    
res = tp.utils.fit_powerlaw(emsd)  # performs linear best fit in log space, plots
res



In [9]:

    
t0 = frange(.0666,10,.1)
fit = res.A[0]*(t0**res.n[0])



In [10]:

    
ax =emsd.plot(loglog=True, style='b.',linewidth=.3, grid=False, figsize = [3,3], label="emsd")

# plot fit
loglog(t0,fit, 'b')

ax.set(ylabel=r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]', xlabel='lag time $\Delta t$ [s]')
#ax.set_xticks([.1,1,10])
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FormatStrFormatter('%g'))

plt.title(moviename + "\n2D MSD")

#savefig('/home/viva/group/viva/Analysis/2016-03-02/MSD_of_tracers_in_droplets_containing_both_tracers_and_Janus_particles.pdf')
#savefig('/home/viva/group/viva/Analysis/2016-03-02/MSD_of_tracers_in_droplets_containing_both_tracers_and_Janus_particles.png')









    Out[10]:





Text(0.5,1,u'tracer+janus_no_H2O2_3(green)2016-06-14\n2D MSD')



In [11]:

    
# A new version of tp.motion.emsd() that calculates standard deviation.
# This function is copied from trackpy. (Please see the trackpy license.)
# I [Viva] added the calculation of biased weighted standard deviation.

def my_emsd(traj, mpp, fps, max_lagtime=100, detail=False, pos_columns=None):
    """Compute the ensemble mean squared displacements of many particles.

    Parameters
    ----------
    traj : DataFrame of trajectories of multiple particles, including
        columns particle, frame, x, and y
    mpp : microns per pixel
    fps : frames per second
    max_lagtime : intervals of frames out to which MSD is computed
        Default: 100
    detail : Set to True to include <x>, <y>, <x^2>, <y^2>. Returns
        only <r^2> by default.

    Returns
    -------
    Series[msd, index=t] or, if detail=True,
    DataFrame([<x>, <y>, <x^2>, <y^2>, msd, N, lagt,
               std_<x>, std_<y>, std_<x^2>, std_<y^2>, 
               std_msd],
              index=frame)

    Notes
    -----
    Input units are pixels and frames. Output units are microns and seconds.
    """
    ids = []
    msds = []
    for pid, ptraj in traj.reset_index(drop=True).groupby('particle'):
        msds.append(tp.motion.msd(ptraj, mpp, fps, max_lagtime, True, pos_columns))
        ids.append(pid)
    msds = pd.concat(msds, keys=ids, names=['particle', 'frame'])
    results = msds.mul(msds['N'], axis=0).mean(level=1)  # weighted average
    results = results.div(msds['N'].mean(level=1), axis=0)  # weights normalized
    # Above, lagt is lumped in with the rest for simplicity and speed.
    # Here, rebuild it from the frame index.
    
    if not detail:
        return results.set_index('lagt')['msd']

    # Calculation of biased weighted standard deviation
    numerator = ((msds.subtract(results))**2).mul(msds['N'], axis=0).sum(level=1)
    denominator = msds['N'].sum(level=1) # without Bessel's correction
    variance = numerator.div(denominator, axis=0)
    variance = variance[['<x>', '<y>', '<x^2>','<y^2>','msd']]
    std = np.sqrt(variance)
    std.columns = 'std_' + std.columns  

    return results.join(std)

detailed_emsd = my_emsd(tm, scaling, fps, detail=True, max_lagtime=130)



In [12]:

    
plt.errorbar(detailed_emsd.lagt, 
             detailed_emsd.msd, 
             yerr = detailed_emsd.std_msd, 
             capthick=0, 
             alpha = 0.7,
             linewidth=.2,
             label="biased weighted standard deviation")









    Out[12]:





<Container object of 3 artists>

values below 0 will not be plotted on loglog plot



In [13]:

    
plt.errorbar(detailed_emsd.lagt, 
             detailed_emsd.msd, 
             yerr = detailed_emsd.std_msd, 
             capthick=0, 
             alpha = 0.7,
             linewidth=.2,
             label="biased weighted standard deviation")
loglog(detailed_emsd.lagt, detailed_emsd.msd, 'b.',  label="ensemble msd")
loglog(t0,fit, 'r', alpha=.7, label="power law fit") # plot fit

ax2 = plt.subplot(111)
ax2.set(ylabel=r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]', xlabel='lag time $\Delta t$ [s]')
ax2.get_xaxis().set_major_formatter(matplotlib.ticker.FormatStrFormatter('%g'))

plt.title(moviename + "\n2D MSD")
plt.legend(loc=2, fontsize='medium')









    



C:\ProgramData\Anaconda2\lib\site-packages\matplotlib\cbook\deprecation.py:106: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance.  In a future version, a new instance will always be created and returned.  Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
  warnings.warn(message, mplDeprecation, stacklevel=1)






    Out[13]:





<matplotlib.legend.Legend at 0x126d4048>



In [14]:

    
fig, ax = plt.subplots()

ax.plot(imsd.index, imsd, 'k-', alpha=0.1, label="MSD of each particle")
#loglog(t0,fit, 'r', alpha=.7, label="power law fit") # plot fit
plt.errorbar(detailed_emsd.lagt, detailed_emsd.msd, yerr = detailed_emsd.std_msd, capthick=0, 
             linewidth=1.5, alpha=.3,
             label="ensemble MSD with standard deviation")

ax.set(ylabel=r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]',
       xlabel='lag time $\Delta{}t$ [s]')
ax.set_xscale('log')
ax.set_yscale('log')
ax.get_xaxis().set_major_formatter(matplotlib.ticker.FormatStrFormatter('%g'))
plt.title(moviename + '\n2D MSD, comparing imsd and emsd')









    Out[14]:





Text(0.5,1,u'tracer+janus_no_H2O2_3(green)2016-06-14\n2D MSD, comparing imsd and emsd')



In [15]:

    
# I wonder whether these datapoints are normally distributed.

first = True
for i in np.logspace(0,log(len(emsd)-1)/log(10),num=20, dtype=int, base=10):
    #print i
    fig = figure(figsize=[5,.7])
       
    # plot a histogram of the MSD values for a single lag time
    imsd.iloc[i].hist(bins=30, grid = False, histtype='stepfilled', color='.35',
                      label=r'$\Delta$t = {:.3} s'.format(imsd.index[i]))
    
    # plot a blue vertical line showing the ensemble mean square displacement for a single lag time
    axvline(x=detailed_emsd.iloc[i].msd, color='b', linewidth=1.5)
           #,label='mean {:.2}'.format(emsd_zero_perox.iloc[i]))
        
    # plot a red horizontal line showing the standard deviation
    xmin = (detailed_emsd.iloc[i].msd)-(detailed_emsd.iloc[i].std_msd)
    xmax = (detailed_emsd.iloc[i].msd)+(detailed_emsd.iloc[i].std_msd)
    plot([xmin,xmax],[5,5], 'r')
    
    if first:
        plt.title(moviename + '\ndistribution of MSD values\n'+ '$\Delta$t = {:.3} s'.format(imsd.index[i])) 
        first = False
    else:
        plt.title('$\Delta$t = {:.3} s'.format(imsd.index[i]), fontdict={'fontsize':'medium'}, loc='center')
    plt.ylabel('Unweighted\noccurences')
    #plt.legend(frameon=True, fontsize='small')

plt.xlabel(r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]')









    Out[15]:





Text(0.5,0,u'$\\langle \\Delta r^2 \\rangle$ [$\\mu$m$^2$]')



In [16]:

    
from scipy.optimize import curve_fit



In [17]:

    
import scipy
scipy.__version__
# need at least version 14.0 of scipy.









    Out[17]:





'0.19.1'



In [18]:

    
def powerlaw(t, A, n):
    return (A * (t**n))



In [19]:

    
params, pcov = curve_fit(powerlaw, detailed_emsd.lagt, detailed_emsd.msd, 
                         #p0=[res.A[0], res.n[0]], 
                         sigma=detailed_emsd.std_msd,
                         absolute_sigma=True)



In [20]:

    
yfit = powerlaw(detailed_emsd.lagt,params[0],params[1])

#for y in yfit:
#    if y < 0:
#        yfit.replace(y, nan, inplace=True)



In [21]:

    
loglog(detailed_emsd.lagt,detailed_emsd.msd, 'k.')
loglog(detailed_emsd.lagt,yfit, 'm-')
plt.errorbar(detailed_emsd.lagt, 
             detailed_emsd.msd, 
             yerr = detailed_emsd.std_msd, 
             ecolor = 'k',
             capthick=0, 
             alpha = 0.7,
             linewidth=.3,
             label="biased weighted standard deviation")

plt.xlabel('lag time $\Delta{}t$ [s]')
plt.ylabel(r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]')
#pylab.axes().set_aspect('equal', 'datalim')
plt.title(moviename + '\ncurve fit')

#print 'Fit to power law, MSD ~ A * $\Delta$t$^n$.'
#print 'Coefficient A = ' + str(params[0]) + ' ± ' + str(pcov[0,0])
#print 'Exponent n = ' + str(params[1]) + ' ± ' + str(pcov[1,1])

plt.text(0.02,20, 
        'Fit to power law, MSD = A * $\Delta$t$^n$.'
         + '\nExponent n = {:.2f} $\pm$ {:.2f}.'.format(params[1],sqrt(pcov[1,1]))
         + '\nCoefficient A = {:.2f} $\pm$ {:.2f}.'.format(params[0],sqrt(pcov[0,0])))


# The entries on the diagonal of the covariance matrix \Sigma are 
# the variances of each element of the vector \mathbf{X}.
#  source: https://en.wikipedia.org/wiki/Covariance_matrix#Generalization_of_the_variance









    Out[21]:





Text(0.02,20,u'Fit to power law, MSD = A * $\\Delta$t$^n$.\nExponent n = 1.00 $\\pm$ 0.03.\nCoefficient A = 1.40 $\\pm$ 0.06.')



In [22]:

    
pcov









    Out[22]:





array([[ 0.00373089, -0.00027736],
       [-0.00027736,  0.00092381]])



In [23]:

    
imshow(abs(pcov), cmap="gray", interpolation="nearest", vmin=0)
plt.colorbar()
plt.title('Covariance matrix, absolute values')









    Out[23]:





Text(0.5,1,u'Covariance matrix, absolute values')



In [24]:

    
print moviename
print  'Coefficient A = ' + str(params[0]) + ' ± ' + str(np.sqrt(pcov[0,0]))
print 'Exponent n = ' + str(params[1]) + ' ± ' + str(np.sqrt(pcov[1,1]))









    



tracer+janus_no_H2O2_3(green)2016-06-14
Coefficient A = 1.40422386265 ± 0.0610810038731
Exponent n = 1.00279955763 ± 0.0303942205249



In [25]:

    
# Try linear fit

def linear(Dt,t):
    return 4*Dt*t

Dt, Dt_pcov = curve_fit(linear, detailed_emsd.lagt, detailed_emsd.msd)
print Dt
print Dt_pcov

print "better fit next"

Dt, Dt_pcov = curve_fit(linear, detailed_emsd.lagt, detailed_emsd.msd,
                        sigma=detailed_emsd.std_msd,
                        absolute_sigma=True)

print Dt
print Dt_pcov

Dt = Dt[0]
Dt_pcov=Dt_pcov[0][0]

print 'Dt = ' + str(Dt) +' ± '+ str(Dt_pcov) + ' um^2/s'

linearfit=4*Dt*detailed_emsd.lagt









    



[ 0.33119577]
[[  1.92575379e-06]]
better fit next
[ 0.35125771]
[[ 0.00022824]]
Dt = 0.351257709885 ± 0.000228242262229 um^2/s



In [26]:

    
plot(detailed_emsd.lagt,detailed_emsd.msd, 'k.')
plot(detailed_emsd.lagt,linearfit, 'm-')
plt.errorbar(detailed_emsd.lagt, 
             detailed_emsd.msd, 
             yerr = detailed_emsd.std_msd, 
             ecolor = 'k',
             capthick=0, 
             alpha = 0.7,
             linewidth=.3,
             label="biased weighted standard deviation")

plt.xlabel('lag time $\Delta{}t$ [s]')
plt.ylabel(r'$\langle \Delta r^2 \rangle$ [$\mu$m$^2$]')
#pylab.axes().set_aspect('equal', 'datalim')
plt.title(moviename + '\ncurve fit')

#print 'Fit to power law, MSD ~ A * $\Delta$t$^n$.'
#print 'Coefficient A = ' + str(params[0]) + ' ± ' + str(pcov[0,0])
#print 'Exponent n = ' + str(params[1]) + ' ± ' + str(pcov[1,1])

plt.text(0.02,20, 
        'Fit to line MSD $= 4D_t \Delta t$ \n $D_t = $' +str(Dt) + ' $\pm$ ' + str(Dt_pcov) + ' $\mu m^2/s$')


# The entries on the diagonal of the covariance matrix \Sigma are 
# the variances of each element of the vector \mathbf{X}.
#  source: https://en.wikipedia.org/wiki/Covariance_matrix#Generalization_of_the_variance









    Out[26]:





Text(0.02,20,u'Fit to line MSD $= 4D_t \\Delta t$ \n $D_t = $0.351257709885 $\\pm$ 0.000228242262229 $\\mu m^2/s$')



In [ ]:

	x	y	relative_to_frame	x_raw	y_raw	mass	size	ecc	signal	raw_mass	ep	frame	particle
frame
1	875.160878	192.028652	0	874.697693	191.754771	3968.798645	4.464910	0.027739	37.269016	11861.0	0.079888	1	4.0
1	835.597617	244.699832	0	835.206839	244.480260	1849.738631	4.366219	0.019074	18.986103	9252.0	0.169469	1	5.0
1	481.856246	283.222511	0	481.518730	283.488932	2529.370788	4.271017	0.022451	25.666398	8377.0	0.271613	1	9.0
1	567.957162	295.440481	0	567.636351	295.588590	2328.258737	4.227951	0.009058	26.369587	9071.0	0.183764	1	10.0
1	954.420411	303.649093	0	954.110514	303.266207	2240.711709	4.543183	0.031326	20.744075	10623.0	0.106635	1	11.0