This is a testing notebook for loading and analyzing the output of ImageJ's TrackMate plugin (https://imagej.net/TrackMate). The plugin outputs a wealth of data in XML format, and this set of scripts provides an interface for loading and analyzing these data in Python.


In [ ]:
import numpy as np
import matplotlib.pylab as plt
from os import path
from xml.dom import minidom

# Path to raw TrackMate data
data_dir = '../data/raw/test_dataset_1'
file_name = 'MMStack_Pos0.ome.test1.xml'

# Read data from the provided source file
full_path = path.join(data_dir, file_name)
full_file = minidom.parse(full_path)

The following cell contains common functions that help parse the raw data read out of the TrackMate xml files.


In [ ]:
def pull_property_by_track(prop_name, all_spot_ids, all_spots):
    # Gather any (numerical) spot property for each track
    # 'prop_name' must be a valid key for 'all_spots'
    all_track_property = list() # stores numpy arrays of values
    for spot_ids in all_spot_ids:
        curr_prop_values = np.empty(len(spot_ids), dtype=float)
        for i, spot_id in enumerate(spot_ids):
            spot_ind = spot_id[1]
            curr_prop_values[i] = all_spots[spot_ind][prop_name]    
        all_track_property.append(curr_prop_values)
    return all_track_property

def build_spot_lists_by_track(tracks_raw, all_spots):
    # Build x,y spot coordinates for each track
    # 'all_tracks' and 'all_spots' are lists of dictionaries directly
    # from the output of the TrackMate plugin
    
    # Build an array of spot indices
    spot_ids = np.empty([len(spots)], dtype=int)
    for i, spot in enumerate(all_spots):
        spot_ids[i] = int(spot['ID'])
    
    all_track_edges = list()
    all_track_coords = list() # stores numpy arrays of x,y,t coords
    all_track_spot_ids = list() # stores spot IDs and indices for each track
    for track_raw in tracks_raw:
        curr_edges = track_raw.getElementsByTagName('Edge')
        track_name = track_raw.getAttribute('name')
        edges = list()
        for edge in curr_edges:
            edge_dict = dict(edge.attributes.items())
            edge_dict['track_name'] = track_name
            edges.append(edge_dict)

        edges.sort(key=lambda k: float(k['EDGE_TIME']))
        # build spot list
        num_spots = len(edges) + 1
        track_coords = np.empty([num_spots, 3], dtype=float) # x,y,t
        track_spot_ids = np.empty([num_spots, 2], dtype=int)
        for i in range(num_spots):
            if i==(num_spots-1): # get coords of last spot
                edge = edges[i-1]
                spot_id = int(edge['SPOT_TARGET_ID'])
            else:
                edge = edges[i]
                spot_id = int(edge['SPOT_SOURCE_ID'])
            spot_ind = np.where(spot_ids==spot_id)[0][0]

            track_spot_ids[i][0] = spot_id
            track_spot_ids[i][1] = spot_ind

            track_coords[i][0] = spots[spot_ind]['POSITION_X']
            track_coords[i][1] = spots[spot_ind]['POSITION_Y']
            track_coords[i][2] = spots[spot_ind]['POSITION_T']

        all_track_edges.append(edges)
        all_track_coords.append(track_coords)
        all_track_spot_ids.append(track_spot_ids)
    
    processed_tracks = (all_track_coords, all_track_edges, 
                            all_track_spot_ids)
    return processed_tracks

In [ ]:
# Parse the data into tracks, creating lists of spots that
# make up each individual track (TrackMate assigns a unique
# spot ID to each spot in each frame, and their connections
# into multi-frame tracks are stored as 'Edge' elements)

# Convert the raw track data to a list of dictionaries
tracks = list()
tracks_raw = full_file.getElementsByTagName("Track")
for track_raw in tracks_raw:
    att = dict(track_raw.attributes.items())
    tracks.append(att)

# Compile a complete list of spots as a list of dictionaries
spots = list()
spots_by_frame = full_file.getElementsByTagName("SpotsInFrame")
for spots_curr_frame in spots_by_frame:
    curr_spots = spots_curr_frame.getElementsByTagName("Spot")
    for spot in curr_spots:
        att = dict(spot.attributes.items())
        spots.append(att)


processed_tracks = build_spot_lists_by_track(tracks_raw, spots)
track_coords, track_edges, track_spot_ids = processed_tracks

all_track_intensities = pull_property_by_track('MEAN_INTENSITY',
                                               track_spot_ids, spots)

In [ ]:
# Calculate MSD for each particle
from scipy import stats

t_dsq = list() # time vs sq. displacement for each particle
for track in track_coords:
    # 'track' is an array of x,y,t
    
    track_norm = track - track[0,:] # set x,y,t=0
    xy_sq = np.square(track_norm[:,0:2])
    d_sq = np.sum(xy_sq, axis=1)
    t = track_norm[:,2]
    tds = np.vstack((t, d_sq))
    tds = np.transpose(tds)
    
    t_dsq.append(tds)

# rebuild results as a padded numpy array for calculating means
l_max = max((len(el) for el in t_dsq))

d_sq = np.empty((l_max, len(t_dsq)))
t_total = np.empty((l_max, len(t_dsq)))
for i, x in enumerate(t_dsq):
    len1 = x.shape[0]
    pad_len = l_max - len1
    nanpad = np.empty([pad_len,2])*np.nan
    x_padded = np.concatenate((x,nanpad), axis=0)
    
    d_sq[:,i] = x_padded[:,1]
    t_total[:,i] = x_padded[:,0]
print(d_sq)
mean_t = np.nanmean(t_total, axis=1)
mean_dsq = np.nanmean(d_sq, axis=1)
std_dsq = np.nanstd(d_sq, axis=1)
sterr_dsq = stats.sem(d_sq, axis=1, nan_policy='omit',ddof=0)
print(mean_t)

# Plot results
f1, axarr = plt.subplots(1,2)
axarr[0].set_title('All tracks')
axarr[0].set_xlabel('Time (s)')
axarr[0].set_ylabel('Position (μm)')
axarr[1].set_title('Mean squared displacements')
axarr[1].set_xlabel('Time (s)')
axarr[1].set_ylabel('MSD (μm^2)')
for x in t_dsq:
    axarr[0].plot(x[:,0], x[:,1])


axarr[1].errorbar(mean_t, mean_dsq, yerr=sterr_dsq, fmt='o')
#axarr[1].plot(mean_t, mean_dsq)

plt.show()

In [ ]:
# Plot some useful stuff

%matplotlib
#%matplotlib inline
import matplotlib.pylab as plt
#plt.xkcd()

# Various plotting tools
f2, axarr = plt.subplots(1,2)
axarr[0].set_title('All tracks')
axarr[0].set_xlabel('Position (μm)')
axarr[0].set_ylabel('Position (μm)')
axarr[1].set_xlabel('Time (s)')
axarr[1].set_ylabel('Intensity (A.U.)')
axarr[1].set_title('All intensities')
for i, track in enumerate(track_coords):
    axarr[0].plot(track[:,0], track[:,1])
    axarr[1].plot(track[:,2], all_track_intensities[i])
plt.show()
#plt.plot(track_coords[testind][:,2], all_track_intensities[testind])

In [ ]: