Unpack Packages


In [1]:
import os
import csv
import sys
module_path = os.path.abspath(os.path.join('C:\\Users\\koolk\\Desktop\\brain-diffusion\\Chad_functions_and_unittests'))
if module_path not in sys.path:
    sys.path.append(module_path)
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import scipy.optimize as opt
import scipy.stats as stat
from operator import itemgetter
import random
import numpy as np
import numpy.ma as ma
import numpy.linalg as la

pi = np.pi
sin = np.sin
cos = np.cos

In [2]:
from MSD_utils import get_data_pups, build_time_array, return_average, avg_all, graph_single_variable
from MSD_utils import SD_all, return_SD, range_and_ticks, choose_y_axis_params, data_prep_for_plotting_pups
from MSD_utils import fill_in_and_split, plot_traj_length_histogram, plot_traj, filter_out_short_traj
from MSD_utils import plot_trajectory_overlay, quality_control

from MSD_utils import diffusion_coefficient_point_derivative, diffusion_coefficient_linear_regression
from MSD_utils import calculate_diffusion_coefficients, diffusion_bar_chart, summary_barcharts
from MSD_utils import calculate_MMSDs, plot_general_histogram, plot_MSD_histogram, plot_all_MSD_histograms
from MSD_utils import fillin2, MSD_iteration, vectorized_MMSD_calcs
from MSD_utils import get_data_gels, data_prep_for_plotting_gels, plot_all_MSD_histograms_gels, quality_control_gels
from MSD_utils import calculate_diffusion_coefficients_gels

In [3]:
folder = "./"
path = "./geoM2xy_{sample_name}.csv"
frames = 401
SD_frames = [10, 20, 50, 80]
conversion = (0.16, 20.08, 1)#(0.3, 3.95, 1)
to_frame = 60
dimension = "2D"
time_to_calculate = 1

base = "in_agarose"
base_name = "RED"
test_bins = np.linspace(0, 75, 76)

# name = 'RED_KO_PEG_P1_S1_cortex'
cut = 1
totvids = 15
frame_m = 401  # atm I can't go lower than the actual value.

parameters = {}
parameters["channels"] = ["RED"]
#parameters["genotypes"] = ["WT"]
#parameters["pups"] = ["P1", "P2", "P3"]
parameters["surface functionalities"] = ["nPEG"]
parameters["slices"] = ["S1", "S2", "S3", "S4"]
# parameters["regions"] = ["withCa", "noCa"]
parameters["replicates"] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
parameters["slice suffixes"] = ['a', 'b', 'c', 'd']


channels = parameters["channels"]
#genotypes = parameters["genotypes"]
#pups = parameters["pups"]
surface_functionalities = parameters["surface functionalities"]
slices = parameters["slices"]
# regions = parameters["regions"]
replicates = parameters["replicates"]
suffixes = parameters["slice suffixes"]

y_range, ticks_y, dec_y, x_range, ticks_x, dec_x = 8, 2, 1, 3, 1, 1

In [ ]:
geoM1x = {}
geoM1y = {}
geoM2xy = {}
SM1x = {}
SM1y = {}
SM2xy = {}

for channel in channels:
    for surface_functionality in surface_functionalities:
        slice_counter = 0
        for slic in slices:
            suffix = suffixes[slice_counter]
            sample_name = "{}_{}_{}_{}_{}".format(channel, surface_functionality, base, '37C_pH72', slic)
            DIR = folder

            total1, xs, ys, x, y = MSD_iteration(DIR, sample_name, cut, totvids, conversion, frame_m)

            geoM1x[sample_name], geoM1y[sample_name], geoM2xy[sample_name], SM1x[sample_name], SM1y[sample_name],\
                SM2xy[sample_name] = vectorized_MMSD_calcs(frames, total1, xs, ys, x, y, frame_m)
            np.savetxt(DIR+'geoM2xy_{}.csv'.format(sample_name), geoM2xy[sample_name], delimiter=',')
            np.savetxt(DIR+'SM2xy_{}.csv'.format(sample_name), SM2xy[sample_name], delimiter=',')

#                 geoM2xy[sample_name] = np.genfromtxt(DIR + 'geoM2xy_{}.csv'.format(sample_name), delimiter=",");
#                 SM2xy[sample_name] = np.genfromtxt(DIR + "SM2xy_{}.csv".format(sample_name), delimiter=",");

            slice_counter = slice_counter + 1

In [ ]:
units = len(geoM2xy)
duplicates = []

counter = 0
for keys in geoM2xy:
    duplicates.append(keys.split('_S')[0]);
    counter = counter + 1
    
seen = set()
uniq = []
for x in duplicates:
    if x not in seen:
        uniq.append(x)
        seen.add(x)

In [ ]:
uniq

In [ ]:
frames

In [ ]:
to_avg = np.zeros((len(slices), frames))
averaged = {}
stded = {}
upper = {}
lower = {}

for x in uniq:
    counter = 0
    for slic in slices:
        sample_name = "{}_{}".format(x, slic);
        to_avg[counter, :] = geoM2xy[sample_name]
        counter = counter + 1;
    averaged[x] = np.mean(to_avg, axis=0)
    stded[x] = np.std(to_avg, axis=0)/2
    upper[x] = averaged[x] + stded[x]
    lower[x] = averaged[x] - stded[x]

In [ ]:
time, time_SD = build_time_array(frames, conversion, SD_frames)

In [ ]:
fig, ax1 = plt.subplots(1, 1, figsize = (16, 8))
filename = 'diffusion_varying_calcium.png'
x_range = 5;
y_range = 8;

tick_size = 28;
label_size = 40;
legend_size = 20;
width = 4

time = time[0:480];
cgamut = ['skyblue', 'limegreen', 'salmon', 'violet'];
lgamut = ['blue', 'green', 'red', 'purple'];

counter = 0;
for x in uniq:
    test = x;
    ax1.fill_between(time[1:], lower[test][1:], averaged[test][1:], color=cgamut[counter], alpha = 0.5);
    ax1.fill_between(time[1:], averaged[test][1:], upper[test][1:], color=cgamut[counter], alpha = 0.5);
    ax1.plot(time[1:], averaged[test][1:], color = lgamut[counter], label = test, linewidth = width)
    counter = counter + 1;

plt.gca().set_xlim([0, x_range])
plt.gca().set_ylim([0, y_range])
ax1.legend(loc='best', prop={'size': legend_size})

ax1.title.set_fontsize(tick_size)
ax1.set_xlabel('Time (s)', fontsize=label_size)
ax1.set_ylabel(r'MSD ($\mu$m$^2$)', fontsize=label_size)
ax1.tick_params(direction='out', pad=16)

for item in ([ax1.xaxis.label, ax1.yaxis.label] +
             ax1.get_xticklabels() + ax1.get_yticklabels()):
    item.set_fontsize(tick_size)
    
plt.savefig('{}'.format(filename), bbox_inches='tight')
plt.show()

In [ ]:
x = np.arange(0.0, 2, 0.01)
y1 = np.sin(2*np.pi*x)
y2 = 1.2*np.sin(4*np.pi*x)

fig, ax1 = plt.subplots(1, 1, sharex=True)

ax1.fill_between(x, 0, y1)

plt.show()

In [ ]:
lower[test]

In [ ]:
time.shape

In [ ]:
averaged[test]

In [ ]:
for keys in avg_sets:
    all_avg[avg_sets[keys]] = return_average(data, frames, avg_sets[keys])
return all_avg

In [ ]:
data, avg_over_slices, time, time_SD, average_over_slices, all_SD_over_slices = \
    data_prep_for_plotting_gels(path, frames, SD_frames, conversion, to_frame, parameters, base);

In [ ]:
plot_all_MSD_histograms_gels(parameters, folder, SM2xy, time, test_bins, 1, set_y_limit=True, y_range=5000, set_x_limit=True, x_range=60)

Second Dataset


In [ ]:
folder = "./{functionality}/{slic}/"
path = "./{functionality}/{slic}/geoM2xy_{sample_name}.csv"
path2 = "./{functionality}/{slic}/Traj_{sample_name}.tif.csv"

frames = 120
SD_frames2 = [1, 2, 3, 4]
conversion = (1.24, 1.93, 1)#(0.3, 3.95, 1)
to_frame = 60
dimension = "2D"
time_to_calculate = 1

base = "0-4p_agarose"
base_name = "RED"
test_bins = np.linspace(0, 75, 76)

# name = 'RED_KO_PEG_P1_S1_cortex'
cut = 4
totvids = 10
frame_m = 120  # atm I can't go lower than the actual value.

parameters = {}
parameters["channels"] = ["RED"]
parameters["surface functionalities"] = ["nPEG"]
parameters["slices"] = ["1", "2", "3", "4"]
parameters["replicates"] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
parameters["slice suffixes"] = ['a', 'b', 'c', 'd']


channels = parameters["channels"]
surface_functionalities = parameters["surface functionalities"]
slices = parameters["slices"]
replicates = parameters["replicates"]
suffixes = parameters["slice suffixes"]

frames2 = 120
interv = 50

In [ ]:
geoM1x = {}
geoM1y = {}
geoM2xy = {}
SM1x = {}
SM1y = {}
SM2xy = {}

for channel in channels:
    for surface_functionality in surface_functionalities:
        slice_counter = 0
        for slic in slices:
            suffix = suffixes[slice_counter]
            sample_name = "{}_{}_0-4p_agarose_{}".format(channel, surface_functionality, slic)
            DIR = folder.format(functionality = surface_functionality, slic = slic)
            frames, total1, xs, ys, x, y = MSD_iteration(DIR, sample_name, cut, totvids, conversion, frame_m, suffix)

            geoM1x[sample_name], geoM1y[sample_name], geoM2xy[sample_name], SM1x[sample_name], SM1y[sample_name],\
                SM2xy[sample_name] = vectorized_MMSD_calcs(frames, total1, xs, ys, x, y, frame_m)
            np.savetxt(DIR+'geoM2xy_{}.csv'.format(sample_name), geoM2xy[sample_name], delimiter=',')
            np.savetxt(DIR+'SM2xy_{}.csv'.format(sample_name)), SM2xy[sample_name], delimiter=',')

            slice_counter = slice_counter + 1

In [ ]:
data2, avg_over_slices2, time2, time_SD2, average_over_slices2, all_SD_over_slices2 = \
    data_prep_for_plotting_gels(path, frames2, SD_frames2, conversion, to_frame, parameters, base);

In [ ]:
plot_all_MSD_histograms_gels(parameters, base, folder, SM2xy, time2, test_bins, 1, set_y_limit=True, y_range=5000, set_x_limit=True, x_range=60)

In [ ]:
quality_control_gels(path2, folder, frames, conversion, parameters, base, interv, cut)

In [ ]:
def quality_control_gels(path2, folder, frames, conversion, parameters, base, interv, cut):
    """
    This function plots a histogram of trajectory lengths (in units of frames) and two types of plots of
    trajectories (original and overlay).

    Inputs:
    path2: string.  Name of input trajectory csv files.
    folder: string. Name of folder to which to save results.
    frames: integer.  Total number of frames in videos.
    conversion: array.  Contains microns per pixel and frames per second of videos.
    parameters:

    parameters = {}
    parameters["channels"] = ["RED"]
    parameters["surface functionalities"] = ["PEG"]
    parameters["slices"] = ["S1", "S2", "S3"]
    parameters["replicates"] = [1, 2, 3, 4]

    cut: integer.  Minimum number of frames a trajectory must have in order to be plotted.
    """

    channels = parameters["channels"]
    surface_functionalities = parameters["surface functionalities"]
    slices = parameters["slices"]
    replicates = parameters["replicates"]
    SD_frames = [1, 7, 14, 15]

    trajectory = {}
    names_with_replicates = {}
    data = {}

    particles_unfiltered = {}
    framed_unfiltered = {}
    x_data_unfiltered = {}
    y_data_unfiltered = {}
    total_unfiltered = {}
    particles = {}
    framed = {}
    x_data = {}
    y_data = {}
    total = {}
    tlength = {}
    x_microns = {}
    y_microns = {}
    x_particle = {}
    y_particle = {}

    x_original_frames = {}
    y_original_frames = {}

    x_adjusted_frames = {}
    y_adjusted_frames = {}

    counter2 = 0

    for channel in channels:
            for surface_functionality in surface_functionalities:
                        for slic in slices:
                            for replicate in replicates:
                                # Establishing sample names and extracting data from csv files.
                                counter2 = counter2 + 1
                                sample_name_long = "{}_{}_{}_{}_{}".format(channel, surface_functionality, base, slic, replicate)
                                names_with_replicates[counter2] = sample_name_long

                                filename = path2.format(functionality = surface_functionality, slic = slic, sample_name=sample_name_long)
                                data[sample_name_long] = np.genfromtxt(filename, delimiter=",")
                                data[sample_name_long] = np.delete(data[sample_name_long], 0, 1)

                                # Names of output plots
                                fold = folder.format(functionality = surface_functionality, slic = slic)
                                
                                logplot = fold +'{}_logplot'.format(sample_name_long)
                                Mplot = fold +'{}_Mplot'.format(sample_name_long)
                                Dplot = fold +'{}_Dplot'.format(sample_name_long)
                                Hplot = fold +'{}_Hplot'.format(sample_name_long)
                                Hlogplot = fold +'{}_Hlogplot'.format(sample_name_long)
                                Cplot = fold +'{}_Cplot'.format(sample_name_long)
                                Tplot = fold +'{}_Tplot'.format(sample_name_long)
                                T2plot = fold +'{}_T2plot'.format(sample_name_long)
                                lenplot = fold +'{}_lenplot'.format(sample_name_long)

                                # Fill in data and split into individual trajectories
                                particles_unfiltered[counter2], framed_unfiltered[counter2], x_data_unfiltered[counter2],\
                                    y_data_unfiltered[counter2] = fill_in_and_split(data[names_with_replicates[counter2]])

                                total_unfiltered[counter2] = int(max(particles_unfiltered[counter2]))

                                # Filter out short trajectories
                                particles[counter2], framed[counter2], x_data[counter2], y_data[counter2] =\
                                    filter_out_short_traj(particles_unfiltered[counter2], framed_unfiltered[counter2],
                                                          x_data_unfiltered[counter2], y_data_unfiltered[counter2], cut)

                                # Convert to microns and seconds
                                time, time_SD = build_time_array(frames, conversion, SD_frames)
                                framen = np.linspace(0, frames, frames+1).astype(np.int64)
                                total[counter2] = int(max(particles[counter2]))
                                tlength[counter2] = np.zeros(total[counter2])

                                x_microns[counter2] = x_data[counter2]*conversion[0]
                                y_microns[counter2] = y_data[counter2]*conversion[0]

                                # Adjust frames (probably unneccesary, but I did it...)
                                x_particle[counter2] = {}
                                y_particle[counter2] = {}

                                x_original_frames[counter2] = {}
                                y_original_frames[counter2] = {}

                                x_adjusted_frames[counter2] = {}
                                y_adjusted_frames[counter2] = {}

                                for num in range(1, total[counter2] + 1):
                                    hold = np.where(particles[counter2] == num)
                                    itindex = hold[0]
                                    min1 = min(itindex)
                                    max1 = max(itindex)
                                    x_particle[counter2][num] = x_microns[counter2][min1:max1+1]
                                    y_particle[counter2][num] = y_microns[counter2][min1:max1+1]

                                    x_original_frames[counter2][num] = np.zeros(frames + 1)
                                    y_original_frames[counter2][num] = np.zeros(frames + 1)
                                    x_adjusted_frames[counter2][num] = np.zeros(frames + 1)
                                    y_adjusted_frames[counter2][num] = np.zeros(frames + 1)

                                    x_original_frames[counter2][num][framed[counter2][min1]:framed[counter2][max1]+1]\
                                        = x_microns[counter2][min1:max1+1]
                                    y_original_frames[counter2][num][framed[counter2][min1]:framed[counter2][max1]+1]\
                                        = y_microns[counter2][min1:max1+1]

                                    x_adjusted_frames[counter2][num][0:max1+1-min1] = x_microns[counter2][min1:max1+1]
                                    y_adjusted_frames[counter2][num][0:max1+1-min1] = y_microns[counter2][min1:max1+1]

                                    x_original_frames[counter2][num] = ma.masked_equal(x_original_frames[counter2][num], 0)
                                    y_original_frames[counter2][num] = ma.masked_equal(y_original_frames[counter2][num], 0)
                                    x_adjusted_frames[counter2][num] = ma.masked_equal(x_adjusted_frames[counter2][num], 0)
                                    y_adjusted_frames[counter2][num] = ma.masked_equal(y_adjusted_frames[counter2][num], 0)

                                    tlength[counter2][num - 1] = ma.count(x_adjusted_frames[counter2][num])

                                plot_traj(x_original_frames[counter2], y_original_frames[counter2], total[counter2], interv, T2plot)
                                plt.gcf().clear()
                                plot_trajectory_overlay(x_original_frames[counter2], y_original_frames[counter2], 6, 2, 6, Tplot)
                                plt.gcf().clear()
                                plot_traj_length_histogram(tlength[counter2], max(tlength[counter2]), lenplot)
                                plt.gcf().clear()

Plot Datasets Together

Because my PEG and nPEG datasets had different framerates, I had to analyze the datasets separately. My code requires all datasets to have the same framerate, and thus they can't be plotted together.


In [ ]:
# Creates figure

line_colors=['g', 'r', 'b', 'c', 'm', 'k']
line_kind='-'
labels = ['PEG', 'nPEG']
label_size=95
legend_size=40 
tick_size=50
line_width=10
fig_size=(20, 18)
filename = 'test.png'

to_graph = {}
counter = 0
for keys in average_over_slices:
    to_graph[counter] = keys
    counter = counter + 1
for keys in average_over_slices2:
    to_graph[counter] = keys
    counter = counter + 1

fig = plt.figure(figsize=fig_size, dpi=80)
ax = fig.add_subplot(111)

line_type = [s + line_kind for s in line_colors]
ax.plot(time[0:to_frame], average_over_slices[to_graph[0]][0:to_frame], line_type[0], linewidth=line_width, label=labels[0])
ax.errorbar(time_SD, average_over_slices[to_graph[0]][SD_frames], all_SD_over_slices[to_graph[0]], fmt='', linestyle='',
            capsize=7, capthick=2, elinewidth=2, color=line_colors[0])

ax.plot(time2[0:to_frame], average_over_slices2[to_graph[1]][0:to_frame], line_type[1], linewidth=line_width, label=labels[1])
ax.errorbar(time_SD2, average_over_slices2[to_graph[1]][SD_frames2], all_SD_over_slices2[to_graph[1]], fmt='', linestyle='',
            capsize=7, capthick=2, elinewidth=2, color=line_colors[1])

# A few adjustments to prettify the graph
for item in ([ax.xaxis.label, ax.yaxis.label] +
             ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(tick_size)

xmajor_ticks = np.arange(0, x_range+0.0001, ticks_x)
ymajor_ticks = np.arange(0, y_range+0.0001, ticks_y)

ax.set_xticks(xmajor_ticks)
plt.xticks(rotation=-30)
ax.set_yticks(ymajor_ticks)
ax.title.set_fontsize(tick_size)
ax.set_xlabel('Time (s)', fontsize=label_size)
ax.set_ylabel(r'MSD ($\mu$m$^2$)', fontsize=label_size)
ax.tick_params(direction='out', pad=16)
ax.legend(loc=(0.02, 0.75), prop={'size': legend_size})
plt.gca().xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.{}f'.format(dec_x)))
plt.gca().yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.{}f'.format(dec_y)))

# plt.yscale('log')
# plt.xscale('log')
plt.gca().set_xlim([0, x_range+0.0001])
plt.gca().set_ylim([0, y_range+0.0001])

# Save your figure
plt.savefig('{}'.format(filename), bbox_inches='tight')
plt.show()

In [ ]:
def calculate_diffusion_coefficients_gels(channels, surface_functionalities, slices, path, time, time_to_calculate, to_frame, dimension):

    """
    Loads data from csv files and outputs a dictionary following a specified
        sample naming convection determined by the input

    Parameters:
    channels, surface functionalities, media, and concentrations, and replicates
        can take ranges or lists.
    path is string with substition placeholders for concentration and sample
        name (built from channels, surface_functionalities, media,
        concentrations, and replicates).

    Example:
    path = "./{genotype}/{pup}/{region}/{channel}/geoM2xy_{sample_name}.csv";
    get_data(["RED", "YG"], ["WT", "KO", "HET"], ["P1", "P2", "P3", "P4"],
    ["PEG", "noPEG"], ["S1", "S2", "S3", "S4"], ["cortex", "hipp", "mid"],
    [1, 2, 3, 4, 5], path)
    """

    data = {}
    avg_over_slices_raw = {}
    avg_over_pups_raw = {}
    names_with_replicates = {}
    counter = 0
    counter2 = 0

    diffusion_coef_point_derivative = {}
    diffusion_coef_linear_fit = {}

    for channel in channels:
        for surface_functionality in surface_functionalities:
            for slic in slices:
                test_value = "{}_{}_0-4p_agarose_{}".format(channel, surface_functionality, slic)
                avg_over_slices_raw[counter] = test_value
                counter = counter + 1
                sample_name = test_value
                for replicate in replicates:
                    sample_name_long = test_value + "_{}".format(replicate)
                    names_with_replicates[counter2] = sample_name_long
                    counter2 = counter2 + 1
                filename = path.format(functionality = surface_functionality, slic = slic, sample_name=sample_name)
                data[sample_name] = np.genfromtxt(filename, delimiter=",")

                diffusion_coef_point_derivative[sample_name] =\
                    diffusion_coefficient_point_derivative(data[sample_name], time, time_to_calculate, dimension)
                diffusion_coef_linear_fit[sample_name] =\
                    diffusion_coefficient_linear_regression(data[sample_name], time, to_frame, dimension)

    return diffusion_coef_point_derivative, diffusion_coef_linear_fit

In [ ]:
calculate_diffusion_coefficients_gels(channels, surface_functionalities, slices, path, time, time_to_calculate, to_frame, dimension)

In [ ]: