In [1]:
import os
import csv
import sys
module_path = os.path.abspath(os.path.join('C:\\Users\\koolk\\Desktop\\brain-diffusion\\Chad_functions_and_unittests'))
if module_path not in sys.path:
sys.path.append(module_path)
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import scipy.optimize as opt
import scipy.stats as stat
from operator import itemgetter
import random
import numpy as np
import numpy.ma as ma
import numpy.linalg as la
pi = np.pi
sin = np.sin
cos = np.cos
In [2]:
from MSD_utils import get_data_pups, build_time_array, return_average, avg_all, graph_single_variable
from MSD_utils import SD_all, return_SD, range_and_ticks, choose_y_axis_params, data_prep_for_plotting_pups
from MSD_utils import fill_in_and_split, plot_traj_length_histogram, plot_traj, filter_out_short_traj
from MSD_utils import plot_trajectory_overlay, quality_control
from MSD_utils import diffusion_coefficient_point_derivative, diffusion_coefficient_linear_regression
from MSD_utils import calculate_diffusion_coefficients, diffusion_bar_chart, summary_barcharts
from MSD_utils import calculate_MMSDs, plot_general_histogram, plot_MSD_histogram, plot_all_MSD_histograms
from MSD_utils import fillin2, MSD_iteration, vectorized_MMSD_calcs
from MSD_utils import get_data_gels, data_prep_for_plotting_gels, plot_all_MSD_histograms_gels, quality_control_gels
from MSD_utils import calculate_diffusion_coefficients_gels
In [3]:
folder = "./"
path = "./geoM2xy_{sample_name}.csv"
frames = 401
SD_frames = [10, 20, 50, 80]
conversion = (0.16, 20.08, 1)#(0.3, 3.95, 1)
to_frame = 60
dimension = "2D"
time_to_calculate = 1
base = "in_agarose"
base_name = "RED"
test_bins = np.linspace(0, 75, 76)
# name = 'RED_KO_PEG_P1_S1_cortex'
cut = 1
totvids = 15
frame_m = 401 # atm I can't go lower than the actual value.
parameters = {}
parameters["channels"] = ["RED"]
#parameters["genotypes"] = ["WT"]
#parameters["pups"] = ["P1", "P2", "P3"]
parameters["surface functionalities"] = ["nPEG"]
parameters["slices"] = ["S1", "S2", "S3", "S4"]
# parameters["regions"] = ["withCa", "noCa"]
parameters["replicates"] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
parameters["slice suffixes"] = ['a', 'b', 'c', 'd']
channels = parameters["channels"]
#genotypes = parameters["genotypes"]
#pups = parameters["pups"]
surface_functionalities = parameters["surface functionalities"]
slices = parameters["slices"]
# regions = parameters["regions"]
replicates = parameters["replicates"]
suffixes = parameters["slice suffixes"]
y_range, ticks_y, dec_y, x_range, ticks_x, dec_x = 8, 2, 1, 3, 1, 1
In [ ]:
geoM1x = {}
geoM1y = {}
geoM2xy = {}
SM1x = {}
SM1y = {}
SM2xy = {}
for channel in channels:
for surface_functionality in surface_functionalities:
slice_counter = 0
for slic in slices:
suffix = suffixes[slice_counter]
sample_name = "{}_{}_{}_{}_{}".format(channel, surface_functionality, base, '37C_pH72', slic)
DIR = folder
total1, xs, ys, x, y = MSD_iteration(DIR, sample_name, cut, totvids, conversion, frame_m)
geoM1x[sample_name], geoM1y[sample_name], geoM2xy[sample_name], SM1x[sample_name], SM1y[sample_name],\
SM2xy[sample_name] = vectorized_MMSD_calcs(frames, total1, xs, ys, x, y, frame_m)
np.savetxt(DIR+'geoM2xy_{}.csv'.format(sample_name), geoM2xy[sample_name], delimiter=',')
np.savetxt(DIR+'SM2xy_{}.csv'.format(sample_name), SM2xy[sample_name], delimiter=',')
# geoM2xy[sample_name] = np.genfromtxt(DIR + 'geoM2xy_{}.csv'.format(sample_name), delimiter=",");
# SM2xy[sample_name] = np.genfromtxt(DIR + "SM2xy_{}.csv".format(sample_name), delimiter=",");
slice_counter = slice_counter + 1
In [ ]:
units = len(geoM2xy)
duplicates = []
counter = 0
for keys in geoM2xy:
duplicates.append(keys.split('_S')[0]);
counter = counter + 1
seen = set()
uniq = []
for x in duplicates:
if x not in seen:
uniq.append(x)
seen.add(x)
In [ ]:
uniq
In [ ]:
frames
In [ ]:
to_avg = np.zeros((len(slices), frames))
averaged = {}
stded = {}
upper = {}
lower = {}
for x in uniq:
counter = 0
for slic in slices:
sample_name = "{}_{}".format(x, slic);
to_avg[counter, :] = geoM2xy[sample_name]
counter = counter + 1;
averaged[x] = np.mean(to_avg, axis=0)
stded[x] = np.std(to_avg, axis=0)/2
upper[x] = averaged[x] + stded[x]
lower[x] = averaged[x] - stded[x]
In [ ]:
time, time_SD = build_time_array(frames, conversion, SD_frames)
In [ ]:
fig, ax1 = plt.subplots(1, 1, figsize = (16, 8))
filename = 'diffusion_varying_calcium.png'
x_range = 5;
y_range = 8;
tick_size = 28;
label_size = 40;
legend_size = 20;
width = 4
time = time[0:480];
cgamut = ['skyblue', 'limegreen', 'salmon', 'violet'];
lgamut = ['blue', 'green', 'red', 'purple'];
counter = 0;
for x in uniq:
test = x;
ax1.fill_between(time[1:], lower[test][1:], averaged[test][1:], color=cgamut[counter], alpha = 0.5);
ax1.fill_between(time[1:], averaged[test][1:], upper[test][1:], color=cgamut[counter], alpha = 0.5);
ax1.plot(time[1:], averaged[test][1:], color = lgamut[counter], label = test, linewidth = width)
counter = counter + 1;
plt.gca().set_xlim([0, x_range])
plt.gca().set_ylim([0, y_range])
ax1.legend(loc='best', prop={'size': legend_size})
ax1.title.set_fontsize(tick_size)
ax1.set_xlabel('Time (s)', fontsize=label_size)
ax1.set_ylabel(r'MSD ($\mu$m$^2$)', fontsize=label_size)
ax1.tick_params(direction='out', pad=16)
for item in ([ax1.xaxis.label, ax1.yaxis.label] +
ax1.get_xticklabels() + ax1.get_yticklabels()):
item.set_fontsize(tick_size)
plt.savefig('{}'.format(filename), bbox_inches='tight')
plt.show()
In [ ]:
x = np.arange(0.0, 2, 0.01)
y1 = np.sin(2*np.pi*x)
y2 = 1.2*np.sin(4*np.pi*x)
fig, ax1 = plt.subplots(1, 1, sharex=True)
ax1.fill_between(x, 0, y1)
plt.show()
In [ ]:
lower[test]
In [ ]:
time.shape
In [ ]:
averaged[test]
In [ ]:
for keys in avg_sets:
all_avg[avg_sets[keys]] = return_average(data, frames, avg_sets[keys])
return all_avg
In [ ]:
data, avg_over_slices, time, time_SD, average_over_slices, all_SD_over_slices = \
data_prep_for_plotting_gels(path, frames, SD_frames, conversion, to_frame, parameters, base);
In [ ]:
plot_all_MSD_histograms_gels(parameters, folder, SM2xy, time, test_bins, 1, set_y_limit=True, y_range=5000, set_x_limit=True, x_range=60)
In [ ]:
folder = "./{functionality}/{slic}/"
path = "./{functionality}/{slic}/geoM2xy_{sample_name}.csv"
path2 = "./{functionality}/{slic}/Traj_{sample_name}.tif.csv"
frames = 120
SD_frames2 = [1, 2, 3, 4]
conversion = (1.24, 1.93, 1)#(0.3, 3.95, 1)
to_frame = 60
dimension = "2D"
time_to_calculate = 1
base = "0-4p_agarose"
base_name = "RED"
test_bins = np.linspace(0, 75, 76)
# name = 'RED_KO_PEG_P1_S1_cortex'
cut = 4
totvids = 10
frame_m = 120 # atm I can't go lower than the actual value.
parameters = {}
parameters["channels"] = ["RED"]
parameters["surface functionalities"] = ["nPEG"]
parameters["slices"] = ["1", "2", "3", "4"]
parameters["replicates"] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
parameters["slice suffixes"] = ['a', 'b', 'c', 'd']
channels = parameters["channels"]
surface_functionalities = parameters["surface functionalities"]
slices = parameters["slices"]
replicates = parameters["replicates"]
suffixes = parameters["slice suffixes"]
frames2 = 120
interv = 50
In [ ]:
geoM1x = {}
geoM1y = {}
geoM2xy = {}
SM1x = {}
SM1y = {}
SM2xy = {}
for channel in channels:
for surface_functionality in surface_functionalities:
slice_counter = 0
for slic in slices:
suffix = suffixes[slice_counter]
sample_name = "{}_{}_0-4p_agarose_{}".format(channel, surface_functionality, slic)
DIR = folder.format(functionality = surface_functionality, slic = slic)
frames, total1, xs, ys, x, y = MSD_iteration(DIR, sample_name, cut, totvids, conversion, frame_m, suffix)
geoM1x[sample_name], geoM1y[sample_name], geoM2xy[sample_name], SM1x[sample_name], SM1y[sample_name],\
SM2xy[sample_name] = vectorized_MMSD_calcs(frames, total1, xs, ys, x, y, frame_m)
np.savetxt(DIR+'geoM2xy_{}.csv'.format(sample_name), geoM2xy[sample_name], delimiter=',')
np.savetxt(DIR+'SM2xy_{}.csv'.format(sample_name)), SM2xy[sample_name], delimiter=',')
slice_counter = slice_counter + 1
In [ ]:
data2, avg_over_slices2, time2, time_SD2, average_over_slices2, all_SD_over_slices2 = \
data_prep_for_plotting_gels(path, frames2, SD_frames2, conversion, to_frame, parameters, base);
In [ ]:
plot_all_MSD_histograms_gels(parameters, base, folder, SM2xy, time2, test_bins, 1, set_y_limit=True, y_range=5000, set_x_limit=True, x_range=60)
In [ ]:
quality_control_gels(path2, folder, frames, conversion, parameters, base, interv, cut)
In [ ]:
def quality_control_gels(path2, folder, frames, conversion, parameters, base, interv, cut):
"""
This function plots a histogram of trajectory lengths (in units of frames) and two types of plots of
trajectories (original and overlay).
Inputs:
path2: string. Name of input trajectory csv files.
folder: string. Name of folder to which to save results.
frames: integer. Total number of frames in videos.
conversion: array. Contains microns per pixel and frames per second of videos.
parameters:
parameters = {}
parameters["channels"] = ["RED"]
parameters["surface functionalities"] = ["PEG"]
parameters["slices"] = ["S1", "S2", "S3"]
parameters["replicates"] = [1, 2, 3, 4]
cut: integer. Minimum number of frames a trajectory must have in order to be plotted.
"""
channels = parameters["channels"]
surface_functionalities = parameters["surface functionalities"]
slices = parameters["slices"]
replicates = parameters["replicates"]
SD_frames = [1, 7, 14, 15]
trajectory = {}
names_with_replicates = {}
data = {}
particles_unfiltered = {}
framed_unfiltered = {}
x_data_unfiltered = {}
y_data_unfiltered = {}
total_unfiltered = {}
particles = {}
framed = {}
x_data = {}
y_data = {}
total = {}
tlength = {}
x_microns = {}
y_microns = {}
x_particle = {}
y_particle = {}
x_original_frames = {}
y_original_frames = {}
x_adjusted_frames = {}
y_adjusted_frames = {}
counter2 = 0
for channel in channels:
for surface_functionality in surface_functionalities:
for slic in slices:
for replicate in replicates:
# Establishing sample names and extracting data from csv files.
counter2 = counter2 + 1
sample_name_long = "{}_{}_{}_{}_{}".format(channel, surface_functionality, base, slic, replicate)
names_with_replicates[counter2] = sample_name_long
filename = path2.format(functionality = surface_functionality, slic = slic, sample_name=sample_name_long)
data[sample_name_long] = np.genfromtxt(filename, delimiter=",")
data[sample_name_long] = np.delete(data[sample_name_long], 0, 1)
# Names of output plots
fold = folder.format(functionality = surface_functionality, slic = slic)
logplot = fold +'{}_logplot'.format(sample_name_long)
Mplot = fold +'{}_Mplot'.format(sample_name_long)
Dplot = fold +'{}_Dplot'.format(sample_name_long)
Hplot = fold +'{}_Hplot'.format(sample_name_long)
Hlogplot = fold +'{}_Hlogplot'.format(sample_name_long)
Cplot = fold +'{}_Cplot'.format(sample_name_long)
Tplot = fold +'{}_Tplot'.format(sample_name_long)
T2plot = fold +'{}_T2plot'.format(sample_name_long)
lenplot = fold +'{}_lenplot'.format(sample_name_long)
# Fill in data and split into individual trajectories
particles_unfiltered[counter2], framed_unfiltered[counter2], x_data_unfiltered[counter2],\
y_data_unfiltered[counter2] = fill_in_and_split(data[names_with_replicates[counter2]])
total_unfiltered[counter2] = int(max(particles_unfiltered[counter2]))
# Filter out short trajectories
particles[counter2], framed[counter2], x_data[counter2], y_data[counter2] =\
filter_out_short_traj(particles_unfiltered[counter2], framed_unfiltered[counter2],
x_data_unfiltered[counter2], y_data_unfiltered[counter2], cut)
# Convert to microns and seconds
time, time_SD = build_time_array(frames, conversion, SD_frames)
framen = np.linspace(0, frames, frames+1).astype(np.int64)
total[counter2] = int(max(particles[counter2]))
tlength[counter2] = np.zeros(total[counter2])
x_microns[counter2] = x_data[counter2]*conversion[0]
y_microns[counter2] = y_data[counter2]*conversion[0]
# Adjust frames (probably unneccesary, but I did it...)
x_particle[counter2] = {}
y_particle[counter2] = {}
x_original_frames[counter2] = {}
y_original_frames[counter2] = {}
x_adjusted_frames[counter2] = {}
y_adjusted_frames[counter2] = {}
for num in range(1, total[counter2] + 1):
hold = np.where(particles[counter2] == num)
itindex = hold[0]
min1 = min(itindex)
max1 = max(itindex)
x_particle[counter2][num] = x_microns[counter2][min1:max1+1]
y_particle[counter2][num] = y_microns[counter2][min1:max1+1]
x_original_frames[counter2][num] = np.zeros(frames + 1)
y_original_frames[counter2][num] = np.zeros(frames + 1)
x_adjusted_frames[counter2][num] = np.zeros(frames + 1)
y_adjusted_frames[counter2][num] = np.zeros(frames + 1)
x_original_frames[counter2][num][framed[counter2][min1]:framed[counter2][max1]+1]\
= x_microns[counter2][min1:max1+1]
y_original_frames[counter2][num][framed[counter2][min1]:framed[counter2][max1]+1]\
= y_microns[counter2][min1:max1+1]
x_adjusted_frames[counter2][num][0:max1+1-min1] = x_microns[counter2][min1:max1+1]
y_adjusted_frames[counter2][num][0:max1+1-min1] = y_microns[counter2][min1:max1+1]
x_original_frames[counter2][num] = ma.masked_equal(x_original_frames[counter2][num], 0)
y_original_frames[counter2][num] = ma.masked_equal(y_original_frames[counter2][num], 0)
x_adjusted_frames[counter2][num] = ma.masked_equal(x_adjusted_frames[counter2][num], 0)
y_adjusted_frames[counter2][num] = ma.masked_equal(y_adjusted_frames[counter2][num], 0)
tlength[counter2][num - 1] = ma.count(x_adjusted_frames[counter2][num])
plot_traj(x_original_frames[counter2], y_original_frames[counter2], total[counter2], interv, T2plot)
plt.gcf().clear()
plot_trajectory_overlay(x_original_frames[counter2], y_original_frames[counter2], 6, 2, 6, Tplot)
plt.gcf().clear()
plot_traj_length_histogram(tlength[counter2], max(tlength[counter2]), lenplot)
plt.gcf().clear()
Because my PEG and nPEG datasets had different framerates, I had to analyze the datasets separately. My code requires all datasets to have the same framerate, and thus they can't be plotted together.
In [ ]:
# Creates figure
line_colors=['g', 'r', 'b', 'c', 'm', 'k']
line_kind='-'
labels = ['PEG', 'nPEG']
label_size=95
legend_size=40
tick_size=50
line_width=10
fig_size=(20, 18)
filename = 'test.png'
to_graph = {}
counter = 0
for keys in average_over_slices:
to_graph[counter] = keys
counter = counter + 1
for keys in average_over_slices2:
to_graph[counter] = keys
counter = counter + 1
fig = plt.figure(figsize=fig_size, dpi=80)
ax = fig.add_subplot(111)
line_type = [s + line_kind for s in line_colors]
ax.plot(time[0:to_frame], average_over_slices[to_graph[0]][0:to_frame], line_type[0], linewidth=line_width, label=labels[0])
ax.errorbar(time_SD, average_over_slices[to_graph[0]][SD_frames], all_SD_over_slices[to_graph[0]], fmt='', linestyle='',
capsize=7, capthick=2, elinewidth=2, color=line_colors[0])
ax.plot(time2[0:to_frame], average_over_slices2[to_graph[1]][0:to_frame], line_type[1], linewidth=line_width, label=labels[1])
ax.errorbar(time_SD2, average_over_slices2[to_graph[1]][SD_frames2], all_SD_over_slices2[to_graph[1]], fmt='', linestyle='',
capsize=7, capthick=2, elinewidth=2, color=line_colors[1])
# A few adjustments to prettify the graph
for item in ([ax.xaxis.label, ax.yaxis.label] +
ax.get_xticklabels() + ax.get_yticklabels()):
item.set_fontsize(tick_size)
xmajor_ticks = np.arange(0, x_range+0.0001, ticks_x)
ymajor_ticks = np.arange(0, y_range+0.0001, ticks_y)
ax.set_xticks(xmajor_ticks)
plt.xticks(rotation=-30)
ax.set_yticks(ymajor_ticks)
ax.title.set_fontsize(tick_size)
ax.set_xlabel('Time (s)', fontsize=label_size)
ax.set_ylabel(r'MSD ($\mu$m$^2$)', fontsize=label_size)
ax.tick_params(direction='out', pad=16)
ax.legend(loc=(0.02, 0.75), prop={'size': legend_size})
plt.gca().xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.{}f'.format(dec_x)))
plt.gca().yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.{}f'.format(dec_y)))
# plt.yscale('log')
# plt.xscale('log')
plt.gca().set_xlim([0, x_range+0.0001])
plt.gca().set_ylim([0, y_range+0.0001])
# Save your figure
plt.savefig('{}'.format(filename), bbox_inches='tight')
plt.show()
In [ ]:
def calculate_diffusion_coefficients_gels(channels, surface_functionalities, slices, path, time, time_to_calculate, to_frame, dimension):
"""
Loads data from csv files and outputs a dictionary following a specified
sample naming convection determined by the input
Parameters:
channels, surface functionalities, media, and concentrations, and replicates
can take ranges or lists.
path is string with substition placeholders for concentration and sample
name (built from channels, surface_functionalities, media,
concentrations, and replicates).
Example:
path = "./{genotype}/{pup}/{region}/{channel}/geoM2xy_{sample_name}.csv";
get_data(["RED", "YG"], ["WT", "KO", "HET"], ["P1", "P2", "P3", "P4"],
["PEG", "noPEG"], ["S1", "S2", "S3", "S4"], ["cortex", "hipp", "mid"],
[1, 2, 3, 4, 5], path)
"""
data = {}
avg_over_slices_raw = {}
avg_over_pups_raw = {}
names_with_replicates = {}
counter = 0
counter2 = 0
diffusion_coef_point_derivative = {}
diffusion_coef_linear_fit = {}
for channel in channels:
for surface_functionality in surface_functionalities:
for slic in slices:
test_value = "{}_{}_0-4p_agarose_{}".format(channel, surface_functionality, slic)
avg_over_slices_raw[counter] = test_value
counter = counter + 1
sample_name = test_value
for replicate in replicates:
sample_name_long = test_value + "_{}".format(replicate)
names_with_replicates[counter2] = sample_name_long
counter2 = counter2 + 1
filename = path.format(functionality = surface_functionality, slic = slic, sample_name=sample_name)
data[sample_name] = np.genfromtxt(filename, delimiter=",")
diffusion_coef_point_derivative[sample_name] =\
diffusion_coefficient_point_derivative(data[sample_name], time, time_to_calculate, dimension)
diffusion_coef_linear_fit[sample_name] =\
diffusion_coefficient_linear_regression(data[sample_name], time, to_frame, dimension)
return diffusion_coef_point_derivative, diffusion_coef_linear_fit
In [ ]:
calculate_diffusion_coefficients_gels(channels, surface_functionalities, slices, path, time, time_to_calculate, to_frame, dimension)
In [ ]: