In [ ]:
import deltascope as ds
import deltascope.alignment as ut
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from scipy.optimize import minimize
import os
import tqdm
import json
import time
Specify that matplotlib plots should be rendered interactively in the notebook.
In [ ]:
%matplotlib notebook
In [ ]:
# --------------------------------
# -------- User input ------------
# --------------------------------
data = {
    # Specify sample type key
    'wt': {
        # Specify path to data directory (use a raw string so
        # backslashes are not interpreted as escape sequences)
        'path': r'path\to\data\directory\sample1',
        # Specify which channels are in the directory and are of interest
        'channels': ['AT','ZRF']
    },
    'stype2': {
        'path': r'path\to\data\directory\sample2',
        'channels': ['AT','ZRF']
    }
}
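Since a mistyped path is the most common failure at this step, it's worth verifying that each directory exists before reading anything. A minimal sanity check, assuming only the data dictionary defined above:
In [ ]:
# Check that every specified data directory exists and report its contents
for s in data.keys():
    path = data[s]['path']
    if os.path.isdir(path):
        print(s, ':', len(os.listdir(path)), 'files found')
    else:
        print(s, ': missing directory', path)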
We'll generate a list of (stype, channel) pairs for ease of use.
In [ ]:
data_pairs = []
for s in data.keys():
    for c in data[s]['channels']:
        data_pairs.append((s,c))
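With the dictionary above, data_pairs will contain [('wt', 'AT'), ('wt', 'ZRF'), ('stype2', 'AT'), ('stype2', 'ZRF')].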
We can now read in all data files specified by the data dictionary above.
In [ ]:
D = {}
for s in data.keys():
    D[s] = {}
    for c in data[s]['channels']:
        D[s][c] = ds.read_psi_to_dict(data[s]['path'], c)
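Before moving on, it's helpful to confirm that every channel was read successfully. A quick check, assuming only that D is the nested dictionary of samples built above:
In [ ]:
# Report how many samples were loaded for each stype/channel pair
for s, c in data_pairs:
    print(s, c, ':', len(D[s][c]), 'samples')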
In [ ]:
# --------------------------------
# -------- User input ------------
# --------------------------------
# Specify stype to use as control
s_ctrl = 'wt'
# Specify channel to use as control
c_ctrl = 'AT'
# Specify size of theta bins
theta_step = np.pi/4
# Specify the step size when sweeping over candidate alpha bin numbers
astep = 3
# Specify the minimum number of alpha bins to test
amn = 2
# Specify the maximum number of alpha bins to test
amx = 50
In [ ]:
optr = ds.anumSelect(D[s_ctrl][c_ctrl])
optr.param_sweep(theta_step, amn=amn, amx=amx, astep=astep,
                 rnull=np.nan, DT='r')
Plot the results of the parameter sweep to get a sense of how the variance measures change with bin number.
In [ ]:
# --------------------------------
# -------- User input ------------
# --------------------------------
# Select the degree of the polynomial used for curve fitting
dof = 5
In [ ]:
x = np.arange(amn, amx, astep)
fig, ax = plt.subplots()
# Fit a polynomial to the normalized bin variance values
# (sklearn's normalize expects a 2D array, hence the reshape)
pbv = np.polyfit(x, normalize(np.array(optr.Mbv).reshape(1, -1))[0], dof)
fbv = np.poly1d(pbv)
ax.plot(x, fbv(x), c='b', label='Bin Variance')
# Fit a polynomial to the normalized sample variance values
psv = np.polyfit(x, normalize(np.array(optr.Msv).reshape(1, -1))[0], dof)
fsv = np.poly1d(psv)
ax.plot(x, fsv(x), c='g', label='Sample Variance')
# Plot the sum of the sample and bin variances
ax.plot(x, fsv(x) + fbv(x), c='c', label='Total Variance')
ax.set_xlabel('Number of alpha bins')
ax.set_ylabel('Normalized variance')
ax.legend()
Now we can calculate the optimal bin number by minimizing the total variance curve, starting from a user-supplied initial guess.
In [ ]:
# --------------------------------
# -------- User input ------------
# --------------------------------
# Guess the approximate optimal value
guess = 25
In [ ]:
# poly1d objects support addition, so fbv+fsv is the total variance curve
opt = minimize(fbv + fsv, guess)
ax.axvline(opt.x[0], c='r', label='Optimum: '+str(np.round(opt.x[0], 2)))
ax.legend()
print(opt.x[0])
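A polynomial of moderate degree can have several local minima, so it's worth cross-checking the optimizer's answer against a brute-force evaluation over the swept range. A minimal sketch using only the fitted polynomials from above:
In [ ]:
# Cross-check: evaluate the total-variance polynomial on a dense grid
xd = np.linspace(amn, amx, 500)
total = fbv(xd) + fsv(xd)
print('Grid minimum near a =', xd[np.argmin(total)])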
In [ ]:
# --------------------------------
# -------- User input ------------
# --------------------------------
# Pick an integer number of alpha bins based on the results above
anum = int(opt.x[0])
# Specify the percentiles which will be used to calculate landmarks,
# e.g. [50] uses only the median; [25,50,75] would add the quartiles
percbins = [50]
Calculate landmark bins based on user input parameters and the previously specified control sample.
In [ ]:
lm = ds.landmarks(percbins=percbins, rnull=np.nan)
lm.calc_bins(D[s_ctrl][c_ctrl], anum, theta_step)
print('Alpha bins')
print(lm.acbins)
print('Theta bins')
print(lm.tbins)
In [ ]:
lmdf = pd.DataFrame()
# Loop through each pair of stype and channels
for s, c in tqdm.tqdm(data_pairs):
    print(s, c)
    # Calculate landmarks for each sample with this data pair
    for k, df in tqdm.tqdm(D[s][c].items()):
        lmdf = lm.calc_perc(df, k, '-'.join([s,c]), lmdf)
# Set timestamp for saving data
tstamp = time.strftime("%m-%d-%H-%M", time.localtime())
# Save completed landmarks to a csv file
lmdf.to_csv(tstamp+'_landmarks.csv')
# Save landmark bins to a json file
bins = {
    'acbins': list(lm.acbins),
    'tbins': list(lm.tbins)
}
with open(tstamp+'_landmarks_bins.json', 'w') as outfile:
    json.dump(bins, outfile)
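When returning to this analysis later, the saved landmarks and bin definitions can be reloaded without recomputing anything. A minimal sketch, where the filename prefix matches whatever tstamp was when the files were written:
In [ ]:
# Reload saved landmarks and bin definitions from disk
lmdf = pd.read_csv(tstamp+'_landmarks.csv', index_col=0)
with open(tstamp+'_landmarks_bins.json') as infile:
    bins = json.load(infile)
print(lmdf.shape)
print(bins.keys())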