This notebook contains the code to extract peak parameters from PSDs of electrodes of a single subject. It is organized in the following manner :
Each of the helper functions also has a docstring with a short description and summary of input arguments and output results.
In [1]:
%pylab inline
In [2]:
from __future__ import division
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt
import random
import os
import scipy.io
import scipy.signal
from sklearn.metrics import r2_score
from sklearn import cross_validation
from sklearn import cluster
import sys
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [3]:
def process_elec_data(elec_data, srate):
"""
Returns the f, mean, (mean - 1/f^2) of trials in elec_data
Args:
elec_data: Matrix of electrode data (trials x time)
Returns:
f: vector of frequencies (spacing decided with signal.welch)
psd_mean: average psd of all trials in elec_data matrix
flatspec: psd_mean - (1/f^2)
"""
# Resample
newsrate = 256 # new sampling rate
if srate!=newsrate:
elec_data = sp.signal.resample(elec_data, int(np.floor(len(elec_data)*(newsrate/srate))))
# Calculate trial and mean PSDs for testing range of trials
f,psd_trial = sp.signal.welch(elec_data, fs=newsrate, window='hanning', nperseg=newsrate, noverlap=newsrate/2.0, nfft=None, detrend='linear', return_onesided=True, scaling='density')
psd_mean = psd_trial.mean(0)
#Cut PSD according to frequency range of interest (1-50Hz)
f = f[1:50]
psd_mean = np.log10(psd_mean[1:50])
#Calculate Flatspec (PSDMean - 1/f^2)
slope_params = np.polyfit(f, psd_mean, 2)
slopefit = np.polyval(slope_params, f)
flatspec = psd_mean - slopefit
return (f,psd_mean, flatspec)
In [ ]:
'''Import and check data'''
datafolder = 'INSERT PATH TO DATA HERE' # path to EEG_1elec_epoched_data.mat
fname = 'EEG_1elec_epoched_data.mat';
os.chdir(datafolder);
data = sp.io.loadmat(fname);
data = squeeze(data['elec_dat']);
fx,pow_spec,flatspec = process_elec_data(data,256);
plot(fx,pow_spec)
ylabel('Power')
xlabel('Frequency')
Norm takes in a vector of frequencies (essentially the x-axis) and a vector of parameters (arg) that characterizes the Gaussians. It uses these parameters to create a combination of Gaussians and returns it as fitdat. For example, if the following peaks were found:
Peaks | Amplitude | Std Dev |
---|---|---|
7.0 | 0.05 | 1.35 |
22.0 | 0.13 | 0.66 |
then the arg vector would be
[7.0 22.0 0.05 0.13 1.35 0.66 2]
where the last 2 stands for the total number of peaks found
In [5]:
def norm(x, *args):
"""
Calculates a combination of Gaussians given parameters
Args:
x: vector of frequencies
*args: vector of Gaussian parameters (Gaussian center, Gaussian standard deviation, Gaussian height, # of Gaussians)
Returns:
fitdat: combination of Gaussians
"""
number_of_gaussians = int(args[-1]) # number of gaussians from peak detection
fitdat = np.array(0)
for peaki in range(number_of_gaussians):
fitdat = fitdat + args[peaki+(2*number_of_gaussians)]*sp.stats.norm.pdf(x, loc=args[peaki], scale=args[peaki+number_of_gaussians]) # add a bunch of gaussians together
return fitdat
Uses Python's built in sp.signal.find_peaks_cwt
in order to find peaks using the given sliding window on the given flatspec (PSD - $\frac{1}{f^2}$). The first window starts at 0Hz and looks for a peak between 0Hz and sliding_window
Hz. The next window starts at 2Hz and goes till sliding_window
+2Hz. This process continues till the entire freequency range has been covered.
A sliding window is used to ensure that low amplitude peaks are not ignored. The 2Hz overlap ensures that peaks at edges of the sliding windows aren't missed [MAKE CLEAR].
In [6]:
import sys
from numpy import NaN, Inf, arange, isscalar, asarray, array
def peakdect(v, delta, x = None):
"""
Converted from MATLAB script at http://billauer.co.il/peakdet.html
Returns two arraysX
function [maxtab, mintab]=peakdet(v, delta, x)
%PEAKDET Detect peaks in a vector
% [MAXTAB, MINTAB] = PEAKDET(V, DELTA) finds the local
% maxima and minima ("peaks") in the vector V.
% MAXTAB and MINTAB consists of two columns. Column 1
% contains indices in V, and column 2 the found values.
%
% With [MAXTAB, MINTAB] = PEAKDET(V, DELTA, X) the indices
% in MAXTAB and MINTAB are replaced with the corresponding
% X-values.
%
% A point is considered a maximum peak if it has the maximal
% value, and was preceded (to the left) by a value lower by
% DELTA.
% Eli Billauer, 3.4.05 (Explicitly not copyrighted).
% This function is released to the public domain; Any use is allowed.
"""
maxtab = []
mintab = []
if x is None:
x = arange(len(v))
v = asarray(v)
if len(v) != len(x):
sys.exit('Input vectors v and x must have same length')
if not isscalar(delta):
sys.exit('Input argument delta must be a scalar')
if delta <= 0:
sys.exit('Input argument delta must be positive')
mn, mx = Inf, -Inf
mnpos, mxpos = NaN, NaN
lookformax = True
for i in arange(len(v)):
this = v[i]
if this > mx:
mx = this
mxpos = x[i]
if this < mn:
mn = this
mnpos = x[i]
if lookformax:
if this < mx-delta:
maxtab.append((mxpos, mx))
mn = this
mnpos = x[i]
lookformax = False
else:
if this > mn+delta:
mintab.append((mnpos, mn))
mx = this
mxpos = x[i]
lookformax = True
return array(maxtab), array(mintab)
In [7]:
def find_peaks (flatspec, sliding_window, f):
"""
Detects peaks in flatspec in in chunks of width sliding_window. A window overlaps previous window by 2Hz
Args:
flatspec: psd_mean - (1/f^2)
f: frequency of vectors
sliding_window: width of sliding window for detecting peaks
Returns:
peaks: indices of peak locations in flatspec
"""
maxpks, minpks = peakdect(flatspec, sliding_window, f) # find peaks in sliding window range
try:
peakind = [np.uint32(i) for i in (array(maxpks)[:,0]) if i <= 30] #ignore peaks beyond 30Hz
except Exception:
peakind = [-1]
try:
valleyind = [np.uint32(i) for i in (array(minpks)[:,0]) if i <= 30] #ignore peaks beyond 30Hz
except Exception:
valleyind = [-1]
return peakind, valleyind
In [8]:
def transform_data (params):
"""
Converts list of lists to matrix in form (n_components x n_features)
Args:
params: parameters from CV folds
Returns:
X: matrix of parameters
"""
#separate peaks for each sliding window width
peaks_np = np.array(params)
#separate peaks for each sliding window width
peaks1 = [elem for sublist in map(list, peaks_np[:,0,:]) for elem in sublist if elem >=0] #only include peaks less than 30Hz
peaks2 = [elem for sublist in map(list, peaks_np[:,1,:]) for elem in sublist if elem >=0] #only include peaks less than 30Hz
peaks3 = [elem for sublist in map(list, peaks_np[:,2,:]) for elem in sublist if elem >=0] #only include peaks less than 30Hz]
#combine all peaks for input into Kmeans clustering algorithm
peaks_comp = np.array(peaks1+peaks2+peaks3)
peaks_comp = [elem for elem in peaks_comp if (elem<30 and elem >= 0)] #only include peaks less than 30Hz
X = np.reshape(peaks_comp, (1,len(peaks_comp)))
X=X.transpose()
return X, peaks1, peaks2, peaks3
In [9]:
def kmeans(X, cluster_size):
"""
Returns the cluster centroids and SoS from centroids
Args:
X: Data (n_componentsxn_features) to cluster
Returns:
clusters: cluster centroids
inertia: SoS
"""
km = cluster.KMeans(n_clusters=cluster_size)
km.fit(X)
clusters = km.cluster_centers_
return clusters, km.inertia_
In [10]:
def find_cluster(X):
"""
Finds ideal number of clusters
Args:
peaks: list of all peaks
Returns:
ideal number of clusters
"""
SoS_list = []
peaks = X[:,0:1]
med = np.median(peaks)
zeroSoS = sum(((x-med)*(x-med) for x in peaks))
SoS_list.append(zeroSoS)
for cluster_size in range (1, 6):
p,SoS_tmp = kmeans(X, cluster_size)
SoS_list.append((SoS_tmp))
dx = np.array(SoS_list[:-1]) - np.array(SoS_list[1:]) #calculate difference in SoS
SoS_knee = np.argmax(dx) #pick max change
return SoS_knee+2, SoS_list
In [11]:
def bootstrap(data):
bootstrap_dist = np.zeros(1000)
for i in range(0,1000):
total_points = len(data)
data_sample = np.random.choice(np.array(data), size=int(0.8*total_points))
bootstrap_dist[i] = median(data_sample)
return [median(bootstrap_dist), percentile(bootstrap_dist, 2.5), percentile(bootstrap_dist, 97.5)]
In [12]:
def final_check_for_output(extcv_pks,extcv_std, flatspec):
extcv_pks_raster = []
extcv_std_raster = []
final_peaks = [];
final_stds = [];
bin_thresh = 0.05;
for cv in range(len(extcv_pks)):
for i in range(len(extcv_pks[cv])):
if (extcv_pks[cv][i] <=30 and extcv_pks[cv][i] >=0):
if (flatspec[extcv_pks[cv][i]] > 0):
extcv_pks_raster.append(extcv_pks[cv][i])
extcv_std_raster.append(extcv_std[cv][i])
#Build Histogram of External CV Peaks
bin_count, bin_edge = np.histogram(extcv_pks_raster, 5)
valid_peak_range = []
for i in range(len(bin_count)):
if (bin_count[i]) >= bin_thresh*len(extcv_pks_raster):
valid_peak_range.append([bin_edge[i], bin_edge[i+1]])
#Place Peaks and other Params in Appropriate Bins
sorted_params = []
for low, high in valid_peak_range:
temp_ind = [i for i in range(len(extcv_pks_raster)) if low<=extcv_pks_raster[i]<=high]
sorted_params.append([np.array(extcv_pks_raster)[temp_ind], np.array(extcv_std_raster)[temp_ind]])
for j in range(len(valid_peak_range)):
med_peak = median(sorted_params[j],1)[0]
med_std = median(sorted_params[j],1)[1]
if med_std <=10:
cf_params = bootstrap(sorted_params[j][0])
bw_params = bootstrap(2.355*sorted_params[j][1])
final_peaks.append(cf_params[0])
final_stds.append(bw_params[0])
return final_peaks, final_stds
In [14]:
def final_peak_fitting(elec_data,srate):
'''
Uses cross validation to calculate the PSD and fit gaussians to electrophysiological data
Args:
data: trials x time
srate: sampling rate of the data
extcv: range of cross validation runs
Returns:
extcv_peaks = Center frequencies of all peaks
extcv_std = Standard deviation for each peak found
'''
freq_vector, psd_mean, flatspec = process_elec_data(elec_data, srate)
extcv = range(0,10)
deltas = [0.03, 0.05, 0.07]
extcv_amps = []
extcv_rsq = []
extcv_std = []
extcv_pks = []
extcv_fit = []
extcv_trial = []
extcv_valid = []
extcv_cluster_peaks = []
extcv_params = []
extcv_rscore=[]
extcv_delta_pks=[]
for i in extcv:
"""Perform Cross Validation"""
#Set aside validation set
elec_data_train, elec_data_valid = cross_validation.train_test_split(elec_data, test_size=0.2, random_state=np.uint(np.random.rand(1)*100)[0]) #split into test and validation set #split into test and validation set
#Split train set into 100 train and test sets
ss = cross_validation.ShuffleSplit(int(np.size(elec_data_train,0)*0.8), n_iter=100, test_size=0.2, random_state=0)
#Use each window width on each of the 100 folds for fitting Gaussians
peaks_cv=[]
valleys_cv=[]
for train_index, test_index in ss:
#split into train and test set
trials_train = elec_data_train[train_index,:]
trials_test = elec_data_train[test_index,:]
freq_vector, _, flatspec_train = process_elec_data(trials_train, srate) #calculate train flatspec
_, _, flatspec_test = process_elec_data(trials_test, srate) #calculate test flatspec
peaks_cv_temp = np.zeros((3,12))
valleys_cv_temp = np.zeros((3,12))
#find Gaussian fit parameters for each window width
for d in range(0,len(deltas)):
delta = deltas[d]
peaks_temp, valleys_temp = find_peaks(flatspec_train, delta, freq_vector)
peaks_temp_vect = np.zeros(12)
valleys_temp_vect = np.zeros(12)
peaks_temp_vect[0:len(peaks_temp)] = peaks_temp
valleys_temp_vect[0:len(valleys_temp)] = valleys_temp
peaks_cv_temp[d] = peaks_temp_vect
valleys_cv_temp[d] = valleys_temp_vect
peaks_cv.append((peaks_cv_temp))
valleys_cv.append((valleys_cv_temp))
extcv_delta_pks.append((peaks_cv))
#Reshape Gaussian parameters to k-means clustering input format
X_peaks, peaks1, peaks2, peaks3 = transform_data(peaks_cv)
peaks = X_peaks[:,0:1]
X_valleys, valleys1, valleys2, valleys3 = transform_data(valleys_cv)
valleys = X_valleys[:,0:1]
#Find ideal number of clusters, return all SoS for plotting
cluster_num_peaks, cluster_SoS_peaks = find_cluster(X_peaks)
cluster_num_valleys, cluster_SoS_valleys = find_cluster(X_valleys)
#process valid and train sets
f, mean_train, flatspec_train = process_elec_data(elec_data_train, srate)
_, mean_valid, flatspec_valid = process_elec_data(elec_data_valid, srate)
extcv_trial.append((flatspec_train))
extcv_valid.append((flatspec_valid))
#use ideal number of clusters to generate best fit
cluster_peaks,_ = kmeans(X_peaks,cluster_num_peaks)
cluster_valleys,_ = kmeans(X_valleys,cluster_num_valleys)
peak = [elem for sublist in map(list, np.array(cluster_peaks)) for elem in sublist]
valley = [elem for sublist in map(list, np.array(cluster_valleys)) for elem in sublist]
cluster_all = peak+valley
peakind = np.uint32([p for p in cluster_all if p>=1 and p<=30])
p = np.linspace(1, len(peakind), len(peakind))
params = list(freq_vector[peakind]) + list(p.T) + list(flatspec_train[peakind]*2) + [len(peakind)] # initial guess for parameters based on peak detection
try:
oscillatory_fit_params,_ = sp.optimize.curve_fit(norm, f, flatspec_train, p0=params) # fitting parameters
except RuntimeError:
print('FAILED FIT')
continue
cluster_fit = norm(f, *oscillatory_fit_params)
cluster_num = oscillatory_fit_params[-1]
extcv_std.append((oscillatory_fit_params[cluster_num:2*cluster_num]))
extcv_pks.append((oscillatory_fit_params[0:cluster_num]))
final_peaks, final_bws = final_clustering_for_output(extcv_pks,extcv_std, flatspec);
return final_peaks, final_bws
In [ ]:
peaks,bws = final_peak_fitting(data,256);
print peaks, bws