In [ ]:
%matplotlib inline
In [ ]:
import mne
import numpy as np
from mne.datasets import sample
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from jumeg.jumeg_utils import rank_estimation
from jumeg.decompose.dimension_selection import mibs, bic, gap, aic, mdl, explVar, fa_rank_cv, pca_rank_cv
# ----------------------------------------
# filenames and path
# ----------------------------------------
data_path = sample.data_path()  # a pathlib.Path in recent MNE versions
subjects_dir = data_path / 'subjects'
fname_raw = data_path / 'MEG' / 'sample' / 'sample_audvis_filt-0-40_raw.fif'
fname_event = data_path / 'MEG' / 'sample' / 'sample_audvis_filt-0-40_raw-eve.fif'
# ----------------------------------------
# read data (cropped below to speed up processing)
# ----------------------------------------
raw = mne.io.read_raw_fif(fname_raw)
events = mne.read_events(fname_event)
# mark a known bad channel
raw.info['bads'] += ['MEG 2443']
# pick MEG channels
picks = mne.pick_types(raw.info, meg='mag', eeg=False, stim=False, eog=False,
                       exclude='bads')
# crop data to speed up processing
raw.crop(tmax=180)
data = raw.get_data()[picks, :]  # note: rescaling the data (e.g. data *= 1e12) changes the results!
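# note on conventions (added for clarity): jumeg's rank_estimation is applied
# to the channels x times array used here, whereas the sklearn-based steps
# below operate on its transpose (samples x features)
print('data shape (channels, times):', data.shape)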
# =========================================
# Method 1:
# apply various rank estimation methods
# and take the median of all estimates
# =========================================
rank_all, rank_median = rank_estimation(data)
print('\nRank estimation using methods implemented in jumeg (including whitening)')
print('Ranks in order: MIBS, BIC, GAP, AIC, MDL, pct95, pct99: ', rank_all)
print('The median of all rank estimates is %d\n' % rank_median)
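# (illustrative sketch, not part of the original example) the median
# estimate is typically used to fix the model order of a subsequent
# decomposition, e.g. a reduced PCA:
pca_reduced = PCA(n_components=int(rank_median), whiten=True)
data_reduced = pca_reduced.fit_transform(data.T)
print('reduced data shape (samples, components):', data_reduced.shape)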
# =========================================
# Method 2:
# apply each rank estimation method separately,
# using the whitening provided by sklearn's PCA
# =========================================
# perform PCA on the transposed data (samples x features)
pca = PCA(svd_solver='auto', whiten=True)
pc = pca.fit_transform(data.T)
n_samples, n_features = pc.shape
# rank estimates from each individual method
rank_mibs = mibs(pca.explained_variance_, n_samples) # MIBS
rank_bic = bic(pca.explained_variance_, n_samples) # BIC
rank_gap = gap(pca.explained_variance_) # GAP
rank_aic = aic(pca.explained_variance_) # AIC
rank_mdl = mdl(pca.explained_variance_) # MDL
rank_expl95 = explVar(pca.explained_variance_, explainedVar=0.95) # expl. variance (95)
rank_expl99 = explVar(pca.explained_variance_, explainedVar=0.99) # expl. variance (99)
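# ----------------------------------------------------------------------
# aside (illustrative sketch, not jumeg code): the explained-variance
# criterion amounts to thresholding the cumulative variance ratio;
# assuming eigenvalues sorted in decreasing order, a minimal version is:
# ----------------------------------------------------------------------
def rank_by_explained_variance(eigenvalues, threshold=0.95):
    ratio = np.cumsum(eigenvalues) / np.sum(eigenvalues)
    return int(np.searchsorted(ratio, threshold) + 1)
# this should roughly agree with rank_expl95 / rank_expl99 from above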
label_mibs = 'MIBS: n_comp = %d' % (rank_mibs)
label_bic = 'BIC: n_comp = %d' % (rank_bic)
label_gap = 'GAP: n_comp = %d' % (rank_gap)
label_aic = 'AIC: n_comp = %d' % (rank_aic)
label_mdl = 'MDL: n_comp = %d' % (rank_mdl)
label_expl95 = 'expl95: n_comp = %d' % (rank_expl95)
label_expl99 = 'expl99: n_comp = %d' % (rank_expl99)
print(label_mibs)
print(label_bic)
print(label_gap)
print(label_aic)
print(label_mdl)
print(label_expl95)
print(label_expl99)
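# ----------------------------------------------------------------------
# aside (illustrative sketch, not jumeg code): information criteria like
# MDL go back to Wax & Kailath (1985); jumeg presumably implements a
# variant. A minimal version of the classic MDL, assuming strictly
# positive eigenvalues sorted in decreasing order:
# ----------------------------------------------------------------------
def mdl_wax_kailath(eigenvalues, n_samples):
    lam = np.sort(np.asarray(eigenvalues))[::-1]
    p = len(lam)
    crit = np.empty(p)
    for k in range(p):
        tail = lam[k:]
        # log-likelihood term: geometric vs. arithmetic mean of the tail
        log_lik = n_samples * (p - k) * (np.mean(np.log(tail)) - np.log(np.mean(tail)))
        crit[k] = -log_lik + 0.5 * k * (2 * p - k) * np.log(n_samples)
    return int(np.argmin(crit))

print('classic MDL sketch: n_comp = %d' % mdl_wax_kailath(pca.explained_variance_, n_samples))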
# -------------------------------------------
# Method 3:
# rank estimation utilizing cross-validation
# of PCA and FA model scores
# Note: on full-length data sets this will take a long time
# -------------------------------------------
# define a list of components to test
# here we use all components from above and add a few more for testing
ncomp_list = np.unique(np.concatenate([rank_all[rank_all > 0], [40, 50]])).astype(int)
# np.unique already returns the values sorted, so no extra sort is needed
cv = 5  # number of CV folds; choose depending on the data length
print('>>> Please be patient, this may take a while ...\n')
# PCA: test a list of numbers in a cross-validation scenario
rank_cv_pca = pca_rank_cv(data, ncomp_list, cv=cv, whiten=True)
# FA: test a list of components in a cross-validation scenario
# (note: FA is applied to the data itself, not to the already
# decorrelated PCA scores, which carry no factor structure)
rank_cv_fa = fa_rank_cv(data, ncomp_list, cv=cv)
label_cv_pca = 'PCA (CV=%d): %d' % (cv, rank_cv_pca)
label_cv_fa = 'FA (CV=%d): %d' % (cv, rank_cv_fa)
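# ----------------------------------------------------------------------
# aside (illustrative sketch, not jumeg code): cross-validated rank
# selection follows the same idea as sklearn's PCA/FA model-selection
# example: fit each candidate model order and keep the one with the best
# held-out log-likelihood; jumeg's pca_rank_cv / fa_rank_cv presumably
# implement a variant of this
# ----------------------------------------------------------------------
from sklearn.decomposition import FactorAnalysis
from sklearn.model_selection import cross_val_score

def cv_rank(X, ncomp_candidates, cv=5, Estimator=PCA):
    # X: samples x features; score() returns the average log-likelihood
    scores = [np.mean(cross_val_score(Estimator(n_components=int(n)), X, cv=cv))
              for n in ncomp_candidates]
    return int(ncomp_candidates[int(np.argmax(scores))])
# e.g.: cv_rank(data.T, ncomp_list, cv=cv)
#       cv_rank(data.T, ncomp_list, cv=cv, Estimator=FactorAnalysis)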
# ----------------------
# plot results (Scree plot)
# ----------------------
labels = np.array([label_aic, label_bic, label_gap, label_mdl, label_mibs, label_expl95, label_expl99])
comps = np.array([rank_aic, rank_bic, rank_gap, rank_mdl, rank_mibs, rank_expl95, rank_expl99])
# sorting by increasing number of components
ixsort = comps.argsort()
comps = comps[ixsort]
labels = labels[ixsort]
# components estimated using CV
label_cv = [label_cv_pca, label_cv_fa]
comps_cv = [rank_cv_pca, rank_cv_fa]
# plot figure
cmap = plt.get_cmap('Dark2')  # 8 colors; cm.get_cmap was removed in matplotlib 3.9
colors = cmap.colors[::-1]
xaxis = np.arange(n_features)+1
fig = plt.figure(figsize=(10, 6))
plt.plot(xaxis, np.cumsum(pca.explained_variance_ratio_ * 100), marker='o', color='black')
plt.title('Scree plot')
plt.xlabel('# components')
plt.ylabel('explained variance / %')
# plot vertical line and label for each method
methods = []
for i in range(len(comps)):
    if comps[i] > 0:
        hl = plt.axvline(x=comps[i], color=colors[i], label=labels[i], linestyle='--')
        methods.append(hl)
legend1 = plt.legend(handles=methods, loc='lower right')
plt.gca().add_artist(legend1)  # keep this legend visible when adding the second one
# plot vertical line and label for CV methods
l1 = plt.axvline(x=comps_cv[0], color='blue', label=label_cv[0], linewidth=3)
l2 = plt.axvline(x=comps_cv[1], color='red', label=label_cv[1], linewidth=3)
methods_cv = [l1, l2]
legend2 = plt.legend(handles=methods_cv, loc='center right')
plt.show()