This notebook demonstrates MEG analysis using windowed Phase Locking Value (PLV), Principal Component Analysis (PCA), and hierarchical clustering.
For more information, please refer to https://nextonsblog.wordpress.com/?p=3087
In [1]:
import numpy as np
import scipy.io
from scipy.signal import hilbert
from sklearn.decomposition import PCA, FastICA
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import mpl_toolkits.mplot3d.axes3d as p3
import matplotlib.animation as animation
import matplotlib.colors as colors
%matplotlib inline
from IPython.display import HTML
In [2]:
# Global configuration.
FILENAME = "../../data/megicann_train_v2.mat"
DATASET_X = 'train_day1'
DATASET_Y = 'class_day1'
FREQ_DOMAINS = [5]
VALIDATION_SAMPLES = 0.2
WINDOW_LENGTH = 40
In [3]:
# Original data is in format train[frequency][sample, channel, time].
# Make the different frequencies appear as channels and swap channel and time.
def preprocess_x(raw_data):
    # Move the different frequency bands into the channel dimension
    samples = raw_data[0, FREQ_DOMAINS[0]][:]
    for freq_domain in FREQ_DOMAINS[1:]:
        samples = np.concatenate([samples, raw_data[0, freq_domain][:]], axis=1)
    # Normalize the data within each sample (per channel) and make sure the dtype is OK
    for i, sample in enumerate(samples):
        sample_mean = sample.mean(axis=1)
        sample_std = sample.std(axis=1)
        samples[i] = ((sample.transpose() - sample_mean) / sample_std).transpose()
    # Analytic signal via the Hilbert transform (applied along the time axis)
    samples = hilbert(samples)
    # Transpose the data to [sample, time, channel] format
    samples = np.transpose(samples, axes=(0, 2, 1))
    return samples
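To see what the function produces, here is a minimal sanity check on synthetic data; the object-array layout mimics what loadmat returns, and the shapes (3 samples, 4 channels, 50 time points) are made up purely for illustration.

# A minimal sketch on fake data, assuming the train[0, freq][sample, channel, time] layout;
# fake_raw and its shapes are hypothetical, not the real MEG recording.
rng = np.random.RandomState(0)
fake_raw = np.empty((1, max(FREQ_DOMAINS) + 1), dtype=object)
for f in range(fake_raw.shape[1]):
    fake_raw[0, f] = rng.randn(3, 4, 50)   # 3 samples, 4 channels, 50 time points
fake_samples = preprocess_x(fake_raw)
print(fake_samples.shape)                  # (3, 50, 4): [sample, time, channel]
print(fake_samples.dtype)                  # complex, i.e. the analytic signal after hilbert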
In [4]:
# Compute windowed PLVs for the data. Each window's symmetric connectivity matrix is reduced
# to its upper triangle, thresholded and binarized, so the returned arrays are in format
# [window, connection] and [window].
def wplv(samples_x, samples_y, window_length):
    plvs = []
    y = []
    for sample_x, sample_y in zip(samples_x, samples_y):
        for i in range(sample_x.shape[0] - window_length):
            _sample = sample_x[i:i+window_length]
            plv = np.abs(np.dot(_sample.conj().T, _sample)) / window_length
            # The connectivity matrix is symmetric, so look at just the upper triangular part
            idx = np.triu_indices(plv.shape[0])
            plv = plv[idx[0], idx[1]]
            # Avoid memory exhaustion by thresholding the data here and storing it as uint8
            plv[plv < 0.9] = 0
            plv[plv >= 0.9] = 1
            plv = np.uint8(plv)
            plvs.append(np.asarray(plv))
            y.append(sample_y)
    return np.asarray(plvs), np.asarray(y)
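As a toy illustration of a single window (hypothetical two-channel data, not MEG): two channels with identical phase give off-diagonal values near 1, which the 0.9 threshold then binarizes. The signal and frequency choice below are assumptions made only for this sketch.

# Toy example (hypothetical data): one window of two perfectly phase-locked channels
t = np.arange(WINDOW_LENGTH + 1)
w = 2 * np.pi * 4 / len(t)                                    # exact FFT bin, so the analytic signal is clean
toy = hilbert(np.vstack([np.sin(w * t), np.sin(w * t)]))      # [channel, time] analytic signals
toy_x = toy.T[np.newaxis]                                     # [sample, time, channel]
toy_plv, toy_labels = wplv(toy_x, [0], WINDOW_LENGTH)
print(toy_plv.shape)   # (1, 3): one window, upper triangle of a 2x2 matrix
print(toy_plv[0])      # values ~1 for identical channels, so the threshold maps all entries to 1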
In [5]:
# Load samples and preprocess them
train_dict = scipy.io.loadmat(FILENAME)
train_dataset_x = preprocess_x(train_dict[DATASET_X])
train_dataset_y = train_dict[DATASET_Y][:,0]
# Pick up some characteristics of the data
num_samples = train_dataset_x.shape[0]
sample_length = train_dataset_x.shape[1]
num_channels = train_dataset_x.shape[2]
# Remove train_dict to save some memory
train_dict = None
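A quick look at the dimensions just extracted can catch layout mistakes early; the actual values depend on the .mat file and are not shown here.

# Sanity check of the data dimensions (values depend on the .mat file)
print("samples:", num_samples, "time points:", sample_length, "channels:", num_channels)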
In [6]:
# Compute PLVs
train_dataset_x, train_dataset_y = wplv(train_dataset_x, train_dataset_y, WINDOW_LENGTH)
# Update sample length: after windowing, each sample yields (sample_length - WINDOW_LENGTH) PLV windows
sample_length = sample_length - WINDOW_LENGTH
# Split the windows into training and validation sets, keeping whole source samples together
num_val_samples = sample_length * int(np.ceil(VALIDATION_SAMPLES * num_samples))
val_dataset_x = train_dataset_x[:num_val_samples]
val_dataset_y = train_dataset_y[:num_val_samples]
train_dataset_x = train_dataset_x[num_val_samples:]
train_dataset_y = train_dataset_y[num_val_samples:]
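A quick check of the resulting split (shapes and per-class window counts; the actual numbers depend on the data):

# Sanity check of the split; bincount assumes non-negative integer class labels
print(train_dataset_x.shape, val_dataset_x.shape)
print(np.bincount(train_dataset_y.astype(np.int64)), np.bincount(val_dataset_y.astype(np.int64)))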
In [7]:
# Initialize data for visualization
# Colorbar tick positions, centred within each of the five color segments
label_locations = np.arange(1, 5, 4.0 / 5.0) + (4.0 / 10.0)
label_colors = ['cyan', 'blue', 'gray', 'red', 'green']
labels = ['Artificial', 'Football', 'Nature', 'Bean', 'Chaplin']
In [9]:
# Project the preprocessed samples onto a 2-dimensional plane.
# PCA is fitted on the first 100 samples' worth of windows and then applied to the whole set.
pca = PCA(n_components=2)
train_dataset_x_projected = pca.fit(train_dataset_x[0:(100 * sample_length)]).transform(train_dataset_x)
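Before plotting, it can be worth checking how much of the variance the two components actually capture (output not shown; the numbers depend on the data):

# Fraction of variance explained by the two principal components
print(pca.explained_variance_ratio_)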
In [10]:
fig = plt.figure(figsize=(6,4))
ax = fig.add_subplot(111)
scatter = plt.scatter(train_dataset_x_projected[:,0],
train_dataset_x_projected[:,1],
c=train_dataset_y,
cmap=colors.ListedColormap(label_colors))
colorbar = plt.colorbar(scatter)
colorbar.set_ticks(label_locations)
colorbar.set_ticklabels(labels)
plt.show()
In [11]:
# Project the preprocessed samples into 3-dimensional space (PCA again fitted on the first 100 samples' worth of windows)
pca = PCA(n_components=3)
train_dataset_x_projected = pca.fit(train_dataset_x[0:(sample_length * 100)]).transform(train_dataset_x)
In [12]:
fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(train_dataset_x_projected[:,0],
train_dataset_x_projected[:,1],
train_dataset_x_projected[:,2],
c=train_dataset_y,
cmap=colors.ListedColormap(label_colors))
colorbar = plt.colorbar(scatter)
colorbar.set_ticks(label_locations)
colorbar.set_ticklabels(labels)
plt.show()