This notebook demonstrates MEG analysis using windowed Phase Locking Value (PLV), Principal Component Analysis (PCA) and hierarchical clustering. The notebook:
For more information, please refer to https://nextonsblog.wordpress.com/?p=3087
In [1]:
import numpy as np
import scipy.io
from scipy.signal import hilbert
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.decomposition import PCA
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
In [2]:
# Global configuration.
FILENAME = "../../data/megicann_train_v2.mat"
DATASET_X = 'train_day1'
DATASET_Y = 'class_day1'
FREQ_DOMAINS = [5]
VALIDATION_SAMPLES = 0.2
In [3]:
# Original data is in format train[frequency][sample, channel, time].
# make different frequencies appear as channels and swap channel and time.
def preprocess_x(raw_data):
# Move different frequencies to channels
samples = raw_data[0, FREQ_DOMAINS[0]][:]
for freq_domain in FREQ_DOMAINS[1:]:
samples = np.concatenate([samples, raw_data[0, freq_domain][:]], axis=1)
# Normalize the data within each sample (per channel) and ensure that type is ok
for i, sample in enumerate(samples):
sample_mean = sample.mean(axis=1);
sample_std = sample.std(axis=1);
samples[i] = ((sample.transpose() - sample_mean) / sample_std).transpose()
samples = hilbert(samples)
# Transpose the data to [sample, time, channel] format
samples = np.transpose(samples, axes=(0, 2, 1))
return samples
In [4]:
def plv(samples):
plvs = []
for _sample in samples:
plv = np.abs(np.dot(_sample.conj().T, _sample)) / 200
# The connectivity matrix is symmetrical. Look just the upper triangular matrix
idx = np.triu_indices(plv.shape[0])
plv = plv[idx[0],idx[1]]
# Avoid memory exhaustion by thresholding data here and storing it as uint8
plv[plv < 0.5] = 0
plv[plv >= 0.5] = 1
plv = np.uint8(plv)
plvs.append(np.asarray(plv))
return np.asarray(plvs)
In [5]:
# Load samples and preprocess them
train_dict = scipy.io.loadmat(FILENAME)
train_dataset_x = preprocess_x(train_dict[DATASET_X])
train_dataset_y = train_dict[DATASET_Y][:,0]
# Remove train_dict to save some memory
train_dict = None
In [6]:
train_dataset_x = plv(train_dataset_x)
train_dataset_x[train_dataset_x < 0.5] = 0
train_dataset_x[train_dataset_x >= 0.5] = 1
# Split samples into training and validation sets
num_val_samples = int(np.ceil(VALIDATION_SAMPLES * len(train_dataset_x) - 1))
val_dataset_x = train_dataset_x[:num_val_samples]
val_dataset_y = train_dataset_y[:num_val_samples]
train_dataset_x = train_dataset_x[num_val_samples:]
train_dataset_y = train_dataset_y[num_val_samples:]
In [8]:
# Initialize data for visualization
label_locations = np.arange(1, 5, 4.0 / 5.0) + (4.0 / 10.0)
label_colors = ['cyan', 'blue', 'gray', 'red', 'green']
labels = ['Artificial', 'Football', 'Nature', 'Bean', 'Chaplin']
In [9]:
# Project the preprocessed samples to 2 dimensional plane
pca = PCA(n_components=2)
train_dataset_x_projected = pca.fit(train_dataset_x).transform(train_dataset_x)
fig = plt.figure(figsize=(6,4))
ax = fig.add_subplot(111)
scatter = plt.scatter(train_dataset_x_projected[:,0],
train_dataset_x_projected[:,1],
c=train_dataset_y,
cmap=matplotlib.colors.ListedColormap(label_colors))
colorbar = plt.colorbar(scatter)
colorbar.set_ticks(label_locations)
colorbar.set_ticklabels(labels)
plt.show()
In [10]:
# Project the preprocessed samples to 3 dimensional plane
pca = PCA(n_components=3)
train_dataset_x_projected = pca.fit(train_dataset_x).transform(train_dataset_x)
fig = plt.figure(figsize=(6,4))
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(train_dataset_x_projected[:,0],
train_dataset_x_projected[:,1],
train_dataset_x_projected[:,2],
c=train_dataset_y,
cmap=matplotlib.colors.ListedColormap(label_colors))
colorbar = plt.colorbar(scatter)
colorbar.set_ticks(label_locations)
colorbar.set_ticklabels(labels)
plt.show()
In [11]:
train_dataset_x_linkage = linkage(train_dataset_x, 'ward')
plt.figure(figsize=(15, 70))
plt.title('Hierarchical Clustering of the Samples')
plt.xlabel('Distance')
plt.ylabel('Sample Index')
dendrogram(
train_dataset_x_linkage,
leaf_font_size=10,
labels=[labels[c - 1] for c in train_dataset_y],
orientation='left',
color_threshold=0)
plt.show()
In [ ]: