In [ ]:
from __future__ import print_function
import numpy as np
from scipy.io import loadmat
import os
import glob
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
# IPython/Jupyter magic (not plain-Python syntax): show matplotlib figures inline in the notebook.
%matplotlib inline
# Marker printed once the import cell has run.
print("import done")
In [ ]:
#convert .mat data into ndarray (the result is later serialized using pickle)
def mat_to_numpy(files):
    """Read the 'data' matrix from each .mat file and collect them in one ndarray.

    Every file is expected to contain a MATLAB struct called 'dataStruct'
    with (at least) the fields 'data' and 'channelIndices'. A warning is
    printed for files whose 'channelIndices' row is not strictly increasing;
    the data of such files is still included unchanged.

    Parameters
    ----------
    files : sequence of str
        Paths of the .mat files to load, in the order they should appear
        in the result.

    Returns
    -------
    numpy.ndarray
        ``np.asarray`` applied to the list of per-file 'data' matrices
        (first axis indexes the files when all matrices share one shape).
        An empty array is returned when `files` is empty.
    """
    # Guard: files[0] / files[-1] below would raise IndexError on empty input.
    if len(files) == 0:
        print('No files to convert')
        return np.asarray([])

    print('Converting files ' + files[0] + ' to ' + files[-1])
    data = []
    for fl in files:
        # loadmat returns the struct as a 1x1 structured array; [0, 0]
        # unwraps each field into a regular ndarray.
        struct = loadmat(fl)['dataStruct']
        ndata = {n: struct[n][0, 0] for n in struct.dtype.names}

        # Detect columns that are out of order (indices not strictly increasing).
        channels = ndata['channelIndices'][0]
        if not all(x < y for x, y in zip(channels, channels[1:])):
            print('WARNING: Columns out of order in file ' + fl)

        # NOTE: the raw matrix is stored as-is; no cleaning/normalization is done here.
        data.append(ndata['data'])
    return np.asarray(data)
# Maximum number of files read per class (preictal / interictal) by dataset().
nrOfFiles = 15
def dataset(folder, name_pickle, max_files=None):
    """Build a labelled array from the .mat files in `folder` and pickle it.

    Files matching '*1.mat' are treated as preictal (label 1) and files
    matching '*0.mat' as interictal (label 0). The file lists are shuffled
    and at most `max_files` files of each class are read with
    ``mat_to_numpy``. The pickled object is the dict
    ``{'data': <merged ndarray>, 'labels': <int32 ndarray>}`` with the
    preictal entries first.

    Parameters
    ----------
    folder : str
        Directory containing the .mat files.
    name_pickle : str
        Path of the pickle file to write.
    max_files : int, optional
        Upper bound on the number of files read per class. Defaults to the
        module-level ``nrOfFiles``.

    Raises
    ------
    ValueError
        If no preictal or no interictal file is found in `folder`.

    Notes
    -----
    A failure while writing the pickle is reported with ``print`` and not
    re-raised (best-effort save).
    """
    if max_files is None:
        max_files = nrOfFiles

    # get files separated by preictal and interictal data
    files_preictal = glob.glob(os.path.join(folder, '*1.mat'))
    files_interictal = glob.glob(os.path.join(folder, '*0.mat'))

    # Fail early with a clear message instead of an IndexError / shape
    # mismatch further down when one of the classes is missing.
    if not files_preictal or not files_interictal:
        raise ValueError(
            'No preictal (*1.mat) or interictal (*0.mat) files found in '
            + folder)

    # randomize read files
    np.random.shuffle(files_preictal)
    np.random.shuffle(files_interictal)

    # read files into ndarrays
    data_preictal = mat_to_numpy(files_preictal[:max_files])
    data_interictal = mat_to_numpy(files_interictal[:max_files])

    # create labels: 1 = preictal, 0 = interictal
    labels_preictal = np.ones(data_preictal.shape[0], dtype=np.int32)
    labels_interictal = np.zeros(data_interictal.shape[0], dtype=np.int32)

    # concatenate preictal and interictal data
    print('Merging preictal and interictal data..')
    data_merge = np.concatenate((data_preictal, data_interictal), axis=0)
    labels_merge = np.concatenate((labels_preictal, labels_interictal), axis=0)

    # bundle data and labels in dictionary
    print(data_merge.shape)
    save = {'data': data_merge, 'labels': labels_merge}

    # Best-effort save: report the problem but do not abort the notebook.
    try:
        with open(name_pickle, 'wb') as f:
            print('Pickling to ' + name_pickle)
            pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print('Unable to save data to ' + name_pickle + ' :', e)
# Build the pickle from the training folder; this runs as soon as the cell is executed.
dataset('../data/train/', '../data/trainsh1.pickle')
print('dataset done')
In [ ]: