In [2]:
%matplotlib inline
# auto-reload module definitions when source files change
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import numpy as np
import scipy.io
import os
from seizures.Global import Global
In [3]:
matdict = scipy.io.loadmat(Global.get_subject_folder(os.path.join('Dog_1', 'Dog_1_ictal_segment_1.mat')))
data = matdict['data']
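In [ ]:
# Sanity check (sketch): list what the .mat dictionary actually contains and
# the data shape. Key names other than 'data' (e.g. a sampling-frequency
# field) vary with the dataset release, so verify them before relying on them.
print matdict.keys()
print data.shape, data.dtype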
In [4]:
# plot each channel
plt.figure(figsize=(10,4))
plt.plot(data.T)
plt.xlabel('time (samples)')
plt.ylabel('magnitude')
plt.title('%d channels' % data.shape[0])
plt.grid(True)
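In [ ]:
# Sketch: plot against real time instead of sample index. This ASSUMES the
# segment dictionary carries a sampling frequency under the key 'freq'
# (check matdict.keys() above; the key name is an assumption here).
if 'freq' in matdict:
    fs = float(matdict['freq'])
    t = np.arange(data.shape[1]) / fs
    plt.figure(figsize=(10, 4))
    plt.plot(t, data.T)
    plt.xlabel('time (s)')
    plt.ylabel('magnitude')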
In [5]:
# plot a single channel (index 8)
plt.figure(figsize=(10,4))
plt.plot(data[8, :])
Out[5]:
In [6]:
# hstack over the 2-D array concatenates the channel rows into one long 1-D vector
np.hstack(data).shape
Out[6]:
In [7]:
from seizures.data.DataLoader_v2 import DataLoader
from seizures.features.RandomFeatures import RandomFeatures
from seizures.features.ARFeatures import ARFeatures
base_folder = Global.path_map('clips_folder')
#base_folder = '/home/nuke/git/gatsby-hackathon-seizure/wj_data/'
feature_extractor = ARFeatures()
loader = DataLoader(base_folder, feature_extractor)
subject_name = 'Dog_1'
X, type_labels, early_labels = loader.training_data(subject_name, max_segments=20)
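In [ ]:
# Quick label-balance check (sketch): assumes type_labels and early_labels
# are 0/1 integer arrays with one entry per segment.
print 'segments:', len(type_labels)
print 'ictal fraction: %.2f' % np.mean(type_labels)
print 'early fraction: %.2f' % np.mean(early_labels)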
In [8]:
# X: (n_segments, n_features) feature matrix
X.shape
Out[8]:
In [9]:
plt.plot(X[2, :])
Out[9]:
In [10]:
# the load_data() method fills the features_train attribute
assert isinstance(loader.features_train, list)
features_train = loader.features_train
print '#ictal+interictal segments in %s = %d' % (subject_name, len(features_train))
print '#features of %s = %d' % (type(feature_extractor), len(features_train[0]))
# loader.features_train
In [11]:
# data for cross validation
n_fold = 3
max_segments = 200
X_list, y_seizure, y_early = loader.blocks_for_Xvalidation(subject_name, n_fold, max_segments)
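In [ ]:
# Manual cross-validation sketch over the blocks returned above: hold out one
# block, train on the rest, score AUC on the held-out block. Assumes X_list
# and y_seizure are parallel lists of per-fold arrays.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
for i in range(n_fold):
    Xtr = np.vstack([X_list[j] for j in range(n_fold) if j != i])
    ytr = np.hstack([y_seizure[j] for j in range(n_fold) if j != i])
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(Xtr, ytr)
    prob = clf.predict_proba(X_list[i])[:, 1]
    print 'fold %d: seizure AUC = %.3f' % (i, roc_auc_score(y_seizure[i], prob))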
In [12]:
print len(X_list)
print len(y_seizure)
print len(y_early)
# total number of segments across all folds
sum(x.shape[0] for x in X_list)
Out[12]:
In [13]:
print X_list[2].shape
y_early[0]
Out[13]:
In [14]:
def gen_gaussian_sum(n, d):
    """Toy data: X is standard Gaussian; y = 1 iff the noisy sum of the
    coordinates of a row exceeds 0.5, else 0."""
    X = np.random.randn(n, d)
    y = ((np.sum(X, 1) + np.random.randn(n) * 0.1) > 0.5).astype(int)
    return X, y
# generate an n x d data matrix and labels
n = 100
d = 10
X, y = gen_gaussian_sum(n, d)
print X.shape
print y.shape
In [15]:
from sklearn.ensemble import GradientBoostingClassifier
# fit a gradient boosting classifier
# n_estimators = number of boosting stages
gbc = GradientBoostingClassifier(n_estimators=200, verbose=True)
gbc.fit(X, y)
Out[15]:
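In [ ]:
# Sketch: inspect how training deviance falls with boosting stages using the
# fitted model's train_score_ attribute (one entry per stage).
plt.figure(figsize=(6, 3))
plt.plot(gbc.train_score_)
plt.xlabel('boosting stage')
plt.ylabel('training deviance')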
In [16]:
# testing data
nte = 50
Xte, yte = gen_gaussian_sum(nte, d)
In [17]:
# test the learned gradient boosting classifier
ypred = gbc.predict(Xte)
print 'ground truth:'
print yte
print 'predicted:'
print ypred
print 'accuracy = %f' % np.mean(ypred == yte)
In [18]:
# prediction probabilities
ypred_prob = gbc.predict_proba(Xte)
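In [ ]:
# Since the tables later in this notebook score with AUC, evaluate on the
# positive-class probability rather than hard labels (sketch).
from sklearn.metrics import roc_auc_score
print 'test AUC = %.3f' % roc_auc_score(yte, ypred_prob[:, 1])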
In [19]:
# run the demo in the Boosting module
import seizures.prediction.Boosting
seizures.prediction.Boosting.main()
In [20]:
import scipy.stats as st
import numpy as np
from seizures.features.StatsFeatures import StatsFeatures
from seizures.data.DataLoader_v2 import DataLoader
from seizures.features.RandomFeatures import RandomFeatures
from seizures.features.ARFeatures import ARFeatures
In [21]:
base_folder = Global.path_map('clips_folder')
feature_extractor = StatsFeatures()
loader = DataLoader(base_folder, feature_extractor)
subject_name = 'Dog_1'
X, type_labels, early_labels = loader.training_data(subject_name, max_segments=10)
X.shape
Out[21]:
In [130]:
import scot
import numpy as np
from seizures.data.SubjectEEGData import SubjectEEGData
loader = SubjectEEGData('Patient_1', max_train_segments=10)
L = loader.get_train_data()   # list of (instance, label) tuples
tup = L[9]
ins = tup[0]
dndata = ins.eeg_data         # channels x time
nddata = dndata.T             # transpose to samples x channels for scot
# reducedim: a number less than 1 is interpreted as the fraction of
# variance that should remain in the data; an integer gives the number
# of components to keep.
keep = min(nddata.shape[1], 5)  # keep at most 5 components
ica = scot.plainica.plainica(nddata, reducedim=keep)
In [131]:
M = ica.mixing
U = ica.unmixing
# plot mixing matrix
plt.imshow(M)
plt.colorbar()
plt.title('mixing matrix')
# plot unmixing matrix
plt.figure()
plt.imshow(U)
plt.colorbar()
plt.title('unmixing matrix')
Out[131]:
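In [ ]:
# Sketch: recover source activations by projecting the data through the
# unmixing matrix. ASSUMPTION: with the samples-by-channels orientation used
# above, activations = data . unmixing; verify against scot's documentation.
S = nddata.dot(U)
plt.figure(figsize=(10, 4))
plt.plot(S[:, 0])
plt.title('first ICA source')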
In [1]:
from seizures.features.ARFeatures import *
from seizures.features.MixFeatures import StackFeatures
from seizures.features.PLVFeatures import PLVFeatures
from seizures.features.XCHUHFeatures import XCHUHFeatures
from seizures.features.SEFeatures import SEFeatures
#from seizures.features.LyapunovFeatures import LyapunovFeatures
from seizures.features.StatsFeatures import StatsFeatures
from seizures.prediction.ForestPredictor import ForestPredictor
from seizures.prediction.Boosting import AdaBoostTrees
from seizures.pipelines.FeaturePredictorTest import CVFeaturesPredictorsTester
from seizures.pipelines.FeaturePredictorTest import CachedCVFeaPredTester
feature_extractors = [
ARFeatures(),
SEFeatures(),
StatsFeatures(),
#VarLagsARFeatures(4),
#StackFeatures(ARFeatures(), VarLagsARFeatures(4), VarLagsARFeatures(6))
#StackFeatures(ARFeatures(), VarLagsARFeatures(4)),
#StackFeatures(ARFeatures(), PLVFeatures(), SEFeatures()),
#PLVFeatures(),
#StackFeatures(ARFeatures(), VarLagsARFeatures(5), PLVFeatures()),
#StackFeatures(ARFeatures(), PLVFeatures(), XCHUHFeatures(), SEFeatures())
]
#predictors = [AdaBoostTrees(), ForestPredictor(n_estimators=200)]
predictors = [ForestPredictor(n_estimators=100, max_features=0.2)]
patient = 'Dog_1'
#patient = 'Patient_1'
#tester = CVFeaturesPredictorsTester(feature_extractors, predictors, patient)
tester = CachedCVFeaPredTester(feature_extractors, predictors, patient)
# Randomly subsample the total set of segments (ictal + interictal)
# to make it faster; using the full data should give a similar result.
max_segments = 400
table = tester.test_combination(fold=2, max_segments=max_segments)
In [2]:
# the argument to print_table(..) can be
# seizure_mean_auc, seizure_std_auc, early_mean_auc, early_std_auc
table.print_table('seizure_mean_auc')
In [3]:
table.print_table('early_mean_auc')
In [ ]: