In [1]:
%pylab
import numpy as np
from seizures.features.FeatureExtractBase import FeatureExtractBase
from statsmodels.tsa.vector_ar.var_model import VAR
from seizures.features.ARFeatures import ARFeatures
import scipy.signal
from itertools import tee, izip
from matplotlib import pyplot as plt
from seizures.data.DataLoader import DataLoader, EEGData
In [ ]:
data_path = "/nfs/data3/kaggle_seizure/scratch/Stiched_data/Dog_1/"
feature_extractor = ARFeatures()
eeg_data = EEGData(data_path+'Dog_1_ictal_segment_1')
instances = eeg_data.get_instances()
X = instances[1].eeg_data
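In [ ]:
# Quick look at the loaded clip. Illustrative sketch only: it assumes
# instances[*].eeg_data is a channels x samples NumPy array, which the
# snippet above does not guarantee.
print X.shape
fig, ax = plt.subplots(1, 1, figsize=(10, 3))
ax.plot(X[0, :])  # first channel of the selected instance
ax.set_title('Instance 1, channel 0')
fig.show()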
In [ ]:
# Extracting all features for DOG_1
import glob
import scipy.io
path ='/nfs/data3/kaggle_seizure/clips/Dog_1/'
filenames = glob.glob(path+'*')
feature_mat = np.zeros((len(filenames),528))
labels = np.zeros((len(filenames),1))
i = 0
for filename in filenames:
    data = scipy.io.loadmat(filename)['data']
    #### Extracted brutally from ARFeatures.py: the feature vector is the
    #### flattened coefficient matrix of a VAR(2) model fitted to the clip
    params = VAR(data.T).fit(maxlags=2).params
    feature_mat[i,:] = np.hstack(params.reshape((np.prod(params.shape), 1)))
    ####
    # test 'interictal' first, since 'ictal' is a substring of it
    if filename.find('interictal') > -1:
        labels[i] = 1
    elif filename.find('ictal') > -1:
        labels[i] = 2
    else:
        labels[i] = 3
    i += 1
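In [ ]:
# Why 528 columns: statsmodels' VAR(p) fit returns a params matrix of shape
# (n_channels * maxlags + 1, n_channels); the extra row is the intercept, and
# the feature vector above is that matrix flattened. Assuming 16 channels for
# Dog_1 (an assumption, but the only value consistent with the width used above):
n_channels, maxlags = 16, 2
print (n_channels * maxlags + 1) * n_channels  # 528, matches feature_mat.shape[1]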
In [ ]:
import pickle
# Locally saving:
#stats_AR = { "filenames": filenames,
# "labels": labels,
# "feature_mat":feature_mat}
#pickle.dump( stats_AR, open( "save_AR_stats.p", "wb" ))
l = pickle.load( open( "save_AR_stats.p", "rb" ))
feature_mat = l['feature_mat']
labels = l['labels']
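In [ ]:
# Basic consistency check on the reloaded arrays (illustrative).
print feature_mat.shape, labels.shape
print np.unique(labels)  # expected: 1 = interictal, 2 = ictal, 3 = test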
In [ ]:
# PCA ON Features
def I_x(k):
    return [i for i in range(len(labels)) if labels[i] == k]

def I_s(s):
    assert s in ["test", "ictal", "interictal"]
    if s == "test":
        k = 3
    elif s == "ictal":
        k = 2
    elif s == "interictal":
        k = 1
    return I_x(k)
print feature_mat.shape
print 'Interictal segments: ' + str(len(I_x(1)))
print 'Ictal segments: ' + str(len(I_x(2)))
print 'Test segments: ' + str(len(I_x(3)))
permutation = I_x(1) + I_x(2) + I_x(3)
feature_mat = feature_mat[permutation, :]
labels = labels[permutation]  # keep labels aligned with the reordered rows
feature_mean = np.mean(feature_mat, axis=0)
feature_mat_c = feature_mat - feature_mean
fig,ax = plt.subplots(1,1,figsize=(10,5))
iax = ax.imshow(feature_mat_c, aspect='auto')
ax.set_title('All data, ordered')
fig.colorbar(iax)
fig.show()
fig,ax = plt.subplots(1,1)
iax = ax.imshow(feature_mat_c[I_s("interictal"),:], aspect='auto')
ax.set_title('Interictal')
fig.colorbar(iax)
fig.show()
fig,ax = plt.subplots(1,1)
iax = ax.imshow(feature_mat_c[I_s("ictal"),:], aspect='auto')
ax.set_title('Ictal')
fig.colorbar(iax)
fig.show()
fig,ax = plt.subplots(1,1)
iax = ax.imshow(feature_mat_c[I_s("test"),:], aspect='auto')
ax.set_title('Test')
fig.colorbar(iax)
fig.show()
In [ ]:
np.mean(feature_mat,axis=0).shape
In [ ]:
# Run SVD on features
import numpy as np
U, s, V = np.linalg.svd(feature_mat_c, full_matrices=True)
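In [ ]:
# Fraction of variance captured by the leading components: since the rows of
# feature_mat_c are centred, this is s**2 / sum(s**2). Note that
# full_matrices=False would give the same s and V while avoiding the large
# square U; the call above is left unchanged.
var_explained = s ** 2 / np.sum(s ** 2)
print var_explained[:5]
print np.cumsum(var_explained)[:5]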
In [ ]:
f0=plt.figure()
plt.plot(feature_mean)
plt.title('feature mean')
plt.show()
f1=plt.figure()
plt.plot(V[0:3,:].T)
plt.title('feature principal dimensions')
plt.show()
f2=plt.figure()
plt.plot(np.log(s))
plt.title('singular values')
plt.show()
f3=plt.figure()
plt.scatter(np.dot(feature_mat_c[I_x(3),:], V[0,:]),
            np.dot(feature_mat_c[I_x(3),:], V[1,:]), c='g')
plt.scatter(np.dot(feature_mat_c[I_x(1),:], V[0,:]),
            np.dot(feature_mat_c[I_x(1),:], V[1,:]), c='r')
plt.scatter(np.dot(feature_mat_c[I_x(2),:], V[0,:]),
            np.dot(feature_mat_c[I_x(2),:], V[1,:]), c='b')
plt.show()
In [ ]:
# Scatter plot of the first 3 principal dimensions
# (note: this projects the uncentred feature_mat, unlike the 2-D plot above;
# the effect is only a constant offset of every point along each axis)
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(np.dot(feature_mat[I_x(3),:], V[0,:]),
           np.dot(feature_mat[I_x(3),:], V[1,:]),
           np.dot(feature_mat[I_x(3),:], V[2,:]), c='g')
ax.scatter(np.dot(feature_mat[I_x(1),:], V[0,:]),
           np.dot(feature_mat[I_x(1),:], V[1,:]),
           np.dot(feature_mat[I_x(1),:], V[2,:]), c='r')
ax.scatter(np.dot(feature_mat[I_x(2),:], V[0,:]),
           np.dot(feature_mat[I_x(2),:], V[1,:]),
           np.dot(feature_mat[I_x(2),:], V[2,:]), c='b')
In [ ]:
### Toy test of mlpy's LDA (example adapted from the mlpy documentation)
import numpy as np
import matplotlib.pyplot as plt
import mlpy
np.random.seed(0)
mean1, cov1, n1 = [1, 4.5,0], [[1,1,0],[1,2,0],[0,0,1]], 20 # 20 samples of class 1
x1 = np.random.multivariate_normal(mean1, cov1, n1)
y1 = np.ones(n1, dtype=np.int)
mean2, cov2, n2 = [2.5, 2.5,-1.], [[1,1,0],[1,2,0],[0,0,1]], 30 # 30 samples of class 2
x2 = np.random.multivariate_normal(mean2, cov2, n2)
y2 = 2 * np.ones(n2, dtype=np.int)
x = np.concatenate((x1, x2), axis=0) # concatenate the samples
y = np.concatenate((y1, y2))
lda = mlpy.LDA()
lda.learn(x, y) # compute the transformation matrix
z = lda.transform(x) # embed x into the C-1 = 1 dimensional space
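In [ ]:
# Visualise the toy projection per class, mirroring what is done with the real
# features below (illustrative only; z and y come from the mlpy example above).
f_toy = plt.figure()
plt.hist(np.ravel(z)[y == 1], bins=15, color='r')
plt.hist(np.ravel(z)[y == 2], bins=15, color='b')
plt.title('mlpy LDA toy example: 1-D projection by class')
plt.show()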
In [ ]:
### LDA
import mlpy
x1 = feature_mat_c[I_s("ictal"),:]
x2 = feature_mat_c[I_s("interictal"),:]
y1 = labels[I_s("ictal"),:]
y2 = labels[I_s("interictal"),:]
x = np.concatenate((x1, x2), axis=0) # concatenate the samples
y = np.hstack(np.concatenate((y1, y2)))
lda = mlpy.LDA()
lda.learn(x, y) # compute the transformation matrix
z1 = lda.transform(x1) # embed x1 into the C-1 = 1 dimensional space
z2 = lda.transform(x2) # embed x2 into the same 1-D space
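In [ ]:
# Rough separability check of the 1-D LDA projection on the training classes
# (illustrative only: a midpoint threshold between the class means, not a
# proper classifier or a cross-validated score).
z1f, z2f = np.ravel(z1), np.ravel(z2)
thresh = 0.5 * (np.mean(z1f) + np.mean(z2f))
ictal_side = np.mean(z1f) > thresh  # which side of the threshold the ictal class falls on
acc_ictal = np.mean((z1f > thresh) == ictal_side)
acc_interictal = np.mean((z2f > thresh) != ictal_side)
print 'ictal on its own side: %.3f, interictal: %.3f' % (acc_ictal, acc_interictal)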
In [ ]:
x3 = feature_mat_c[I_s("test"),:]
z3 = lda.transform(x3) # embed the test features into the same 1-D space
f0=plt.figure()
plt.hist(z1,bins=20,color='r')
plt.hist(z2,bins=20,color='b')
plt.show()
f1=plt.figure()
plt.hist(z3,bins=100,color='g')
plt.show()
In [ ]:
help(plt.hist)
In [ ]: