In [135]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
In [145]:
# Read from CSV where every column = [FFT freq bin]
# every row = [epoch 1 (2s): electrode 1, 2, 3, 4] + [epoch 2: electrode 1, 2, 3, 4] + ...
relax = pd.read_csv("../Muse Data/novonarelax.csv", names = np.arange(0,220))
# Cut the dataframe in half by selecting only the first 110 columns
relax = relax.iloc[:,0:110]
# Add a chan column that cycles through [1,2,3,4], tagging each row with its electrode
relax['chan'] = relax.index.map(lambda x: x % 4 + 1)
# Separate data into 4 dataframes, 1 for each electrode
chan1 = relax.loc[relax['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1111)
chan2 = relax.loc[relax['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2111)
chan3 = relax.loc[relax['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3111)
chan4 = relax.loc[relax['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4111)
# Concat all channel-specific dataframes together so that row = 2s epoch
# columns = [electrode 1 FFT bins] + [electrode 2 FFT bins] + ...
relax = pd.concat([chan1.iloc[:,0:110], chan2.iloc[:,0:110], chan3.iloc[:,0:110], chan4.iloc[:,0:110]], axis=1)
# add a label column with relax for all rows
relax['label'] = 'relax'
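The focus recording below is reshaped in exactly the same way. As a hedged sketch (not part of the original notebook), the whole block could be wrapped in a reusable helper; load_epochs and its parameters are hypothetical names:

# Hypothetical helper: read one recording and reshape it so each row is a 2s epoch
# with the four electrodes' FFT bins laid out side by side
def load_epochs(path, label, n_bins=110):
    df = pd.read_csv(path, names=np.arange(0, 220))
    df = df.iloc[:, 0:n_bins]
    df['chan'] = df.index.map(lambda x: x % 4 + 1)
    chans = [df.loc[df['chan'] == c].iloc[:, 0:n_bins].reset_index(drop=True)
             for c in range(1, 5)]
    out = pd.concat(chans, axis=1, ignore_index=True)
    out['label'] = label
    return out

# e.g. relax = load_epochs("../Muse Data/novonarelax.csv", 'relax')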
In [146]:
# Do the exact same thing as above, but for all values in focus data set
focus = pd.read_csv("../Muse Data/novonafocus.csv", names = np.arange(0,220))
focus = focus.iloc[:,0:110]
focus['chan'] = focus.index.map(lambda x: x % 4 + 1)
chan1 = focus.loc[focus['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1111)
chan2 = focus.loc[focus['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2111)
chan3 = focus.loc[focus['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3111)
chan4 = focus.loc[focus['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4111)
focus = pd.concat([chan1.iloc[:,0:110], chan2.iloc[:,0:110], chan3.iloc[:,0:110], chan4.iloc[:,0:110]], axis=1)
focus['label'] = 'focus'
In [193]:
# Combine the relax and focus dataframes into a numeric values dataframe and a matching labels dataframe
# rows = [relax data] + [focus data]
values = pd.concat([relax.iloc[:,:-1], focus.iloc[:,:-1]]).reset_index(drop=True)
labels = pd.concat([pd.DataFrame(relax['label']), pd.DataFrame(focus['label'])]).reset_index(drop=True)
# Flatten the labels DataFrame into the 1D array that sklearn expects
c, r = labels.shape
labels = labels.to_numpy().reshape(c,)
In [194]:
# train_test_split lives in sklearn.model_selection (the old sklearn.cross_validation module has been removed)
from sklearn.model_selection import train_test_split
# Split values and labels arrays into random train and test subsets (20% set aside for testing)
X_train, X_test, y_train, y_test = train_test_split(values,labels,test_size=0.2)
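As a quick sanity check (a sketch, not in the original notebook), it's worth confirming the split sizes and that both classes appear in the training labels:

# Shapes of the train/test splits and class balance of the training labels
print(X_train.shape, X_test.shape)
print(pd.Series(y_train).value_counts())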
In [195]:
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
# Build an SVM with the given kernel (rbf by default) and return its cross-validation scores
def cross_val_svm(X, y, n, kern='rbf'):
    clf = svm.SVC(kernel=kern)
    scores = cross_val_score(clf, X, y, cv=n)
    return scores

# Build a Gaussian naive Bayes classifier and return its cross-validation scores
def cross_val_gnb(X, y, n):
    clf = GaussianNB()
    scores = cross_val_score(clf, X, y, cv=n)
    return scores
In [196]:
# Show the mean cross-validation score across 4 folds
print(cross_val_svm(X_train,y_train,4).mean())
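Cross-validation on the training set gives a quick estimate; since 20% of the data was already held out, a fit-and-score on that test split is a reasonable follow-up (a sketch using the same svm.SVC defaults as above):

# Fit an rbf-kernel SVM on the training split and score it on the held-out 20%
clf = svm.SVC(kernel='rbf')
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

A linear kernel could be compared the same way via cross_val_svm(X_train, y_train, 4, kern='linear').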
In [151]:
import matplotlib.pyplot as plt
%matplotlib inline
# Plot one of the epoch PSDs to make sure we're not running the SVM on nonsense data
plt.plot(values.iloc[1,0:110])
Out[151]:
[line plot: power spectral density of a single 2-second epoch (electrode 1, 110 FFT bins)]
This isn't great performance. One thing we know from the literature is that the alpha/beta ratio should be a helpful feature for telling relaxation apart from focus.
Let's add it in.
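Before the next cell, here is a hedged sketch of the band averaging it performs; band_power is a hypothetical helper, assuming the column labels are the FFT bin frequencies in Hz (as set by the names= argument below):

# Hypothetical helper: mean power across all FFT bins whose frequency label lies in [lo, hi] Hz
# (note the original code includes the 13 Hz bin in both the alpha and beta bands)
def band_power(df, lo, hi):
    cols = [c for c in df.columns if isinstance(c, float) and lo <= c <= hi]
    return df[cols].mean(axis=1)

# e.g. relax['alpha'] = band_power(relax, 8, 13)
#      relax['beta']  = band_power(relax, 13, 30)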
In [435]:
# Read from CSV where every column = [FFT freq bin]
# every row = [epoch 1 (2s): electrode 1, 2, 3, 4] + [epoch 2: electrode 1, 2, 3, 4] + ...
relax = pd.read_csv("../Muse Data/chrisrelax.csv", names = np.arange(0,110.5,step=.5))
# Cut the dataframe in half by selecting only the first 110 columns (0 to 54.5 Hz at a 0.5 Hz step)
relax = relax.iloc[:,0:110]
# Add a new column named alpha that is the average of all bins in the alpha range (8-13 Hz)
relax['alpha'] = relax[[8, 8.5, 9, 9.5, 10, 10.5, 11, 11.5, 12, 12.5, 13]].mean(axis=1)
# Add a new column named beta that is the average of all bins in the beta range (13-30 Hz)
relax['beta'] = relax[[13, 13.5, 14, 14.5, 15, 15.5, 16, 16.5, 17, 17.5, 18, 18.5, 19, 19.5, 20, 20.5, 21, 21.5, 22, 22.5, 23, 23.5, 24, 24.5,
                       25, 25.5, 26, 26.5, 27, 27.5, 28, 28.5, 29, 29.5, 30]].mean(axis=1)
# Add a new column named alpha/beta that is the ratio of the alpha and beta bands
relax['alpha/beta'] = relax['alpha']/relax['beta']
# Iterate through every row, setting chan values to [1,2,3,4] in sequence
relax['chan'] = relax.index.map(lambda x: x % 4 + 1)
# Separate data into 4 dataframes, 1 for each electrode
chan1 = relax.loc[relax['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1114)
chan2 = relax.loc[relax['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2114)
chan3 = relax.loc[relax['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3114)
chan4 = relax.loc[relax['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4114)
# For fun, let's only keep the alpha, beta, and alpha/beta values we calculated
relax = pd.concat([chan1.iloc[:,-4:-1], chan2.iloc[:,-4:-1], chan3.iloc[:,-4:-1], chan4.iloc[:,-4:-1]], axis=1)
# add a label column with relax for all rows
relax['label'] = 'relax'
In [436]:
# Do the exact same thing as above, but for all values in focus data set
focus = pd.read_csv("../Muse Data/chrisfocus.csv", names = np.arange(0,110.5,step=.5))
focus = focus.iloc[:,0:110]
focus['alpha'] = focus[[8, 8.5, 9, 9.5, 10, 10.5, 11, 11.5, 12, 12.5, 13]].mean(axis=1)
focus['beta'] = focus[[13, 13.5, 14, 14.5, 15, 15.5, 16, 16.5, 17, 17.5, 18, 18.5, 19, 19.5, 20, 20.5, 21, 21.5, 22, 22.5, 23, 23.5, 24, 24.5,
                       25, 25.5, 26, 26.5, 27, 27.5, 28, 28.5, 29, 29.5, 30]].mean(axis=1)
focus['alpha/beta'] = focus['alpha']/focus['beta']
focus['chan'] = focus.index.map(lambda x: x % 4 + 1)
chan1 = focus.loc[focus['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1114)
chan2 = focus.loc[focus['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2114)
chan3 = focus.loc[focus['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3114)
chan4 = focus.loc[focus['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4114)
focus = pd.concat([chan1.iloc[:,-4:-1], chan2.iloc[:,-4:-1], chan3.iloc[:,-4:-1], chan4.iloc[:,-4:-1]], axis=1)
focus['label'] = 'focus'
In [437]:
# Combine the relax and focus dataframes into a numeric values dataframe and a matching labels dataframe
# rows = [relax data] + [focus data]
values = pd.concat([relax.iloc[:,:-1], focus.iloc[:,:-1]]).reset_index(drop=True)
labels = pd.concat([pd.DataFrame(relax['label']), pd.DataFrame(focus['label'])]).reset_index(drop=True)
# Flatten the labels DataFrame into the 1D array that sklearn expects
c, r = labels.shape
labels = labels.to_numpy().reshape(c,)
In [438]:
# Split values and labels arrays into random train and test subsets (20% set aside for testing)
X_train, X_test, y_train, y_test = train_test_split(values,labels,test_size=0.2)
# Show the mean cross-validation score across 4 folds
print(cross_val_gnb(X_train,y_train,4).mean())
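For comparison (a sketch reusing the functions defined above, not part of the original run), the rbf-kernel SVM can be cross-validated on the same band-ratio features, and the naive Bayes model scored on the held-out 20%:

# Cross-validated SVM on the alpha/beta features, plus a test-set score for GaussianNB
print(cross_val_svm(X_train, y_train, 4).mean())
clf = GaussianNB()
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))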