In [135]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
In [145]:
# Read from CSV where every column = [FFT freq bin]
# every row = [epoch 1 (2s): electrode 1, 2, 3, 4] + [epoch 2: electrode 1, 2, 3, 4] + ...
relax = pd.read_csv("../Muse Data/novonarelax.csv", names = np.arange(0,220))
# Cut the dataframe in half by selecting only the first 110 columns
relax = relax.iloc[:,0:110]
# Add a chan column that cycles through [1,2,3,4], tagging each row with its electrode
relax['chan'] = relax.index.map(lambda x: x % 4 + 1)
# Separate data into 4 dataframes, 1 for each electrode
chan1 = relax.loc[relax['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1111)
chan2 = relax.loc[relax['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2111)
chan3 = relax.loc[relax['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3111)
chan4 = relax.loc[relax['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4111)
# Concat all channel-specific dataframes together so that row = 2s epoch
# columns = [electrode 1 FFT bins] + [electrode 2 FFT bins] + ...
relax = pd.concat([chan1.iloc[:,0:110], chan2.iloc[:,0:110], chan3.iloc[:,0:110], chan4.iloc[:,0:110]], axis=1)
# add a label column with relax for all rows
relax['label'] = 'relax'
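The focus recording below is reshaped in exactly the same way. As a hedged sketch (not part of the original notebook), the whole block could be wrapped in a reusable helper; load_epochs and its parameters are hypothetical names:

# Hypothetical helper: read one recording and reshape it so each row is a 2s epoch
# with the four electrodes' FFT bins laid out side by side
def load_epochs(path, label, n_bins=110):
    df = pd.read_csv(path, names=np.arange(0, 220))
    df = df.iloc[:, 0:n_bins]
    df['chan'] = df.index.map(lambda x: x % 4 + 1)
    chans = [df.loc[df['chan'] == c].iloc[:, 0:n_bins].reset_index(drop=True)
             for c in range(1, 5)]
    out = pd.concat(chans, axis=1, ignore_index=True)
    out['label'] = label
    return out

# e.g. relax = load_epochs("../Muse Data/novonarelax.csv", 'relax')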
In [146]:
# Do the exact same thing as above, but for all values in focus data set
focus = pd.read_csv("../Muse Data/novonafocus.csv", names = np.arange(0,220))
focus = focus.iloc[:,0:110]
focus['chan'] = focus.index.map(lambda x: x % 4 + 1)
chan1 = focus.loc[focus['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1111)
chan2 = focus.loc[focus['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2111)
chan3 = focus.loc[focus['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3111)
chan4 = focus.loc[focus['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4111)
focus = pd.concat([chan1.iloc[:,0:110], chan2.iloc[:,0:110], chan3.iloc[:,0:110], chan4.iloc[:,0:110]], axis=1)
focus['label'] = 'focus'
In [193]:
# Combine the relax and focus dataframes into a numeric values dataframe and a matching labels dataframe
# rows = [relax data] + [focus data]
values = pd.concat([relax.iloc[:,:-1], focus.iloc[:,:-1]]).reset_index(drop=True)
labels = pd.concat([pd.DataFrame(relax['label']), pd.DataFrame(focus['label'])]).reset_index(drop=True)
# Flatten the labels DataFrame into the 1D array that sklearn expects
c, r = labels.shape
labels = labels.to_numpy().reshape(c,)
In [194]:
# train_test_split lives in sklearn.model_selection (the old sklearn.cross_validation module has been removed)
from sklearn.model_selection import train_test_split
# Split values and labels arrays into random train and test subsets (20% set aside for testing)
X_train, X_test, y_train, y_test = train_test_split(values,labels,test_size=0.2)
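As a quick sanity check (a sketch, not in the original notebook), it's worth confirming the split sizes and that both classes appear in the training labels:

# Shapes of the train/test splits and class balance of the training labels
print(X_train.shape, X_test.shape)
print(pd.Series(y_train).value_counts())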
In [195]:
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
# Build an SVM with the given kernel (rbf by default) and return its cross-validation scores
def cross_val_svm(X, y, n, kern='rbf'):
    clf = svm.SVC(kernel=kern)
    scores = cross_val_score(clf, X, y, cv=n)
    return scores

# Build a Gaussian naive Bayes classifier and return its cross-validation scores
def cross_val_gnb(X, y, n):
    clf = GaussianNB()
    scores = cross_val_score(clf, X, y, cv=n)
    return scores
In [196]:
# Show the mean cross-validation score across 4 folds
print(cross_val_svm(X_train,y_train,4).mean())
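Cross-validation on the training set gives a quick estimate; since 20% of the data was already held out, a fit-and-score on that test split is a reasonable follow-up (a sketch using the same svm.SVC defaults as above):

# Fit an rbf-kernel SVM on the training split and score it on the held-out 20%
clf = svm.SVC(kernel='rbf')
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))

A linear kernel could be compared the same way via cross_val_svm(X_train, y_train, 4, kern='linear').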
In [151]:
import matplotlib.pyplot as plt
%matplotlib inline
# Plot one of the epoch PSDs to make sure we're not running the SVM on nonsense data
plt.plot(values.iloc[1,0:110])
Out[151]:
[line plot: power spectral density of a single 2-second epoch (electrode 1, 110 FFT bins)]
This isn't great performance. One thing we know from the literature is that the alpha/beta ratio should be a helpful feature for telling relaxation apart from focus.
Let's add it in.
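Before the next cell, here is a hedged sketch of the band averaging it performs; band_power is a hypothetical helper, assuming the column labels are the FFT bin frequencies in Hz (as set by the names= argument below):

# Hypothetical helper: mean power across all FFT bins whose frequency label lies in [lo, hi] Hz
# (note the original code includes the 13 Hz bin in both the alpha and beta bands)
def band_power(df, lo, hi):
    cols = [c for c in df.columns if isinstance(c, float) and lo <= c <= hi]
    return df[cols].mean(axis=1)

# e.g. relax['alpha'] = band_power(relax, 8, 13)
#      relax['beta']  = band_power(relax, 13, 30)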
In [435]:
# Read from CSV where every column = [FFT freq bin]
# every row = [epoch 1 (2s): electrode 1, 2, 3, 4] + [epoch 2: electrode 1, 2, 3, 4] + ...
relax = pd.read_csv("../Muse Data/chrisrelax.csv", names = np.arange(0,110.5,step=.5))
# Cut the dataframe in half by selecting only the first 110 columns (0 to 54.5 Hz at a 0.5 Hz step)
relax = relax.iloc[:,0:110]
# Add a new column named alpha that is the average of all bins in the alpha range (8-13 Hz)
relax['alpha'] = relax[[8, 8.5, 9, 9.5, 10, 10.5, 11, 11.5, 12, 12.5, 13]].mean(axis=1)
# Add a new column named beta that is the average of all bins in the beta range (13-30 Hz)
relax['beta'] = relax[[13, 13.5, 14, 14.5, 15, 15.5, 16, 16.5, 17, 17.5, 18, 18.5, 19, 19.5, 20, 20.5, 21, 21.5, 22, 22.5, 23, 23.5, 24, 24.5,
                       25, 25.5, 26, 26.5, 27, 27.5, 28, 28.5, 29, 29.5, 30]].mean(axis=1)
# Add a new column named alpha/beta that is the ratio of the alpha and beta bands
relax['alpha/beta'] = relax['alpha']/relax['beta']
# Iterate through every row, setting chan values to [1,2,3,4] in sequence
relax['chan'] = relax.index.map(lambda x: x % 4 + 1)
# Separate data into 4 dataframes, 1 for each electrode
chan1 = relax.loc[relax['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1114)
chan2 = relax.loc[relax['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2114)
chan3 = relax.loc[relax['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3114)
chan4 = relax.loc[relax['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4114)
# For fun, let's only keep the alpha, beta, and alpha/beta values we calculated
relax = pd.concat([chan1.iloc[:,-4:-1], chan2.iloc[:,-4:-1], chan3.iloc[:,-4:-1], chan4.iloc[:,-4:-1]], axis=1)
# add a label column with relax for all rows
relax['label'] = 'relax'
In [436]:
# Do the exact same thing as above, but for all values in focus data set
focus = pd.read_csv("../Muse Data/chrisfocus.csv", names = np.arange(0,110.5,step=.5))
focus = focus.iloc[:,0:110]
focus['alpha'] = focus[[8, 8.5, 9, 9.5, 10, 10.5, 11, 11.5, 12, 12.5, 13]].mean(axis=1)
focus['beta'] = focus[[13, 13.5, 14, 14.5, 15, 15.5, 16, 16.5, 17, 17.5, 18, 18.5, 19, 19.5, 20, 20.5, 21, 21.5, 22, 22.5, 23, 23.5, 24, 24.5,
                       25, 25.5, 26, 26.5, 27, 27.5, 28, 28.5, 29, 29.5, 30]].mean(axis=1)
focus['alpha/beta'] = focus['alpha']/focus['beta']
focus['chan'] = focus.index.map(lambda x: x % 4 + 1)
chan1 = focus.loc[focus['chan'] == 1,].reset_index(drop=True)
chan1.columns = np.arange(1000,1114)
chan2 = focus.loc[focus['chan'] == 2,].reset_index(drop=True)
chan2.columns = np.arange(2000,2114)
chan3 = focus.loc[focus['chan'] == 3,].reset_index(drop=True)
chan3.columns = np.arange(3000,3114)
chan4 = focus.loc[focus['chan'] == 4,].reset_index(drop=True)
chan4.columns = np.arange(4000,4114)
focus = pd.concat([chan1.iloc[:,-4:-1], chan2.iloc[:,-4:-1], chan3.iloc[:,-4:-1], chan4.iloc[:,-4:-1]], axis=1)
focus['label'] = 'focus'
In [437]:
# Combine the relax and focus dataframes into a numeric values dataframe and a matching labels dataframe
# rows = [relax data] + [focus data]
values = pd.concat([relax.iloc[:,:-1], focus.iloc[:,:-1]]).reset_index(drop=True)
labels = pd.concat([pd.DataFrame(relax['label']), pd.DataFrame(focus['label'])]).reset_index(drop=True)
# Flatten the labels DataFrame into the 1D array that sklearn expects
c, r = labels.shape
labels = labels.to_numpy().reshape(c,)
In [438]:
# Split values and labels arrays into random train and test subsets (20% set aside for testing)
X_train, X_test, y_train, y_test = train_test_split(values,labels,test_size=0.2)
# Show the mean cross-validation score across 4 folds
print(cross_val_gnb(X_train,y_train,4).mean())
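For comparison (a sketch reusing the functions defined above, not part of the original run), the rbf-kernel SVM can be cross-validated on the same band-ratio features, and the naive Bayes model scored on the held-out 20%:

# Cross-validated SVM on the alpha/beta features, plus a test-set score for GaussianNB
print(cross_val_svm(X_train, y_train, 4).mean())
clf = GaussianNB()
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))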