In [163]:
import tensorflow as tf
import tensorflow.contrib.learn as skflow
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import seaborn as sns
import random
from scipy.signal import resample
from scipy.stats import zscore
from scipy import interp
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn import cross_validation
In [612]:
# data loading function
def data_loader(mouse_name, data_dir='~/work/whiskfree/data'):
    """Read the four per-mouse CSV files used throughout this notebook.

    Parameters
    ----------
    mouse_name : str
        Identifier that forms the file-name suffix, e.g. ``'32'`` reads
        ``theta_32.csv``, ``kappa_32.csv``, ``trialtype_32.csv`` and
        ``choice_32.csv``.
    data_dir : str, optional
        Directory that contains the CSV files. Defaults to the location that
        used to be hard-coded, so existing calls behave exactly as before.

    Returns
    -------
    theta, kappa, tt, ch : pandas.DataFrame
        Contents of the theta, kappa, trialtype and choice files, in that
        order, each read with ``header=None``.
    """
    def _read(prefix):
        # Every file follows the same "<prefix>_<mouse>.csv" naming scheme.
        return pd.read_csv('{}/{}_{}.csv'.format(data_dir, prefix, mouse_name),
                           header=None)

    theta = _read('theta')
    kappa = _read('kappa')
    tt = _read('trialtype')
    ch = _read('choice')
    return theta, kappa, tt, ch
def _resample_zscore(frame, window, n_samples):
    """Cut a time window out of every trial, resample it and z-score the result.

    The z-score uses the mean and standard deviation of the whole resampled
    matrix (``axis=None``), not per-trial statistics.
    """
    start, stop = window
    # atleast_2d keeps a single-trial input as a (1, n_timepoints) matrix.
    values = np.atleast_2d(np.asarray(frame, dtype=float))
    resampled = resample(values[:, start:stop], n_samples, axis=1)
    return zscore(resampled, axis=None)


def data_parser(theta, kappa, tt, ch, window=(950, 1440), n_samples=50):
    """Build the feature matrix and label vectors for the valid trials.

    Parameters
    ----------
    theta, kappa : pandas.DataFrame
        One row per trial, one column per time point.
    tt, ch : pandas.DataFrame
        Single-column frames with the trial type and the choice of each trial.
        Trials whose trial type or choice is 0 or NaN are discarded.
    window : tuple of int, optional
        ``(start, stop)`` column slice taken from every trial before
        resampling. Defaults to the previously hard-coded ``950:1440``.
    n_samples : int, optional
        Number of points each windowed trace is resampled to (default 50).

    Returns
    -------
    both_c : numpy.ndarray, shape (n_valid, 2 * n_samples)
        Z-scored theta features followed by z-scored kappa features.
    tt_c, ch_c : numpy.ndarray
        Trial type and choice of the retained trials, as column vectors.
    clean : numpy.ndarray of bool
        Validity mask over all trials, with the same shape as ``tt.values``.
    """
    theta_r = _resample_zscore(theta, window, n_samples)
    kappa_r = _resample_zscore(kappa, window, n_samples)
    both = np.hstack([theta_r, kappa_r])

    # NaN is mapped to 0 so that missing entries are rejected along with 0.
    clean1 = np.nan_to_num(np.asarray(tt, dtype=float)) != 0
    clean2 = np.nan_to_num(np.asarray(ch, dtype=float)) != 0
    clean = clean1 & clean2

    # Select rows with an explicit 1-D mask instead of indexing the
    # DataFrames with the 2-D boolean array.
    keep_rows = clean.reshape(len(clean), -1).all(axis=1)
    both_c = both[keep_rows]
    tt_c = tt.values[keep_rows]
    ch_c = ch.values[keep_rows]

    # One-hot labels, should they be wanted later:
    # labs = np.eye(3)[tt_c.astype(int).ravel() - 1]
    return both_c, tt_c, ch_c, clean
In [466]:
# Load one mouse and preview its first 100 raw trials (theta left, kappa right).
mouse_name = '32'
theta, kappa, tt, ch = data_loader(mouse_name)
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
theta_panel, kappa_panel = ax
_ = theta_panel.plot(theta[:100].T)
_ = kappa_panel.plot(kappa[:100].T)
In [613]:
# Turn the raw recordings into the cleaned feature matrix and label vectors,
# then overlay the feature vectors of the first 100 retained trials.
parsed = data_parser(theta, kappa, tt, ch)
both_c, tt_c, ch_c, clean = parsed
_ = plt.plot(both_c[:100].T)
In [614]:
# Let's use 20% of the data for testing and 80% for training
SPLIT_SEED = 0  # fixed so the split (and every number derived from it) is reproducible
n_trials = len(both_c)
trainsize = int(n_trials * 0.8)
testsize = n_trials - trainsize
print('Desired training/test set sizes:',trainsize, testsize)

# A private, seeded generator: the global `random` state is left untouched.
subset = random.Random(SPLIT_SEED).sample(range(n_trials), trainsize)
fullrange = range(0, n_trials)
# Sorted indices of the held-out trials; used for both test data and labels
# so the two can never get out of step.
toexclude = np.delete(fullrange, subset)

traindata = both_c[subset,:]
testdata = both_c[toexclude,:]

# non one-hot style labels
trainlabs1D = tt_c[subset].squeeze() # Change this to ch_c to classify choice instead
testlabs1D = tt_c[toexclude].ravel()

# Every trial must land in exactly one of the two sets.
assert len(traindata) + len(testdata) == n_trials
assert len(testlabs1D) == len(testdata)

print('training set shape:',traindata.shape)
print('test set shape:',testdata.shape)
print('1D train label shape:', trainlabs1D.shape)
print('1D test label shape:', testlabs1D.shape)
In [615]:
# Define models: a logistic regression baseline and a network with a single
# hidden layer of 100 units that distinguishes three classes.
lr = LogisticRegression()
NN = skflow.TensorFlowDNNClassifier(
    hidden_units=[100],
    n_classes=3,
    batch_size=128,
    steps=1000,
    optimizer='Adam',
    learning_rate=0.001,
    verbose=0,
)
In [622]:
# ROC mouse + 2 MODELS of all trials with binarised labels
#
# One panel each for the mouse, the logistic regression and the neural
# network. For every class the hard predictions are binarised one-vs-rest and
# compared with the binarised trial type; a macro-average curve is added.
# NOTE(review): the models below are cross-validated on choice (ch_c) while
# the curves are drawn against trial type (tt_c) - confirm this is intended.
fpr = dict()
tpr = dict()
roc_auc = dict()
n_classes = 3
trialtypes = ['Anterior Pole','Posterior Pole','No Go'] # 32-34
# trialtypes = ['Posterior Pole','Anterior Pole','No Go'] # 36

# Change the model/labels here
preds = cross_validation.cross_val_predict(lr, both_c, ch_c.squeeze()-1, cv=5)
preds_NN = cross_validation.cross_val_predict(NN, both_c, ch_c.squeeze()-1, cv=5)

mouse_choice = ch[clean.squeeze()].values
true_labels = np.ravel(tt_c)

# (panel title, predicted class per trial using the same 1-based coding as tt_c).
# Model predictions are 0-based, hence the +1. Everything is flattened so
# roc_curve never receives a column vector.
roc_panels = [
    ('Mouse ' + mouse_name, np.ravel(mouse_choice)),
    ('Logistic Regression', np.ravel(preds) + 1),
    ('Neural Network', np.ravel(preds_NN) + 1),
]

with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots(1,3,figsize=(15,6))
    for panel, (panel_title, predicted) in zip(ax, roc_panels):
        for i in range(n_classes):
            these_trials = true_labels == i+1
            binary_trials = these_trials.astype(int)
            wrong = predicted != i+1
            binary_preds = (~wrong).astype(int)
            fpr[i], tpr[i], thresholds = metrics.roc_curve(binary_trials,binary_preds)
            roc_auc[i] = metrics.auc(fpr[i], tpr[i])
            panel.plot(fpr[i], tpr[i], lw=1, label='ROC ' + trialtypes[i] +' (area = %0.2f)' % (roc_auc[i]))

        # Compute macro-average ROC following sklearn docs
        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
        # Then interpolate all ROC curves at these points
        # (np.interp replaces the deprecated scipy.interp alias)
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
        # Finally average it and compute AUC
        mean_tpr /= n_classes
        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"])
        panel.plot(fpr["macro"], tpr["macro"],
                   label='macro-average ROC curve (area = {0:0.2f})'.format(roc_auc["macro"]),linewidth=2)

        panel.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Chance')
        panel.legend(loc=4)
        panel.set_title(panel_title)
        panel.set_xlim([-0.2,1.1])
        panel.set_ylim([-0.2,1.1])
# plt.savefig('figs/ROC_allthree_trailtype_preds_'+ mouse_name +'.png')
In [617]:
# Softmax probability version
# TO DO: change this to utilise all data in cross val by setting up custom function.
#
# Same three panels as the hard-label ROC figure, but the two models are
# scored with their predicted class probabilities on the held-out test set.
# The mouse panel still uses all cleaned trials.
# `trialtypes` must already be defined by an earlier cell.
fpr = dict()
tpr = dict()
roc_auc = dict()
n_classes = 3

probs = lr.fit(traindata,trainlabs1D-1).predict_proba(testdata)
probs_NN = NN.fit(traindata,trainlabs1D-1).predict_proba(testdata)

# The mouse only gives hard decisions, so its per-class "score" is 1 for the
# chosen class and 0 otherwise.
mouse_choice = ch[clean.squeeze()].values
mouse_scores = np.zeros((len(mouse_choice), n_classes))
for i in range(n_classes):
    wrong = mouse_choice != i+1
    mouse_scores[~np.ravel(wrong), i] = 1

# (panel title, true 1-based class per trial, score matrix with one column per class)
roc_panels = [
    ('Mouse ' + mouse_name, np.ravel(tt_c), mouse_scores),
    ('Logistic Regression', np.ravel(testlabs1D), probs),
    ('Neural Network', np.ravel(testlabs1D), probs_NN),
]

with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots(1,3, figsize=(15,5))
    for panel, (panel_title, true_labels, scores) in zip(ax, roc_panels):
        for i in range(n_classes):
            these_trials = true_labels == i+1
            binary_trials = these_trials.astype(int)
            fpr[i], tpr[i], thresholds = metrics.roc_curve(binary_trials,scores[:,i])
            roc_auc[i] = metrics.auc(fpr[i], tpr[i])
            panel.plot(fpr[i], tpr[i], lw=1, label='ROC ' + trialtypes[i] +' (area = %0.2f)' % (roc_auc[i]))

        # Compute macro-average ROC following sklearn docs
        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
        # Then interpolate all ROC curves at these points
        # (np.interp replaces the deprecated scipy.interp alias)
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
        # Finally average it and compute AUC
        mean_tpr /= n_classes
        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"])
        panel.plot(fpr["macro"], tpr["macro"],
                   label='macro-average ROC curve (area = {0:0.2f})'.format(roc_auc["macro"]),linewidth=2)

        panel.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Chance')
        panel.legend(loc=4)
        panel.set_title(panel_title)
        panel.set_xlim([-0.2,1.1])
        panel.set_ylim([-0.2,1.1])
# plt.savefig('figs/ROC_both_trialtype_3probs_'+ mouse_name +'.png')
In [623]:
# Confusion matrices: mouse vs. the two cross-validated models.
#
# TRIALTYPE variant (swap in to score against trial type instead of choice):
# mouse_choice = ch[clean.squeeze()].values
# cm_m = confusion_matrix(tt_c,mouse_choice)
# cm_lr = confusion_matrix(tt_c,preds+1)
# cm_NN = confusion_matrix(tt_c,preds_NN+1)
#
# CHOICE variant (active): the reference label is the mouse's own choice, so
# the mouse panel compares ch_c with itself.
label = ch_c
cm_m = confusion_matrix(label,ch_c)
cm_lr = confusion_matrix(label,preds+1)
cm_NN = confusion_matrix(label,preds_NN+1)

# (panel title, 1-based predictions, confusion matrix)
cm_panels = [
    ('Mouse ' + mouse_name, ch_c, cm_m),
    ('Logistic Regression', preds+1, cm_lr),
    ('Neural Network', preds_NN+1, cm_NN),
]
tick_marks = np.arange(len(trialtypes))

with sns.axes_style("white"):
    fig,ax = plt.subplots(1,3,figsize=(15,6))
    for panel, (panel_title, predicted, cm) in zip(ax, cm_panels):
        panel.imshow(cm,interpolation='none',cmap="Greys")
        panel.set_title(panel_title + '. ' + str(int(100 * accuracy_score(label,predicted))) + '%')
        panel.set_ylabel('True label')
        panel.set_xlabel('Predicted label')
        # Tick positions and tick labels are set separately: the second
        # positional argument of set_xticks/set_yticks has not always meant
        # "labels", so passing the names there is not portable.
        panel.set_xticks(tick_marks)
        panel.set_xticklabels(trialtypes)
        panel.set_yticks(tick_marks)
        panel.set_yticklabels(trialtypes)
        # Annotate every cell with its count; sized from the matrix itself so
        # a class that never occurs cannot cause an IndexError.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                panel.text(j, i, cm[i,j], va='center', ha='center',bbox=dict(facecolor='white',edgecolor='white', alpha=0.5))
# plt.savefig('figs/Cmatrix_lr_trialtype_choice_'+ mouse_name +'.png')
In [624]:
# Class counts side by side: true trial type, mouse choice, and the network's
# cross-validated prediction (shifted back to 1-based labels).
# ax[0] alternative when predicting choice: tt[clean.squeeze()].values
hist_panels = [
    (tt_c, 'Trialtype'),
    (ch_c, 'Choice'),
    (preds_NN+1, 'NN choice'),
]
with plt.style.context('fivethirtyeight'):
    fig, ax = plt.subplots(1,3, figsize=(12,3))
    for panel, (values, panel_title) in zip(ax, hist_panels):
        panel.hist(values)
        panel.set_title(panel_title)
        panel.set_xlim([0.5,3.5])
plt.suptitle('Mouse ' + mouse_name, x=0.5,y=1.1,fontsize=15)
# plt.savefig('figs/choice_number_'+ mouse_name +'.png')
Out[624]:
In [625]:
# Per-class precision/recall/F1 for the mouse and for the neural network,
# both scored against `label` (set in the confusion-matrix cell).
trialtypes = ['Anterior Pole','Posterior Pole','No Go']

# Mouse: ch_c is used for both the report and the F1 score so the two numbers
# describe the same predictions and do not depend on `mouse_choice` left over
# from another cell.
print(metrics.classification_report(label,ch_c,target_names=trialtypes))
print('Weighted f1_score: ',metrics.f1_score(label,ch_c,average='weighted'))

# Neural network: predictions are 0-based, labels are 1-based.
print(metrics.classification_report(label,preds_NN+1,target_names=trialtypes))
print('Weighted f1_score: ',metrics.f1_score(label,preds_NN+1,average='weighted'))
In [580]:
# Scratch check of the cleaning mask: histogram trial type and choice for the
# trials where both are non-zero and not NaN.
clean1 = np.nan_to_num(tt) != 0
clean2 = np.nan_to_num(ch) != 0
both_valid = clean1 & clean2
tt_c2 = tt[both_valid].values
ch_c2 = ch[both_valid].values
fig, ax = plt.subplots(1, 2)
tt_panel, ch_panel = ax
tt_panel.hist(tt_c2)
ch_panel.hist(ch_c2)
Out[580]:
In [605]:
# Scratch copy of the resampling step in data_parser: window 950:1440 of every
# trial is resampled to 50 points, then each signal is z-scored over the whole
# matrix and the two are concatenated column-wise.
theta_trials = theta.values.squeeze()
theta_r = np.array([[resample(theta_trials[trial, 950:1440], 50)]
                    for trial in range(0, theta.shape[0])])
theta_r = zscore(theta_r.squeeze(), axis=None)

kappa_trials = kappa.values.squeeze()
kappa_r = np.array([[resample(kappa_trials[trial, 950:1440], 50)]
                    for trial in range(0, kappa.shape[0])])
kappa_r = zscore(kappa_r.squeeze(), axis=None)

theta_df = pd.DataFrame(theta_r)
kappa_df = pd.DataFrame(kappa_r)
both_df = pd.concat([theta_df, kappa_df], axis=1)
In [610]:
# Scratch copy of the cleaning step in data_parser: keep only the feature rows
# whose trial type and choice are both non-zero and not NaN.
clean1 = np.nan_to_num(tt) != 0
clean2 = np.nan_to_num(ch) != 0
clean = clean1 & clean2
# Alternative that was tried: tt_c = tt[tt.values !=0|3].values
both = both_df[clean]
# Alternative that was tried: both_c = both[clean.squeeze(),:]
both_c = both.values
both_c.shape
Out[610]:
In [673]:
# LDA/PCA of data TRIALTYPE
# Prefer the current import location and fall back to the legacy module.
try:
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
except ImportError:
    from sklearn.lda import LDA
from sklearn.decomposition import PCA

trialtypes = ['Anterior Pole','Posterior Pole','No Go']

pca = PCA(n_components=20)
X_r = pca.fit(both_c).transform(both_c)

# LDA can produce at most (number of classes - 1) discriminant axes, so asking
# for more than that is either silently clipped or rejected.
lda = LDA(n_components=len(trialtypes) - 1)
X_r2 = lda.fit(both_c, tt_c.ravel()).transform(both_c)

# Percentage of variance explained for each component
print('explained variance ratio (%d components): %s'
      % (len(pca.explained_variance_ratio_), str(pca.explained_variance_ratio_)))

# One scatter plot per projection, showing its first two axes coloured by class.
# The loop variable is deliberately NOT called `trialtypes`, so the list of
# class names survives the loop.
trial_labels = tt_c.ravel()
for projection, plot_title in ((X_r, 'PCA'), (X_r2, 'LDA')):
    plt.figure()
    for c, i, trialtype in zip("rgb", [0, 1, 2], trialtypes):
        in_class = trial_labels == i+1
        plt.scatter(projection[in_class, 0], projection[in_class, 1], c=c, label=trialtype)
    plt.legend()
    plt.title(plot_title)
Out[673]:
In [675]:
# LDA/ PCA CHOICE
# Uses the LDA and PCA classes imported by the trial-type cell.
trialtypes = ['Anterior Pole','Posterior Pole','No Go']

pca = PCA(n_components=20)
X_r = pca.fit(both_c).transform(both_c)

# LDA can produce at most (number of classes - 1) discriminant axes, so asking
# for more than that is either silently clipped or rejected.
lda = LDA(n_components=len(trialtypes) - 1)
X_r2 = lda.fit(both_c, ch_c.ravel()).transform(both_c)

# Percentage of variance explained for each component
print('explained variance ratio (%d components): %s'
      % (len(pca.explained_variance_ratio_), str(pca.explained_variance_ratio_)))

# One scatter plot per projection, showing its first two axes coloured by the
# mouse's choice. The loop variable is deliberately NOT called `trialtypes`,
# so the list of class names survives the loop.
choice_labels = ch_c.ravel()
for projection, plot_title in ((X_r, 'PCA'), (X_r2, 'LDA')):
    plt.figure()
    for c, i, trialtype in zip("rgb", [0, 1, 2], trialtypes):
        in_class = choice_labels == i+1
        plt.scatter(projection[in_class, 0], projection[in_class, 1], c=c, label=trialtype)
    plt.legend()
    plt.title(plot_title)
Out[675]:
In [699]:
# Plot every principal axis as a curve over the input features, then show the
# cumulative fraction of variance the components explain.
# lda.score(both_c,tt_c)
x = pca.components_
fig, ax = plt.subplots(figsize=(10, 5))
_ = plt.plot(np.transpose(x))
pca.explained_variance_ratio_.cumsum()
Out[699]:
In [452]:
# Conv net
def max_pool_2x1(tensor_in):
    """Max-pool a 4-D tensor with a window of 2 and a stride of 2 along its second dimension.

    The other three dimensions use a window and stride of 1; padding is 'SAME'.
    """
    pool_shape = [1, 2, 1, 1]
    return tf.nn.max_pool(tensor_in, ksize=pool_shape, strides=pool_shape, padding='SAME')
def conv_model(X, y):
    """Two conv/pool stages followed by a dense layer and a logistic-regression output.

    X is reshaped so that each example is a 100-long, 1-wide, single-channel
    strip; y is passed unchanged to skflow's logistic_regression.
    """
    # 4-D layout: batch, length (100), width (1), channels (1)
    strip = tf.reshape(X, [-1, 100, 1, 1])

    # stage 1: 32 filters over 5x1 windows, then pool along the length
    with tf.variable_scope('conv_layer1'):
        conv1 = skflow.ops.conv2d(strip, n_filters=32, filter_shape=[5, 1],
                                  bias=True, activation=tf.nn.relu)
        pooled1 = max_pool_2x1(conv1)

    # stage 2: 64 filters over 5x1 windows, then pool along the length again
    with tf.variable_scope('conv_layer2'):
        conv2 = skflow.ops.conv2d(pooled1, n_filters=64, filter_shape=[5, 1],
                                  bias=True, activation=tf.nn.relu)
        pooled2 = max_pool_2x1(conv2)

    # Flatten to one vector of 5 * 5 * 64 = 1600 values per example.
    # NOTE(review): 1600 also equals 25 * 64, which is the size expected if the
    # convolutions keep the length and each pool halves it (100 -> 50 -> 25);
    # confirm against skflow's conv2d defaults.
    flat = tf.reshape(pooled2, [-1, 5 * 5 * 64])

    # densely connected layer with 1024 neurons and dropout
    dense = skflow.ops.dnn(flat, [1024], activation=tf.nn.relu, dropout=0.5)
    return skflow.models.logistic_regression(dense, y)
# Training and predicting
# n_classes matches the three label values (0, 1, 2) that every model in this
# notebook is trained on; the previous value of 10 did not correspond to them.
classifier3 = skflow.TensorFlowEstimator(
    model_fn=conv_model, n_classes=3, batch_size=100, steps=20000,
    learning_rate=0.001)
In [130]:
# Convnet: 5-fold cross-validated accuracy on trial type.
# n_classes matches the three label values (0, 1, 2) passed below; the
# previous value of 10 did not correspond to them.
convnet = skflow.TensorFlowEstimator(
    model_fn=conv_model, n_classes=3, batch_size=100, steps=20000,
    learning_rate=0.001,verbose=0)
# Single train/test run instead of cross-validation:
# convnet.fit(traindata,trainlabs1D-1)
# print(accuracy_score(testlabs1D-1,convnet.predict(testdata)))
scores_convnet = cross_validation.cross_val_score(convnet, both_c, tt_c.squeeze()-1, cv=5,scoring='accuracy') #'f1_weighted')
print(scores_convnet)
print(np.mean(scores_convnet))
In [ ]:
In [ ]:
In [ ]:
In [676]:
# Logistic regression (once), evaluated on the data it was fitted to.
lr = LogisticRegression()

# Fit on the LDA projection.
# NOTE(review): this fit uses trial type (tt_c) as the target but the accuracy
# below is measured against choice (ch_c) - confirm that is intended.
lr.fit(X_r2,tt_c)
choice_labels = ch_c.squeeze()
# lr.fit(both_c,tt_c.squeeze())
# print(accuracy_score(testlabs1D,lr.predict(testdata)))
print(accuracy_score(choice_labels,lr.predict(X_r2)))

# Fit on the full feature matrix, with choice as the target.
lr.fit(both_c,ch_c)
print(accuracy_score(choice_labels,lr.predict(both_c)))
In [696]:
# Logistic regression with 5-fold cross validation (accuracy, trial type target)
lr = LogisticRegression()
scores_lr = cross_validation.cross_val_score(
    lr,
    both_c,
    tt_c.squeeze(),
    cv=5,
    scoring='accuracy',
)
print(scores_lr)
print(np.mean(scores_lr))
In [698]:
# 3 diff single layer NNs
# Only the first three entries of `hidden` are evaluated by this loop.
# import tensorflow.contrib.learn as skflow
hidden = [3,10,50,100]
for i in range(3):
    n_units = hidden[i]
    NN = skflow.TensorFlowDNNClassifier(
        hidden_units=[n_units], n_classes=3, batch_size=128, steps=1000,
        optimizer='Adam', learning_rate=0.001, verbose=0)
    scores_NN = cross_validation.cross_val_score(
        NN, both_c, tt_c.squeeze()-1, cv=5, scoring='f1_weighted')
    print(str(n_units) + ' hidden units: ' + str(scores_NN))
    print('Mean: '+ str(np.mean(scores_NN)))
In [694]:
# Leftover debugging output: prints the mean of `scores_NN` followed by two
# placeholder values ('blah' and 1).
print('Mean: ', np.mean(scores_NN),'blah',1)
In [197]:
# One hidden layer neural network (100 units), 5-fold weighted-F1 on trial type
NN = skflow.TensorFlowDNNClassifier(
    hidden_units=[100],
    n_classes=3,
    batch_size=128,
    steps=1000,
    optimizer='Adam',
    learning_rate=0.001,
    verbose=0,
)
scores_NN1 = cross_validation.cross_val_score(
    NN, both_c, tt_c.squeeze()-1, cv=5, scoring='f1_weighted')
print(scores_NN1)
print(np.mean(scores_NN1))
In [199]:
# Deep net with dropout
def my_model(X, y):
    """Three hidden layers of 10, 20 and 10 units with dropout 0.5, topped by logistic regression."""
    hidden_stack = skflow.ops.dnn(X, [10, 20, 10], dropout=0.5)
    output = skflow.models.logistic_regression(hidden_stack, y)
    return output
# 5-fold cross-validated accuracy of the dropout network on trial type.
NN_drop = skflow.TensorFlowEstimator(
    model_fn=my_model,
    n_classes=3,
    batch_size=128,
    steps=2000,
    optimizer='Adam',
    learning_rate=0.01,
    verbose=1,
)
scores_NN_drop = cross_validation.cross_val_score(
    NN_drop, both_c, tt_c.squeeze()-1, cv=5, scoring='accuracy') #'f1_weighted')
print(scores_NN_drop)
print(np.mean(scores_NN_drop))
In [259]:
# SVM: linear support vector classifier, 5-fold accuracy on trial type
from sklearn import svm
lin_svm = svm.LinearSVC()
scores_svm = cross_validation.cross_val_score(
    lin_svm,
    both_c,
    tt_c.squeeze(),
    cv=5,
    scoring='accuracy',
)
print(scores_svm)
print(np.mean(scores_svm))
In [248]:
# Naive bayes
# TODO: no naive Bayes model is fitted yet; this cell only looks up the
# documentation of cross_val_predict. help() is used instead of the IPython
# `?` suffix so the cell is plain Python.
help(cross_validation.cross_val_predict)
In [140]:
# Test all 3 models with all of the data (will overfit but we'll try and fix this later...)
# The logistic regression is given 1-based labels, the network 0-based ones.
all_labels = tt_c.squeeze()
lr.fit(both_c, all_labels)
NN.fit(both_c, all_labels-1)
# NN_drop.fit(both_c,tt_c.squeeze()-1)
Out[140]:
In [137]:
# Accuracy of each model on the full data set (the same data passed to fit above).
labels_one_based = tt_c.squeeze()
labels_zero_based = labels_one_based - 1
print('Log Reg: ', accuracy_score(labels_one_based, lr.predict(both_c)))
print('NN: ', accuracy_score(labels_zero_based, NN.predict(both_c)))
print('Deep NN: ', accuracy_score(labels_zero_based, NN_drop.predict(both_c)))
In [ ]:
In [523]:
# Scratch cell: pull out the theta rows whose trial type equals 1 and show
# the shape of that selection.
# plt.scatter(theta,kappa)
tt1 = tt == 1
# NOTE(review): the result of this call is discarded, and the builtin sum()
# adds up whatever iterating over `tt1` yields (for a DataFrame that is
# presumably its column labels, not its cells) - `tt1.sum()` may have been
# intended; confirm.
sum(tt1)
t = theta.values
t2 = t[tt1.squeeze()]
t2.shape
# tt1.shape
# fig, ax = plt.subplots(figsize=(10,5))
# _ =plt.plot(t[tt1.squeeze()])
Out[523]:
In [517]:
# Scratch cell: the commented lines are debugging probes kept for reference;
# only the last two statements execute.
# plt.plot(tt_c[these_trials])
# plt.imshow(both_c[these_trials.squeeze()],aspect = 0.1)
# both_c.shape
# these_trials.shape
# plt.plot(NN.predict(both_c[these_trials.squeeze()]))
# plt.plot(tt_c[these_trials.squeeze()]-1)
# preds.shape
# wrong.shape
# binary_preds
# tt_c[these_trials.squeeze()]
# i?
# np.ones_like(binary_preds)
# NN.predict_proba(both_c[these_trials.squeeze()]).shape
# tt_c.shape
# NOTE(review): this rebinds `trialtypes` to Left/Right names, which differ
# from the Anterior/Posterior names assigned in other cells; any cell run
# after this one picks up these names.
trialtypes = ['Left','Right','No Go']
trialtypes[1]
Out[517]:
In [122]:
# Fit `classifier` on all cleaned trials with 0-based trial-type labels.
# NOTE(review): `classifier` is not defined in any cell visible here, so this
# fails on a fresh kernel unless it is created elsewhere.
classifier.fit(both_c,tt_c.squeeze()-1)
Out[122]:
In [124]:
# Accuracy of `classifier` on all cleaned trials, against 0-based trial-type labels.
# NOTE(review): `classifier` is not defined in any cell visible here.
accuracy_score(tt_c.squeeze()-1,classifier.predict(both_c))
Out[124]:
In [193]:
# Mean feature vector of each trial type (1, 2, 3) over all cleaned trials.
trial_type = tt_c.squeeze()
plt.plot(both_c[trial_type == 1].mean(axis=0))
plt.plot(both_c[trial_type == 2].mean(axis=0))
plt.plot(both_c[trial_type == 3].mean(axis=0))
Out[193]:
In [194]:
# Mean feature vector of each trial type (1, 2, 3) within the training split.
train_type = trainlabs1D.squeeze()
plt.plot(traindata[train_type == 1].mean(axis=0))
plt.plot(traindata[train_type == 2].mean(axis=0))
plt.plot(traindata[train_type == 3].mean(axis=0))
Out[194]:
In [207]:
# Scratch cell: histogram of the predictions in `preds`, then display `wrong`.
# NOTE(review): both names are assigned in the ROC cells, so what is shown
# here depends on which of those cells ran last.
plt.hist(preds)
wrong
Out[207]:
In [ ]: