Logistic regression of mouse behaviour data

Using softmax in tensorflow

M.Evans 02.06.16


In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns # caused kernel to die 02.06.16
import random
from scipy.signal import resample


/Users/mathew/miniconda/envs/py35/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [2]:
%matplotlib inline
from IPython import display # For plotting intermediate results

In [2]:
# ! pip install pandas
# ! pip install seaborn
# import seaborn as sns
# ! pip install matplotlib
# ! pip install sklearn


Collecting pandas
  Downloading pandas-0.18.1-cp35-cp35m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (9.9MB)
    100% |████████████████████████████████| 9.9MB 69kB/s 
Requirement already satisfied (use --upgrade to upgrade): python-dateutil>=2 in /Users/mathew/miniconda/envs/py35/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): numpy>=1.7.0 in /Users/mathew/miniconda/envs/py35/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): pytz>=2011k in /Users/mathew/miniconda/envs/py35/lib/python3.5/site-packages (from pandas)
Requirement already satisfied (use --upgrade to upgrade): six>=1.5 in /Users/mathew/miniconda/envs/py35/lib/python3.5/site-packages (from python-dateutil>=2->pandas)
Installing collected packages: pandas
Successfully installed pandas-0.18.1
Collecting seaborn
  Downloading seaborn-0.7.0.tar.gz (154kB)
    100% |████████████████████████████████| 163kB 28kB/s 
Collecting scipy (from seaborn)
  Downloading scipy-0.17.1-cp35-cp35m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (20.2MB)
    100% |████████████████████████████████| 20.2MB 21kB/s 
Requirement already satisfied (use --upgrade to upgrade): numpy>=1.6.2 in /Users/mathew/miniconda/envs/py35/lib/python3.5/site-packages (from scipy->seaborn)
Building wheels for collected packages: seaborn
  Running setup.py bdist_wheel for seaborn ... - \ done
  Stored in directory: /Users/mathew/Library/Caches/pip/wheels/e0/94/01/731d82dc437e1c8b65130956028f3fe693943c3e499a676dc9
Successfully built seaborn
Installing collected packages: scipy, seaborn
Successfully installed scipy-0.17.1 seaborn-0.7.0

In [3]:
# Import the data. For one mouse ATM
theta = pd.read_csv('~/work/whiskfree/data/theta_36.csv',header=None)
kappa = pd.read_csv('~/work/whiskfree/data/kappa_36.csv',header=None)
tt = pd.read_csv('~/work/whiskfree/data/trialtype_36.csv',header=None)
ch = pd.read_csv('~/work/whiskfree/data/choice_36.csv',header=None)

In [4]:
from scipy.signal import resample
from scipy.stats import zscore

In [5]:
# Restrict analysis to 500ms post-touch and downsample with resample
theta_r = np.array([[resample(theta.values.squeeze()[i,950:1440],50)] for i in range(0,theta.shape[0])])
theta_r = zscore(theta_r.squeeze(),axis=None)
print(theta_r.shape)
_ = plt.plot(theta_r[:10].T)


(1790, 50)

In [6]:
kappa_r = np.array([[resample(kappa.values.squeeze()[i,950:1440],50)] for i in range(0,kappa.shape[0])])
kappa_r = zscore(kappa_r.squeeze(),axis=None)
print(kappa_r.shape)
_ = plt.plot(kappa_r[:10].T)
# _ = plt.plot(zscore(kappa_r[:10],axis=1).T)
# fig,ax = plt.subplots(1,2)
# ax[0].imshow(zscore(kappa_r,axis=None),aspect=float(50/1790),cmap='seismic')
# ax[1].imshow(kappa_r,aspect=float(50/1790),cmap='seismic')


(1790, 50)

In [7]:
kappa_df = pd.DataFrame(kappa_r)
theta_df = pd.DataFrame(theta_r)

In [229]:
kappa_df[:10].T.plot()


Out[229]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f65c8c486a0>

In [8]:
both_df = pd.concat([theta_df,kappa_df],axis=1)
both_df.shape


Out[8]:
(1790, 100)

In [9]:
fig, ax = plt.subplots(figsize=(10,5))
plt.imshow(both_df.values.squeeze(),aspect=float(100/1790))
plt.colorbar()


Out[9]:
<matplotlib.colorbar.Colorbar at 0x116a6bcc0>

In [44]:
np.mean?

Trying to classify trialtype from theta/kappa/both


In [10]:
# First generate a clean datasets, dropping trialtype = 0, as numpy arrays
clean = tt.values !=0
tt_c = tt[tt.values !=0].values
both = both_df.values
both_c = both[clean.squeeze(),:]
both_c.shape


Out[10]:
(1764, 100)

In [11]:
# Turn labels into 'one-hot' array (using a great one-liner from reddit :sunglasses:)
labs = np.eye(3)[tt_c-1]
# y[np.arange(3), a] = 1
labs = labs.squeeze()

fig, ax = plt.subplots(2,1,figsize = (20,2))
ax[0].plot(tt_c[0:100])

ax[1].imshow(labs[0:100,:].T,interpolation = 'none',origin='lower')
labs.shape


Out[11]:
(1764, 3)

In [12]:
# Let's use 20% of the data for testing and 80% for training
trainsize = int(len(both_c) * 0.8)
testsize = len(both_c) - trainsize
print('Desired training/test set sizes:',trainsize, testsize)

subset = random.sample(range(len(both_c)),trainsize)
traindata = both_c[subset,:]
trainlabs = labs[subset,:]
testdata = np.delete(both_c,subset,axis=0)
testlabs = np.delete(labs,subset,axis=0)

print('training set shape:',traindata.shape)
print('test set shape:',testdata.shape)
print('training labels shape:',trainlabs.shape)
print('test labels shape:',testlabs.shape)


Desired training/test set sizes: 1411 353
training set shape: (1411, 100)
test set shape: (353, 100)
training labels shape: (1411, 3)
test labels shape: (353, 3)

In [13]:
# Construct the data flow graph following the TF beginner's MNIST example
x = tf.placeholder(tf.float32,[None,100]) # data

W = tf.Variable(tf.zeros([100,3])) # W and b are model variables to be fit by the model
b = tf.Variable(tf.zeros([3]))     # 3 possible trial types

y = tf.nn.softmax(tf.matmul(x,W) + b) # This is the softmax nn model

y_ = tf.placeholder(tf.float32,[None,3]) # Placeholder for correct answers (test labels)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) # Cross entropy loss

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) # training step

In [14]:
# Function to load a random batch of data
def next_batch(data,labels,n):
    subset = random.sample(range(len(data)),n) 
    batch_data = data[subset,:]
    batch_labels = labels[subset,:]
    
    return batch_data, batch_labels

In [15]:
# Test the next_batch function
from IPython import display
fig,ax = plt.subplots(2,1)

for i in range(10):
    batch_xs, batch_ys = next_batch(traindata,trainlabs,10)
    ax[0].plot(batch_xs.T)
    ax[1].imshow(batch_ys.T,interpolation='none')
    display.clear_output(wait=True)
    display.display(plt.gcf())



In [ ]:


In [16]:
# Set wheels in motion and train the model
init = tf.initialize_all_variables()

sess = tf.Session() # Start tf session
sess.run(init)

In [17]:
# Run a training loop
for i in range(10000):
    batch_xs, batch_ys = next_batch(traindata,trainlabs,250)
    sess.run(train_step,feed_dict={x: batch_xs, y_: batch_ys})

In [18]:
# Evaluate model performance
correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
print(sess.run(accuracy,feed_dict={x: testdata,y_:testlabs}))


0.331445

In [19]:
# Compare the mouse to the model with a confusion matrix
preds = sess.run(y,feed_dict={x:testdata})
preds


Out[19]:
array([[  6.99532963e-03,   9.88953471e-01,   4.05115634e-03],
       [  4.29192781e-01,   4.70996886e-01,   9.98103470e-02],
       [  8.25226028e-03,   9.91714895e-01,   3.28192582e-05],
       ..., 
       [  6.31799772e-02,   1.14001594e-01,   8.22818398e-01],
       [  1.66652441e-01,   3.54082100e-02,   7.97939360e-01],
       [  7.64948130e-01,   1.62391827e-01,   7.26600215e-02]], dtype=float32)

In [25]:
with sns.axes_style("white"):
    fig, ax = plt.subplots(2,1,figsize=[20,1])
ax[0].imshow(preds.T,interpolation=None,aspect = 3)
ax[1].imshow(testlabs.T,interpolation=None,aspect = 3)


Out[25]:
<matplotlib.image.AxesImage at 0x116f4a240>

In [27]:
fig,ax = plt.subplots(1,2)
ax[0].hist(np.argmax(preds,1))
ax[1].hist(np.argmax(testlabs,1))


Out[27]:
(array([ 138.,    0.,    0.,    0.,    0.,  100.,    0.,    0.,    0.,  115.]),
 array([ 0. ,  0.2,  0.4,  0.6,  0.8,  1. ,  1.2,  1.4,  1.6,  1.8,  2. ]),
 <a list of 10 Patch objects>)

In [28]:
from sklearn.metrics import confusion_matrix


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-28-3c24bfdfd493> in <module>()
----> 1 from sklearn.metrics import confusion_matrix

ImportError: No module named 'sklearn'

In [29]:



Collecting sklearn
  Downloading sklearn-0.0.tar.gz
Collecting scikit-learn (from sklearn)
  Downloading scikit_learn-0.17.1-cp35-cp35m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (3.7MB)
    100% |████████████████████████████████| 3.7MB 171kB/s 
Building wheels for collected packages: sklearn
  Running setup.py bdist_wheel for sklearn ... - \ done
  Stored in directory: /Users/mathew/Library/Caches/pip/wheels/d7/db/a3/1b8041ab0be63b5c96c503df8e757cf205c2848cf9ef55f85e
Successfully built sklearn
Installing collected packages: scikit-learn, sklearn
Successfully installed scikit-learn-0.17.1 sklearn-0.0

In [ ]:
# To do: repeat but with combined data from all mice (interesting to see if this helps)