Etienne Ackermann, 08/05/2015
The data can be downloaded from the CRCNS (Collaborative Research in Computational Neuroscience) website; in particular, we use the hc-3 data set.
Note that both gor01 and vvp01 were recorded with a Neuralynx recording system at 32,552 Hz, amplified 1000x, and then bandpass filtered (1–5 kHz). The signal was further downsampled to 1,252 Hz and stored in the .eeg file, a binary file with the same number of channels as the raw data: up to 32 (four-shank probe) or 64 (eight-shank probe) signed short integers (2 bytes each) per time step, one per recording site (channel). The actual number of channels is specified in the .xml file and is not always a multiple of 8, because bad channels (due to very high impedance or a broken shank) were removed from the data.
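Since the .eeg file is simply a flat stream of interleaved 16-bit samples, it can be read directly with numpy. The sketch below is illustrative only; the file name and channel count are hypothetical placeholders (the true channel count must be taken from the session's .xml file):

import numpy as np

# hypothetical values -- replace with the channel count from the session's .xml file
eeg_path = 'session.eeg'   # placeholder file name
n_channels = 31            # e.g. a four-shank probe with one bad channel removed
fs = 1252                  # .eeg sampling rate in Hz

# each time step holds one signed 16-bit integer per channel, interleaved
raw = np.fromfile(eeg_path, dtype=np.int16)
eeg = raw.reshape(-1, n_channels)   # shape: (n_samples, n_channels)
print(eeg.shape[0] / fs, 'seconds of data across', n_channels, 'channels')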
At this point we are interested in learning from the data, and we do this by considering sequences of spike count observations. That is, each observation is a $u$-dimensional vector, where $u$ is the number of units that we recorded, with each element corresponding to the number of spikes observed within a time bin of 250 ms or 10 ms, depending on whether we are considering the behavioral or the replay timescale.
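As an illustration of this observation format, the following sketch bins per-unit spike times into spike-count vectors. The spike times and variable names here are hypothetical; the pickled files loaded below already contain binned counts:

import numpy as np

def bin_spikes(spike_times, t_start, t_stop, bin_size):
    """Bin per-unit spike times (in seconds) into an (n_bins, n_units) count matrix."""
    edges = np.arange(t_start, t_stop + bin_size, bin_size)
    counts = np.vstack([np.histogram(st, bins=edges)[0] for st in spike_times]).T
    return counts

# hypothetical example: 3 units, 1 s of data, 250 ms (behavioral timescale) bins
spike_times = [np.array([0.05, 0.30, 0.31]), np.array([0.70]), np.array([0.10, 0.90])]
obs = bin_spikes(spike_times, t_start=0.0, t_stop=1.0, bin_size=0.250)
print(obs.shape)   # (4, 3): one u-dimensional count vector per bin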
In [94]:
import pickle

# load SWR spikes for both environments (binned in 10 ms):
with open('../../Data/SWR_train1rn.pickle', 'rb') as f:
    SWR_train1 = pickle.load(f)
with open('../../Data/SWR_train2rn.pickle', 'rb') as f:
    SWR_train2 = pickle.load(f)
with open('../../Data/SWR_test1rn.pickle', 'rb') as f:
    SWR_test1 = pickle.load(f)
with open('../../Data/SWR_test2rn.pickle', 'rb') as f:
    SWR_test2 = pickle.load(f)

# load training spike count data, binned in 250 ms bins:
with open('../../Data/BVR_train1rn_noswr_noth1.pickle', 'rb') as f:
    BVR_train1 = pickle.load(f)
with open('../../Data/BVR_train2rn_noswr_th1.pickle', 'rb') as f:
    BVR_train2 = pickle.load(f)

# load testing spike count data, binned in 250 ms bins:
with open('../../Data/BVR_test1rn_noswr_noth1.pickle', 'rb') as f:
    BVR_test1 = pickle.load(f)
with open('../../Data/BVR_test2rn_noswr_th1.pickle', 'rb') as f:
    BVR_test2 = pickle.load(f)
In [2]:
import sys
sys.path.insert(0, '../')
import numpy as np
import pandas as pd
import pickle
import seaborn as sns
#import yahmm as ym
from hmmlearn import hmm # see https://github.com/ckemere/hmmlearn
from matplotlib import pyplot as plt
from pandas import Series, DataFrame
from efunctions import * # load my helper functions
%matplotlib inline
In [99]:
def remove_empty_sequences(data):
    """Remove (in place) any sequences in data that contain no observations."""
    tmp = np.array(data)
    emptySequences = []
    num_sequences = tmp.shape[0]
    for ss in np.arange(0, num_sequences):
        if len(tmp[ss]) == 0:
            emptySequences.append(ss)
    print(emptySequences)
    # delete from the back so that earlier indices remain valid
    for dd in np.arange(0, len(emptySequences)):
        idx = emptySequences[len(emptySequences) - dd - 1]
        del data[idx]
In [104]:
def yahmmdata_to_hmmlearn_data(yahmmdata):
    """Stack a list of variable-length observation sequences into the single
    (total_length, num_cells) array plus per-sequence lengths that hmmlearn expects."""
    SequenceLengths = []
    tmp = np.array(yahmmdata)
    num_sequences = tmp.shape[0]
    for ss in np.arange(0, num_sequences):
        SequenceLengths.append(len(tmp[ss]))
    numCells = np.array(tmp[0]).shape[1]
    TotalSequenceLength = np.array(SequenceLengths).sum()
    StackedData = np.zeros((TotalSequenceLength, numCells))
    rr = 0
    for ss in np.arange(0, num_sequences):
        StackedData[rr:rr + SequenceLengths[ss], :] = np.array(tmp[ss])
        rr = rr + SequenceLengths[ss]
    print("{0} sequences stacked for hmmlearn".format(num_sequences))
    return SequenceLengths, StackedData
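Note that hmmlearn operates on a single stacked observation array together with a list of per-sequence lengths, rather than on a list of sequences. Individual sequences can be recovered from the stacked array with a cumulative sum over the lengths; this is the pattern used in the scoring cells below. A minimal sketch:

# illustrative only: recover the first sequence from a stacked array
lengths, stacked = yahmmdata_to_hmmlearn_data(BVR_train1)
limits = np.cumsum(np.array([0] + lengths))       # start/end indices of each sequence
first_sequence = stacked[limits[0]:limits[1], :]  # observations of sequence 0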
In [105]:
TrainingSequenceLengthsBVRtr2, StackedTrainingDataBVRtr2 = yahmmdata_to_hmmlearn_data(BVR_train2)
TrainingSequenceLengthsSWRtr2, StackedTrainingDataSWRtr2 = yahmmdata_to_hmmlearn_data(SWR_train2)
TestingSequenceLengthsSWRts2, StackedTestingDataSWRts2 = yahmmdata_to_hmmlearn_data(SWR_test2)
TestingSequenceLengthsBVRts2, StackedTestingDataBVRts2 = yahmmdata_to_hmmlearn_data(BVR_test2)
TrainingSequenceLengthsBVRtr1, StackedTrainingDataBVRtr1 = yahmmdata_to_hmmlearn_data(BVR_train1)
TestingSequenceLengthsSWRts1, StackedTestingDataSWRts1 = yahmmdata_to_hmmlearn_data(SWR_test1)
TrainingSequenceLengthsSWRtr1, StackedTrainingDataSWRtr1 = yahmmdata_to_hmmlearn_data(SWR_train1)
remove_empty_sequences(BVR_test1)
TestingSequenceLengthsBVRts1, StackedTestingDataBVRts1 = yahmmdata_to_hmmlearn_data(BVR_test1) # error ??? will investigate
In [5]:
# learn (fit) models to data:
NStates = 15
# using BVR1 training data
hmm15_1 = hmm.PoissonHMM(n_components=NStates, n_iter=20, init_params='stm', params='stm', verbose=True)
hmm15_1.fit(StackedTrainingDataBVRtr1, lengths=TrainingSequenceLengthsBVRtr1)
# using BVR2 training data
hmm15_2 = hmm.PoissonHMM(n_components=NStates, n_iter=20, init_params='stm', params='stm', verbose=True)
hmm15_2.fit(StackedTrainingDataBVRtr2, lengths=TrainingSequenceLengthsBVRtr2)
# using SWR1 training data
hmm15_swr1 = hmm.PoissonHMM(n_components=NStates, n_iter=20, init_params='stm', params='stm', verbose=True)
hmm15_swr1.fit(StackedTrainingDataSWRtr1, lengths=TrainingSequenceLengthsSWRtr1)
# using SWR2 training data
hmm15_swr2 = hmm.PoissonHMM(n_components=NStates, n_iter=20, init_params='stm', params='stm', verbose=True)
hmm15_swr2.fit(StackedTrainingDataSWRtr2, lengths=TrainingSequenceLengthsSWRtr2)
In [106]:
####################################################################
# BVR test {1,2} in BVR train {1}
####################################################################
bvr2_in_bvr1_log_prob_test = np.zeros((len(TestingSequenceLengthsBVRts2), 1))
bvr1_in_bvr1_log_prob_test = np.zeros((len(TestingSequenceLengthsBVRts1), 1))

seqlimits = np.cumsum(np.array([0] + TestingSequenceLengthsBVRts2))
for ee in np.arange(0, len(TestingSequenceLengthsBVRts2)):
    obs = StackedTestingDataBVRts2[seqlimits[ee]:seqlimits[ee+1], :]
    bvr2_in_bvr1_log_prob_test[ee] = hmm15_1.score(obs)

seqlimits = np.cumsum(np.array([0] + TestingSequenceLengthsBVRts1))
for ee in np.arange(0, len(TestingSequenceLengthsBVRts1)):
    obs = StackedTestingDataBVRts1[seqlimits[ee]:seqlimits[ee+1], :]
    bvr1_in_bvr1_log_prob_test[ee] = hmm15_1.score(obs)

####################################################################
# SWR test {1,2} in BVR train {2}
####################################################################
swr2_in_bvr2_log_prob_test = np.zeros((len(TestingSequenceLengthsSWRts2), 1))
swr1_in_bvr2_log_prob_test = np.zeros((len(TestingSequenceLengthsSWRts1), 1))

seqlimits = np.cumsum(np.array([0] + TestingSequenceLengthsSWRts2))
for ee in np.arange(0, len(TestingSequenceLengthsSWRts2)):
    # scale 10 ms SWR counts by 25 to match the 250 ms bins of the behavioral-timescale models
    obs = StackedTestingDataSWRts2[seqlimits[ee]:seqlimits[ee+1], :] * 25
    swr2_in_bvr2_log_prob_test[ee] = hmm15_2.score(obs)

seqlimits = np.cumsum(np.array([0] + TestingSequenceLengthsSWRts1))
for ee in np.arange(0, len(TestingSequenceLengthsSWRts1)):
    obs = StackedTestingDataSWRts1[seqlimits[ee]:seqlimits[ee+1], :] * 25
    swr1_in_bvr2_log_prob_test[ee] = hmm15_2.score(obs)

####################################################################
# SWR test {1,2} in BVR train {1,2} --- log odds
####################################################################
swr1ts_in_bvr1tr_log_prob = np.zeros((len(TestingSequenceLengthsSWRts1), 1))
swr2ts_in_bvr1tr_log_prob = np.zeros((len(TestingSequenceLengthsSWRts2), 1))
swr1ts_in_bvr2tr_log_prob = np.zeros((len(TestingSequenceLengthsSWRts1), 1))
swr2ts_in_bvr2tr_log_prob = np.zeros((len(TestingSequenceLengthsSWRts2), 1))

seqlimits = np.cumsum(np.array([0] + TestingSequenceLengthsSWRts1))
for ee in np.arange(0, len(TestingSequenceLengthsSWRts1)):
    obs = StackedTestingDataSWRts1[seqlimits[ee]:seqlimits[ee+1], :] * 25
    swr1ts_in_bvr1tr_log_prob[ee] = hmm15_1.score(obs)
    swr1ts_in_bvr2tr_log_prob[ee] = hmm15_2.score(obs)

seqlimits = np.cumsum(np.array([0] + TestingSequenceLengthsSWRts2))
for ee in np.arange(0, len(TestingSequenceLengthsSWRts2)):
    obs = StackedTestingDataSWRts2[seqlimits[ee]:seqlimits[ee+1], :] * 25
    swr2ts_in_bvr1tr_log_prob[ee] = hmm15_1.score(obs)
    swr2ts_in_bvr2tr_log_prob[ee] = hmm15_2.score(obs)
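The scoring above repeats the same cumulative-sum slicing for every data set and model. A small helper like the sketch below (a refactoring suggestion, not part of the original analysis) could consolidate that pattern:

def score_sequences(model, stacked_data, lengths, scale=1):
    """Score each sequence in a stacked hmmlearn array and return per-sequence log probabilities."""
    limits = np.cumsum(np.array([0] + list(lengths)))
    log_probs = np.zeros((len(lengths), 1))
    for ee in np.arange(0, len(lengths)):
        log_probs[ee] = model.score(stacked_data[limits[ee]:limits[ee+1], :] * scale)
    return log_probs

# e.g. swr1ts_in_bvr1tr_log_prob = score_sequences(hmm15_1, StackedTestingDataSWRts1,
#                                                  TestingSequenceLengthsSWRts1, scale=25)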
In [107]:
sns.set(rc={'figure.figsize': (6, 4),'lines.linewidth': 3, 'font.size': 16, 'axes.labelsize': 14, 'legend.fontsize': 12, 'ytick.labelsize': 12, 'xtick.labelsize': 12 })
sns.set_style("white")
f, ( ax1) = plt.subplots(1,1)
sns.distplot( bvr2_in_bvr1_log_prob_test, bins=20, hist=False, kde=True, rug=False, axlabel='log probability', ax=ax1, kde_kws={"lw": 3, "label": "log p(Y=2|e=1)"} );
sns.distplot( bvr1_in_bvr1_log_prob_test, bins=20, hist=False, kde=True, rug=False, axlabel='log probability', ax=ax1, kde_kws={"lw": 3, "label": "log p(Y=1|e=1)"} );
ax1.set_title('BVR_test{1,2} in BVR_train{1}')
#saveFigure("figures/?.pdf")
Out[107]: [figure: kernel density estimates of per-sequence log probabilities; title 'BVR_test{1,2} in BVR_train{1}']
In [108]:
sns.set(rc={'figure.figsize': (6, 4),'lines.linewidth': 3, 'font.size': 16, 'axes.labelsize': 14, 'legend.fontsize': 12, 'ytick.labelsize': 12, 'xtick.labelsize': 12 })
sns.set_style("white")
f, ( ax1 ) = plt.subplots(1,1)
sns.distplot( swr1_in_bvr2_log_prob_test, bins=20, hist=False, kde=True, rug=False, axlabel='log probability', ax=ax1, kde_kws={"lw": 3, "label": "log p(Y=1|e=2)"} );
sns.distplot( swr2_in_bvr2_log_prob_test, bins=20, hist=False, kde=True, rug=False, axlabel='log probability', ax=ax1, kde_kws={"lw": 3, "label": "log p(Y=2|e=2)"} );
ax1.set_title('SWR_test{1,2} in BVR_train{2}')
#saveFigure("figures/?.pdf")
Out[108]: [figure: kernel density estimates of per-sequence log probabilities; title 'SWR_test{1,2} in BVR_train{2}']
In [24]:
sns.set(rc={'figure.figsize': (6, 4),'lines.linewidth': 3, 'font.size': 16, 'axes.labelsize': 14, 'legend.fontsize': 12, 'ytick.labelsize': 12, 'xtick.labelsize': 12 })
sns.set_style("white")
f, ( ax1) = plt.subplots(1)
sns.distplot( swr1ts_in_bvr1tr_log_prob - swr1ts_in_bvr2tr_log_prob, bins=20, hist=False, kde=True, rug=False, axlabel='log probability', ax=ax1, kde_kws={"lw": 3, "label": "log p(Y=1|e=1)/p(Y=1|e=2)"} );
sns.distplot( swr2ts_in_bvr1tr_log_prob - swr2ts_in_bvr2tr_log_prob, bins=20, hist=False, kde=True, rug=False, axlabel='log probability', ax=ax1, kde_kws={"lw": 3, "label": "log p(Y=2|e=1)/p(Y=2|e=2)"} );
ax1.set_title('log odds of SWR (test) sequences in BVR (train) models')
#saveFigure("figures/?.pdf")
Out[24]: [figure: kernel density estimates of log odds; title 'log odds of SWR (test) sequences in BVR (train) models']