In [10]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2
%qtconsole
import sys
import collections
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import tqdm
import patsy
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats

sys.path.append('../src/')
import data_processing
import ripple_decoding
import ripple_detection

Animal = collections.namedtuple('Animal', {'directory', 'short_name'})
animals = {'HPa': Animal(directory='HPa_direct', short_name='HPa')}
epoch_index = ('HPa', 3, 4)
sampling_frequency = 1500

In [2]:
print('\nDecoding ripples for Animal {0}, Day {1}, Epoch #{2}:'.format(*epoch_index))
# Include only CA1 neurons with spikes
neuron_info = data_processing.make_neuron_dataframe(animals)[epoch_index].dropna()
tetrode_info = data_processing.make_tetrode_dataframe(animals)[epoch_index]
neuron_info = pd.merge(tetrode_info, neuron_info,
                       on=['animal', 'day', 'epoch_ind', 'tetrode_number', 'area'],
                       how='right', right_index=True).set_index(neuron_info.index)
neuron_info = neuron_info[neuron_info.area.isin(['CA1', 'iCA1']) &
                          (neuron_info.numspikes > 0) &
                          ~neuron_info.descrip.str.endswith('Ref').fillna(False)]

# Train on when the rat is moving
position_info = data_processing.get_interpolated_position_dataframe(
    epoch_index, animals)
spikes_data = [data_processing.get_spike_indicator_dataframe(neuron_index, animals)
               for neuron_index in neuron_info.index]

train_position_info = position_info.query('speed > 4')
train_spikes_data = [spikes_datum[position_info.speed > 4]
                     for spikes_datum in spikes_data]


Decoding ripples for Animal HPa, Day 3, Epoch #4:

In [5]:
formula = '1 + trajectory_direction * bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')

def glmfit(spikes, design_matrix, ind):
    try:
        return sm.GLM(spikes, design_matrix, family=sm.families.Poisson(),
                      drop='missing').fit(maxiter=30)
    except np.linalg.linalg.LinAlgError:
        warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))
        return np.nan

formula = '1 + trajectory_direction * bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
for ind, spikes in enumerate(train_spikes_data):
    fit = glmfit(spikes, design_matrix, ind)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-5-c3cf3410da09> in <module>()
     15     formula, train_position_info, return_type='dataframe')
     16 for ind, spikes in enumerate(train_spikes_data):
---> 17     fit = glmfit(spikes, design_matrix, ind)

<ipython-input-5-c3cf3410da09> in glmfit(spikes, design_matrix, ind)
      6     try:
      7         return sm.GLM(spikes, design_matrix, family=sm.families.Poisson(),
----> 8                       drop='missing').fit(maxiter=30)
      9     except np.linalg.linalg.LinAlgError:
     10         warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py in __init__(self, endog, exog, family, offset, exposure, missing, **kwargs)
    197         super(GLM, self).__init__(endog, exog, missing=missing,
    198                                   offset=offset, exposure=exposure,
--> 199                                   **kwargs)
    200         self._check_inputs(family, self.offset, self.exposure, self.endog)
    201         if offset is None:

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
    184 
    185     def __init__(self, endog, exog=None, **kwargs):
--> 186         super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
    187         self.initialize()
    188 

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
     58         hasconst = kwargs.pop('hasconst', None)
     59         self.data = self._handle_data(endog, exog, missing, hasconst,
---> 60                                       **kwargs)
     61         self.k_constant = self.data.k_constant
     62         self.exog = self.data.exog

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/base/model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
     82 
     83     def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
---> 84         data = handle_data(endog, exog, missing, hasconst, **kwargs)
     85         # kwargs arrays could have changed, easier to just attach here
     86         for key in kwargs:

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/base/data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
    564     klass = handle_data_class_factory(endog, exog)
    565     return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
--> 566                  **kwargs)

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/base/data.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
     74         # this has side-effects, attaches k_constant and const_idx
     75         self._handle_constant(hasconst)
---> 76         self._check_integrity()
     77         self._cache = resettable_cache()
     78 

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/base/data.py in _check_integrity(self)
    450                 (hasattr(endog, 'index') and hasattr(exog, 'index')) and
    451                 not self.orig_endog.index.equals(self.orig_exog.index)):
--> 452             raise ValueError("The indices for endog and exog are not aligned")
    453         super(PandasData, self)._check_integrity()
    454 

ValueError: The indices for endog and exog are not aligned

In [6]:
ind


Out[6]:
0

In [7]:
len(spikes)


Out[7]:
681515

In [8]:
len(design_matrix)


Out[8]:
681430

In [9]:
len(train_position_info)


Out[9]:
681515

In [17]:
formula = '1 + bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(formula, train_position_info, return_type='dataframe')
len(design_matrix)


Out[17]:
681515

In [18]:
formula = '1 + trajectory_direction'
design_matrix = patsy.dmatrix(formula, train_position_info, return_type='dataframe')
len(design_matrix)


Out[18]:
681430

In [19]:
train_position_info.trajectory_direction


Out[19]:
time
4682.524100    Inbound
4682.524767    Inbound
4682.525433    Inbound
4682.526100    Inbound
4682.526767    Inbound
4682.527433    Inbound
4682.528100    Inbound
4682.528767    Inbound
4682.529433    Inbound
4682.530100    Inbound
4682.530767    Inbound
4682.531433    Inbound
4682.532100    Inbound
4682.532767    Inbound
4682.533433    Inbound
4682.534100    Inbound
4682.534767    Inbound
4682.535433    Inbound
4682.536100    Inbound
4682.536767    Inbound
4682.537433    Inbound
4682.538100    Inbound
4682.538767    Inbound
4682.539433    Inbound
4682.540100    Inbound
4682.540767    Inbound
4682.541433    Inbound
4682.542100    Inbound
4682.542767    Inbound
4682.543433    Inbound
                ...   
5894.994100        NaN
5894.994767        NaN
5894.995433        NaN
5894.996100        NaN
5894.996767        NaN
5894.997433        NaN
5894.998100        NaN
5894.998767        NaN
5894.999433        NaN
5895.000100        NaN
5895.000767        NaN
5895.001433        NaN
5895.002100        NaN
5895.002767        NaN
5895.003433        NaN
5895.004100        NaN
5895.004767        NaN
5895.005433        NaN
5895.006100        NaN
5895.006767        NaN
5895.007433        NaN
5895.008100        NaN
5895.008767        NaN
5895.009433        NaN
5895.010100        NaN
5895.010767        NaN
5895.011433        NaN
5895.012100        NaN
5895.012767        NaN
5895.013433        NaN
Name: trajectory_direction, dtype: object

In [27]:
formula = '1 + trajectory_direction'
design_matrix = patsy.dmatrix(formula, train_position_info, return_type='dataframe')
len(design_matrix)


Out[27]:
681430

In [48]:
fit = glmfit(spikes.reindex(design_matrix.index), design_matrix, ind)
fit.summary()


Out[48]:
Generalized Linear Model Regression Results
Dep. Variable: is_spike No. Observations: 681430
Model: GLM Df Residuals: 681428
Model Family: Poisson Df Model: 1
Link Function: log Scale: 1.0
Method: IRLS Log-Likelihood: -2902.0
Date: Sat, 19 Nov 2016 Deviance: 5073.9
Time: 16:38:28 Pearson chi2: 6.81e+05
No. Iterations: 13
coef std err z P>|z| [95.0% Conf. Int.]
Intercept -6.7828 0.054 -125.986 0.000 -6.888 -6.677
trajectory_direction[T.Outbound] -3.0613 0.230 -13.310 0.000 -3.512 -2.610

In [50]:
formula = '1 + trajectory_direction * bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')

def glmfit2(spikes, design_matrix, ind):
    try:
        return sm.GLM(spikes.reindex(design_matrix.index), design_matrix,
                      family=sm.families.Poisson(),
                      drop='missing').fit(maxiter=30)
    except np.linalg.linalg.LinAlgError:
        warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))
        return np.nan

formula = '1 + trajectory_direction * bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
for ind, spikes in enumerate(train_spikes_data):
    print(ind)
    fit = glmfit2(spikes, design_matrix, ind)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24

In [ ]: