In [2]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2
%qtconsole
import sys
import collections
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import tqdm
import patsy
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats

sys.path.append('../src/')
import data_processing
import ripple_decoding
import ripple_detection

Animal = collections.namedtuple('Animal', {'directory', 'short_name'})
animals = {'HPa': Animal(directory='HPa_direct', short_name='HPa')}
epoch_index = ('HPa', 8, 4)
sampling_frequency = 1500

In [3]:
print('\nDecoding ripples for Animal {0}, Day {1}, Epoch #{2}:'.format(*epoch_index))
# Include only CA1 neurons with spikes
neuron_info = data_processing.make_neuron_dataframe(animals)[epoch_index].dropna()
tetrode_info = data_processing.make_tetrode_dataframe(animals)[epoch_index]
neuron_info = pd.merge(tetrode_info, neuron_info,
                       on=['animal', 'day', 'epoch_ind', 'tetrode_number', 'area'],
                       how='right', right_index=True).set_index(neuron_info.index)
neuron_info = neuron_info[neuron_info.area.isin(['CA1', 'iCA1']) &
                          (neuron_info.numspikes > 0) &
                          ~neuron_info.descrip.str.endswith('Ref').fillna(False)]

# Train on when the rat is moving
position_info = data_processing.get_interpolated_position_dataframe(
    epoch_index, animals)
spikes_data = [data_processing.get_spike_indicator_dataframe(neuron_index, animals)
               for neuron_index in neuron_info.index]

train_position_info = position_info.query('speed > 4')
train_spikes_data = [spikes_datum[position_info.speed > 4]
                     for spikes_datum in spikes_data]


Decoding ripples for Animal HPa, Day 8, Epoch #4:

In [9]:
def glmfit(spikes, design_matrix, ind):
    try:
        return sm.GLM(spikes.reindex(design_matrix.index), design_matrix,
                      family=sm.families.Poisson(),
                      drop='missing').fit(maxiter=30)
    except np.linalg.linalg.LinAlgError:
        warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))
        return np.nan
    
formula = '1 + trajectory_direction * bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
for ind, spikes in enumerate(train_spikes_data):
    print(ind)
    print(glmfit(spikes, design_matrix, ind).summary())


0
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: invalid value encountered in true_divide
  endog_mu = self._clean(endog/mu)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-9-9057eeef194c> in <module>()
     13 for ind, spikes in enumerate(train_spikes_data):
     14     print(ind)
---> 15     print(glmfit(spikes, design_matrix, ind).summary())

<ipython-input-9-9057eeef194c> in glmfit(spikes, design_matrix, ind)
      3         return sm.GLM(spikes.reindex(design_matrix.index), design_matrix,
      4                       family=sm.families.Poisson(),
----> 5                       drop='missing').fit(maxiter=30)
      6     except np.linalg.linalg.LinAlgError:
      7         warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
    690         dev = self.family.deviance(self.endog, mu)
    691         if np.isnan(dev):
--> 692             raise ValueError("The first guess on the deviance function "
    693                              "returned a nan.  This could be a boundary "
    694                              " problem and should be reported.")

ValueError: The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.

In [11]:
def glmfit(spikes, design_matrix, ind):
    try:
        return sm.GLM(spikes.reindex(design_matrix.index), design_matrix,
                      family=sm.families.Poisson(),
                      drop='missing').fit(maxiter=30)
    except np.linalg.linalg.LinAlgError:
        warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))
        return np.nan
    
formula = '1 + trajectory_direction'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
for ind, spikes in enumerate(train_spikes_data):
    print(ind)
    print(glmfit(spikes, design_matrix, ind).summary())


0
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: invalid value encountered in true_divide
  endog_mu = self._clean(endog/mu)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-790340c0b19e> in <module>()
     13 for ind, spikes in enumerate(train_spikes_data):
     14     print(ind)
---> 15     print(glmfit(spikes, design_matrix, ind).summary())

<ipython-input-11-790340c0b19e> in glmfit(spikes, design_matrix, ind)
      3         return sm.GLM(spikes.reindex(design_matrix.index), design_matrix,
      4                       family=sm.families.Poisson(),
----> 5                       drop='missing').fit(maxiter=30)
      6     except np.linalg.linalg.LinAlgError:
      7         warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
    690         dev = self.family.deviance(self.endog, mu)
    691         if np.isnan(dev):
--> 692             raise ValueError("The first guess on the deviance function "
    693                              "returned a nan.  This could be a boundary "
    694                              " problem and should be reported.")

ValueError: The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.

In [12]:
formula = '1 + bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
for ind, spikes in enumerate(train_spikes_data):
    print(ind)
    print(glmfit(spikes, design_matrix, ind).summary())


0
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: invalid value encountered in true_divide
  endog_mu = self._clean(endog/mu)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-12-941d2a30e06b> in <module>()
     13 for ind, spikes in enumerate(train_spikes_data):
     14     print(ind)
---> 15     print(glmfit(spikes, design_matrix, ind).summary())

<ipython-input-12-941d2a30e06b> in glmfit(spikes, design_matrix, ind)
      3         return sm.GLM(spikes.reindex(design_matrix.index), design_matrix,
      4                       family=sm.families.Poisson(),
----> 5                       drop='missing').fit(maxiter=30)
      6     except np.linalg.linalg.LinAlgError:
      7         warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
    690         dev = self.family.deviance(self.endog, mu)
    691         if np.isnan(dev):
--> 692             raise ValueError("The first guess on the deviance function "
    693                              "returned a nan.  This could be a boundary "
    694                              " problem and should be reported.")

ValueError: The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.

In [13]:
formula = '1'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
for ind, spikes in enumerate(train_spikes_data):
    print(ind)
    print(glmfit(spikes, design_matrix, ind).summary())


0
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: invalid value encountered in true_divide
  endog_mu = self._clean(endog/mu)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-5f0f218ef69d> in <module>()
      4 for ind, spikes in enumerate(train_spikes_data):
      5     print(ind)
----> 6     print(glmfit(spikes, design_matrix, ind).summary())

<ipython-input-12-941d2a30e06b> in glmfit(spikes, design_matrix, ind)
      3         return sm.GLM(spikes.reindex(design_matrix.index), design_matrix,
      4                       family=sm.families.Poisson(),
----> 5                       drop='missing').fit(maxiter=30)
      6     except np.linalg.linalg.LinAlgError:
      7         warnings.warn('Data is poorly scaled for neuron #{}'.format(ind+1))

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
    690         dev = self.family.deviance(self.endog, mu)
    691         if np.isnan(dev):
--> 692             raise ValueError("The first guess on the deviance function "
    693                              "returned a nan.  This could be a boundary "
    694                              " problem and should be reported.")

ValueError: The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.

In [24]:
for spikes in train_spikes_data:
    print(spikes.sum())


is_spike    0
dtype: int64
is_spike    137
dtype: int64
is_spike    616
dtype: int64
is_spike    159
dtype: int64
is_spike    881
dtype: int64
is_spike    101
dtype: int64
is_spike    246
dtype: int64
is_spike    823
dtype: int64
is_spike    261
dtype: int64
is_spike    319
dtype: int64
is_spike    796
dtype: int64
is_spike    98
dtype: int64
is_spike    808
dtype: int64
is_spike    109
dtype: int64
is_spike    283
dtype: int64
is_spike    136
dtype: int64
is_spike    490
dtype: int64
is_spike    4528
dtype: int64
is_spike    769
dtype: int64
is_spike    2078
dtype: int64
is_spike    420
dtype: int64

In [25]:
for spikes in spikes_data:
    print(spikes.sum())


is_spike    2
dtype: int64
is_spike    210
dtype: int64
is_spike    1372
dtype: int64
is_spike    299
dtype: int64
is_spike    1244
dtype: int64
is_spike    181
dtype: int64
is_spike    350
dtype: int64
is_spike    1469
dtype: int64
is_spike    455
dtype: int64
is_spike    562
dtype: int64
is_spike    1671
dtype: int64
is_spike    184
dtype: int64
is_spike    1118
dtype: int64
is_spike    126
dtype: int64
is_spike    416
dtype: int64
is_spike    206
dtype: int64
is_spike    711
dtype: int64
is_spike    6521
dtype: int64
is_spike    1395
dtype: int64
is_spike    3504
dtype: int64
is_spike    773
dtype: int64

In [26]:
neuron_info.numspikes


Out[26]:
animal  day  epoch_ind  tetrode_number  neuron_number
HPa     8    4          1               1                   2
                                        2                 210
                                        3                1372
                                        4                 299
                                        5                1244
                                        6                 181
                                        8                 350
                                        9                1469
                        4               1                 455
                                        2                 562
                                        3                1671
                                        7                 184
                                        9                1118
                                        10                126
                                        11                416
                        12              1                 206
                                        2                 711
                                        3                6521
                        14              1                1395
                                        2                3504
                                        5                 773
Name: numspikes, dtype: object

In [ ]: