Date: 2016-08-23

The goal of this notebook is to:

  • work out code to convert the existing Matlab data structures into pandas DataFrames.
  • make the data structures more 'flat' (denormalize in database terms) so that they're more easily readable

This first cell converts the tasks data to a single pandas DataFrame.


In [41]:
%matplotlib inline
%reload_ext autoreload
%autoreload 3
import os
import sys
import collections
import scipy.io
import numpy as np
import matplotlib.pyplot as plt  
import seaborn as sns
import pandas as pd

sys.path.append('../src/')
import data_filter as df

# Setup
# The field names are given as an ordered list: a set literal has no defined
# iteration order, so the positional order of the namedtuple fields could
# differ from one interpreter run to the next.
Animal = collections.namedtuple('Animal', ['directory', 'short_name'])
num_days = 8
days = range(1, num_days + 1)  # day numbers 1..num_days
animals = {'HPa': Animal(directory='HPa_direct', short_name='HPa')}

# Get all epochs
# One ('task' structure, animal name) pair per animal and day
tasks = [(df.get_data_structure(animals[animal_name], day_number, 'task', 'task'), animal_name)
         for animal_name in animals
         for day_number in days]
# Flatten to one (epoch, animal name) pair per epoch
epochs = [(task_epoch, animal_name)
          for day_task, animal_name in tasks
          for task_epoch in day_task]

# Convert into pandas dataframes
# Keep every field of the epoch struct except 'linearcoord'. The comparison
# has to be an exact match: `name not in 'linearcoord'` is a substring test
# and would also drop any field whose name is a substring of 'linearcoord'.
ndata = [{name: epoch[0][name][0][0][0]
          for name in epoch[0].dtype.names
          if name != 'linearcoord'}
         for epoch in epochs]
df1 = pd.DataFrame(ndata)

# Identifying columns (animal, day, epoch number) for every epoch.
# `tasks` is ordered animal-major / day-minor, so the day number is looked up
# by the position within `days`; the running position in `tasks` would keep
# counting past the last day as soon as a second animal is added.
day_epoch_ind = [{'animal': task[1],
                  'day': days[task_ind % len(days)],
                  'epoch_ind': epoch_ind + 1}
                 for task_ind, task in enumerate(tasks)
                 for epoch_ind, epoch in enumerate(task[0])]

df2 = pd.DataFrame(day_epoch_ind)

# Combine the identifying columns with the task fields, one row per epoch.
# Both frames are built from plain lists and therefore carry the default
# positional index. `join_axes` was removed from `pd.concat` in pandas 1.0,
# so the result is aligned to `df1`'s index with `reindex` instead.
epochs_df = (pd
             .concat([df2, df1], axis=1)
             .reindex(df1.index)
             .set_index(['animal', 'day', 'epoch_ind'])  # multi-index identifying each epoch
             .assign(environment=lambda x: pd.Categorical(x['environment']))
             .assign(type=lambda x: pd.Categorical(x['type']))
            )

# `DataFrame.info()` prints its summary itself and returns None, so it is
# called directly; wrapping it in `print` would also print "None".
epochs_df.info()
print('\n')
print(epochs_df)
print('\n')

# Check accessing by Multi-dimensional index (animal HPa, days 6 and 8)
print(epochs_df.loc[(['HPa'], [6,8]), :])
print('\n')

# Same selection restricted to the 'wtr1' environment. The mask is computed
# from the already-selected rows (callable indexer), so its index matches the
# frame it filters instead of the full, unfiltered frame.
print(epochs_df
      .loc[(['HPa'], [6,8]), :]
      .loc[lambda x: x.environment == 'wtr1'])
print('\n')


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 42 entries, (HPa, 1, 1) to (HPa, 8, 5)
Data columns (total 2 columns):
environment    33 non-null category
type           42 non-null category
dtypes: category(2)
memory usage: 484.0+ bytes
None


                     environment   type
animal day epoch_ind                   
HPa    1   1            presleep  sleep
           2                 lin    run
           3                 NaN   rest
           4                wtr1    run
           5                 NaN   rest
           6                wtr1    run
           7           postsleep  sleep
       2   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr1    run
           5           postsleep  sleep
       3   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr1    run
           5           postsleep  sleep
       4   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr1    run
           5           postsleep  sleep
       5   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr1    run
           5           postsleep  sleep
       6   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr2    run
           5           postsleep  sleep
       7   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr2    run
           5           postsleep  sleep
       8   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr2    run
           5           postsleep  sleep


                     environment   type
animal day epoch_ind                   
HPa    6   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr2    run
           5           postsleep  sleep
       8   1            presleep  sleep
           2                wtr1    run
           3                 NaN   rest
           4                wtr2    run
           5           postsleep  sleep


                     environment type
animal day epoch_ind                 
HPa    6   2                wtr1  run
       8   2                wtr1  run


Now the same thing with the tetrode info. This is a little trickier, because each epoch has its own set of tetrodes. It might be better to have one DataFrame for each (animal, day, epoch) combination, stored in a dictionary of DataFrames.


In [2]:
def get_tetrode_info(animal):
    '''Build the path of an animal's Matlab tetrode-info file.

    The path is
    '<parent of the current working directory>/Raw-Data/<animal.directory>/<animal.short_name>tetinfo.mat'.
    '''
    raw_data_dir = '{}/Raw-Data'.format(os.path.abspath(os.path.pardir))
    file_name = '{}tetinfo.mat'.format(animal.short_name)
    return '{}/{}/{}'.format(raw_data_dir, animal.directory, file_name)

def convert_to_dict(struct_array):
    '''Turn an array with named fields into a plain dictionary.

    Every name in `struct_array.dtype.names` is mapped to the squeezed first
    element of that field's [0, 0] entry. If a TypeError is raised while the
    dictionary is being built (for example when the array has no named
    fields, so `dtype.names` is None), an empty dictionary is returned.
    '''
    converted = {}
    try:
        for field_name in struct_array.dtype.names:
            converted[field_name] = np.squeeze(struct_array[field_name][0, 0][0])
    except TypeError:
        return {}
    return converted

# Path of each animal's tetrode-info file, paired with the animal's name
tetrode_file_names = [(get_tetrode_info(animals[animal_name]), animal_name)
                      for animal_name in animals]

# Load each file, then flatten day -> epoch -> tetrode into one tuple per tetrode:
# (tetrode fields, animal, day, epoch, tetrode number), numbered from 1 by position
tetrode_info = [(scipy.io.loadmat(path), animal_name)
                for path, animal_name in tetrode_file_names]
tetrode_info = [(convert_to_dict(tetrode[0]), animal_name, day_number, epoch_number, tetrode_number)
                for mat_contents, animal_name in tetrode_info
                for day_number, day in enumerate(mat_contents['tetinfo'].T, 1)
                for epoch_number, epoch in enumerate(day[0].T, 1)
                for tetrode_number, tetrode in enumerate(epoch[0].T, 1)]

# One frame with the tetrode fields, one with the identifying columns (same row order)
tet_df = pd.DataFrame([fields for fields, _, _, _, _ in tetrode_info])
day_epoch_ind = [dict(animal=animal_name, day=day_number,
                      epoch_ind=epoch_number, tetrode_number=tetrode_number)
                 for _, animal_name, day_number, epoch_number, tetrode_number in tetrode_info]
day_epoch_df = pd.DataFrame(day_epoch_ind)

# Combine the identifying columns with the tetrode fields, one row per tetrode.
# Both frames are built from the same list and carry the default positional
# index. `join_axes` was removed from `pd.concat` in pandas 1.0, so the result
# is aligned to `day_epoch_df`'s index with `reindex` instead.
tetrode_df = (pd
              .concat([day_epoch_df, tet_df], axis=1)
              .reindex(day_epoch_df.index)
              .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number'])  # set multi-index to identify rows
              .assign(numcells=lambda x: x['numcells'].astype(int))  # convert numcells to integer type
              .assign(depth=lambda x: x['depth'].astype(int)) # convert depth to integer type
              .assign(area=lambda x: pd.Categorical(x['area']))  # convert area to a categorical type
             )

# `DataFrame.info()` prints its summary itself and returns None, so it is
# called directly; wrapping it in `print` would also print "None".
tetrode_df.info()
print('\n')

# Check accessing by Multi-dimensional index

# Get all electrodes from HPa, day 8, epoch 2,4
print(tetrode_df.loc[(['HPa'], [8], [2,4]), :]) 
print('\n')

# Now only electrodes with cells recorded on them
print(tetrode_df
      .loc[tetrode_df.numcells > 0]
      .loc[('HPa', 8, [2,4])])
print('\n')

# The same selection, shown as the array of index tuples
print(tetrode_df
      .loc[tetrode_df.numcells > 0]
      .loc[('HPa', 8, [2,4])].index.values)

tetrode_df.head()


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 840 entries, (HPa, 1, 1, 1) to (HPa, 8, 5, 20)
Data columns (total 4 columns):
area        840 non-null category
depth       840 non-null int64
descrip     462 non-null object
numcells    840 non-null int64
dtypes: category(1), int64(2), object(1)
memory usage: 27.1+ KB
None


                                     area  depth descrip  numcells
animal day epoch_ind tetrode_number                               
HPa    8   2         1                CA1    113  riptet        12
                     2                CA1    121     NaN         0
                     3                CA1     90  CA1Ref         0
                     4                CA1    116  riptet        15
                     5                CA1    116  riptet         0
                     6                CA1    110  riptet         0
                     7                CA1    114  riptet         0
                     8               iCA1    114  riptet         0
                     9               iCA1    100  riptet         0
                     10              iCA1     96     NaN         0
                     11              iCA1    106  riptet         0
                     12              iCA1    114  riptet         3
                     13              iCA1    120     NaN         0
                     14              iCA1    105  riptet         6
                     15               PFC     93     NaN         0
                     16               PFC     90     NaN         0
                     17               PFC     90     NaN         6
                     18               PFC     90     NaN         0
                     19               PFC    130     NaN         0
                     20               PFC    109     NaN         0
           4         1                CA1    113  riptet        12
                     2                CA1    121     NaN         0
                     3                CA1     90  CA1Ref         0
                     4                CA1    116  riptet        15
                     5                CA1    116  riptet         0
                     6                CA1    110  riptet         0
                     7                CA1    114  riptet         0
                     8               iCA1    114  riptet         0
                     9               iCA1    100  riptet         0
                     10              iCA1     96     NaN         0
                     11              iCA1    106  riptet         0
                     12              iCA1    114  riptet         3
                     13              iCA1    120     NaN         0
                     14              iCA1    105  riptet         6
                     15               PFC     93     NaN         0
                     16               PFC     90     NaN         0
                     17               PFC     90     NaN         6
                     18               PFC     90     NaN         0
                     19               PFC    130     NaN         0
                     20               PFC    109     NaN         0


                                     area  depth descrip  numcells
animal day epoch_ind tetrode_number                               
HPa    8   2         1                CA1    113  riptet        12
                     4                CA1    116  riptet        15
                     12              iCA1    114  riptet         3
                     14              iCA1    105  riptet         6
                     17               PFC     90     NaN         6
           4         1                CA1    113  riptet        12
                     4                CA1    116  riptet        15
                     12              iCA1    114  riptet         3
                     14              iCA1    105  riptet         6
                     17               PFC     90     NaN         6


[('HPa', 8, 2, 1) ('HPa', 8, 2, 4) ('HPa', 8, 2, 12) ('HPa', 8, 2, 14)
 ('HPa', 8, 2, 17) ('HPa', 8, 4, 1) ('HPa', 8, 4, 4) ('HPa', 8, 4, 12)
 ('HPa', 8, 4, 14) ('HPa', 8, 4, 17)]
Out[2]:
area depth descrip numcells
animal day epoch_ind tetrode_number
HPa 1 1 1 CA1 114 riptet 7
2 CA1 124 NaN 0
3 CA1 90 CA1Ref 0
4 CA1 117 riptet 13
5 CA1 119 riptet 1

Now let's try organizing the tetrodes as a dictionary mapping (animal, day, epoch_ind) -> tetrode DataFrame. If several DataFrames are selected, they can be concatenated. This might be easier than trying to use the epochs DataFrame.


In [3]:
# Load every animal's tetrode-info file, keeping the animal name next to the loaded contents
tetrode_data = [(scipy.io.loadmat(path), animal_name)
                for path, animal_name in tetrode_file_names]

def convert_tetrode_epoch_to_dataframe(tetrodes_in_epoch, animal, day, epoch_ind):
    '''
    Build a cleaned-up DataFrame describing the tetrodes of one epoch.

    Every entry of `tetrodes_in_epoch[0][0]` becomes one row. `numcells` and
    `depth` are cast to integers, `area` is made categorical, and the rows are
    indexed by (animal, day, epoch_ind, tetrode_number), where the tetrode
    number is the row position counted from 1.
    '''
    tetrode_rows = []
    for tetrode in tetrodes_in_epoch[0][0]:
        tetrode_rows.append(convert_to_dict(tetrode))

    tetrodes = pd.DataFrame(tetrode_rows)
    # Fix the column types
    tetrodes = tetrodes.assign(numcells=tetrodes['numcells'].astype(int))
    tetrodes = tetrodes.assign(depth=tetrodes['depth'].astype(int))
    tetrodes = tetrodes.assign(area=pd.Categorical(tetrodes['area']))
    # Add the identifying columns, then move them into the index
    tetrodes = tetrodes.assign(animal=animal)
    tetrodes = tetrodes.assign(day=day)
    tetrodes = tetrodes.assign(epoch_ind=epoch_ind)
    tetrodes = tetrodes.assign(tetrode_number=tetrodes.index + 1)
    return tetrodes.set_index(['animal', 'day', 'epoch_ind', 'tetrode_number'])

# Make a dictionary with (animal, day, epoch_ind) as the keys
# and that epoch's tetrode DataFrame as the value
tetrode_info2 = {
    (animal_name, day_number, epoch_number):
        convert_tetrode_epoch_to_dataframe(epoch, animal_name, day_number, epoch_number)
    for mat_contents, animal_name in tetrode_data
    for day_number, day in enumerate(mat_contents['tetinfo'].T, 1)
    for epoch_number, epoch in enumerate(day[0].T, 1)
}

Get one dataframe based on a particular animal, day, epoch


In [4]:
# All (animal, day, epoch_ind) keys, in the dictionary's iteration order
epoch_keys = [*tetrode_info2]
first_key_position = 0
print(epoch_keys[first_key_position])
tetrode_info2[epoch_keys[first_key_position]]


('HPa', 5, 1)
Out[4]:
area depth descrip numcells
animal day epoch_ind tetrode_number
HPa 5 1 1 CA1 114 riptet 9
2 CA1 121 NaN 0
3 CA1 90 CA1Ref 0
4 CA1 117 riptet 9
5 CA1 117 riptet 2
6 CA1 110 riptet 0
7 CA1 114 riptet 0
8 iCA1 114 riptet 1
9 iCA1 101 riptet 1
10 iCA1 98 NaN 0
11 iCA1 108 riptet 0
12 iCA1 116 riptet 4
13 iCA1 120 NaN 0
14 iCA1 105 riptet 1
15 PFC 120 NaN 0
16 PFC 123 NaN 0
17 PFC 123 NaN 3
18 PFC 112 NaN 2
19 PFC 130 NaN 0
20 PFC 109 NaN 0

Get two dataframes and concat based on keys


In [5]:
# Stack the tetrode tables of the first two keys into a single frame
print(epoch_keys[0:2])
pd.concat([tetrode_info2[epoch_key] for epoch_key in epoch_keys[0:2]])


[('HPa', 5, 1), ('HPa', 6, 4)]
Out[5]:
area depth descrip numcells
animal day epoch_ind tetrode_number
HPa 5 1 1 CA1 114 riptet 9
2 CA1 121 NaN 0
3 CA1 90 CA1Ref 0
4 CA1 117 riptet 9
5 CA1 117 riptet 2
6 CA1 110 riptet 0
7 CA1 114 riptet 0
8 iCA1 114 riptet 1
9 iCA1 101 riptet 1
10 iCA1 98 NaN 0
11 iCA1 108 riptet 0
12 iCA1 116 riptet 4
13 iCA1 120 NaN 0
14 iCA1 105 riptet 1
15 PFC 120 NaN 0
16 PFC 123 NaN 0
17 PFC 123 NaN 3
18 PFC 112 NaN 2
19 PFC 130 NaN 0
20 PFC 109 NaN 0
6 4 1 CA1 114 riptet 7
2 CA1 121 NaN 0
3 CA1 90 CA1Ref 0
4 CA1 117 riptet 8
5 CA1 117 riptet 5
6 CA1 110 riptet 0
7 CA1 114 riptet 0
8 iCA1 114 riptet 0
9 iCA1 101 riptet 1
10 iCA1 98 NaN 1
11 iCA1 108 riptet 1
12 iCA1 116 riptet 1
13 iCA1 120 NaN 0
14 iCA1 105 riptet 1
15 PFC 120 NaN 0
16 PFC 123 NaN 0
17 PFC 123 NaN 1
18 PFC 112 NaN 1
19 PFC 130 NaN 0
20 PFC 109 NaN 0

Now use the epoch data structure to get only those epochs with the w-track task and get the corresponding electrodes


In [6]:
# Keys of the epochs whose environment is 'wtr1', then their tetrode tables stacked together
epoch_keys = list(epochs_df[epochs_df['environment'] == 'wtr1'].index)
pd.concat([tetrode_info2[epoch_key] for epoch_key in epoch_keys])


Out[6]:
area depth descrip numcells
animal day epoch_ind tetrode_number
HPa 1 4 1 CA1 114 riptet 7
2 CA1 124 NaN 0
3 CA1 90 CA1Ref 0
4 CA1 117 riptet 13
5 CA1 119 riptet 0
6 CA1 110 riptet 0
7 CA1 111 riptet 0
8 iCA1 116 riptet 1
9 iCA1 100 riptet 1
10 iCA1 99 NaN 0
11 iCA1 110 riptet 0
12 iCA1 118 riptet 0
13 iCA1 120 NaN 0
14 iCA1 106 riptet 1
15 PFC 96 NaN 2
16 PFC 94 NaN 2
17 PFC 98 NaN 3
18 PFC 90 NaN 4
19 PFC 104 NaN 0
20 PFC 101 NaN 0
6 1 CA1 114 riptet 7
2 CA1 124 NaN 0
3 CA1 90 CA1Ref 0
4 CA1 117 riptet 13
5 CA1 119 riptet 0
6 CA1 110 riptet 1
7 CA1 111 riptet 0
8 iCA1 116 riptet 1
9 iCA1 100 riptet 1
10 iCA1 99 NaN 0
1 6 ... ... ... ... ...
7 2 11 iCA1 108 riptet 3
12 iCA1 116 riptet 6
13 iCA1 120 NaN 0
14 iCA1 105 riptet 7
15 PFC 99 NaN 3
16 PFC 96 NaN 1
17 PFC 96 NaN 2
18 PFC 96 NaN 3
19 PFC 130 NaN 0
20 PFC 109 NaN 0
8 2 1 CA1 113 riptet 12
2 CA1 121 NaN 0
3 CA1 90 CA1Ref 0
4 CA1 116 riptet 15
5 CA1 116 riptet 0
6 CA1 110 riptet 0
7 CA1 114 riptet 0
8 iCA1 114 riptet 0
9 iCA1 100 riptet 0
10 iCA1 96 NaN 0
11 iCA1 106 riptet 0
12 iCA1 114 riptet 3
13 iCA1 120 NaN 0
14 iCA1 105 riptet 6
15 PFC 93 NaN 0
16 PFC 90 NaN 0
17 PFC 90 NaN 6
18 PFC 90 NaN 0
19 PFC 130 NaN 0
20 PFC 109 NaN 0

260 rows × 4 columns

Now say we want only the epochs where the animal is running the w-track and electrodes with at least one cell recorded


In [7]:
# Tetrodes of the 'wtr1' epochs, then only those with at least one recorded cell
epoch_keys = list(epochs_df[epochs_df['environment'] == 'wtr1'].index)
tetrodes_by_epoch = pd.concat([tetrode_info2[epoch_key] for epoch_key in epoch_keys])
tetrodes_by_epoch[tetrodes_by_epoch['numcells'] > 0]


Out[7]:
area depth descrip numcells
animal day epoch_ind tetrode_number
HPa 1 4 1 CA1 114 riptet 7
4 CA1 117 riptet 13
8 iCA1 116 riptet 1
9 iCA1 100 riptet 1
14 iCA1 106 riptet 1
15 PFC 96 NaN 2
16 PFC 94 NaN 2
17 PFC 98 NaN 3
18 PFC 90 NaN 4
6 1 CA1 114 riptet 7
4 CA1 117 riptet 13
6 CA1 110 riptet 1
8 iCA1 116 riptet 1
9 iCA1 100 riptet 1
14 iCA1 106 riptet 1
15 PFC 96 NaN 2
16 PFC 94 NaN 2
17 PFC 98 NaN 3
18 PFC 90 NaN 4
2 2 1 CA1 114 riptet 6
4 CA1 117 riptet 6
7 CA1 113 riptet 1
8 iCA1 114 riptet 1
12 iCA1 116 riptet 2
14 iCA1 106 riptet 3
15 PFC 100 NaN 4
17 PFC 104 NaN 2
18 PFC 96 NaN 4
4 1 CA1 114 riptet 6
4 CA1 117 riptet 6
2 4 ... ... ... ... ...
6 2 4 CA1 117 riptet 8
5 CA1 117 riptet 5
8 iCA1 114 riptet 1
9 iCA1 101 riptet 1
10 iCA1 98 NaN 1
11 iCA1 108 riptet 1
12 iCA1 116 riptet 1
14 iCA1 105 riptet 1
17 PFC 123 NaN 1
18 PFC 112 NaN 1
7 2 1 CA1 114 riptet 7
2 CA1 121 NaN 1
4 CA1 117 riptet 10
5 CA1 117 riptet 6
6 CA1 110 riptet 1
8 iCA1 114 riptet 1
9 iCA1 101 riptet 3
10 iCA1 98 NaN 1
11 iCA1 108 riptet 3
12 iCA1 116 riptet 6
14 iCA1 105 riptet 7
15 PFC 99 NaN 3
16 PFC 96 NaN 1
17 PFC 96 NaN 2
18 PFC 96 NaN 3
8 2 1 CA1 113 riptet 12
4 CA1 116 riptet 15
12 iCA1 114 riptet 3
14 iCA1 105 riptet 6
17 PFC 90 NaN 6

122 rows × 4 columns


In [40]:
def get_df_index(data_frame):
    ''' Converts pandas dataframe to a list of tuples corresponding to
    the dataframe multi-index

    Each list element is one index entry; for a MultiIndex these are tuples
    with one value per index level. An empty frame gives an empty list.
    '''
    # `Index.get_values()` was deprecated in pandas 0.25 and removed in 1.0;
    # `.values` returns the same array of index entries on old and new versions.
    return list(data_frame.index.values)


# Index tuples of the tetrodes that recorded at least one cell
tetrode_index = get_df_index(tetrodes_by_epoch[tetrodes_by_epoch['numcells'] > 0])

def get_LFP_file_name(index, animals):
    ''' Build the path of the LFP (EEG) file of one tetrode.

    `index` is read as (animal, day, epoch, tetrode_number) and `animals`
    maps the animal name to its Animal entry. The path is
    '<parent of the current working directory>/Raw-Data/<directory>/EEG/
    <short_name>eeg<day>-<epoch>-<tetrode_number>.mat', with day and tetrode
    number zero-padded to two digits.
    '''
    animal = animals[index[0]]
    eeg_dir = '{}/Raw-Data/{}/EEG'.format(os.path.abspath(os.path.pardir), animal.directory)
    file_name = '{}eeg{:02d}-{}-{:02d}.mat'.format(animal.short_name, index[1], index[2], index[3])
    return '{}/{}'.format(eeg_dir, file_name)


# Show the last selected tetrode's index tuple and the LFP file path built from it
print(tetrode_index[-1])
print(get_LFP_file_name(tetrode_index[-1], animals))

def get_LFP_data(tetrode_index, animals):
    ''' Load one tetrode's LFP recording as a time-indexed DataFrame.

    The file is located with `get_LFP_file_name`, and the last entry along
    each of the three nested levels of its 'eeg' variable is read. The result
    has a single column, 'electric_potential', and is indexed by 'time': the
    time stamps built from the recording's 'starttime', 'samprate' and number
    of samples.
    '''
    mat_contents = scipy.io.loadmat(get_LFP_file_name(tetrode_index, animals))
    recording = mat_contents['eeg'][0, -1][0, -1][0, -1]
    samples = recording['data'][0, 0]
    time_stamps = _get_LFP_time(recording['starttime'][0, 0],
                                samples.size,
                                recording['samprate'][0, 0])
    lfp = pd.DataFrame({'time': time_stamps,
                        'electric_potential': samples.squeeze()})
    return lfp.set_index('time')

def _get_LFP_time(start_time, number_samples, sampling_rate):
    ''' Returns an array of time stamps
    '''
    end_time = start_time + (number_samples / sampling_rate)
    return np.round(np.arange(start_time, end_time, (1 / sampling_rate)), decimals=4)
    

# Load the LFP file of the last selected tetrode and list the field names of
# the entry that `get_LFP_data` reads from it
lfp_file = scipy.io.loadmat(get_LFP_file_name(tetrode_index[-1], animals))
print(lfp_file['eeg'][0, -1][0, -1][0, -1].dtype.names)
# Read the same tetrode's recording as a time-indexed DataFrame and plot it
lfp_data = get_LFP_data(tetrode_index[-1], animals)
lfp_data.plot()


('HPa', 8, 2, 17)
/Users/edeno/Documents/GitHub/Jadhav-2016-Data-Analysis/Raw-Data/HPa_direct/EEG/HPaeeg08-2-17.mat
('descript', 'fields', 'starttime', 'samprate', 'data', 'depth')
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x115470a58>

Now let's do the same thing for the neuron info


In [9]:
def get_neuron_info(animal):
    '''Build the path of an animal's Matlab cell-info file.

    The path is
    '<parent of the current working directory>/Raw-Data/<animal.directory>/<animal.short_name>cellinfo.mat'.
    '''
    raw_data_dir = '{}/Raw-Data'.format(os.path.abspath(os.path.pardir))
    file_name = '{}cellinfo.mat'.format(animal.short_name)
    return '{}/{}/{}'.format(raw_data_dir, animal.directory, file_name)

# Path of each animal's cell-info file paired with the animal's name, then the loaded contents
neuron_file_names = [(get_neuron_info(animals[animal_name]), animal_name)
                     for animal_name in animals]
neuron_data = [(scipy.io.loadmat(path), animal_name)
               for path, animal_name in neuron_file_names]

def convert_neuron_epoch_to_dataframe(tetrodes_in_epoch, animal, day, epoch_ind):
    '''
    Given a neuron data structure for one epoch, return a cleaned up DataFrame

    Every non-empty neuron entry of every tetrode in `tetrodes_in_epoch[0][0]`
    becomes one row. Tetrode and neuron numbers are the positions of the
    entries counted from 1. The columns listed in DROP_COLUMNS are removed
    when present, and the rows are indexed by
    (animal, day, epoch_ind, tetrode_number, neuron_number).
    '''
    DROP_COLUMNS = ['ripmodtag', 'thetamodtag', 'runripmodtag', 'postsleepripmodtag',
                   'presleepripmodtag', 'runthetamodtag', 'ripmodtag2', 'runripmodtag2',
                   'postsleepripmodtag2', 'presleepripmodtag2', 'ripmodtype',
                   'runripmodtype', 'postsleepripmodtype', 'presleepripmodtype',
                   'FStag', 'ripmodtag3', 'runripmodtag3', 'ripmodtype3', 'runripmodtype3',
                   'tag', 'typetag', 'runripmodtype2', 'tag2', 'ripmodtype2', 'descrip']
    neuron_dict_list = [add_to_dict(convert_to_dict(neuron), tetrode_ind, neuron_ind)
                         for tetrode_ind, tetrode in enumerate(tetrodes_in_epoch[0][0])
                         for neuron_ind, neuron in enumerate(tetrode[0])
                        if neuron.size > 0  # skip empty placeholder entries
                       ]
    return (pd.DataFrame(neuron_dict_list)
              # `axis` is passed by keyword: the positional form was removed in pandas 2.0.
              # errors='ignore' because not every epoch has all of these columns.
              .drop(DROP_COLUMNS, axis=1, errors='ignore')
              .assign(animal=lambda x: animal)
              .assign(day=lambda x: day)
              .assign(epoch_ind=lambda x: epoch_ind)
              .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number', 'neuron_number'])  # set index to identify rows
            )

def add_to_dict(dictionary, tetrode_ind, neuron_ind):
    '''Store the 1-based tetrode and neuron numbers in `dictionary`.

    The zero-based positions `tetrode_ind` and `neuron_ind` are written under
    the keys 'tetrode_number' and 'neuron_number' (each plus one). The
    dictionary is modified in place and the same object is returned.
    '''
    dictionary.update(tetrode_number=tetrode_ind + 1,
                      neuron_number=neuron_ind + 1)
    return dictionary

# Make a dictionary with (animal, day, epoch_ind) as the keys
# and that epoch's neuron DataFrame as the value
neuron_info = {(animal, day_ind + 1, epoch_ind + 1): 
                  convert_neuron_epoch_to_dataframe(epoch, animal, day_ind + 1, epoch_ind + 1)
                   for info, animal in neuron_data
                   for day_ind, day in enumerate(info['cellinfo'].T)
                   for epoch_ind, epoch in enumerate(day[0].T)
                }
epoch_keys = list(neuron_info.keys())
# `DataFrame.info()` prints its summary itself and returns None, so it is
# called directly; wrapping it in `print` would also print "None".
neuron_info[epoch_keys[1]].info()
neuron_info[epoch_keys[1]]


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 36 entries, (HPa, 3, 3, 1, 1) to (HPa, 3, 3, 18, 4)
Data columns (total 6 columns):
area          26 non-null object
csi           26 non-null object
meanrate      26 non-null object
numspikes     26 non-null object
propbursts    26 non-null object
spikewidth    26 non-null object
dtypes: object(6)
memory usage: 2.0+ KB
None
Out[9]:
area csi meanrate numspikes propbursts spikewidth
animal day epoch_ind tetrode_number neuron_number
HPa 3 3 1 1 CA1 0.11153846153846154 0.4276315789473684 260 0.3576923076923077 9.852046830634947
2 CA1 0.09844559585492228 0.31743421052631576 193 0.35233160621761656 6.193681221562731
3 CA1 0.1476510067114094 0.2450657894736842 149 0.4563758389261745 7.23198180778703
4 CA1 0.06408483171968649 3.567434210526316 2169 0.2429691101890272 11.637107077760408
5 CA1 0.06976744186046512 0.28289473684210525 172 0.31976744186046513 9.414763982224821
6 CA1 0.14285714285714285 0.046052631578947366 28 0.5 7.010773687328376
7 CA1 0.057692307692307696 0.17105263157894737 104 0.28846153846153844 7.326065780548666
8 CA1 0.06870229007633588 0.4309210526315789 262 0.23282442748091603 9.887929449648837
9 CA1 0.09090909090909091 0.16282894736842105 99 0.29292929292929293 8.675470637035936
4 1 CA1 0.1008174386920981 1.207236842105263 734 0.3678474114441417 9.310775428384867
2 CA1 0.08433734939759036 0.2730263157894737 166 0.26506024096385544 8.764926016949678
3 CA1 0.16783216783216784 0.23519736842105263 143 0.5314685314685315 7.372509201038104
4 CA1 0.1935483870967742 0.15296052631578946 93 0.5591397849462365 8.497293315564809
5 CA1 0.09615384615384616 0.08552631578947369 52 0.2692307692307692 7.809075007961705
6 CA1 0.11538461538461539 0.04276315789473684 26 0.2692307692307692 9.89821354188334
7 NaN NaN NaN NaN NaN NaN
7 1 CA1 0.13548387096774195 0.5098684210526315 310 0.44516129032258067 8.099867455040723
2 CA1 0.10861423220973783 0.8782894736842105 534 0.37453183520599254 10.695540560591898
3 CA1 0.03773584905660377 0.08717105263157894 53 0.24528301886792453 10.225901491678394
4 CA1 0.12449799196787148 0.4095394736842105 249 0.4859437751004016 9.84830423452351
5 NaN NaN NaN NaN NaN NaN
6 NaN NaN NaN NaN NaN NaN
7 CA1 0.07920792079207921 0.16611842105263158 101 0.25742574257425743 8.163437415682733
8 CA1 0.17010309278350516 0.3190789473684211 194 0.5463917525773195 8.673467413803706
8 1 NaN NaN NaN NaN NaN NaN
9 1 NaN NaN NaN NaN NaN NaN
14 1 NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN
15 1 PFC 0.005668016194331984 2.03125 1235 0.025101214574898785 11.85580178041336
2 PFC 0 0.08881578947368421 54 0 12.415151489561344
3 PFC 0.02765016902140938 18.97532894736842 11537 0.1012394903354425 7.127713053825276
4 PFC 0 0.07894736842105263 48 0 7.691757341921077
18 1 NaN NaN NaN NaN NaN NaN
2 PFC 0.0036231884057971015 0.45394736842105265 276 0.028985507246376812 10.576494182074454
3 NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN

In [10]:
# Neurons of the 'wtr1' epochs, with rows containing missing values removed
epoch_keys = get_df_index(epochs_df[epochs_df['environment'] == 'wtr1'])
pd.concat([neuron_info[epoch_key] for epoch_key in epoch_keys]).dropna()


Out[10]:
area csi meanrate numspikes propbursts spikewidth
animal day epoch_ind tetrode_number neuron_number
HPa 1 4 1 1 CA1 0.15163699023549684 1.4317434210526316 1741 0.48477886272257326 8.404000423452445
2 CA1 0.15368081676518 3.0608552631578947 3722 0.5354648038688877 10.068207612419641
3 CA1 0.06485355648535565 0.7861842105263158 956 0.2583682008368201 10.591460255431754
4 CA1 0.12368421052631579 0.3125 380 0.4131578947368421 9.56360765114096
5 CA1 0.07959949937421777 3.2853618421052633 3995 0.30037546933667086 11.086349081099883
6 CA1 0.08576642335766424 0.4506578947368421 548 0.2956204379562044 8.898837340698611
4 1 CA1 0.13953488372093023 0.6011513157894737 731 0.4746922024623803 8.928423068731131
2 CA1 0.029411764705882353 0.3355263157894737 408 0.12009803921568628 9.606738087324494
3 CA1 0.1457641196013289 1.980263157894737 2408 0.4962624584717608 8.824516757593013
4 CA1 0.08823529411764706 0.027960526315789474 34 0.3235294117647059 8.614027432575318
5 CA1 0.1522468142186452 1.2261513157894737 1491 0.4936284372904091 8.542827069173516
6 CA1 0.11607142857142858 0.7368421052631579 896 0.40513392857142855 8.835866239911471
7 CA1 0.07142857142857142 0.20723684210526316 252 0.25396825396825395 8.14703602100615
9 CA1 0.1679902260232132 1.346217105263158 1637 0.548564447159438 7.677985200040181
10 CA1 0.10616438356164383 0.7203947368421053 876 0.3778538812785388 9.274112719181899
11 CA1 0.06056701030927835 0.6381578947368421 776 0.23324742268041238 9.412296547260718
12 CA1 0.09714285714285714 0.14391447368421054 175 0.3028571428571429 7.361349894539315
13 CA1 0.11013215859030837 0.18667763157894737 227 0.3303964757709251 8.306329106015445
8 1 iCA1 0.14345991561181434 0.38980263157894735 474 0.5189873417721519 9.54469181741173
9 1 iCA1 0.10364145658263306 0.587171052631579 714 0.3641456582633053 10.236683991169903
14 1 iCA1 0.07974423935335988 6.816611842105263 8289 0.2884545783568585 11.229290642757745
15 1 PFC 0.0011736228269639845 11.211348684210526 13633 0.004547788454485439 11.59722922762468
2 PFC 0.04433909648633575 8.84703947368421 10758 0.16220487079382784 11.529344904793362
16 1 PFC 0 0.8207236842105263 998 0.002004008016032064 11.008656376212048
2 PFC 0.05835108998524832 5.017269736842105 6101 0.2076708736272742 11.795111252949056
17 2 PFC 0.019583539910758552 3.317434210526316 4034 0.07635101636093208 13.19374374853498
18 1 PFC 0 0.003289473684210526 4 0 7.62186304435293
2 PFC 0.007438224911749874 6.5230263157894735 7932 0.02773575390821987 11.362835093966867
3 PFC 0.008025682182985553 0.5123355263157895 623 0.028892455858747994 9.501617030121416
6 1 1 CA1 0.08113804004214963 0.776595744680851 949 0.32349841938883034 8.404000423452445
1 6 1 ... ... ... ... ... ... ...
7 2 18 2 PFC 0.006029285099052541 4.805463576158941 5805 0.027906976744186046 11.833343257416901
3 PFC 0.01594896331738437 0.5190397350993378 627 0.05422647527910686 8.279705653387657
8 2 1 1 CA1 nan 0 0 nan 7.347367425964567
2 CA1 0.05963302752293578 0.7218543046357616 872 0.2305045871559633 10.359959504215627
3 CA1 0.19162790697674417 0.8899006622516556 1075 0.6409302325581395 7.624190000134791
4 CA1 0.08121019108280254 0.5198675496688742 628 0.3200636942675159 10.734931880073724
5 CA1 0.14817320703653586 2.447019867549669 2956 0.4983085250338295 9.1987074003374
6 CA1 0.1690929451287794 0.7392384105960265 893 0.5442329227323628 9.028117472746398
7 CA1 0.053475935828877004 0.15480132450331127 187 0.20855614973262032 11.719632986962303
4 1 CA1 0.09773936170212766 1.2450331125827814 1504 0.3184840425531915 10.573569863065767
2 CA1 0.13846153846153847 1.1837748344370862 1430 0.4951048951048951 8.249598508446987
3 CA1 0.1411764705882353 0.7740066225165563 935 0.48128342245989303 9.473909850454165
4 CA1 0.11428571428571428 0.23178807947019867 280 0.2892857142857143 7.821072138691519
5 CA1 0.04081632653061224 0.20281456953642385 245 0.09387755102040816 8.26724220794418
6 CA1 0.02981651376146789 0.3609271523178808 436 0.10550458715596331 13.34939605009777
7 CA1 0.16352201257861634 0.1316225165562914 159 0.5471698113207547 8.622862142764975
8 CA1 0.16104868913857678 0.22102649006622516 267 0.43820224719101125 9.926903553501516
12 1 iCA1 0.02631578947368421 0.06291390728476821 76 0.07894736842105263 8.965920269909438
2 iCA1 0.04430379746835443 0.3923841059602649 474 0.1962025316455696 10.073373561223226
3 iCA1 0.053717494811378344 6.780629139072848 8191 0.1947259186912465 5.81454456934001
14 1 iCA1 0.06834268977300463 3.3915562913907285 4097 0.24969489870637052 11.11293649893477
2 iCA1 0.07884972170686456 2.6771523178807946 3234 0.2786023500309215 10.208434221893334
3 iCA1 0.048678720445062586 0.5951986754966887 719 0.22253129346314326 8.64379513034075
6 iCA1 0.09302325581395349 0.10678807947019868 129 0.3643410852713178 7.082362239433368
17 1 PFC 0.014189693801344288 1.1084437086092715 1339 0.06572068707991038 10.417261378112482
2 PFC nan 0 0 nan 7.827389292341298
3 PFC 0 0.16473509933774835 199 0 12.93298222642811
4 PFC 0.008634868421052632 2.013245033112583 2432 0.029605263157894735 8.619500395429855
5 PFC 0.00519311911716975 2.5504966887417218 3081 0.023044466082440766 10.821009588390197
6 PFC 0.004267668146124957 9.698675496688741 11716 0.014595425059747353 12.333565861353105

406 rows × 6 columns


In [23]:
# Index tuples of all neurons in the selected epochs that have no missing values
neuron_index = get_df_index(
    pd.concat([neuron_info[epoch_key] for epoch_key in epoch_keys]).dropna()
)
print(neuron_index[len(neuron_index) - 1])


def get_neuron_data(neuron_index, animals):
    ''' Load the 'data' field of one neuron from an animal's spikes file.

    `neuron_index` is read as (animal, day, epoch, tetrode number, neuron
    number) with 1-based epoch, tetrode and neuron numbers; `animals` maps the
    animal name to its Animal entry. The file name comes from
    `df.get_data_filename(animal, day, 'spikes')`, and the last entry of the
    file's top-level 'spikes' array is the one that is indexed into.
    '''
    animal = animals[neuron_index[0]]
    spikes_file_name = df.get_data_filename(animal, neuron_index[1], 'spikes')
    day_cell = scipy.io.loadmat(spikes_file_name)['spikes'][0, -1]
    epoch_cell = day_cell[0, neuron_index[2] - 1]
    tetrode_cell = epoch_cell[0, neuron_index[3] - 1]
    neuron_cell = tetrode_cell[0, neuron_index[4] - 1]
    return neuron_cell[0]['data']

# Show the first element of the 'data' field returned for the last selected neuron
print(get_neuron_data(neuron_index[-1], animals)[0])


('HPa', 8, 2, 17, 6)
[[  2.71344770e+03   9.62000000e+01   1.05950000e+02 ...,   9.62162673e+01
    1.05660690e+02   6.61617632e-01]
 [  2.71362580e+03   9.62000000e+01   1.05300000e+02 ...,   9.61594889e+01
    1.05410010e+02   6.59681126e-01]
 [  2.71366690e+03   9.62000000e+01   1.05300000e+02 ...,   9.61530958e+01
    1.05405284e+02   6.60198741e-01]
 ..., 
 [  3.91980730e+03   1.02050000e+02   1.00750000e+02 ...,   1.02205782e+02
    1.00416081e+02  -2.65838534e-02]
 [  3.91983050e+03   1.01400000e+02   1.00100000e+02 ...,   1.01925888e+02
    1.00393648e+02  -4.35875789e-02]
 [  3.91988440e+03   1.01400000e+02   1.00100000e+02 ...,   1.01611592e+02
    1.00360565e+02  -6.16906623e-02]]

In [ ]: