Date: 2016-08-23
The goal of this notebook is to organize the task, tetrode, and neuron info for each animal into pandas DataFrames that can be selected by an (animal, day, epoch) index.
This first cell converts the tasks data to a single pandas DataFrame.
In [41]:
%matplotlib inline
%reload_ext autoreload
%autoreload 3
import os
import sys
import collections
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sys.path.append('../src/')
import data_filter as df
# Setup
# Field names must be an ordered sequence: passing a set (the original code
# used {'directory', 'short_name'}) gives the namedtuple fields a
# nondeterministic order, so positional construction/unpacking could
# silently swap directory and short_name between runs.
Animal = collections.namedtuple('Animal', ('directory', 'short_name'))
num_days = 8
days = range(1, num_days + 1)
animals = {'HPa': Animal(directory='HPa_direct', short_name='HPa')}
# Get all epochs: one task structure per (animal, day)
tasks = [(df.get_data_structure(animals[animal], day, 'task', 'task'), animal)
         for animal in animals
         for day in days]
# Flatten so there is one entry per (epoch, animal)
epochs = [(epoch, animal) for day, animal in tasks for epoch in day]
# Convert into pandas dataframes.
# The original filter `if name not in 'linearcoord'` was a substring test:
# it would also drop any field whose name is a substring of 'linearcoord'
# (e.g. 'linear', 'coord'). Use an exact comparison instead.
ndata = [{name: epoch[0][name][0][0][0]
          for name in epoch[0].dtype.names
          if name != 'linearcoord'}
         for epoch in epochs]
df1 = pd.DataFrame(ndata)
day_epoch_ind = [{'animal': day[1], 'day': day_ind + 1, 'epoch_ind': epoch_ind + 1}
                 for day_ind, day in enumerate(tasks)
                 for epoch_ind, epoch in enumerate(day[0])]
df2 = pd.DataFrame(day_epoch_ind)
# `join_axes` was removed from pd.concat (pandas >= 1.0); reindexing to
# df1's index is the equivalent operation.
epochs_df = (pd
             .concat([df2, df1], axis=1)
             .reindex(df1.index)
             .set_index(['animal', 'day', 'epoch_ind'])
             .assign(environment=lambda x: pd.Categorical(x['environment']))
             .assign(type=lambda x: pd.Categorical(x['type']))
             )
print(epochs_df.info())
print('\n')
print(epochs_df)
print('\n')
# Check accessing by Multi-dimensional index (animal HPa, days 6 and 8)
print(epochs_df.loc[(['HPa'], [6, 8]), :])
print('\n')
print(epochs_df
      .loc[(['HPa'], [6, 8]), :]
      .loc[epochs_df.environment == 'wtr1'])
print('\n')
Now the same thing with the tetrode info. This is a little trickier, because there are tetrodes for each epoch. It might be better to have one for each day-epoch (a dictionary of data frames).
In [2]:
def get_tetrode_info(animal):
    '''Return the Matlab tetrode-info file name for `animal`, assuming the
    file lives in the Raw-Data directory one level above the working
    directory.
    '''
    raw_data_dir = '{}/Raw-Data'.format(os.path.abspath(os.path.pardir))
    return '{data_dir}/{animal.directory}/{animal.short_name}tetinfo.mat'.format(
        data_dir=raw_data_dir, animal=animal)
def convert_to_dict(struct_array):
    '''Flatten a scipy.io.loadmat struct array into {field_name: value},
    squeezing out singleton dimensions. Returns an empty dict when the
    array has no named fields (dtype.names is None raises TypeError).
    '''
    try:
        field_names = struct_array.dtype.names
        return {field: np.squeeze(struct_array[field][0, 0][0])
                for field in field_names}
    except TypeError:
        return {}
tetrode_file_names = [(get_tetrode_info(animals[animal]), animal) for animal in animals]
tetrode_info = [(scipy.io.loadmat(file_name[0]), file_name[1]) for file_name in tetrode_file_names]
# One record per (animal, day, epoch, tetrode); indices converted to 1-based.
tetrode_info = [(convert_to_dict(tetrode[0]), animal, day_ind + 1, epoch_ind + 1, tetrode_ind + 1)
                for info, animal in tetrode_info
                for day_ind, day in enumerate(info['tetinfo'].T)
                for epoch_ind, epoch in enumerate(day[0].T)
                for tetrode_ind, tetrode in enumerate(epoch[0].T)]
tet_df = pd.DataFrame([info[0] for info in tetrode_info])
day_epoch_ind = [{'animal': info[1], 'day': info[2], 'epoch_ind': info[3], 'tetrode_number': info[4]}
                 for info in tetrode_info]
day_epoch_df = pd.DataFrame(day_epoch_ind)
# `join_axes` was removed from pd.concat (pandas >= 1.0); reindex instead.
tetrode_df = (pd
              .concat([day_epoch_df, tet_df], axis=1)
              .reindex(day_epoch_df.index)
              .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number'])  # multi-index identifies rows
              .assign(numcells=lambda x: x['numcells'].astype(int))  # convert numcells to integer type
              .assign(depth=lambda x: x['depth'].astype(int))        # convert depth to integer type
              .assign(area=lambda x: pd.Categorical(x['area']))      # convert area to categorical type
              )
print(tetrode_df.info())
print('\n')
# Check accessing by Multi-dimensional index
# Get all electrodes from HPa, day 8, epoch 2,4
print(tetrode_df.loc[(['HPa'], [8], [2, 4]), :])
print('\n')
# Now only electrodes with cells recorded on them
print(tetrode_df
      .loc[tetrode_df.numcells > 0]
      .loc[('HPa', 8, [2, 4])])
print('\n')
print(tetrode_df
      .loc[tetrode_df.numcells > 0]
      .loc[('HPa', 8, [2, 4])].index.values)
tetrode_df.head()
Out[2]:
Now let's try organizing tetrodes by a dictionary mapping (animal, day, epoch_ind) -> tetrode dataframe. If several dataframes are selected, they can be concatenated. This might be easier than trying to use the epochs dataframe.
In [3]:
# Re-load the raw tetrode-info .mat file for each animal.
tetrode_data = [(scipy.io.loadmat(file_name[0]), file_name[1]) for file_name in tetrode_file_names]
def convert_tetrode_epoch_to_dataframe(tetrodes_in_epoch, animal, day, epoch_ind):
    '''
    Given an epoch data structure, return a cleaned up DataFrame
    '''
    # One dict per tetrode in this epoch's Matlab cell array.
    tetrode_dict_list = [convert_to_dict(tetrode) for tetrode in tetrodes_in_epoch[0][0]]
    return (pd.DataFrame(tetrode_dict_list)
            .assign(numcells=lambda x: x['numcells'].astype(int))  # convert numcells to integer type
            .assign(depth=lambda x: x['depth'].astype(int))  # convert depth to integer type
            .assign(area=lambda x: pd.Categorical(x['area']))  # convert area to categorical type
            .assign(animal=lambda x: animal)
            .assign(day=lambda x: day)
            .assign(epoch_ind=lambda x: epoch_ind)
            .assign(tetrode_number=lambda x: x.index + 1)  # tetrodes are 1-indexed
            .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number'])  # set index to identify rows
            )
# Make a dictionary with (animal, day, epoch_ind) as the keys
# (day and epoch indices are converted from 0-based to 1-based).
tetrode_info2 = {(animal, day_ind + 1, epoch_ind + 1):
                 convert_tetrode_epoch_to_dataframe(epoch, animal, day_ind + 1, epoch_ind + 1)
                 for info, animal in tetrode_data
                 for day_ind, day in enumerate(info['tetinfo'].T)
                 for epoch_ind, epoch in enumerate(day[0].T)}
Get one dataframe based on a particular animal, day, epoch
In [4]:
# Grab an arbitrary (animal, day, epoch_ind) key and show its tetrode DataFrame.
epoch_keys = list(tetrode_info2.keys())
print(epoch_keys[0])
tetrode_info2[epoch_keys[0]]
Out[4]:
Get two dataframes and concat based on keys
In [5]:
# Concatenate the tetrode DataFrames for the first two epoch keys.
print(epoch_keys[:2])
pd.concat([tetrode_info2[key] for key in epoch_keys[:2]])
Out[5]:
Now use the epoch data structure to get only those epochs with the w-track task and get the corresponding electrodes
In [6]:
# Select only the w-track epochs from epochs_df, then fetch the matching
# tetrode DataFrames by their (animal, day, epoch_ind) index keys.
epoch_keys = list(epochs_df.loc[epochs_df.environment == 'wtr1'].index)
pd.concat([tetrode_info2[key] for key in epoch_keys])
Out[6]:
Now say we want only the epochs where the animal is running the w-track and electrodes with at least one cell recorded
In [7]:
# W-track epochs only, restricted to tetrodes with at least one recorded cell.
epoch_keys = list(epochs_df.loc[epochs_df.environment == 'wtr1'].index)
tetrodes_by_epoch = pd.concat([tetrode_info2[key] for key in epoch_keys])
tetrodes_by_epoch.loc[tetrodes_by_epoch.numcells > 0]
Out[7]:
In [40]:
def get_df_index(data_frame):
    '''Return the rows of `data_frame`'s (multi-)index as a list of tuples.

    Iterates the index directly: `Index.get_values()` was deprecated and
    then removed in modern pandas, while `list(index)` yields the same
    tuples for a MultiIndex (and the same scalars for a flat index).
    '''
    return list(data_frame.index)
# Index tuples (animal, day, epoch_ind, tetrode_number) for tetrodes with cells.
tetrode_index = get_df_index(tetrodes_by_epoch.loc[tetrodes_by_epoch.numcells > 0])
def get_LFP_file_name(index, animals):
    ''' Given an index tuple (animal, day, epoch, tetrode_number) and the
    animals dictionary, return the file name of that tetrode's LFP file.
    '''
    animal_name, day, epoch, tetrode_number = index
    animal = animals[animal_name]
    raw_data_dir = '{working_dir}/Raw-Data'.format(
        working_dir=os.path.abspath(os.path.pardir))
    base_name = '{animal.short_name}eeg{day:02d}-{epoch}-{tetrode:02d}.mat'.format(
        animal=animal, day=day, epoch=epoch, tetrode=tetrode_number)
    return '{data_dir}/{directory}/EEG/{base_name}'.format(
        data_dir=raw_data_dir, directory=animal.directory, base_name=base_name)
# Spot-check the file-name construction on the last selected tetrode.
print(tetrode_index[-1])
print(get_LFP_file_name(tetrode_index[-1], animals))
def get_LFP_data(tetrode_index, animals):
    ''' Given a tetrode index tuple and the animals dictionary,
    return the LFP data and start time
    '''
    lfp_file = scipy.io.loadmat(get_LFP_file_name(tetrode_index, animals))
    # Drill through the Matlab cell-array nesting (eeg{day}{epoch}{tetrode}).
    # NOTE(review): [0, -1] takes the *last* slot at each level -- presumably
    # earlier slots are empty padding in these files; confirm against the data.
    lfp_data = lfp_file['eeg'][0, -1][0, -1][0, -1]
    # Reconstruct per-sample time stamps from start time, sample count, and
    # sampling rate, and pair them with the raw trace.
    data_dict = {'time': _get_LFP_time(lfp_data['starttime'][0, 0], lfp_data['data'][0, 0].size, lfp_data['samprate'][0, 0]),
                 'electric_potential': lfp_data['data'][0, 0].squeeze()
                 }
    return pd.DataFrame(data_dict).set_index('time')
def _get_LFP_time(start_time, number_samples, sampling_rate):
''' Returns an array of time stamps
'''
end_time = start_time + (number_samples / sampling_rate)
return np.round(np.arange(start_time, end_time, (1 / sampling_rate)), decimals=4)
# Sanity check: list the available fields, then load and plot one tetrode's LFP.
lfp_file = scipy.io.loadmat(get_LFP_file_name(tetrode_index[-1], animals))
print(lfp_file['eeg'][0, -1][0, -1][0, -1].dtype.names)
lfp_data = get_LFP_data(tetrode_index[-1], animals)
lfp_data.plot()
Out[40]:
Now let's do the same thing for the neuron info
In [9]:
def get_neuron_info(animal):
    '''Return the Matlab cell-info file name for `animal`, assuming the
    file lives in the Raw-Data directory one level above the working
    directory.
    '''
    raw_data_dir = '{}/Raw-Data'.format(os.path.abspath(os.path.pardir))
    return '{data_dir}/{animal.directory}/{animal.short_name}cellinfo.mat'.format(
        data_dir=raw_data_dir, animal=animal)
# Load the cell-info .mat file for each animal.
neuron_file_names = [(get_neuron_info(animals[animal]), animal) for animal in animals]
neuron_data = [(scipy.io.loadmat(file_name[0]), file_name[1]) for file_name in neuron_file_names]
def convert_neuron_epoch_to_dataframe(tetrodes_in_epoch, animal, day, epoch_ind):
    '''
    Given a neuron data structure, return a cleaned up DataFrame
    '''
    # Analysis-tag columns that are not needed here; dropped if present.
    DROP_COLUMNS = ['ripmodtag', 'thetamodtag', 'runripmodtag', 'postsleepripmodtag',
                    'presleepripmodtag', 'runthetamodtag', 'ripmodtag2', 'runripmodtag2',
                    'postsleepripmodtag2', 'presleepripmodtag2', 'ripmodtype',
                    'runripmodtype', 'postsleepripmodtype', 'presleepripmodtype',
                    'FStag', 'ripmodtag3', 'runripmodtag3', 'ripmodtype3', 'runripmodtype3',
                    'tag', 'typetag', 'runripmodtype2', 'tag2', 'ripmodtype2', 'descrip']
    # One dict per recorded neuron, tagged with its 1-based tetrode/neuron
    # numbers; empty neuron slots in the Matlab cell array are skipped.
    neuron_dict_list = [add_to_dict(convert_to_dict(neuron), tetrode_ind, neuron_ind)
                        for tetrode_ind, tetrode in enumerate(tetrodes_in_epoch[0][0])
                        for neuron_ind, neuron in enumerate(tetrode[0])
                        if neuron.size > 0
                        ]
    return (pd.DataFrame(neuron_dict_list)
            # Keyword form: the positional `axis` argument to drop() was
            # deprecated and removed in modern pandas.
            .drop(columns=DROP_COLUMNS, errors='ignore')
            .assign(animal=lambda x: animal)
            .assign(day=lambda x: day)
            .assign(epoch_ind=lambda x: epoch_ind)
            .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number', 'neuron_number'])  # set index to identify rows
            )
def add_to_dict(dictionary, tetrode_ind, neuron_ind):
    '''Tag `dictionary` in place with 1-based tetrode and neuron numbers
    derived from the given 0-based indices, and return the same dict.
    '''
    dictionary.update(tetrode_number=tetrode_ind + 1,
                      neuron_number=neuron_ind + 1)
    return dictionary
# Make a dictionary with (animal, day, epoch_ind) as the keys
# (day and epoch indices are converted from 0-based to 1-based).
neuron_info = {(animal, day_ind + 1, epoch_ind + 1):
               convert_neuron_epoch_to_dataframe(epoch, animal, day_ind + 1, epoch_ind + 1)
               for info, animal in neuron_data
               for day_ind, day in enumerate(info['cellinfo'].T)
               for epoch_ind, epoch in enumerate(day[0].T)
               }
# Show the neuron table for one example epoch.
epoch_keys = list(neuron_info.keys())
print(neuron_info[epoch_keys[1]].info())
neuron_info[epoch_keys[1]]
Out[9]:
In [10]:
# Neurons from w-track epochs only; dropna removes neurons missing fields.
epoch_keys = get_df_index(epochs_df.loc[epochs_df.environment == 'wtr1'])
pd.concat([neuron_info[key] for key in epoch_keys]).dropna()
Out[10]:
In [23]:
# Index tuples (animal, day, epoch_ind, tetrode_number, neuron_number)
# for w-track neurons with complete records.
neuron_index = get_df_index(pd.concat([neuron_info[key] for key in epoch_keys]).dropna())
print(neuron_index[-1])
def get_neuron_data(neuron_index, animals):
    ''' Given a neuron index tuple and the animals dictionary,
    return the spike times
    '''
    neuron_file = scipy.io.loadmat(df.get_data_filename(animals[neuron_index[0]], neuron_index[1], 'spikes'))
    # Drill through spikes{...}{epoch}{tetrode}{neuron}: the 1-based Matlab
    # indices from the tuple are converted to 0-based Python indices.
    # NOTE(review): the day level uses [0, -1] (last slot) rather than the
    # requested day -- presumably each file holds a single day; confirm.
    return neuron_file['spikes'][0, -1][0, neuron_index[2] - 1][0, neuron_index[3] - 1][0, neuron_index[4] - 1][0]['data']
print(get_neuron_data(neuron_index[-1], animals)[0])
In [ ]: