Date: 2016-08-23
The goal of this notebook is to organize the task, tetrode, and neuron info for each animal into pandas DataFrames that can be selected by an (animal, day, epoch) index.
This first cell converts the tasks data to a single pandas DataFrame.
In [41]:
%matplotlib inline
%reload_ext autoreload
%autoreload 3
import os
import sys
import collections
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sys.path.append('../src/')
import data_filter as df
# Setup
# Field names must be an ordered sequence: passing a set (the original code
# used {'directory', 'short_name'}) gives the namedtuple fields a
# nondeterministic order, so positional construction/unpacking could
# silently swap directory and short_name between runs.
Animal = collections.namedtuple('Animal', ('directory', 'short_name'))
num_days = 8
days = range(1, num_days + 1)
animals = {'HPa': Animal(directory='HPa_direct', short_name='HPa')}
# Get all epochs: one task structure per (animal, day)
tasks = [(df.get_data_structure(animals[animal], day, 'task', 'task'), animal)
         for animal in animals
         for day in days]
# Flatten so there is one entry per (epoch, animal)
epochs = [(epoch, animal) for day, animal in tasks for epoch in day]
# Convert into pandas dataframes.
# The original filter `if name not in 'linearcoord'` was a substring test:
# it would also drop any field whose name is a substring of 'linearcoord'
# (e.g. 'linear', 'coord'). Use an exact comparison instead.
ndata = [{name: epoch[0][name][0][0][0]
          for name in epoch[0].dtype.names
          if name != 'linearcoord'}
         for epoch in epochs]
df1 = pd.DataFrame(ndata)
day_epoch_ind = [{'animal': day[1], 'day': day_ind + 1, 'epoch_ind': epoch_ind + 1}
                 for day_ind, day in enumerate(tasks)
                 for epoch_ind, epoch in enumerate(day[0])]
df2 = pd.DataFrame(day_epoch_ind)
# `join_axes` was removed from pd.concat (pandas >= 1.0); reindexing to
# df1's index is the equivalent operation.
epochs_df = (pd
             .concat([df2, df1], axis=1)
             .reindex(df1.index)
             .set_index(['animal', 'day', 'epoch_ind'])
             .assign(environment=lambda x: pd.Categorical(x['environment']))
             .assign(type=lambda x: pd.Categorical(x['type']))
             )
print(epochs_df.info())
print('\n')
print(epochs_df)
print('\n')
# Check accessing by Multi-dimensional index (animal HPa, days 6 and 8)
print(epochs_df.loc[(['HPa'], [6, 8]), :])
print('\n')
print(epochs_df
      .loc[(['HPa'], [6, 8]), :]
      .loc[epochs_df.environment == 'wtr1'])
print('\n')
Now the same thing with the tetrode info. This is a little trickier, because there are tetrodes for each epoch. It might be better to have one for each day-epoch (a dictionary of data frames).
In [2]:
def get_tetrode_info(animal):
    '''Return the Matlab tetrode-info file name for `animal`, assuming the
    file lives in the Raw-Data directory one level above the working
    directory.
    '''
    raw_data_dir = '{}/Raw-Data'.format(os.path.abspath(os.path.pardir))
    return '{data_dir}/{animal.directory}/{animal.short_name}tetinfo.mat'.format(
        data_dir=raw_data_dir, animal=animal)
def convert_to_dict(struct_array):
    '''Flatten a scipy.io.loadmat struct array into {field_name: value},
    squeezing out singleton dimensions. Returns an empty dict when the
    array has no named fields (dtype.names is None raises TypeError).
    '''
    try:
        field_names = struct_array.dtype.names
        return {field: np.squeeze(struct_array[field][0, 0][0])
                for field in field_names}
    except TypeError:
        return {}
tetrode_file_names = [(get_tetrode_info(animals[animal]), animal) for animal in animals]
tetrode_info = [(scipy.io.loadmat(file_name[0]), file_name[1]) for file_name in tetrode_file_names]
# One record per (animal, day, epoch, tetrode); indices converted to 1-based.
tetrode_info = [(convert_to_dict(tetrode[0]), animal, day_ind + 1, epoch_ind + 1, tetrode_ind + 1)
                for info, animal in tetrode_info
                for day_ind, day in enumerate(info['tetinfo'].T)
                for epoch_ind, epoch in enumerate(day[0].T)
                for tetrode_ind, tetrode in enumerate(epoch[0].T)]
tet_df = pd.DataFrame([info[0] for info in tetrode_info])
day_epoch_ind = [{'animal': info[1], 'day': info[2], 'epoch_ind': info[3], 'tetrode_number': info[4]}
                 for info in tetrode_info]
day_epoch_df = pd.DataFrame(day_epoch_ind)
# `join_axes` was removed from pd.concat (pandas >= 1.0); reindex instead.
tetrode_df = (pd
              .concat([day_epoch_df, tet_df], axis=1)
              .reindex(day_epoch_df.index)
              .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number'])  # multi-index identifies rows
              .assign(numcells=lambda x: x['numcells'].astype(int))  # convert numcells to integer type
              .assign(depth=lambda x: x['depth'].astype(int))        # convert depth to integer type
              .assign(area=lambda x: pd.Categorical(x['area']))      # convert area to categorical type
              )
print(tetrode_df.info())
print('\n')
# Check accessing by Multi-dimensional index
# Get all electrodes from HPa, day 8, epoch 2,4
print(tetrode_df.loc[(['HPa'], [8], [2, 4]), :])
print('\n')
# Now only electrodes with cells recorded on them
print(tetrode_df
      .loc[tetrode_df.numcells > 0]
      .loc[('HPa', 8, [2, 4])])
print('\n')
print(tetrode_df
      .loc[tetrode_df.numcells > 0]
      .loc[('HPa', 8, [2, 4])].index.values)
tetrode_df.head()
Out[2]:
Now let's try organizing tetrodes by a dictionary mapping (animal, day, epoch_ind) -> tetrode dataframe. If several dataframes are selected, they can be concatenated. This might be easier than trying to use the epochs dataframe.
In [3]:
# Re-load the raw tetrode-info .mat file for each animal.
tetrode_data = [(scipy.io.loadmat(file_name[0]), file_name[1]) for file_name in tetrode_file_names]
def convert_tetrode_epoch_to_dataframe(tetrodes_in_epoch, animal, day, epoch_ind):
    '''
    Given an epoch data structure, return a cleaned up DataFrame
    '''
    # One dict per tetrode in this epoch's Matlab cell array.
    tetrode_dict_list = [convert_to_dict(tetrode) for tetrode in tetrodes_in_epoch[0][0]]
    return (pd.DataFrame(tetrode_dict_list)
            .assign(numcells=lambda x: x['numcells'].astype(int))  # convert numcells to integer type
            .assign(depth=lambda x: x['depth'].astype(int))  # convert depth to integer type
            .assign(area=lambda x: pd.Categorical(x['area']))  # convert area to categorical type
            .assign(animal=lambda x: animal)
            .assign(day=lambda x: day)
            .assign(epoch_ind=lambda x: epoch_ind)
            .assign(tetrode_number=lambda x: x.index + 1)  # tetrodes are 1-indexed
            .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number'])  # set index to identify rows
            )
# Make a dictionary with (animal, day, epoch_ind) as the keys
# (day and epoch indices are converted from 0-based to 1-based).
tetrode_info2 = {(animal, day_ind + 1, epoch_ind + 1):
                 convert_tetrode_epoch_to_dataframe(epoch, animal, day_ind + 1, epoch_ind + 1)
                 for info, animal in tetrode_data
                 for day_ind, day in enumerate(info['tetinfo'].T)
                 for epoch_ind, epoch in enumerate(day[0].T)}
Get one dataframe based on a particular animal, day, epoch
In [4]:
# Grab an arbitrary (animal, day, epoch_ind) key and show its tetrode DataFrame.
epoch_keys = list(tetrode_info2.keys())
print(epoch_keys[0])
tetrode_info2[epoch_keys[0]]
Out[4]:
Get two dataframes and concat based on keys
In [5]:
# Concatenate the tetrode DataFrames for the first two epoch keys.
print(epoch_keys[:2])
pd.concat([tetrode_info2[key] for key in epoch_keys[:2]])
Out[5]:
Now use the epoch data structure to get only those epochs with the w-track task and get the corresponding electrodes
In [6]:
# Select only the w-track epochs from epochs_df, then fetch the matching
# tetrode DataFrames by their (animal, day, epoch_ind) index keys.
epoch_keys = list(epochs_df.loc[epochs_df.environment == 'wtr1'].index)
pd.concat([tetrode_info2[key] for key in epoch_keys])
Out[6]:
Now say we want only the epochs where the animal is running the w-track and electrodes with at least one cell recorded
In [7]:
# W-track epochs only, restricted to tetrodes with at least one recorded cell.
epoch_keys = list(epochs_df.loc[epochs_df.environment == 'wtr1'].index)
tetrodes_by_epoch = pd.concat([tetrode_info2[key] for key in epoch_keys])
tetrodes_by_epoch.loc[tetrodes_by_epoch.numcells > 0]
Out[7]:
In [40]:
def get_df_index(data_frame):
    '''Return the rows of `data_frame`'s (multi-)index as a list of tuples.

    Iterates the index directly: `Index.get_values()` was deprecated and
    then removed in modern pandas, while `list(index)` yields the same
    tuples for a MultiIndex (and the same scalars for a flat index).
    '''
    return list(data_frame.index)
# Index tuples (animal, day, epoch_ind, tetrode_number) for tetrodes with cells.
tetrode_index = get_df_index(tetrodes_by_epoch.loc[tetrodes_by_epoch.numcells > 0])
def get_LFP_file_name(index, animals):
    ''' Given an index tuple (animal, day, epoch, tetrode_number) and the
    animals dictionary, return the file name of that tetrode's LFP file.
    '''
    animal_name, day, epoch, tetrode_number = index
    animal = animals[animal_name]
    raw_data_dir = '{working_dir}/Raw-Data'.format(
        working_dir=os.path.abspath(os.path.pardir))
    base_name = '{animal.short_name}eeg{day:02d}-{epoch}-{tetrode:02d}.mat'.format(
        animal=animal, day=day, epoch=epoch, tetrode=tetrode_number)
    return '{data_dir}/{directory}/EEG/{base_name}'.format(
        data_dir=raw_data_dir, directory=animal.directory, base_name=base_name)
# Spot-check the file-name construction on the last selected tetrode.
print(tetrode_index[-1])
print(get_LFP_file_name(tetrode_index[-1], animals))
def get_LFP_data(tetrode_index, animals):
    ''' Given a tetrode index tuple and the animals dictionary,
    return the LFP data and start time
    '''
    lfp_file = scipy.io.loadmat(get_LFP_file_name(tetrode_index, animals))
    # Drill through the Matlab cell-array nesting (eeg{day}{epoch}{tetrode}).
    # NOTE(review): [0, -1] takes the *last* slot at each level -- presumably
    # earlier slots are empty padding in these files; confirm against the data.
    lfp_data = lfp_file['eeg'][0, -1][0, -1][0, -1]
    # Reconstruct per-sample time stamps from start time, sample count, and
    # sampling rate, and pair them with the raw trace.
    data_dict = {'time': _get_LFP_time(lfp_data['starttime'][0, 0], lfp_data['data'][0, 0].size, lfp_data['samprate'][0, 0]),
                 'electric_potential': lfp_data['data'][0, 0].squeeze()
                 }
    return pd.DataFrame(data_dict).set_index('time')
def _get_LFP_time(start_time, number_samples, sampling_rate):
''' Returns an array of time stamps
'''
end_time = start_time + (number_samples / sampling_rate)
return np.round(np.arange(start_time, end_time, (1 / sampling_rate)), decimals=4)
# Sanity check: list the available fields, then load and plot one tetrode's LFP.
lfp_file = scipy.io.loadmat(get_LFP_file_name(tetrode_index[-1], animals))
print(lfp_file['eeg'][0, -1][0, -1][0, -1].dtype.names)
lfp_data = get_LFP_data(tetrode_index[-1], animals)
lfp_data.plot()
Out[40]:
Now let's do the same thing for the neuron info
In [9]:
def get_neuron_info(animal):
    '''Return the Matlab cell-info file name for `animal`, assuming the
    file lives in the Raw-Data directory one level above the working
    directory.
    '''
    raw_data_dir = '{}/Raw-Data'.format(os.path.abspath(os.path.pardir))
    return '{data_dir}/{animal.directory}/{animal.short_name}cellinfo.mat'.format(
        data_dir=raw_data_dir, animal=animal)
# Load the cell-info .mat file for each animal.
neuron_file_names = [(get_neuron_info(animals[animal]), animal) for animal in animals]
neuron_data = [(scipy.io.loadmat(file_name[0]), file_name[1]) for file_name in neuron_file_names]
def convert_neuron_epoch_to_dataframe(tetrodes_in_epoch, animal, day, epoch_ind):
    '''
    Given a neuron data structure, return a cleaned up DataFrame
    '''
    # Analysis-tag columns that are not needed here; dropped if present.
    DROP_COLUMNS = ['ripmodtag', 'thetamodtag', 'runripmodtag', 'postsleepripmodtag',
                    'presleepripmodtag', 'runthetamodtag', 'ripmodtag2', 'runripmodtag2',
                    'postsleepripmodtag2', 'presleepripmodtag2', 'ripmodtype',
                    'runripmodtype', 'postsleepripmodtype', 'presleepripmodtype',
                    'FStag', 'ripmodtag3', 'runripmodtag3', 'ripmodtype3', 'runripmodtype3',
                    'tag', 'typetag', 'runripmodtype2', 'tag2', 'ripmodtype2', 'descrip']
    # One dict per recorded neuron, tagged with its 1-based tetrode/neuron
    # numbers; empty neuron slots in the Matlab cell array are skipped.
    neuron_dict_list = [add_to_dict(convert_to_dict(neuron), tetrode_ind, neuron_ind)
                        for tetrode_ind, tetrode in enumerate(tetrodes_in_epoch[0][0])
                        for neuron_ind, neuron in enumerate(tetrode[0])
                        if neuron.size > 0
                        ]
    return (pd.DataFrame(neuron_dict_list)
            # Keyword form: the positional `axis` argument to drop() was
            # deprecated and removed in modern pandas.
            .drop(columns=DROP_COLUMNS, errors='ignore')
            .assign(animal=lambda x: animal)
            .assign(day=lambda x: day)
            .assign(epoch_ind=lambda x: epoch_ind)
            .set_index(['animal', 'day', 'epoch_ind', 'tetrode_number', 'neuron_number'])  # set index to identify rows
            )
def add_to_dict(dictionary, tetrode_ind, neuron_ind):
    '''Tag `dictionary` in place with 1-based tetrode and neuron numbers
    derived from the given 0-based indices, and return the same dict.
    '''
    dictionary.update(tetrode_number=tetrode_ind + 1,
                      neuron_number=neuron_ind + 1)
    return dictionary
# Make a dictionary with (animal, day, epoch_ind) as the keys
# (day and epoch indices are converted from 0-based to 1-based).
neuron_info = {(animal, day_ind + 1, epoch_ind + 1):
               convert_neuron_epoch_to_dataframe(epoch, animal, day_ind + 1, epoch_ind + 1)
               for info, animal in neuron_data
               for day_ind, day in enumerate(info['cellinfo'].T)
               for epoch_ind, epoch in enumerate(day[0].T)
               }
# Show the neuron table for one example epoch.
epoch_keys = list(neuron_info.keys())
print(neuron_info[epoch_keys[1]].info())
neuron_info[epoch_keys[1]]
Out[9]:
In [10]:
# Neurons from w-track epochs only; dropna removes neurons missing fields.
epoch_keys = get_df_index(epochs_df.loc[epochs_df.environment == 'wtr1'])
pd.concat([neuron_info[key] for key in epoch_keys]).dropna()
Out[10]:
In [23]:
# Index tuples (animal, day, epoch_ind, tetrode_number, neuron_number)
# for w-track neurons with complete records.
neuron_index = get_df_index(pd.concat([neuron_info[key] for key in epoch_keys]).dropna())
print(neuron_index[-1])
def get_neuron_data(neuron_index, animals):
    ''' Given a neuron index tuple and the animals dictionary,
    return the spike times
    '''
    neuron_file = scipy.io.loadmat(df.get_data_filename(animals[neuron_index[0]], neuron_index[1], 'spikes'))
    # Drill through spikes{...}{epoch}{tetrode}{neuron}: the 1-based Matlab
    # indices from the tuple are converted to 0-based Python indices.
    # NOTE(review): the day level uses [0, -1] (last slot) rather than the
    # requested day -- presumably each file holds a single day; confirm.
    return neuron_file['spikes'][0, -1][0, neuron_index[2] - 1][0, neuron_index[3] - 1][0, neuron_index[4] - 1][0]['data']
print(get_neuron_data(neuron_index[-1], animals)[0])
In [ ]: