Loading HDF5 files


In [1]:
import os

import numpy as np
import pandas as pd
import tables as tb

In [2]:
# Recording session to load: the files are named "<basename>.<kind>.h5".
basename = "ec016.694_711"
# Directory holding the session files. This is a machine-specific absolute
# path; edit it to point at your local copy of the data.
folder = r"D:\Spike sorting\sirota"

In [3]:
# Full path of the session's main HDF5 file: <folder>/<basename>.main.h5
main_basename = basename + '.main.h5'
main_filename = os.path.join(folder, main_basename)

In [4]:
# Open the main HDF5 file read-only. `open_file` is the PyTables >= 3.0 name of
# the former camelCase `openFile`, which current PyTables releases no longer
# provide. The handle stays open for the rest of the notebook; call
# `main_file.close()` once you are done with it.
main_file = tb.open_file(main_filename, mode='r')

In [5]:
# Walk the HDF5 hierarchy with natural naming: /shanks/shank0/spikes.
shank0 = main_file.root.shanks.shank0
spikes = shank0.spikes
# Last expression of the cell: display the table description.
spikes


Out[5]:
/shanks/shank0/spikes (Table(10,)) ''
  description := {
  "cluster": UInt32Col(shape=(), dflt=0, pos=0),
  "features": Int16Col(shape=(29,), dflt=0, pos=1),
  "time": UInt64Col(shape=(), dflt=0, pos=2)}
  byteorder := 'little'
  chunkshape := (936,)

In [12]:
# `waveforms` is stored as a link node; calling the link object dereferences it
# and yields the node it points to.
waveforms_link = main_file.root.shanks.shank0.waveforms
waves = waveforms_link()

Benchmarks: columns


In [7]:
# Time reading the 'cluster' column of the spikes table
# (-n 10: 10 loops per repeat, -r 2: best of 2 repeats).
%timeit -n 10 -r 2 main_file.root.shanks.shank0.spikes.col('cluster')


10 loops, best of 2: 42 ms per loop

In [8]:
# Time reading the 'features' column of the spikes table
# (-n 10: 10 loops per repeat, -r 2: best of 2 repeats).
%timeit -n 10 -r 2 main_file.root.shanks.shank0.spikes.col('features')


10 loops, best of 2: 69 ms per loop

In [9]:
# Time reading the 'time' column of the spikes table
# (-n 10: 10 loops per repeat, -r 2: best of 2 repeats).
%timeit -n 10 -r 2 main_file.root.shanks.shank0.spikes.col('time')


10 loops, best of 2: 42.8 ms per loop

Benchmarks: selecting clusters


In [10]:
# Read the whole 'cluster' column of the spikes table, then reduce it to the
# sorted set of distinct cluster ids.
spikes_table = main_file.root.shanks.shank0.spikes
clusters = spikes_table.col('cluster')
clusters_unique = np.unique(clusters)

In [11]:
cluster = 5
# Selection condition evaluated by PyTables against the 'cluster' column.
cluster_query = 'cluster=={0:d}'.format(cluster)
# `read_where` is the PyTables >= 3.0 name of the former camelCase `readWhere`;
# it returns the rows matching the condition.
cluster5 = main_file.root.shanks.shank0.spikes.read_where(cluster_query)
cluster5.shape


Out[11]:
(2174L,)

In [12]:
# Row indices, within the full spikes table, of the rows whose 'cluster' field
# equals `cluster`. `get_where_list` is the PyTables >= 3.0 name of the former
# camelCase `getWhereList`.
cluster5_spk = main_file.root.shanks.shank0.spikes.get_where_list('cluster=={0:d}'.format(cluster))
cluster5_spk


Out[12]:
array([    492,     794,     862, ..., 1464179, 1464180, 1464181], dtype=int64)

In [13]:
%timeit -n 10 -r 2 main_file.root.shanks.shank0.spikes.readWhere('cluster=={0:d}'.format(cluster))


10 loops, best of 2: 63.1 ms per loop

In [31]:
# Feature vectors of the selected rows as a DataFrame, indexed by each row's
# position in the full spikes table.
features = pd.DataFrame(data=cluster5['features'], index=cluster5_spk)

Accessing waveforms


In [29]:
# Take element [5][0] of `waves`, reshape it to 8 columns (row count inferred
# by -1) and plot it; the trailing `;` suppresses the textual repr of the result.
# NOTE(review): `plot` is not imported in the visible cells — presumably the
# notebook was run in pylab mode (e.g. `%pylab inline`); confirm, or switch to
# an explicit matplotlib import.
# NOTE(review): the 8 is presumably the number of channels — TODO confirm.
plot(waves[5][0].reshape((-1, 8)));