In [3]:
%pylab inline
import time
import spikedetekt

import matplotlib.pyplot as plt
import networkx as nx
#import graphviz_layout

import itertools
import re
import pprint


Populating the interactive namespace from numpy and matplotlib

Mouse 2541

Defining probe layout for .probe file

SpikeDetekt requires a graph describing the probe geometry. For tetrodes, I will represent each wire bundle as four mutually connected nodes.

NOTE: These are uncorrected tetrode numbers. No dead channels assigned.


In [7]:
# Channel numbering
# !NOTE: 1-based index
channels_2541 = {1: [ 5, 6, 7, 8], 2: [ 2, 3, 4, 9], 3: [ 1,10,11,32], 4: [12,13,30,31],
                 5: [14,15,16,29], 6: [17,18,19,28], 7: [20,21,26,27], 8: [22,23,24,25],
                 9: [37,38,39,40], 10: [34,35,36,41], 11: [33,42,43,64], 12: [44,45,62,63],
                 13: [46,47,48,61], 14: [49,50,51,60], 15: [52,53,58,59], 16: [54,55,56,57]}

# zero index it...
channels_2541 = {t_num: [channel-1 for channel in tetrode] for t_num, tetrode in channels_2541.items()}

tetrodes_2541 = {k: list(itertools.combinations(l, r=2)) for k, l in channels_2541.items()}
tetrodes_2541


Out[7]:
{1: [(4, 5), (4, 6), (4, 7), (5, 6), (5, 7), (6, 7)],
 2: [(1, 2), (1, 3), (1, 8), (2, 3), (2, 8), (3, 8)],
 3: [(0, 9), (0, 10), (0, 31), (9, 10), (9, 31), (10, 31)],
 4: [(11, 12), (11, 29), (11, 30), (12, 29), (12, 30), (29, 30)],
 5: [(13, 14), (13, 15), (13, 28), (14, 15), (14, 28), (15, 28)],
 6: [(16, 17), (16, 18), (16, 27), (17, 18), (17, 27), (18, 27)],
 7: [(19, 20), (19, 25), (19, 26), (20, 25), (20, 26), (25, 26)],
 8: [(21, 22), (21, 23), (21, 24), (22, 23), (22, 24), (23, 24)],
 9: [(36, 37), (36, 38), (36, 39), (37, 38), (37, 39), (38, 39)],
 10: [(33, 34), (33, 35), (33, 40), (34, 35), (34, 40), (35, 40)],
 11: [(32, 41), (32, 42), (32, 63), (41, 42), (41, 63), (42, 63)],
 12: [(43, 44), (43, 61), (43, 62), (44, 61), (44, 62), (61, 62)],
 13: [(45, 46), (45, 47), (45, 60), (46, 47), (46, 60), (47, 60)],
 14: [(48, 49), (48, 50), (48, 59), (49, 50), (49, 59), (50, 59)],
 15: [(51, 52), (51, 57), (51, 58), (52, 57), (52, 58), (57, 58)],
 16: [(53, 54), (53, 55), (53, 56), (54, 55), (54, 56), (55, 56)]}
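
Since the goal is a .probe file, the pair lists above can be written straight to disk. Below is a minimal sketch assuming the legacy SpikeDetekt probe format, i.e. a Python file defining a `probes` dictionary that maps shank number to a list of channel-pair edges; the helper name and file path are my own and should be checked against the SpikeDetekt documentation before use.

def write_probe_file(fname, tetrodes):
    # Dump the adjacency lists into a Python file defining `probes`
    # (assumed legacy SpikeDetekt .probe layout).
    with open(fname, 'w') as fid:
        fid.write('probes = {\n')
        for shank, edges in sorted(tetrodes.items()):
            fid.write('    {0:d}: {1!r},\n'.format(shank, edges))
        fid.write('}\n')

#write_probe_file('./proc/2541.probe', tetrodes_2541)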

In [8]:
tetrodes = tetrodes_2541

G = nx.Graph()
for k, t in tetrodes.items():
    G.add_edges_from(t)
pos = nx.graphviz_layout(G, prog='neato', args='')
plt.figure(figsize=(6, 6))
#colors = np.linspace(0.0, 1.0, num=len(tetrodes))
colors = [random.random() for n in xrange(len(tetrodes))]
C = nx.connected_component_subgraphs(G)
for i, g in enumerate(C):
    # draw each connected component (tetrode) in its own color
    nx.draw(g, pos,
            node_size=400, node_color=[colors[i]]*nx.number_of_nodes(g),
            vmin=0.0, vmax=1.0, alpha=0.2,
            with_labels=True)


Looking into Open Ephys .continuous files

Continuous data files (.continuous)

Each continuous channel within each processor has its own file, titled "XXX_CHY.continuous," where XXX = the processor ID #, and Y = the channel number. For each record, it saves:

  • One int64 timestamp (actually a sample number; this can be converted to seconds using the sampleRate variable in the header)
  • One uint16 number (N) indicating the samples per record (always 1024, at least for now)
  • One uint16 recording number (version 0.2 and higher)
  • 1024 int16 samples
  • 10-byte record marker (0 1 2 3 4 5 6 7 8 255)

If a file is opened or closed in the middle of a record, the leading or trailing samples are set to zero.
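
Adding those fields up gives 2070 bytes per record; a one-line check of that arithmetic (just bookkeeping, not part of the reading code):

# timestamp (int64) + sample count (uint16) + recording number (uint16)
# + 1024 samples (int16) + 10-byte record marker
assert 8 + 2 + 2 + 1024 * 2 + 10 == 2070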


In [9]:
# fixed header describing data
SIZE_HEADER = 1024

# 2070 bytes per record: 12 byte header (timestamp, sample count, recording number)
# plus 2048 bytes of samples and the 10 byte record marker
SIZE_RECORD = 2070

NUM_SAMPLES = 1024
REC_MARKER = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 255], dtype=uint8)

In [10]:
def oe_read_header(fname):
    """ Return a dict with header content.
    """
    # TODO: Use alternative reading method when fname is a file id. If string, use numpy.fromfile
    import re

    # 1 kiB header
    header_dt = np.dtype([('Header', 'S%d' % SIZE_HEADER)])
    header = np.fromfile(fname, dtype=header_dt, count=1)
    
    # alternative which moves file pointer
    #fid = open(fname, 'rb')
    #header = fid.read(SIZE_HEADER)
    #fid.close()
    
    # Stand back! I know regex!
    # Annoyingly, there is a newline character missing in the header
    regex = r"header\.([\d\w\.\s]{1,}).=.\'*([^\;\']{1,})\'*"
    header_str = str(header[0][0]).rstrip(' ')
    header_dict = {entry[0]: entry[1] for entry in re.compile(regex).findall(header_str)}
    for key in ['bitVolts', 'sampleRate']:
        header_dict[key] = float(header_dict[key])
    for key in ['blockLength', 'bufferSize', 'header_bytes', 'channel']:
        header_dict[key] = int(header_dict[key]) if not key == 'channel' else int(header_dict[key][2:])
    return header_dict
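
For illustration, here is what the regex pulls out of a hand-made header line; the values below are a mock-up in the spirit of the real header, not copied from a file.

# Mock header fragment (hypothetical values) showing the (key, value) pairs the regex yields.
mock = "header.sampleRate = 20000; header.channel = 'CH12';"
regex = r"header\.([\d\w\.\s]{1,}).=.\'*([^\;\']{1,})\'*"
print re.compile(regex).findall(mock)
# [('sampleRate', '20000'), ('channel', 'CH12')]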

In [11]:
def oe_read_records(fname, offset=0, count=10000):
    """ Read `count` records from a .continuous file, skipping the first
    `offset` records after the header.
    """
    with open(fname, 'rb') as fid:
        # move pointer to new position
        fid.seek(SIZE_HEADER + offset * SIZE_RECORD)

        # n times x B data
        data_dt = np.dtype([('timestamp', np.int64),
                            ('n_samples', np.uint16),
                            ('rec_num', np.uint16),
                            # !Note: endianness!
                            ('samples', ('>i2', NUM_SAMPLES)),
                            ('rec_mark', (np.uint8, 10))])

        data = np.fromfile(fid, dtype=data_dt, count=count)

    return data

Header + data example:


In [12]:
example = 'examples/2541_2014-03-13_20-45-41/100_CH12.continuous'
pprint.pprint(oe_read_header(example))


{'bitVolts': 0.194999993,
 'blockLength': 1024,
 'bufferSize': 1024,
 'channel': 12,
 'channelType': 'Continuous',
 'date_created': '13-Mar-2014 204546',
 'description': 'each record contains one 64-bit timestamp, one 16-bit sample count (N), 1 uint16 recordingNumber, N 16-bit samples, and one 10-byte record marker (0 1 2 3 4 5 6 7 8 255)',
 'format': 'Open Ephys Data Format',
 'header_bytes': 1024,
 'sampleRate': 20000.0,
 'version': '0.2'}

In [13]:
data = oe_read_records(example, offset=5, count=1000)
n = 2
plt.figure(figsize=(13, 5))
plot(data['samples'][:n].ravel())
xlim(0, 1024*n)
data['samples'].__array_interface__


Out[13]:
{'data': (140296749948956, False),
 'descr': [('', '>i2')],
 'shape': (1000, 1024),
 'strides': (2070, 2),
 'typestr': '>i2',
 'version': 3}

Sanity checks on the records read


In [14]:
# Timestamps should increase monotonically in steps of 1024
assert len(set(np.diff(data['timestamp']))) == 1 and np.diff(data['timestamp'][:2]) == 1024
print 'timestamps: ', data['timestamp'][0]

# Number of samples in each record should be NUM_SAMPLES, or 1024
assert len(set(data['n_samples'])) == 1 and data['n_samples'][0] == NUM_SAMPLES
print 'N samples: ', data['n_samples'][0]

# should be byte pattern [0...8, 255]
stringified = map(str, data['rec_mark']) # <- slow
assert len(set(stringified)) == 1 and str(data['rec_mark'][0]) == str(REC_MARKER)
print 'record marker: ', data['rec_mark'][0]

# should be zero, or there are multiple recordings in this file
assert len(set(data['rec_num'])) == 1 and data['rec_num'][0] == 0
print 'Number recording: ', data['rec_num'][0]


timestamps:  20024120
N samples:  1024
record marker:  [  0   1   2   3   4   5   6   7   8 255]
Number recording:  0

Plotting Open Ephys 0.2x files

Chunk-wise reading of all 64 neural channels.

The simplest approach is to seek and read chunk by chunk: no generators, no context managers held across reads, no files left open in between.

TODO: timings (especially whether the big-endian byte order affects read speed)


In [15]:
def data_to_buf(path, channels=64, count=1000, proc_node=100, channel_offset=500):
    base_end = '{proc_node:d}_CH{channel:d}.continuous'

    # channels to include, given either as a number of channels or as an iterable of indices
    channels = channels if np.iterable(channels) else list(range(channels))

    # temporary storage
    buf = np.zeros((len(channels), count * NUM_SAMPLES), dtype='>i2')

    # load a chunk of data from all channels
    print 'Reading chunk'
    for n in channels:
        fname = path + base_end.format(proc_node=proc_node, channel=n+1)
        # channel offset is a clunky shortcut to spread the traces out for plotting
        if channel_offset:
            buf[n] = oe_read_records(fname, count=count)['samples'].ravel() + channel_offset * (n-32)
        else:
            buf[n] = oe_read_records(fname, count=count)['samples'].ravel()

    return buf

In [16]:
# path to folder of current session
example_path = 'examples/2541_2014-03-13_20-45-41/'

# number of records to load at once
num_records = 1000

# load the first num_records records from ALL channels in the session folder
t = time.time()
tmp = data_to_buf(path=example_path, count=num_records).transpose()
size, elapsed = tmp.nbytes/1e6, time.time() - t

# Take speed with a grain of salt. Linux is smart enough to keep files in RAM once used.
print 'Read {0:.1f} MB in {1:.2f} s, {2:.2f} MB/s'.format(size, elapsed, size*1./elapsed)


Reading chunk
Read 131.1 MB in 5.43 s, 24.14 MB/s

In [17]:
plt.figure(figsize=(14, 18))
print tmp.shape
plot(tmp[5000:10000]);


(1024000, 64)

Conversion to .dat files

Ideally, channels should be taken from the probe file and matched with a list of dead/excluded channels.
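
As a stopgap until that bookkeeping exists, the exclusion could look like the sketch below; `dead_channels` is a hypothetical placeholder list (none have been assigned for this mouse, see the note at the top).

# Hypothetical dead-channel list; empty until bad channels are identified.
dead_channels = []

# Drop dead channels from each tetrode before conversion.
channels_clean = {t_num: [ch for ch in tetrode if ch not in dead_channels]
                  for t_num, tetrode in channels_2541.items()}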


In [21]:
def oe_to_dat(dirname, outfile='./proc/raw.dat', channels=64, proc_node=100, chunk_size=10000):
    # TODO: Check that no samples are getting lost, at start, chunk-borders or file end
    # TODO: Instead of channel list, give channel dict and write to appropriate raw file grouping
    #       This won't require re-reading for each tetrode. Not that it takes that long...
    import os
    start_t = time.time()
    fname_base = dirname + '{proc_node:d}_CH{channel:d}.continuous'
    size_header = 1024  # Bytes
    size_record = 2070  # Bytes
    num_samples = 1024  # per record

    # channels to include, given either as an iterable of channel indices or as a number of channels
    channels = channels if np.iterable(channels) else list(range(channels))
    
    # list of files
    fnames = [fname_base.format(proc_node=proc_node, channel=chan+1) for chan in channels]
    #print fnames
    
    # check that filesizes are equal
    fsizes = [os.path.getsize(fname) for fname in fnames]
    assert len(set(fsizes)) == 1
    
    # there should be an integer number of records, i.e. no leftover bytes!
    assert not (fsizes[0] - size_header) % size_record

    # calculate number of records from the file size
    num_records = (fsizes[0] - size_header) / size_record
    
    # pre-allocate array
    buf = np.zeros((len(channels), chunk_size*1024), dtype='>i2')
    #print 'Allocated {0:.2f} MB buffer'.format(buf.nbytes/1e6)
    
    with open(outfile, 'wb') as fid:
        # loop over all records, in chunk sizes
        records_left = num_records
        chunk_n = 0
        written = 0
        while records_left:
            count = min(records_left, chunk_size)
            offset = num_records - records_left
            #print '-> Reading chunk {0:d}'.format(chunk_n)
            for i, fname in enumerate(fnames):
                buf[i, 0:count*num_samples] = \
                oe_read_records(fname, count=count, offset=offset)['samples'].ravel()
            records_left -= count

            # write chunk of interleaved data
            #print '<- Writing chunk {0:d}'.format(chunk_n)
            if count == chunk_size:
                buf.transpose().tofile(fid)
            else:
                # We don't want to write the trailing zeros on the last chunk
                buf[:, 0:count*num_samples].transpose().tofile(fid)
            written += count * 2048 * len(channels)  # 2048 sample bytes per record and channel
            chunk_n += 1
            #print '{0:.0f}% completed.'.format((num_records-records_left)*100.0/num_records),
 
    elapsed = time.time() - start_t
    written /= 1e6
    speed = written/elapsed
    msg_str = '\nConverted {0:.2f} MB into {1:s}, took {2:.2f} s, effectively {3:.2f} MB/s'
    print msg_str.format(written, outfile, elapsed, speed)
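
A quick way to spot-check one of the resulting files is to read it back as interleaved int16 and look at the shape; note that the samples are written big-endian here, exactly as they come out of the .continuous records. A minimal sketch, using one of the tetrode files produced below:

def check_dat(fname, n_channels):
    # Read back an interleaved .dat file ('>i2', as written above) and report its shape.
    dat = np.fromfile(fname, dtype='>i2').reshape(-1, n_channels)
    print fname, dat.shape, dat.dtype
    return dat

#check_dat('./proc/TT1_Ch4-5-6-7.dat', 4)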

In [20]:
t = time.time()
for i, tt in channels_2541.items():
    outfile = ''.join(['./proc/TT', str(i), '_Ch' ,'-'.join([str(c) for c in tt]), '.dat'])
    oe_to_dat(example_path, outfile=outfile, channels=tt, chunk_size=50000)
print '\nTotal elapsed: {0:.2f} s'.format(time.time()-t)


Converted 384.93 MB into ./proc/TT1_Ch4-5-6-7.dat, took 8.82 s, effectively 43.67 MB/s


Converted 384.93 MB into ./proc/TT2_Ch1-2-3-8.dat, took 10.48 s, effectively 36.74 MB/s


Converted 384.93 MB into ./proc/TT3_Ch0-9-10-31.dat, took 8.12 s, effectively 47.38 MB/s


Converted 384.93 MB into ./proc/TT4_Ch11-12-29-30.dat, took 8.04 s, effectively 47.88 MB/s


Converted 384.93 MB into ./proc/TT5_Ch13-14-15-28.dat, took 8.01 s, effectively 48.05 MB/s


Converted 384.93 MB into ./proc/TT6_Ch16-17-18-27.dat, took 11.37 s, effectively 33.86 MB/s


Converted 384.93 MB into ./proc/TT7_Ch19-20-25-26.dat, took 11.17 s, effectively 34.46 MB/s


Converted 384.93 MB into ./proc/TT8_Ch21-22-23-24.dat, took 8.13 s, effectively 47.37 MB/s


Converted 384.93 MB into ./proc/TT9_Ch36-37-38-39.dat, took 8.04 s, effectively 47.85 MB/s


Converted 384.93 MB into ./proc/TT10_Ch33-34-35-40.dat, took 9.95 s, effectively 38.69 MB/s


Converted 384.93 MB into ./proc/TT11_Ch32-41-42-63.dat, took 8.29 s, effectively 46.45 MB/s


Converted 384.93 MB into ./proc/TT12_Ch43-44-61-62.dat, took 14.82 s, effectively 25.98 MB/s


Converted 384.93 MB into ./proc/TT13_Ch45-46-47-60.dat, took 7.97 s, effectively 48.32 MB/s


Converted 384.93 MB into ./proc/TT14_Ch48-49-50-59.dat, took 8.06 s, effectively 47.74 MB/s


Converted 384.93 MB into ./proc/TT15_Ch51-52-57-58.dat, took 8.20 s, effectively 46.93 MB/s


Converted 384.93 MB into ./proc/TT16_Ch53-54-55-56.dat, took 8.49 s, effectively 45.32 MB/s


Total elapsed: 147.98 s

In [24]:
%timeit buf = empty((64, 100000), dtype='>i2')


100000 loops, best of 3: 4.1 µs per loop

In [33]:
buf = zeros((1000, 1000), dtype='>i2')
buf.__array_interface__


Out[33]:
{'data': (140123308075120, False),
 'descr': [('', '>i2')],
 'shape': (1000, 1000),
 'strides': None,
 'typestr': '>i2',
 'version': 3}

In [35]:
buf[1, 900]


Out[35]:
0

In [ ]: