In [3]:
%pylab inline
import time
import spikedetekt
try:
import matplotlib.pyplot as plt
except:
raise
import networkx as nx
#import graphviz_layout
import itertools
import re
import pprint
In [7]:
# Channel numbering
# !NOTE: 1-based index
channels_2541 = {1: [ 5, 6, 7, 8], 2: [ 2, 3, 4, 9], 3: [ 1,10,11,32], 4: [12,13,30,31],
5: [14,15,16,29], 6: [17,18,19,28], 7: [20,21,26,27], 8: [22,23,24,25],
9: [37,38,39,40], 10: [34,35,36,41], 11: [33,42,43,64], 12: [44,45,62,63],
13: [46,47,48,61], 14: [49,50,51,60], 15: [52,53,58,59], 16: [54,55,56,57]}
# zero index it...
channels_2541 = {t_num: [channel-1 for channel in tetrode] for t_num, tetrode in channels_2541.items()}
tetrodes_2541 = {k: list(itertools.combinations(l, r=2)) for k, l in channels_2541.items()}
tetrodes_2541
Out[7]:
In [8]:
tetrodes = tetrodes_2541
G = nx.Graph()
for k, t in tetrodes.items():
G.add_edges_from(t)
pos = nx.graphviz_layout(G, prog='neato',args='')
plt.figure(figsize=(6, 6))
#colors = np.linspace(0.0, 1.0, num=len(tetrodes))
colors = np.random.random(len(tetrodes))
C=nx.connected_component_subgraphs(G)
for i, g in enumerate(C):
nx.draw(g, pos,
node_size=400, node_color=[colors[i]]*nx.number_of_nodes(g),
vmin=0.0, vmax=1.0, alpha=0.2,
with_labels=True)
Continuous data files (.continuous)
Each continuous channel within each processor has its own file, named "XXX_CHY.continuous", where XXX is the processor ID and Y is the channel number. Each file starts with a fixed 1 kiB text header; after that, every record stores a 64-bit timestamp, a 16-bit sample count N (1024 here), a 16-bit recording number, N samples as big-endian 16-bit integers, and a 10-byte record marker (the bytes 0 through 8 followed by 255).
If a file is opened or closed in the middle of a record, the leading or trailing samples are set to zero.
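The 2070-byte record size used below follows directly from this layout; a quick back-of-the-envelope check (not part of the original session):
In [ ]:
# Sketch: recompute the record size from the field sizes listed above.
# 8 (timestamp) + 2 (sample count) + 2 (recording number)
# + 1024 samples * 2 bytes + 10 (record marker) = 2070 bytes
print 8 + 2 + 2 + 1024*2 + 10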
In [9]:
# fixed header describing data
SIZE_HEADER = 1024
# 2070 bytes per record: 12 byte header (timestamp, sample count, recording number) + 2048 bytes of samples + 10 byte record marker
SIZE_RECORD = 2070
NUM_SAMPLES = 1024
REC_MARKER = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 255], dtype=np.uint8)
In [10]:
def oe_read_header(fname):
""" Return a dict with header content.
"""
# TODO: Use alternative reading method when fname is a file id. If string, use numpy.fromfile
import re
# 1 kiB header
header_dt = np.dtype([('Header', 'S%d' % SIZE_HEADER)])
header = np.fromfile(fname, dtype=header_dt, count=1)
# alternative which moves file pointer
#fid = open(fname, 'rb')
#header = fid.read(SIZE_HEADER)
#fid.close()
# Stand back! I know regex!
# Annoyingly, there is a newline character missing in the header
    regex = r"header\.([\d\w\.\s]{1,}).=.\'*([^\;\']{1,})\'*"
header_str = str(header[0][0]).rstrip(' ')
header_dict = {entry[0]: entry[1] for entry in re.compile(regex).findall(header_str)}
for key in ['bitVolts', 'sampleRate']:
header_dict[key] = float(header_dict[key])
for key in ['blockLength', 'bufferSize', 'header_bytes', 'channel']:
header_dict[key] = int(header_dict[key]) if not key == 'channel' else int(header_dict[key][2:])
return header_dict
In [11]:
def oe_read_records(fname, offset=0, count=10000):
    # offset: offset into the file, in number of records
    # count: number of records to read
with open(fname, 'rb') as fid:
# move pointer to new position
fid.seek(SIZE_HEADER + offset * SIZE_RECORD)
        # dtype covering one full record (2070 bytes, see layout above)
data_dt = np.dtype([('timestamp', np.int64),
('n_samples', np.uint16),
('rec_num', np.uint16),
# !Note: endianness!
('samples', ('>i2', NUM_SAMPLES)),
('rec_mark', (np.uint8, 10))])
data = np.fromfile(fid, dtype=data_dt, count=count)
return data
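Since records have a fixed size, the number of records in a file follows from the file size alone; a small usage sketch (not part of the original session) on the example file used below:
In [ ]:
# Sketch: derive the record count of a .continuous file from its size.
import os
fname = 'examples/2541_2014-03-13_20-45-41/100_CH12.continuous'
n_rec = (os.path.getsize(fname) - SIZE_HEADER) // SIZE_RECORD
print n_rec
# reading the whole file would then be:
# all_records = oe_read_records(fname, count=n_rec)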
In [12]:
example = 'examples/2541_2014-03-13_20-45-41/100_CH12.continuous'
pprint.pprint(oe_read_header(example))
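The header's bitVolts field gives the per-bit scale factor; a small sketch (assuming, as the field name suggests, that multiplying the raw int16 samples by bitVolts yields values in physical units):
In [ ]:
# Sketch: scale raw samples with the header's bitVolts field (assumed to be
# the per-bit scale factor to physical units).
hdr = oe_read_header(example)
rec = oe_read_records(example, count=1)
print rec['samples'].ravel()[:5] * hdr['bitVolts']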
In [13]:
data = oe_read_records(example, offset=5, count=1000)
n = 2
plt.figure(figsize=(13, 5))
plot(data['samples'][:n].ravel())
xlim(0, 1024*n)
data['samples'].__array_interface__
Out[13]:
In [14]:
# Timestamps should increase monotonically in steps of 1024
assert len(set(np.diff(data['timestamp']))) == 1 and np.diff(data['timestamp'][:2]) == 1024
print 'timestamps: ', data['timestamp'][0]
# Number of samples in each record should be NUM_SAMPLES, or 1024
assert len(set(data['n_samples'])) == 1 and data['n_samples'][0] == NUM_SAMPLES
print 'N samples: ', data['n_samples'][0]
# should be byte pattern [0...8, 255]
stringified = map(str, data['rec_mark']) # <- slow
assert len(set(stringified)) == 1 and str(data['rec_mark'][0]) == str(REC_MARKER)
print 'record marker: ', data['rec_mark'][0]
# should be zero, or there are multiple recordings in this file
assert len(set(data['rec_num'])) == 1 and data['rec_num'][0] == 0
print 'Number recording: ', data['rec_num'][0]
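The same checks can be bundled into a small helper for reuse on other files; a sketch (the name oe_check_records is ours, not from the original notebook):
In [ ]:
# Sketch: wrap the sanity checks above into one reusable function.
def oe_check_records(data):
    # timestamps increase monotonically in steps of NUM_SAMPLES
    assert np.all(np.diff(data['timestamp']) == NUM_SAMPLES)
    # every record claims exactly NUM_SAMPLES samples
    assert np.all(data['n_samples'] == NUM_SAMPLES)
    # every record ends with the marker [0, 1, ..., 8, 255]
    assert np.all(data['rec_mark'] == REC_MARKER)
    # a single recording (number 0) per file
    assert np.all(data['rec_num'] == 0)
    return True

oe_check_records(data)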
In [15]:
def data_to_buf(path, channels=64, count=1000, proc_node=100, channel_offset=500):
base_end = '{proc_node:d}_CH{channel:d}.continuous'
    # channels: either an iterable of channel indices, or a number of channels
    channels = channels if np.iterable(channels) else list(range(channels))
    # temporary storage
    buf = np.zeros((len(channels), count*NUM_SAMPLES), dtype='>i2')
# load chunk of data from all channels
print 'Reading chunk'
    for i, n in enumerate(channels):
        fname = path + base_end.format(proc_node=proc_node, channel=n+1)
        # channel offset is a clunky shortcut to spread channels out for plotting
        if channel_offset:
            buf[i] = oe_read_records(fname, count=count)['samples'].ravel() + channel_offset * (n-32)
        else:
            buf[i] = oe_read_records(fname, count=count)['samples'].ravel()
return buf
In [16]:
# path to folder of current session
example_path = 'examples/2541_2014-03-13_20-45-41/'
# number of records to load at once
num_records = 1000
# load the first records from ALL channels
t = time.time()
tmp = data_to_buf(path=example_path, count=num_records).transpose()
size, elapsed = tmp.nbytes/1e6, time.time() - t
# Take speed with a grain of salt. Linux is smart enough to keep files in RAM once used.
print 'Read {0:.1f} MB in {1:.2f} s, {2:.2f} MB/s'.format(size, elapsed, size*1./elapsed)
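To time a truly cold read, the page cache can be dropped first; a sketch (Linux only, requires root, not part of the original session):
In [ ]:
# Sketch: drop the Linux page cache so the next read hits the disk, not RAM.
# Requires root; harmless to skip otherwise.
import subprocess
subprocess.call('sync && echo 3 > /proc/sys/vm/drop_caches', shell=True)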
In [17]:
plt.figure(figsize=(14, 18))
print tmp.shape
plot(tmp[5000:10000]);
In [21]:
def oe_to_dat(dirname, outfile='./proc/raw.dat', channels=64, proc_node=100, chunk_size=10000):
# TODO: Check that no samples are getting lost, at start, chunk-borders or file end
# TODO: Instead of channel list, give channel dict and write to appropriate raw file grouping
# This won't require re-reading for each tetrode. Not that it takes that long...
import os
start_t = time.time()
fname_base = dirname + '{proc_node:d}_CH{channel:d}.continuous'
size_header = 1024 # Bytes
size_record = 2070 # Bytes
num_samples = 1024 # per record
    # channels: either an iterable of channel indices, or a number of channels
    channels = channels if np.iterable(channels) else list(range(channels))
# list of files
fnames = [fname_base.format(proc_node=proc_node, channel=chan+1) for chan in channels]
#print fnames
# check that filesizes are equal
fsizes = [os.path.getsize(fname) for fname in fnames]
assert len(set(fsizes)) == 1
    # the data should be an integer number of records, i.e. no leftover bytes
    assert not (fsizes[0] - size_header) % size_record
    # calculate the number of records from the file size
    num_records = (fsizes[0] - size_header) // size_record
# pre-allocate array
buf = np.zeros((len(channels), chunk_size*1024), dtype='>i2')
#print 'Allocated {0:.2f} MB buffer'.format(buf.nbytes/1e6)
with open(outfile, 'wb') as fid:
# loop over all records, in chunk sizes
records_left = num_records
chunk_n = 0
written = 0
while records_left:
count = min(records_left, chunk_size)
offset = num_records - records_left
#print '-> Reading chunk {0:d}'.format(chunk_n)
for i, fname in enumerate(fnames):
buf[i, 0:count*num_samples] = \
oe_read_records(fname, count=count, offset=offset)['samples'].ravel()
records_left -= count
# write chunk of interleaved data
#print '<- Writing chunk {0:d}'.format(chunk_n)
if count == chunk_size:
buf.transpose().tofile(fid)
else:
# We don't want to write the trailing zeros on the last chunk
buf[:, 0:count*num_samples].transpose().tofile(fid)
written += (count * 2048 * len(channels))
chunk_n += 1
#print '{0:.0f}% completed.'.format((num_records-records_left)*100.0/num_records),
elapsed = time.time() - start_t
written /= 1e6
speed = written/elapsed
msg_str = '\nConverted {0:.2f} MB into {1:s}, took {2:.2f} s, effectively {3:.2f} MB/s'
print msg_str.format(written, outfile, elapsed, speed)
In [20]:
t = time.time()
for i, tt in channels_2541.items():
    outfile = ''.join(['./proc/TT', str(i), '_Ch', '-'.join([str(c) for c in tt]), '.dat'])
oe_to_dat(example_path, outfile=outfile, channels=tt, chunk_size=50000)
print '\nTotal elapsed: {0:.2f} s'.format(time.time()-t)
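A quick way to address the first TODO in oe_to_dat is to read one of the .dat files back and compare it against the source .continuous data; a sketch for the first tetrode and its first 1000 records:
In [ ]:
# Sketch: verify the interleaved .dat output against the original .continuous
# samples for one tetrode (first 1000 records only).
tt_num = 1
tt = channels_2541[tt_num]
outfile = ''.join(['./proc/TT', str(tt_num), '_Ch', '-'.join([str(c) for c in tt]), '.dat'])
# one column per channel, in the order of the channel list, still big-endian
dat = np.fromfile(outfile, dtype='>i2').reshape((-1, len(tt)))
for col, chan in enumerate(tt):
    fname = example_path + '100_CH{0:d}.continuous'.format(chan+1)
    orig = oe_read_records(fname, count=1000)['samples'].ravel()
    assert np.array_equal(dat[:len(orig), col], orig)
print 'First 1000 records match for tetrode', tt_num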
In [24]:
%timeit buf = empty((64, 100000), dtype='>i2')
In [33]:
buf = zeros((1000, 1000), dtype='>i2')
buf.__array_interface__
Out[33]:
In [35]:
buf[1, 900]
Out[35]: