In [8]:
""" This program is created to be a partial replacement of the MATLAB daqread.m,
which reads the daq binary files created using the Data Signal Acquisition Toolbox.
At the moment it has and will only probably be tested on the Data from Theresa Nicks Tetrode
studies in the HVC of Zebrafinch. 

It is strictly still under construction as does not act as a standalone.
"""

## Import major dependencies: Numpy, Scipy, Struct libraries
import numpy as np
import scipy.io
from struct import *

## Matplot Lib for Testing purposes of viewing data.
import matplotlib.pyplot as plt
%matplotlib inline

## Directory of the binary file to be used.
## NOTE(review): hard-coded absolute Windows path -- consider a configurable data dir.
filename = "C://Users//mbar372//Documents//6644_R161_011510_Adult6644_RA//6644_R161_011510_Adult6644_RA//data7341536644.txt"

f = open(filename, mode='rb')

## Unpack the basic header information, including the size of the header and time info.
## Explicit little-endian fixed-size formats ('<i', '<hh', ...) are used throughout:
## the native 'l' format is 8 bytes on 64-bit Linux and would mis-read this file.
MATLAB_code = f.read(32)              # first 32 bytes: file signature string
headersize = unpack('<i', f.read(4))  # next 4 bytes: int32, size of the main header

# The following fields are not used yet, but are unpacked for reference anyway.
fileVer = unpack('<hh', f.read(4))          # 2x int16 (signed shorts): file version
creationTime = unpack('<6d', f.read(48))    # 6x float64: creation date/time vector
engineOffset = unpack('<d', f.read(8))      # 1x float64: engine offset

## Calculate the length of the file.
f.seek(0, 2)           # seek to end of file (whence=2 means origin at EOF)
fsize = f.tell()       # position at EOF == total file size in bytes
f.seek(headersize[0])  # return to the first byte after the main header

## PRINTING FOR TESTING PURPOSES (parenthesised prints work on both Python 2 and 3).
print(" ----------------------------- ")
print(MATLAB_code)
print(str(headersize) + " " + str(fileVer))
print(creationTime)
print(engineOffset)
print(fsize)
print(" ----------------------------- ")

## Create the navigation arrays.
## lib_nav holds [start, blocksize, headersize, blocktype] for every block in the
## file; data_nav holds [data start, data end] for each data block (header excluded).
lib_nav, data_nav = create_block_library(f)
print(np.shape(data_nav))

## Unpacking the 0th/1st/2nd headers gives channel count, sampling frequency, etc.
## Not needed here: we already know this recording is 22050 Hz x 16 channels.
#f.seek(headersize[0])
#testing = unpack_header(f, lib_nav[2][0])
#print(testing)

f.seek(headersize[0])  # return to the start of the data region, just past the header

## Unpack the first 10000 samples from the data blocks into a flat list.
## This list must later be reshaped so the 16 channels can be separated.
data = unpack_data_library(data_nav, 10000)
f.close()

## Clamp obviously-bad samples: anything outside +/-10 V is replaced with 0.0.
## NOTE(review): replacements are lists [0.0] while good samples are 1-tuples from
## unpack(); both index as data[i][0], so the downstream reshape still works.
for i in range(len(data)):
    if data[i][0] > 10 or data[i][0] < -10:
        data[i] = [0.0]

print(len(data))


 ----------------------------- 
MATLAB Data Acquisition File.
(512,) (0, 1)
(2010.0, 1.0, 15.0, 15.0, 56.0, 46.557291984558105)
(1263592606.557292,)
1750400786
 ----------------------------- 
 CREATING BLOCK LIBRARY...
... DONE
(6676L, 2L)
--- UNPACKING DATA ---
Loading ... 0%
Loading ... 5.0%
Loading ... 10.0%
Loading ... 15.0%
Loading ... 20.0%
Loading ... 25.0%
Loading ... 30.0%
Loading ... 35.0%
Loading ... 40.0%
Loading ... 50.0%
Loading ... 55.0%
Loading ... 60.0%
Loading ... 65.0%
Loading ... 70.0%
Loading ... 75.0%
Loading ... 80.0%
Loading ... 85.0%
Loading ... 90.0%
Loading ... 95.0%
Loading ... 100.0%
--- DONE! ---
10000

In [9]:
def unpack_header(f, position):
    """Unpack a header-type block (blocktype == 0) starting at ``position``.

    Parameters
    ----------
    f : binary file object
        Open file; this function seeks for itself, so the current position
        does not matter.
    position : int
        Byte offset of the start of the block within the file.

    Returns
    -------
    list of str
        The ';'-separated fields of the block's payload, or the string
        "ERROR" if the block at ``position`` is not a header block.
    """
    f.seek(position)
    # Explicit little-endian fixed-size formats: native 'l'/'L' are 8 bytes on
    # 64-bit Linux and would mis-read this 32-bit-field file format.
    blocksize = unpack('<i', f.read(4))[0]  # 1x int32: total block size in bytes
    blocktype = unpack('<i', f.read(4))[0]  # 1x int32: 0=header, 1=data, 2=event

    if blocktype != 0:  # guard in case the caller pointed us at a non-header block
        print("This block is not a header-type block!!!")
        return "ERROR"

    headersize = unpack('<i', f.read(4))[0]  # 1x int32: size of this block's header
    number = unpack('<I', f.read(4))[0]      # 1x uint32: block sequence number (unused)
    typestr = f.read(16)                     # 16-byte type tag (unused)

    # Payload is everything after the header; read it in one call instead of
    # one byte at a time as the original did.
    data = f.read(blocksize - headersize)
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('latin-1')  # Python 3: bytes -> str (Python 2: no-op)
    return data.split(';')

In [10]:
def unpack_data_library(data_pos, iterates, fobj=None):
    """Unpack up to ``iterates`` float64 samples from the file's data blocks.

    The recording is assumed to be 16 channels of 'double' data as per the
    file's settings; the result is a flat list that the caller reshapes.

    Parameters
    ----------
    data_pos : sequence of [start, stop]
        Byte ranges of the data payload of each data block (headers excluded),
        as produced by create_block_library().
    iterates : int
        Maximum number of samples to unpack before returning early.
    fobj : binary file object, optional
        File to read from.  Defaults to the module-level handle ``f`` for
        backward compatibility with the original notebook cell.

    Returns
    -------
    list
        One entry per sample: a 1-tuple ``(value,)`` from struct.unpack, or
        ``[None]`` where the file could not supply a full 8-byte sample.
    """
    src = fobj if fobj is not None else f  # fall back to the notebook's global handle
    linear_data = []
    total = iterates      # initial count, kept for the percentage calculation
    last_milestone = 0    # last 5%-multiple milestone already printed

    print("--- UNPACKING DATA ---")
    print("Loading ... 0%")

    for block in data_pos:
        start, stop = block[0], block[1]
        length = stop - start

        src.seek(start)
        chunk = src.read(length)  # read the whole block in one call

        # Walk the chunk 8 bytes (one float64) at a time.
        for n in range(length // 8):
            sample = chunk[n * 8:(n + 1) * 8]
            if len(sample) == 8:
                linear_data.append(unpack('<d', sample))
            else:
                # Short read (e.g. truncated file): keep a placeholder so
                # sample indices stay aligned across channels.
                print("Error, read returned length of " + str(len(sample)) + ", not 8.... inserting None")
                linear_data.append([None])

            iterates -= 1

            # Integer milestone arithmetic: the original tested a *float*
            # percentage with `% 5 == 0`, which silently skipped milestones
            # (45% never printed) whenever the value was not exact.
            percent = (total - iterates) * 100 // total if total else 100
            while last_milestone + 5 <= percent:
                last_milestone += 5
                print("Loading ... " + str(last_milestone) + ".0%")

            if iterates == 0:
                print("--- DONE! ---")
                return linear_data

    return linear_data

In [11]:
def create_block_library(f):
    """Scan the whole file and build navigation tables for its blocks.

    Parameters
    ----------
    f : binary file object
        Open file; scanned from the beginning regardless of current position.

    Returns
    -------
    library : list
        One entry per block: ``[position, blocksize, headersize, blocktype]``,
        where blocktype is 0 (header/info), 1 (data) or 2 (event info).
        Entry 0 is the special main-file header, stored as
        ``[0, headersize, 0, headersize, 0]``.
    library_data : list
        ``[data_start, data_stop]`` for every data (type 1) block, with the
        block header excluded from the start.
    """
    print(" CREATING BLOCK LIBRARY...")
    f.seek(0)  # start from the very beginning of the file

    ## Read the main file header to find where the blocks begin.
    MATLAB_code = f.read(32)  # 32-byte file signature (unused here)
    # Explicit little-endian int32 ('<i'): the native 'l' format is 8 bytes on
    # 64-bit Linux and would mis-read this file.
    headersize = unpack('<i', f.read(4))[0]  # size of the main header in bytes

    ## Determine the total file size so we know when to stop.
    f.seek(0, 2)        # seek to end of file
    fsize = f.tell()    # byte size of the whole file
    f.seek(headersize)  # back to the first block, just past the main header

    pos = headersize

    ## The first entry is the main header itself, recorded as a special case.
    library = [[0, headersize, 0, headersize, 0]]
    library_data = []

    ## Walk block to block until the end of the file.
    while pos < fsize:
        f.seek(pos)  # go to the start of this block

        ## Read the common block prologue.
        blocksize = unpack('<i', f.read(4))[0]   # 1x int32: total block size
        blocktype = unpack('<i', f.read(4))[0]   # 1x int32: 0/1/2 (see docstring)
        headersize = unpack('<i', f.read(4))[0]  # 1x int32: this block's header size

        if blocksize <= 0:
            # A corrupt or zero size would otherwise loop forever on this position.
            print("Invalid blocksize at position " + str(pos) + ", stopping scan.")
            break

        ## Record the block; data blocks also go into library_data.
        library.append([pos, blocksize, headersize, blocktype])
        if blocktype == 1:
            # NOTE(review): the stop offset is pos + blocksize - headersize rather
            # than pos + blocksize; this contradicts the docstring's [110, 200]
            # example but matches the original code and its MATLAB-verified
            # output, so it is preserved -- confirm against the DAQ format spec.
            library_data.append([pos + headersize, pos + blocksize - headersize])

        ## Advance to the start of the next block.
        pos += blocksize

    print("... DONE")
    return library, library_data

In [16]:
##
##
## USE THIS CELL TO QUICKLY SEE IF SOME AUDIO DATA MAY BE PRESENT IN A CHANNEL... 0 is bottom, 15 is top.
##
##
##
## USE THIS CELL TO QUICKLY SEE IF SOME AUDIO DATA MAY BE PRESENT IN A CHANNEL...
## Channel 0 is plotted at the bottom, channel 15 at the top.
##
start = 0
## Number of samples per channel.  The original read len(pythondata[:, n]) here,
## which used `n` before the loop below defines it (it only worked via a loop
## variable leaked from an earlier cell); the row count is the same for every
## channel, so take it from the array shape instead.
stop = np.shape(pythondata)[0]

for n in range(16):
    # Offset each trace by 0.5*n so the 16 channels stack vertically.
    plt.plot(pythondata[start:stop, n] + n * 0.5)

plt.show()



In [12]:
def xaxis_time(samples, frequency):
    """Return a real-time axis for ``samples`` points sampled at ``frequency`` Hz.

    Parameters
    ----------
    samples : int
        Number of samples in the trace.
    frequency : number
        Sampling frequency in Hz.

    Returns
    -------
    numpy.ndarray
        ``samples`` evenly spaced time points from 0 to samples/frequency seconds.
    """
    # float() guards against Python 2 integer division: e.g. 10000/22050 == 0,
    # which would collapse the whole time axis to zeros.
    return np.linspace(0, float(samples) / frequency, num=samples)

In [13]:
## Load the MATLAB reference data (first 5 samples from each channel, extracted
## with the real daqread.m) so we can validate our unpacked values against it.
matlabdata = "C://Users//mbar372//Documents//6644_R161_011510_Adult6644_RA//6644_R161_011510_Adult6644_RA//mydata_6644_first5rows.mat"
mydata = scipy.io.loadmat(matlabdata)['data']

## Plot the MATLAB data, simply as 16 overlaid traces.
plt.plot(mydata)
plt.show()

## Reshape the flat data list into (n_samples, 16) so each channel is a column.
## '//' keeps the row count an integer: plain '/' becomes float division under
## Python 3 (and with future imports) and would break np.reshape.
pythondata = np.reshape(data[:], [np.shape(data[:])[0] // 16, 16])

## Plot the Python-unpacked data the same way, one offset trace per channel,
## to compare visually with the MATLAB plot above.
timeseries = xaxis_time(len(pythondata[:, 10]), 22050)
for n in range(16):
    plt.plot(pythondata[:, n] + n * 0.5)

plt.show()

## Display the first rows of each dataset as raw numbers to compare exactly.
print("MATLAB DATA")
print(mydata)
print("PYTHON DATA")
print(pythondata[:5, :])


MATLAB DATA
[[ 0.00028257 -0.03760559  0.02602464 -0.01115476 -0.00306808 -0.03354614
  -0.05242577  0.01916223 -0.0262327  -0.00844846 -0.05155589  0.00788598
  -0.01763054  0.00704832 -0.03779889 -0.00548442]
 [-0.0002007  -0.03390054  0.02364051 -0.0135711   0.00057253 -0.02349419
  -0.05142702  0.03211382 -0.02890678 -0.01788829 -0.06019026  0.01500613
  -0.01634183  0.00334327 -0.03702567 -0.0081585 ]
 [ 0.00086249 -0.03170973  0.02209406 -0.01585856 -0.01244347 -0.02887456
  -0.04891403  0.01755134 -0.0262327  -0.0242352  -0.06186558 -0.00306808
  -0.02085233 -0.00809407 -0.05149145 -0.01076815]
 [-0.00081284 -0.0338361   0.02908534 -0.01369997 -0.00451789 -0.03444824
  -0.05052492  0.01210652 -0.00986605 -0.01254013 -0.05261908  0.00592069
  -0.00622543  0.00418093 -0.03986083 -0.00719197]
 [-0.00132832 -0.02906787  0.02589577 -0.01225017  0.00408428 -0.03203191
  -0.05712957  0.01291197 -0.02307535 -0.01659957 -0.05964255  0.00714497
  -0.01621296  0.0073705  -0.03644575 -0.00303587]]
PYTHON DATA
[[ 0.00028257 -0.03760559  0.02602464 -0.01115476 -0.00306808 -0.03354614
  -0.05242577  0.01916223 -0.0262327  -0.00844846 -0.05155589  0.00788598
  -0.01763054  0.00704832 -0.03779889 -0.00548442]
 [-0.0002007  -0.03390054  0.02364051 -0.0135711   0.00057253 -0.02349419
  -0.05142702  0.03211382 -0.02890678 -0.01788829 -0.06019026  0.01500613
  -0.01634183  0.00334327 -0.03702567 -0.0081585 ]
 [ 0.00086249 -0.03170973  0.02209406 -0.01585856 -0.01244347 -0.02887456
  -0.04891403  0.01755134 -0.0262327  -0.0242352  -0.06186558 -0.00306808
  -0.02085233 -0.00809407 -0.05149145 -0.01076815]
 [-0.00081284 -0.0338361   0.02908534 -0.01369997 -0.00451789 -0.03444824
  -0.05052492  0.01210652 -0.00986605 -0.01254013 -0.05261908  0.00592069
  -0.00622543  0.00418093 -0.03986083 -0.00719197]
 [-0.00132832 -0.02906787  0.02589577 -0.01225017  0.00408428 -0.03203191
  -0.05712957  0.01291197 -0.02307535 -0.01659957 -0.05964255  0.00714497
  -0.01621296  0.0073705  -0.03644575 -0.00303587]]

In [4]:


In [6]:
def localPreprocessFile(f, fsize):
    """Read the file trailer to find how many samples were acquired and where
    the last (closing) header object is stored.

    The MATLAB equivalent also derives the number of blocks, info dictionaries
    and a truncated-file flag; those were never computed here, so they are
    returned as None placeholders to keep the original 5-tuple shape without
    raising NameError (the original returned the undefined names
    ``num_of_blocks``, ``info`` and ``flag``, crashing on every call).

    EDIT: REDUNDANT, no longer used.  Kept for reference on how to locate the
    samples-acquired count.

    Parameters
    ----------
    f : binary file object
    fsize : int
        Total size of the file in bytes.

    Returns
    -------
    tuple
        ``(samplesacquired, None, None, lastheaderloc, None)`` where the two
        unpacked values are 1-tuples from struct.unpack.  As a side effect,
        ``f`` is left positioned at the last header location.
    """
    print("localPreprocessFile Definition called ... ")

    ## The last 16 bytes of the file hold two int64s: the number of samples
    ## acquired and the offset of the final header object.
    f.seek(fsize - 16)                         # 16 bytes back from end of file
    samplesacquired = unpack('<q', f.read(8))  # int64: samples acquired
    print("samplesacquired: " + str(samplesacquired))
    lastheaderloc = unpack('<q', f.read(8))    # int64: offset of last header
    print("lastheaderloc: " + str(lastheaderloc))
    f.seek(lastheaderloc[0])                   # position at the last header

    return samplesacquired, None, None, lastheaderloc, None

In [34]:
## NOTE(review): leftover interactive introspection (`obj??` shows source) --
## useful while developing, but these cells should be removed from the
## finished notebook; they also depend on the Python 2 `file` builtin.
f.read??

In [35]:
open??

In [36]:
file.__doc__??

In [ ]: