notebook.community

Edit and run



In [9]:

    
import scipy as sp
import scipy.io
from scipy import signal, fftpack
import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile as wav #Can call with wav.read(directory) , reads out the data from our wav file, can put [1] to return data only
import pylab
import contextlib # This is needed for the pcm2float definition.
import os # Redundant unless intending to use the filenames array, as glob captures whole directory / could be faster with this however?
import glob #used to create lists of directories of files within a folder.
import xlwt # Used to create the exporting to Excel document function.
from tempfile import TemporaryFile # Used to create the exporting to Excel document function.



In [10]:

    
%matplotlib inline

working_directory = "C://Users//mbar372//Documents//TheresaAM2//R87//r87//sngr870115wtrigB02-Apr121545" # Set the working directory that contains /bmisong and /rawsong folders



In [13]:

    
def create_song_dataset(directory): # input is the working directory where 'rawsong' folder is kept
    """ This function returns the normalised amplitude data of all songs in a directory, 
    It also creates another array containing just the names of the files, for use in excel data
    """
    ## This is an array of directories to the song files to be tested.
    raw_song_dir_array  = glob.glob(directory + '\\*.mat')

    ## This cycles through every wav file in the array and assigns its 'data' ( [1] is used to return amplitude data only) to song_array
    song_array = [scipy.io.loadmat(n) for n in raw_song_dir_array]
    
    ## Normalises each song individually before creating final output array
    ##song_array = [pcm2float(n,'float64') for n in song_array]
    
    raw_song_filename_array = os.listdir(directory) #The name of the files, not used yet, and needs to be fixed for directory+filename
    
    return song_array,raw_song_filename_array   ## Return array of numpy arrays filled with normalised song data. + names



In [65]:

    
song_file_number = 2


data_array , file_name_array = create_song_dataset(working_directory)


print np.shape(data_array[song_file_number]['sng'][:,0])
print np.shape(data_array[song_file_number]['e1'][0,:])

length_of_data = len(data_array[song_file_number]['e1'][0,:])

times = np.linspace(0,length_of_data/44100.0,num = length_of_data)


plt.plot(data_array[song_file_number]['e1'][0,:])
plt.plot(data_array[song_file_number]['sng'][:,0])
plt.show()


plt.specgram(data_array[song_file_number]['sng'][:,0], Fs = 44100, NFFT = 256)
plt.show()

plt.plot(data_array[song_file_number]['sng'][20000:34000,0])
plt.show()
plt.plot(data_array[song_file_number]['sng'][34000:46000,0])
plt.show()


array1, array2 = create_comparison_arrays(data_array[song_file_number]['sng'][20000:34000,0],data_array[song_file_number]['sng'][34000:46000,0], draw = 'no')

plt.plot(array1)
plt.plot(array2)
plt.show()

print np.corrcoef(array1,array2)[0][1]









    



(67020L,)
(67020L,)






    












    












    












    












    












    



0.299206419522



In [60]:

    
def create_comparison_arrays(array1,array2, draw = "no"):
    """The purpose of this function is to take two arrays, padd them to the same size, 
    find the point of highest similarity using conjugates in the Fourier plane, 
    and then offset these padded arrays by an amount to create two comparable datasets at a point of highest similarity.
    """    
    #sizes and calculate the padding value
    size1 = int(array1.shape[0])
    size2 = int(array2.shape[0])
    
    #store original sizes of real data for future use after padding occurs.
    orig_size1 = size1
    orig_size2 = size2
        
    # Calculate padding necesarry to make size1.len = size2.len
    padding_value = abs( size1 - size2 )
   
    #pad the smallest array's end to be the same size as the larger.
    #print "Padding by difference between sizes: " + str(padding_value)
    if size1<size2:
        array1 = np.pad(array1, (0,padding_value), mode = 'constant', constant_values = 0)
    if size2<size1:
        array2 = np.pad(array2, (0,padding_value), mode = 'constant', constant_values = 0)

    # To check lengths are the same still, and update to new values.
    size1 = int(array1.shape[0])
    size2 = int(array2.shape[0])
    if size1 != size2:
        #print "Difference between shapes is, " + str(size1) + " and " + str(size2) + "." 
        print "Error, the two arrays failed to pad properly."
    
    ## Calculate a new offset using FFT
    A = fftpack.fft(array1)
    B = fftpack.fft(array2)
    Ar = -A.conjugate()
    Br = -B.conjugate()
    offset1test = np.argmax(np.abs(fftpack.ifft(Ar*B)))
    offset2test = np.argmax(np.abs(fftpack.ifft(A*Br))) ## This is simply the overshoot of offset1 past the length of the array2
    
    ## Depending on which offset is shorter, depends on which way to move the arrays relative to each other to achieve maximum correlation.
    if offset1test < offset2test:
        #print "offset of " + str(offset1test) + " applied on RAW data"
        offset1 = offset1test
        offset2 = 0
    else:
        #print "offset of " + str(offset2test) + " applied on BMI data"
        offset1 = 0
        offset2 = offset2test
        
    
    ## Pad by the offset calculated by the correlation onto the beginning of the first array, and onto the end of the second.
    ## Note this is calculated depending on the smallest movement, and is thus applied as oether offset 1 or offset 2, but switching
    ## which dataset to add it to the front or back of.
    array1 = np.pad(array1, (int(offset1),int(offset2)), mode = 'constant', constant_values = 0)
    array2 = np.pad(array2, (int(offset2),int(offset1)), mode = 'constant', constant_values = 0)
    
    # Calculate the points of overlap between the two arrays
    point1 = offset1 + offset2
    point2 = min(offset1 + orig_size1, offset2 + orig_size2)
    
    # If draw is yes, then draw the plots for testingcakes, THIS IF STATEMENT CAN BE DELETED IF NOT PLOTTING
    if draw == 'yes':
        plt.figure(1)
        plt.xlabel('Time')
        plt.ylabel('Frequency')
        plt.subplot(211)
        plt.specgram(array1, NFFT = 512, Fs = 44100)
        plt.axvline((point1+1)/44100.00, linewidth=2, color='r')
        plt.axvline((point2-1)/44100.00, linewidth=2, color='r')
        plt.subplot(212)
        plt.specgram(array2, NFFT = 512, Fs = 44100)
        plt.axvline((point1+1)/44100.00, linewidth=2, color='r')
        plt.axvline((point2-1)/44100.00, linewidth=2, color='r')
        plt.show()
    
    comparearray1 = array1[point1:point2]
    comparearray2 = array2[point1:point2]

    # Return only the maximum overlap of the two arrays at the maximum correlation point offset.
    return comparearray1, comparearray2



In [2]:



In [2]:



In [2]:



In [2]:



In [2]:



In [2]:



In [2]:



In [2]:



In [2]:



In [2]:



In [ ]: