Librosa - detect bat pulses in time domain

This is an early test to check whether librosa can be used to detect bat pulses and pulse sequences in the time domain.

Bat sound is normally "sparse data", whereas music is "dense data". The idea here is to first find the sound peaks that stand out from the background noise, and afterwards use FFT on those parts only.

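The hop length is set to 1 ms in the examples below, i.e. the sampling frequency divided by 1000 gives the hop length in samples (for example 384 samples at 384 kHz). That makes it easy to handle all sampling frequencies in use, from 192 to 500 kHz.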

Alternatives in librosa that might work (but I have just started to use it and there are probably many more alternatives):

  • onset-detection. (71 ms)
  • rmse and localmax. (5 ms)
  • onset_strength and peak_pick. (68 ms)
  • peak_pick. (13 ms)

Note: Variables assigned inside a "%%timeit" cell are not stored, so they cannot be used in the following notebook cells.


In [ ]:


In [1]:
# Show figures inline in the notebook and set the default figure size
# used by all plots below.
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (15, 4)

In [2]:
import numpy as np
import librosa

onset_detect


In [3]:
def test_onset_detect(y, hop_length):
    """Locate pulse onsets in ``y`` using ``librosa.onset.onset_detect``.

    Every sample whose magnitude is below 1.5 times the RMS of the whole
    signal is set to zero first, so that only the parts standing out from
    the background are handed to the onset detector.

    Note: the sampling rate is read from the notebook-level variable ``sr``
    (assigned by the ``librosa.load`` cells); it is not a parameter.

    Parameters
    ----------
    y : array
        Audio signal. It is not modified; the gating is done on a copy.
    hop_length : int
        Hop length in samples, used both for the onset detection and for
        converting the detected frames back to sample positions.

    Returns
    -------
    array
        Sample positions of the detected onsets.
    """
    # Gate level: 1.5 x the RMS of the complete recording.
    signal_rms = np.sqrt(np.mean(y**2))
    gate_level = signal_rms * 1.5

    gated = y.copy()
    quiet = np.abs(y) < gate_level
    gated[quiet] = 0.0

    onset_frames = librosa.onset.onset_detect(
        y=gated, sr=sr, hop_length=hop_length
    )
    return librosa.frames_to_samples(onset_frames, hop_length=hop_length)

In [4]:
# Load the Mdau recording. `sr` becomes the notebook-level sampling rate
# that test_onset_detect reads. hop_length=384 samples.
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_onset_detect(y, 384)
print(len(index_list))
# Last expression of the cell: shows the detected sample positions.
index_list


12
Out[4]:
array([ 20736,  51072,  85632, 116736, 151680, 182784, 220416, 253056,
       283008, 310272, 342528, 373632])

In [5]:
%%timeit
# Benchmark only: times one test_onset_detect run on the currently loaded `y`.
index_list = test_onset_detect(y, 384)


52.2 ms ± 8.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

In [6]:
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_onset_detect(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


12

In [7]:
y, sr = librosa.load('../data_in/Ppip_TE384.wav', sr=None)
index_list = test_onset_detect(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


16

In [8]:
y, sr = librosa.load('../data_in/Myotis-Plecotus-Eptesicus_TE384.wav', sr=None)
index_list = test_onset_detect(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


2352

In [ ]:

rmse + localmax


In [9]:
def test_rmse_localmax(y, hop_length):
    """Locate pulses as local maxima of the frame-wise RMS energy.

    Every sample whose magnitude is below 1.5 times the RMS of the whole
    signal is set to zero first. The frame-wise RMS of the gated signal is
    then computed with ``librosa.feature.rms`` and its local maxima are
    reported as pulse positions.

    Parameters
    ----------
    y : array
        Mono audio signal. It is not modified; the gating is done on a copy.
    hop_length : int
        Hop length in samples. It is used both for the RMS framing and for
        converting frame numbers back to sample positions, so the two
        always agree (the RMS step previously used a fixed 384).

    Returns
    -------
    array
        Sample positions of the frames that are local RMS maxima.
    """
    rms_tot = np.sqrt(np.mean(y**2))
    y2 = y.copy()
    y2[np.abs(y) < (rms_tot * 1.5)] = 0.0

    rmse = librosa.feature.rms(
        y=y2, hop_length=hop_length, frame_length=1024, center=True
    )
    # For a mono signal the result has a single row; work on that row as a
    # 1-D sequence of per-frame RMS values.
    locmax = librosa.util.localmax(rmse[0])
    # Frame numbers where the local-maximum flag is set.
    max_frames = np.flatnonzero(locmax)
    return librosa.frames_to_samples(max_frames, hop_length=hop_length)

In [10]:
# Load the Mdau recording and list the RMS local-maximum positions
# found with hop_length=384 samples.
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_rmse_localmax(y, 384)
print(len(index_list))
# Last expression of the cell: shows the detected sample positions.
index_list


13
Out[10]:
array([ 21120,  51840,  85248,  86784, 117888, 152064, 183936, 220800,
       253824, 283392, 311040, 342912, 374400])

In [11]:
%%timeit
# Benchmark only: times one test_rmse_localmax run on the currently loaded `y`.
test_rmse_localmax(y, 384)


4.26 ms ± 444 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In [ ]:


In [12]:
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_rmse_localmax(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


13

In [13]:
y, sr = librosa.load('../data_in/Ppip_TE384.wav', sr=None)
index_list = test_rmse_localmax(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


16

In [14]:
y, sr = librosa.load('../data_in/Myotis-Plecotus-Eptesicus_TE384.wav', sr=None)
index_list = test_rmse_localmax(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Unlike the other plot cells, the dot height is taken from a window centred
# on the detected index (200 samples before to 200 samples after).
plt.scatter(index_list, [y[x-200:x+200].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


5661

In [ ]:

onset_strength and peak_pick


In [ ]:


In [15]:
def test_onset_strength_and_peak_pick(y, hop_length):
    """Locate pulses by peak-picking the onset-strength envelope.

    Every sample whose magnitude is below 1.5 times the RMS of the whole
    signal is set to zero first. The onset-strength envelope of the gated
    signal is computed with ``librosa.onset.onset_strength`` (median
    aggregation) and its peaks are selected with
    ``librosa.util.peak_pick``.

    Note: the sampling rate is read from the notebook-level variable ``sr``
    (assigned by the ``librosa.load`` cells); it is not a parameter.

    Parameters
    ----------
    y : array
        Audio signal. It is not modified; the gating is done on a copy.
    hop_length : int
        Hop length in samples. It is used both for the onset envelope and
        for converting frame numbers back to sample positions, so the two
        always agree (the envelope step previously used a fixed 384).

    Returns
    -------
    array
        Sample positions of the picked envelope peaks.
    """
    rms_tot = np.sqrt(np.mean(y**2))
    y2 = y.copy()
    y2[np.abs(y) < (rms_tot * 1.5)] = 0.0

    onset_env = librosa.onset.onset_strength(
        y=y2, sr=sr, hop_length=hop_length, aggregate=np.median
    )
    # Keyword arguments keep the same values as the former positional call
    # (3, 3, 3, 5, 0.5, 10) and are also accepted by librosa versions where
    # these parameters are keyword-only.
    peak_index_list = librosa.util.peak_pick(
        onset_env,
        pre_max=3,
        post_max=3,
        pre_avg=3,
        post_avg=5,
        delta=0.5,
        wait=10,
    )
    return librosa.frames_to_samples(peak_index_list, hop_length=hop_length)

In [16]:
# Load the Mdau recording. `sr` becomes the notebook-level sampling rate
# that test_onset_strength_and_peak_pick reads. hop_length=384 samples.
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_onset_strength_and_peak_pick(y, 384)
print(len(index_list))
# Last expression of the cell: shows the detected sample positions.
index_list


12
Out[16]:
array([ 21120,  51072,  86016, 116736, 152064, 183168, 220800, 253056,
       283008, 310656, 342528, 374016])

In [17]:
%%timeit
# Benchmark only: times one test_onset_strength_and_peak_pick run on the currently loaded `y`.
test_onset_strength_and_peak_pick(y, 384)


49.9 ms ± 5.13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

In [ ]:


In [18]:
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_onset_strength_and_peak_pick(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


12

In [19]:
y, sr = librosa.load('../data_in/Ppip_TE384.wav', sr=None)
index_list = test_onset_strength_and_peak_pick(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


16

In [20]:
y, sr = librosa.load('../data_in/Myotis-Plecotus-Eptesicus_TE384.wav', sr=None)
index_list = test_onset_strength_and_peak_pick(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


1652

In [ ]:

peak_pick


In [21]:
def test_peak_pick(y, hop_length):
    """Pick peaks directly on the gated waveform with ``librosa.util.peak_pick``.

    Every sample whose magnitude is below 1.5 times the RMS of the whole
    signal is set to zero first. Peak picking then runs on the gated
    samples themselves (no framing), so all window sizes are in samples
    and the result is already a list of sample positions.

    Parameters
    ----------
    y : array
        Audio signal. It is not modified; the gating is done on a copy.
    hop_length : int
        Number of samples per millisecond. The max window is a quarter of
        it on each side, the mean window an eighth on each side, and the
        minimum distance between picked peaks is ``skip_ms`` (1) times it.

    Returns
    -------
    array
        Sample positions of the picked peaks.
    """
    rms_tot = np.sqrt(np.mean(y**2))
    y2 = y.copy()
    y2[np.abs(y) < (rms_tot * 1.5)] = 0.0

    # The value passed in is used as "samples per millisecond" here.
    samples_per_ms = hop_length
    skip_ms = 1
    sensitivity = rms_tot * 1.5  # 0.1

    # Window sizes are whole numbers of samples; round fractional results
    # up explicitly so that plain integers are handed to peak_pick.
    minmax_window = int(np.ceil(samples_per_ms / 4))
    mean_window = int(np.ceil(samples_per_ms / 8))
    wait = int(np.ceil(samples_per_ms * skip_ms))

    # Keyword arguments keep the former positional order and are also
    # accepted by librosa versions where these parameters are keyword-only.
    index_list = librosa.util.peak_pick(
        y2,
        pre_max=minmax_window,
        post_max=minmax_window,
        pre_avg=mean_window,
        post_avg=mean_window,
        delta=sensitivity,
        wait=wait,
    )

    return index_list

In [22]:
# Load the Mdau recording and list the peaks picked directly on the
# waveform, with 384 passed as the samples-per-millisecond value.
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_peak_pick(y, 384)
print(len(index_list))
# Last expression of the cell: shows the detected sample positions.
index_list


20
Out[22]:
array([ 21140,  51114,  52002,  85686,  86705, 116787, 117211, 117771,
       151960, 182976, 184089, 220752, 253038, 254041, 283252, 284733,
       311067, 342957, 344337, 374260])

In [23]:
%%timeit
test_peak_pick(y, 384)


12.6 ms ± 520 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In [ ]:


In [24]:
y, sr = librosa.load('../data_in/Mdau_TE384.wav', sr=None)
index_list = test_peak_pick(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


20

In [25]:
y, sr = librosa.load('../data_in/Ppip_TE384.wav', sr=None)
index_list = test_peak_pick(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


61

In [26]:
y, sr = librosa.load('../data_in/Myotis-Plecotus-Eptesicus_TE384.wav', sr=None)
index_list = test_peak_pick(y, 384)
# Compare to original signal. Red dots are peaks.
plt.plot(y)
# Each dot is drawn at the maximum of the 400 samples following the detected index.
plt.scatter(index_list, [y[x:x+400].max() for x in index_list], color='r')
print(len(index_list))
plt.show()


10272

In [ ]:


In [ ]: